{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.990403071017274,
  "eval_steps": 500,
  "global_step": 1950,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0025591810620601407,
      "grad_norm": 5.72460191514523,
      "learning_rate": 2.0512820512820514e-07,
      "loss": 0.8785,
      "step": 1
    },
    {
      "epoch": 0.005118362124120281,
      "grad_norm": 5.959578433623624,
      "learning_rate": 4.102564102564103e-07,
      "loss": 0.8678,
      "step": 2
    },
    {
      "epoch": 0.007677543186180422,
      "grad_norm": 5.902957688136316,
      "learning_rate": 6.153846153846155e-07,
      "loss": 0.8968,
      "step": 3
    },
    {
      "epoch": 0.010236724248240563,
      "grad_norm": 5.4723551383557805,
      "learning_rate": 8.205128205128206e-07,
      "loss": 0.8418,
      "step": 4
    },
    {
      "epoch": 0.012795905310300703,
      "grad_norm": 5.787912289462209,
      "learning_rate": 1.0256410256410257e-06,
      "loss": 0.886,
      "step": 5
    },
    {
      "epoch": 0.015355086372360844,
      "grad_norm": 5.557852691465803,
      "learning_rate": 1.230769230769231e-06,
      "loss": 0.9033,
      "step": 6
    },
    {
      "epoch": 0.017914267434420986,
      "grad_norm": 5.362076082832634,
      "learning_rate": 1.4358974358974359e-06,
      "loss": 0.8629,
      "step": 7
    },
    {
      "epoch": 0.020473448496481125,
      "grad_norm": 4.370734339586599,
      "learning_rate": 1.6410256410256412e-06,
      "loss": 0.8157,
      "step": 8
    },
    {
      "epoch": 0.023032629558541268,
      "grad_norm": 4.493266725432695,
      "learning_rate": 1.8461538461538465e-06,
      "loss": 0.8392,
      "step": 9
    },
    {
      "epoch": 0.025591810620601407,
      "grad_norm": 4.121132813839215,
      "learning_rate": 2.0512820512820513e-06,
      "loss": 0.8246,
      "step": 10
    },
    {
      "epoch": 0.02815099168266155,
      "grad_norm": 2.4085810485641095,
      "learning_rate": 2.2564102564102566e-06,
      "loss": 0.8009,
      "step": 11
    },
    {
      "epoch": 0.030710172744721688,
      "grad_norm": 2.3319618373499087,
      "learning_rate": 2.461538461538462e-06,
      "loss": 0.8189,
      "step": 12
    },
    {
      "epoch": 0.03326935380678183,
      "grad_norm": 2.050198471153189,
      "learning_rate": 2.666666666666667e-06,
      "loss": 0.7671,
      "step": 13
    },
    {
      "epoch": 0.03582853486884197,
      "grad_norm": 2.5524552964667926,
      "learning_rate": 2.8717948717948717e-06,
      "loss": 0.7587,
      "step": 14
    },
    {
      "epoch": 0.03838771593090211,
      "grad_norm": 3.44805626463955,
      "learning_rate": 3.0769230769230774e-06,
      "loss": 0.779,
      "step": 15
    },
    {
      "epoch": 0.04094689699296225,
      "grad_norm": 3.559383059115368,
      "learning_rate": 3.2820512820512823e-06,
      "loss": 0.758,
      "step": 16
    },
    {
      "epoch": 0.04350607805502239,
      "grad_norm": 3.33809383465494,
      "learning_rate": 3.487179487179487e-06,
      "loss": 0.7136,
      "step": 17
    },
    {
      "epoch": 0.046065259117082535,
      "grad_norm": 3.2168388473087757,
      "learning_rate": 3.692307692307693e-06,
      "loss": 0.7369,
      "step": 18
    },
    {
      "epoch": 0.04862444017914268,
      "grad_norm": 2.5872597896051728,
      "learning_rate": 3.897435897435898e-06,
      "loss": 0.7323,
      "step": 19
    },
    {
      "epoch": 0.05118362124120281,
      "grad_norm": 2.0282374599316957,
      "learning_rate": 4.102564102564103e-06,
      "loss": 0.6876,
      "step": 20
    },
    {
      "epoch": 0.053742802303262956,
      "grad_norm": 1.5964078137928233,
      "learning_rate": 4.307692307692308e-06,
      "loss": 0.6678,
      "step": 21
    },
    {
      "epoch": 0.0563019833653231,
      "grad_norm": 1.2909627910012984,
      "learning_rate": 4.512820512820513e-06,
      "loss": 0.662,
      "step": 22
    },
    {
      "epoch": 0.05886116442738324,
      "grad_norm": 1.3177057124827334,
      "learning_rate": 4.717948717948718e-06,
      "loss": 0.6594,
      "step": 23
    },
    {
      "epoch": 0.061420345489443376,
      "grad_norm": 1.24942825931957,
      "learning_rate": 4.923076923076924e-06,
      "loss": 0.626,
      "step": 24
    },
    {
      "epoch": 0.06397952655150352,
      "grad_norm": 1.2673077068864744,
      "learning_rate": 5.128205128205128e-06,
      "loss": 0.6347,
      "step": 25
    },
    {
      "epoch": 0.06653870761356366,
      "grad_norm": 1.1563656900829429,
      "learning_rate": 5.333333333333334e-06,
      "loss": 0.6329,
      "step": 26
    },
    {
      "epoch": 0.0690978886756238,
      "grad_norm": 1.044094666100426,
      "learning_rate": 5.538461538461539e-06,
      "loss": 0.6254,
      "step": 27
    },
    {
      "epoch": 0.07165706973768395,
      "grad_norm": 0.9466038321235274,
      "learning_rate": 5.743589743589743e-06,
      "loss": 0.6295,
      "step": 28
    },
    {
      "epoch": 0.07421625079974409,
      "grad_norm": 0.7981163236308523,
      "learning_rate": 5.948717948717949e-06,
      "loss": 0.6155,
      "step": 29
    },
    {
      "epoch": 0.07677543186180422,
      "grad_norm": 0.9968280326546483,
      "learning_rate": 6.153846153846155e-06,
      "loss": 0.6113,
      "step": 30
    },
    {
      "epoch": 0.07933461292386436,
      "grad_norm": 0.9260190035124614,
      "learning_rate": 6.358974358974359e-06,
      "loss": 0.636,
      "step": 31
    },
    {
      "epoch": 0.0818937939859245,
      "grad_norm": 0.8477667711908703,
      "learning_rate": 6.564102564102565e-06,
      "loss": 0.63,
      "step": 32
    },
    {
      "epoch": 0.08445297504798464,
      "grad_norm": 0.6532816121925329,
      "learning_rate": 6.76923076923077e-06,
      "loss": 0.6108,
      "step": 33
    },
    {
      "epoch": 0.08701215611004479,
      "grad_norm": 0.7821169327010173,
      "learning_rate": 6.974358974358974e-06,
      "loss": 0.6408,
      "step": 34
    },
    {
      "epoch": 0.08957133717210493,
      "grad_norm": 0.7393409032705888,
      "learning_rate": 7.17948717948718e-06,
      "loss": 0.5915,
      "step": 35
    },
    {
      "epoch": 0.09213051823416507,
      "grad_norm": 0.6644188521415291,
      "learning_rate": 7.384615384615386e-06,
      "loss": 0.5961,
      "step": 36
    },
    {
      "epoch": 0.09468969929622521,
      "grad_norm": 0.593116184422468,
      "learning_rate": 7.58974358974359e-06,
      "loss": 0.5971,
      "step": 37
    },
    {
      "epoch": 0.09724888035828536,
      "grad_norm": 0.6919315440965734,
      "learning_rate": 7.794871794871796e-06,
      "loss": 0.5725,
      "step": 38
    },
    {
      "epoch": 0.09980806142034548,
      "grad_norm": 0.6959155805001593,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.5685,
      "step": 39
    },
    {
      "epoch": 0.10236724248240563,
      "grad_norm": 0.6398643666989815,
      "learning_rate": 8.205128205128205e-06,
      "loss": 0.6138,
      "step": 40
    },
    {
      "epoch": 0.10492642354446577,
      "grad_norm": 0.6361698523409376,
      "learning_rate": 8.410256410256411e-06,
      "loss": 0.5849,
      "step": 41
    },
    {
      "epoch": 0.10748560460652591,
      "grad_norm": 0.5303957188864263,
      "learning_rate": 8.615384615384617e-06,
      "loss": 0.5533,
      "step": 42
    },
    {
      "epoch": 0.11004478566858605,
      "grad_norm": 0.5370326395081426,
      "learning_rate": 8.820512820512821e-06,
      "loss": 0.5833,
      "step": 43
    },
    {
      "epoch": 0.1126039667306462,
      "grad_norm": 0.590660924049006,
      "learning_rate": 9.025641025641027e-06,
      "loss": 0.5888,
      "step": 44
    },
    {
      "epoch": 0.11516314779270634,
      "grad_norm": 0.540686200850521,
      "learning_rate": 9.230769230769232e-06,
      "loss": 0.5624,
      "step": 45
    },
    {
      "epoch": 0.11772232885476648,
      "grad_norm": 0.5777571666796689,
      "learning_rate": 9.435897435897436e-06,
      "loss": 0.5851,
      "step": 46
    },
    {
      "epoch": 0.12028150991682661,
      "grad_norm": 0.537318145066867,
      "learning_rate": 9.641025641025642e-06,
      "loss": 0.5644,
      "step": 47
    },
    {
      "epoch": 0.12284069097888675,
      "grad_norm": 0.5211947061981134,
      "learning_rate": 9.846153846153848e-06,
      "loss": 0.5591,
      "step": 48
    },
    {
      "epoch": 0.1253998720409469,
      "grad_norm": 0.5397404148687415,
      "learning_rate": 1.0051282051282052e-05,
      "loss": 0.5843,
      "step": 49
    },
    {
      "epoch": 0.12795905310300704,
      "grad_norm": 0.5297926804183296,
      "learning_rate": 1.0256410256410256e-05,
      "loss": 0.5277,
      "step": 50
    },
    {
      "epoch": 0.13051823416506717,
      "grad_norm": 0.605652388150248,
      "learning_rate": 1.0461538461538463e-05,
      "loss": 0.5724,
      "step": 51
    },
    {
      "epoch": 0.13307741522712732,
      "grad_norm": 0.45885002080359344,
      "learning_rate": 1.0666666666666667e-05,
      "loss": 0.5381,
      "step": 52
    },
    {
      "epoch": 0.13563659628918745,
      "grad_norm": 0.5930020802013372,
      "learning_rate": 1.0871794871794871e-05,
      "loss": 0.6045,
      "step": 53
    },
    {
      "epoch": 0.1381957773512476,
      "grad_norm": 0.5808129528368039,
      "learning_rate": 1.1076923076923079e-05,
      "loss": 0.5222,
      "step": 54
    },
    {
      "epoch": 0.14075495841330773,
      "grad_norm": 0.5154128262574531,
      "learning_rate": 1.1282051282051283e-05,
      "loss": 0.5578,
      "step": 55
    },
    {
      "epoch": 0.1433141394753679,
      "grad_norm": 0.5390655268219918,
      "learning_rate": 1.1487179487179487e-05,
      "loss": 0.5436,
      "step": 56
    },
    {
      "epoch": 0.14587332053742802,
      "grad_norm": 0.5586414645653933,
      "learning_rate": 1.1692307692307694e-05,
      "loss": 0.5353,
      "step": 57
    },
    {
      "epoch": 0.14843250159948818,
      "grad_norm": 0.6534105047151474,
      "learning_rate": 1.1897435897435898e-05,
      "loss": 0.5392,
      "step": 58
    },
    {
      "epoch": 0.1509916826615483,
      "grad_norm": 0.5945111461514314,
      "learning_rate": 1.2102564102564102e-05,
      "loss": 0.5542,
      "step": 59
    },
    {
      "epoch": 0.15355086372360843,
      "grad_norm": 0.6505380466948517,
      "learning_rate": 1.230769230769231e-05,
      "loss": 0.5252,
      "step": 60
    },
    {
      "epoch": 0.1561100447856686,
      "grad_norm": 0.6510336772233184,
      "learning_rate": 1.2512820512820514e-05,
      "loss": 0.5683,
      "step": 61
    },
    {
      "epoch": 0.15866922584772872,
      "grad_norm": 0.5509745528884461,
      "learning_rate": 1.2717948717948718e-05,
      "loss": 0.5205,
      "step": 62
    },
    {
      "epoch": 0.16122840690978887,
      "grad_norm": 0.5917102526960739,
      "learning_rate": 1.2923076923076925e-05,
      "loss": 0.5603,
      "step": 63
    },
    {
      "epoch": 0.163787587971849,
      "grad_norm": 0.560110790537656,
      "learning_rate": 1.312820512820513e-05,
      "loss": 0.545,
      "step": 64
    },
    {
      "epoch": 0.16634676903390916,
      "grad_norm": 0.5678363411921677,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 0.5287,
      "step": 65
    },
    {
      "epoch": 0.1689059500959693,
      "grad_norm": 0.5819806020456917,
      "learning_rate": 1.353846153846154e-05,
      "loss": 0.5535,
      "step": 66
    },
    {
      "epoch": 0.17146513115802944,
      "grad_norm": 0.5558896303005907,
      "learning_rate": 1.3743589743589745e-05,
      "loss": 0.5648,
      "step": 67
    },
    {
      "epoch": 0.17402431222008957,
      "grad_norm": 0.5818297224628268,
      "learning_rate": 1.3948717948717949e-05,
      "loss": 0.545,
      "step": 68
    },
    {
      "epoch": 0.1765834932821497,
      "grad_norm": 0.5604385516101225,
      "learning_rate": 1.4153846153846156e-05,
      "loss": 0.5625,
      "step": 69
    },
    {
      "epoch": 0.17914267434420986,
      "grad_norm": 0.5262255043539227,
      "learning_rate": 1.435897435897436e-05,
      "loss": 0.553,
      "step": 70
    },
    {
      "epoch": 0.18170185540626999,
      "grad_norm": 0.5449392960823104,
      "learning_rate": 1.4564102564102564e-05,
      "loss": 0.532,
      "step": 71
    },
    {
      "epoch": 0.18426103646833014,
      "grad_norm": 0.5757069226256301,
      "learning_rate": 1.4769230769230772e-05,
      "loss": 0.538,
      "step": 72
    },
    {
      "epoch": 0.18682021753039027,
      "grad_norm": 0.5241911795419738,
      "learning_rate": 1.4974358974358976e-05,
      "loss": 0.537,
      "step": 73
    },
    {
      "epoch": 0.18937939859245043,
      "grad_norm": 0.4755835874750557,
      "learning_rate": 1.517948717948718e-05,
      "loss": 0.5015,
      "step": 74
    },
    {
      "epoch": 0.19193857965451055,
      "grad_norm": 0.5816260566225624,
      "learning_rate": 1.5384615384615387e-05,
      "loss": 0.5317,
      "step": 75
    },
    {
      "epoch": 0.1944977607165707,
      "grad_norm": 0.5324895484169254,
      "learning_rate": 1.558974358974359e-05,
      "loss": 0.503,
      "step": 76
    },
    {
      "epoch": 0.19705694177863084,
      "grad_norm": 0.5873638181887759,
      "learning_rate": 1.5794871794871795e-05,
      "loss": 0.5571,
      "step": 77
    },
    {
      "epoch": 0.19961612284069097,
      "grad_norm": 0.5466165214856151,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.5177,
      "step": 78
    },
    {
      "epoch": 0.20217530390275112,
      "grad_norm": 0.6209989234883905,
      "learning_rate": 1.6205128205128207e-05,
      "loss": 0.5268,
      "step": 79
    },
    {
      "epoch": 0.20473448496481125,
      "grad_norm": 0.5479973413858492,
      "learning_rate": 1.641025641025641e-05,
      "loss": 0.5096,
      "step": 80
    },
    {
      "epoch": 0.2072936660268714,
      "grad_norm": 0.7512485115452843,
      "learning_rate": 1.6615384615384618e-05,
      "loss": 0.5398,
      "step": 81
    },
    {
      "epoch": 0.20985284708893154,
      "grad_norm": 0.5988795561569944,
      "learning_rate": 1.6820512820512822e-05,
      "loss": 0.5054,
      "step": 82
    },
    {
      "epoch": 0.2124120281509917,
      "grad_norm": 0.6349565283068188,
      "learning_rate": 1.7025641025641026e-05,
      "loss": 0.5301,
      "step": 83
    },
    {
      "epoch": 0.21497120921305182,
      "grad_norm": 0.7482046004578073,
      "learning_rate": 1.7230769230769234e-05,
      "loss": 0.5451,
      "step": 84
    },
    {
      "epoch": 0.21753039027511195,
      "grad_norm": 0.5452089019203088,
      "learning_rate": 1.7435897435897438e-05,
      "loss": 0.537,
      "step": 85
    },
    {
      "epoch": 0.2200895713371721,
      "grad_norm": 0.6062443857702727,
      "learning_rate": 1.7641025641025642e-05,
      "loss": 0.5057,
      "step": 86
    },
    {
      "epoch": 0.22264875239923224,
      "grad_norm": 0.6133096727031904,
      "learning_rate": 1.784615384615385e-05,
      "loss": 0.549,
      "step": 87
    },
    {
      "epoch": 0.2252079334612924,
      "grad_norm": 0.6734829746990577,
      "learning_rate": 1.8051282051282053e-05,
      "loss": 0.5549,
      "step": 88
    },
    {
      "epoch": 0.22776711452335252,
      "grad_norm": 0.5692319701895174,
      "learning_rate": 1.8256410256410257e-05,
      "loss": 0.5053,
      "step": 89
    },
    {
      "epoch": 0.23032629558541268,
      "grad_norm": 0.6551305606242741,
      "learning_rate": 1.8461538461538465e-05,
      "loss": 0.5451,
      "step": 90
    },
    {
      "epoch": 0.2328854766474728,
      "grad_norm": 0.5869184912875696,
      "learning_rate": 1.866666666666667e-05,
      "loss": 0.5573,
      "step": 91
    },
    {
      "epoch": 0.23544465770953296,
      "grad_norm": 0.6175475389576918,
      "learning_rate": 1.8871794871794873e-05,
      "loss": 0.5239,
      "step": 92
    },
    {
      "epoch": 0.2380038387715931,
      "grad_norm": 0.6923526622405791,
      "learning_rate": 1.907692307692308e-05,
      "loss": 0.5178,
      "step": 93
    },
    {
      "epoch": 0.24056301983365322,
      "grad_norm": 0.6169855947769446,
      "learning_rate": 1.9282051282051284e-05,
      "loss": 0.5583,
      "step": 94
    },
    {
      "epoch": 0.24312220089571338,
      "grad_norm": 0.8905504059133514,
      "learning_rate": 1.9487179487179488e-05,
      "loss": 0.5577,
      "step": 95
    },
    {
      "epoch": 0.2456813819577735,
      "grad_norm": 0.7206999368653747,
      "learning_rate": 1.9692307692307696e-05,
      "loss": 0.5248,
      "step": 96
    },
    {
      "epoch": 0.24824056301983366,
      "grad_norm": 0.5689479892809097,
      "learning_rate": 1.98974358974359e-05,
      "loss": 0.5143,
      "step": 97
    },
    {
      "epoch": 0.2507997440818938,
      "grad_norm": 0.7120200186242683,
      "learning_rate": 2.0102564102564104e-05,
      "loss": 0.5363,
      "step": 98
    },
    {
      "epoch": 0.2533589251439539,
      "grad_norm": 0.5649172258139606,
      "learning_rate": 2.0307692307692308e-05,
      "loss": 0.5232,
      "step": 99
    },
    {
      "epoch": 0.2559181062060141,
      "grad_norm": 0.8530948421377756,
      "learning_rate": 2.0512820512820512e-05,
      "loss": 0.5127,
      "step": 100
    },
    {
      "epoch": 0.25847728726807423,
      "grad_norm": 0.7220421419640349,
      "learning_rate": 2.0717948717948723e-05,
      "loss": 0.5472,
      "step": 101
    },
    {
      "epoch": 0.26103646833013433,
      "grad_norm": 0.7179821332235292,
      "learning_rate": 2.0923076923076927e-05,
      "loss": 0.4748,
      "step": 102
    },
    {
      "epoch": 0.2635956493921945,
      "grad_norm": 0.5969399443763903,
      "learning_rate": 2.112820512820513e-05,
      "loss": 0.4869,
      "step": 103
    },
    {
      "epoch": 0.26615483045425464,
      "grad_norm": 0.7652225150209184,
      "learning_rate": 2.1333333333333335e-05,
      "loss": 0.5048,
      "step": 104
    },
    {
      "epoch": 0.2687140115163148,
      "grad_norm": 0.7026938678210959,
      "learning_rate": 2.153846153846154e-05,
      "loss": 0.5351,
      "step": 105
    },
    {
      "epoch": 0.2712731925783749,
      "grad_norm": 0.6625081293533241,
      "learning_rate": 2.1743589743589743e-05,
      "loss": 0.5071,
      "step": 106
    },
    {
      "epoch": 0.27383237364043506,
      "grad_norm": 0.7338103048357757,
      "learning_rate": 2.1948717948717954e-05,
      "loss": 0.5214,
      "step": 107
    },
    {
      "epoch": 0.2763915547024952,
      "grad_norm": 0.7157427738707126,
      "learning_rate": 2.2153846153846158e-05,
      "loss": 0.521,
      "step": 108
    },
    {
      "epoch": 0.27895073576455537,
      "grad_norm": 0.8391389612996835,
      "learning_rate": 2.235897435897436e-05,
      "loss": 0.5114,
      "step": 109
    },
    {
      "epoch": 0.28150991682661547,
      "grad_norm": 0.6739896722237592,
      "learning_rate": 2.2564102564102566e-05,
      "loss": 0.4607,
      "step": 110
    },
    {
      "epoch": 0.2840690978886756,
      "grad_norm": 0.7325968758566463,
      "learning_rate": 2.276923076923077e-05,
      "loss": 0.5114,
      "step": 111
    },
    {
      "epoch": 0.2866282789507358,
      "grad_norm": 0.9192765458484284,
      "learning_rate": 2.2974358974358974e-05,
      "loss": 0.5164,
      "step": 112
    },
    {
      "epoch": 0.2891874600127959,
      "grad_norm": 0.7638911037686114,
      "learning_rate": 2.3179487179487184e-05,
      "loss": 0.5056,
      "step": 113
    },
    {
      "epoch": 0.29174664107485604,
      "grad_norm": 0.7754803757011083,
      "learning_rate": 2.338461538461539e-05,
      "loss": 0.5137,
      "step": 114
    },
    {
      "epoch": 0.2943058221369162,
      "grad_norm": 0.7313220696083259,
      "learning_rate": 2.3589743589743593e-05,
      "loss": 0.5171,
      "step": 115
    },
    {
      "epoch": 0.29686500319897635,
      "grad_norm": 0.8944240636016003,
      "learning_rate": 2.3794871794871797e-05,
      "loss": 0.5631,
      "step": 116
    },
    {
      "epoch": 0.29942418426103645,
      "grad_norm": 0.827344957741263,
      "learning_rate": 2.4e-05,
      "loss": 0.5329,
      "step": 117
    },
    {
      "epoch": 0.3019833653230966,
      "grad_norm": 0.7763009278418379,
      "learning_rate": 2.4205128205128205e-05,
      "loss": 0.5341,
      "step": 118
    },
    {
      "epoch": 0.30454254638515676,
      "grad_norm": 0.7998736408167985,
      "learning_rate": 2.4410256410256415e-05,
      "loss": 0.52,
      "step": 119
    },
    {
      "epoch": 0.30710172744721687,
      "grad_norm": 0.7411952795822903,
      "learning_rate": 2.461538461538462e-05,
      "loss": 0.5418,
      "step": 120
    },
    {
      "epoch": 0.309660908509277,
      "grad_norm": 0.659770656309478,
      "learning_rate": 2.4820512820512824e-05,
      "loss": 0.5195,
      "step": 121
    },
    {
      "epoch": 0.3122200895713372,
      "grad_norm": 0.8056693118680838,
      "learning_rate": 2.5025641025641028e-05,
      "loss": 0.5215,
      "step": 122
    },
    {
      "epoch": 0.31477927063339733,
      "grad_norm": 0.9631898506281213,
      "learning_rate": 2.523076923076923e-05,
      "loss": 0.5283,
      "step": 123
    },
    {
      "epoch": 0.31733845169545744,
      "grad_norm": 0.7096814914325649,
      "learning_rate": 2.5435897435897436e-05,
      "loss": 0.5155,
      "step": 124
    },
    {
      "epoch": 0.3198976327575176,
      "grad_norm": 1.028582043530853,
      "learning_rate": 2.5641025641025646e-05,
      "loss": 0.5394,
      "step": 125
    },
    {
      "epoch": 0.32245681381957775,
      "grad_norm": 0.7239590324246933,
      "learning_rate": 2.584615384615385e-05,
      "loss": 0.5446,
      "step": 126
    },
    {
      "epoch": 0.32501599488163785,
      "grad_norm": 1.0571455117998556,
      "learning_rate": 2.6051282051282054e-05,
      "loss": 0.5335,
      "step": 127
    },
    {
      "epoch": 0.327575175943698,
      "grad_norm": 1.0256105590142106,
      "learning_rate": 2.625641025641026e-05,
      "loss": 0.5418,
      "step": 128
    },
    {
      "epoch": 0.33013435700575816,
      "grad_norm": 1.0959117099820284,
      "learning_rate": 2.6461538461538463e-05,
      "loss": 0.5544,
      "step": 129
    },
    {
      "epoch": 0.3326935380678183,
      "grad_norm": 0.7463871798931493,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 0.4965,
      "step": 130
    },
    {
      "epoch": 0.3352527191298784,
      "grad_norm": 1.0194762534931083,
      "learning_rate": 2.687179487179487e-05,
      "loss": 0.4776,
      "step": 131
    },
    {
      "epoch": 0.3378119001919386,
      "grad_norm": 0.7119748311745303,
      "learning_rate": 2.707692307692308e-05,
      "loss": 0.5203,
      "step": 132
    },
    {
      "epoch": 0.34037108125399873,
      "grad_norm": 0.920481871489979,
      "learning_rate": 2.7282051282051285e-05,
      "loss": 0.5142,
      "step": 133
    },
    {
      "epoch": 0.3429302623160589,
      "grad_norm": 0.7065977141822832,
      "learning_rate": 2.748717948717949e-05,
      "loss": 0.5349,
      "step": 134
    },
    {
      "epoch": 0.345489443378119,
      "grad_norm": 0.9570189898635619,
      "learning_rate": 2.7692307692307694e-05,
      "loss": 0.556,
      "step": 135
    },
    {
      "epoch": 0.34804862444017914,
      "grad_norm": 0.8774520896998024,
      "learning_rate": 2.7897435897435898e-05,
      "loss": 0.5221,
      "step": 136
    },
    {
      "epoch": 0.3506078055022393,
      "grad_norm": 0.9754340258356095,
      "learning_rate": 2.81025641025641e-05,
      "loss": 0.5165,
      "step": 137
    },
    {
      "epoch": 0.3531669865642994,
      "grad_norm": 0.9512013949257682,
      "learning_rate": 2.8307692307692312e-05,
      "loss": 0.5094,
      "step": 138
    },
    {
      "epoch": 0.35572616762635956,
      "grad_norm": 1.0101279645456138,
      "learning_rate": 2.8512820512820516e-05,
      "loss": 0.5134,
      "step": 139
    },
    {
      "epoch": 0.3582853486884197,
      "grad_norm": 0.9392456947082269,
      "learning_rate": 2.871794871794872e-05,
      "loss": 0.5154,
      "step": 140
    },
    {
      "epoch": 0.36084452975047987,
      "grad_norm": 1.064205953704163,
      "learning_rate": 2.8923076923076925e-05,
      "loss": 0.5422,
      "step": 141
    },
    {
      "epoch": 0.36340371081253997,
      "grad_norm": 0.8361885058587943,
      "learning_rate": 2.912820512820513e-05,
      "loss": 0.5045,
      "step": 142
    },
    {
      "epoch": 0.3659628918746001,
      "grad_norm": 1.0678541131176078,
      "learning_rate": 2.9333333333333333e-05,
      "loss": 0.4843,
      "step": 143
    },
    {
      "epoch": 0.3685220729366603,
      "grad_norm": 0.6800509515379447,
      "learning_rate": 2.9538461538461543e-05,
      "loss": 0.5256,
      "step": 144
    },
    {
      "epoch": 0.3710812539987204,
      "grad_norm": 0.7904486157434544,
      "learning_rate": 2.9743589743589747e-05,
      "loss": 0.5225,
      "step": 145
    },
    {
      "epoch": 0.37364043506078054,
      "grad_norm": 0.816468325578623,
      "learning_rate": 2.994871794871795e-05,
      "loss": 0.5389,
      "step": 146
    },
    {
      "epoch": 0.3761996161228407,
      "grad_norm": 0.7918935996515184,
      "learning_rate": 3.0153846153846155e-05,
      "loss": 0.5014,
      "step": 147
    },
    {
      "epoch": 0.37875879718490085,
      "grad_norm": 0.7555828789735101,
      "learning_rate": 3.035897435897436e-05,
      "loss": 0.5236,
      "step": 148
    },
    {
      "epoch": 0.38131797824696095,
      "grad_norm": 1.0603633188732544,
      "learning_rate": 3.0564102564102564e-05,
      "loss": 0.5271,
      "step": 149
    },
    {
      "epoch": 0.3838771593090211,
      "grad_norm": 0.8796724653002846,
      "learning_rate": 3.0769230769230774e-05,
      "loss": 0.5214,
      "step": 150
    },
    {
      "epoch": 0.38643634037108127,
      "grad_norm": 0.6823417266648101,
      "learning_rate": 3.097435897435898e-05,
      "loss": 0.492,
      "step": 151
    },
    {
      "epoch": 0.3889955214331414,
      "grad_norm": 0.9675008798265416,
      "learning_rate": 3.117948717948718e-05,
      "loss": 0.5282,
      "step": 152
    },
    {
      "epoch": 0.3915547024952015,
      "grad_norm": 1.0136226084949147,
      "learning_rate": 3.1384615384615386e-05,
      "loss": 0.5044,
      "step": 153
    },
    {
      "epoch": 0.3941138835572617,
      "grad_norm": 0.9351502869426284,
      "learning_rate": 3.158974358974359e-05,
      "loss": 0.5006,
      "step": 154
    },
    {
      "epoch": 0.39667306461932184,
      "grad_norm": 0.9882679082998469,
      "learning_rate": 3.1794871794871795e-05,
      "loss": 0.5013,
      "step": 155
    },
    {
      "epoch": 0.39923224568138194,
      "grad_norm": 0.9382011251547424,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 0.4777,
      "step": 156
    },
    {
      "epoch": 0.4017914267434421,
      "grad_norm": 0.9879747473370469,
      "learning_rate": 3.220512820512821e-05,
      "loss": 0.4718,
      "step": 157
    },
    {
      "epoch": 0.40435060780550225,
      "grad_norm": 0.9016818417869712,
      "learning_rate": 3.2410256410256413e-05,
      "loss": 0.5294,
      "step": 158
    },
    {
      "epoch": 0.4069097888675624,
      "grad_norm": 0.9938632562876675,
      "learning_rate": 3.261538461538462e-05,
      "loss": 0.4991,
      "step": 159
    },
    {
      "epoch": 0.4094689699296225,
      "grad_norm": 1.3410049525059016,
      "learning_rate": 3.282051282051282e-05,
      "loss": 0.5095,
      "step": 160
    },
    {
      "epoch": 0.41202815099168266,
      "grad_norm": 0.8871685101904818,
      "learning_rate": 3.3025641025641025e-05,
      "loss": 0.5483,
      "step": 161
    },
    {
      "epoch": 0.4145873320537428,
      "grad_norm": 1.2268108807413454,
      "learning_rate": 3.3230769230769236e-05,
      "loss": 0.5174,
      "step": 162
    },
    {
      "epoch": 0.4171465131158029,
      "grad_norm": 0.9220772123108049,
      "learning_rate": 3.343589743589744e-05,
      "loss": 0.5181,
      "step": 163
    },
    {
      "epoch": 0.4197056941778631,
      "grad_norm": 1.0658979698672157,
      "learning_rate": 3.3641025641025644e-05,
      "loss": 0.539,
      "step": 164
    },
    {
      "epoch": 0.42226487523992323,
      "grad_norm": 0.97733215203732,
      "learning_rate": 3.384615384615385e-05,
      "loss": 0.5191,
      "step": 165
    },
    {
      "epoch": 0.4248240563019834,
      "grad_norm": 1.1290947988408833,
      "learning_rate": 3.405128205128205e-05,
      "loss": 0.5169,
      "step": 166
    },
    {
      "epoch": 0.4273832373640435,
      "grad_norm": 0.8361689719032068,
      "learning_rate": 3.4256410256410256e-05,
      "loss": 0.5114,
      "step": 167
    },
    {
      "epoch": 0.42994241842610365,
      "grad_norm": 0.9928527289292229,
      "learning_rate": 3.446153846153847e-05,
      "loss": 0.5123,
      "step": 168
    },
    {
      "epoch": 0.4325015994881638,
      "grad_norm": 0.7704183020179163,
      "learning_rate": 3.466666666666667e-05,
      "loss": 0.4828,
      "step": 169
    },
    {
      "epoch": 0.4350607805502239,
      "grad_norm": 0.9283903900385092,
      "learning_rate": 3.4871794871794875e-05,
      "loss": 0.5226,
      "step": 170
    },
    {
      "epoch": 0.43761996161228406,
      "grad_norm": 0.925519484340918,
      "learning_rate": 3.507692307692308e-05,
      "loss": 0.4956,
      "step": 171
    },
    {
      "epoch": 0.4401791426743442,
      "grad_norm": 0.9993231714335514,
      "learning_rate": 3.5282051282051283e-05,
      "loss": 0.5355,
      "step": 172
    },
    {
      "epoch": 0.44273832373640437,
      "grad_norm": 1.311826250820302,
      "learning_rate": 3.548717948717949e-05,
      "loss": 0.5237,
      "step": 173
    },
    {
      "epoch": 0.44529750479846447,
      "grad_norm": 0.9273485381049265,
      "learning_rate": 3.56923076923077e-05,
      "loss": 0.5131,
      "step": 174
    },
    {
      "epoch": 0.44785668586052463,
      "grad_norm": 1.248607874192031,
      "learning_rate": 3.58974358974359e-05,
      "loss": 0.4972,
      "step": 175
    },
    {
      "epoch": 0.4504158669225848,
      "grad_norm": 0.8222900953155937,
      "learning_rate": 3.6102564102564106e-05,
      "loss": 0.5364,
      "step": 176
    },
    {
      "epoch": 0.45297504798464494,
      "grad_norm": 1.123693838028357,
      "learning_rate": 3.630769230769231e-05,
      "loss": 0.5219,
      "step": 177
    },
    {
      "epoch": 0.45553422904670504,
      "grad_norm": 0.7807365104513995,
      "learning_rate": 3.6512820512820514e-05,
      "loss": 0.5182,
      "step": 178
    },
    {
      "epoch": 0.4580934101087652,
      "grad_norm": 0.9231872939650723,
      "learning_rate": 3.671794871794872e-05,
      "loss": 0.5235,
      "step": 179
    },
    {
      "epoch": 0.46065259117082535,
      "grad_norm": 0.8053927537642723,
      "learning_rate": 3.692307692307693e-05,
      "loss": 0.5202,
      "step": 180
    },
    {
      "epoch": 0.46321177223288545,
      "grad_norm": 1.1551405361338565,
      "learning_rate": 3.712820512820513e-05,
      "loss": 0.5173,
      "step": 181
    },
    {
      "epoch": 0.4657709532949456,
      "grad_norm": 0.9439986501141405,
      "learning_rate": 3.733333333333334e-05,
      "loss": 0.5232,
      "step": 182
    },
    {
      "epoch": 0.46833013435700577,
      "grad_norm": 1.106193836601767,
      "learning_rate": 3.753846153846154e-05,
      "loss": 0.5533,
      "step": 183
    },
    {
      "epoch": 0.4708893154190659,
      "grad_norm": 0.794287368963475,
      "learning_rate": 3.7743589743589745e-05,
      "loss": 0.4762,
      "step": 184
    },
    {
      "epoch": 0.473448496481126,
      "grad_norm": 0.8611752707863242,
      "learning_rate": 3.794871794871795e-05,
      "loss": 0.5274,
      "step": 185
    },
    {
      "epoch": 0.4760076775431862,
      "grad_norm": 0.8817686680939495,
      "learning_rate": 3.815384615384616e-05,
      "loss": 0.5188,
      "step": 186
    },
    {
      "epoch": 0.47856685860524634,
      "grad_norm": 0.8845212101910219,
      "learning_rate": 3.8358974358974364e-05,
      "loss": 0.5218,
      "step": 187
    },
    {
      "epoch": 0.48112603966730644,
      "grad_norm": 0.663678181823649,
      "learning_rate": 3.856410256410257e-05,
      "loss": 0.5227,
      "step": 188
    },
    {
      "epoch": 0.4836852207293666,
      "grad_norm": 0.755313719038617,
      "learning_rate": 3.876923076923077e-05,
      "loss": 0.5426,
      "step": 189
    },
    {
      "epoch": 0.48624440179142675,
      "grad_norm": 0.68323576325022,
      "learning_rate": 3.8974358974358976e-05,
      "loss": 0.5254,
      "step": 190
    },
    {
      "epoch": 0.4888035828534869,
      "grad_norm": 0.7623898782087327,
      "learning_rate": 3.917948717948718e-05,
      "loss": 0.5011,
      "step": 191
    },
    {
      "epoch": 0.491362763915547,
      "grad_norm": 0.6832707042582635,
      "learning_rate": 3.938461538461539e-05,
      "loss": 0.5318,
      "step": 192
    },
    {
      "epoch": 0.49392194497760716,
      "grad_norm": 0.8383249840589115,
      "learning_rate": 3.9589743589743595e-05,
      "loss": 0.5244,
      "step": 193
    },
    {
      "epoch": 0.4964811260396673,
      "grad_norm": 0.6885029349814699,
      "learning_rate": 3.97948717948718e-05,
      "loss": 0.5068,
      "step": 194
    },
    {
      "epoch": 0.4990403071017274,
      "grad_norm": 0.9336320586686789,
      "learning_rate": 4e-05,
      "loss": 0.5013,
      "step": 195
    },
    {
      "epoch": 0.5015994881637876,
      "grad_norm": 1.1273619177371816,
      "learning_rate": 3.999996795609852e-05,
      "loss": 0.4924,
      "step": 196
    },
    {
      "epoch": 0.5041586692258477,
      "grad_norm": 0.8484780646183507,
      "learning_rate": 3.9999871824496765e-05,
      "loss": 0.4774,
      "step": 197
    },
    {
      "epoch": 0.5067178502879078,
      "grad_norm": 0.7566071063905553,
      "learning_rate": 3.999971160550277e-05,
      "loss": 0.5328,
      "step": 198
    },
    {
      "epoch": 0.509277031349968,
      "grad_norm": 0.963017285802582,
      "learning_rate": 3.999948729962994e-05,
      "loss": 0.535,
      "step": 199
    },
    {
      "epoch": 0.5118362124120281,
      "grad_norm": 0.6417865549660366,
      "learning_rate": 3.9999198907597046e-05,
      "loss": 0.502,
      "step": 200
    },
    {
      "epoch": 0.5143953934740882,
      "grad_norm": 0.8877386892780115,
      "learning_rate": 3.999884643032821e-05,
      "loss": 0.513,
      "step": 201
    },
    {
      "epoch": 0.5169545745361485,
      "grad_norm": 0.7952767722764698,
      "learning_rate": 3.999842986895289e-05,
      "loss": 0.5121,
      "step": 202
    },
    {
      "epoch": 0.5195137555982086,
      "grad_norm": 0.7352919644974529,
      "learning_rate": 3.999794922480593e-05,
      "loss": 0.4881,
      "step": 203
    },
    {
      "epoch": 0.5220729366602687,
      "grad_norm": 0.7101035899255743,
      "learning_rate": 3.9997404499427494e-05,
      "loss": 0.4941,
      "step": 204
    },
    {
      "epoch": 0.5246321177223289,
      "grad_norm": 0.7457517555879764,
      "learning_rate": 3.9996795694563096e-05,
      "loss": 0.5128,
      "step": 205
    },
    {
      "epoch": 0.527191298784389,
      "grad_norm": 0.6573802709079243,
      "learning_rate": 3.999612281216358e-05,
      "loss": 0.4949,
      "step": 206
    },
    {
      "epoch": 0.5297504798464492,
      "grad_norm": 0.7445707355838409,
      "learning_rate": 3.9995385854385124e-05,
      "loss": 0.5036,
      "step": 207
    },
    {
      "epoch": 0.5323096609085093,
      "grad_norm": 0.7713581724495707,
      "learning_rate": 3.999458482358924e-05,
      "loss": 0.4985,
      "step": 208
    },
    {
      "epoch": 0.5348688419705694,
      "grad_norm": 0.7752015257631751,
      "learning_rate": 3.9993719722342726e-05,
      "loss": 0.508,
      "step": 209
    },
    {
      "epoch": 0.5374280230326296,
      "grad_norm": 1.133508847111212,
      "learning_rate": 3.999279055341771e-05,
      "loss": 0.5261,
      "step": 210
    },
    {
      "epoch": 0.5399872040946897,
      "grad_norm": 0.7726822279135389,
      "learning_rate": 3.999179731979162e-05,
      "loss": 0.4672,
      "step": 211
    },
    {
      "epoch": 0.5425463851567498,
      "grad_norm": 0.7766807053995074,
      "learning_rate": 3.9990740024647154e-05,
      "loss": 0.4984,
      "step": 212
    },
    {
      "epoch": 0.54510556621881,
      "grad_norm": 0.5303932042183875,
      "learning_rate": 3.9989618671372304e-05,
      "loss": 0.5129,
      "step": 213
    },
    {
      "epoch": 0.5476647472808701,
      "grad_norm": 0.6026485573392468,
      "learning_rate": 3.998843326356032e-05,
      "loss": 0.5127,
      "step": 214
    },
    {
      "epoch": 0.5502239283429302,
      "grad_norm": 0.6118913537653141,
      "learning_rate": 3.998718380500971e-05,
      "loss": 0.458,
      "step": 215
    },
    {
      "epoch": 0.5527831094049904,
      "grad_norm": 0.6445812096951002,
      "learning_rate": 3.998587029972423e-05,
      "loss": 0.495,
      "step": 216
    },
    {
      "epoch": 0.5553422904670505,
      "grad_norm": 0.7741176770417552,
      "learning_rate": 3.998449275191286e-05,
      "loss": 0.5095,
      "step": 217
    },
    {
      "epoch": 0.5579014715291107,
      "grad_norm": 0.6972261092630921,
      "learning_rate": 3.9983051165989814e-05,
      "loss": 0.4871,
      "step": 218
    },
    {
      "epoch": 0.5604606525911708,
      "grad_norm": 0.6664826347703592,
      "learning_rate": 3.998154554657448e-05,
      "loss": 0.5137,
      "step": 219
    },
    {
      "epoch": 0.5630198336532309,
      "grad_norm": 0.6334253599100873,
      "learning_rate": 3.997997589849145e-05,
      "loss": 0.5494,
      "step": 220
    },
    {
      "epoch": 0.5655790147152912,
      "grad_norm": 0.7145250281069672,
      "learning_rate": 3.99783422267705e-05,
      "loss": 0.5219,
      "step": 221
    },
    {
      "epoch": 0.5681381957773513,
      "grad_norm": 0.8804491997042745,
      "learning_rate": 3.997664453664654e-05,
      "loss": 0.5305,
      "step": 222
    },
    {
      "epoch": 0.5706973768394114,
      "grad_norm": 0.5941374342473514,
      "learning_rate": 3.9974882833559634e-05,
      "loss": 0.492,
      "step": 223
    },
    {
      "epoch": 0.5732565579014716,
      "grad_norm": 0.8541812515528515,
      "learning_rate": 3.997305712315497e-05,
      "loss": 0.4994,
      "step": 224
    },
    {
      "epoch": 0.5758157389635317,
      "grad_norm": 0.8858077653575287,
      "learning_rate": 3.9971167411282835e-05,
      "loss": 0.5268,
      "step": 225
    },
    {
      "epoch": 0.5783749200255918,
      "grad_norm": 1.0807623477992674,
      "learning_rate": 3.9969213703998606e-05,
      "loss": 0.5047,
      "step": 226
    },
    {
      "epoch": 0.580934101087652,
      "grad_norm": 0.8213345788017161,
      "learning_rate": 3.9967196007562725e-05,
      "loss": 0.5302,
      "step": 227
    },
    {
      "epoch": 0.5834932821497121,
      "grad_norm": 0.6055767495165976,
      "learning_rate": 3.996511432844067e-05,
      "loss": 0.4833,
      "step": 228
    },
    {
      "epoch": 0.5860524632117722,
      "grad_norm": 0.9265228707425465,
      "learning_rate": 3.996296867330296e-05,
      "loss": 0.5146,
      "step": 229
    },
    {
      "epoch": 0.5886116442738324,
      "grad_norm": 1.228465580948079,
      "learning_rate": 3.99607590490251e-05,
      "loss": 0.474,
      "step": 230
    },
    {
      "epoch": 0.5911708253358925,
      "grad_norm": 0.8130275473999699,
      "learning_rate": 3.9958485462687606e-05,
      "loss": 0.4767,
      "step": 231
    },
    {
      "epoch": 0.5937300063979527,
      "grad_norm": 0.780191780172052,
      "learning_rate": 3.995614792157592e-05,
      "loss": 0.5037,
      "step": 232
    },
    {
      "epoch": 0.5962891874600128,
      "grad_norm": 0.9566872727407667,
      "learning_rate": 3.995374643318045e-05,
      "loss": 0.5152,
      "step": 233
    },
    {
      "epoch": 0.5988483685220729,
      "grad_norm": 1.1646368305147163,
      "learning_rate": 3.9951281005196486e-05,
      "loss": 0.5329,
      "step": 234
    },
    {
      "epoch": 0.6014075495841331,
      "grad_norm": 0.7216132566248876,
      "learning_rate": 3.9948751645524235e-05,
      "loss": 0.5285,
      "step": 235
    },
    {
      "epoch": 0.6039667306461932,
      "grad_norm": 1.0575511476108006,
      "learning_rate": 3.994615836226874e-05,
      "loss": 0.5364,
      "step": 236
    },
    {
      "epoch": 0.6065259117082533,
      "grad_norm": 0.9622069565170775,
      "learning_rate": 3.994350116373991e-05,
      "loss": 0.5067,
      "step": 237
    },
    {
      "epoch": 0.6090850927703135,
      "grad_norm": 0.7168012454794431,
      "learning_rate": 3.9940780058452416e-05,
      "loss": 0.5012,
      "step": 238
    },
    {
      "epoch": 0.6116442738323736,
      "grad_norm": 0.8302983155644609,
      "learning_rate": 3.9937995055125774e-05,
      "loss": 0.5282,
      "step": 239
    },
    {
      "epoch": 0.6142034548944337,
      "grad_norm": 0.7497453674792344,
      "learning_rate": 3.9935146162684206e-05,
      "loss": 0.4992,
      "step": 240
    },
    {
      "epoch": 0.6167626359564939,
      "grad_norm": 0.8217939024893259,
      "learning_rate": 3.993223339025667e-05,
      "loss": 0.4895,
      "step": 241
    },
    {
      "epoch": 0.619321817018554,
      "grad_norm": 0.7219771415337329,
      "learning_rate": 3.992925674717683e-05,
      "loss": 0.4636,
      "step": 242
    },
    {
      "epoch": 0.6218809980806143,
      "grad_norm": 0.9341076990511636,
      "learning_rate": 3.9926216242983017e-05,
      "loss": 0.5008,
      "step": 243
    },
    {
      "epoch": 0.6244401791426744,
      "grad_norm": 0.7304570272364406,
      "learning_rate": 3.9923111887418185e-05,
      "loss": 0.4921,
      "step": 244
    },
    {
      "epoch": 0.6269993602047345,
      "grad_norm": 1.0471475376229655,
      "learning_rate": 3.9919943690429906e-05,
      "loss": 0.4768,
      "step": 245
    },
    {
      "epoch": 0.6295585412667947,
      "grad_norm": 0.693079198064159,
      "learning_rate": 3.991671166217031e-05,
      "loss": 0.4786,
      "step": 246
    },
    {
      "epoch": 0.6321177223288548,
      "grad_norm": 1.1561745746369312,
      "learning_rate": 3.991341581299609e-05,
      "loss": 0.5182,
      "step": 247
    },
    {
      "epoch": 0.6346769033909149,
      "grad_norm": 0.8741051048237501,
      "learning_rate": 3.991005615346843e-05,
      "loss": 0.5024,
      "step": 248
    },
    {
      "epoch": 0.6372360844529751,
      "grad_norm": 0.9086658125226366,
      "learning_rate": 3.990663269435298e-05,
      "loss": 0.4974,
      "step": 249
    },
    {
      "epoch": 0.6397952655150352,
      "grad_norm": 0.9761201164077572,
      "learning_rate": 3.9903145446619837e-05,
      "loss": 0.5112,
      "step": 250
    },
    {
      "epoch": 0.6423544465770953,
      "grad_norm": 0.8136560445992532,
      "learning_rate": 3.989959442144352e-05,
      "loss": 0.4971,
      "step": 251
    },
    {
      "epoch": 0.6449136276391555,
      "grad_norm": 0.6200675715165651,
      "learning_rate": 3.989597963020289e-05,
      "loss": 0.506,
      "step": 252
    },
    {
      "epoch": 0.6474728087012156,
      "grad_norm": 0.9727978732394552,
      "learning_rate": 3.989230108448115e-05,
      "loss": 0.5132,
      "step": 253
    },
    {
      "epoch": 0.6500319897632757,
      "grad_norm": 0.7588308378509555,
      "learning_rate": 3.9888558796065784e-05,
      "loss": 0.4948,
      "step": 254
    },
    {
      "epoch": 0.6525911708253359,
      "grad_norm": 0.8984908598875354,
      "learning_rate": 3.9884752776948564e-05,
      "loss": 0.4912,
      "step": 255
    },
    {
      "epoch": 0.655150351887396,
      "grad_norm": 0.9180268932937014,
      "learning_rate": 3.988088303932545e-05,
      "loss": 0.5058,
      "step": 256
    },
    {
      "epoch": 0.6577095329494562,
      "grad_norm": 0.9874072428401991,
      "learning_rate": 3.987694959559658e-05,
      "loss": 0.5265,
      "step": 257
    },
    {
      "epoch": 0.6602687140115163,
      "grad_norm": 0.7774534286927767,
      "learning_rate": 3.9872952458366267e-05,
      "loss": 0.5116,
      "step": 258
    },
    {
      "epoch": 0.6628278950735764,
      "grad_norm": 0.8189986183875947,
      "learning_rate": 3.9868891640442874e-05,
      "loss": 0.507,
      "step": 259
    },
    {
      "epoch": 0.6653870761356366,
      "grad_norm": 0.6489509247329256,
      "learning_rate": 3.9864767154838864e-05,
      "loss": 0.5111,
      "step": 260
    },
    {
      "epoch": 0.6679462571976967,
      "grad_norm": 0.8761101947097708,
      "learning_rate": 3.986057901477069e-05,
      "loss": 0.5127,
      "step": 261
    },
    {
      "epoch": 0.6705054382597568,
      "grad_norm": 0.9687654727768278,
      "learning_rate": 3.985632723365878e-05,
      "loss": 0.547,
      "step": 262
    },
    {
      "epoch": 0.673064619321817,
      "grad_norm": 0.7867771900738217,
      "learning_rate": 3.985201182512752e-05,
      "loss": 0.516,
      "step": 263
    },
    {
      "epoch": 0.6756238003838771,
      "grad_norm": 0.6453678386295493,
      "learning_rate": 3.984763280300514e-05,
      "loss": 0.481,
      "step": 264
    },
    {
      "epoch": 0.6781829814459372,
      "grad_norm": 0.7765079788191963,
      "learning_rate": 3.9843190181323744e-05,
      "loss": 0.4913,
      "step": 265
    },
    {
      "epoch": 0.6807421625079975,
      "grad_norm": 0.6373936761246601,
      "learning_rate": 3.983868397431923e-05,
      "loss": 0.5133,
      "step": 266
    },
    {
      "epoch": 0.6833013435700576,
      "grad_norm": 0.730921099373597,
      "learning_rate": 3.983411419643125e-05,
      "loss": 0.5339,
      "step": 267
    },
    {
      "epoch": 0.6858605246321178,
      "grad_norm": 0.8152333844378884,
      "learning_rate": 3.982948086230312e-05,
      "loss": 0.4865,
      "step": 268
    },
    {
      "epoch": 0.6884197056941779,
      "grad_norm": 0.7100466271823358,
      "learning_rate": 3.9824783986781897e-05,
      "loss": 0.49,
      "step": 269
    },
    {
      "epoch": 0.690978886756238,
      "grad_norm": 0.8934016406293627,
      "learning_rate": 3.982002358491817e-05,
      "loss": 0.5208,
      "step": 270
    },
    {
      "epoch": 0.6935380678182982,
      "grad_norm": 0.6756901306503084,
      "learning_rate": 3.981519967196614e-05,
      "loss": 0.5191,
      "step": 271
    },
    {
      "epoch": 0.6960972488803583,
      "grad_norm": 0.8009942346681957,
      "learning_rate": 3.98103122633835e-05,
      "loss": 0.5067,
      "step": 272
    },
    {
      "epoch": 0.6986564299424184,
      "grad_norm": 0.6923671981740297,
      "learning_rate": 3.980536137483141e-05,
      "loss": 0.4868,
      "step": 273
    },
    {
      "epoch": 0.7012156110044786,
      "grad_norm": 0.9566550041102404,
      "learning_rate": 3.980034702217445e-05,
      "loss": 0.5398,
      "step": 274
    },
    {
      "epoch": 0.7037747920665387,
      "grad_norm": 0.8440959667430001,
      "learning_rate": 3.979526922148058e-05,
      "loss": 0.4658,
      "step": 275
    },
    {
      "epoch": 0.7063339731285988,
      "grad_norm": 0.8588196985974998,
      "learning_rate": 3.9790127989021024e-05,
      "loss": 0.5052,
      "step": 276
    },
    {
      "epoch": 0.708893154190659,
      "grad_norm": 0.7562049758522937,
      "learning_rate": 3.978492334127032e-05,
      "loss": 0.5267,
      "step": 277
    },
    {
      "epoch": 0.7114523352527191,
      "grad_norm": 0.632945819901848,
      "learning_rate": 3.977965529490618e-05,
      "loss": 0.4618,
      "step": 278
    },
    {
      "epoch": 0.7140115163147792,
      "grad_norm": 0.845801145881355,
      "learning_rate": 3.9774323866809485e-05,
      "loss": 0.4683,
      "step": 279
    },
    {
      "epoch": 0.7165706973768394,
      "grad_norm": 0.7371708961015324,
      "learning_rate": 3.9768929074064206e-05,
      "loss": 0.5364,
      "step": 280
    },
    {
      "epoch": 0.7191298784388995,
      "grad_norm": 0.7371186918676583,
      "learning_rate": 3.976347093395736e-05,
      "loss": 0.5061,
      "step": 281
    },
    {
      "epoch": 0.7216890595009597,
      "grad_norm": 0.7032178872579914,
      "learning_rate": 3.9757949463978975e-05,
      "loss": 0.5242,
      "step": 282
    },
    {
      "epoch": 0.7242482405630198,
      "grad_norm": 0.7617726502548777,
      "learning_rate": 3.9752364681821973e-05,
      "loss": 0.4888,
      "step": 283
    },
    {
      "epoch": 0.7268074216250799,
      "grad_norm": 0.7903937882632156,
      "learning_rate": 3.9746716605382186e-05,
      "loss": 0.5203,
      "step": 284
    },
    {
      "epoch": 0.7293666026871402,
      "grad_norm": 0.7645749463474476,
      "learning_rate": 3.9741005252758255e-05,
      "loss": 0.5116,
      "step": 285
    },
    {
      "epoch": 0.7319257837492003,
      "grad_norm": 0.6220992771519867,
      "learning_rate": 3.973523064225159e-05,
      "loss": 0.4671,
      "step": 286
    },
    {
      "epoch": 0.7344849648112604,
      "grad_norm": 0.5115715364977104,
      "learning_rate": 3.972939279236627e-05,
      "loss": 0.4565,
      "step": 287
    },
    {
      "epoch": 0.7370441458733206,
      "grad_norm": 0.7021026496208411,
      "learning_rate": 3.9723491721809076e-05,
      "loss": 0.5379,
      "step": 288
    },
    {
      "epoch": 0.7396033269353807,
      "grad_norm": 0.5201603136030488,
      "learning_rate": 3.971752744948932e-05,
      "loss": 0.4692,
      "step": 289
    },
    {
      "epoch": 0.7421625079974408,
      "grad_norm": 0.8208102312039668,
      "learning_rate": 3.971149999451886e-05,
      "loss": 0.4882,
      "step": 290
    },
    {
      "epoch": 0.744721689059501,
      "grad_norm": 1.119540735969476,
      "learning_rate": 3.970540937621201e-05,
      "loss": 0.5097,
      "step": 291
    },
    {
      "epoch": 0.7472808701215611,
      "grad_norm": 0.7635743214902218,
      "learning_rate": 3.9699255614085495e-05,
      "loss": 0.5101,
      "step": 292
    },
    {
      "epoch": 0.7498400511836213,
      "grad_norm": 0.8126931797929314,
      "learning_rate": 3.969303872785837e-05,
      "loss": 0.4889,
      "step": 293
    },
    {
      "epoch": 0.7523992322456814,
      "grad_norm": 0.7624486728902423,
      "learning_rate": 3.9686758737451955e-05,
      "loss": 0.4966,
      "step": 294
    },
    {
      "epoch": 0.7549584133077415,
      "grad_norm": 0.5229197667386186,
      "learning_rate": 3.9680415662989806e-05,
      "loss": 0.4886,
      "step": 295
    },
    {
      "epoch": 0.7575175943698017,
      "grad_norm": 0.6766454901060076,
      "learning_rate": 3.967400952479759e-05,
      "loss": 0.4661,
      "step": 296
    },
    {
      "epoch": 0.7600767754318618,
      "grad_norm": 0.6030935943397303,
      "learning_rate": 3.966754034340308e-05,
      "loss": 0.5526,
      "step": 297
    },
    {
      "epoch": 0.7626359564939219,
      "grad_norm": 0.688481808856117,
      "learning_rate": 3.966100813953607e-05,
      "loss": 0.5191,
      "step": 298
    },
    {
      "epoch": 0.7651951375559821,
      "grad_norm": 0.8026340827764397,
      "learning_rate": 3.965441293412827e-05,
      "loss": 0.4875,
      "step": 299
    },
    {
      "epoch": 0.7677543186180422,
      "grad_norm": 0.5877161986618623,
      "learning_rate": 3.9647754748313294e-05,
      "loss": 0.5581,
      "step": 300
    },
    {
      "epoch": 0.7703134996801023,
      "grad_norm": 0.7230843693397704,
      "learning_rate": 3.964103360342658e-05,
      "loss": 0.4941,
      "step": 301
    },
    {
      "epoch": 0.7728726807421625,
      "grad_norm": 0.6543565273084903,
      "learning_rate": 3.963424952100529e-05,
      "loss": 0.4749,
      "step": 302
    },
    {
      "epoch": 0.7754318618042226,
      "grad_norm": 0.6083305140129942,
      "learning_rate": 3.962740252278827e-05,
      "loss": 0.506,
      "step": 303
    },
    {
      "epoch": 0.7779910428662828,
      "grad_norm": 0.6712573537812702,
      "learning_rate": 3.962049263071598e-05,
      "loss": 0.4752,
      "step": 304
    },
    {
      "epoch": 0.780550223928343,
      "grad_norm": 0.8280090720406486,
      "learning_rate": 3.96135198669304e-05,
      "loss": 0.4891,
      "step": 305
    },
    {
      "epoch": 0.783109404990403,
      "grad_norm": 0.6598880067981137,
      "learning_rate": 3.960648425377499e-05,
      "loss": 0.4947,
      "step": 306
    },
    {
      "epoch": 0.7856685860524633,
      "grad_norm": 0.7265475529158774,
      "learning_rate": 3.95993858137946e-05,
      "loss": 0.4989,
      "step": 307
    },
    {
      "epoch": 0.7882277671145234,
      "grad_norm": 0.5888767351942641,
      "learning_rate": 3.959222456973541e-05,
      "loss": 0.4829,
      "step": 308
    },
    {
      "epoch": 0.7907869481765835,
      "grad_norm": 0.6375937306972569,
      "learning_rate": 3.958500054454482e-05,
      "loss": 0.4574,
      "step": 309
    },
    {
      "epoch": 0.7933461292386437,
      "grad_norm": 0.6933840977854485,
      "learning_rate": 3.957771376137144e-05,
      "loss": 0.5059,
      "step": 310
    },
    {
      "epoch": 0.7959053103007038,
      "grad_norm": 0.6896810504909161,
      "learning_rate": 3.9570364243564966e-05,
      "loss": 0.4992,
      "step": 311
    },
    {
      "epoch": 0.7984644913627639,
      "grad_norm": 0.6468420638732157,
      "learning_rate": 3.9562952014676116e-05,
      "loss": 0.496,
      "step": 312
    },
    {
      "epoch": 0.8010236724248241,
      "grad_norm": 0.6274952773967104,
      "learning_rate": 3.955547709845656e-05,
      "loss": 0.4874,
      "step": 313
    },
    {
      "epoch": 0.8035828534868842,
      "grad_norm": 0.5653574700934775,
      "learning_rate": 3.9547939518858856e-05,
      "loss": 0.5067,
      "step": 314
    },
    {
      "epoch": 0.8061420345489443,
      "grad_norm": 0.6550905511901227,
      "learning_rate": 3.954033930003634e-05,
      "loss": 0.5212,
      "step": 315
    },
    {
      "epoch": 0.8087012156110045,
      "grad_norm": 0.7494837380638635,
      "learning_rate": 3.953267646634309e-05,
      "loss": 0.505,
      "step": 316
    },
    {
      "epoch": 0.8112603966730646,
      "grad_norm": 0.635770951888326,
      "learning_rate": 3.95249510423338e-05,
      "loss": 0.4967,
      "step": 317
    },
    {
      "epoch": 0.8138195777351248,
      "grad_norm": 0.7410641008583149,
      "learning_rate": 3.9517163052763756e-05,
      "loss": 0.4773,
      "step": 318
    },
    {
      "epoch": 0.8163787587971849,
      "grad_norm": 0.7149351574867076,
      "learning_rate": 3.9509312522588704e-05,
      "loss": 0.4709,
      "step": 319
    },
    {
      "epoch": 0.818937939859245,
      "grad_norm": 0.823519098221886,
      "learning_rate": 3.9501399476964806e-05,
      "loss": 0.4867,
      "step": 320
    },
    {
      "epoch": 0.8214971209213052,
      "grad_norm": 0.7163722784021596,
      "learning_rate": 3.9493423941248564e-05,
      "loss": 0.507,
      "step": 321
    },
    {
      "epoch": 0.8240563019833653,
      "grad_norm": 0.5792885526249222,
      "learning_rate": 3.948538594099668e-05,
      "loss": 0.4863,
      "step": 322
    },
    {
      "epoch": 0.8266154830454254,
      "grad_norm": 0.8674821812665616,
      "learning_rate": 3.9477285501966064e-05,
      "loss": 0.4497,
      "step": 323
    },
    {
      "epoch": 0.8291746641074856,
      "grad_norm": 0.6695505549743577,
      "learning_rate": 3.946912265011368e-05,
      "loss": 0.4853,
      "step": 324
    },
    {
      "epoch": 0.8317338451695457,
      "grad_norm": 0.6407618610078097,
      "learning_rate": 3.946089741159648e-05,
      "loss": 0.4742,
      "step": 325
    },
    {
      "epoch": 0.8342930262316058,
      "grad_norm": 0.8505071134562596,
      "learning_rate": 3.9452609812771346e-05,
      "loss": 0.5346,
      "step": 326
    },
    {
      "epoch": 0.836852207293666,
      "grad_norm": 0.5413673046728109,
      "learning_rate": 3.944425988019498e-05,
      "loss": 0.4677,
      "step": 327
    },
    {
      "epoch": 0.8394113883557262,
      "grad_norm": 0.9828950428531091,
      "learning_rate": 3.9435847640623806e-05,
      "loss": 0.4808,
      "step": 328
    },
    {
      "epoch": 0.8419705694177864,
      "grad_norm": 0.6480252890065408,
      "learning_rate": 3.942737312101394e-05,
      "loss": 0.5019,
      "step": 329
    },
    {
      "epoch": 0.8445297504798465,
      "grad_norm": 0.9281412616435286,
      "learning_rate": 3.9418836348521045e-05,
      "loss": 0.5069,
      "step": 330
    },
    {
      "epoch": 0.8470889315419066,
      "grad_norm": 0.6511602292237915,
      "learning_rate": 3.941023735050027e-05,
      "loss": 0.5135,
      "step": 331
    },
    {
      "epoch": 0.8496481126039668,
      "grad_norm": 0.649990025588154,
      "learning_rate": 3.9401576154506155e-05,
      "loss": 0.4721,
      "step": 332
    },
    {
      "epoch": 0.8522072936660269,
      "grad_norm": 0.8525830113834602,
      "learning_rate": 3.9392852788292556e-05,
      "loss": 0.4747,
      "step": 333
    },
    {
      "epoch": 0.854766474728087,
      "grad_norm": 0.8122595340814978,
      "learning_rate": 3.938406727981254e-05,
      "loss": 0.5036,
      "step": 334
    },
    {
      "epoch": 0.8573256557901472,
      "grad_norm": 0.6813807690997764,
      "learning_rate": 3.937521965721831e-05,
      "loss": 0.4778,
      "step": 335
    },
    {
      "epoch": 0.8598848368522073,
      "grad_norm": 0.742372369654133,
      "learning_rate": 3.936630994886109e-05,
      "loss": 0.4912,
      "step": 336
    },
    {
      "epoch": 0.8624440179142674,
      "grad_norm": 0.6932498968117697,
      "learning_rate": 3.9357338183291066e-05,
      "loss": 0.5033,
      "step": 337
    },
    {
      "epoch": 0.8650031989763276,
      "grad_norm": 0.7002201428035697,
      "learning_rate": 3.934830438925728e-05,
      "loss": 0.4843,
      "step": 338
    },
    {
      "epoch": 0.8675623800383877,
      "grad_norm": 0.6143454063707157,
      "learning_rate": 3.933920859570753e-05,
      "loss": 0.4959,
      "step": 339
    },
    {
      "epoch": 0.8701215611004478,
      "grad_norm": 0.5609771595796579,
      "learning_rate": 3.933005083178828e-05,
      "loss": 0.4778,
      "step": 340
    },
    {
      "epoch": 0.872680742162508,
      "grad_norm": 0.5872526379907818,
      "learning_rate": 3.932083112684459e-05,
      "loss": 0.4736,
      "step": 341
    },
    {
      "epoch": 0.8752399232245681,
      "grad_norm": 0.5259898431650297,
      "learning_rate": 3.931154951041998e-05,
      "loss": 0.5061,
      "step": 342
    },
    {
      "epoch": 0.8777991042866283,
      "grad_norm": 0.6462230152484912,
      "learning_rate": 3.930220601225638e-05,
      "loss": 0.503,
      "step": 343
    },
    {
      "epoch": 0.8803582853486884,
      "grad_norm": 0.6210066174968442,
      "learning_rate": 3.9292800662294e-05,
      "loss": 0.4592,
      "step": 344
    },
    {
      "epoch": 0.8829174664107485,
      "grad_norm": 0.7033811879806838,
      "learning_rate": 3.928333349067125e-05,
      "loss": 0.4839,
      "step": 345
    },
    {
      "epoch": 0.8854766474728087,
      "grad_norm": 0.7112080589811673,
      "learning_rate": 3.927380452772464e-05,
      "loss": 0.4833,
      "step": 346
    },
    {
      "epoch": 0.8880358285348688,
      "grad_norm": 0.5763115473730898,
      "learning_rate": 3.926421380398869e-05,
      "loss": 0.5128,
      "step": 347
    },
    {
      "epoch": 0.8905950095969289,
      "grad_norm": 0.493333488186968,
      "learning_rate": 3.925456135019582e-05,
      "loss": 0.4777,
      "step": 348
    },
    {
      "epoch": 0.8931541906589892,
      "grad_norm": 0.5969713695047262,
      "learning_rate": 3.924484719727625e-05,
      "loss": 0.5548,
      "step": 349
    },
    {
      "epoch": 0.8957133717210493,
      "grad_norm": 0.5743073508713653,
      "learning_rate": 3.923507137635792e-05,
      "loss": 0.4993,
      "step": 350
    },
    {
      "epoch": 0.8982725527831094,
      "grad_norm": 0.5816084208432016,
      "learning_rate": 3.922523391876638e-05,
      "loss": 0.4974,
      "step": 351
    },
    {
      "epoch": 0.9008317338451696,
      "grad_norm": 0.6913456504659746,
      "learning_rate": 3.921533485602467e-05,
      "loss": 0.5038,
      "step": 352
    },
    {
      "epoch": 0.9033909149072297,
      "grad_norm": 0.5710576834895075,
      "learning_rate": 3.920537421985327e-05,
      "loss": 0.469,
      "step": 353
    },
    {
      "epoch": 0.9059500959692899,
      "grad_norm": 0.831878724155053,
      "learning_rate": 3.9195352042169924e-05,
      "loss": 0.5178,
      "step": 354
    },
    {
      "epoch": 0.90850927703135,
      "grad_norm": 0.5623208840830399,
      "learning_rate": 3.9185268355089606e-05,
      "loss": 0.4892,
      "step": 355
    },
    {
      "epoch": 0.9110684580934101,
      "grad_norm": 0.8167633776332197,
      "learning_rate": 3.9175123190924384e-05,
      "loss": 0.5193,
      "step": 356
    },
    {
      "epoch": 0.9136276391554703,
      "grad_norm": 0.5899572202270219,
      "learning_rate": 3.916491658218333e-05,
      "loss": 0.4739,
      "step": 357
    },
    {
      "epoch": 0.9161868202175304,
      "grad_norm": 0.7206391505617225,
      "learning_rate": 3.9154648561572386e-05,
      "loss": 0.4752,
      "step": 358
    },
    {
      "epoch": 0.9187460012795905,
      "grad_norm": 0.7210244727385913,
      "learning_rate": 3.91443191619943e-05,
      "loss": 0.4789,
      "step": 359
    },
    {
      "epoch": 0.9213051823416507,
      "grad_norm": 0.7320868272015927,
      "learning_rate": 3.913392841654851e-05,
      "loss": 0.503,
      "step": 360
    },
    {
      "epoch": 0.9238643634037108,
      "grad_norm": 0.8003299130587506,
      "learning_rate": 3.9123476358531e-05,
      "loss": 0.4917,
      "step": 361
    },
    {
      "epoch": 0.9264235444657709,
      "grad_norm": 0.7467581757493919,
      "learning_rate": 3.911296302143426e-05,
      "loss": 0.4973,
      "step": 362
    },
    {
      "epoch": 0.9289827255278311,
      "grad_norm": 0.695841862059534,
      "learning_rate": 3.9102388438947104e-05,
      "loss": 0.5014,
      "step": 363
    },
    {
      "epoch": 0.9315419065898912,
      "grad_norm": 0.8452597656759123,
      "learning_rate": 3.909175264495464e-05,
      "loss": 0.4528,
      "step": 364
    },
    {
      "epoch": 0.9341010876519513,
      "grad_norm": 0.8328804446765739,
      "learning_rate": 3.9081055673538093e-05,
      "loss": 0.5014,
      "step": 365
    },
    {
      "epoch": 0.9366602687140115,
      "grad_norm": 0.8976120702595518,
      "learning_rate": 3.907029755897473e-05,
      "loss": 0.4767,
      "step": 366
    },
    {
      "epoch": 0.9392194497760716,
      "grad_norm": 0.6729987700598375,
      "learning_rate": 3.905947833573775e-05,
      "loss": 0.4758,
      "step": 367
    },
    {
      "epoch": 0.9417786308381318,
      "grad_norm": 0.965419537056689,
      "learning_rate": 3.904859803849617e-05,
      "loss": 0.4952,
      "step": 368
    },
    {
      "epoch": 0.944337811900192,
      "grad_norm": 0.8355270729808283,
      "learning_rate": 3.903765670211469e-05,
      "loss": 0.5048,
      "step": 369
    },
    {
      "epoch": 0.946896992962252,
| "grad_norm": 0.7603742900226873, |
| "learning_rate": 3.902665436165364e-05, |
| "loss": 0.491, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.9494561740243123, |
| "grad_norm": 0.7054418825885687, |
| "learning_rate": 3.901559105236881e-05, |
| "loss": 0.4654, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.9520153550863724, |
| "grad_norm": 0.6840867683040738, |
| "learning_rate": 3.9004466809711343e-05, |
| "loss": 0.4789, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.9545745361484325, |
| "grad_norm": 0.7107919372914869, |
| "learning_rate": 3.8993281669327664e-05, |
| "loss": 0.5041, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.9571337172104927, |
| "grad_norm": 0.5705893437651786, |
| "learning_rate": 3.8982035667059327e-05, |
| "loss": 0.4724, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.9596928982725528, |
| "grad_norm": 0.6398479886368891, |
| "learning_rate": 3.897072883894291e-05, |
| "loss": 0.512, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.9622520793346129, |
| "grad_norm": 0.6496978842533705, |
| "learning_rate": 3.895936122120991e-05, |
| "loss": 0.4998, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.9648112603966731, |
| "grad_norm": 0.6636946077689827, |
| "learning_rate": 3.8947932850286585e-05, |
| "loss": 0.5105, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.9673704414587332, |
| "grad_norm": 0.8146680744120419, |
| "learning_rate": 3.893644376279392e-05, |
| "loss": 0.5081, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.9699296225207934, |
| "grad_norm": 0.6043005275708316, |
| "learning_rate": 3.8924893995547427e-05, |
| "loss": 0.465, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.9724888035828535, |
| "grad_norm": 0.7737447625323293, |
| "learning_rate": 3.8913283585557054e-05, |
| "loss": 0.4745, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.9750479846449136, |
| "grad_norm": 0.6584366000203595, |
| "learning_rate": 3.89016125700271e-05, |
| "loss": 0.4646, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.9776071657069738, |
| "grad_norm": 0.7267670466528602, |
| "learning_rate": 3.888988098635604e-05, |
| "loss": 0.5443, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.9801663467690339, |
| "grad_norm": 0.8281518075048059, |
| "learning_rate": 3.8878088872136446e-05, |
| "loss": 0.5175, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.982725527831094, |
| "grad_norm": 0.7524480266101858, |
| "learning_rate": 3.8866236265154864e-05, |
| "loss": 0.4752, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.9852847088931542, |
| "grad_norm": 0.8364410578252669, |
| "learning_rate": 3.885432320339167e-05, |
| "loss": 0.4752, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.9878438899552143, |
| "grad_norm": 0.684899934501505, |
| "learning_rate": 3.884234972502095e-05, |
| "loss": 0.4931, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.9904030710172744, |
| "grad_norm": 0.6158115167694288, |
| "learning_rate": 3.88303158684104e-05, |
| "loss": 0.4861, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.9929622520793346, |
| "grad_norm": 0.7078911086493701, |
| "learning_rate": 3.8818221672121204e-05, |
| "loss": 0.4921, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.9955214331413947, |
| "grad_norm": 0.5372294629344029, |
| "learning_rate": 3.8806067174907876e-05, |
| "loss": 0.513, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.9980806142034548, |
| "grad_norm": 0.7744974417312304, |
| "learning_rate": 3.879385241571817e-05, |
| "loss": 0.4669, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.000639795265515, |
| "grad_norm": 0.649847374858299, |
| "learning_rate": 3.878157743369294e-05, |
| "loss": 0.6153, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.0031989763275753, |
| "grad_norm": 0.7557922196471385, |
| "learning_rate": 3.876924226816602e-05, |
| "loss": 0.3771, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.0057581573896353, |
| "grad_norm": 0.720395789720446, |
| "learning_rate": 3.875684695866409e-05, |
| "loss": 0.4498, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.0083173384516955, |
| "grad_norm": 0.6684707254214445, |
| "learning_rate": 3.874439154490656e-05, |
| "loss": 0.4581, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.0108765195137557, |
| "grad_norm": 0.6845617164901984, |
| "learning_rate": 3.873187606680543e-05, |
| "loss": 0.4382, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.0134357005758157, |
| "grad_norm": 0.6687231254947439, |
| "learning_rate": 3.871930056446518e-05, |
| "loss": 0.3945, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.0159948816378759, |
| "grad_norm": 0.8306778742589912, |
| "learning_rate": 3.870666507818262e-05, |
| "loss": 0.4194, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.018554062699936, |
| "grad_norm": 0.7593090405257436, |
| "learning_rate": 3.869396964844679e-05, |
| "loss": 0.4378, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.021113243761996, |
| "grad_norm": 0.545915139395705, |
| "learning_rate": 3.8681214315938786e-05, |
| "loss": 0.4009, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.0236724248240563, |
| "grad_norm": 0.8728350624043225, |
| "learning_rate": 3.866839912153168e-05, |
| "loss": 0.4239, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.0262316058861165, |
| "grad_norm": 0.7685743303640589, |
| "learning_rate": 3.8655524106290345e-05, |
| "loss": 0.4433, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.0287907869481765, |
| "grad_norm": 0.6436857445045538, |
| "learning_rate": 3.864258931147136e-05, |
| "loss": 0.4135, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.0313499680102367, |
| "grad_norm": 0.6043825799230796, |
| "learning_rate": 3.862959477852285e-05, |
| "loss": 0.4511, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.033909149072297, |
| "grad_norm": 0.864882027042227, |
| "learning_rate": 3.8616540549084366e-05, |
| "loss": 0.4281, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.036468330134357, |
| "grad_norm": 0.5669229687455682, |
| "learning_rate": 3.860342666498677e-05, |
| "loss": 0.4265, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.0390275111964171, |
| "grad_norm": 1.092925757327788, |
| "learning_rate": 3.859025316825204e-05, |
| "loss": 0.4171, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.0415866922584773, |
| "grad_norm": 0.6235272525172618, |
| "learning_rate": 3.8577020101093214e-05, |
| "loss": 0.3889, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.0441458733205373, |
| "grad_norm": 0.7044477218033379, |
| "learning_rate": 3.856372750591419e-05, |
| "loss": 0.4268, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.0467050543825975, |
| "grad_norm": 0.6758623607211073, |
| "learning_rate": 3.8550375425309643e-05, |
| "loss": 0.376, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.0492642354446577, |
| "grad_norm": 0.703616580836483, |
| "learning_rate": 3.853696390206484e-05, |
| "loss": 0.4782, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.051823416506718, |
| "grad_norm": 0.7120884633418646, |
| "learning_rate": 3.8523492979155534e-05, |
| "loss": 0.4156, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.054382597568778, |
| "grad_norm": 0.7074414036356858, |
| "learning_rate": 3.850996269974782e-05, |
| "loss": 0.4044, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.0569417786308382, |
| "grad_norm": 0.6805988413911291, |
| "learning_rate": 3.849637310719799e-05, |
| "loss": 0.4659, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.0595009596928984, |
| "grad_norm": 0.751339193242575, |
| "learning_rate": 3.84827242450524e-05, |
| "loss": 0.4362, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.0620601407549584, |
| "grad_norm": 0.8444583705618753, |
| "learning_rate": 3.846901615704734e-05, |
| "loss": 0.4671, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.0646193218170186, |
| "grad_norm": 0.5494970933209252, |
| "learning_rate": 3.845524888710885e-05, |
| "loss": 0.4192, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.0671785028790788, |
| "grad_norm": 0.6871657555654107, |
| "learning_rate": 3.844142247935265e-05, |
| "loss": 0.4392, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.0697376839411388, |
| "grad_norm": 0.6255544924834299, |
| "learning_rate": 3.842753697808395e-05, |
| "loss": 0.4098, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.072296865003199, |
| "grad_norm": 0.5236343059225219, |
| "learning_rate": 3.84135924277973e-05, |
| "loss": 0.4039, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.0748560460652592, |
| "grad_norm": 0.7081136566843064, |
| "learning_rate": 3.839958887317649e-05, |
| "loss": 0.42, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.0774152271273192, |
| "grad_norm": 0.630614377419562, |
| "learning_rate": 3.838552635909436e-05, |
| "loss": 0.4065, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.0799744081893794, |
| "grad_norm": 0.5615204386321961, |
| "learning_rate": 3.8371404930612704e-05, |
| "loss": 0.4146, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.0825335892514396, |
| "grad_norm": 0.6565436867003143, |
| "learning_rate": 3.835722463298208e-05, |
| "loss": 0.4064, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.0850927703134996, |
| "grad_norm": 0.5737679071890739, |
| "learning_rate": 3.83429855116417e-05, |
| "loss": 0.4552, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.0876519513755598, |
| "grad_norm": 0.6434114879042129, |
| "learning_rate": 3.832868761221926e-05, |
| "loss": 0.4441, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.09021113243762, |
| "grad_norm": 0.6708602456648777, |
| "learning_rate": 3.831433098053082e-05, |
| "loss": 0.4022, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.09277031349968, |
| "grad_norm": 0.623494522486998, |
| "learning_rate": 3.829991566258061e-05, |
| "loss": 0.4043, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.0953294945617402, |
| "grad_norm": 0.698870377709763, |
| "learning_rate": 3.828544170456094e-05, |
| "loss": 0.4559, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.0978886756238004, |
| "grad_norm": 0.6409469981980804, |
| "learning_rate": 3.827090915285202e-05, |
| "loss": 0.423, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.1004478566858604, |
| "grad_norm": 0.7366884864992727, |
| "learning_rate": 3.825631805402182e-05, |
| "loss": 0.4878, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.1030070377479206, |
| "grad_norm": 0.6346910715057278, |
| "learning_rate": 3.824166845482591e-05, |
| "loss": 0.3875, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.1055662188099808, |
| "grad_norm": 0.598625329081273, |
| "learning_rate": 3.8226960402207316e-05, |
| "loss": 0.4201, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.108125399872041, |
| "grad_norm": 0.7955423553177828, |
| "learning_rate": 3.821219394329638e-05, |
| "loss": 0.468, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.110684580934101, |
| "grad_norm": 0.6767598095676679, |
| "learning_rate": 3.81973691254106e-05, |
| "loss": 0.4104, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.1132437619961613, |
| "grad_norm": 0.669121013587649, |
| "learning_rate": 3.818248599605448e-05, |
| "loss": 0.3625, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.1158029430582213, |
| "grad_norm": 0.9836919729491361, |
| "learning_rate": 3.816754460291936e-05, |
| "loss": 0.4852, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.1183621241202815, |
| "grad_norm": 0.9574882546263436, |
| "learning_rate": 3.8152544993883305e-05, |
| "loss": 0.4003, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.1209213051823417, |
| "grad_norm": 0.6647251327930666, |
| "learning_rate": 3.813748721701091e-05, |
| "loss": 0.4202, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.1234804862444019, |
| "grad_norm": 1.05006176773705, |
| "learning_rate": 3.812237132055317e-05, |
| "loss": 0.4341, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.1260396673064619, |
| "grad_norm": 0.7815506825876288, |
| "learning_rate": 3.810719735294731e-05, |
| "loss": 0.4748, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.128598848368522, |
| "grad_norm": 1.0347544390312602, |
| "learning_rate": 3.809196536281665e-05, |
| "loss": 0.4248, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.1311580294305823, |
| "grad_norm": 0.8757539929940961, |
| "learning_rate": 3.807667539897041e-05, |
| "loss": 0.3786, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.1337172104926423, |
| "grad_norm": 0.869699779958049, |
| "learning_rate": 3.8061327510403624e-05, |
| "loss": 0.4397, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.1362763915547025, |
| "grad_norm": 1.0544887595956864, |
| "learning_rate": 3.80459217462969e-05, |
| "loss": 0.4333, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.1388355726167627, |
| "grad_norm": 0.7148540444466959, |
| "learning_rate": 3.8030458156016326e-05, |
| "loss": 0.432, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.1413947536788227, |
| "grad_norm": 0.8417941713288487, |
| "learning_rate": 3.801493678911326e-05, |
| "loss": 0.4414, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.143953934740883, |
| "grad_norm": 0.9182856799978838, |
| "learning_rate": 3.799935769532425e-05, |
| "loss": 0.4318, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.1465131158029431, |
| "grad_norm": 0.6142048538916006, |
| "learning_rate": 3.798372092457076e-05, |
| "loss": 0.3898, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.1490722968650031, |
| "grad_norm": 0.8780972905895119, |
| "learning_rate": 3.796802652695911e-05, |
| "loss": 0.5123, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.1516314779270633, |
| "grad_norm": 0.7702566431359069, |
| "learning_rate": 3.795227455278029e-05, |
| "loss": 0.3752, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.1541906589891235, |
| "grad_norm": 0.6619464755572778, |
| "learning_rate": 3.7936465052509744e-05, |
| "loss": 0.4028, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.1567498400511835, |
| "grad_norm": 0.8748332649924442, |
| "learning_rate": 3.79205980768073e-05, |
| "loss": 0.4178, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.1593090211132437, |
| "grad_norm": 0.5921826549497154, |
| "learning_rate": 3.790467367651694e-05, |
| "loss": 0.4034, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.161868202175304, |
| "grad_norm": 0.8231905572303784, |
| "learning_rate": 3.788869190266664e-05, |
| "loss": 0.4934, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.164427383237364, |
| "grad_norm": 0.7187628790605729, |
| "learning_rate": 3.787265280646825e-05, |
| "loss": 0.4113, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.1669865642994242, |
| "grad_norm": 0.6550020914082988, |
| "learning_rate": 3.785655643931728e-05, |
| "loss": 0.4038, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.1695457453614844, |
| "grad_norm": 0.7857598212931832, |
| "learning_rate": 3.784040285279279e-05, |
| "loss": 0.4083, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.1721049264235446, |
| "grad_norm": 0.6478292689928322, |
| "learning_rate": 3.782419209865716e-05, |
| "loss": 0.387, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.1746641074856046, |
| "grad_norm": 0.6645094823023456, |
| "learning_rate": 3.780792422885597e-05, |
| "loss": 0.3904, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.1772232885476648, |
| "grad_norm": 0.6316724059123985, |
| "learning_rate": 3.7791599295517825e-05, |
| "loss": 0.4225, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.1797824696097248, |
| "grad_norm": 0.5440033363984303, |
| "learning_rate": 3.777521735095418e-05, |
| "loss": 0.4116, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.182341650671785, |
| "grad_norm": 0.7448918620076476, |
| "learning_rate": 3.7758778447659184e-05, |
| "loss": 0.4272, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.1849008317338452, |
| "grad_norm": 0.5835830556541663, |
| "learning_rate": 3.774228263830948e-05, |
| "loss": 0.3958, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.1874600127959054, |
| "grad_norm": 0.5982038873563293, |
| "learning_rate": 3.772572997576409e-05, |
| "loss": 0.4053, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.1900191938579654, |
| "grad_norm": 0.5062712599156436, |
| "learning_rate": 3.7709120513064196e-05, |
| "loss": 0.3874, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.1925783749200256, |
| "grad_norm": 0.606586616558905, |
| "learning_rate": 3.769245430343301e-05, |
| "loss": 0.4528, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.1951375559820858, |
| "grad_norm": 0.7220231131524425, |
| "learning_rate": 3.767573140027556e-05, |
| "loss": 0.433, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.1976967370441458, |
| "grad_norm": 0.6706551361731272, |
| "learning_rate": 3.7658951857178544e-05, |
| "loss": 0.443, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.200255918106206, |
| "grad_norm": 0.5717527361383908, |
| "learning_rate": 3.764211572791017e-05, |
| "loss": 0.4669, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.2028150991682662, |
| "grad_norm": 0.8106231588399798, |
| "learning_rate": 3.762522306641998e-05, |
| "loss": 0.406, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.2053742802303262, |
| "grad_norm": 0.5810325663311328, |
| "learning_rate": 3.760827392683863e-05, |
| "loss": 0.4304, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.2079334612923864, |
| "grad_norm": 0.8071567509837944, |
| "learning_rate": 3.759126836347779e-05, |
| "loss": 0.4044, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.2104926423544466, |
| "grad_norm": 0.700979544013499, |
| "learning_rate": 3.757420643082991e-05, |
| "loss": 0.4397, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.2130518234165066, |
| "grad_norm": 0.5863406678085621, |
| "learning_rate": 3.755708818356809e-05, |
| "loss": 0.4099, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.2156110044785668, |
| "grad_norm": 0.6016554944675842, |
| "learning_rate": 3.7539913676545874e-05, |
| "loss": 0.4107, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.218170185540627, |
| "grad_norm": 0.5831332187240583, |
| "learning_rate": 3.7522682964797066e-05, |
| "loss": 0.4023, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.220729366602687, |
| "grad_norm": 0.6093541377057935, |
| "learning_rate": 3.75053961035356e-05, |
| "loss": 0.4301, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.2232885476647473, |
| "grad_norm": 0.6465778983982824, |
| "learning_rate": 3.748805314815532e-05, |
| "loss": 0.3933, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.2258477287268075, |
| "grad_norm": 0.5808579764017139, |
| "learning_rate": 3.7470654154229834e-05, |
| "loss": 0.4386, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.2284069097888675, |
| "grad_norm": 0.7856607698111114, |
| "learning_rate": 3.745319917751229e-05, |
| "loss": 0.4201, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.2309660908509277, |
| "grad_norm": 0.5050539819501365, |
| "learning_rate": 3.743568827393525e-05, |
| "loss": 0.4773, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.2335252719129879, |
| "grad_norm": 0.8385075664472458, |
| "learning_rate": 3.741812149961049e-05, |
| "loss": 0.4041, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.236084452975048, |
| "grad_norm": 0.624705443326678, |
| "learning_rate": 3.740049891082879e-05, |
| "loss": 0.4157, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.238643634037108, |
| "grad_norm": 0.5436565794572649, |
| "learning_rate": 3.738282056405981e-05, |
| "loss": 0.3959, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.2412028150991683, |
| "grad_norm": 0.785730847073682, |
| "learning_rate": 3.736508651595188e-05, |
| "loss": 0.4413, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.2437619961612283, |
| "grad_norm": 0.5059153650301063, |
| "learning_rate": 3.734729682333179e-05, |
| "loss": 0.4033, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.2463211772232885, |
| "grad_norm": 0.609111900762071, |
| "learning_rate": 3.732945154320467e-05, |
| "loss": 0.4282, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.2488803582853487, |
| "grad_norm": 0.5621990676055562, |
| "learning_rate": 3.731155073275375e-05, |
| "loss": 0.411, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.251439539347409, |
| "grad_norm": 0.5476414987684265, |
| "learning_rate": 3.729359444934022e-05, |
| "loss": 0.4217, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.253998720409469, |
| "grad_norm": 0.5674756992559286, |
| "learning_rate": 3.727558275050301e-05, |
| "loss": 0.461, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.2565579014715291, |
| "grad_norm": 0.6949881690129194, |
| "learning_rate": 3.725751569395863e-05, |
| "loss": 0.4621, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.2591170825335891, |
| "grad_norm": 0.5458610089583471, |
| "learning_rate": 3.723939333760099e-05, |
| "loss": 0.4508, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.2616762635956493, |
| "grad_norm": 0.5906724584503589, |
| "learning_rate": 3.7221215739501176e-05, |
| "loss": 0.4276, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.2642354446577095, |
| "grad_norm": 0.4800178261816677, |
| "learning_rate": 3.720298295790732e-05, |
| "loss": 0.3921, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.2667946257197698, |
| "grad_norm": 0.6636123062743209, |
| "learning_rate": 3.718469505124434e-05, |
| "loss": 0.4584, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.2693538067818297, |
| "grad_norm": 0.5232376990167991, |
| "learning_rate": 3.716635207811385e-05, |
| "loss": 0.4168, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.27191298784389, |
| "grad_norm": 0.6767875708314827, |
| "learning_rate": 3.714795409729388e-05, |
| "loss": 0.4379, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.2744721689059502, |
| "grad_norm": 0.5605992428107962, |
| "learning_rate": 3.712950116773875e-05, |
| "loss": 0.4074, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.2770313499680102, |
| "grad_norm": 0.5932800163478181, |
| "learning_rate": 3.711099334857884e-05, |
| "loss": 0.4194, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.2795905310300704, |
| "grad_norm": 0.5438179500986763, |
| "learning_rate": 3.709243069912041e-05, |
| "loss": 0.3917, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.2821497120921306, |
| "grad_norm": 0.7190898552769511, |
| "learning_rate": 3.707381327884545e-05, |
| "loss": 0.4717, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.2847088931541908, |
| "grad_norm": 0.6816367378411385, |
| "learning_rate": 3.705514114741142e-05, |
| "loss": 0.3782, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.2872680742162508, |
| "grad_norm": 0.5116844048821922, |
| "learning_rate": 3.703641436465114e-05, |
| "loss": 0.4225, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.289827255278311, |
| "grad_norm": 0.6736005325770137, |
| "learning_rate": 3.70176329905725e-05, |
| "loss": 0.4444, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.292386436340371, |
| "grad_norm": 0.5864619464379411, |
| "learning_rate": 3.699879708535838e-05, |
| "loss": 0.4354, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.2949456174024312, |
| "grad_norm": 0.6078408475459052, |
| "learning_rate": 3.6979906709366334e-05, |
| "loss": 0.453, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.2975047984644914, |
| "grad_norm": 0.7409797972431034, |
| "learning_rate": 3.696096192312852e-05, |
| "loss": 0.4365, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.3000639795265516, |
| "grad_norm": 0.4775651025928874, |
| "learning_rate": 3.694196278735142e-05, |
| "loss": 0.4391, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.3026231605886116, |
| "grad_norm": 0.5823632608853917, |
| "learning_rate": 3.692290936291568e-05, |
| "loss": 0.3875, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.3051823416506718, |
| "grad_norm": 0.5307154121726435, |
| "learning_rate": 3.69038017108759e-05, |
| "loss": 0.4059, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.3077415227127318, |
| "grad_norm": 0.5835304311686238, |
| "learning_rate": 3.688463989246045e-05, |
| "loss": 0.4505, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.310300703774792, |
| "grad_norm": 0.5459622175115579, |
| "learning_rate": 3.686542396907128e-05, |
| "loss": 0.3994, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.3128598848368522, |
| "grad_norm": 0.5266102200698167, |
| "learning_rate": 3.6846154002283696e-05, |
| "loss": 0.3954, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.3154190658989124, |
| "grad_norm": 0.49436862141147236, |
| "learning_rate": 3.68268300538462e-05, |
| "loss": 0.4417, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.3179782469609724, |
| "grad_norm": 0.5981532644758176, |
| "learning_rate": 3.680745218568026e-05, |
| "loss": 0.4382, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.3205374280230326, |
| "grad_norm": 0.4757174461989479, |
| "learning_rate": 3.678802045988012e-05, |
| "loss": 0.3686, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.3230966090850926, |
| "grad_norm": 0.6321671394432529, |
| "learning_rate": 3.676853493871262e-05, |
| "loss": 0.4418, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.3256557901471528, |
| "grad_norm": 0.44736313515673864, |
| "learning_rate": 3.674899568461696e-05, |
| "loss": 0.4235, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.328214971209213, |
| "grad_norm": 0.5494531167802622, |
| "learning_rate": 3.6729402760204535e-05, |
| "loss": 0.4069, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.3307741522712733, |
| "grad_norm": 0.5068999873204574, |
| "learning_rate": 3.6709756228258735e-05, |
| "loss": 0.4284, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 0.506062505003235, |
| "learning_rate": 3.669005615173469e-05, |
| "loss": 0.4438, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.3358925143953935, |
| "grad_norm": 0.6379310859811307, |
| "learning_rate": 3.667030259375915e-05, |
| "loss": 0.4142, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.3384516954574537, |
| "grad_norm": 0.42277797644227044, |
| "learning_rate": 3.665049561763021e-05, |
| "loss": 0.3805, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.3410108765195137, |
| "grad_norm": 0.6090026974823245, |
| "learning_rate": 3.663063528681716e-05, |
| "loss": 0.4136, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.3435700575815739, |
| "grad_norm": 0.46562372404588254, |
| "learning_rate": 3.6610721664960236e-05, |
| "loss": 0.4354, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.346129238643634, |
| "grad_norm": 0.6500313998419536, |
| "learning_rate": 3.659075481587046e-05, |
| "loss": 0.4283, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.3486884197056943, |
| "grad_norm": 0.5211253922160387, |
| "learning_rate": 3.65707348035294e-05, |
| "loss": 0.4255, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.3512476007677543, |
| "grad_norm": 0.6220670330163766, |
| "learning_rate": 3.6550661692089e-05, |
| "loss": 0.4191, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.3538067818298145, |
| "grad_norm": 0.544711113768934, |
| "learning_rate": 3.6530535545871326e-05, |
| "loss": 0.436, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.3563659628918745, |
| "grad_norm": 0.7052970063859283, |
| "learning_rate": 3.65103564293684e-05, |
| "loss": 0.4949, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.3589251439539347, |
| "grad_norm": 0.5145642841555808, |
| "learning_rate": 3.6490124407242007e-05, |
| "loss": 0.4131, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.361484325015995, |
| "grad_norm": 0.5857771172798699, |
| "learning_rate": 3.646983954432342e-05, |
| "loss": 0.4146, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.3640435060780551, |
| "grad_norm": 0.4920261736044566, |
| "learning_rate": 3.644950190561325e-05, |
| "loss": 0.4284, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.3666026871401151, |
| "grad_norm": 0.5687057750503046, |
| "learning_rate": 3.642911155628124e-05, |
| "loss": 0.4514, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.3691618682021753, |
| "grad_norm": 0.5724699735691123, |
| "learning_rate": 3.640866856166601e-05, |
| "loss": 0.4539, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.3717210492642353, |
| "grad_norm": 0.5749209613889618, |
| "learning_rate": 3.6388172987274913e-05, |
| "loss": 0.3865, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.3742802303262955, |
| "grad_norm": 0.6228395395499405, |
| "learning_rate": 3.636762489878374e-05, |
| "loss": 0.4075, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.3768394113883557, |
| "grad_norm": 0.6903037733166263, |
| "learning_rate": 3.63470243620366e-05, |
| "loss": 0.4312, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.379398592450416, |
| "grad_norm": 0.678573324042214, |
| "learning_rate": 3.632637144304565e-05, |
| "loss": 0.4806, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.381957773512476, |
| "grad_norm": 0.6951420732428104, |
| "learning_rate": 3.6305666207990886e-05, |
| "loss": 0.439, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.3845169545745362, |
| "grad_norm": 0.5961242888653673, |
| "learning_rate": 3.628490872321998e-05, |
| "loss": 0.4205, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.3870761356365962, |
| "grad_norm": 0.7217418974601812, |
| "learning_rate": 3.626409905524799e-05, |
| "loss": 0.4707, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.3896353166986564, |
| "grad_norm": 0.45054510458685476, |
| "learning_rate": 3.624323727075723e-05, |
| "loss": 0.4145, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.3921944977607166, |
| "grad_norm": 0.7284286885213026, |
| "learning_rate": 3.622232343659698e-05, |
| "loss": 0.4299, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.3947536788227768, |
| "grad_norm": 0.6769810527885796, |
| "learning_rate": 3.6201357619783336e-05, |
| "loss": 0.4163, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.3973128598848368, |
| "grad_norm": 0.5221114603904703, |
| "learning_rate": 3.6180339887498953e-05, |
| "loss": 0.4443, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.399872040946897, |
| "grad_norm": 0.6008896686645977, |
| "learning_rate": 3.615927030709284e-05, |
| "loss": 0.4318, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.4024312220089572, |
| "grad_norm": 0.5343608825415525, |
| "learning_rate": 3.613814894608016e-05, |
| "loss": 0.4623, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.4049904030710172, |
| "grad_norm": 0.691771745080796, |
| "learning_rate": 3.6116975872141984e-05, |
| "loss": 0.4624, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.4075495841330774, |
| "grad_norm": 0.6259414433121822, |
| "learning_rate": 3.609575115312511e-05, |
| "loss": 0.4508, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.4101087651951376, |
| "grad_norm": 0.6649399879213752, |
| "learning_rate": 3.607447485704182e-05, |
| "loss": 0.4143, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.4126679462571978, |
| "grad_norm": 0.47253136462127165, |
| "learning_rate": 3.605314705206966e-05, |
| "loss": 0.4106, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.4152271273192578, |
| "grad_norm": 0.5556693107195737, |
| "learning_rate": 3.603176780655124e-05, |
| "loss": 0.4616, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.417786308381318, |
| "grad_norm": 0.4021083541093729, |
| "learning_rate": 3.601033718899401e-05, |
| "loss": 0.3928, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.420345489443378, |
| "grad_norm": 0.5351580026934937, |
| "learning_rate": 3.598885526807003e-05, |
| "loss": 0.4661, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.4229046705054382, |
| "grad_norm": 0.5247684064361327, |
| "learning_rate": 3.596732211261574e-05, |
| "loss": 0.4303, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.4254638515674984, |
| "grad_norm": 0.49894549324782106, |
| "learning_rate": 3.594573779163179e-05, |
| "loss": 0.3938, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.4280230326295587, |
| "grad_norm": 0.5751022801506976, |
| "learning_rate": 3.5924102374282754e-05, |
| "loss": 0.4401, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.4305822136916186, |
| "grad_norm": 0.4560617808969142, |
| "learning_rate": 3.590241592989696e-05, |
| "loss": 0.4241, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.4331413947536789, |
| "grad_norm": 0.6245655178581035, |
| "learning_rate": 3.5880678527966224e-05, |
| "loss": 0.4138, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.4357005758157388, |
| "grad_norm": 0.5355963534405355, |
| "learning_rate": 3.5858890238145674e-05, |
| "loss": 0.4145, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.438259756877799, |
| "grad_norm": 0.6422997737126074, |
| "learning_rate": 3.583705113025348e-05, |
| "loss": 0.4554, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.4408189379398593, |
| "grad_norm": 0.6761962261433756, |
| "learning_rate": 3.581516127427068e-05, |
| "loss": 0.4176, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.4433781190019195, |
| "grad_norm": 0.6187534229527232, |
| "learning_rate": 3.5793220740340904e-05, |
| "loss": 0.4255, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.4459373000639795, |
| "grad_norm": 0.5255489769173007, |
| "learning_rate": 3.577122959877017e-05, |
| "loss": 0.4147, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.4484964811260397, |
| "grad_norm": 0.5786362132356377, |
| "learning_rate": 3.57491879200267e-05, |
| "loss": 0.4018, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.4510556621880997, |
| "grad_norm": 0.5297316242089579, |
| "learning_rate": 3.572709577474062e-05, |
| "loss": 0.4446, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.4536148432501599, |
| "grad_norm": 0.5131558041266011, |
| "learning_rate": 3.570495323370378e-05, |
| "loss": 0.4475, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.45617402431222, |
| "grad_norm": 0.7521443480114743, |
| "learning_rate": 3.568276036786952e-05, |
| "loss": 0.4091, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.4587332053742803, |
| "grad_norm": 0.5969703958136027, |
| "learning_rate": 3.566051724835245e-05, |
| "loss": 0.4283, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.4612923864363403, |
| "grad_norm": 0.4037709403552394, |
| "learning_rate": 3.5638223946428194e-05, |
| "loss": 0.4271, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.4638515674984005, |
| "grad_norm": 0.5680529949916449, |
| "learning_rate": 3.561588053353319e-05, |
| "loss": 0.4253, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.4664107485604607, |
| "grad_norm": 0.4379512610916079, |
| "learning_rate": 3.559348708126445e-05, |
| "loss": 0.3955, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.4689699296225207, |
| "grad_norm": 0.6425572888305521, |
| "learning_rate": 3.557104366137934e-05, |
| "loss": 0.4208, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.471529110684581, |
| "grad_norm": 0.5147430638168887, |
| "learning_rate": 3.554855034579532e-05, |
| "loss": 0.4206, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.4740882917466411, |
| "grad_norm": 0.5773864382218094, |
| "learning_rate": 3.552600720658976e-05, |
| "loss": 0.3936, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.4766474728087013, |
| "grad_norm": 0.5531560341828542, |
| "learning_rate": 3.550341431599967e-05, |
| "loss": 0.4674, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.4792066538707613, |
| "grad_norm": 0.5447609179432047, |
| "learning_rate": 3.5480771746421494e-05, |
| "loss": 0.4032, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.4817658349328215, |
| "grad_norm": 0.572196110734813, |
| "learning_rate": 3.545807957041084e-05, |
| "loss": 0.4509, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.4843250159948815, |
| "grad_norm": 0.5028398333246705, |
| "learning_rate": 3.5435337860682304e-05, |
| "loss": 0.3334, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.4868841970569417, |
| "grad_norm": 0.7160945031568345, |
| "learning_rate": 3.54125466901092e-05, |
| "loss": 0.4598, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.489443378119002, |
| "grad_norm": 0.6791351765101247, |
| "learning_rate": 3.538970613172332e-05, |
| "loss": 0.4055, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.4920025591810622, |
| "grad_norm": 0.8140085732718708, |
| "learning_rate": 3.536681625871474e-05, |
| "loss": 0.3982, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.4945617402431222, |
| "grad_norm": 0.6600156473699682, |
| "learning_rate": 3.534387714443153e-05, |
| "loss": 0.4283, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.4971209213051824, |
| "grad_norm": 0.9072053140385462, |
| "learning_rate": 3.532088886237956e-05, |
| "loss": 0.461, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.4996801023672424, |
| "grad_norm": 0.5291141400904751, |
| "learning_rate": 3.5297851486222274e-05, |
| "loss": 0.4105, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.5022392834293026, |
| "grad_norm": 0.8711742022509097, |
| "learning_rate": 3.527476508978039e-05, |
| "loss": 0.4266, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.5047984644913628, |
| "grad_norm": 0.5011541715943939, |
| "learning_rate": 3.525162974703174e-05, |
| "loss": 0.4681, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.507357645553423, |
| "grad_norm": 0.7884765548368484, |
| "learning_rate": 3.5228445532110996e-05, |
| "loss": 0.4341, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.5099168266154832, |
| "grad_norm": 0.5740501726759667, |
| "learning_rate": 3.520521251930941e-05, |
| "loss": 0.4128, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.5124760076775432, |
| "grad_norm": 0.5460891296171442, |
| "learning_rate": 3.518193078307463e-05, |
| "loss": 0.4188, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.5150351887396032, |
| "grad_norm": 0.7055940264802735, |
| "learning_rate": 3.515860039801043e-05, |
| "loss": 0.3965, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.5175943698016634, |
| "grad_norm": 0.5874392809254365, |
| "learning_rate": 3.513522143887645e-05, |
| "loss": 0.4918, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.5201535508637236, |
| "grad_norm": 0.6245938510885568, |
| "learning_rate": 3.5111793980588006e-05, |
| "loss": 0.4285, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.5227127319257838, |
| "grad_norm": 0.48774952340541083, |
| "learning_rate": 3.5088318098215805e-05, |
| "loss": 0.4013, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.525271912987844, |
| "grad_norm": 0.4468453631601922, |
| "learning_rate": 3.506479386698575e-05, |
| "loss": 0.3958, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.527831094049904, |
| "grad_norm": 0.5538376365976897, |
| "learning_rate": 3.5041221362278644e-05, |
| "loss": 0.4347, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.530390275111964, |
| "grad_norm": 0.4775499856280007, |
| "learning_rate": 3.5017600659629986e-05, |
| "loss": 0.4484, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.5329494561740242, |
| "grad_norm": 0.5259243816931762, |
| "learning_rate": 3.499393183472973e-05, |
| "loss": 0.4211, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.5355086372360844, |
| "grad_norm": 0.5198109524806964, |
| "learning_rate": 3.497021496342203e-05, |
| "loss": 0.4363, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.5380678182981447, |
| "grad_norm": 0.4696941806509467, |
| "learning_rate": 3.494645012170498e-05, |
| "loss": 0.4295, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.5406269993602049, |
| "grad_norm": 0.6474700305067774, |
| "learning_rate": 3.4922637385730406e-05, |
| "loss": 0.5, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.5431861804222649, |
| "grad_norm": 0.4793948522260617, |
| "learning_rate": 3.489877683180362e-05, |
| "loss": 0.3845, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.545745361484325, |
| "grad_norm": 0.539237627319592, |
| "learning_rate": 3.487486853638314e-05, |
| "loss": 0.4356, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.548304542546385, |
| "grad_norm": 0.5532057074478213, |
| "learning_rate": 3.485091257608047e-05, |
| "loss": 0.3891, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.5508637236084453, |
| "grad_norm": 0.6992089262871057, |
| "learning_rate": 3.482690902765984e-05, |
| "loss": 0.4571, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.5534229046705055, |
| "grad_norm": 0.4638019008983174, |
| "learning_rate": 3.4802857968038e-05, |
| "loss": 0.4188, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.5559820857325657, |
| "grad_norm": 0.7914532265925823, |
| "learning_rate": 3.4778759474283936e-05, |
| "loss": 0.4534, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.5585412667946257, |
| "grad_norm": 0.5295039816146513, |
| "learning_rate": 3.475461362361861e-05, |
| "loss": 0.4001, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.561100447856686, |
| "grad_norm": 0.6132941939025509, |
| "learning_rate": 3.473042049341474e-05, |
| "loss": 0.4225, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.5636596289187459, |
| "grad_norm": 0.714057587477597, |
| "learning_rate": 3.470618016119658e-05, |
| "loss": 0.4136, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.566218809980806, |
| "grad_norm": 0.5320432430474729, |
| "learning_rate": 3.468189270463959e-05, |
| "loss": 0.4004, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.5687779910428663, |
| "grad_norm": 0.66784032505952, |
| "learning_rate": 3.465755820157026e-05, |
| "loss": 0.4065, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.5713371721049265, |
| "grad_norm": 0.49152795733031895, |
| "learning_rate": 3.463317672996583e-05, |
| "loss": 0.3791, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.5738963531669867, |
| "grad_norm": 0.555185505506747, |
| "learning_rate": 3.4608748367954064e-05, |
| "loss": 0.4633, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.5764555342290467, |
| "grad_norm": 0.4980166871980404, |
| "learning_rate": 3.4584273193812956e-05, |
| "loss": 0.4252, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.5790147152911067, |
| "grad_norm": 0.589732498310526, |
| "learning_rate": 3.45597512859705e-05, |
| "loss": 0.4839, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.581573896353167, |
| "grad_norm": 0.5743441085203349, |
| "learning_rate": 3.4535182723004466e-05, |
| "loss": 0.4062, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.5841330774152271, |
| "grad_norm": 0.49126577666625093, |
| "learning_rate": 3.451056758364212e-05, |
| "loss": 0.4135, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.5866922584772873, |
| "grad_norm": 0.5037192947792559, |
| "learning_rate": 3.4485905946759965e-05, |
| "loss": 0.459, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.5892514395393476, |
| "grad_norm": 0.5439325021578344, |
| "learning_rate": 3.446119789138351e-05, |
| "loss": 0.3882, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.5918106206014075, |
| "grad_norm": 0.5020489297705218, |
| "learning_rate": 3.443644349668701e-05, |
| "loss": 0.4053, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.5943698016634675, |
| "grad_norm": 0.5634072314676822, |
| "learning_rate": 3.4411642841993185e-05, |
| "loss": 0.4065, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.5969289827255277, |
| "grad_norm": 0.4911032231149214, |
| "learning_rate": 3.438679600677303e-05, |
| "loss": 0.4207, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.599488163787588, |
| "grad_norm": 0.6463093595410542, |
| "learning_rate": 3.4361903070645484e-05, |
| "loss": 0.4195, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.6020473448496482, |
| "grad_norm": 0.5395555876619189, |
| "learning_rate": 3.433696411337723e-05, |
| "loss": 0.4359, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.6046065259117084, |
| "grad_norm": 0.5456191870776568, |
| "learning_rate": 3.431197921488242e-05, |
| "loss": 0.4325, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.6071657069737684, |
| "grad_norm": 0.5181489907237284, |
| "learning_rate": 3.4286948455222425e-05, |
| "loss": 0.4262, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.6097248880358286, |
| "grad_norm": 0.4542522655783656, |
| "learning_rate": 3.426187191460555e-05, |
| "loss": 0.4008, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.6122840690978886, |
| "grad_norm": 0.5748315523058384, |
| "learning_rate": 3.423674967338681e-05, |
| "loss": 0.4613, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.6148432501599488, |
| "grad_norm": 0.4893291757606303, |
| "learning_rate": 3.421158181206769e-05, |
| "loss": 0.411, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.617402431222009, |
| "grad_norm": 0.580837787265809, |
| "learning_rate": 3.418636841129582e-05, |
| "loss": 0.417, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.6199616122840692, |
| "grad_norm": 0.6495075917348687, |
| "learning_rate": 3.416110955186477e-05, |
| "loss": 0.4817, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.6225207933461292, |
| "grad_norm": 0.45060911216179744, |
| "learning_rate": 3.4135805314713804e-05, |
| "loss": 0.4033, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.6250799744081894, |
| "grad_norm": 0.5998653037164785, |
| "learning_rate": 3.411045578092754e-05, |
| "loss": 0.3912, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.6276391554702494, |
| "grad_norm": 0.5310655122776353, |
| "learning_rate": 3.4085061031735794e-05, |
| "loss": 0.4313, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.6301983365323096, |
| "grad_norm": 0.6388088047336757, |
| "learning_rate": 3.405962114851324e-05, |
| "loss": 0.4433, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.6327575175943698, |
| "grad_norm": 0.5039515005309823, |
| "learning_rate": 3.4034136212779195e-05, |
| "loss": 0.414, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.63531669865643, |
| "grad_norm": 0.6351418548004399, |
| "learning_rate": 3.4008606306197336e-05, |
| "loss": 0.4271, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.6378758797184902, |
| "grad_norm": 0.5382251510311172, |
| "learning_rate": 3.398303151057543e-05, |
| "loss": 0.4223, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.6404350607805502, |
| "grad_norm": 0.5213424824749899, |
| "learning_rate": 3.3957411907865123e-05, |
| "loss": 0.4169, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.6429942418426102, |
| "grad_norm": 0.6114074897511479, |
| "learning_rate": 3.393174758016161e-05, |
| "loss": 0.4141, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.6455534229046704, |
| "grad_norm": 0.5077805851469291, |
| "learning_rate": 3.39060386097034e-05, |
| "loss": 0.4354, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.6481126039667306, |
| "grad_norm": 0.5648032929681525, |
| "learning_rate": 3.3880285078872076e-05, |
| "loss": 0.3944, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.6506717850287909, |
| "grad_norm": 0.5623890506817537, |
| "learning_rate": 3.385448707019199e-05, |
| "loss": 0.463, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.653230966090851, |
| "grad_norm": 0.47253413270859174, |
| "learning_rate": 3.382864466633003e-05, |
| "loss": 0.4179, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.655790147152911, |
| "grad_norm": 0.6667403121893724, |
| "learning_rate": 3.3802757950095346e-05, |
| "loss": 0.4401, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.658349328214971, |
| "grad_norm": 0.48362697457689036, |
| "learning_rate": 3.377682700443907e-05, |
| "loss": 0.4294, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.6609085092770313, |
| "grad_norm": 0.49305745599661777, |
| "learning_rate": 3.375085191245407e-05, |
| "loss": 0.4166, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.6634676903390915, |
| "grad_norm": 0.5645696987568555, |
| "learning_rate": 3.372483275737468e-05, |
| "loss": 0.3922, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.6660268714011517, |
| "grad_norm": 0.6497192793696049, |
| "learning_rate": 3.3698769622576404e-05, |
| "loss": 0.484, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.668586052463212, |
| "grad_norm": 0.6004795627408669, |
| "learning_rate": 3.367266259157572e-05, |
| "loss": 0.4744, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.671145233525272, |
| "grad_norm": 0.6035991357132837, |
| "learning_rate": 3.364651174802974e-05, |
| "loss": 0.4576, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.673704414587332, |
| "grad_norm": 0.5638974284467396, |
| "learning_rate": 3.3620317175735945e-05, |
| "loss": 0.3829, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.676263595649392, |
| "grad_norm": 0.6144746966380764, |
| "learning_rate": 3.359407895863199e-05, |
| "loss": 0.4219, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.6788227767114523, |
| "grad_norm": 0.6330635207423285, |
| "learning_rate": 3.356779718079534e-05, |
| "loss": 0.3939, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.6813819577735125, |
| "grad_norm": 0.6511893943323425, |
| "learning_rate": 3.3541471926443084e-05, |
| "loss": 0.4626, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.6839411388355727, |
| "grad_norm": 0.7313805162264239, |
| "learning_rate": 3.3515103279931584e-05, |
| "loss": 0.4443, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.6865003198976327, |
| "grad_norm": 0.4567231810719478, |
| "learning_rate": 3.3488691325756294e-05, |
| "loss": 0.4072, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.689059500959693, |
| "grad_norm": 0.6498464270828352, |
| "learning_rate": 3.34622361485514e-05, |
| "loss": 0.4532, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.691618682021753, |
| "grad_norm": 0.46427052887792153, |
| "learning_rate": 3.343573783308964e-05, |
| "loss": 0.4266, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.6941778630838131, |
| "grad_norm": 0.5468516675156209, |
| "learning_rate": 3.340919646428193e-05, |
| "loss": 0.4208, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.6967370441458733, |
| "grad_norm": 0.5751528606477143, |
| "learning_rate": 3.3382612127177166e-05, |
| "loss": 0.4146, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.6992962252079336, |
| "grad_norm": 0.5793295201191827, |
| "learning_rate": 3.335598490696196e-05, |
| "loss": 0.4623, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.7018554062699938, |
| "grad_norm": 0.7077864391730174, |
| "learning_rate": 3.332931488896029e-05, |
| "loss": 0.4459, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.7044145873320538, |
| "grad_norm": 0.4786371924890489, |
| "learning_rate": 3.330260215863332e-05, |
| "loss": 0.3967, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.7069737683941137, |
| "grad_norm": 0.71627935746326, |
| "learning_rate": 3.327584680157904e-05, |
| "loss": 0.4466, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.709532949456174, |
| "grad_norm": 0.5672877205591593, |
| "learning_rate": 3.3249048903532075e-05, |
| "loss": 0.4245, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.7120921305182342, |
| "grad_norm": 0.5967392434258936, |
| "learning_rate": 3.322220855036333e-05, |
| "loss": 0.4399, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.7146513115802944, |
| "grad_norm": 0.619642871257619, |
| "learning_rate": 3.319532582807977e-05, |
| "loss": 0.4429, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.7172104926423546, |
| "grad_norm": 0.5718993726856418, |
| "learning_rate": 3.316840082282412e-05, |
| "loss": 0.4049, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.7197696737044146, |
| "grad_norm": 0.7158788807881703, |
| "learning_rate": 3.314143362087462e-05, |
| "loss": 0.465, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.7223288547664746, |
| "grad_norm": 0.49801282221595167, |
| "learning_rate": 3.3114424308644686e-05, |
| "loss": 0.4304, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.7248880358285348, |
| "grad_norm": 0.732000530911472, |
| "learning_rate": 3.3087372972682703e-05, |
| "loss": 0.4496, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.727447216890595, |
| "grad_norm": 0.5016534768936702, |
| "learning_rate": 3.30602796996717e-05, |
| "loss": 0.4196, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.7300063979526552, |
| "grad_norm": 0.6732998458752849, |
| "learning_rate": 3.303314457642911e-05, |
| "loss": 0.4377, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.7325655790147154, |
| "grad_norm": 0.512135853555358, |
| "learning_rate": 3.300596768990644e-05, |
| "loss": 0.4032, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.7351247600767754, |
| "grad_norm": 0.5565097137954512, |
| "learning_rate": 3.297874912718902e-05, |
| "loss": 0.4124, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.7376839411388356, |
| "grad_norm": 0.5579693456602185, |
| "learning_rate": 3.2951488975495785e-05, |
| "loss": 0.4493, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.7402431222008956, |
| "grad_norm": 0.5767956410825538, |
| "learning_rate": 3.2924187322178865e-05, |
| "loss": 0.4701, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.7428023032629558, |
| "grad_norm": 0.474585132199083, |
| "learning_rate": 3.2896844254723414e-05, |
| "loss": 0.4118, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.745361484325016, |
| "grad_norm": 0.44776574599085095, |
| "learning_rate": 3.28694598607473e-05, |
| "loss": 0.3928, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.7479206653870762, |
| "grad_norm": 0.4613485770735262, |
| "learning_rate": 3.28420342280008e-05, |
| "loss": 0.4185, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.7504798464491362, |
| "grad_norm": 0.6343181624558385, |
| "learning_rate": 3.281456744436634e-05, |
| "loss": 0.4133, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.7530390275111964, |
| "grad_norm": 0.5493779464654077, |
| "learning_rate": 3.278705959785821e-05, |
| "loss": 0.4671, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.7555982085732564, |
| "grad_norm": 0.5441386561154584, |
| "learning_rate": 3.2759510776622274e-05, |
| "loss": 0.4453, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.7581573896353166, |
| "grad_norm": 0.629823120577556, |
| "learning_rate": 3.273192106893572e-05, |
| "loss": 0.3839, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.7607165706973769, |
| "grad_norm": 0.549530453190963, |
| "learning_rate": 3.270429056320672e-05, |
| "loss": 0.4502, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.763275751759437, |
| "grad_norm": 0.6031975081301458, |
| "learning_rate": 3.26766193479742e-05, |
| "loss": 0.4738, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.7658349328214973, |
| "grad_norm": 0.48136220411058944, |
| "learning_rate": 3.2648907511907544e-05, |
| "loss": 0.4036, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.7683941138835573, |
| "grad_norm": 0.6253783336704821, |
| "learning_rate": 3.262115514380628e-05, |
| "loss": 0.4081, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.7709532949456173, |
| "grad_norm": 0.47939071537048983, |
| "learning_rate": 3.25933623325998e-05, |
| "loss": 0.4314, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.7735124760076775, |
| "grad_norm": 0.4518873833371369, |
| "learning_rate": 3.256552916734713e-05, |
| "loss": 0.3986, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.7760716570697377, |
| "grad_norm": 0.6074017672602955, |
| "learning_rate": 3.25376557372366e-05, |
| "loss": 0.4324, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.778630838131798, |
| "grad_norm": 0.470674972052956, |
| "learning_rate": 3.250974213158555e-05, |
| "loss": 0.3933, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.781190019193858, |
| "grad_norm": 0.6255159824430874, |
| "learning_rate": 3.248178843984006e-05, |
| "loss": 0.4252, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.783749200255918, |
| "grad_norm": 0.5371760909763971, |
| "learning_rate": 3.245379475157465e-05, |
| "loss": 0.4778, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.786308381317978, |
| "grad_norm": 0.5168010335153898, |
| "learning_rate": 3.242576115649205e-05, |
| "loss": 0.4229, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.7888675623800383, |
| "grad_norm": 0.49166449165933496, |
| "learning_rate": 3.239768774442281e-05, |
| "loss": 0.4005, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.7914267434420985, |
| "grad_norm": 0.4932386148580624, |
| "learning_rate": 3.23695746053251e-05, |
| "loss": 0.4163, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.7939859245041587, |
| "grad_norm": 0.5880360652699835, |
| "learning_rate": 3.2341421829284394e-05, |
| "loss": 0.4413, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.796545105566219, |
| "grad_norm": 0.4625923123089751, |
| "learning_rate": 3.2313229506513167e-05, |
| "loss": 0.426, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.799104286628279, |
| "grad_norm": 0.5109208128171377, |
| "learning_rate": 3.228499772735062e-05, |
| "loss": 0.393, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.8016634676903391, |
| "grad_norm": 0.4806179945017673, |
| "learning_rate": 3.2256726582262384e-05, |
| "loss": 0.4479, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.8042226487523991, |
| "grad_norm": 0.4970665904869278, |
| "learning_rate": 3.222841616184025e-05, |
| "loss": 0.4318, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.8067818298144593, |
| "grad_norm": 0.5666482942373245, |
| "learning_rate": 3.220006655680183e-05, |
| "loss": 0.4245, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.8093410108765196, |
| "grad_norm": 0.4999181748027583, |
| "learning_rate": 3.2171677857990334e-05, |
| "loss": 0.4372, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.8119001919385798, |
| "grad_norm": 0.47380754642052403, |
| "learning_rate": 3.2143250156374226e-05, |
| "loss": 0.3926, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.8144593730006398, |
| "grad_norm": 0.6008131918939661, |
| "learning_rate": 3.211478354304695e-05, |
| "loss": 0.4533, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.8170185540627, |
| "grad_norm": 0.5144973230871912, |
| "learning_rate": 3.208627810922665e-05, |
| "loss": 0.4352, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.81957773512476, |
| "grad_norm": 0.4699623742277227, |
| "learning_rate": 3.2057733946255844e-05, |
| "loss": 0.3852, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.8221369161868202, |
| "grad_norm": 0.4961706904135912, |
| "learning_rate": 3.202915114560118e-05, |
| "loss": 0.4445, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.8246960972488804, |
| "grad_norm": 0.561279443158619, |
| "learning_rate": 3.200052979885309e-05, |
| "loss": 0.4802, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.8272552783109406, |
| "grad_norm": 0.4557933345271347, |
| "learning_rate": 3.197186999772555e-05, |
| "loss": 0.4029, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.8298144593730008, |
| "grad_norm": 0.589112376636283, |
| "learning_rate": 3.194317183405573e-05, |
| "loss": 0.4563, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.8323736404350608, |
| "grad_norm": 0.5161326824368248, |
| "learning_rate": 3.191443539980374e-05, |
| "loss": 0.4556, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.8349328214971208, |
| "grad_norm": 0.464424728984026, |
| "learning_rate": 3.188566078705235e-05, |
| "loss": 0.4044, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.837492002559181, |
| "grad_norm": 0.47067836600914265, |
| "learning_rate": 3.1856848088006636e-05, |
| "loss": 0.4335, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.8400511836212412, |
| "grad_norm": 0.5769491747912345, |
| "learning_rate": 3.182799739499371e-05, |
| "loss": 0.4407, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.8426103646833014, |
| "grad_norm": 0.4932465377591071, |
| "learning_rate": 3.1799108800462466e-05, |
| "loss": 0.4328, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.8451695457453616, |
| "grad_norm": 0.488480860113565, |
| "learning_rate": 3.177018239698322e-05, |
| "loss": 0.4235, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.8477287268074216, |
| "grad_norm": 0.48833589337954714, |
| "learning_rate": 3.1741218277247466e-05, |
| "loss": 0.4132, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.8502879078694816, |
| "grad_norm": 0.39099920314827113, |
| "learning_rate": 3.1712216534067536e-05, |
| "loss": 0.4265, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.8528470889315418, |
| "grad_norm": 0.45542998139885993, |
| "learning_rate": 3.168317726037634e-05, |
| "loss": 0.3971, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.855406269993602, |
| "grad_norm": 0.46738782533250195, |
| "learning_rate": 3.1654100549227024e-05, |
| "loss": 0.4559, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.8579654510556622, |
| "grad_norm": 0.40700257974662957, |
| "learning_rate": 3.1624986493792735e-05, |
| "loss": 0.4135, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.8605246321177225, |
| "grad_norm": 0.4797435460256252, |
| "learning_rate": 3.159583518736625e-05, |
| "loss": 0.4463, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.8630838131797824, |
| "grad_norm": 0.47740901056002083, |
| "learning_rate": 3.156664672335973e-05, |
| "loss": 0.3884, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.8656429942418427, |
| "grad_norm": 0.49631875720308977, |
| "learning_rate": 3.153742119530441e-05, |
| "loss": 0.4162, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.8682021753039026, |
| "grad_norm": 0.49356825084947964, |
| "learning_rate": 3.1508158696850275e-05, |
| "loss": 0.4329, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.8707613563659629, |
| "grad_norm": 0.4599036508089157, |
| "learning_rate": 3.1478859321765796e-05, |
| "loss": 0.428, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.873320537428023, |
| "grad_norm": 0.48294927528675924, |
| "learning_rate": 3.144952316393758e-05, |
| "loss": 0.4058, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.8758797184900833, |
| "grad_norm": 0.5890205448596298, |
| "learning_rate": 3.142015031737016e-05, |
| "loss": 0.4776, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.8784388995521433, |
| "grad_norm": 0.4923421153314979, |
| "learning_rate": 3.139074087618556e-05, |
| "loss": 0.4045, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.8809980806142035, |
| "grad_norm": 0.5087618925492778, |
| "learning_rate": 3.136129493462312e-05, |
| "loss": 0.4275, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.8835572616762635, |
| "grad_norm": 0.5648227631500222, |
| "learning_rate": 3.133181258703912e-05, |
| "loss": 0.4727, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.8861164427383237, |
| "grad_norm": 0.5234994479526746, |
| "learning_rate": 3.1302293927906516e-05, |
| "loss": 0.3967, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.888675623800384, |
| "grad_norm": 0.6807348510101979, |
| "learning_rate": 3.1272739051814594e-05, |
| "loss": 0.4551, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.891234804862444, |
| "grad_norm": 0.4969832272412207, |
| "learning_rate": 3.1243148053468715e-05, |
| "loss": 0.3773, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.8937939859245043, |
| "grad_norm": 0.6317866448620022, |
| "learning_rate": 3.121352102768998e-05, |
| "loss": 0.4389, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.8963531669865643, |
| "grad_norm": 0.5131091797313253, |
| "learning_rate": 3.1183858069414936e-05, |
| "loss": 0.4458, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.8989123480486243, |
| "grad_norm": 0.4972035729160381, |
| "learning_rate": 3.115415927369529e-05, |
| "loss": 0.4451, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.9014715291106845, |
| "grad_norm": 0.47895024578706524, |
| "learning_rate": 3.112442473569754e-05, |
| "loss": 0.4324, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.9040307101727447, |
| "grad_norm": 0.4691676491006599, |
| "learning_rate": 3.109465455070278e-05, |
| "loss": 0.4035, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.906589891234805, |
| "grad_norm": 0.4736286307947326, |
| "learning_rate": 3.106484881410628e-05, |
| "loss": 0.4446, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.9091490722968651, |
| "grad_norm": 0.4359831705290721, |
| "learning_rate": 3.103500762141725e-05, |
| "loss": 0.3829, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.9117082533589251, |
| "grad_norm": 0.462916560551583, |
| "learning_rate": 3.1005131068258506e-05, |
| "loss": 0.4107, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.9142674344209851, |
| "grad_norm": 0.47173985630781595, |
| "learning_rate": 3.09752192503662e-05, |
| "loss": 0.3903, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.9168266154830453, |
| "grad_norm": 0.43246703137114556, |
| "learning_rate": 3.094527226358945e-05, |
| "loss": 0.4091, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.9193857965451055, |
| "grad_norm": 0.5056443186885541, |
| "learning_rate": 3.091529020389009e-05, |
| "loss": 0.4837, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.9219449776071658, |
| "grad_norm": 0.49376735973598973, |
| "learning_rate": 3.088527316734235e-05, |
| "loss": 0.4124, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.924504158669226, |
| "grad_norm": 0.5428642850508197, |
| "learning_rate": 3.08552212501325e-05, |
| "loss": 0.4304, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.927063339731286, |
| "grad_norm": 0.5365719180678239, |
| "learning_rate": 3.082513454855863e-05, |
| "loss": 0.405, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.9296225207933462, |
| "grad_norm": 0.4559502049703374, |
| "learning_rate": 3.079501315903026e-05, |
| "loss": 0.445, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.9321817018554062, |
| "grad_norm": 0.5222902812164878, |
| "learning_rate": 3.076485717806808e-05, |
| "loss": 0.3726, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.9347408829174664, |
| "grad_norm": 0.5737561385558596, |
| "learning_rate": 3.073466670230361e-05, |
| "loss": 0.4588, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.9373000639795266, |
| "grad_norm": 0.43383831135928497, |
| "learning_rate": 3.070444182847891e-05, |
| "loss": 0.4006, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.9398592450415868, |
| "grad_norm": 0.49738529963698463, |
| "learning_rate": 3.067418265344628e-05, |
| "loss": 0.404, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.9424184261036468, |
| "grad_norm": 0.4779872060995513, |
| "learning_rate": 3.0643889274167926e-05, |
| "loss": 0.4642, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.944977607165707, |
| "grad_norm": 0.4501703649941174, |
| "learning_rate": 3.061356178771564e-05, |
| "loss": 0.3845, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.947536788227767, |
| "grad_norm": 0.566851781049989, |
| "learning_rate": 3.058320029127052e-05, |
| "loss": 0.4603, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.9500959692898272, |
| "grad_norm": 0.41716803055724166, |
| "learning_rate": 3.055280488212266e-05, |
| "loss": 0.3988, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.9526551503518874, |
| "grad_norm": 0.5321354765650695, |
| "learning_rate": 3.052237565767079e-05, |
| "loss": 0.4633, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.9552143314139476, |
| "grad_norm": 0.5101148541262678, |
| "learning_rate": 3.0491912715422047e-05, |
| "loss": 0.4154, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.9577735124760078, |
| "grad_norm": 0.44138127982821407, |
| "learning_rate": 3.0461416152991555e-05, |
| "loss": 0.3971, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.9603326935380678, |
| "grad_norm": 0.5119670448118282, |
| "learning_rate": 3.043088606810221e-05, |
| "loss": 0.4344, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.9628918746001278, |
| "grad_norm": 0.5844510240848945, |
| "learning_rate": 3.0400322558584308e-05, |
| "loss": 0.4369, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.965451055662188, |
| "grad_norm": 0.45663704360586077, |
| "learning_rate": 3.0369725722375274e-05, |
| "loss": 0.4666, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.9680102367242482, |
| "grad_norm": 0.539565481061931, |
| "learning_rate": 3.0339095657519292e-05, |
| "loss": 0.4359, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.9705694177863085, |
| "grad_norm": 0.5385392681842599, |
| "learning_rate": 3.0308432462167045e-05, |
| "loss": 0.4264, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.9731285988483687, |
| "grad_norm": 0.432889165826209, |
| "learning_rate": 3.0277736234575378e-05, |
| "loss": 0.3845, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.9756877799104287, |
| "grad_norm": 0.5443873180170078, |
| "learning_rate": 3.0247007073106976e-05, |
| "loss": 0.406, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.9782469609724886, |
| "grad_norm": 0.5012354762450505, |
| "learning_rate": 3.0216245076230062e-05, |
| "loss": 0.4334, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.9808061420345489, |
| "grad_norm": 0.5232208647955975, |
| "learning_rate": 3.0185450342518075e-05, |
| "loss": 0.4268, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.983365323096609, |
| "grad_norm": 0.4513266845951912, |
| "learning_rate": 3.015462297064936e-05, |
| "loss": 0.3783, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.9859245041586693, |
| "grad_norm": 0.5054305167039745, |
| "learning_rate": 3.0123763059406835e-05, |
| "loss": 0.4148, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.9884836852207295, |
| "grad_norm": 0.47543662649122564, |
| "learning_rate": 3.009287070767771e-05, |
| "loss": 0.4083, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.9910428662827895, |
| "grad_norm": 0.5147396246458542, |
| "learning_rate": 3.0061946014453113e-05, |
| "loss": 0.406, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.9936020473448497, |
| "grad_norm": 0.537028842282906, |
| "learning_rate": 3.0030989078827848e-05, |
| "loss": 0.386, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.9961612284069097, |
| "grad_norm": 0.4661586754448457, |
| "learning_rate": 3.0000000000000004e-05, |
| "loss": 0.4218, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.99872040946897, |
| "grad_norm": 0.4261635908664324, |
| "learning_rate": 2.9968978877270672e-05, |
| "loss": 0.4449, |
| "step": 781 |
| }, |
| { |
| "epoch": 2.00127959053103, |
| "grad_norm": 0.6837214787749436, |
| "learning_rate": 2.9937925810043654e-05, |
| "loss": 0.4318, |
| "step": 782 |
| }, |
| { |
| "epoch": 2.0038387715930903, |
| "grad_norm": 0.451386066705997, |
| "learning_rate": 2.990684089782507e-05, |
| "loss": 0.3612, |
| "step": 783 |
| }, |
| { |
| "epoch": 2.0063979526551505, |
| "grad_norm": 0.7078432089850583, |
| "learning_rate": 2.987572424022311e-05, |
| "loss": 0.3505, |
| "step": 784 |
| }, |
| { |
| "epoch": 2.0089571337172103, |
| "grad_norm": 0.5081289643217769, |
| "learning_rate": 2.98445759369477e-05, |
| "loss": 0.3149, |
| "step": 785 |
| }, |
| { |
| "epoch": 2.0115163147792705, |
| "grad_norm": 0.5849221231819475, |
| "learning_rate": 2.9813396087810134e-05, |
| "loss": 0.3514, |
| "step": 786 |
| }, |
| { |
| "epoch": 2.0140754958413307, |
| "grad_norm": 0.48371629559591783, |
| "learning_rate": 2.9782184792722845e-05, |
| "loss": 0.3339, |
| "step": 787 |
| }, |
| { |
| "epoch": 2.016634676903391, |
| "grad_norm": 0.6063864453716905, |
| "learning_rate": 2.9750942151698968e-05, |
| "loss": 0.389, |
| "step": 788 |
| }, |
| { |
| "epoch": 2.019193857965451, |
| "grad_norm": 0.6277898887247666, |
| "learning_rate": 2.971966826485212e-05, |
| "loss": 0.3283, |
| "step": 789 |
| }, |
| { |
| "epoch": 2.0217530390275114, |
| "grad_norm": 0.5884137115714144, |
| "learning_rate": 2.9688363232396056e-05, |
| "loss": 0.3353, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.024312220089571, |
| "grad_norm": 0.5242949584836221, |
| "learning_rate": 2.9657027154644294e-05, |
| "loss": 0.3059, |
| "step": 791 |
| }, |
| { |
| "epoch": 2.0268714011516313, |
| "grad_norm": 0.5023037783914988, |
| "learning_rate": 2.962566013200986e-05, |
| "loss": 0.3433, |
| "step": 792 |
| }, |
| { |
| "epoch": 2.0294305822136915, |
| "grad_norm": 0.6201000972339953, |
| "learning_rate": 2.959426226500493e-05, |
| "loss": 0.318, |
| "step": 793 |
| }, |
| { |
| "epoch": 2.0319897632757518, |
| "grad_norm": 0.44395380528610195, |
| "learning_rate": 2.9562833654240518e-05, |
| "loss": 0.3401, |
| "step": 794 |
| }, |
| { |
| "epoch": 2.034548944337812, |
| "grad_norm": 0.43056205609112264, |
| "learning_rate": 2.9531374400426158e-05, |
| "loss": 0.2937, |
| "step": 795 |
| }, |
| { |
| "epoch": 2.037108125399872, |
| "grad_norm": 0.4803245756648518, |
| "learning_rate": 2.949988460436958e-05, |
| "loss": 0.3396, |
| "step": 796 |
| }, |
| { |
| "epoch": 2.0396673064619324, |
| "grad_norm": 0.41525308273215306, |
| "learning_rate": 2.946836436697636e-05, |
| "loss": 0.3508, |
| "step": 797 |
| }, |
| { |
| "epoch": 2.042226487523992, |
| "grad_norm": 0.4033224019153147, |
| "learning_rate": 2.943681378924964e-05, |
| "loss": 0.307, |
| "step": 798 |
| }, |
| { |
| "epoch": 2.0447856685860524, |
| "grad_norm": 0.4163421073725909, |
| "learning_rate": 2.94052329722898e-05, |
| "loss": 0.3145, |
| "step": 799 |
| }, |
| { |
| "epoch": 2.0473448496481126, |
| "grad_norm": 0.374364306892129, |
| "learning_rate": 2.9373622017294075e-05, |
| "loss": 0.3412, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.049904030710173, |
| "grad_norm": 0.3911100371820488, |
| "learning_rate": 2.934198102555631e-05, |
| "loss": 0.3046, |
| "step": 801 |
| }, |
| { |
| "epoch": 2.052463211772233, |
| "grad_norm": 0.4467542980832922, |
| "learning_rate": 2.9310310098466588e-05, |
| "loss": 0.291, |
| "step": 802 |
| }, |
| { |
| "epoch": 2.055022392834293, |
| "grad_norm": 0.42396484047274274, |
| "learning_rate": 2.92786093375109e-05, |
| "loss": 0.3268, |
| "step": 803 |
| }, |
| { |
| "epoch": 2.057581573896353, |
| "grad_norm": 0.52821272561634, |
| "learning_rate": 2.924687884427087e-05, |
| "loss": 0.3699, |
| "step": 804 |
| }, |
| { |
| "epoch": 2.060140754958413, |
| "grad_norm": 0.4278559612529404, |
| "learning_rate": 2.9215118720423375e-05, |
| "loss": 0.3389, |
| "step": 805 |
| }, |
| { |
| "epoch": 2.0626999360204734, |
| "grad_norm": 0.4868265035371802, |
| "learning_rate": 2.9183329067740235e-05, |
| "loss": 0.2993, |
| "step": 806 |
| }, |
| { |
| "epoch": 2.0652591170825336, |
| "grad_norm": 0.41003428936435155, |
| "learning_rate": 2.9151509988087912e-05, |
| "loss": 0.3138, |
| "step": 807 |
| }, |
| { |
| "epoch": 2.067818298144594, |
| "grad_norm": 0.4548780109348631, |
| "learning_rate": 2.911966158342713e-05, |
| "loss": 0.3298, |
| "step": 808 |
| }, |
| { |
| "epoch": 2.070377479206654, |
| "grad_norm": 0.42982485262636566, |
| "learning_rate": 2.9087783955812628e-05, |
| "loss": 0.3493, |
| "step": 809 |
| }, |
| { |
| "epoch": 2.072936660268714, |
| "grad_norm": 0.37659830424896135, |
| "learning_rate": 2.9055877207392752e-05, |
| "loss": 0.2905, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.075495841330774, |
| "grad_norm": 0.6047986137094586, |
| "learning_rate": 2.9023941440409164e-05, |
| "loss": 0.3921, |
| "step": 811 |
| }, |
| { |
| "epoch": 2.0780550223928342, |
| "grad_norm": 0.4024787680486599, |
| "learning_rate": 2.899197675719653e-05, |
| "loss": 0.3126, |
| "step": 812 |
| }, |
| { |
| "epoch": 2.0806142034548945, |
| "grad_norm": 0.4864391147176377, |
| "learning_rate": 2.8959983260182166e-05, |
| "loss": 0.3259, |
| "step": 813 |
| }, |
| { |
| "epoch": 2.0831733845169547, |
| "grad_norm": 0.4289885322757846, |
| "learning_rate": 2.8927961051885716e-05, |
| "loss": 0.3327, |
| "step": 814 |
| }, |
| { |
| "epoch": 2.085732565579015, |
| "grad_norm": 0.46469758441400516, |
| "learning_rate": 2.8895910234918828e-05, |
| "loss": 0.3566, |
| "step": 815 |
| }, |
| { |
| "epoch": 2.0882917466410746, |
| "grad_norm": 0.47717231725597103, |
| "learning_rate": 2.886383091198483e-05, |
| "loss": 0.3543, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.090850927703135, |
| "grad_norm": 0.40073207808377775, |
| "learning_rate": 2.8831723185878382e-05, |
| "loss": 0.2954, |
| "step": 817 |
| }, |
| { |
| "epoch": 2.093410108765195, |
| "grad_norm": 0.5629907330757088, |
| "learning_rate": 2.8799587159485166e-05, |
| "loss": 0.3302, |
| "step": 818 |
| }, |
| { |
| "epoch": 2.0959692898272553, |
| "grad_norm": 0.5537969468557435, |
| "learning_rate": 2.876742293578155e-05, |
| "loss": 0.3323, |
| "step": 819 |
| }, |
| { |
| "epoch": 2.0985284708893155, |
| "grad_norm": 0.5750723337033808, |
| "learning_rate": 2.873523061783426e-05, |
| "loss": 0.3083, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.1010876519513757, |
| "grad_norm": 0.5333136369374486, |
| "learning_rate": 2.8703010308800034e-05, |
| "loss": 0.3516, |
| "step": 821 |
| }, |
| { |
| "epoch": 2.103646833013436, |
| "grad_norm": 0.5946698263030077, |
| "learning_rate": 2.8670762111925313e-05, |
| "loss": 0.3337, |
| "step": 822 |
| }, |
| { |
| "epoch": 2.1062060140754957, |
| "grad_norm": 0.4783241662438903, |
| "learning_rate": 2.863848613054591e-05, |
| "loss": 0.302, |
| "step": 823 |
| }, |
| { |
| "epoch": 2.108765195137556, |
| "grad_norm": 0.42094897221075406, |
| "learning_rate": 2.8606182468086654e-05, |
| "loss": 0.3739, |
| "step": 824 |
| }, |
| { |
| "epoch": 2.111324376199616, |
| "grad_norm": 0.5112995720272753, |
| "learning_rate": 2.8573851228061084e-05, |
| "loss": 0.3328, |
| "step": 825 |
| }, |
| { |
| "epoch": 2.1138835572616763, |
| "grad_norm": 0.4268589732083703, |
| "learning_rate": 2.8541492514071115e-05, |
| "loss": 0.3199, |
| "step": 826 |
| }, |
| { |
| "epoch": 2.1164427383237365, |
| "grad_norm": 0.40030928394161236, |
| "learning_rate": 2.850910642980668e-05, |
| "loss": 0.3229, |
| "step": 827 |
| }, |
| { |
| "epoch": 2.1190019193857967, |
| "grad_norm": 0.4208663528647155, |
| "learning_rate": 2.8476693079045432e-05, |
| "loss": 0.3475, |
| "step": 828 |
| }, |
| { |
| "epoch": 2.1215611004478565, |
| "grad_norm": 0.43920626817956737, |
| "learning_rate": 2.8444252565652397e-05, |
| "loss": 0.3395, |
| "step": 829 |
| }, |
| { |
| "epoch": 2.1241202815099167, |
| "grad_norm": 0.4129992695563009, |
| "learning_rate": 2.8411784993579633e-05, |
| "loss": 0.2742, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.126679462571977, |
| "grad_norm": 0.4454585391740596, |
| "learning_rate": 2.8379290466865906e-05, |
| "loss": 0.3328, |
| "step": 831 |
| }, |
| { |
| "epoch": 2.129238643634037, |
| "grad_norm": 0.46723501336721224, |
| "learning_rate": 2.834676908963636e-05, |
| "loss": 0.3379, |
| "step": 832 |
| }, |
| { |
| "epoch": 2.1317978246960974, |
| "grad_norm": 0.5238858504216463, |
| "learning_rate": 2.8314220966102177e-05, |
| "loss": 0.3621, |
| "step": 833 |
| }, |
| { |
| "epoch": 2.1343570057581576, |
| "grad_norm": 0.4375600366447412, |
| "learning_rate": 2.828164620056024e-05, |
| "loss": 0.3031, |
| "step": 834 |
| }, |
| { |
| "epoch": 2.1369161868202173, |
| "grad_norm": 0.4011400320445024, |
| "learning_rate": 2.8249044897392814e-05, |
| "loss": 0.3167, |
| "step": 835 |
| }, |
| { |
| "epoch": 2.1394753678822775, |
| "grad_norm": 0.48667210285852947, |
| "learning_rate": 2.8216417161067187e-05, |
| "loss": 0.3517, |
| "step": 836 |
| }, |
| { |
| "epoch": 2.1420345489443378, |
| "grad_norm": 0.540946535276379, |
| "learning_rate": 2.818376309613535e-05, |
| "loss": 0.3276, |
| "step": 837 |
| }, |
| { |
| "epoch": 2.144593730006398, |
| "grad_norm": 0.41449908449590483, |
| "learning_rate": 2.8151082807233684e-05, |
| "loss": 0.3429, |
| "step": 838 |
| }, |
| { |
| "epoch": 2.147152911068458, |
| "grad_norm": 0.4411596533715045, |
| "learning_rate": 2.811837639908257e-05, |
| "loss": 0.3064, |
| "step": 839 |
| }, |
| { |
| "epoch": 2.1497120921305184, |
| "grad_norm": 0.4049429223396906, |
| "learning_rate": 2.80856439764861e-05, |
| "loss": 0.3212, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.1522712731925786, |
| "grad_norm": 0.41603945550088506, |
| "learning_rate": 2.8052885644331742e-05, |
| "loss": 0.3097, |
| "step": 841 |
| }, |
| { |
| "epoch": 2.1548304542546384, |
| "grad_norm": 0.526968199578847, |
| "learning_rate": 2.8020101507589958e-05, |
| "loss": 0.3547, |
| "step": 842 |
| }, |
| { |
| "epoch": 2.1573896353166986, |
| "grad_norm": 0.38305125359444786, |
| "learning_rate": 2.798729167131391e-05, |
| "loss": 0.3027, |
| "step": 843 |
| }, |
| { |
| "epoch": 2.159948816378759, |
| "grad_norm": 0.6336647706261161, |
| "learning_rate": 2.795445624063913e-05, |
| "loss": 0.3806, |
| "step": 844 |
| }, |
| { |
| "epoch": 2.162507997440819, |
| "grad_norm": 0.44531937519080506, |
| "learning_rate": 2.792159532078314e-05, |
| "loss": 0.3323, |
| "step": 845 |
| }, |
| { |
| "epoch": 2.165067178502879, |
| "grad_norm": 0.44290733063906507, |
| "learning_rate": 2.7888709017045146e-05, |
| "loss": 0.3237, |
| "step": 846 |
| }, |
| { |
| "epoch": 2.167626359564939, |
| "grad_norm": 0.4370183293736626, |
| "learning_rate": 2.7855797434805695e-05, |
| "loss": 0.338, |
| "step": 847 |
| }, |
| { |
| "epoch": 2.170185540626999, |
| "grad_norm": 0.4164027353779343, |
| "learning_rate": 2.782286067952634e-05, |
| "loss": 0.3278, |
| "step": 848 |
| }, |
| { |
| "epoch": 2.1727447216890594, |
| "grad_norm": 0.49409168279966853, |
| "learning_rate": 2.7789898856749297e-05, |
| "loss": 0.3568, |
| "step": 849 |
| }, |
| { |
| "epoch": 2.1753039027511196, |
| "grad_norm": 0.43481015195691675, |
| "learning_rate": 2.77569120720971e-05, |
| "loss": 0.3356, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.17786308381318, |
| "grad_norm": 0.4983975940240211, |
| "learning_rate": 2.772390043127228e-05, |
| "loss": 0.3373, |
| "step": 851 |
| }, |
| { |
| "epoch": 2.18042226487524, |
| "grad_norm": 0.43692987588400956, |
| "learning_rate": 2.7690864040057023e-05, |
| "loss": 0.3108, |
| "step": 852 |
| }, |
| { |
| "epoch": 2.1829814459373003, |
| "grad_norm": 0.44866752393409093, |
| "learning_rate": 2.7657803004312797e-05, |
| "loss": 0.3347, |
| "step": 853 |
| }, |
| { |
| "epoch": 2.18554062699936, |
| "grad_norm": 0.5027966160971863, |
| "learning_rate": 2.7624717429980067e-05, |
| "loss": 0.3536, |
| "step": 854 |
| }, |
| { |
| "epoch": 2.1880998080614202, |
| "grad_norm": 0.4093435861731407, |
| "learning_rate": 2.7591607423077932e-05, |
| "loss": 0.2917, |
| "step": 855 |
| }, |
| { |
| "epoch": 2.1906589891234804, |
| "grad_norm": 0.49615217011265117, |
| "learning_rate": 2.755847308970376e-05, |
| "loss": 0.3502, |
| "step": 856 |
| }, |
| { |
| "epoch": 2.1932181701855407, |
| "grad_norm": 0.3967350798943657, |
| "learning_rate": 2.752531453603288e-05, |
| "loss": 0.3177, |
| "step": 857 |
| }, |
| { |
| "epoch": 2.195777351247601, |
| "grad_norm": 0.4774434168760954, |
| "learning_rate": 2.7492131868318247e-05, |
| "loss": 0.3616, |
| "step": 858 |
| }, |
| { |
| "epoch": 2.198336532309661, |
| "grad_norm": 0.4533916271890891, |
| "learning_rate": 2.7458925192890057e-05, |
| "loss": 0.3235, |
| "step": 859 |
| }, |
| { |
| "epoch": 2.200895713371721, |
| "grad_norm": 0.4666533097303878, |
| "learning_rate": 2.7425694616155474e-05, |
| "loss": 0.362, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.203454894433781, |
| "grad_norm": 0.5256588022807218, |
| "learning_rate": 2.739244024459822e-05, |
| "loss": 0.3577, |
| "step": 861 |
| }, |
| { |
| "epoch": 2.2060140754958413, |
| "grad_norm": 0.42484253391437565, |
| "learning_rate": 2.7359162184778276e-05, |
| "loss": 0.3297, |
| "step": 862 |
| }, |
| { |
| "epoch": 2.2085732565579015, |
| "grad_norm": 0.44868873474156656, |
| "learning_rate": 2.7325860543331533e-05, |
| "loss": 0.336, |
| "step": 863 |
| }, |
| { |
| "epoch": 2.2111324376199617, |
| "grad_norm": 0.4732703525530866, |
| "learning_rate": 2.7292535426969436e-05, |
| "loss": 0.3057, |
| "step": 864 |
| }, |
| { |
| "epoch": 2.213691618682022, |
| "grad_norm": 0.477868292020973, |
| "learning_rate": 2.7259186942478656e-05, |
| "loss": 0.308, |
| "step": 865 |
| }, |
| { |
| "epoch": 2.216250799744082, |
| "grad_norm": 0.4842331398481323, |
| "learning_rate": 2.7225815196720767e-05, |
| "loss": 0.3145, |
| "step": 866 |
| }, |
| { |
| "epoch": 2.218809980806142, |
| "grad_norm": 0.5381115021925195, |
| "learning_rate": 2.7192420296631835e-05, |
| "loss": 0.3798, |
| "step": 867 |
| }, |
| { |
| "epoch": 2.221369161868202, |
| "grad_norm": 0.4236836146413328, |
| "learning_rate": 2.7159002349222178e-05, |
| "loss": 0.3237, |
| "step": 868 |
| }, |
| { |
| "epoch": 2.2239283429302623, |
| "grad_norm": 0.5479188963928002, |
| "learning_rate": 2.7125561461575924e-05, |
| "loss": 0.3832, |
| "step": 869 |
| }, |
| { |
| "epoch": 2.2264875239923225, |
| "grad_norm": 0.45518106829461097, |
| "learning_rate": 2.7092097740850712e-05, |
| "loss": 0.3048, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.2290467050543827, |
| "grad_norm": 0.4441896488412185, |
| "learning_rate": 2.7058611294277378e-05, |
| "loss": 0.3141, |
| "step": 871 |
| }, |
| { |
| "epoch": 2.2316058861164425, |
| "grad_norm": 0.43355994332482317, |
| "learning_rate": 2.702510222915956e-05, |
| "loss": 0.3107, |
| "step": 872 |
| }, |
| { |
| "epoch": 2.2341650671785027, |
| "grad_norm": 0.5312269604580118, |
| "learning_rate": 2.6991570652873357e-05, |
| "loss": 0.3404, |
| "step": 873 |
| }, |
| { |
| "epoch": 2.236724248240563, |
| "grad_norm": 0.3995110246728028, |
| "learning_rate": 2.6958016672867048e-05, |
| "loss": 0.3122, |
| "step": 874 |
| }, |
| { |
| "epoch": 2.239283429302623, |
| "grad_norm": 0.4611276323390611, |
| "learning_rate": 2.692444039666066e-05, |
| "loss": 0.317, |
| "step": 875 |
| }, |
| { |
| "epoch": 2.2418426103646834, |
| "grad_norm": 0.5307192324613822, |
| "learning_rate": 2.6890841931845674e-05, |
| "loss": 0.3579, |
| "step": 876 |
| }, |
| { |
| "epoch": 2.2444017914267436, |
| "grad_norm": 0.41152344841446314, |
| "learning_rate": 2.68572213860847e-05, |
| "loss": 0.3278, |
| "step": 877 |
| }, |
| { |
| "epoch": 2.2469609724888038, |
| "grad_norm": 0.439703599513908, |
| "learning_rate": 2.6823578867111072e-05, |
| "loss": 0.3207, |
| "step": 878 |
| }, |
| { |
| "epoch": 2.2495201535508635, |
| "grad_norm": 0.48436216914849156, |
| "learning_rate": 2.6789914482728546e-05, |
| "loss": 0.3923, |
| "step": 879 |
| }, |
| { |
| "epoch": 2.2520793346129238, |
| "grad_norm": 0.4065191432791332, |
| "learning_rate": 2.6756228340810946e-05, |
| "loss": 0.3092, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.254638515674984, |
| "grad_norm": 0.45257368911274026, |
| "learning_rate": 2.6722520549301813e-05, |
| "loss": 0.3201, |
| "step": 881 |
| }, |
| { |
| "epoch": 2.257197696737044, |
| "grad_norm": 0.47676366709922463, |
| "learning_rate": 2.6688791216214064e-05, |
| "loss": 0.3552, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.2597568777991044, |
| "grad_norm": 0.4388935470431639, |
| "learning_rate": 2.6655040449629646e-05, |
| "loss": 0.3117, |
| "step": 883 |
| }, |
| { |
| "epoch": 2.2623160588611646, |
| "grad_norm": 0.49852041258030133, |
| "learning_rate": 2.6621268357699165e-05, |
| "loss": 0.2986, |
| "step": 884 |
| }, |
| { |
| "epoch": 2.2648752399232244, |
| "grad_norm": 0.5574815275071192, |
| "learning_rate": 2.6587475048641596e-05, |
| "loss": 0.3652, |
| "step": 885 |
| }, |
| { |
| "epoch": 2.2674344209852846, |
| "grad_norm": 0.532498078005579, |
| "learning_rate": 2.655366063074388e-05, |
| "loss": 0.3361, |
| "step": 886 |
| }, |
| { |
| "epoch": 2.269993602047345, |
| "grad_norm": 0.4905390491427929, |
| "learning_rate": 2.6519825212360607e-05, |
| "loss": 0.2904, |
| "step": 887 |
| }, |
| { |
| "epoch": 2.272552783109405, |
| "grad_norm": 0.4548356990918924, |
| "learning_rate": 2.6485968901913658e-05, |
| "loss": 0.3383, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.275111964171465, |
| "grad_norm": 0.4527361560109495, |
| "learning_rate": 2.6452091807891855e-05, |
| "loss": 0.3395, |
| "step": 889 |
| }, |
| { |
| "epoch": 2.2776711452335254, |
| "grad_norm": 0.4444282648587205, |
| "learning_rate": 2.6418194038850634e-05, |
| "loss": 0.3155, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.2802303262955856, |
| "grad_norm": 0.37005966275705604, |
| "learning_rate": 2.6384275703411666e-05, |
| "loss": 0.3172, |
| "step": 891 |
| }, |
| { |
| "epoch": 2.2827895073576454, |
| "grad_norm": 0.4280292706382066, |
| "learning_rate": 2.635033691026253e-05, |
| "loss": 0.3643, |
| "step": 892 |
| }, |
| { |
| "epoch": 2.2853486884197056, |
| "grad_norm": 0.46336387808601265, |
| "learning_rate": 2.6316377768156366e-05, |
| "loss": 0.3516, |
| "step": 893 |
| }, |
| { |
| "epoch": 2.287907869481766, |
| "grad_norm": 0.3675246620437907, |
| "learning_rate": 2.6282398385911503e-05, |
| "loss": 0.2782, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.290467050543826, |
| "grad_norm": 0.4188680082069283, |
| "learning_rate": 2.624839887241115e-05, |
| "loss": 0.3521, |
| "step": 895 |
| }, |
| { |
| "epoch": 2.2930262316058863, |
| "grad_norm": 0.42684239051457756, |
| "learning_rate": 2.6214379336603016e-05, |
| "loss": 0.2909, |
| "step": 896 |
| }, |
| { |
| "epoch": 2.295585412667946, |
| "grad_norm": 0.36815581001539444, |
| "learning_rate": 2.618033988749895e-05, |
| "loss": 0.3068, |
| "step": 897 |
| }, |
| { |
| "epoch": 2.2981445937300062, |
| "grad_norm": 0.4355098607462678, |
| "learning_rate": 2.614628063417464e-05, |
| "loss": 0.3561, |
| "step": 898 |
| }, |
| { |
| "epoch": 2.3007037747920664, |
| "grad_norm": 0.4278330160593537, |
| "learning_rate": 2.6112201685769224e-05, |
| "loss": 0.3265, |
| "step": 899 |
| }, |
| { |
| "epoch": 2.3032629558541267, |
| "grad_norm": 0.39014595589180573, |
| "learning_rate": 2.607810315148494e-05, |
| "loss": 0.3569, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.305822136916187, |
| "grad_norm": 0.4667698942028393, |
| "learning_rate": 2.60439851405868e-05, |
| "loss": 0.3631, |
| "step": 901 |
| }, |
| { |
| "epoch": 2.308381317978247, |
| "grad_norm": 0.41965973806882434, |
| "learning_rate": 2.600984776240222e-05, |
| "loss": 0.3248, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.3109404990403073, |
| "grad_norm": 0.44724491924157506, |
| "learning_rate": 2.5975691126320678e-05, |
| "loss": 0.3854, |
| "step": 903 |
| }, |
| { |
| "epoch": 2.313499680102367, |
| "grad_norm": 0.46251594524874595, |
| "learning_rate": 2.5941515341793366e-05, |
| "loss": 0.3503, |
| "step": 904 |
| }, |
| { |
| "epoch": 2.3160588611644273, |
| "grad_norm": 0.35346075618792994, |
| "learning_rate": 2.5907320518332827e-05, |
| "loss": 0.3309, |
| "step": 905 |
| }, |
| { |
| "epoch": 2.3186180422264875, |
| "grad_norm": 0.3826935478844609, |
| "learning_rate": 2.587310676551262e-05, |
| "loss": 0.2894, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.3211772232885477, |
| "grad_norm": 0.416397755264069, |
| "learning_rate": 2.5838874192966953e-05, |
| "loss": 0.3716, |
| "step": 907 |
| }, |
| { |
| "epoch": 2.323736404350608, |
| "grad_norm": 0.3982851139464845, |
| "learning_rate": 2.5804622910390348e-05, |
| "loss": 0.2833, |
| "step": 908 |
| }, |
| { |
| "epoch": 2.326295585412668, |
| "grad_norm": 0.4591104893849011, |
| "learning_rate": 2.5770353027537276e-05, |
| "loss": 0.3277, |
| "step": 909 |
| }, |
| { |
| "epoch": 2.328854766474728, |
| "grad_norm": 0.382725152261151, |
| "learning_rate": 2.5736064654221808e-05, |
| "loss": 0.323, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.331413947536788, |
| "grad_norm": 0.5087928821054611, |
| "learning_rate": 2.5701757900317277e-05, |
| "loss": 0.3314, |
| "step": 911 |
| }, |
| { |
| "epoch": 2.3339731285988483, |
| "grad_norm": 0.3914001278691039, |
| "learning_rate": 2.5667432875755904e-05, |
| "loss": 0.3508, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.3365323096609085, |
| "grad_norm": 0.4510480545622787, |
| "learning_rate": 2.5633089690528455e-05, |
| "loss": 0.3529, |
| "step": 913 |
| }, |
| { |
| "epoch": 2.3390914907229687, |
| "grad_norm": 0.4134813709280613, |
| "learning_rate": 2.559872845468391e-05, |
| "loss": 0.3286, |
| "step": 914 |
| }, |
| { |
| "epoch": 2.341650671785029, |
| "grad_norm": 0.40105410165144934, |
| "learning_rate": 2.5564349278329056e-05, |
| "loss": 0.2852, |
| "step": 915 |
| }, |
| { |
| "epoch": 2.344209852847089, |
| "grad_norm": 0.49998212235296036, |
| "learning_rate": 2.5529952271628192e-05, |
| "loss": 0.2916, |
| "step": 916 |
| }, |
| { |
| "epoch": 2.346769033909149, |
| "grad_norm": 0.46734623561200184, |
| "learning_rate": 2.5495537544802757e-05, |
| "loss": 0.3497, |
| "step": 917 |
| }, |
| { |
| "epoch": 2.349328214971209, |
| "grad_norm": 0.43677669206755015, |
| "learning_rate": 2.5461105208130953e-05, |
| "loss": 0.359, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.3518873960332694, |
| "grad_norm": 0.49010454865810016, |
| "learning_rate": 2.542665537194742e-05, |
| "loss": 0.3368, |
| "step": 919 |
| }, |
| { |
| "epoch": 2.3544465770953296, |
| "grad_norm": 0.370850360816377, |
| "learning_rate": 2.539218814664288e-05, |
| "loss": 0.3222, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.3570057581573898, |
| "grad_norm": 0.46886497417633327, |
| "learning_rate": 2.5357703642663766e-05, |
| "loss": 0.3633, |
| "step": 921 |
| }, |
| { |
| "epoch": 2.3595649392194495, |
| "grad_norm": 0.4227514997155462, |
| "learning_rate": 2.5323201970511883e-05, |
| "loss": 0.3497, |
| "step": 922 |
| }, |
| { |
| "epoch": 2.3621241202815098, |
| "grad_norm": 0.4015401134012503, |
| "learning_rate": 2.528868324074405e-05, |
| "loss": 0.3076, |
| "step": 923 |
| }, |
| { |
| "epoch": 2.36468330134357, |
| "grad_norm": 0.4012146844135177, |
| "learning_rate": 2.525414756397174e-05, |
| "loss": 0.3117, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.36724248240563, |
| "grad_norm": 0.3724913651532696, |
| "learning_rate": 2.521959505086075e-05, |
| "loss": 0.2948, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.3698016634676904, |
| "grad_norm": 0.44515489828057647, |
| "learning_rate": 2.5185025812130794e-05, |
| "loss": 0.3624, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.3723608445297506, |
| "grad_norm": 0.4333489454322345, |
| "learning_rate": 2.5150439958555205e-05, |
| "loss": 0.3254, |
| "step": 927 |
| }, |
| { |
| "epoch": 2.374920025591811, |
| "grad_norm": 0.4284602049159506, |
| "learning_rate": 2.5115837600960564e-05, |
| "loss": 0.3232, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.3774792066538706, |
| "grad_norm": 0.4327763714080197, |
| "learning_rate": 2.5081218850226315e-05, |
| "loss": 0.3213, |
| "step": 929 |
| }, |
| { |
| "epoch": 2.380038387715931, |
| "grad_norm": 0.4092391708238846, |
| "learning_rate": 2.5046583817284437e-05, |
| "loss": 0.3645, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.382597568777991, |
| "grad_norm": 0.3874157650273361, |
| "learning_rate": 2.5011932613119098e-05, |
| "loss": 0.3546, |
| "step": 931 |
| }, |
| { |
| "epoch": 2.385156749840051, |
| "grad_norm": 0.4154060512286611, |
| "learning_rate": 2.497726534876627e-05, |
| "loss": 0.3724, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.3877159309021114, |
| "grad_norm": 0.3520365071455179, |
| "learning_rate": 2.4942582135313393e-05, |
| "loss": 0.3171, |
| "step": 933 |
| }, |
| { |
| "epoch": 2.3902751119641716, |
| "grad_norm": 0.4129362868719995, |
| "learning_rate": 2.490788308389902e-05, |
| "loss": 0.3081, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.3928342930262314, |
| "grad_norm": 0.3826332240853489, |
| "learning_rate": 2.487316830571244e-05, |
| "loss": 0.3167, |
| "step": 935 |
| }, |
| { |
| "epoch": 2.3953934740882916, |
| "grad_norm": 0.41073512437749543, |
| "learning_rate": 2.4838437911993355e-05, |
| "loss": 0.2872, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.397952655150352, |
| "grad_norm": 0.41060249122236425, |
| "learning_rate": 2.48036920140315e-05, |
| "loss": 0.3331, |
| "step": 937 |
| }, |
| { |
| "epoch": 2.400511836212412, |
| "grad_norm": 0.39939347922246277, |
| "learning_rate": 2.4768930723166266e-05, |
| "loss": 0.309, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.4030710172744723, |
| "grad_norm": 0.44800610692896503, |
| "learning_rate": 2.473415415078642e-05, |
| "loss": 0.3301, |
| "step": 939 |
| }, |
| { |
| "epoch": 2.4056301983365325, |
| "grad_norm": 0.45045037484262557, |
| "learning_rate": 2.4699362408329646e-05, |
| "loss": 0.3545, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.4081893793985927, |
| "grad_norm": 0.4516520833709128, |
| "learning_rate": 2.466455560728227e-05, |
| "loss": 0.3219, |
| "step": 941 |
| }, |
| { |
| "epoch": 2.4107485604606524, |
| "grad_norm": 0.4140396756771499, |
| "learning_rate": 2.4629733859178867e-05, |
| "loss": 0.3312, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.4133077415227127, |
| "grad_norm": 0.4242325070475781, |
| "learning_rate": 2.4594897275601887e-05, |
| "loss": 0.3657, |
| "step": 943 |
| }, |
| { |
| "epoch": 2.415866922584773, |
| "grad_norm": 0.3534056791478892, |
| "learning_rate": 2.456004596818135e-05, |
| "loss": 0.2875, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.418426103646833, |
| "grad_norm": 0.4504625178937038, |
| "learning_rate": 2.4525180048594452e-05, |
| "loss": 0.3947, |
| "step": 945 |
| }, |
| { |
| "epoch": 2.4209852847088933, |
| "grad_norm": 0.3713456955088067, |
| "learning_rate": 2.4490299628565168e-05, |
| "loss": 0.3365, |
| "step": 946 |
| }, |
| { |
| "epoch": 2.423544465770953, |
| "grad_norm": 0.4139332630376369, |
| "learning_rate": 2.4455404819864e-05, |
| "loss": 0.3213, |
| "step": 947 |
| }, |
| { |
| "epoch": 2.4261036468330133, |
| "grad_norm": 0.39284365253142334, |
| "learning_rate": 2.4420495734307527e-05, |
| "loss": 0.3707, |
| "step": 948 |
| }, |
| { |
| "epoch": 2.4286628278950735, |
| "grad_norm": 0.44240961049247096, |
| "learning_rate": 2.4385572483758066e-05, |
| "loss": 0.373, |
| "step": 949 |
| }, |
| { |
| "epoch": 2.4312220089571337, |
| "grad_norm": 0.41468110454884644, |
| "learning_rate": 2.435063518012335e-05, |
| "loss": 0.3791, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.433781190019194, |
| "grad_norm": 0.38546461774505014, |
| "learning_rate": 2.4315683935356127e-05, |
| "loss": 0.3092, |
| "step": 951 |
| }, |
| { |
| "epoch": 2.436340371081254, |
| "grad_norm": 0.3973539449011059, |
| "learning_rate": 2.4280718861453814e-05, |
| "loss": 0.3537, |
| "step": 952 |
| }, |
| { |
| "epoch": 2.4388995521433143, |
| "grad_norm": 0.40087880001543535, |
| "learning_rate": 2.424574007045816e-05, |
| "loss": 0.3513, |
| "step": 953 |
| }, |
| { |
| "epoch": 2.441458733205374, |
| "grad_norm": 0.4363352682087938, |
| "learning_rate": 2.421074767445485e-05, |
| "loss": 0.3168, |
| "step": 954 |
| }, |
| { |
| "epoch": 2.4440179142674343, |
| "grad_norm": 0.387588478700538, |
| "learning_rate": 2.4175741785573177e-05, |
| "loss": 0.3156, |
| "step": 955 |
| }, |
| { |
| "epoch": 2.4465770953294945, |
| "grad_norm": 0.43136617250905906, |
| "learning_rate": 2.4140722515985666e-05, |
| "loss": 0.3396, |
| "step": 956 |
| }, |
| { |
| "epoch": 2.4491362763915547, |
| "grad_norm": 0.4356259978949205, |
| "learning_rate": 2.4105689977907722e-05, |
| "loss": 0.3633, |
| "step": 957 |
| }, |
| { |
| "epoch": 2.451695457453615, |
| "grad_norm": 0.3939046327707216, |
| "learning_rate": 2.407064428359726e-05, |
| "loss": 0.3367, |
| "step": 958 |
| }, |
| { |
| "epoch": 2.454254638515675, |
| "grad_norm": 0.44530753935780215, |
| "learning_rate": 2.4035585545354353e-05, |
| "loss": 0.2652, |
| "step": 959 |
| }, |
| { |
| "epoch": 2.456813819577735, |
| "grad_norm": 0.39324736708789354, |
| "learning_rate": 2.4000513875520892e-05, |
| "loss": 0.3497, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.459373000639795, |
| "grad_norm": 0.3745102508002373, |
| "learning_rate": 2.396542938648018e-05, |
| "loss": 0.351, |
| "step": 961 |
| }, |
| { |
| "epoch": 2.4619321817018553, |
| "grad_norm": 0.433148063417755, |
| "learning_rate": 2.3930332190656604e-05, |
| "loss": 0.3226, |
| "step": 962 |
| }, |
| { |
| "epoch": 2.4644913627639156, |
| "grad_norm": 0.5060978327975577, |
| "learning_rate": 2.3895222400515282e-05, |
| "loss": 0.3944, |
| "step": 963 |
| }, |
| { |
| "epoch": 2.4670505438259758, |
| "grad_norm": 0.3831333684566055, |
| "learning_rate": 2.3860100128561677e-05, |
| "loss": 0.303, |
| "step": 964 |
| }, |
| { |
| "epoch": 2.469609724888036, |
| "grad_norm": 0.4834696949807748, |
| "learning_rate": 2.3824965487341247e-05, |
| "loss": 0.36, |
| "step": 965 |
| }, |
| { |
| "epoch": 2.472168905950096, |
| "grad_norm": 0.48539696684918826, |
| "learning_rate": 2.3789818589439094e-05, |
| "loss": 0.3418, |
| "step": 966 |
| }, |
| { |
| "epoch": 2.474728087012156, |
| "grad_norm": 0.3663525699221002, |
| "learning_rate": 2.375465954747959e-05, |
| "loss": 0.2906, |
| "step": 967 |
| }, |
| { |
| "epoch": 2.477287268074216, |
| "grad_norm": 0.4444877062926493, |
| "learning_rate": 2.371948847412602e-05, |
| "loss": 0.3281, |
| "step": 968 |
| }, |
| { |
| "epoch": 2.4798464491362764, |
| "grad_norm": 0.37267946857207057, |
| "learning_rate": 2.3684305482080233e-05, |
| "loss": 0.3214, |
| "step": 969 |
| }, |
| { |
| "epoch": 2.4824056301983366, |
| "grad_norm": 0.422309895496103, |
| "learning_rate": 2.3649110684082258e-05, |
| "loss": 0.3309, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.484964811260397, |
| "grad_norm": 0.366896575024139, |
| "learning_rate": 2.361390419290995e-05, |
| "loss": 0.3359, |
| "step": 971 |
| }, |
| { |
| "epoch": 2.4875239923224566, |
| "grad_norm": 0.4252498965109737, |
| "learning_rate": 2.357868612137866e-05, |
| "loss": 0.3162, |
| "step": 972 |
| }, |
| { |
| "epoch": 2.490083173384517, |
| "grad_norm": 0.4454769676233995, |
| "learning_rate": 2.3543456582340815e-05, |
| "loss": 0.3458, |
| "step": 973 |
| }, |
| { |
| "epoch": 2.492642354446577, |
| "grad_norm": 0.504528012046428, |
| "learning_rate": 2.3508215688685607e-05, |
| "loss": 0.3783, |
| "step": 974 |
| }, |
| { |
| "epoch": 2.495201535508637, |
| "grad_norm": 0.3791982649378316, |
| "learning_rate": 2.3472963553338614e-05, |
| "loss": 0.3439, |
| "step": 975 |
| }, |
| { |
| "epoch": 2.4977607165706974, |
| "grad_norm": 0.3958397655771158, |
| "learning_rate": 2.3437700289261417e-05, |
| "loss": 0.3098, |
| "step": 976 |
| }, |
| { |
| "epoch": 2.5003198976327576, |
| "grad_norm": 0.4716678361651927, |
| "learning_rate": 2.3402426009451288e-05, |
| "loss": 0.3442, |
| "step": 977 |
| }, |
| { |
| "epoch": 2.502879078694818, |
| "grad_norm": 0.41505752875646384, |
| "learning_rate": 2.3367140826940768e-05, |
| "loss": 0.3393, |
| "step": 978 |
| }, |
| { |
| "epoch": 2.505438259756878, |
| "grad_norm": 0.4831411264450984, |
| "learning_rate": 2.333184485479737e-05, |
| "loss": 0.3406, |
| "step": 979 |
| }, |
| { |
| "epoch": 2.507997440818938, |
| "grad_norm": 0.44259478488091053, |
| "learning_rate": 2.3296538206123134e-05, |
| "loss": 0.3498, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.510556621880998, |
| "grad_norm": 0.39650938475151654, |
| "learning_rate": 2.326122099405435e-05, |
| "loss": 0.3218, |
| "step": 981 |
| }, |
| { |
| "epoch": 2.5131158029430583, |
| "grad_norm": 0.44478690078566685, |
| "learning_rate": 2.3225893331761143e-05, |
| "loss": 0.3354, |
| "step": 982 |
| }, |
| { |
| "epoch": 2.5156749840051185, |
| "grad_norm": 0.4617579108787994, |
| "learning_rate": 2.319055533244712e-05, |
| "loss": 0.3689, |
| "step": 983 |
| }, |
| { |
| "epoch": 2.5182341650671782, |
| "grad_norm": 0.36510971786258006, |
| "learning_rate": 2.315520710934903e-05, |
| "loss": 0.3189, |
| "step": 984 |
| }, |
| { |
| "epoch": 2.5207933461292384, |
| "grad_norm": 0.5962978268524062, |
| "learning_rate": 2.311984877573636e-05, |
| "loss": 0.3785, |
| "step": 985 |
| }, |
| { |
| "epoch": 2.5233525271912987, |
| "grad_norm": 0.4286182290118198, |
| "learning_rate": 2.3084480444911006e-05, |
| "loss": 0.2969, |
| "step": 986 |
| }, |
| { |
| "epoch": 2.525911708253359, |
| "grad_norm": 0.406129472684799, |
| "learning_rate": 2.304910223020691e-05, |
| "loss": 0.3622, |
| "step": 987 |
| }, |
| { |
| "epoch": 2.528470889315419, |
| "grad_norm": 0.40352070167371706, |
| "learning_rate": 2.3013714244989665e-05, |
| "loss": 0.3003, |
| "step": 988 |
| }, |
| { |
| "epoch": 2.5310300703774793, |
| "grad_norm": 0.41853745249254193, |
| "learning_rate": 2.2978316602656183e-05, |
| "loss": 0.3545, |
| "step": 989 |
| }, |
| { |
| "epoch": 2.5335892514395395, |
| "grad_norm": 0.3751248697656993, |
| "learning_rate": 2.2942909416634326e-05, |
| "loss": 0.3317, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.5361484325015997, |
| "grad_norm": 0.38457641982344676, |
| "learning_rate": 2.290749280038252e-05, |
| "loss": 0.3186, |
| "step": 991 |
| }, |
| { |
| "epoch": 2.5387076135636595, |
| "grad_norm": 0.43410698680189885, |
| "learning_rate": 2.2872066867389434e-05, |
| "loss": 0.3819, |
| "step": 992 |
| }, |
| { |
| "epoch": 2.5412667946257197, |
| "grad_norm": 0.3827463318912182, |
| "learning_rate": 2.2836631731173577e-05, |
| "loss": 0.3428, |
| "step": 993 |
| }, |
| { |
| "epoch": 2.54382597568778, |
| "grad_norm": 0.3626009354081465, |
| "learning_rate": 2.2801187505282948e-05, |
| "loss": 0.3313, |
| "step": 994 |
| }, |
| { |
| "epoch": 2.54638515674984, |
| "grad_norm": 0.3879708806451702, |
| "learning_rate": 2.2765734303294666e-05, |
| "loss": 0.302, |
| "step": 995 |
| }, |
| { |
| "epoch": 2.5489443378119003, |
| "grad_norm": 0.3615193094932171, |
| "learning_rate": 2.2730272238814636e-05, |
| "loss": 0.3022, |
| "step": 996 |
| }, |
| { |
| "epoch": 2.55150351887396, |
| "grad_norm": 0.38109707078147037, |
| "learning_rate": 2.2694801425477136e-05, |
| "loss": 0.3199, |
| "step": 997 |
| }, |
| { |
| "epoch": 2.5540626999360203, |
| "grad_norm": 0.35624704442372485, |
| "learning_rate": 2.2659321976944507e-05, |
| "loss": 0.3394, |
| "step": 998 |
| }, |
| { |
| "epoch": 2.5566218809980805, |
| "grad_norm": 0.5236454693659701, |
| "learning_rate": 2.2623834006906732e-05, |
| "loss": 0.3254, |
| "step": 999 |
| }, |
| { |
| "epoch": 2.5591810620601407, |
| "grad_norm": 0.4793516094510245, |
| "learning_rate": 2.2588337629081107e-05, |
| "loss": 0.4122, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.561740243122201, |
| "grad_norm": 0.32519173651998734, |
| "learning_rate": 2.25528329572119e-05, |
| "loss": 0.2782, |
| "step": 1001 |
| }, |
| { |
| "epoch": 2.564299424184261, |
| "grad_norm": 0.47513564073653997, |
| "learning_rate": 2.25173201050699e-05, |
| "loss": 0.4075, |
| "step": 1002 |
| }, |
| { |
| "epoch": 2.5668586052463214, |
| "grad_norm": 0.34327531232567976, |
| "learning_rate": 2.248179918645216e-05, |
| "loss": 0.2602, |
| "step": 1003 |
| }, |
| { |
| "epoch": 2.5694177863083816, |
| "grad_norm": 0.42876499217691605, |
| "learning_rate": 2.2446270315181566e-05, |
| "loss": 0.3538, |
| "step": 1004 |
| }, |
| { |
| "epoch": 2.5719769673704413, |
| "grad_norm": 0.38643082957234787, |
| "learning_rate": 2.2410733605106462e-05, |
| "loss": 0.3331, |
| "step": 1005 |
| }, |
| { |
| "epoch": 2.5745361484325016, |
| "grad_norm": 0.3845952145329833, |
| "learning_rate": 2.237518917010035e-05, |
| "loss": 0.3068, |
| "step": 1006 |
| }, |
| { |
| "epoch": 2.5770953294945618, |
| "grad_norm": 0.398304345128026, |
| "learning_rate": 2.233963712406147e-05, |
| "loss": 0.3455, |
| "step": 1007 |
| }, |
| { |
| "epoch": 2.579654510556622, |
| "grad_norm": 0.4106067027436424, |
| "learning_rate": 2.2304077580912423e-05, |
| "loss": 0.3266, |
| "step": 1008 |
| }, |
| { |
| "epoch": 2.5822136916186818, |
| "grad_norm": 0.3515875497757696, |
| "learning_rate": 2.2268510654599885e-05, |
| "loss": 0.3089, |
| "step": 1009 |
| }, |
| { |
| "epoch": 2.584772872680742, |
| "grad_norm": 0.34230034407291976, |
| "learning_rate": 2.2232936459094158e-05, |
| "loss": 0.37, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.587332053742802, |
| "grad_norm": 0.3409277384030245, |
| "learning_rate": 2.2197355108388835e-05, |
| "loss": 0.3425, |
| "step": 1011 |
| }, |
| { |
| "epoch": 2.5898912348048624, |
| "grad_norm": 0.3659124451915072, |
| "learning_rate": 2.216176671650045e-05, |
| "loss": 0.3417, |
| "step": 1012 |
| }, |
| { |
| "epoch": 2.5924504158669226, |
| "grad_norm": 0.34644737723549984, |
| "learning_rate": 2.2126171397468105e-05, |
| "loss": 0.3048, |
| "step": 1013 |
| }, |
| { |
| "epoch": 2.595009596928983, |
| "grad_norm": 0.34192043331418503, |
| "learning_rate": 2.209056926535307e-05, |
| "loss": 0.3245, |
| "step": 1014 |
| }, |
| { |
| "epoch": 2.597568777991043, |
| "grad_norm": 0.4024225219500372, |
| "learning_rate": 2.205496043423849e-05, |
| "loss": 0.3501, |
| "step": 1015 |
| }, |
| { |
| "epoch": 2.6001279590531032, |
| "grad_norm": 0.3444122394151278, |
| "learning_rate": 2.2019345018228922e-05, |
| "loss": 0.3403, |
| "step": 1016 |
| }, |
| { |
| "epoch": 2.602687140115163, |
| "grad_norm": 0.41753443691652886, |
| "learning_rate": 2.1983723131450088e-05, |
| "loss": 0.3609, |
| "step": 1017 |
| }, |
| { |
| "epoch": 2.605246321177223, |
| "grad_norm": 0.42275585901863255, |
| "learning_rate": 2.194809488804839e-05, |
| "loss": 0.3427, |
| "step": 1018 |
| }, |
| { |
| "epoch": 2.6078055022392834, |
| "grad_norm": 0.3513925245942965, |
| "learning_rate": 2.1912460402190625e-05, |
| "loss": 0.2984, |
| "step": 1019 |
| }, |
| { |
| "epoch": 2.6103646833013436, |
| "grad_norm": 0.4845404009383636, |
| "learning_rate": 2.1876819788063586e-05, |
| "loss": 0.342, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.612923864363404, |
| "grad_norm": 0.38632744480954595, |
| "learning_rate": 2.1841173159873718e-05, |
| "loss": 0.3178, |
| "step": 1021 |
| }, |
| { |
| "epoch": 2.6154830454254636, |
| "grad_norm": 0.40341965144316216, |
| "learning_rate": 2.1805520631846705e-05, |
| "loss": 0.3454, |
| "step": 1022 |
| }, |
| { |
| "epoch": 2.618042226487524, |
| "grad_norm": 0.5429183555857332, |
| "learning_rate": 2.176986231822717e-05, |
| "loss": 0.3407, |
| "step": 1023 |
| }, |
| { |
| "epoch": 2.620601407549584, |
| "grad_norm": 0.3693931345744361, |
| "learning_rate": 2.173419833327826e-05, |
| "loss": 0.2931, |
| "step": 1024 |
| }, |
| { |
| "epoch": 2.6231605886116443, |
| "grad_norm": 0.41461427921632693, |
| "learning_rate": 2.16985287912813e-05, |
| "loss": 0.3462, |
| "step": 1025 |
| }, |
| { |
| "epoch": 2.6257197696737045, |
| "grad_norm": 0.35793827637412173, |
| "learning_rate": 2.166285380653541e-05, |
| "loss": 0.2649, |
| "step": 1026 |
| }, |
| { |
| "epoch": 2.6282789507357647, |
| "grad_norm": 0.43455345040805726, |
| "learning_rate": 2.1627173493357167e-05, |
| "loss": 0.3432, |
| "step": 1027 |
| }, |
| { |
| "epoch": 2.630838131797825, |
| "grad_norm": 0.36886933829443885, |
| "learning_rate": 2.1591487966080215e-05, |
| "loss": 0.3106, |
| "step": 1028 |
| }, |
| { |
| "epoch": 2.633397312859885, |
| "grad_norm": 0.3991822617060509, |
| "learning_rate": 2.1555797339054898e-05, |
| "loss": 0.3621, |
| "step": 1029 |
| }, |
| { |
| "epoch": 2.635956493921945, |
| "grad_norm": 0.3967391590295086, |
| "learning_rate": 2.1520101726647922e-05, |
| "loss": 0.3711, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.638515674984005, |
| "grad_norm": 0.3577916760514241, |
| "learning_rate": 2.1484401243241947e-05, |
| "loss": 0.2945, |
| "step": 1031 |
| }, |
| { |
| "epoch": 2.6410748560460653, |
| "grad_norm": 0.40155422081290365, |
| "learning_rate": 2.1448696003235252e-05, |
| "loss": 0.3366, |
| "step": 1032 |
| }, |
| { |
| "epoch": 2.6436340371081255, |
| "grad_norm": 0.36541079152322986, |
| "learning_rate": 2.1412986121041355e-05, |
| "loss": 0.2932, |
| "step": 1033 |
| }, |
| { |
| "epoch": 2.6461932181701853, |
| "grad_norm": 0.3484241132798254, |
| "learning_rate": 2.1377271711088655e-05, |
| "loss": 0.3339, |
| "step": 1034 |
| }, |
| { |
| "epoch": 2.6487523992322455, |
| "grad_norm": 0.43519634098943255, |
| "learning_rate": 2.1341552887820048e-05, |
| "loss": 0.3762, |
| "step": 1035 |
| }, |
| { |
| "epoch": 2.6513115802943057, |
| "grad_norm": 0.33001353721510546, |
| "learning_rate": 2.1305829765692588e-05, |
| "loss": 0.3277, |
| "step": 1036 |
| }, |
| { |
| "epoch": 2.653870761356366, |
| "grad_norm": 0.3720609129475248, |
| "learning_rate": 2.1270102459177093e-05, |
| "loss": 0.3101, |
| "step": 1037 |
| }, |
| { |
| "epoch": 2.656429942418426, |
| "grad_norm": 0.3837314560637158, |
| "learning_rate": 2.123437108275779e-05, |
| "loss": 0.351, |
| "step": 1038 |
| }, |
| { |
| "epoch": 2.6589891234804863, |
| "grad_norm": 0.40361073664191494, |
| "learning_rate": 2.119863575093195e-05, |
| "loss": 0.3171, |
| "step": 1039 |
| }, |
| { |
| "epoch": 2.6615483045425465, |
| "grad_norm": 0.35854323369583274, |
| "learning_rate": 2.1162896578209517e-05, |
| "loss": 0.3253, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.6641074856046068, |
| "grad_norm": 0.37248590893696937, |
| "learning_rate": 2.112715367911275e-05, |
| "loss": 0.3511, |
| "step": 1041 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 0.3453384873567075, |
| "learning_rate": 2.1091407168175836e-05, |
| "loss": 0.3414, |
| "step": 1042 |
| }, |
| { |
| "epoch": 2.6692258477287267, |
| "grad_norm": 0.40821485869737884, |
| "learning_rate": 2.1055657159944545e-05, |
| "loss": 0.3326, |
| "step": 1043 |
| }, |
| { |
| "epoch": 2.671785028790787, |
| "grad_norm": 0.36123413084691214, |
| "learning_rate": 2.1019903768975852e-05, |
| "loss": 0.3298, |
| "step": 1044 |
| }, |
| { |
| "epoch": 2.674344209852847, |
| "grad_norm": 0.3404084610072503, |
| "learning_rate": 2.0984147109837564e-05, |
| "loss": 0.3047, |
| "step": 1045 |
| }, |
| { |
| "epoch": 2.6769033909149074, |
| "grad_norm": 0.4454775171788898, |
| "learning_rate": 2.094838729710798e-05, |
| "loss": 0.3679, |
| "step": 1046 |
| }, |
| { |
| "epoch": 2.679462571976967, |
| "grad_norm": 0.3571602753390297, |
| "learning_rate": 2.0912624445375483e-05, |
| "loss": 0.3261, |
| "step": 1047 |
| }, |
| { |
| "epoch": 2.6820217530390273, |
| "grad_norm": 0.332410280409988, |
| "learning_rate": 2.0876858669238206e-05, |
| "loss": 0.3114, |
| "step": 1048 |
| }, |
| { |
| "epoch": 2.6845809341010876, |
| "grad_norm": 0.42377420739915694, |
| "learning_rate": 2.0841090083303643e-05, |
| "loss": 0.342, |
| "step": 1049 |
| }, |
| { |
| "epoch": 2.6871401151631478, |
| "grad_norm": 0.3395618485708725, |
| "learning_rate": 2.0805318802188307e-05, |
| "loss": 0.3157, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.689699296225208, |
| "grad_norm": 0.3545398294326781, |
| "learning_rate": 2.0769544940517326e-05, |
| "loss": 0.3207, |
| "step": 1051 |
| }, |
| { |
| "epoch": 2.692258477287268, |
| "grad_norm": 0.41494596285417495, |
| "learning_rate": 2.0733768612924137e-05, |
| "loss": 0.3294, |
| "step": 1052 |
| }, |
| { |
| "epoch": 2.6948176583493284, |
| "grad_norm": 0.3439663523935823, |
| "learning_rate": 2.0697989934050025e-05, |
| "loss": 0.2852, |
| "step": 1053 |
| }, |
| { |
| "epoch": 2.6973768394113886, |
| "grad_norm": 0.41679625940370135, |
| "learning_rate": 2.0662209018543836e-05, |
| "loss": 0.3548, |
| "step": 1054 |
| }, |
| { |
| "epoch": 2.6999360204734484, |
| "grad_norm": 0.4309626747124301, |
| "learning_rate": 2.0626425981061608e-05, |
| "loss": 0.326, |
| "step": 1055 |
| }, |
| { |
| "epoch": 2.7024952015355086, |
| "grad_norm": 0.3700268039056913, |
| "learning_rate": 2.0590640936266132e-05, |
| "loss": 0.3346, |
| "step": 1056 |
| }, |
| { |
| "epoch": 2.705054382597569, |
| "grad_norm": 0.4092764582830664, |
| "learning_rate": 2.0554853998826652e-05, |
| "loss": 0.3432, |
| "step": 1057 |
| }, |
| { |
| "epoch": 2.707613563659629, |
| "grad_norm": 0.3003644777646875, |
| "learning_rate": 2.0519065283418494e-05, |
| "loss": 0.257, |
| "step": 1058 |
| }, |
| { |
| "epoch": 2.710172744721689, |
| "grad_norm": 0.36206552407899595, |
| "learning_rate": 2.0483274904722647e-05, |
| "loss": 0.3339, |
| "step": 1059 |
| }, |
| { |
| "epoch": 2.712731925783749, |
| "grad_norm": 0.41404402491302905, |
| "learning_rate": 2.0447482977425465e-05, |
| "loss": 0.3705, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.715291106845809, |
| "grad_norm": 0.35467256324797414, |
| "learning_rate": 2.0411689616218234e-05, |
| "loss": 0.3173, |
| "step": 1061 |
| }, |
| { |
| "epoch": 2.7178502879078694, |
| "grad_norm": 0.39211100334061155, |
| "learning_rate": 2.037589493579685e-05, |
| "loss": 0.336, |
| "step": 1062 |
| }, |
| { |
| "epoch": 2.7204094689699296, |
| "grad_norm": 0.3752676154141884, |
| "learning_rate": 2.034009905086144e-05, |
| "loss": 0.339, |
| "step": 1063 |
| }, |
| { |
| "epoch": 2.72296865003199, |
| "grad_norm": 0.41294533749554696, |
| "learning_rate": 2.0304302076115987e-05, |
| "loss": 0.3187, |
| "step": 1064 |
| }, |
| { |
| "epoch": 2.72552783109405, |
| "grad_norm": 0.37876928739351945, |
| "learning_rate": 2.0268504126267952e-05, |
| "loss": 0.2895, |
| "step": 1065 |
| }, |
| { |
| "epoch": 2.7280870121561103, |
| "grad_norm": 0.4001922065674535, |
| "learning_rate": 2.0232705316027946e-05, |
| "loss": 0.3153, |
| "step": 1066 |
| }, |
| { |
| "epoch": 2.73064619321817, |
| "grad_norm": 0.8048596079823415, |
| "learning_rate": 2.019690576010931e-05, |
| "loss": 0.3593, |
| "step": 1067 |
| }, |
| { |
| "epoch": 2.7332053742802302, |
| "grad_norm": 0.38282646966391626, |
| "learning_rate": 2.0161105573227798e-05, |
| "loss": 0.3035, |
| "step": 1068 |
| }, |
| { |
| "epoch": 2.7357645553422905, |
| "grad_norm": 0.46315568817195285, |
| "learning_rate": 2.0125304870101184e-05, |
| "loss": 0.3751, |
| "step": 1069 |
| }, |
| { |
| "epoch": 2.7383237364043507, |
| "grad_norm": 0.4109495278872969, |
| "learning_rate": 2.008950376544887e-05, |
| "loss": 0.3346, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.740882917466411, |
| "grad_norm": 0.4857096813595627, |
| "learning_rate": 2.005370237399157e-05, |
| "loss": 0.328, |
| "step": 1071 |
| }, |
| { |
| "epoch": 2.7434420985284707, |
| "grad_norm": 0.39791730028337013, |
| "learning_rate": 2.0017900810450923e-05, |
| "loss": 0.2865, |
| "step": 1072 |
| }, |
| { |
| "epoch": 2.746001279590531, |
| "grad_norm": 0.4465305517364689, |
| "learning_rate": 1.9982099189549087e-05, |
| "loss": 0.3647, |
| "step": 1073 |
| }, |
| { |
| "epoch": 2.748560460652591, |
| "grad_norm": 0.41005369965327937, |
| "learning_rate": 1.9946297626008432e-05, |
| "loss": 0.3151, |
| "step": 1074 |
| }, |
| { |
| "epoch": 2.7511196417146513, |
| "grad_norm": 0.49665282457932985, |
| "learning_rate": 1.9910496234551132e-05, |
| "loss": 0.3809, |
| "step": 1075 |
| }, |
| { |
| "epoch": 2.7536788227767115, |
| "grad_norm": 0.37361149681320743, |
| "learning_rate": 1.9874695129898826e-05, |
| "loss": 0.3221, |
| "step": 1076 |
| }, |
| { |
| "epoch": 2.7562380038387717, |
| "grad_norm": 0.51447395549755, |
| "learning_rate": 1.9838894426772205e-05, |
| "loss": 0.3677, |
| "step": 1077 |
| }, |
| { |
| "epoch": 2.758797184900832, |
| "grad_norm": 0.4601671007615494, |
| "learning_rate": 1.9803094239890692e-05, |
| "loss": 0.3519, |
| "step": 1078 |
| }, |
| { |
| "epoch": 2.761356365962892, |
| "grad_norm": 0.39680274509707913, |
| "learning_rate": 1.9767294683972064e-05, |
| "loss": 0.3521, |
| "step": 1079 |
| }, |
| { |
| "epoch": 2.763915547024952, |
| "grad_norm": 0.43847108432837295, |
| "learning_rate": 1.9731495873732055e-05, |
| "loss": 0.3346, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.766474728087012, |
| "grad_norm": 0.4078264869228389, |
| "learning_rate": 1.969569792388402e-05, |
| "loss": 0.3624, |
| "step": 1081 |
| }, |
| { |
| "epoch": 2.7690339091490723, |
| "grad_norm": 0.37340927869032653, |
| "learning_rate": 1.9659900949138562e-05, |
| "loss": 0.3252, |
| "step": 1082 |
| }, |
| { |
| "epoch": 2.7715930902111325, |
| "grad_norm": 0.7622901676621391, |
| "learning_rate": 1.9624105064203157e-05, |
| "loss": 0.3829, |
| "step": 1083 |
| }, |
| { |
| "epoch": 2.7741522712731923, |
| "grad_norm": 0.4249232215662602, |
| "learning_rate": 1.9588310383781773e-05, |
| "loss": 0.342, |
| "step": 1084 |
| }, |
| { |
| "epoch": 2.7767114523352525, |
| "grad_norm": 0.41402606002990694, |
| "learning_rate": 1.9552517022574542e-05, |
| "loss": 0.3756, |
| "step": 1085 |
| }, |
| { |
| "epoch": 2.7792706333973127, |
| "grad_norm": 0.48132928718497536, |
| "learning_rate": 1.951672509527736e-05, |
| "loss": 0.3263, |
| "step": 1086 |
| }, |
| { |
| "epoch": 2.781829814459373, |
| "grad_norm": 0.3954278720969905, |
| "learning_rate": 1.9480934716581513e-05, |
| "loss": 0.3209, |
| "step": 1087 |
| }, |
| { |
| "epoch": 2.784388995521433, |
| "grad_norm": 0.39183072418087983, |
| "learning_rate": 1.944514600117335e-05, |
| "loss": 0.3807, |
| "step": 1088 |
| }, |
| { |
| "epoch": 2.7869481765834934, |
| "grad_norm": 0.4620346036605357, |
| "learning_rate": 1.940935906373388e-05, |
| "loss": 0.3576, |
| "step": 1089 |
| }, |
| { |
| "epoch": 2.7895073576455536, |
| "grad_norm": 0.36599489933850005, |
| "learning_rate": 1.93735740189384e-05, |
| "loss": 0.3239, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.792066538707614, |
| "grad_norm": 0.3686130358461154, |
| "learning_rate": 1.9337790981456164e-05, |
| "loss": 0.331, |
| "step": 1091 |
| }, |
| { |
| "epoch": 2.7946257197696736, |
| "grad_norm": 0.38425163759635245, |
| "learning_rate": 1.930201006594999e-05, |
| "loss": 0.2916, |
| "step": 1092 |
| }, |
| { |
| "epoch": 2.7971849008317338, |
| "grad_norm": 0.4405563689728695, |
| "learning_rate": 1.926623138707587e-05, |
| "loss": 0.3361, |
| "step": 1093 |
| }, |
| { |
| "epoch": 2.799744081893794, |
| "grad_norm": 0.38706329631394476, |
| "learning_rate": 1.923045505948267e-05, |
| "loss": 0.323, |
| "step": 1094 |
| }, |
| { |
| "epoch": 2.802303262955854, |
| "grad_norm": 0.4708041341171456, |
| "learning_rate": 1.9194681197811703e-05, |
| "loss": 0.3378, |
| "step": 1095 |
| }, |
| { |
| "epoch": 2.8048624440179144, |
| "grad_norm": 0.43844070620974923, |
| "learning_rate": 1.915890991669636e-05, |
| "loss": 0.331, |
| "step": 1096 |
| }, |
| { |
| "epoch": 2.807421625079974, |
| "grad_norm": 0.4532919809550908, |
| "learning_rate": 1.9123141330761804e-05, |
| "loss": 0.3863, |
| "step": 1097 |
| }, |
| { |
| "epoch": 2.8099808061420344, |
| "grad_norm": 0.48356206097375876, |
| "learning_rate": 1.9087375554624527e-05, |
| "loss": 0.3241, |
| "step": 1098 |
| }, |
| { |
| "epoch": 2.8125399872040946, |
| "grad_norm": 0.37016571224063527, |
| "learning_rate": 1.9051612702892028e-05, |
| "loss": 0.3035, |
| "step": 1099 |
| }, |
| { |
| "epoch": 2.815099168266155, |
| "grad_norm": 0.5488710646000331, |
| "learning_rate": 1.901585289016244e-05, |
| "loss": 0.3365, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.817658349328215, |
| "grad_norm": 0.39186801965858076, |
| "learning_rate": 1.898009623102415e-05, |
| "loss": 0.3171, |
| "step": 1101 |
| }, |
| { |
| "epoch": 2.8202175303902752, |
| "grad_norm": 0.396128192090393, |
| "learning_rate": 1.894434284005546e-05, |
| "loss": 0.2926, |
| "step": 1102 |
| }, |
| { |
| "epoch": 2.8227767114523354, |
| "grad_norm": 0.4498006361647164, |
| "learning_rate": 1.890859283182417e-05, |
| "loss": 0.339, |
| "step": 1103 |
| }, |
| { |
| "epoch": 2.8253358925143957, |
| "grad_norm": 0.4421659448994112, |
| "learning_rate": 1.887284632088725e-05, |
| "loss": 0.3512, |
| "step": 1104 |
| }, |
| { |
| "epoch": 2.8278950735764554, |
| "grad_norm": 0.40391454153472733, |
| "learning_rate": 1.8837103421790486e-05, |
| "loss": 0.3431, |
| "step": 1105 |
| }, |
| { |
| "epoch": 2.8304542546385156, |
| "grad_norm": 0.4029197127830964, |
| "learning_rate": 1.8801364249068053e-05, |
| "loss": 0.3083, |
| "step": 1106 |
| }, |
| { |
| "epoch": 2.833013435700576, |
| "grad_norm": 0.3636262656158161, |
| "learning_rate": 1.8765628917242213e-05, |
| "loss": 0.2695, |
| "step": 1107 |
| }, |
| { |
| "epoch": 2.835572616762636, |
| "grad_norm": 0.41088999848761776, |
| "learning_rate": 1.8729897540822914e-05, |
| "loss": 0.3191, |
| "step": 1108 |
| }, |
| { |
| "epoch": 2.838131797824696, |
| "grad_norm": 0.4141748812616923, |
| "learning_rate": 1.8694170234307415e-05, |
| "loss": 0.3494, |
| "step": 1109 |
| }, |
| { |
| "epoch": 2.840690978886756, |
| "grad_norm": 0.40924209313260995, |
| "learning_rate": 1.8658447112179952e-05, |
| "loss": 0.3424, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.8432501599488162, |
| "grad_norm": 0.38985761612852116, |
| "learning_rate": 1.8622728288911358e-05, |
| "loss": 0.3367, |
| "step": 1111 |
| }, |
| { |
| "epoch": 2.8458093410108765, |
| "grad_norm": 0.38078814823399454, |
| "learning_rate": 1.858701387895865e-05, |
| "loss": 0.3143, |
| "step": 1112 |
| }, |
| { |
| "epoch": 2.8483685220729367, |
| "grad_norm": 0.34959904397806785, |
| "learning_rate": 1.8551303996764755e-05, |
| "loss": 0.3007, |
| "step": 1113 |
| }, |
| { |
| "epoch": 2.850927703134997, |
| "grad_norm": 0.4183989425563707, |
| "learning_rate": 1.8515598756758064e-05, |
| "loss": 0.359, |
| "step": 1114 |
| }, |
| { |
| "epoch": 2.853486884197057, |
| "grad_norm": 0.37111554641813266, |
| "learning_rate": 1.8479898273352084e-05, |
| "loss": 0.3381, |
| "step": 1115 |
| }, |
| { |
| "epoch": 2.8560460652591173, |
| "grad_norm": 0.36998674648942864, |
| "learning_rate": 1.8444202660945105e-05, |
| "loss": 0.3317, |
| "step": 1116 |
| }, |
| { |
| "epoch": 2.858605246321177, |
| "grad_norm": 0.4020148068654659, |
| "learning_rate": 1.8408512033919798e-05, |
| "loss": 0.3048, |
| "step": 1117 |
| }, |
| { |
| "epoch": 2.8611644273832373, |
| "grad_norm": 0.4005462960969913, |
| "learning_rate": 1.837282650664284e-05, |
| "loss": 0.3603, |
| "step": 1118 |
| }, |
| { |
| "epoch": 2.8637236084452975, |
| "grad_norm": 0.3444120356474249, |
| "learning_rate": 1.8337146193464595e-05, |
| "loss": 0.2898, |
| "step": 1119 |
| }, |
| { |
| "epoch": 2.8662827895073577, |
| "grad_norm": 0.39082290145745685, |
| "learning_rate": 1.83014712087187e-05, |
| "loss": 0.3131, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.868841970569418, |
| "grad_norm": 0.35821645503904304, |
| "learning_rate": 1.8265801666721744e-05, |
| "loss": 0.3433, |
| "step": 1121 |
| }, |
| { |
| "epoch": 2.8714011516314777, |
| "grad_norm": 0.41668733158509796, |
| "learning_rate": 1.8230137681772836e-05, |
| "loss": 0.3567, |
| "step": 1122 |
| }, |
| { |
| "epoch": 2.873960332693538, |
| "grad_norm": 0.34804389069027475, |
| "learning_rate": 1.8194479368153298e-05, |
| "loss": 0.3136, |
| "step": 1123 |
| }, |
| { |
| "epoch": 2.876519513755598, |
| "grad_norm": 0.3911488779456878, |
| "learning_rate": 1.8158826840126292e-05, |
| "loss": 0.3412, |
| "step": 1124 |
| }, |
| { |
| "epoch": 2.8790786948176583, |
| "grad_norm": 0.44396897741245006, |
| "learning_rate": 1.8123180211936417e-05, |
| "loss": 0.3644, |
| "step": 1125 |
| }, |
| { |
| "epoch": 2.8816378758797185, |
| "grad_norm": 0.3818666952140594, |
| "learning_rate": 1.808753959780938e-05, |
| "loss": 0.2988, |
| "step": 1126 |
| }, |
| { |
| "epoch": 2.8841970569417787, |
| "grad_norm": 0.3819899108018794, |
| "learning_rate": 1.805190511195162e-05, |
| "loss": 0.3304, |
| "step": 1127 |
| }, |
| { |
| "epoch": 2.886756238003839, |
| "grad_norm": 0.3988449634404456, |
| "learning_rate": 1.801627686854992e-05, |
| "loss": 0.3413, |
| "step": 1128 |
| }, |
| { |
| "epoch": 2.889315419065899, |
| "grad_norm": 0.4410132899232368, |
| "learning_rate": 1.7980654981771074e-05, |
| "loss": 0.3725, |
| "step": 1129 |
| }, |
| { |
| "epoch": 2.891874600127959, |
| "grad_norm": 0.31362864495515286, |
| "learning_rate": 1.794503956576152e-05, |
| "loss": 0.2833, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.894433781190019, |
| "grad_norm": 0.411109247445083, |
| "learning_rate": 1.7909430734646936e-05, |
| "loss": 0.3297, |
| "step": 1131 |
| }, |
| { |
| "epoch": 2.8969929622520794, |
| "grad_norm": 0.3360115333630458, |
| "learning_rate": 1.78738286025319e-05, |
| "loss": 0.2985, |
| "step": 1132 |
| }, |
| { |
| "epoch": 2.8995521433141396, |
| "grad_norm": 0.3728330681966131, |
| "learning_rate": 1.7838233283499554e-05, |
| "loss": 0.378, |
| "step": 1133 |
| }, |
| { |
| "epoch": 2.9021113243761993, |
| "grad_norm": 0.3539275837478167, |
| "learning_rate": 1.780264489161117e-05, |
| "loss": 0.3638, |
| "step": 1134 |
| }, |
| { |
| "epoch": 2.9046705054382596, |
| "grad_norm": 0.33582742300915935, |
| "learning_rate": 1.776706354090585e-05, |
| "loss": 0.3383, |
| "step": 1135 |
| }, |
| { |
| "epoch": 2.9072296865003198, |
| "grad_norm": 0.38396584024437336, |
| "learning_rate": 1.7731489345400118e-05, |
| "loss": 0.3116, |
| "step": 1136 |
| }, |
| { |
| "epoch": 2.90978886756238, |
| "grad_norm": 0.3121417283022767, |
| "learning_rate": 1.769592241908758e-05, |
| "loss": 0.3089, |
| "step": 1137 |
| }, |
| { |
| "epoch": 2.91234804862444, |
| "grad_norm": 0.37871598245894667, |
| "learning_rate": 1.766036287593854e-05, |
| "loss": 0.3504, |
| "step": 1138 |
| }, |
| { |
| "epoch": 2.9149072296865004, |
| "grad_norm": 0.4083761440481677, |
| "learning_rate": 1.762481082989965e-05, |
| "loss": 0.3338, |
| "step": 1139 |
| }, |
| { |
| "epoch": 2.9174664107485606, |
| "grad_norm": 0.3759166218369834, |
| "learning_rate": 1.758926639489354e-05, |
| "loss": 0.3448, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.920025591810621, |
| "grad_norm": 0.3918883448687237, |
| "learning_rate": 1.755372968481844e-05, |
| "loss": 0.3465, |
| "step": 1141 |
| }, |
| { |
| "epoch": 2.9225847728726806, |
| "grad_norm": 0.45281102397931977, |
| "learning_rate": 1.7518200813547842e-05, |
| "loss": 0.352, |
| "step": 1142 |
| }, |
| { |
| "epoch": 2.925143953934741, |
| "grad_norm": 0.3403187481784673, |
| "learning_rate": 1.748267989493011e-05, |
| "loss": 0.2767, |
| "step": 1143 |
| }, |
| { |
| "epoch": 2.927703134996801, |
| "grad_norm": 0.32374732234103554, |
| "learning_rate": 1.7447167042788108e-05, |
| "loss": 0.3003, |
| "step": 1144 |
| }, |
| { |
| "epoch": 2.9302623160588612, |
| "grad_norm": 0.43821825023278765, |
| "learning_rate": 1.7411662370918893e-05, |
| "loss": 0.3365, |
| "step": 1145 |
| }, |
| { |
| "epoch": 2.9328214971209214, |
| "grad_norm": 0.3735728621894312, |
| "learning_rate": 1.7376165993093278e-05, |
| "loss": 0.3164, |
| "step": 1146 |
| }, |
| { |
| "epoch": 2.935380678182981, |
| "grad_norm": 0.3713647905663265, |
| "learning_rate": 1.7340678023055496e-05, |
| "loss": 0.3237, |
| "step": 1147 |
| }, |
| { |
| "epoch": 2.9379398592450414, |
| "grad_norm": 0.40116903737296333, |
| "learning_rate": 1.7305198574522864e-05, |
| "loss": 0.3614, |
| "step": 1148 |
| }, |
| { |
| "epoch": 2.9404990403071016, |
| "grad_norm": 0.40054289621797295, |
| "learning_rate": 1.7269727761185374e-05, |
| "loss": 0.334, |
| "step": 1149 |
| }, |
| { |
| "epoch": 2.943058221369162, |
| "grad_norm": 0.3925230509455669, |
| "learning_rate": 1.7234265696705344e-05, |
| "loss": 0.2959, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.945617402431222, |
| "grad_norm": 0.42214888830794545, |
| "learning_rate": 1.7198812494717062e-05, |
| "loss": 0.3776, |
| "step": 1151 |
| }, |
| { |
| "epoch": 2.9481765834932823, |
| "grad_norm": 0.3616779488648713, |
| "learning_rate": 1.7163368268826433e-05, |
| "loss": 0.3016, |
| "step": 1152 |
| }, |
| { |
| "epoch": 2.9507357645553425, |
| "grad_norm": 0.38477907353760216, |
| "learning_rate": 1.7127933132610573e-05, |
| "loss": 0.3073, |
| "step": 1153 |
| }, |
| { |
| "epoch": 2.9532949456174027, |
| "grad_norm": 0.38583388740216534, |
| "learning_rate": 1.7092507199617482e-05, |
| "loss": 0.3303, |
| "step": 1154 |
| }, |
| { |
| "epoch": 2.9558541266794625, |
| "grad_norm": 0.4615746603773426, |
| "learning_rate": 1.7057090583365678e-05, |
| "loss": 0.3944, |
| "step": 1155 |
| }, |
| { |
| "epoch": 2.9584133077415227, |
| "grad_norm": 0.3793974621003137, |
| "learning_rate": 1.7021683397343823e-05, |
| "loss": 0.3298, |
| "step": 1156 |
| }, |
| { |
| "epoch": 2.960972488803583, |
| "grad_norm": 0.4920742763843501, |
| "learning_rate": 1.698628575501034e-05, |
| "loss": 0.3401, |
| "step": 1157 |
| }, |
| { |
| "epoch": 2.963531669865643, |
| "grad_norm": 0.4104914994234224, |
| "learning_rate": 1.6950897769793093e-05, |
| "loss": 0.3268, |
| "step": 1158 |
| }, |
| { |
| "epoch": 2.966090850927703, |
| "grad_norm": 0.41153736245664496, |
| "learning_rate": 1.6915519555089e-05, |
| "loss": 0.3594, |
| "step": 1159 |
| }, |
| { |
| "epoch": 2.968650031989763, |
| "grad_norm": 0.3875070318404897, |
| "learning_rate": 1.6880151224263646e-05, |
| "loss": 0.3398, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.9712092130518233, |
| "grad_norm": 0.44887352669211456, |
| "learning_rate": 1.6844792890650976e-05, |
| "loss": 0.2813, |
| "step": 1161 |
| }, |
| { |
| "epoch": 2.9737683941138835, |
| "grad_norm": 0.3800558590649599, |
| "learning_rate": 1.680944466755289e-05, |
| "loss": 0.3635, |
| "step": 1162 |
| }, |
| { |
| "epoch": 2.9763275751759437, |
| "grad_norm": 0.3971504175952064, |
| "learning_rate": 1.6774106668238867e-05, |
| "loss": 0.3146, |
| "step": 1163 |
| }, |
| { |
| "epoch": 2.978886756238004, |
| "grad_norm": 0.4715791982640647, |
| "learning_rate": 1.673877900594566e-05, |
| "loss": 0.3553, |
| "step": 1164 |
| }, |
| { |
| "epoch": 2.981445937300064, |
| "grad_norm": 0.30859611389638464, |
| "learning_rate": 1.6703461793876876e-05, |
| "loss": 0.2989, |
| "step": 1165 |
| }, |
| { |
| "epoch": 2.9840051183621243, |
| "grad_norm": 0.3973251808749978, |
| "learning_rate": 1.6668155145202638e-05, |
| "loss": 0.3579, |
| "step": 1166 |
| }, |
| { |
| "epoch": 2.986564299424184, |
| "grad_norm": 0.446835333586439, |
| "learning_rate": 1.6632859173059232e-05, |
| "loss": 0.3258, |
| "step": 1167 |
| }, |
| { |
| "epoch": 2.9891234804862443, |
| "grad_norm": 0.3811823018283798, |
| "learning_rate": 1.6597573990548722e-05, |
| "loss": 0.3201, |
| "step": 1168 |
| }, |
| { |
| "epoch": 2.9916826615483045, |
| "grad_norm": 0.375872795491289, |
| "learning_rate": 1.6562299710738586e-05, |
| "loss": 0.3255, |
| "step": 1169 |
| }, |
| { |
| "epoch": 2.9942418426103647, |
| "grad_norm": 0.4029499625289535, |
| "learning_rate": 1.6527036446661396e-05, |
| "loss": 0.307, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.996801023672425, |
| "grad_norm": 0.4156370523912452, |
| "learning_rate": 1.6491784311314403e-05, |
| "loss": 0.3797, |
| "step": 1171 |
| }, |
| { |
| "epoch": 2.9993602047344847, |
| "grad_norm": 0.4189485161365368, |
| "learning_rate": 1.6456543417659192e-05, |
| "loss": 0.3488, |
| "step": 1172 |
| }, |
| { |
| "epoch": 3.001919385796545, |
| "grad_norm": 0.5633124123177335, |
| "learning_rate": 1.6421313878621344e-05, |
| "loss": 0.2857, |
| "step": 1173 |
| }, |
| { |
| "epoch": 3.004478566858605, |
| "grad_norm": 0.39083213560124314, |
| "learning_rate": 1.6386095807090047e-05, |
| "loss": 0.2379, |
| "step": 1174 |
| }, |
| { |
| "epoch": 3.0070377479206654, |
| "grad_norm": 0.5240113784249277, |
| "learning_rate": 1.635088931591775e-05, |
| "loss": 0.2245, |
| "step": 1175 |
| }, |
| { |
| "epoch": 3.0095969289827256, |
| "grad_norm": 0.6354763063642779, |
| "learning_rate": 1.631569451791977e-05, |
| "loss": 0.2869, |
| "step": 1176 |
| }, |
| { |
| "epoch": 3.012156110044786, |
| "grad_norm": 0.4893508226750199, |
| "learning_rate": 1.628051152587398e-05, |
| "loss": 0.2703, |
| "step": 1177 |
| }, |
| { |
| "epoch": 3.014715291106846, |
| "grad_norm": 0.4524451646086151, |
| "learning_rate": 1.6245340452520414e-05, |
| "loss": 0.2176, |
| "step": 1178 |
| }, |
| { |
| "epoch": 3.0172744721689058, |
| "grad_norm": 0.4778994330296646, |
| "learning_rate": 1.6210181410560912e-05, |
| "loss": 0.2571, |
| "step": 1179 |
| }, |
| { |
| "epoch": 3.019833653230966, |
| "grad_norm": 0.37599046051577073, |
| "learning_rate": 1.6175034512658753e-05, |
| "loss": 0.2338, |
| "step": 1180 |
| }, |
| { |
| "epoch": 3.022392834293026, |
| "grad_norm": 0.45320853578403025, |
| "learning_rate": 1.613989987143833e-05, |
| "loss": 0.2325, |
| "step": 1181 |
| }, |
| { |
| "epoch": 3.0249520153550864, |
| "grad_norm": 0.43340502469781866, |
| "learning_rate": 1.610477759948472e-05, |
| "loss": 0.2767, |
| "step": 1182 |
| }, |
| { |
| "epoch": 3.0275111964171466, |
| "grad_norm": 0.38361490841329304, |
| "learning_rate": 1.6069667809343396e-05, |
| "loss": 0.2445, |
| "step": 1183 |
| }, |
| { |
| "epoch": 3.030070377479207, |
| "grad_norm": 0.39878208398916914, |
| "learning_rate": 1.603457061351983e-05, |
| "loss": 0.2506, |
| "step": 1184 |
| }, |
| { |
| "epoch": 3.0326295585412666, |
| "grad_norm": 0.44131555640934655, |
| "learning_rate": 1.5999486124479115e-05, |
| "loss": 0.2588, |
| "step": 1185 |
| }, |
| { |
| "epoch": 3.035188739603327, |
| "grad_norm": 0.36228403498534006, |
| "learning_rate": 1.5964414454645647e-05, |
| "loss": 0.2394, |
| "step": 1186 |
| }, |
| { |
| "epoch": 3.037747920665387, |
| "grad_norm": 0.40556573549758734, |
| "learning_rate": 1.5929355716402754e-05, |
| "loss": 0.2422, |
| "step": 1187 |
| }, |
| { |
| "epoch": 3.0403071017274472, |
| "grad_norm": 0.46971240300411676, |
| "learning_rate": 1.5894310022092288e-05, |
| "loss": 0.2536, |
| "step": 1188 |
| }, |
| { |
| "epoch": 3.0428662827895074, |
| "grad_norm": 0.37074278168598435, |
| "learning_rate": 1.5859277484014338e-05, |
| "loss": 0.2262, |
| "step": 1189 |
| }, |
| { |
| "epoch": 3.0454254638515676, |
| "grad_norm": 0.4101230047135583, |
| "learning_rate": 1.5824258214426833e-05, |
| "loss": 0.2501, |
| "step": 1190 |
| }, |
| { |
| "epoch": 3.047984644913628, |
| "grad_norm": 0.4316482731662535, |
| "learning_rate": 1.5789252325545157e-05, |
| "loss": 0.2766, |
| "step": 1191 |
| }, |
| { |
| "epoch": 3.0505438259756876, |
| "grad_norm": 0.36783848268146724, |
| "learning_rate": 1.5754259929541848e-05, |
| "loss": 0.2401, |
| "step": 1192 |
| }, |
| { |
| "epoch": 3.053103007037748, |
| "grad_norm": 0.4061482848745174, |
| "learning_rate": 1.5719281138546186e-05, |
| "loss": 0.2508, |
| "step": 1193 |
| }, |
| { |
| "epoch": 3.055662188099808, |
| "grad_norm": 0.382539152433566, |
| "learning_rate": 1.568431606464388e-05, |
| "loss": 0.2489, |
| "step": 1194 |
| }, |
| { |
| "epoch": 3.0582213691618683, |
| "grad_norm": 0.3652697874982772, |
| "learning_rate": 1.5649364819876655e-05, |
| "loss": 0.2429, |
| "step": 1195 |
| }, |
| { |
| "epoch": 3.0607805502239285, |
| "grad_norm": 0.42737097970366417, |
| "learning_rate": 1.561442751624193e-05, |
| "loss": 0.256, |
| "step": 1196 |
| }, |
| { |
| "epoch": 3.0633397312859887, |
| "grad_norm": 0.3207166801748589, |
| "learning_rate": 1.557950426569248e-05, |
| "loss": 0.1962, |
| "step": 1197 |
| }, |
| { |
| "epoch": 3.0658989123480485, |
| "grad_norm": 0.3943172590265861, |
| "learning_rate": 1.5544595180136003e-05, |
| "loss": 0.2519, |
| "step": 1198 |
| }, |
| { |
| "epoch": 3.0684580934101087, |
| "grad_norm": 0.36274192661719984, |
| "learning_rate": 1.550970037143483e-05, |
| "loss": 0.2279, |
| "step": 1199 |
| }, |
| { |
| "epoch": 3.071017274472169, |
| "grad_norm": 0.3630874200444502, |
| "learning_rate": 1.547481995140556e-05, |
| "loss": 0.2516, |
| "step": 1200 |
| }, |
| { |
| "epoch": 3.073576455534229, |
| "grad_norm": 0.3518754730219404, |
| "learning_rate": 1.5439954031818652e-05, |
| "loss": 0.2329, |
| "step": 1201 |
| }, |
| { |
| "epoch": 3.0761356365962893, |
| "grad_norm": 0.37790098887949486, |
| "learning_rate": 1.5405102724398113e-05, |
| "loss": 0.2677, |
| "step": 1202 |
| }, |
| { |
| "epoch": 3.0786948176583495, |
| "grad_norm": 0.35043382673558215, |
| "learning_rate": 1.5370266140821143e-05, |
| "loss": 0.2294, |
| "step": 1203 |
| }, |
| { |
| "epoch": 3.0812539987204093, |
| "grad_norm": 0.3731184820167596, |
| "learning_rate": 1.5335444392717738e-05, |
| "loss": 0.2319, |
| "step": 1204 |
| }, |
| { |
| "epoch": 3.0838131797824695, |
| "grad_norm": 0.3395760795759123, |
| "learning_rate": 1.5300637591670357e-05, |
| "loss": 0.2333, |
| "step": 1205 |
| }, |
| { |
| "epoch": 3.0863723608445297, |
| "grad_norm": 0.3530521161404101, |
| "learning_rate": 1.5265845849213588e-05, |
| "loss": 0.2458, |
| "step": 1206 |
| }, |
| { |
| "epoch": 3.08893154190659, |
| "grad_norm": 0.2968062718035343, |
| "learning_rate": 1.523106927683374e-05, |
| "loss": 0.1984, |
| "step": 1207 |
| }, |
| { |
| "epoch": 3.09149072296865, |
| "grad_norm": 0.34618157328728927, |
| "learning_rate": 1.5196307985968509e-05, |
| "loss": 0.2338, |
| "step": 1208 |
| }, |
| { |
| "epoch": 3.0940499040307103, |
| "grad_norm": 0.3991365653818135, |
| "learning_rate": 1.5161562088006649e-05, |
| "loss": 0.2639, |
| "step": 1209 |
| }, |
| { |
| "epoch": 3.09660908509277, |
| "grad_norm": 0.3337465568445769, |
| "learning_rate": 1.5126831694287564e-05, |
| "loss": 0.2354, |
| "step": 1210 |
| }, |
| { |
| "epoch": 3.0991682661548303, |
| "grad_norm": 0.39228609678359605, |
| "learning_rate": 1.5092116916100982e-05, |
| "loss": 0.2737, |
| "step": 1211 |
| }, |
| { |
| "epoch": 3.1017274472168905, |
| "grad_norm": 0.3216556126081721, |
| "learning_rate": 1.5057417864686607e-05, |
| "loss": 0.2237, |
| "step": 1212 |
| }, |
| { |
| "epoch": 3.1042866282789507, |
| "grad_norm": 0.3567015015151436, |
| "learning_rate": 1.5022734651233737e-05, |
| "loss": 0.2568, |
| "step": 1213 |
| }, |
| { |
| "epoch": 3.106845809341011, |
| "grad_norm": 0.35178848138592544, |
| "learning_rate": 1.4988067386880904e-05, |
| "loss": 0.2276, |
| "step": 1214 |
| }, |
| { |
| "epoch": 3.109404990403071, |
| "grad_norm": 0.3511504661864566, |
| "learning_rate": 1.4953416182715566e-05, |
| "loss": 0.2699, |
| "step": 1215 |
| }, |
| { |
| "epoch": 3.1119641714651314, |
| "grad_norm": 0.3425398327341164, |
| "learning_rate": 1.4918781149773694e-05, |
| "loss": 0.2677, |
| "step": 1216 |
| }, |
| { |
| "epoch": 3.114523352527191, |
| "grad_norm": 0.36160910451306577, |
| "learning_rate": 1.4884162399039439e-05, |
| "loss": 0.2545, |
| "step": 1217 |
| }, |
| { |
| "epoch": 3.1170825335892514, |
| "grad_norm": 0.37097329916252125, |
| "learning_rate": 1.4849560041444795e-05, |
| "loss": 0.2609, |
| "step": 1218 |
| }, |
| { |
| "epoch": 3.1196417146513116, |
| "grad_norm": 0.36352782561925345, |
| "learning_rate": 1.4814974187869218e-05, |
| "loss": 0.2236, |
| "step": 1219 |
| }, |
| { |
| "epoch": 3.122200895713372, |
| "grad_norm": 0.3551683721423837, |
| "learning_rate": 1.478040494913926e-05, |
| "loss": 0.2244, |
| "step": 1220 |
| }, |
| { |
| "epoch": 3.124760076775432, |
| "grad_norm": 0.3308813359796844, |
| "learning_rate": 1.4745852436028262e-05, |
| "loss": 0.2591, |
| "step": 1221 |
| }, |
| { |
| "epoch": 3.127319257837492, |
| "grad_norm": 0.3507830733548493, |
| "learning_rate": 1.4711316759255963e-05, |
| "loss": 0.2453, |
| "step": 1222 |
| }, |
| { |
| "epoch": 3.129878438899552, |
| "grad_norm": 0.33582461966461585, |
| "learning_rate": 1.4676798029488123e-05, |
| "loss": 0.2593, |
| "step": 1223 |
| }, |
| { |
| "epoch": 3.132437619961612, |
| "grad_norm": 0.3507842455435477, |
| "learning_rate": 1.464229635733624e-05, |
| "loss": 0.2372, |
| "step": 1224 |
| }, |
| { |
| "epoch": 3.1349968010236724, |
| "grad_norm": 0.3318567188084375, |
| "learning_rate": 1.460781185335713e-05, |
| "loss": 0.231, |
| "step": 1225 |
| }, |
| { |
| "epoch": 3.1375559820857326, |
| "grad_norm": 0.3188990523059626, |
| "learning_rate": 1.4573344628052588e-05, |
| "loss": 0.2376, |
| "step": 1226 |
| }, |
| { |
| "epoch": 3.140115163147793, |
| "grad_norm": 0.3664030290111237, |
| "learning_rate": 1.4538894791869052e-05, |
| "loss": 0.2585, |
| "step": 1227 |
| }, |
| { |
| "epoch": 3.142674344209853, |
| "grad_norm": 0.3456582759492691, |
| "learning_rate": 1.4504462455197248e-05, |
| "loss": 0.2295, |
| "step": 1228 |
| }, |
| { |
| "epoch": 3.145233525271913, |
| "grad_norm": 0.3071644333690587, |
| "learning_rate": 1.4470047728371813e-05, |
| "loss": 0.2113, |
| "step": 1229 |
| }, |
| { |
| "epoch": 3.147792706333973, |
| "grad_norm": 0.33706220227684885, |
| "learning_rate": 1.443565072167095e-05, |
| "loss": 0.2286, |
| "step": 1230 |
| }, |
| { |
| "epoch": 3.1503518873960332, |
| "grad_norm": 0.3213473350814528, |
| "learning_rate": 1.4401271545316096e-05, |
| "loss": 0.2333, |
| "step": 1231 |
| }, |
| { |
| "epoch": 3.1529110684580934, |
| "grad_norm": 0.32806923950956013, |
| "learning_rate": 1.436691030947155e-05, |
| "loss": 0.2338, |
| "step": 1232 |
| }, |
| { |
| "epoch": 3.1554702495201536, |
| "grad_norm": 0.33540430311298725, |
| "learning_rate": 1.43325671242441e-05, |
| "loss": 0.209, |
| "step": 1233 |
| }, |
| { |
| "epoch": 3.158029430582214, |
| "grad_norm": 0.3208377254757641, |
| "learning_rate": 1.4298242099682726e-05, |
| "loss": 0.245, |
| "step": 1234 |
| }, |
| { |
| "epoch": 3.1605886116442736, |
| "grad_norm": 0.3427654538840671, |
| "learning_rate": 1.4263935345778202e-05, |
| "loss": 0.2521, |
| "step": 1235 |
| }, |
| { |
| "epoch": 3.163147792706334, |
| "grad_norm": 0.35551971354398254, |
| "learning_rate": 1.4229646972462732e-05, |
| "loss": 0.2338, |
| "step": 1236 |
| }, |
| { |
| "epoch": 3.165706973768394, |
| "grad_norm": 0.33357596367989273, |
| "learning_rate": 1.419537708960966e-05, |
| "loss": 0.2322, |
| "step": 1237 |
| }, |
| { |
| "epoch": 3.1682661548304543, |
| "grad_norm": 0.3416940872307819, |
| "learning_rate": 1.4161125807033059e-05, |
| "loss": 0.24, |
| "step": 1238 |
| }, |
| { |
| "epoch": 3.1708253358925145, |
| "grad_norm": 0.3259027337159305, |
| "learning_rate": 1.412689323448739e-05, |
| "loss": 0.2705, |
| "step": 1239 |
| }, |
| { |
| "epoch": 3.1733845169545747, |
| "grad_norm": 0.33778026989222404, |
| "learning_rate": 1.409267948166718e-05, |
| "loss": 0.2335, |
| "step": 1240 |
| }, |
| { |
| "epoch": 3.175943698016635, |
| "grad_norm": 0.3425941944724759, |
| "learning_rate": 1.4058484658206646e-05, |
| "loss": 0.2684, |
| "step": 1241 |
| }, |
| { |
| "epoch": 3.1785028790786947, |
| "grad_norm": 0.304680436913791, |
| "learning_rate": 1.4024308873679327e-05, |
| "loss": 0.2181, |
| "step": 1242 |
| }, |
| { |
| "epoch": 3.181062060140755, |
| "grad_norm": 0.3522850109826806, |
| "learning_rate": 1.3990152237597787e-05, |
| "loss": 0.2572, |
| "step": 1243 |
| }, |
| { |
| "epoch": 3.183621241202815, |
| "grad_norm": 0.3209721557320742, |
| "learning_rate": 1.3956014859413211e-05, |
| "loss": 0.2337, |
| "step": 1244 |
| }, |
| { |
| "epoch": 3.1861804222648753, |
| "grad_norm": 0.3293098487746776, |
| "learning_rate": 1.3921896848515064e-05, |
| "loss": 0.2411, |
| "step": 1245 |
| }, |
| { |
| "epoch": 3.1887396033269355, |
| "grad_norm": 0.30365057870700035, |
| "learning_rate": 1.388779831423078e-05, |
| "loss": 0.2291, |
| "step": 1246 |
| }, |
| { |
| "epoch": 3.1912987843889957, |
| "grad_norm": 0.3131208709907512, |
| "learning_rate": 1.3853719365825357e-05, |
| "loss": 0.2352, |
| "step": 1247 |
| }, |
| { |
| "epoch": 3.1938579654510555, |
| "grad_norm": 0.3376254463164988, |
| "learning_rate": 1.3819660112501054e-05, |
| "loss": 0.2625, |
| "step": 1248 |
| }, |
| { |
| "epoch": 3.1964171465131157, |
| "grad_norm": 0.3069287683421629, |
| "learning_rate": 1.3785620663396992e-05, |
| "loss": 0.2229, |
| "step": 1249 |
| }, |
| { |
| "epoch": 3.198976327575176, |
| "grad_norm": 0.3316589759980029, |
| "learning_rate": 1.3751601127588849e-05, |
| "loss": 0.245, |
| "step": 1250 |
| }, |
| { |
| "epoch": 3.201535508637236, |
| "grad_norm": 0.30527916893181595, |
| "learning_rate": 1.37176016140885e-05, |
| "loss": 0.2346, |
| "step": 1251 |
| }, |
| { |
| "epoch": 3.2040946896992963, |
| "grad_norm": 0.34900918414936455, |
| "learning_rate": 1.3683622231843644e-05, |
| "loss": 0.2392, |
| "step": 1252 |
| }, |
| { |
| "epoch": 3.2066538707613566, |
| "grad_norm": 0.30641882677939075, |
| "learning_rate": 1.364966308973747e-05, |
| "loss": 0.2218, |
| "step": 1253 |
| }, |
| { |
| "epoch": 3.2092130518234163, |
| "grad_norm": 0.3296381755475144, |
| "learning_rate": 1.3615724296588342e-05, |
| "loss": 0.2566, |
| "step": 1254 |
| }, |
| { |
| "epoch": 3.2117722328854765, |
| "grad_norm": 0.34080590800970306, |
| "learning_rate": 1.3581805961149371e-05, |
| "loss": 0.2518, |
| "step": 1255 |
| }, |
| { |
| "epoch": 3.2143314139475367, |
| "grad_norm": 0.31502390005004344, |
| "learning_rate": 1.3547908192108143e-05, |
| "loss": 0.2288, |
| "step": 1256 |
| }, |
| { |
| "epoch": 3.216890595009597, |
| "grad_norm": 0.30635056034248115, |
| "learning_rate": 1.3514031098086349e-05, |
| "loss": 0.2539, |
| "step": 1257 |
| }, |
| { |
| "epoch": 3.219449776071657, |
| "grad_norm": 0.34066214746897916, |
| "learning_rate": 1.3480174787639397e-05, |
| "loss": 0.2664, |
| "step": 1258 |
| }, |
| { |
| "epoch": 3.2220089571337174, |
| "grad_norm": 0.31764511418435903, |
| "learning_rate": 1.3446339369256121e-05, |
| "loss": 0.2067, |
| "step": 1259 |
| }, |
| { |
| "epoch": 3.224568138195777, |
| "grad_norm": 0.29852956617495935, |
| "learning_rate": 1.341252495135841e-05, |
| "loss": 0.2298, |
| "step": 1260 |
| }, |
| { |
| "epoch": 3.2271273192578374, |
| "grad_norm": 0.34615186747664684, |
| "learning_rate": 1.3378731642300841e-05, |
| "loss": 0.2488, |
| "step": 1261 |
| }, |
| { |
| "epoch": 3.2296865003198976, |
| "grad_norm": 0.31284863193899576, |
| "learning_rate": 1.3344959550370362e-05, |
| "loss": 0.222, |
| "step": 1262 |
| }, |
| { |
| "epoch": 3.232245681381958, |
| "grad_norm": 0.3198015399733083, |
| "learning_rate": 1.3311208783785945e-05, |
| "loss": 0.2561, |
| "step": 1263 |
| }, |
| { |
| "epoch": 3.234804862444018, |
| "grad_norm": 0.33484963926651445, |
| "learning_rate": 1.327747945069819e-05, |
| "loss": 0.2532, |
| "step": 1264 |
| }, |
| { |
| "epoch": 3.237364043506078, |
| "grad_norm": 0.3505508918710989, |
| "learning_rate": 1.324377165918906e-05, |
| "loss": 0.253, |
| "step": 1265 |
| }, |
| { |
| "epoch": 3.2399232245681384, |
| "grad_norm": 0.3317100885612124, |
| "learning_rate": 1.3210085517271459e-05, |
| "loss": 0.2488, |
| "step": 1266 |
| }, |
| { |
| "epoch": 3.242482405630198, |
| "grad_norm": 0.3183951056600632, |
| "learning_rate": 1.3176421132888936e-05, |
| "loss": 0.2206, |
| "step": 1267 |
| }, |
| { |
| "epoch": 3.2450415866922584, |
| "grad_norm": 0.7798174468009574, |
| "learning_rate": 1.3142778613915308e-05, |
| "loss": 0.3465, |
| "step": 1268 |
| }, |
| { |
| "epoch": 3.2476007677543186, |
| "grad_norm": 0.3276263112485529, |
| "learning_rate": 1.3109158068154329e-05, |
| "loss": 0.2206, |
| "step": 1269 |
| }, |
| { |
| "epoch": 3.250159948816379, |
| "grad_norm": 0.35582948619273064, |
| "learning_rate": 1.3075559603339354e-05, |
| "loss": 0.2272, |
| "step": 1270 |
| }, |
| { |
| "epoch": 3.252719129878439, |
| "grad_norm": 0.3425439408964034, |
| "learning_rate": 1.304198332713296e-05, |
| "loss": 0.2587, |
| "step": 1271 |
| }, |
| { |
| "epoch": 3.255278310940499, |
| "grad_norm": 0.35588463150095667, |
| "learning_rate": 1.3008429347126641e-05, |
| "loss": 0.2585, |
| "step": 1272 |
| }, |
| { |
| "epoch": 3.257837492002559, |
| "grad_norm": 0.3354553998723496, |
| "learning_rate": 1.2974897770840448e-05, |
| "loss": 0.2067, |
| "step": 1273 |
| }, |
| { |
| "epoch": 3.260396673064619, |
| "grad_norm": 0.3247184453320128, |
| "learning_rate": 1.2941388705722627e-05, |
| "loss": 0.2449, |
| "step": 1274 |
| }, |
| { |
| "epoch": 3.2629558541266794, |
| "grad_norm": 0.3269101886184072, |
| "learning_rate": 1.2907902259149287e-05, |
| "loss": 0.2454, |
| "step": 1275 |
| }, |
| { |
| "epoch": 3.2655150351887396, |
| "grad_norm": 0.34277751654037186, |
| "learning_rate": 1.2874438538424086e-05, |
| "loss": 0.2267, |
| "step": 1276 |
| }, |
| { |
| "epoch": 3.2680742162508, |
| "grad_norm": 0.3425839528353915, |
| "learning_rate": 1.2840997650777829e-05, |
| "loss": 0.2289, |
| "step": 1277 |
| }, |
| { |
| "epoch": 3.27063339731286, |
| "grad_norm": 0.32496809601312776, |
| "learning_rate": 1.2807579703368162e-05, |
| "loss": 0.2437, |
| "step": 1278 |
| }, |
| { |
| "epoch": 3.27319257837492, |
| "grad_norm": 0.37627417428336984, |
| "learning_rate": 1.2774184803279245e-05, |
| "loss": 0.2196, |
| "step": 1279 |
| }, |
| { |
| "epoch": 3.27575175943698, |
| "grad_norm": 0.30844464297394786, |
| "learning_rate": 1.274081305752135e-05, |
| "loss": 0.2148, |
| "step": 1280 |
| }, |
| { |
| "epoch": 3.2783109404990403, |
| "grad_norm": 0.32041752442694194, |
| "learning_rate": 1.2707464573030572e-05, |
| "loss": 0.2495, |
| "step": 1281 |
| }, |
| { |
| "epoch": 3.2808701215611005, |
| "grad_norm": 0.3247468389566215, |
| "learning_rate": 1.2674139456668479e-05, |
| "loss": 0.2558, |
| "step": 1282 |
| }, |
| { |
| "epoch": 3.2834293026231607, |
| "grad_norm": 0.30941778730595587, |
| "learning_rate": 1.2640837815221731e-05, |
| "loss": 0.2238, |
| "step": 1283 |
| }, |
| { |
| "epoch": 3.285988483685221, |
| "grad_norm": 0.3397180703699647, |
| "learning_rate": 1.260755975540178e-05, |
| "loss": 0.2405, |
| "step": 1284 |
| }, |
| { |
| "epoch": 3.2885476647472807, |
| "grad_norm": 0.31915117073868005, |
| "learning_rate": 1.2574305383844528e-05, |
| "loss": 0.2396, |
| "step": 1285 |
| }, |
| { |
| "epoch": 3.291106845809341, |
| "grad_norm": 0.2977168854647766, |
| "learning_rate": 1.2541074807109945e-05, |
| "loss": 0.2286, |
| "step": 1286 |
| }, |
| { |
| "epoch": 3.293666026871401, |
| "grad_norm": 0.3141534078265832, |
| "learning_rate": 1.250786813168176e-05, |
| "loss": 0.2291, |
| "step": 1287 |
| }, |
| { |
| "epoch": 3.2962252079334613, |
| "grad_norm": 0.3250362176047104, |
| "learning_rate": 1.2474685463967125e-05, |
| "loss": 0.2353, |
| "step": 1288 |
| }, |
| { |
| "epoch": 3.2987843889955215, |
| "grad_norm": 0.5783304188096524, |
| "learning_rate": 1.2441526910296253e-05, |
| "loss": 0.2316, |
| "step": 1289 |
| }, |
| { |
| "epoch": 3.3013435700575817, |
| "grad_norm": 0.31254991008955707, |
| "learning_rate": 1.2408392576922075e-05, |
| "loss": 0.2336, |
| "step": 1290 |
| }, |
| { |
| "epoch": 3.303902751119642, |
| "grad_norm": 0.31041323932723247, |
| "learning_rate": 1.2375282570019933e-05, |
| "loss": 0.2457, |
| "step": 1291 |
| }, |
| { |
| "epoch": 3.3064619321817017, |
| "grad_norm": 0.32993719976229857, |
| "learning_rate": 1.2342196995687212e-05, |
| "loss": 0.2588, |
| "step": 1292 |
| }, |
| { |
| "epoch": 3.309021113243762, |
| "grad_norm": 0.3016426692910031, |
| "learning_rate": 1.2309135959942986e-05, |
| "loss": 0.2577, |
| "step": 1293 |
| }, |
| { |
| "epoch": 3.311580294305822, |
| "grad_norm": 0.34298650349077, |
| "learning_rate": 1.227609956872772e-05, |
| "loss": 0.2386, |
| "step": 1294 |
| }, |
| { |
| "epoch": 3.3141394753678823, |
| "grad_norm": 0.3233823231490881, |
| "learning_rate": 1.2243087927902905e-05, |
| "loss": 0.2203, |
| "step": 1295 |
| }, |
| { |
| "epoch": 3.3166986564299425, |
| "grad_norm": 0.3431401051489707, |
| "learning_rate": 1.2210101143250708e-05, |
| "loss": 0.2369, |
| "step": 1296 |
| }, |
| { |
| "epoch": 3.3192578374920023, |
| "grad_norm": 0.30536095566488874, |
| "learning_rate": 1.2177139320473663e-05, |
| "loss": 0.209, |
| "step": 1297 |
| }, |
| { |
| "epoch": 3.3218170185540625, |
| "grad_norm": 0.3380078591507895, |
| "learning_rate": 1.2144202565194311e-05, |
| "loss": 0.2793, |
| "step": 1298 |
| }, |
| { |
| "epoch": 3.3243761996161227, |
| "grad_norm": 0.3327840853187567, |
| "learning_rate": 1.211129098295486e-05, |
| "loss": 0.2473, |
| "step": 1299 |
| }, |
| { |
| "epoch": 3.326935380678183, |
| "grad_norm": 0.29406663008971645, |
| "learning_rate": 1.2078404679216864e-05, |
| "loss": 0.2056, |
| "step": 1300 |
| }, |
| { |
| "epoch": 3.329494561740243, |
| "grad_norm": 0.31662925879264764, |
| "learning_rate": 1.2045543759360876e-05, |
| "loss": 0.2443, |
| "step": 1301 |
| }, |
| { |
| "epoch": 3.3320537428023034, |
| "grad_norm": 0.322730147707437, |
| "learning_rate": 1.2012708328686093e-05, |
| "loss": 0.2143, |
| "step": 1302 |
| }, |
| { |
| "epoch": 3.3346129238643636, |
| "grad_norm": 0.2975751708993095, |
| "learning_rate": 1.1979898492410049e-05, |
| "loss": 0.2385, |
| "step": 1303 |
| }, |
| { |
| "epoch": 3.3371721049264234, |
| "grad_norm": 0.3636857886009555, |
| "learning_rate": 1.1947114355668265e-05, |
| "loss": 0.2623, |
| "step": 1304 |
| }, |
| { |
| "epoch": 3.3397312859884836, |
| "grad_norm": 0.3199715781542156, |
| "learning_rate": 1.1914356023513904e-05, |
| "loss": 0.2605, |
| "step": 1305 |
| }, |
| { |
| "epoch": 3.342290467050544, |
| "grad_norm": 0.32209675935492127, |
| "learning_rate": 1.1881623600917437e-05, |
| "loss": 0.2474, |
| "step": 1306 |
| }, |
| { |
| "epoch": 3.344849648112604, |
| "grad_norm": 0.3146940770709782, |
| "learning_rate": 1.1848917192766322e-05, |
| "loss": 0.1886, |
| "step": 1307 |
| }, |
| { |
| "epoch": 3.347408829174664, |
| "grad_norm": 0.3515135762125379, |
| "learning_rate": 1.1816236903864656e-05, |
| "loss": 0.2639, |
| "step": 1308 |
| }, |
| { |
| "epoch": 3.3499680102367244, |
| "grad_norm": 0.3273044475438286, |
| "learning_rate": 1.1783582838932821e-05, |
| "loss": 0.2681, |
| "step": 1309 |
| }, |
| { |
| "epoch": 3.352527191298784, |
| "grad_norm": 0.3241556852063347, |
| "learning_rate": 1.1750955102607193e-05, |
| "loss": 0.2148, |
| "step": 1310 |
| }, |
| { |
| "epoch": 3.3550863723608444, |
| "grad_norm": 0.34788552766120195, |
| "learning_rate": 1.1718353799439766e-05, |
| "loss": 0.2328, |
| "step": 1311 |
| }, |
| { |
| "epoch": 3.3576455534229046, |
| "grad_norm": 0.31285926538820524, |
| "learning_rate": 1.1685779033897827e-05, |
| "loss": 0.2139, |
| "step": 1312 |
| }, |
| { |
| "epoch": 3.360204734484965, |
| "grad_norm": 0.34091923953794956, |
| "learning_rate": 1.1653230910363645e-05, |
| "loss": 0.2522, |
| "step": 1313 |
| }, |
| { |
| "epoch": 3.362763915547025, |
| "grad_norm": 0.3093765842790142, |
| "learning_rate": 1.1620709533134104e-05, |
| "loss": 0.25, |
| "step": 1314 |
| }, |
| { |
| "epoch": 3.3653230966090852, |
| "grad_norm": 0.3332035792467053, |
| "learning_rate": 1.1588215006420374e-05, |
| "loss": 0.2729, |
| "step": 1315 |
| }, |
| { |
| "epoch": 3.3678822776711455, |
| "grad_norm": 0.3076095736538766, |
| "learning_rate": 1.1555747434347606e-05, |
| "loss": 0.2076, |
| "step": 1316 |
| }, |
| { |
| "epoch": 3.370441458733205, |
| "grad_norm": 0.30288813913337326, |
| "learning_rate": 1.1523306920954571e-05, |
| "loss": 0.2449, |
| "step": 1317 |
| }, |
| { |
| "epoch": 3.3730006397952654, |
| "grad_norm": 0.3702724394333769, |
| "learning_rate": 1.1490893570193328e-05, |
| "loss": 0.2646, |
| "step": 1318 |
| }, |
| { |
| "epoch": 3.3755598208573256, |
| "grad_norm": 0.35103580998810946, |
| "learning_rate": 1.1458507485928891e-05, |
| "loss": 0.2634, |
| "step": 1319 |
| }, |
| { |
| "epoch": 3.378119001919386, |
| "grad_norm": 0.3037095018272702, |
| "learning_rate": 1.1426148771938915e-05, |
| "loss": 0.2229, |
| "step": 1320 |
| }, |
| { |
| "epoch": 3.380678182981446, |
| "grad_norm": 0.3192579058292554, |
| "learning_rate": 1.139381753191335e-05, |
| "loss": 0.2489, |
| "step": 1321 |
| }, |
| { |
| "epoch": 3.383237364043506, |
| "grad_norm": 0.32501023665860496, |
| "learning_rate": 1.1361513869454092e-05, |
| "loss": 0.2407, |
| "step": 1322 |
| }, |
| { |
| "epoch": 3.385796545105566, |
| "grad_norm": 0.35596917491983554, |
| "learning_rate": 1.1329237888074691e-05, |
| "loss": 0.2437, |
| "step": 1323 |
| }, |
| { |
| "epoch": 3.3883557261676263, |
| "grad_norm": 0.33172031186682566, |
| "learning_rate": 1.129698969119998e-05, |
| "loss": 0.2623, |
| "step": 1324 |
| }, |
| { |
| "epoch": 3.3909149072296865, |
| "grad_norm": 0.2995594606117323, |
| "learning_rate": 1.1264769382165748e-05, |
| "loss": 0.1996, |
| "step": 1325 |
| }, |
| { |
| "epoch": 3.3934740882917467, |
| "grad_norm": 0.3194047630014032, |
| "learning_rate": 1.123257706421845e-05, |
| "loss": 0.236, |
| "step": 1326 |
| }, |
| { |
| "epoch": 3.396033269353807, |
| "grad_norm": 0.33068857427400655, |
| "learning_rate": 1.1200412840514839e-05, |
| "loss": 0.2244, |
| "step": 1327 |
| }, |
| { |
| "epoch": 3.398592450415867, |
| "grad_norm": 0.36837193881845204, |
| "learning_rate": 1.1168276814121621e-05, |
| "loss": 0.2828, |
| "step": 1328 |
| }, |
| { |
| "epoch": 3.401151631477927, |
| "grad_norm": 0.3076965971301543, |
| "learning_rate": 1.1136169088015177e-05, |
| "loss": 0.2241, |
| "step": 1329 |
| }, |
| { |
| "epoch": 3.403710812539987, |
| "grad_norm": 0.3411885790050691, |
| "learning_rate": 1.110408976508118e-05, |
| "loss": 0.2232, |
| "step": 1330 |
| }, |
| { |
| "epoch": 3.4062699936020473, |
| "grad_norm": 0.3263500540450158, |
| "learning_rate": 1.107203894811429e-05, |
| "loss": 0.2572, |
| "step": 1331 |
| }, |
| { |
| "epoch": 3.4088291746641075, |
| "grad_norm": 0.3416605787702754, |
| "learning_rate": 1.1040016739817836e-05, |
| "loss": 0.2433, |
| "step": 1332 |
| }, |
| { |
| "epoch": 3.4113883557261677, |
| "grad_norm": 0.335195547574942, |
| "learning_rate": 1.1008023242803477e-05, |
| "loss": 0.2648, |
| "step": 1333 |
| }, |
| { |
| "epoch": 3.413947536788228, |
| "grad_norm": 0.36538157527653864, |
| "learning_rate": 1.097605855959084e-05, |
| "loss": 0.2286, |
| "step": 1334 |
| }, |
| { |
| "epoch": 3.4165067178502877, |
| "grad_norm": 0.2912108672412734, |
| "learning_rate": 1.094412279260726e-05, |
| "loss": 0.2073, |
| "step": 1335 |
| }, |
| { |
| "epoch": 3.419065898912348, |
| "grad_norm": 0.32352858434290793, |
| "learning_rate": 1.0912216044187382e-05, |
| "loss": 0.2725, |
| "step": 1336 |
| }, |
| { |
| "epoch": 3.421625079974408, |
| "grad_norm": 0.30980460582268804, |
| "learning_rate": 1.0880338416572872e-05, |
| "loss": 0.242, |
| "step": 1337 |
| }, |
| { |
| "epoch": 3.4241842610364683, |
| "grad_norm": 0.3026357282953144, |
| "learning_rate": 1.0848490011912096e-05, |
| "loss": 0.2207, |
| "step": 1338 |
| }, |
| { |
| "epoch": 3.4267434420985285, |
| "grad_norm": 0.3222649336637817, |
| "learning_rate": 1.0816670932259763e-05, |
| "loss": 0.2196, |
| "step": 1339 |
| }, |
| { |
| "epoch": 3.4293026231605888, |
| "grad_norm": 0.3346500533447882, |
| "learning_rate": 1.0784881279576635e-05, |
| "loss": 0.2187, |
| "step": 1340 |
| }, |
| { |
| "epoch": 3.431861804222649, |
| "grad_norm": 0.3122079565048836, |
| "learning_rate": 1.0753121155729133e-05, |
| "loss": 0.2227, |
| "step": 1341 |
| }, |
| { |
| "epoch": 3.4344209852847087, |
| "grad_norm": 0.3240510909707239, |
| "learning_rate": 1.07213906624891e-05, |
| "loss": 0.2231, |
| "step": 1342 |
| }, |
| { |
| "epoch": 3.436980166346769, |
| "grad_norm": 0.3235912618403718, |
| "learning_rate": 1.0689689901533424e-05, |
| "loss": 0.2492, |
| "step": 1343 |
| }, |
| { |
| "epoch": 3.439539347408829, |
| "grad_norm": 0.3040119908970231, |
| "learning_rate": 1.0658018974443692e-05, |
| "loss": 0.1984, |
| "step": 1344 |
| }, |
| { |
| "epoch": 3.4420985284708894, |
| "grad_norm": 0.340863607236755, |
| "learning_rate": 1.0626377982705929e-05, |
| "loss": 0.2349, |
| "step": 1345 |
| }, |
| { |
| "epoch": 3.4446577095329496, |
| "grad_norm": 0.32795701173977326, |
| "learning_rate": 1.059476702771021e-05, |
| "loss": 0.2529, |
| "step": 1346 |
| }, |
| { |
| "epoch": 3.4472168905950094, |
| "grad_norm": 0.31132450713720333, |
| "learning_rate": 1.056318621075036e-05, |
| "loss": 0.2095, |
| "step": 1347 |
| }, |
| { |
| "epoch": 3.4497760716570696, |
| "grad_norm": 0.3254097118432526, |
| "learning_rate": 1.0531635633023644e-05, |
| "loss": 0.2358, |
| "step": 1348 |
| }, |
| { |
| "epoch": 3.4523352527191298, |
| "grad_norm": 0.3017269372689714, |
| "learning_rate": 1.050011539563043e-05, |
| "loss": 0.2247, |
| "step": 1349 |
| }, |
| { |
| "epoch": 3.45489443378119, |
| "grad_norm": 0.3283903326525304, |
| "learning_rate": 1.0468625599573842e-05, |
| "loss": 0.2718, |
| "step": 1350 |
| }, |
| { |
| "epoch": 3.45745361484325, |
| "grad_norm": 0.31738351502037276, |
| "learning_rate": 1.0437166345759489e-05, |
| "loss": 0.2345, |
| "step": 1351 |
| }, |
| { |
| "epoch": 3.4600127959053104, |
| "grad_norm": 0.30402755053632596, |
| "learning_rate": 1.0405737734995083e-05, |
| "loss": 0.2057, |
| "step": 1352 |
| }, |
| { |
| "epoch": 3.4625719769673706, |
| "grad_norm": 0.33488642167297444, |
| "learning_rate": 1.037433986799015e-05, |
| "loss": 0.2439, |
| "step": 1353 |
| }, |
| { |
| "epoch": 3.4651311580294304, |
| "grad_norm": 0.30914851197686366, |
| "learning_rate": 1.034297284535571e-05, |
| "loss": 0.2028, |
| "step": 1354 |
| }, |
| { |
| "epoch": 3.4676903390914906, |
| "grad_norm": 0.32175431813825445, |
| "learning_rate": 1.0311636767603952e-05, |
| "loss": 0.2439, |
| "step": 1355 |
| }, |
| { |
| "epoch": 3.470249520153551, |
| "grad_norm": 0.3395487986448244, |
| "learning_rate": 1.028033173514788e-05, |
| "loss": 0.2502, |
| "step": 1356 |
| }, |
| { |
| "epoch": 3.472808701215611, |
| "grad_norm": 0.3042126709214444, |
| "learning_rate": 1.0249057848301043e-05, |
| "loss": 0.2395, |
| "step": 1357 |
| }, |
| { |
| "epoch": 3.4753678822776712, |
| "grad_norm": 0.3183501854149144, |
| "learning_rate": 1.0217815207277165e-05, |
| "loss": 0.2234, |
| "step": 1358 |
| }, |
| { |
| "epoch": 3.4779270633397315, |
| "grad_norm": 0.3233478623491546, |
| "learning_rate": 1.0186603912189867e-05, |
| "loss": 0.2589, |
| "step": 1359 |
| }, |
| { |
| "epoch": 3.480486244401791, |
| "grad_norm": 0.3018286569760461, |
| "learning_rate": 1.0155424063052306e-05, |
| "loss": 0.2401, |
| "step": 1360 |
| }, |
| { |
| "epoch": 3.4830454254638514, |
| "grad_norm": 0.3174313407841064, |
| "learning_rate": 1.0124275759776889e-05, |
| "loss": 0.2399, |
| "step": 1361 |
| }, |
| { |
| "epoch": 3.4856046065259116, |
| "grad_norm": 0.307048568974569, |
| "learning_rate": 1.0093159102174938e-05, |
| "loss": 0.2291, |
| "step": 1362 |
| }, |
| { |
| "epoch": 3.488163787587972, |
| "grad_norm": 0.3132031600114937, |
| "learning_rate": 1.006207418995636e-05, |
| "loss": 0.2086, |
| "step": 1363 |
| }, |
| { |
| "epoch": 3.490722968650032, |
| "grad_norm": 0.34596427815653313, |
| "learning_rate": 1.0031021122729328e-05, |
| "loss": 0.2497, |
| "step": 1364 |
| }, |
| { |
| "epoch": 3.4932821497120923, |
| "grad_norm": 0.2986896060364163, |
| "learning_rate": 1.0000000000000006e-05, |
| "loss": 0.2379, |
| "step": 1365 |
| }, |
| { |
| "epoch": 3.4958413307741525, |
| "grad_norm": 0.3319066544902576, |
| "learning_rate": 9.969010921172155e-06, |
| "loss": 0.2542, |
| "step": 1366 |
| }, |
| { |
| "epoch": 3.4984005118362123, |
| "grad_norm": 0.3053208810307986, |
| "learning_rate": 9.938053985546883e-06, |
| "loss": 0.2299, |
| "step": 1367 |
| }, |
| { |
| "epoch": 3.5009596928982725, |
| "grad_norm": 0.3736366846122222, |
| "learning_rate": 9.907129292322298e-06, |
| "loss": 0.2676, |
| "step": 1368 |
| }, |
| { |
| "epoch": 3.5035188739603327, |
| "grad_norm": 0.33175316766942814, |
| "learning_rate": 9.876236940593173e-06, |
| "loss": 0.2753, |
| "step": 1369 |
| }, |
| { |
| "epoch": 3.506078055022393, |
| "grad_norm": 0.3276624133983928, |
| "learning_rate": 9.84537702935065e-06, |
| "loss": 0.2745, |
| "step": 1370 |
| }, |
| { |
| "epoch": 3.508637236084453, |
| "grad_norm": 0.3067138284048095, |
| "learning_rate": 9.814549657481935e-06, |
| "loss": 0.2201, |
| "step": 1371 |
| }, |
| { |
| "epoch": 3.511196417146513, |
| "grad_norm": 0.3489342217784152, |
| "learning_rate": 9.783754923769946e-06, |
| "loss": 0.2402, |
| "step": 1372 |
| }, |
| { |
| "epoch": 3.513755598208573, |
| "grad_norm": 0.3103704194146652, |
| "learning_rate": 9.752992926893027e-06, |
| "loss": 0.231, |
| "step": 1373 |
| }, |
| { |
| "epoch": 3.5163147792706333, |
| "grad_norm": 0.29799307682543535, |
| "learning_rate": 9.722263765424628e-06, |
| "loss": 0.2103, |
| "step": 1374 |
| }, |
| { |
| "epoch": 3.5188739603326935, |
| "grad_norm": 0.3245882511730939, |
| "learning_rate": 9.691567537832964e-06, |
| "loss": 0.2547, |
| "step": 1375 |
| }, |
| { |
| "epoch": 3.5214331413947537, |
| "grad_norm": 0.29350433799889125, |
| "learning_rate": 9.660904342480715e-06, |
| "loss": 0.2083, |
| "step": 1376 |
| }, |
| { |
| "epoch": 3.523992322456814, |
| "grad_norm": 0.3333636077305378, |
| "learning_rate": 9.630274277624729e-06, |
| "loss": 0.2837, |
| "step": 1377 |
| }, |
| { |
| "epoch": 3.526551503518874, |
| "grad_norm": 0.33952864266921756, |
| "learning_rate": 9.599677441415694e-06, |
| "loss": 0.2313, |
| "step": 1378 |
| }, |
| { |
| "epoch": 3.5291106845809344, |
| "grad_norm": 0.3012959852140507, |
| "learning_rate": 9.5691139318978e-06, |
| "loss": 0.2171, |
| "step": 1379 |
| }, |
| { |
| "epoch": 3.531669865642994, |
| "grad_norm": 0.3318689398716116, |
| "learning_rate": 9.538583847008452e-06, |
| "loss": 0.2366, |
| "step": 1380 |
| }, |
| { |
| "epoch": 3.5342290467050543, |
| "grad_norm": 0.3358375103639254, |
| "learning_rate": 9.508087284577963e-06, |
| "loss": 0.2402, |
| "step": 1381 |
| }, |
| { |
| "epoch": 3.5367882277671145, |
| "grad_norm": 0.30920378089911293, |
| "learning_rate": 9.477624342329209e-06, |
| "loss": 0.2143, |
| "step": 1382 |
| }, |
| { |
| "epoch": 3.5393474088291748, |
| "grad_norm": 0.3169427298803479, |
| "learning_rate": 9.447195117877343e-06, |
| "loss": 0.2285, |
| "step": 1383 |
| }, |
| { |
| "epoch": 3.541906589891235, |
| "grad_norm": 0.3112954457690554, |
| "learning_rate": 9.416799708729486e-06, |
| "loss": 0.2315, |
| "step": 1384 |
| }, |
| { |
| "epoch": 3.5444657709532947, |
| "grad_norm": 0.3238511016385153, |
| "learning_rate": 9.386438212284372e-06, |
| "loss": 0.2252, |
| "step": 1385 |
| }, |
| { |
| "epoch": 3.547024952015355, |
| "grad_norm": 0.3031398199187957, |
| "learning_rate": 9.356110725832081e-06, |
| "loss": 0.2376, |
| "step": 1386 |
| }, |
| { |
| "epoch": 3.549584133077415, |
| "grad_norm": 0.3105091167975465, |
| "learning_rate": 9.325817346553725e-06, |
| "loss": 0.2689, |
| "step": 1387 |
| }, |
| { |
| "epoch": 3.5521433141394754, |
| "grad_norm": 0.31208594113425225, |
| "learning_rate": 9.295558171521093e-06, |
| "loss": 0.2278, |
| "step": 1388 |
| }, |
| { |
| "epoch": 3.5547024952015356, |
| "grad_norm": 0.31485619105429463, |
| "learning_rate": 9.265333297696395e-06, |
| "loss": 0.242, |
| "step": 1389 |
| }, |
| { |
| "epoch": 3.557261676263596, |
| "grad_norm": 0.31606147283215824, |
| "learning_rate": 9.235142821931928e-06, |
| "loss": 0.2363, |
| "step": 1390 |
| }, |
| { |
| "epoch": 3.559820857325656, |
| "grad_norm": 0.3150525527068536, |
| "learning_rate": 9.204986840969749e-06, |
| "loss": 0.2199, |
| "step": 1391 |
| }, |
| { |
| "epoch": 3.5623800383877158, |
| "grad_norm": 0.31857444893477177, |
| "learning_rate": 9.174865451441375e-06, |
| "loss": 0.2283, |
| "step": 1392 |
| }, |
| { |
| "epoch": 3.564939219449776, |
| "grad_norm": 0.30466028849006704, |
| "learning_rate": 9.1447787498675e-06, |
| "loss": 0.232, |
| "step": 1393 |
| }, |
| { |
| "epoch": 3.567498400511836, |
| "grad_norm": 0.34031824974175295, |
| "learning_rate": 9.114726832657658e-06, |
| "loss": 0.2663, |
| "step": 1394 |
| }, |
| { |
| "epoch": 3.5700575815738964, |
| "grad_norm": 0.3261116373502211, |
| "learning_rate": 9.084709796109907e-06, |
| "loss": 0.2489, |
| "step": 1395 |
| }, |
| { |
| "epoch": 3.5726167626359566, |
| "grad_norm": 0.30217642557332414, |
| "learning_rate": 9.054727736410555e-06, |
| "loss": 0.2613, |
| "step": 1396 |
| }, |
| { |
| "epoch": 3.5751759436980164, |
| "grad_norm": 0.3029175133984261, |
| "learning_rate": 9.02478074963381e-06, |
| "loss": 0.2263, |
| "step": 1397 |
| }, |
| { |
| "epoch": 3.5777351247600766, |
| "grad_norm": 0.3613891446327612, |
| "learning_rate": 8.994868931741499e-06, |
| "loss": 0.2658, |
| "step": 1398 |
| }, |
| { |
| "epoch": 3.580294305822137, |
| "grad_norm": 0.32375645597232505, |
| "learning_rate": 8.964992378582758e-06, |
| "loss": 0.2458, |
| "step": 1399 |
| }, |
| { |
| "epoch": 3.582853486884197, |
| "grad_norm": 0.346977280248451, |
| "learning_rate": 8.93515118589373e-06, |
| "loss": 0.2673, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.5854126679462572, |
| "grad_norm": 0.3091670769782311, |
| "learning_rate": 8.905345449297223e-06, |
| "loss": 0.2517, |
| "step": 1401 |
| }, |
| { |
| "epoch": 3.5879718490083174, |
| "grad_norm": 0.3033606651844572, |
| "learning_rate": 8.87557526430246e-06, |
| "loss": 0.2012, |
| "step": 1402 |
| }, |
| { |
| "epoch": 3.5905310300703777, |
| "grad_norm": 0.3115678004260849, |
| "learning_rate": 8.845840726304723e-06, |
| "loss": 0.2297, |
| "step": 1403 |
| }, |
| { |
| "epoch": 3.593090211132438, |
| "grad_norm": 0.3441565775702763, |
| "learning_rate": 8.816141930585067e-06, |
| "loss": 0.2542, |
| "step": 1404 |
| }, |
| { |
| "epoch": 3.5956493921944976, |
| "grad_norm": 0.30703493196426435, |
| "learning_rate": 8.786478972310023e-06, |
| "loss": 0.2342, |
| "step": 1405 |
| }, |
| { |
| "epoch": 3.598208573256558, |
| "grad_norm": 0.31549384345069187, |
| "learning_rate": 8.756851946531294e-06, |
| "loss": 0.247, |
| "step": 1406 |
| }, |
| { |
| "epoch": 3.600767754318618, |
| "grad_norm": 0.29102001056426585, |
| "learning_rate": 8.72726094818541e-06, |
| "loss": 0.2074, |
| "step": 1407 |
| }, |
| { |
| "epoch": 3.6033269353806783, |
| "grad_norm": 0.33490062048588876, |
| "learning_rate": 8.697706072093493e-06, |
| "loss": 0.2541, |
| "step": 1408 |
| }, |
| { |
| "epoch": 3.6058861164427385, |
| "grad_norm": 0.3124637481108502, |
| "learning_rate": 8.668187412960887e-06, |
| "loss": 0.2437, |
| "step": 1409 |
| }, |
| { |
| "epoch": 3.6084452975047983, |
| "grad_norm": 0.3113279052119832, |
| "learning_rate": 8.638705065376887e-06, |
| "loss": 0.2389, |
| "step": 1410 |
| }, |
| { |
| "epoch": 3.6110044785668585, |
| "grad_norm": 0.3194418676306725, |
| "learning_rate": 8.609259123814443e-06, |
| "loss": 0.2549, |
| "step": 1411 |
| }, |
| { |
| "epoch": 3.6135636596289187, |
| "grad_norm": 0.28910975717043763, |
| "learning_rate": 8.579849682629844e-06, |
| "loss": 0.2246, |
| "step": 1412 |
| }, |
| { |
| "epoch": 3.616122840690979, |
| "grad_norm": 0.31307324283444393, |
| "learning_rate": 8.550476836062419e-06, |
| "loss": 0.2425, |
| "step": 1413 |
| }, |
| { |
| "epoch": 3.618682021753039, |
| "grad_norm": 0.3040084877742325, |
| "learning_rate": 8.521140678234214e-06, |
| "loss": 0.2361, |
| "step": 1414 |
| }, |
| { |
| "epoch": 3.6212412028150993, |
| "grad_norm": 0.3130765747434785, |
| "learning_rate": 8.491841303149728e-06, |
| "loss": 0.2272, |
| "step": 1415 |
| }, |
| { |
| "epoch": 3.6238003838771595, |
| "grad_norm": 0.30265698644043443, |
| "learning_rate": 8.462578804695595e-06, |
| "loss": 0.2701, |
| "step": 1416 |
| }, |
| { |
| "epoch": 3.6263595649392193, |
| "grad_norm": 0.2987350574519827, |
| "learning_rate": 8.43335327664027e-06, |
| "loss": 0.2177, |
| "step": 1417 |
| }, |
| { |
| "epoch": 3.6289187460012795, |
| "grad_norm": 0.3166676881180338, |
| "learning_rate": 8.404164812633755e-06, |
| "loss": 0.2756, |
| "step": 1418 |
| }, |
| { |
| "epoch": 3.6314779270633397, |
| "grad_norm": 0.3033675615693007, |
| "learning_rate": 8.375013506207275e-06, |
| "loss": 0.2136, |
| "step": 1419 |
| }, |
| { |
| "epoch": 3.6340371081254, |
| "grad_norm": 0.30761103254564687, |
| "learning_rate": 8.345899450772975e-06, |
| "loss": 0.2535, |
| "step": 1420 |
| }, |
| { |
| "epoch": 3.63659628918746, |
| "grad_norm": 0.2982029035930307, |
| "learning_rate": 8.316822739623662e-06, |
| "loss": 0.2165, |
| "step": 1421 |
| }, |
| { |
| "epoch": 3.63915547024952, |
| "grad_norm": 0.31969771781374523, |
| "learning_rate": 8.287783465932466e-06, |
| "loss": 0.257, |
| "step": 1422 |
| }, |
| { |
| "epoch": 3.64171465131158, |
| "grad_norm": 0.2837299669230536, |
| "learning_rate": 8.258781722752535e-06, |
| "loss": 0.224, |
| "step": 1423 |
| }, |
| { |
| "epoch": 3.6442738323736403, |
| "grad_norm": 0.3012916036117272, |
| "learning_rate": 8.229817603016786e-06, |
| "loss": 0.2246, |
| "step": 1424 |
| }, |
| { |
| "epoch": 3.6468330134357005, |
| "grad_norm": 0.31189818144182524, |
| "learning_rate": 8.200891199537549e-06, |
| "loss": 0.2695, |
| "step": 1425 |
| }, |
| { |
| "epoch": 3.6493921944977608, |
| "grad_norm": 0.2997238939361931, |
| "learning_rate": 8.1720026050063e-06, |
| "loss": 0.1862, |
| "step": 1426 |
| }, |
| { |
| "epoch": 3.651951375559821, |
| "grad_norm": 0.34075811134574374, |
| "learning_rate": 8.143151911993374e-06, |
| "loss": 0.2619, |
| "step": 1427 |
| }, |
| { |
| "epoch": 3.654510556621881, |
| "grad_norm": 0.3146837641261533, |
| "learning_rate": 8.114339212947655e-06, |
| "loss": 0.2396, |
| "step": 1428 |
| }, |
| { |
| "epoch": 3.6570697376839414, |
| "grad_norm": 0.325379060971747, |
| "learning_rate": 8.085564600196258e-06, |
| "loss": 0.2435, |
| "step": 1429 |
| }, |
| { |
| "epoch": 3.659628918746001, |
| "grad_norm": 0.33089426856010606, |
| "learning_rate": 8.056828165944282e-06, |
| "loss": 0.2459, |
| "step": 1430 |
| }, |
| { |
| "epoch": 3.6621880998080614, |
| "grad_norm": 0.3111195583478781, |
| "learning_rate": 8.028130002274459e-06, |
| "loss": 0.2328, |
| "step": 1431 |
| }, |
| { |
| "epoch": 3.6647472808701216, |
| "grad_norm": 0.2917405967267679, |
| "learning_rate": 7.999470201146915e-06, |
| "loss": 0.2273, |
| "step": 1432 |
| }, |
| { |
| "epoch": 3.667306461932182, |
| "grad_norm": 0.3072708256616894, |
| "learning_rate": 7.970848854398825e-06, |
| "loss": 0.2616, |
| "step": 1433 |
| }, |
| { |
| "epoch": 3.669865642994242, |
| "grad_norm": 0.3274866801130236, |
| "learning_rate": 7.942266053744155e-06, |
| "loss": 0.2469, |
| "step": 1434 |
| }, |
| { |
| "epoch": 3.6724248240563018, |
| "grad_norm": 0.3206452949252231, |
| "learning_rate": 7.913721890773354e-06, |
| "loss": 0.2265, |
| "step": 1435 |
| }, |
| { |
| "epoch": 3.674984005118362, |
| "grad_norm": 0.3076598617812908, |
| "learning_rate": 7.885216456953053e-06, |
| "loss": 0.2167, |
| "step": 1436 |
| }, |
| { |
| "epoch": 3.677543186180422, |
| "grad_norm": 0.2979466544850749, |
| "learning_rate": 7.856749843625777e-06, |
| "loss": 0.2203, |
| "step": 1437 |
| }, |
| { |
| "epoch": 3.6801023672424824, |
| "grad_norm": 0.3224437385684691, |
| "learning_rate": 7.828322142009672e-06, |
| "loss": 0.2473, |
| "step": 1438 |
| }, |
| { |
| "epoch": 3.6826615483045426, |
| "grad_norm": 0.31485417513081154, |
| "learning_rate": 7.799933443198173e-06, |
| "loss": 0.2606, |
| "step": 1439 |
| }, |
| { |
| "epoch": 3.685220729366603, |
| "grad_norm": 0.3243639601134415, |
| "learning_rate": 7.771583838159756e-06, |
| "loss": 0.2633, |
| "step": 1440 |
| }, |
| { |
| "epoch": 3.687779910428663, |
| "grad_norm": 0.32893510730247094, |
| "learning_rate": 7.743273417737617e-06, |
| "loss": 0.2531, |
| "step": 1441 |
| }, |
| { |
| "epoch": 3.690339091490723, |
| "grad_norm": 0.31189557871340884, |
| "learning_rate": 7.715002272649388e-06, |
| "loss": 0.2403, |
| "step": 1442 |
| }, |
| { |
| "epoch": 3.692898272552783, |
| "grad_norm": 0.2901780629695267, |
| "learning_rate": 7.686770493486835e-06, |
| "loss": 0.2517, |
| "step": 1443 |
| }, |
| { |
| "epoch": 3.6954574536148432, |
| "grad_norm": 0.3159906947616901, |
| "learning_rate": 7.65857817071561e-06, |
| "loss": 0.2492, |
| "step": 1444 |
| }, |
| { |
| "epoch": 3.6980166346769034, |
| "grad_norm": 0.3098159857766409, |
| "learning_rate": 7.630425394674903e-06, |
| "loss": 0.2341, |
| "step": 1445 |
| }, |
| { |
| "epoch": 3.7005758157389637, |
| "grad_norm": 0.31575476226888965, |
| "learning_rate": 7.602312255577193e-06, |
| "loss": 0.2416, |
| "step": 1446 |
| }, |
| { |
| "epoch": 3.7031349968010234, |
| "grad_norm": 0.31087089571752347, |
| "learning_rate": 7.574238843507957e-06, |
| "loss": 0.2673, |
| "step": 1447 |
| }, |
| { |
| "epoch": 3.7056941778630836, |
| "grad_norm": 0.3209375688192084, |
| "learning_rate": 7.546205248425353e-06, |
| "loss": 0.2313, |
| "step": 1448 |
| }, |
| { |
| "epoch": 3.708253358925144, |
| "grad_norm": 0.30680259294814516, |
| "learning_rate": 7.518211560159949e-06, |
| "loss": 0.2187, |
| "step": 1449 |
| }, |
| { |
| "epoch": 3.710812539987204, |
| "grad_norm": 0.3064495121568587, |
| "learning_rate": 7.49025786841445e-06, |
| "loss": 0.2161, |
| "step": 1450 |
| }, |
| { |
| "epoch": 3.7133717210492643, |
| "grad_norm": 0.29214767577744066, |
| "learning_rate": 7.462344262763399e-06, |
| "loss": 0.2339, |
| "step": 1451 |
| }, |
| { |
| "epoch": 3.7159309021113245, |
| "grad_norm": 0.3146911137467561, |
| "learning_rate": 7.434470832652865e-06, |
| "loss": 0.2464, |
| "step": 1452 |
| }, |
| { |
| "epoch": 3.7184900831733847, |
| "grad_norm": 0.3499806012960674, |
| "learning_rate": 7.406637667400205e-06, |
| "loss": 0.2246, |
| "step": 1453 |
| }, |
| { |
| "epoch": 3.721049264235445, |
| "grad_norm": 0.30724891177758956, |
| "learning_rate": 7.378844856193736e-06, |
| "loss": 0.272, |
| "step": 1454 |
| }, |
| { |
| "epoch": 3.7236084452975047, |
| "grad_norm": 0.29711759225447126, |
| "learning_rate": 7.3510924880924575e-06, |
| "loss": 0.2205, |
| "step": 1455 |
| }, |
| { |
| "epoch": 3.726167626359565, |
| "grad_norm": 0.31114069148352147, |
| "learning_rate": 7.323380652025794e-06, |
| "loss": 0.2619, |
| "step": 1456 |
| }, |
| { |
| "epoch": 3.728726807421625, |
| "grad_norm": 0.3019555109712794, |
| "learning_rate": 7.295709436793284e-06, |
| "loss": 0.2526, |
| "step": 1457 |
| }, |
| { |
| "epoch": 3.7312859884836853, |
| "grad_norm": 0.30587326696750855, |
| "learning_rate": 7.268078931064293e-06, |
| "loss": 0.2156, |
| "step": 1458 |
| }, |
| { |
| "epoch": 3.7338451695457455, |
| "grad_norm": 0.3010387774085701, |
| "learning_rate": 7.2404892233777334e-06, |
| "loss": 0.2343, |
| "step": 1459 |
| }, |
| { |
| "epoch": 3.7364043506078053, |
| "grad_norm": 0.30265924975544334, |
| "learning_rate": 7.212940402141808e-06, |
| "loss": 0.2542, |
| "step": 1460 |
| }, |
| { |
| "epoch": 3.7389635316698655, |
| "grad_norm": 0.2899279021405336, |
| "learning_rate": 7.185432555633672e-06, |
| "loss": 0.2263, |
| "step": 1461 |
| }, |
| { |
| "epoch": 3.7415227127319257, |
| "grad_norm": 0.3197841686852777, |
| "learning_rate": 7.1579657719992045e-06, |
| "loss": 0.2665, |
| "step": 1462 |
| }, |
| { |
| "epoch": 3.744081893793986, |
| "grad_norm": 0.27978040220930955, |
| "learning_rate": 7.130540139252704e-06, |
| "loss": 0.2338, |
| "step": 1463 |
| }, |
| { |
| "epoch": 3.746641074856046, |
| "grad_norm": 0.31288929736620646, |
| "learning_rate": 7.1031557452765934e-06, |
| "loss": 0.2372, |
| "step": 1464 |
| }, |
| { |
| "epoch": 3.7492002559181064, |
| "grad_norm": 0.3098001081263227, |
| "learning_rate": 7.075812677821145e-06, |
| "loss": 0.2221, |
| "step": 1465 |
| }, |
| { |
| "epoch": 3.7517594369801666, |
| "grad_norm": 0.29645463690795515, |
| "learning_rate": 7.048511024504223e-06, |
| "loss": 0.2439, |
| "step": 1466 |
| }, |
| { |
| "epoch": 3.7543186180422263, |
| "grad_norm": 0.30305748324499643, |
| "learning_rate": 7.021250872810983e-06, |
| "loss": 0.2447, |
| "step": 1467 |
| }, |
| { |
| "epoch": 3.7568777991042865, |
| "grad_norm": 0.3180001382034292, |
| "learning_rate": 6.9940323100935725e-06, |
| "loss": 0.2455, |
| "step": 1468 |
| }, |
| { |
| "epoch": 3.7594369801663468, |
| "grad_norm": 0.30172748918488546, |
| "learning_rate": 6.966855423570898e-06, |
| "loss": 0.2319, |
| "step": 1469 |
| }, |
| { |
| "epoch": 3.761996161228407, |
| "grad_norm": 0.30342551799145645, |
| "learning_rate": 6.939720300328303e-06, |
| "loss": 0.2283, |
| "step": 1470 |
| }, |
| { |
| "epoch": 3.764555342290467, |
| "grad_norm": 0.31295405822100386, |
| "learning_rate": 6.9126270273173e-06, |
| "loss": 0.2361, |
| "step": 1471 |
| }, |
| { |
| "epoch": 3.767114523352527, |
| "grad_norm": 0.2970422267078146, |
| "learning_rate": 6.885575691355315e-06, |
| "loss": 0.1965, |
| "step": 1472 |
| }, |
| { |
| "epoch": 3.769673704414587, |
| "grad_norm": 0.32398586323455586, |
| "learning_rate": 6.858566379125389e-06, |
| "loss": 0.2661, |
| "step": 1473 |
| }, |
| { |
| "epoch": 3.7722328854766474, |
| "grad_norm": 0.28922597868223626, |
| "learning_rate": 6.831599177175879e-06, |
| "loss": 0.222, |
| "step": 1474 |
| }, |
| { |
| "epoch": 3.7747920665387076, |
| "grad_norm": 0.296927401509587, |
| "learning_rate": 6.8046741719202385e-06, |
| "loss": 0.2262, |
| "step": 1475 |
| }, |
| { |
| "epoch": 3.777351247600768, |
| "grad_norm": 0.318579910031184, |
| "learning_rate": 6.777791449636681e-06, |
| "loss": 0.2455, |
| "step": 1476 |
| }, |
| { |
| "epoch": 3.779910428662828, |
| "grad_norm": 0.30482817177680954, |
| "learning_rate": 6.7509510964679305e-06, |
| "loss": 0.2376, |
| "step": 1477 |
| }, |
| { |
| "epoch": 3.782469609724888, |
| "grad_norm": 0.3043184575100951, |
| "learning_rate": 6.724153198420957e-06, |
| "loss": 0.2508, |
| "step": 1478 |
| }, |
| { |
| "epoch": 3.7850287907869484, |
| "grad_norm": 0.29471146448676344, |
| "learning_rate": 6.697397841366686e-06, |
| "loss": 0.2219, |
| "step": 1479 |
| }, |
| { |
| "epoch": 3.787587971849008, |
| "grad_norm": 0.2763016151280136, |
| "learning_rate": 6.67068511103971e-06, |
| "loss": 0.2092, |
| "step": 1480 |
| }, |
| { |
| "epoch": 3.7901471529110684, |
| "grad_norm": 0.29319933516975266, |
| "learning_rate": 6.644015093038049e-06, |
| "loss": 0.2042, |
| "step": 1481 |
| }, |
| { |
| "epoch": 3.7927063339731286, |
| "grad_norm": 0.3160040922162459, |
| "learning_rate": 6.617387872822842e-06, |
| "loss": 0.2269, |
| "step": 1482 |
| }, |
| { |
| "epoch": 3.795265515035189, |
| "grad_norm": 0.31249964016432036, |
| "learning_rate": 6.590803535718082e-06, |
| "loss": 0.2841, |
| "step": 1483 |
| }, |
| { |
| "epoch": 3.797824696097249, |
| "grad_norm": 0.29901036986186935, |
| "learning_rate": 6.564262166910367e-06, |
| "loss": 0.2096, |
| "step": 1484 |
| }, |
| { |
| "epoch": 3.800383877159309, |
| "grad_norm": 0.3151541438953574, |
| "learning_rate": 6.537763851448593e-06, |
| "loss": 0.2215, |
| "step": 1485 |
| }, |
| { |
| "epoch": 3.802943058221369, |
| "grad_norm": 0.3238104601226396, |
| "learning_rate": 6.511308674243711e-06, |
| "loss": 0.2493, |
| "step": 1486 |
| }, |
| { |
| "epoch": 3.8055022392834292, |
| "grad_norm": 0.30561591970597685, |
| "learning_rate": 6.484896720068421e-06, |
| "loss": 0.238, |
| "step": 1487 |
| }, |
| { |
| "epoch": 3.8080614203454894, |
| "grad_norm": 0.28920704087260063, |
| "learning_rate": 6.458528073556925e-06, |
| "loss": 0.2685, |
| "step": 1488 |
| }, |
| { |
| "epoch": 3.8106206014075497, |
| "grad_norm": 0.3038589031703289, |
| "learning_rate": 6.432202819204667e-06, |
| "loss": 0.248, |
| "step": 1489 |
| }, |
| { |
| "epoch": 3.81317978246961, |
| "grad_norm": 0.3385452502303158, |
| "learning_rate": 6.4059210413680175e-06, |
| "loss": 0.2503, |
| "step": 1490 |
| }, |
| { |
| "epoch": 3.81573896353167, |
| "grad_norm": 0.2768232976810782, |
| "learning_rate": 6.379682824264055e-06, |
| "loss": 0.2164, |
| "step": 1491 |
| }, |
| { |
| "epoch": 3.81829814459373, |
| "grad_norm": 0.3009349939503682, |
| "learning_rate": 6.353488251970275e-06, |
| "loss": 0.2366, |
| "step": 1492 |
| }, |
| { |
| "epoch": 3.82085732565579, |
| "grad_norm": 0.2928819289728828, |
| "learning_rate": 6.327337408424281e-06, |
| "loss": 0.2332, |
| "step": 1493 |
| }, |
| { |
| "epoch": 3.8234165067178503, |
| "grad_norm": 0.2870974660694633, |
| "learning_rate": 6.301230377423595e-06, |
| "loss": 0.2, |
| "step": 1494 |
| }, |
| { |
| "epoch": 3.8259756877799105, |
| "grad_norm": 0.3081786003102801, |
| "learning_rate": 6.275167242625331e-06, |
| "loss": 0.2414, |
| "step": 1495 |
| }, |
| { |
| "epoch": 3.8285348688419707, |
| "grad_norm": 0.2790089490684267, |
| "learning_rate": 6.2491480875459336e-06, |
| "loss": 0.215, |
| "step": 1496 |
| }, |
| { |
| "epoch": 3.8310940499040305, |
| "grad_norm": 0.29306281039648446, |
| "learning_rate": 6.223172995560935e-06, |
| "loss": 0.2679, |
| "step": 1497 |
| }, |
| { |
| "epoch": 3.8336532309660907, |
| "grad_norm": 0.2762204818599091, |
| "learning_rate": 6.1972420499046635e-06, |
| "loss": 0.2192, |
| "step": 1498 |
| }, |
| { |
| "epoch": 3.836212412028151, |
| "grad_norm": 0.29828532304173333, |
| "learning_rate": 6.171355333669973e-06, |
| "loss": 0.2441, |
| "step": 1499 |
| }, |
| { |
| "epoch": 3.838771593090211, |
| "grad_norm": 0.32131992187351277, |
| "learning_rate": 6.145512929808013e-06, |
| "loss": 0.229, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.8413307741522713, |
| "grad_norm": 0.3043046372632, |
| "learning_rate": 6.119714921127933e-06, |
| "loss": 0.2694, |
| "step": 1501 |
| }, |
| { |
| "epoch": 3.8438899552143315, |
| "grad_norm": 0.2717132569091513, |
| "learning_rate": 6.093961390296603e-06, |
| "loss": 0.2254, |
| "step": 1502 |
| }, |
| { |
| "epoch": 3.8464491362763917, |
| "grad_norm": 0.30029687863634635, |
| "learning_rate": 6.068252419838399e-06, |
| "loss": 0.2326, |
| "step": 1503 |
| }, |
| { |
| "epoch": 3.849008317338452, |
| "grad_norm": 0.27288392014891966, |
| "learning_rate": 6.042588092134878e-06, |
| "loss": 0.2163, |
| "step": 1504 |
| }, |
| { |
| "epoch": 3.8515674984005117, |
| "grad_norm": 0.283307260045485, |
| "learning_rate": 6.016968489424572e-06, |
| "loss": 0.2312, |
| "step": 1505 |
| }, |
| { |
| "epoch": 3.854126679462572, |
| "grad_norm": 0.2919987165904583, |
| "learning_rate": 5.991393693802674e-06, |
| "loss": 0.2533, |
| "step": 1506 |
| }, |
| { |
| "epoch": 3.856685860524632, |
| "grad_norm": 0.2968617574713475, |
| "learning_rate": 5.96586378722081e-06, |
| "loss": 0.2397, |
| "step": 1507 |
| }, |
| { |
| "epoch": 3.8592450415866923, |
| "grad_norm": 0.28294719312430794, |
| "learning_rate": 5.940378851486766e-06, |
| "loss": 0.2302, |
| "step": 1508 |
| }, |
| { |
| "epoch": 3.8618042226487526, |
| "grad_norm": 0.2885647154013107, |
| "learning_rate": 5.9149389682642165e-06, |
| "loss": 0.2429, |
| "step": 1509 |
| }, |
| { |
| "epoch": 3.8643634037108123, |
| "grad_norm": 0.29335993884299777, |
| "learning_rate": 5.889544219072465e-06, |
| "loss": 0.2347, |
| "step": 1510 |
| }, |
| { |
| "epoch": 3.8669225847728725, |
| "grad_norm": 0.30407531909262675, |
| "learning_rate": 5.864194685286206e-06, |
| "loss": 0.2405, |
| "step": 1511 |
| }, |
| { |
| "epoch": 3.8694817658349328, |
| "grad_norm": 0.29889145730228683, |
| "learning_rate": 5.838890448135228e-06, |
| "loss": 0.2373, |
| "step": 1512 |
| }, |
| { |
| "epoch": 3.872040946896993, |
| "grad_norm": 0.3002231215499318, |
| "learning_rate": 5.81363158870418e-06, |
| "loss": 0.2316, |
| "step": 1513 |
| }, |
| { |
| "epoch": 3.874600127959053, |
| "grad_norm": 0.3246106606552585, |
| "learning_rate": 5.788418187932314e-06, |
| "loss": 0.2365, |
| "step": 1514 |
| }, |
| { |
| "epoch": 3.8771593090211134, |
| "grad_norm": 0.29292707103941984, |
| "learning_rate": 5.7632503266131925e-06, |
| "loss": 0.2087, |
| "step": 1515 |
| }, |
| { |
| "epoch": 3.8797184900831736, |
| "grad_norm": 0.3255584037433501, |
| "learning_rate": 5.7381280853944585e-06, |
| "loss": 0.2807, |
| "step": 1516 |
| }, |
| { |
| "epoch": 3.8822776711452334, |
| "grad_norm": 0.2912419112819368, |
| "learning_rate": 5.713051544777584e-06, |
| "loss": 0.2218, |
| "step": 1517 |
| }, |
| { |
| "epoch": 3.8848368522072936, |
| "grad_norm": 0.3048580778970364, |
| "learning_rate": 5.688020785117581e-06, |
| "loss": 0.2753, |
| "step": 1518 |
| }, |
| { |
| "epoch": 3.887396033269354, |
| "grad_norm": 0.29167337976733226, |
| "learning_rate": 5.66303588662277e-06, |
| "loss": 0.2329, |
| "step": 1519 |
| }, |
| { |
| "epoch": 3.889955214331414, |
| "grad_norm": 0.2992142371051243, |
| "learning_rate": 5.638096929354522e-06, |
| "loss": 0.2268, |
| "step": 1520 |
| }, |
| { |
| "epoch": 3.892514395393474, |
| "grad_norm": 0.3027423583297044, |
| "learning_rate": 5.613203993226981e-06, |
| "loss": 0.221, |
| "step": 1521 |
| }, |
| { |
| "epoch": 3.895073576455534, |
| "grad_norm": 0.3021686605260421, |
| "learning_rate": 5.588357158006821e-06, |
| "loss": 0.252, |
| "step": 1522 |
| }, |
| { |
| "epoch": 3.897632757517594, |
| "grad_norm": 0.2996327213886315, |
| "learning_rate": 5.563556503312997e-06, |
| "loss": 0.2318, |
| "step": 1523 |
| }, |
| { |
| "epoch": 3.9001919385796544, |
| "grad_norm": 0.2957035292429405, |
| "learning_rate": 5.538802108616494e-06, |
| "loss": 0.239, |
| "step": 1524 |
| }, |
| { |
| "epoch": 3.9027511196417146, |
| "grad_norm": 0.309492243720712, |
| "learning_rate": 5.514094053240035e-06, |
| "loss": 0.2228, |
| "step": 1525 |
| }, |
| { |
| "epoch": 3.905310300703775, |
| "grad_norm": 0.6416163470606975, |
| "learning_rate": 5.489432416357885e-06, |
| "loss": 0.2326, |
| "step": 1526 |
| }, |
| { |
| "epoch": 3.907869481765835, |
| "grad_norm": 0.297847179245253, |
| "learning_rate": 5.46481727699554e-06, |
| "loss": 0.2346, |
| "step": 1527 |
| }, |
| { |
| "epoch": 3.9104286628278953, |
| "grad_norm": 0.30350301665082174, |
| "learning_rate": 5.440248714029508e-06, |
| "loss": 0.2478, |
| "step": 1528 |
| }, |
| { |
| "epoch": 3.9129878438899555, |
| "grad_norm": 0.29944760683896515, |
| "learning_rate": 5.415726806187052e-06, |
| "loss": 0.2306, |
| "step": 1529 |
| }, |
| { |
| "epoch": 3.9155470249520152, |
| "grad_norm": 0.28720986057591197, |
| "learning_rate": 5.39125163204594e-06, |
| "loss": 0.2057, |
| "step": 1530 |
| }, |
| { |
| "epoch": 3.9181062060140754, |
| "grad_norm": 0.2868190209794002, |
| "learning_rate": 5.3668232700341735e-06, |
| "loss": 0.2278, |
| "step": 1531 |
| }, |
| { |
| "epoch": 3.9206653870761357, |
| "grad_norm": 0.3118363419035033, |
| "learning_rate": 5.342441798429747e-06, |
| "loss": 0.2518, |
| "step": 1532 |
| }, |
| { |
| "epoch": 3.923224568138196, |
| "grad_norm": 0.27782593526187427, |
| "learning_rate": 5.318107295360424e-06, |
| "loss": 0.2334, |
| "step": 1533 |
| }, |
| { |
| "epoch": 3.925783749200256, |
| "grad_norm": 0.29231670514977803, |
| "learning_rate": 5.293819838803429e-06, |
| "loss": 0.2198, |
| "step": 1534 |
| }, |
| { |
| "epoch": 3.928342930262316, |
| "grad_norm": 0.30148858674026124, |
| "learning_rate": 5.269579506585259e-06, |
| "loss": 0.2291, |
| "step": 1535 |
| }, |
| { |
| "epoch": 3.930902111324376, |
| "grad_norm": 0.2942793998750714, |
| "learning_rate": 5.245386376381398e-06, |
| "loss": 0.2235, |
| "step": 1536 |
| }, |
| { |
| "epoch": 3.9334612923864363, |
| "grad_norm": 0.30230762629355007, |
| "learning_rate": 5.221240525716071e-06, |
| "loss": 0.2182, |
| "step": 1537 |
| }, |
| { |
| "epoch": 3.9360204734484965, |
| "grad_norm": 0.2838869405763987, |
| "learning_rate": 5.197142031961999e-06, |
| "loss": 0.2531, |
| "step": 1538 |
| }, |
| { |
| "epoch": 3.9385796545105567, |
| "grad_norm": 0.284590980219594, |
| "learning_rate": 5.17309097234016e-06, |
| "loss": 0.2235, |
| "step": 1539 |
| }, |
| { |
| "epoch": 3.941138835572617, |
| "grad_norm": 0.2805331306681004, |
| "learning_rate": 5.149087423919541e-06, |
| "loss": 0.1941, |
| "step": 1540 |
| }, |
| { |
| "epoch": 3.943698016634677, |
| "grad_norm": 0.3227111819969921, |
| "learning_rate": 5.125131463616863e-06, |
| "loss": 0.2598, |
| "step": 1541 |
| }, |
| { |
| "epoch": 3.946257197696737, |
| "grad_norm": 0.3095815791809101, |
| "learning_rate": 5.101223168196381e-06, |
| "loss": 0.26, |
| "step": 1542 |
| }, |
| { |
| "epoch": 3.948816378758797, |
| "grad_norm": 0.28306994750627523, |
| "learning_rate": 5.077362614269599e-06, |
| "loss": 0.2214, |
| "step": 1543 |
| }, |
| { |
| "epoch": 3.9513755598208573, |
| "grad_norm": 0.30833267171559586, |
| "learning_rate": 5.05354987829503e-06, |
| "loss": 0.2473, |
| "step": 1544 |
| }, |
| { |
| "epoch": 3.9539347408829175, |
| "grad_norm": 0.29154031796148494, |
| "learning_rate": 5.029785036577976e-06, |
| "loss": 0.231, |
| "step": 1545 |
| }, |
| { |
| "epoch": 3.9564939219449777, |
| "grad_norm": 0.3230057370683127, |
| "learning_rate": 5.0060681652702745e-06, |
| "loss": 0.2538, |
| "step": 1546 |
| }, |
| { |
| "epoch": 3.9590531030070375, |
| "grad_norm": 0.28553506909372656, |
| "learning_rate": 4.982399340370017e-06, |
| "loss": 0.231, |
| "step": 1547 |
| }, |
| { |
| "epoch": 3.9616122840690977, |
| "grad_norm": 0.30170947130683173, |
| "learning_rate": 4.958778637721364e-06, |
| "loss": 0.2454, |
| "step": 1548 |
| }, |
| { |
| "epoch": 3.964171465131158, |
| "grad_norm": 0.28267032534811537, |
| "learning_rate": 4.935206133014259e-06, |
| "loss": 0.2417, |
| "step": 1549 |
| }, |
| { |
| "epoch": 3.966730646193218, |
| "grad_norm": 0.2954487998931121, |
| "learning_rate": 4.911681901784198e-06, |
| "loss": 0.2319, |
| "step": 1550 |
| }, |
| { |
| "epoch": 3.9692898272552783, |
| "grad_norm": 0.32228832699706966, |
| "learning_rate": 4.8882060194119985e-06, |
| "loss": 0.2282, |
| "step": 1551 |
| }, |
| { |
| "epoch": 3.9718490083173386, |
| "grad_norm": 0.34818873755980667, |
| "learning_rate": 4.864778561123555e-06, |
| "loss": 0.2718, |
| "step": 1552 |
| }, |
| { |
| "epoch": 3.9744081893793988, |
| "grad_norm": 0.27407731345565545, |
| "learning_rate": 4.841399601989574e-06, |
| "loss": 0.2039, |
| "step": 1553 |
| }, |
| { |
| "epoch": 3.976967370441459, |
| "grad_norm": 0.2749332266585077, |
| "learning_rate": 4.8180692169253714e-06, |
| "loss": 0.2181, |
| "step": 1554 |
| }, |
| { |
| "epoch": 3.9795265515035187, |
| "grad_norm": 0.3010112395338718, |
| "learning_rate": 4.794787480690597e-06, |
| "loss": 0.2232, |
| "step": 1555 |
| }, |
| { |
| "epoch": 3.982085732565579, |
| "grad_norm": 0.29171650447929676, |
| "learning_rate": 4.771554467889012e-06, |
| "loss": 0.2391, |
| "step": 1556 |
| }, |
| { |
| "epoch": 3.984644913627639, |
| "grad_norm": 0.2906873549139878, |
| "learning_rate": 4.74837025296826e-06, |
| "loss": 0.2297, |
| "step": 1557 |
| }, |
| { |
| "epoch": 3.9872040946896994, |
| "grad_norm": 0.3106548346493187, |
| "learning_rate": 4.725234910219609e-06, |
| "loss": 0.2564, |
| "step": 1558 |
| }, |
| { |
| "epoch": 3.9897632757517596, |
| "grad_norm": 0.29178940946369886, |
| "learning_rate": 4.702148513777731e-06, |
| "loss": 0.2457, |
| "step": 1559 |
| }, |
| { |
| "epoch": 3.9923224568138194, |
| "grad_norm": 0.290141222179049, |
| "learning_rate": 4.679111137620442e-06, |
| "loss": 0.2007, |
| "step": 1560 |
| }, |
| { |
| "epoch": 3.9948816378758796, |
| "grad_norm": 0.3078661741665255, |
| "learning_rate": 4.656122855568477e-06, |
| "loss": 0.2416, |
| "step": 1561 |
| }, |
| { |
| "epoch": 3.99744081893794, |
| "grad_norm": 0.28827757162372486, |
| "learning_rate": 4.63318374128527e-06, |
| "loss": 0.2416, |
| "step": 1562 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.36059866962884213, |
| "learning_rate": 4.610293868276681e-06, |
| "loss": 0.286, |
| "step": 1563 |
| }, |
| { |
| "epoch": 4.00255918106206, |
| "grad_norm": 0.4806949756871284, |
| "learning_rate": 4.587453309890804e-06, |
| "loss": 0.1829, |
| "step": 1564 |
| }, |
| { |
| "epoch": 4.00511836212412, |
| "grad_norm": 0.4071198782663298, |
| "learning_rate": 4.5646621393177e-06, |
| "loss": 0.2002, |
| "step": 1565 |
| }, |
| { |
| "epoch": 4.007677543186181, |
| "grad_norm": 0.29904364596653477, |
| "learning_rate": 4.541920429589168e-06, |
| "loss": 0.1689, |
| "step": 1566 |
| }, |
| { |
| "epoch": 4.010236724248241, |
| "grad_norm": 0.319382339625052, |
| "learning_rate": 4.519228253578514e-06, |
| "loss": 0.162, |
| "step": 1567 |
| }, |
| { |
| "epoch": 4.012795905310301, |
| "grad_norm": 0.44482796159027516, |
| "learning_rate": 4.496585684000332e-06, |
| "loss": 0.1905, |
| "step": 1568 |
| }, |
| { |
| "epoch": 4.015355086372361, |
| "grad_norm": 0.4881520882605733, |
| "learning_rate": 4.47399279341024e-06, |
| "loss": 0.1883, |
| "step": 1569 |
| }, |
| { |
| "epoch": 4.017914267434421, |
| "grad_norm": 0.39543877818036155, |
| "learning_rate": 4.451449654204685e-06, |
| "loss": 0.1792, |
| "step": 1570 |
| }, |
| { |
| "epoch": 4.020473448496481, |
| "grad_norm": 0.33428133649360503, |
| "learning_rate": 4.428956338620671e-06, |
| "loss": 0.1549, |
| "step": 1571 |
| }, |
| { |
| "epoch": 4.023032629558541, |
| "grad_norm": 0.39094054957770324, |
| "learning_rate": 4.406512918735555e-06, |
| "loss": 0.168, |
| "step": 1572 |
| }, |
| { |
| "epoch": 4.025591810620601, |
| "grad_norm": 0.41624590579275506, |
| "learning_rate": 4.384119466466816e-06, |
| "loss": 0.1546, |
| "step": 1573 |
| }, |
| { |
| "epoch": 4.028150991682661, |
| "grad_norm": 0.41050595250529065, |
| "learning_rate": 4.361776053571816e-06, |
| "loss": 0.1553, |
| "step": 1574 |
| }, |
| { |
| "epoch": 4.030710172744722, |
| "grad_norm": 0.338525098415926, |
| "learning_rate": 4.339482751647557e-06, |
| "loss": 0.1672, |
| "step": 1575 |
| }, |
| { |
| "epoch": 4.033269353806782, |
| "grad_norm": 0.3227337964991407, |
| "learning_rate": 4.317239632130485e-06, |
| "loss": 0.1694, |
| "step": 1576 |
| }, |
| { |
| "epoch": 4.035828534868842, |
| "grad_norm": 0.3126102999935797, |
| "learning_rate": 4.295046766296224e-06, |
| "loss": 0.1652, |
| "step": 1577 |
| }, |
| { |
| "epoch": 4.038387715930902, |
| "grad_norm": 0.3419323105009614, |
| "learning_rate": 4.272904225259387e-06, |
| "loss": 0.1643, |
| "step": 1578 |
| }, |
| { |
| "epoch": 4.0409468969929625, |
| "grad_norm": 0.35539152039667277, |
| "learning_rate": 4.250812079973301e-06, |
| "loss": 0.1693, |
| "step": 1579 |
| }, |
| { |
| "epoch": 4.043506078055023, |
| "grad_norm": 0.34150875416867105, |
| "learning_rate": 4.228770401229824e-06, |
| "loss": 0.1676, |
| "step": 1580 |
| }, |
| { |
| "epoch": 4.046065259117083, |
| "grad_norm": 0.31333649874909303, |
| "learning_rate": 4.206779259659102e-06, |
| "loss": 0.1837, |
| "step": 1581 |
| }, |
| { |
| "epoch": 4.048624440179142, |
| "grad_norm": 0.29497085250511934, |
| "learning_rate": 4.184838725729326e-06, |
| "loss": 0.1606, |
| "step": 1582 |
| }, |
| { |
| "epoch": 4.0511836212412025, |
| "grad_norm": 0.28865884769293576, |
| "learning_rate": 4.1629488697465195e-06, |
| "loss": 0.1701, |
| "step": 1583 |
| }, |
| { |
| "epoch": 4.053742802303263, |
| "grad_norm": 0.30617690195804087, |
| "learning_rate": 4.141109761854332e-06, |
| "loss": 0.1586, |
| "step": 1584 |
| }, |
| { |
| "epoch": 4.056301983365323, |
| "grad_norm": 0.32345308536745493, |
| "learning_rate": 4.119321472033779e-06, |
| "loss": 0.1787, |
| "step": 1585 |
| }, |
| { |
| "epoch": 4.058861164427383, |
| "grad_norm": 0.29444459546640134, |
| "learning_rate": 4.097584070103042e-06, |
| "loss": 0.153, |
| "step": 1586 |
| }, |
| { |
| "epoch": 4.061420345489443, |
| "grad_norm": 0.28258351174881763, |
| "learning_rate": 4.075897625717249e-06, |
| "loss": 0.1593, |
| "step": 1587 |
| }, |
| { |
| "epoch": 4.0639795265515035, |
| "grad_norm": 0.3000401537763792, |
| "learning_rate": 4.054262208368216e-06, |
| "loss": 0.1805, |
| "step": 1588 |
| }, |
| { |
| "epoch": 4.066538707613564, |
| "grad_norm": 0.31652390372720957, |
| "learning_rate": 4.032677887384262e-06, |
| "loss": 0.1702, |
| "step": 1589 |
| }, |
| { |
| "epoch": 4.069097888675624, |
| "grad_norm": 0.3228892663985873, |
| "learning_rate": 4.011144731929981e-06, |
| "loss": 0.1913, |
| "step": 1590 |
| }, |
| { |
| "epoch": 4.071657069737684, |
| "grad_norm": 0.31373728199242906, |
| "learning_rate": 3.989662811005992e-06, |
| "loss": 0.1727, |
| "step": 1591 |
| }, |
| { |
| "epoch": 4.074216250799744, |
| "grad_norm": 0.30059854075379616, |
| "learning_rate": 3.96823219344876e-06, |
| "loss": 0.2085, |
| "step": 1592 |
| }, |
| { |
| "epoch": 4.076775431861805, |
| "grad_norm": 0.29253073860156936, |
| "learning_rate": 3.9468529479303445e-06, |
| "loss": 0.1746, |
| "step": 1593 |
| }, |
| { |
| "epoch": 4.079334612923865, |
| "grad_norm": 0.2925598790253486, |
| "learning_rate": 3.925525142958189e-06, |
| "loss": 0.1949, |
| "step": 1594 |
| }, |
| { |
| "epoch": 4.081893793985924, |
| "grad_norm": 0.29501284249932824, |
| "learning_rate": 3.904248846874894e-06, |
| "loss": 0.1665, |
| "step": 1595 |
| }, |
| { |
| "epoch": 4.084452975047984, |
| "grad_norm": 0.3128356519747454, |
| "learning_rate": 3.883024127858017e-06, |
| "loss": 0.1725, |
| "step": 1596 |
| }, |
| { |
| "epoch": 4.0870121561100445, |
| "grad_norm": 0.2888374275584094, |
| "learning_rate": 3.861851053919847e-06, |
| "loss": 0.1873, |
| "step": 1597 |
| }, |
| { |
| "epoch": 4.089571337172105, |
| "grad_norm": 0.28170591176204707, |
| "learning_rate": 3.840729692907164e-06, |
| "loss": 0.1789, |
| "step": 1598 |
| }, |
| { |
| "epoch": 4.092130518234165, |
| "grad_norm": 0.2851212955027544, |
| "learning_rate": 3.819660112501053e-06, |
| "loss": 0.1587, |
| "step": 1599 |
| }, |
| { |
| "epoch": 4.094689699296225, |
| "grad_norm": 0.2794461018913548, |
| "learning_rate": 3.7986423802166705e-06, |
| "loss": 0.1564, |
| "step": 1600 |
| }, |
| { |
| "epoch": 4.097248880358285, |
| "grad_norm": 0.29566827038245036, |
| "learning_rate": 3.7776765634030234e-06, |
| "loss": 0.1636, |
| "step": 1601 |
| }, |
| { |
| "epoch": 4.099808061420346, |
| "grad_norm": 0.2933200780850988, |
| "learning_rate": 3.756762729242773e-06, |
| "loss": 0.1991, |
| "step": 1602 |
| }, |
| { |
| "epoch": 4.102367242482406, |
| "grad_norm": 0.2761865132545396, |
| "learning_rate": 3.7359009447520112e-06, |
| "loss": 0.165, |
| "step": 1603 |
| }, |
| { |
| "epoch": 4.104926423544466, |
| "grad_norm": 0.2661987317855668, |
| "learning_rate": 3.715091276780023e-06, |
| "loss": 0.1897, |
| "step": 1604 |
| }, |
| { |
| "epoch": 4.107485604606526, |
| "grad_norm": 0.2898297360177148, |
| "learning_rate": 3.694333792009115e-06, |
| "loss": 0.1967, |
| "step": 1605 |
| }, |
| { |
| "epoch": 4.110044785668586, |
| "grad_norm": 0.30088937431780177, |
| "learning_rate": 3.6736285569543585e-06, |
| "loss": 0.1705, |
| "step": 1606 |
| }, |
| { |
| "epoch": 4.112603966730646, |
| "grad_norm": 0.3076179252763136, |
| "learning_rate": 3.652975637963401e-06, |
| "loss": 0.1865, |
| "step": 1607 |
| }, |
| { |
| "epoch": 4.115163147792706, |
| "grad_norm": 0.2666186074605756, |
| "learning_rate": 3.632375101216259e-06, |
| "loss": 0.1804, |
| "step": 1608 |
| }, |
| { |
| "epoch": 4.117722328854766, |
| "grad_norm": 0.264680141837875, |
| "learning_rate": 3.6118270127250954e-06, |
| "loss": 0.139, |
| "step": 1609 |
| }, |
| { |
| "epoch": 4.120281509916826, |
| "grad_norm": 0.26713666643272443, |
| "learning_rate": 3.5913314383339937e-06, |
| "loss": 0.1533, |
| "step": 1610 |
| }, |
| { |
| "epoch": 4.122840690978887, |
| "grad_norm": 0.28782921605361467, |
| "learning_rate": 3.5708884437187673e-06, |
| "loss": 0.1614, |
| "step": 1611 |
| }, |
| { |
| "epoch": 4.125399872040947, |
| "grad_norm": 0.27318365156538243, |
| "learning_rate": 3.5504980943867538e-06, |
| "loss": 0.1868, |
| "step": 1612 |
| }, |
| { |
| "epoch": 4.127959053103007, |
| "grad_norm": 0.27437741262556425, |
| "learning_rate": 3.53016045567659e-06, |
| "loss": 0.1634, |
| "step": 1613 |
| }, |
| { |
| "epoch": 4.130518234165067, |
| "grad_norm": 0.27443522507309115, |
| "learning_rate": 3.509875592757999e-06, |
| "loss": 0.2041, |
| "step": 1614 |
| }, |
| { |
| "epoch": 4.1330774152271275, |
| "grad_norm": 0.2733625632699625, |
| "learning_rate": 3.4896435706316e-06, |
| "loss": 0.1676, |
| "step": 1615 |
| }, |
| { |
| "epoch": 4.135636596289188, |
| "grad_norm": 0.28425889785798236, |
| "learning_rate": 3.469464454128684e-06, |
| "loss": 0.1714, |
| "step": 1616 |
| }, |
| { |
| "epoch": 4.138195777351248, |
| "grad_norm": 0.27651054747700926, |
| "learning_rate": 3.4493383079110054e-06, |
| "loss": 0.2032, |
| "step": 1617 |
| }, |
| { |
| "epoch": 4.140754958413308, |
| "grad_norm": 0.27095766306492064, |
| "learning_rate": 3.429265196470599e-06, |
| "loss": 0.1654, |
| "step": 1618 |
| }, |
| { |
| "epoch": 4.143314139475368, |
| "grad_norm": 0.27829913330629624, |
| "learning_rate": 3.409245184129546e-06, |
| "loss": 0.1753, |
| "step": 1619 |
| }, |
| { |
| "epoch": 4.145873320537428, |
| "grad_norm": 0.26642028532837686, |
| "learning_rate": 3.3892783350397675e-06, |
| "loss": 0.1605, |
| "step": 1620 |
| }, |
| { |
| "epoch": 4.148432501599488, |
| "grad_norm": 0.2671909078689866, |
| "learning_rate": 3.369364713182848e-06, |
| "loss": 0.1546, |
| "step": 1621 |
| }, |
| { |
| "epoch": 4.150991682661548, |
| "grad_norm": 0.2744629320037609, |
| "learning_rate": 3.349504382369795e-06, |
| "loss": 0.1606, |
| "step": 1622 |
| }, |
| { |
| "epoch": 4.153550863723608, |
| "grad_norm": 0.2655886430602391, |
| "learning_rate": 3.329697406240855e-06, |
| "loss": 0.1802, |
| "step": 1623 |
| }, |
| { |
| "epoch": 4.1561100447856685, |
| "grad_norm": 0.2735670466849154, |
| "learning_rate": 3.309943848265311e-06, |
| "loss": 0.1685, |
| "step": 1624 |
| }, |
| { |
| "epoch": 4.158669225847729, |
| "grad_norm": 0.2733935619603659, |
| "learning_rate": 3.290243771741275e-06, |
| "loss": 0.1712, |
| "step": 1625 |
| }, |
| { |
| "epoch": 4.161228406909789, |
| "grad_norm": 0.28420268431811874, |
| "learning_rate": 3.2705972397954655e-06, |
| "loss": 0.1888, |
| "step": 1626 |
| }, |
| { |
| "epoch": 4.163787587971849, |
| "grad_norm": 0.26985232539202264, |
| "learning_rate": 3.2510043153830486e-06, |
| "loss": 0.1877, |
| "step": 1627 |
| }, |
| { |
| "epoch": 4.166346769033909, |
| "grad_norm": 0.2656409578402704, |
| "learning_rate": 3.231465061287391e-06, |
| "loss": 0.1844, |
| "step": 1628 |
| }, |
| { |
| "epoch": 4.1689059500959695, |
| "grad_norm": 0.2870535364269227, |
| "learning_rate": 3.211979540119883e-06, |
| "loss": 0.1489, |
| "step": 1629 |
| }, |
| { |
| "epoch": 4.17146513115803, |
| "grad_norm": 0.2751438862963445, |
| "learning_rate": 3.1925478143197418e-06, |
| "loss": 0.1651, |
| "step": 1630 |
| }, |
| { |
| "epoch": 4.17402431222009, |
| "grad_norm": 0.28504383266802613, |
| "learning_rate": 3.1731699461537958e-06, |
| "loss": 0.1809, |
| "step": 1631 |
| }, |
| { |
| "epoch": 4.176583493282149, |
| "grad_norm": 0.28160560698590886, |
| "learning_rate": 3.153845997716303e-06, |
| "loss": 0.1608, |
| "step": 1632 |
| }, |
| { |
| "epoch": 4.1791426743442095, |
| "grad_norm": 0.2792655962671141, |
| "learning_rate": 3.1345760309287264e-06, |
| "loss": 0.1486, |
| "step": 1633 |
| }, |
| { |
| "epoch": 4.18170185540627, |
| "grad_norm": 0.2835400810692843, |
| "learning_rate": 3.1153601075395533e-06, |
| "loss": 0.1742, |
| "step": 1634 |
| }, |
| { |
| "epoch": 4.18426103646833, |
| "grad_norm": 0.27354882443074413, |
| "learning_rate": 3.0961982891241083e-06, |
| "loss": 0.1892, |
| "step": 1635 |
| }, |
| { |
| "epoch": 4.18682021753039, |
| "grad_norm": 0.2593652179219975, |
| "learning_rate": 3.0770906370843234e-06, |
| "loss": 0.176, |
| "step": 1636 |
| }, |
| { |
| "epoch": 4.18937939859245, |
| "grad_norm": 0.28067593594699425, |
| "learning_rate": 3.058037212648579e-06, |
| "loss": 0.1942, |
| "step": 1637 |
| }, |
| { |
| "epoch": 4.1919385796545106, |
| "grad_norm": 0.27960007176651136, |
| "learning_rate": 3.039038076871481e-06, |
| "loss": 0.1722, |
| "step": 1638 |
| }, |
| { |
| "epoch": 4.194497760716571, |
| "grad_norm": 0.2784512143122092, |
| "learning_rate": 3.02009329063367e-06, |
| "loss": 0.1819, |
| "step": 1639 |
| }, |
| { |
| "epoch": 4.197056941778631, |
| "grad_norm": 0.291087706558686, |
| "learning_rate": 3.001202914641628e-06, |
| "loss": 0.1855, |
| "step": 1640 |
| }, |
| { |
| "epoch": 4.199616122840691, |
| "grad_norm": 0.2731191212662854, |
| "learning_rate": 2.9823670094275e-06, |
| "loss": 0.1671, |
| "step": 1641 |
| }, |
| { |
| "epoch": 4.202175303902751, |
| "grad_norm": 0.27430309267830155, |
| "learning_rate": 2.9635856353488645e-06, |
| "loss": 0.1731, |
| "step": 1642 |
| }, |
| { |
| "epoch": 4.204734484964812, |
| "grad_norm": 0.26629467870174495, |
| "learning_rate": 2.9448588525885746e-06, |
| "loss": 0.1845, |
| "step": 1643 |
| }, |
| { |
| "epoch": 4.207293666026872, |
| "grad_norm": 0.2742753809709845, |
| "learning_rate": 2.9261867211545603e-06, |
| "loss": 0.1748, |
| "step": 1644 |
| }, |
| { |
| "epoch": 4.209852847088931, |
| "grad_norm": 0.28012849721285554, |
| "learning_rate": 2.907569300879596e-06, |
| "loss": 0.1994, |
| "step": 1645 |
| }, |
| { |
| "epoch": 4.212412028150991, |
| "grad_norm": 0.28016509946069285, |
| "learning_rate": 2.889006651421169e-06, |
| "loss": 0.1788, |
| "step": 1646 |
| }, |
| { |
| "epoch": 4.214971209213052, |
| "grad_norm": 0.2811917062883201, |
| "learning_rate": 2.870498832261257e-06, |
| "loss": 0.1486, |
| "step": 1647 |
| }, |
| { |
| "epoch": 4.217530390275112, |
| "grad_norm": 0.277577622075296, |
| "learning_rate": 2.85204590270612e-06, |
| "loss": 0.1832, |
| "step": 1648 |
| }, |
| { |
| "epoch": 4.220089571337172, |
| "grad_norm": 0.27124773514140205, |
| "learning_rate": 2.8336479218861556e-06, |
| "loss": 0.1626, |
| "step": 1649 |
| }, |
| { |
| "epoch": 4.222648752399232, |
| "grad_norm": 0.26604650947349917, |
| "learning_rate": 2.815304948755664e-06, |
| "loss": 0.1686, |
| "step": 1650 |
| }, |
| { |
| "epoch": 4.225207933461292, |
| "grad_norm": 0.273081026747494, |
| "learning_rate": 2.7970170420926957e-06, |
| "loss": 0.1713, |
| "step": 1651 |
| }, |
| { |
| "epoch": 4.227767114523353, |
| "grad_norm": 0.2906700784230057, |
| "learning_rate": 2.778784260498828e-06, |
| "loss": 0.1681, |
| "step": 1652 |
| }, |
| { |
| "epoch": 4.230326295585413, |
| "grad_norm": 0.29269194422721684, |
| "learning_rate": 2.7606066623990145e-06, |
| "loss": 0.1869, |
| "step": 1653 |
| }, |
| { |
| "epoch": 4.232885476647473, |
| "grad_norm": 0.28591911142148163, |
| "learning_rate": 2.742484306041373e-06, |
| "loss": 0.174, |
| "step": 1654 |
| }, |
| { |
| "epoch": 4.235444657709533, |
| "grad_norm": 0.27682074794775224, |
| "learning_rate": 2.7244172494969978e-06, |
| "loss": 0.1855, |
| "step": 1655 |
| }, |
| { |
| "epoch": 4.2380038387715935, |
| "grad_norm": 0.302986579035124, |
| "learning_rate": 2.7064055506597875e-06, |
| "loss": 0.1641, |
| "step": 1656 |
| }, |
| { |
| "epoch": 4.240563019833653, |
| "grad_norm": 0.2799225480334416, |
| "learning_rate": 2.688449267246258e-06, |
| "loss": 0.1923, |
| "step": 1657 |
| }, |
| { |
| "epoch": 4.243122200895713, |
| "grad_norm": 0.38988709458435794, |
| "learning_rate": 2.6705484567953386e-06, |
| "loss": 0.2104, |
| "step": 1658 |
| }, |
| { |
| "epoch": 4.245681381957773, |
| "grad_norm": 0.26949955966216177, |
| "learning_rate": 2.6527031766682142e-06, |
| "loss": 0.1718, |
| "step": 1659 |
| }, |
| { |
| "epoch": 4.248240563019833, |
| "grad_norm": 0.2788156254648664, |
| "learning_rate": 2.6349134840481294e-06, |
| "loss": 0.1711, |
| "step": 1660 |
| }, |
| { |
| "epoch": 4.250799744081894, |
| "grad_norm": 0.2621763225250691, |
| "learning_rate": 2.6171794359401957e-06, |
| "loss": 0.1532, |
| "step": 1661 |
| }, |
| { |
| "epoch": 4.253358925143954, |
| "grad_norm": 0.2865555062851034, |
| "learning_rate": 2.599501089171217e-06, |
| "loss": 0.1552, |
| "step": 1662 |
| }, |
| { |
| "epoch": 4.255918106206014, |
| "grad_norm": 0.26224793954089864, |
| "learning_rate": 2.581878500389523e-06, |
| "loss": 0.1755, |
| "step": 1663 |
| }, |
| { |
| "epoch": 4.258477287268074, |
| "grad_norm": 0.2721003139035216, |
| "learning_rate": 2.564311726064754e-06, |
| "loss": 0.1898, |
| "step": 1664 |
| }, |
| { |
| "epoch": 4.2610364683301345, |
| "grad_norm": 0.27269538230220364, |
| "learning_rate": 2.546800822487714e-06, |
| "loss": 0.1698, |
| "step": 1665 |
| }, |
| { |
| "epoch": 4.263595649392195, |
| "grad_norm": 0.28827484525986047, |
| "learning_rate": 2.5293458457701726e-06, |
| "loss": 0.2087, |
| "step": 1666 |
| }, |
| { |
| "epoch": 4.266154830454255, |
| "grad_norm": 0.27259819354187614, |
| "learning_rate": 2.5119468518446844e-06, |
| "loss": 0.18, |
| "step": 1667 |
| }, |
| { |
| "epoch": 4.268714011516315, |
| "grad_norm": 0.28656968871426663, |
| "learning_rate": 2.494603896464405e-06, |
| "loss": 0.1818, |
| "step": 1668 |
| }, |
| { |
| "epoch": 4.271273192578375, |
| "grad_norm": 0.28475736704342813, |
| "learning_rate": 2.47731703520294e-06, |
| "loss": 0.1888, |
| "step": 1669 |
| }, |
| { |
| "epoch": 4.273832373640435, |
| "grad_norm": 0.2816641273674954, |
| "learning_rate": 2.4600863234541338e-06, |
| "loss": 0.186, |
| "step": 1670 |
| }, |
| { |
| "epoch": 4.276391554702495, |
| "grad_norm": 0.2836263153168214, |
| "learning_rate": 2.4429118164319076e-06, |
| "loss": 0.1554, |
| "step": 1671 |
| }, |
| { |
| "epoch": 4.278950735764555, |
| "grad_norm": 0.2840636606374706, |
| "learning_rate": 2.4257935691700897e-06, |
| "loss": 0.2089, |
| "step": 1672 |
| }, |
| { |
| "epoch": 4.281509916826615, |
| "grad_norm": 0.2745376280727821, |
| "learning_rate": 2.408731636522217e-06, |
| "loss": 0.1579, |
| "step": 1673 |
| }, |
| { |
| "epoch": 4.2840690978886755, |
| "grad_norm": 0.2605630953509163, |
| "learning_rate": 2.3917260731613733e-06, |
| "loss": 0.1903, |
| "step": 1674 |
| }, |
| { |
| "epoch": 4.286628278950736, |
| "grad_norm": 0.2740285831465967, |
| "learning_rate": 2.374776933580025e-06, |
| "loss": 0.1725, |
| "step": 1675 |
| }, |
| { |
| "epoch": 4.289187460012796, |
| "grad_norm": 0.28095336757761535, |
| "learning_rate": 2.35788427208983e-06, |
| "loss": 0.1867, |
| "step": 1676 |
| }, |
| { |
| "epoch": 4.291746641074856, |
| "grad_norm": 0.27154745065005187, |
| "learning_rate": 2.3410481428214602e-06, |
| "loss": 0.1613, |
| "step": 1677 |
| }, |
| { |
| "epoch": 4.294305822136916, |
| "grad_norm": 0.27345791664879376, |
| "learning_rate": 2.324268599724451e-06, |
| "loss": 0.1667, |
| "step": 1678 |
| }, |
| { |
| "epoch": 4.296865003198977, |
| "grad_norm": 0.2732090679685769, |
| "learning_rate": 2.307545696566997e-06, |
| "loss": 0.1657, |
| "step": 1679 |
| }, |
| { |
| "epoch": 4.299424184261037, |
| "grad_norm": 0.2697521270279199, |
| "learning_rate": 2.2908794869358044e-06, |
| "loss": 0.1897, |
| "step": 1680 |
| }, |
| { |
| "epoch": 4.301983365323097, |
| "grad_norm": 0.27114579546466616, |
| "learning_rate": 2.274270024235912e-06, |
| "loss": 0.188, |
| "step": 1681 |
| }, |
| { |
| "epoch": 4.304542546385157, |
| "grad_norm": 0.2729061433296195, |
| "learning_rate": 2.2577173616905256e-06, |
| "loss": 0.1595, |
| "step": 1682 |
| }, |
| { |
| "epoch": 4.3071017274472165, |
| "grad_norm": 0.2906131303633033, |
| "learning_rate": 2.2412215523408266e-06, |
| "loss": 0.1737, |
| "step": 1683 |
| }, |
| { |
| "epoch": 4.309660908509277, |
| "grad_norm": 0.2768622060092854, |
| "learning_rate": 2.2247826490458223e-06, |
| "loss": 0.1796, |
| "step": 1684 |
| }, |
| { |
| "epoch": 4.312220089571337, |
| "grad_norm": 0.27867740126032275, |
| "learning_rate": 2.2084007044821764e-06, |
| "loss": 0.1565, |
| "step": 1685 |
| }, |
| { |
| "epoch": 4.314779270633397, |
| "grad_norm": 0.2723637032411945, |
| "learning_rate": 2.1920757711440354e-06, |
| "loss": 0.1756, |
| "step": 1686 |
| }, |
| { |
| "epoch": 4.317338451695457, |
| "grad_norm": 0.2676292277176362, |
| "learning_rate": 2.1758079013428435e-06, |
| "loss": 0.1683, |
| "step": 1687 |
| }, |
| { |
| "epoch": 4.319897632757518, |
| "grad_norm": 0.29143070017187106, |
| "learning_rate": 2.159597147207213e-06, |
| "loss": 0.1697, |
| "step": 1688 |
| }, |
| { |
| "epoch": 4.322456813819578, |
| "grad_norm": 0.27260777733690406, |
| "learning_rate": 2.143443560682721e-06, |
| "loss": 0.1788, |
| "step": 1689 |
| }, |
| { |
| "epoch": 4.325015994881638, |
| "grad_norm": 0.27238765087958206, |
| "learning_rate": 2.127347193531757e-06, |
| "loss": 0.1704, |
| "step": 1690 |
| }, |
| { |
| "epoch": 4.327575175943698, |
| "grad_norm": 0.3004112415700906, |
| "learning_rate": 2.1113080973333643e-06, |
| "loss": 0.1684, |
| "step": 1691 |
| }, |
| { |
| "epoch": 4.330134357005758, |
| "grad_norm": 0.27426577172310523, |
| "learning_rate": 2.0953263234830667e-06, |
| "loss": 0.1541, |
| "step": 1692 |
| }, |
| { |
| "epoch": 4.332693538067819, |
| "grad_norm": 0.27454753389749326, |
| "learning_rate": 2.0794019231926986e-06, |
| "loss": 0.1861, |
| "step": 1693 |
| }, |
| { |
| "epoch": 4.335252719129878, |
| "grad_norm": 0.2809263966115645, |
| "learning_rate": 2.0635349474902598e-06, |
| "loss": 0.1785, |
| "step": 1694 |
| }, |
| { |
| "epoch": 4.337811900191938, |
| "grad_norm": 0.2690301934416836, |
| "learning_rate": 2.0477254472197237e-06, |
| "loss": 0.1896, |
| "step": 1695 |
| }, |
| { |
| "epoch": 4.340371081253998, |
| "grad_norm": 0.2682604924926407, |
| "learning_rate": 2.0319734730408935e-06, |
| "loss": 0.1775, |
| "step": 1696 |
| }, |
| { |
| "epoch": 4.342930262316059, |
| "grad_norm": 0.2696807174487812, |
| "learning_rate": 2.016279075429246e-06, |
| "loss": 0.1903, |
| "step": 1697 |
| }, |
| { |
| "epoch": 4.345489443378119, |
| "grad_norm": 0.2665228127768651, |
| "learning_rate": 2.0006423046757596e-06, |
| "loss": 0.1754, |
| "step": 1698 |
| }, |
| { |
| "epoch": 4.348048624440179, |
| "grad_norm": 0.2694146119632947, |
| "learning_rate": 1.985063210886735e-06, |
| "loss": 0.1549, |
| "step": 1699 |
| }, |
| { |
| "epoch": 4.350607805502239, |
| "grad_norm": 0.2822224216458224, |
| "learning_rate": 1.96954184398368e-06, |
| "loss": 0.1362, |
| "step": 1700 |
| }, |
| { |
| "epoch": 4.3531669865642995, |
| "grad_norm": 0.2615035547353888, |
| "learning_rate": 1.9540782537031045e-06, |
| "loss": 0.1586, |
| "step": 1701 |
| }, |
| { |
| "epoch": 4.35572616762636, |
| "grad_norm": 0.269471616538485, |
| "learning_rate": 1.9386724895963805e-06, |
| "loss": 0.1612, |
| "step": 1702 |
| }, |
| { |
| "epoch": 4.35828534868842, |
| "grad_norm": 0.2682599270036803, |
| "learning_rate": 1.9233246010295903e-06, |
| "loss": 0.1822, |
| "step": 1703 |
| }, |
| { |
| "epoch": 4.36084452975048, |
| "grad_norm": 0.2632188336157985, |
| "learning_rate": 1.908034637183356e-06, |
| "loss": 0.1815, |
| "step": 1704 |
| }, |
| { |
| "epoch": 4.36340371081254, |
| "grad_norm": 0.2615148499789861, |
| "learning_rate": 1.8928026470526917e-06, |
| "loss": 0.1545, |
| "step": 1705 |
| }, |
| { |
| "epoch": 4.3659628918746005, |
| "grad_norm": 0.274824297046551, |
| "learning_rate": 1.8776286794468346e-06, |
| "loss": 0.1476, |
| "step": 1706 |
| }, |
| { |
| "epoch": 4.36852207293666, |
| "grad_norm": 0.27527733450113034, |
| "learning_rate": 1.8625127829890922e-06, |
| "loss": 0.2037, |
| "step": 1707 |
| }, |
| { |
| "epoch": 4.37108125399872, |
| "grad_norm": 0.27423935602322536, |
| "learning_rate": 1.8474550061166984e-06, |
| "loss": 0.1719, |
| "step": 1708 |
| }, |
| { |
| "epoch": 4.37364043506078, |
| "grad_norm": 0.27364608091349185, |
| "learning_rate": 1.8324553970806436e-06, |
| "loss": 0.1664, |
| "step": 1709 |
| }, |
| { |
| "epoch": 4.3761996161228405, |
| "grad_norm": 0.2796166648988934, |
| "learning_rate": 1.817514003945524e-06, |
| "loss": 0.1953, |
| "step": 1710 |
| }, |
| { |
| "epoch": 4.378758797184901, |
| "grad_norm": 0.26079246660216127, |
| "learning_rate": 1.802630874589404e-06, |
| "loss": 0.1641, |
| "step": 1711 |
| }, |
| { |
| "epoch": 4.381317978246961, |
| "grad_norm": 0.2999617994892891, |
| "learning_rate": 1.787806056703627e-06, |
| "loss": 0.1718, |
| "step": 1712 |
| }, |
| { |
| "epoch": 4.383877159309021, |
| "grad_norm": 0.2889063824640918, |
| "learning_rate": 1.7730395977926917e-06, |
| "loss": 0.1653, |
| "step": 1713 |
| }, |
| { |
| "epoch": 4.386436340371081, |
| "grad_norm": 0.26748602837639757, |
| "learning_rate": 1.758331545174099e-06, |
| "loss": 0.1842, |
| "step": 1714 |
| }, |
| { |
| "epoch": 4.3889955214331415, |
| "grad_norm": 0.2806929556508416, |
| "learning_rate": 1.743681945978184e-06, |
| "loss": 0.1586, |
| "step": 1715 |
| }, |
| { |
| "epoch": 4.391554702495202, |
| "grad_norm": 0.29287158041254, |
| "learning_rate": 1.7290908471479805e-06, |
| "loss": 0.1761, |
| "step": 1716 |
| }, |
| { |
| "epoch": 4.394113883557262, |
| "grad_norm": 0.28949556938232984, |
| "learning_rate": 1.7145582954390638e-06, |
| "loss": 0.1831, |
| "step": 1717 |
| }, |
| { |
| "epoch": 4.396673064619322, |
| "grad_norm": 0.2707679886069612, |
| "learning_rate": 1.7000843374193987e-06, |
| "loss": 0.1796, |
| "step": 1718 |
| }, |
| { |
| "epoch": 4.399232245681382, |
| "grad_norm": 0.26434041057826485, |
| "learning_rate": 1.6856690194691872e-06, |
| "loss": 0.1812, |
| "step": 1719 |
| }, |
| { |
| "epoch": 4.401791426743442, |
| "grad_norm": 0.2717307275192052, |
| "learning_rate": 1.6713123877807413e-06, |
| "loss": 0.1618, |
| "step": 1720 |
| }, |
| { |
| "epoch": 4.404350607805502, |
| "grad_norm": 0.26427198325914797, |
| "learning_rate": 1.6570144883582994e-06, |
| "loss": 0.1485, |
| "step": 1721 |
| }, |
| { |
| "epoch": 4.406909788867562, |
| "grad_norm": 0.2830739919541902, |
| "learning_rate": 1.6427753670179214e-06, |
| "loss": 0.1628, |
| "step": 1722 |
| }, |
| { |
| "epoch": 4.409468969929622, |
| "grad_norm": 0.2667553896951004, |
| "learning_rate": 1.6285950693872999e-06, |
| "loss": 0.1887, |
| "step": 1723 |
| }, |
| { |
| "epoch": 4.4120281509916826, |
| "grad_norm": 0.2681400499833261, |
| "learning_rate": 1.614473640905645e-06, |
| "loss": 0.1629, |
| "step": 1724 |
| }, |
| { |
| "epoch": 4.414587332053743, |
| "grad_norm": 0.2629743389937269, |
| "learning_rate": 1.6004111268235156e-06, |
| "loss": 0.2008, |
| "step": 1725 |
| }, |
| { |
| "epoch": 4.417146513115803, |
| "grad_norm": 0.285465240532548, |
| "learning_rate": 1.5864075722027017e-06, |
| "loss": 0.191, |
| "step": 1726 |
| }, |
| { |
| "epoch": 4.419705694177863, |
| "grad_norm": 0.2683638382247192, |
| "learning_rate": 1.5724630219160553e-06, |
| "loss": 0.2073, |
| "step": 1727 |
| }, |
| { |
| "epoch": 4.422264875239923, |
| "grad_norm": 0.2829211001794818, |
| "learning_rate": 1.5585775206473508e-06, |
| "loss": 0.1568, |
| "step": 1728 |
| }, |
| { |
| "epoch": 4.424824056301984, |
| "grad_norm": 0.2788212658151338, |
| "learning_rate": 1.5447511128911542e-06, |
| "loss": 0.1728, |
| "step": 1729 |
| }, |
| { |
| "epoch": 4.427383237364044, |
| "grad_norm": 0.2848579162361746, |
| "learning_rate": 1.5309838429526714e-06, |
| "loss": 0.1904, |
| "step": 1730 |
| }, |
| { |
| "epoch": 4.429942418426104, |
| "grad_norm": 0.2654662056800488, |
| "learning_rate": 1.5172757549476024e-06, |
| "loss": 0.166, |
| "step": 1731 |
| }, |
| { |
| "epoch": 4.432501599488164, |
| "grad_norm": 0.285577343916777, |
| "learning_rate": 1.5036268928020125e-06, |
| "loss": 0.195, |
| "step": 1732 |
| }, |
| { |
| "epoch": 4.435060780550224, |
| "grad_norm": 0.25545559192222317, |
| "learning_rate": 1.4900373002521851e-06, |
| "loss": 0.1706, |
| "step": 1733 |
| }, |
| { |
| "epoch": 4.437619961612284, |
| "grad_norm": 0.279484081091582, |
| "learning_rate": 1.4765070208444732e-06, |
| "loss": 0.1909, |
| "step": 1734 |
| }, |
| { |
| "epoch": 4.440179142674344, |
| "grad_norm": 0.27394133244756325, |
| "learning_rate": 1.4630360979351644e-06, |
| "loss": 0.1955, |
| "step": 1735 |
| }, |
| { |
| "epoch": 4.442738323736404, |
| "grad_norm": 0.27730795832891525, |
| "learning_rate": 1.4496245746903626e-06, |
| "loss": 0.1668, |
| "step": 1736 |
| }, |
| { |
| "epoch": 4.445297504798464, |
| "grad_norm": 0.2515943739407271, |
| "learning_rate": 1.4362724940858109e-06, |
| "loss": 0.173, |
| "step": 1737 |
| }, |
| { |
| "epoch": 4.447856685860525, |
| "grad_norm": 0.25367847682125305, |
| "learning_rate": 1.422979898906789e-06, |
| "loss": 0.1639, |
| "step": 1738 |
| }, |
| { |
| "epoch": 4.450415866922585, |
| "grad_norm": 0.27558480853746065, |
| "learning_rate": 1.4097468317479623e-06, |
| "loss": 0.1633, |
| "step": 1739 |
| }, |
| { |
| "epoch": 4.452975047984645, |
| "grad_norm": 0.27695396643812065, |
| "learning_rate": 1.396573335013236e-06, |
| "loss": 0.1808, |
| "step": 1740 |
| }, |
| { |
| "epoch": 4.455534229046705, |
| "grad_norm": 0.2804193028236503, |
| "learning_rate": 1.3834594509156319e-06, |
| "loss": 0.1673, |
| "step": 1741 |
| }, |
| { |
| "epoch": 4.4580934101087655, |
| "grad_norm": 0.2782333366929398, |
| "learning_rate": 1.3704052214771513e-06, |
| "loss": 0.1971, |
| "step": 1742 |
| }, |
| { |
| "epoch": 4.460652591170826, |
| "grad_norm": 0.2777400443098731, |
| "learning_rate": 1.3574106885286465e-06, |
| "loss": 0.1737, |
| "step": 1743 |
| }, |
| { |
| "epoch": 4.463211772232885, |
| "grad_norm": 0.2764493972670724, |
| "learning_rate": 1.344475893709658e-06, |
| "loss": 0.1904, |
| "step": 1744 |
| }, |
| { |
| "epoch": 4.465770953294945, |
| "grad_norm": 0.2752241285220294, |
| "learning_rate": 1.3316008784683265e-06, |
| "loss": 0.1613, |
| "step": 1745 |
| }, |
| { |
| "epoch": 4.468330134357005, |
| "grad_norm": 0.2693372116468191, |
| "learning_rate": 1.3187856840612167e-06, |
| "loss": 0.1627, |
| "step": 1746 |
| }, |
| { |
| "epoch": 4.470889315419066, |
| "grad_norm": 0.26299523604064184, |
| "learning_rate": 1.3060303515532135e-06, |
| "loss": 0.1644, |
| "step": 1747 |
| }, |
| { |
| "epoch": 4.473448496481126, |
| "grad_norm": 0.26861009102213246, |
| "learning_rate": 1.2933349218173774e-06, |
| "loss": 0.1748, |
| "step": 1748 |
| }, |
| { |
| "epoch": 4.476007677543186, |
| "grad_norm": 0.266256378668002, |
| "learning_rate": 1.2806994355348224e-06, |
| "loss": 0.1717, |
| "step": 1749 |
| }, |
| { |
| "epoch": 4.478566858605246, |
| "grad_norm": 0.2783524667972571, |
| "learning_rate": 1.2681239331945695e-06, |
| "loss": 0.1739, |
| "step": 1750 |
| }, |
| { |
| "epoch": 4.4811260396673065, |
| "grad_norm": 0.2807754665043445, |
| "learning_rate": 1.2556084550934423e-06, |
| "loss": 0.163, |
| "step": 1751 |
| }, |
| { |
| "epoch": 4.483685220729367, |
| "grad_norm": 0.2751132941559695, |
| "learning_rate": 1.2431530413359138e-06, |
| "loss": 0.1596, |
| "step": 1752 |
| }, |
| { |
| "epoch": 4.486244401791427, |
| "grad_norm": 0.27965370827809377, |
| "learning_rate": 1.2307577318339825e-06, |
| "loss": 0.1764, |
| "step": 1753 |
| }, |
| { |
| "epoch": 4.488803582853487, |
| "grad_norm": 0.27090435786248723, |
| "learning_rate": 1.2184225663070604e-06, |
| "loss": 0.1904, |
| "step": 1754 |
| }, |
| { |
| "epoch": 4.491362763915547, |
| "grad_norm": 0.2830802635501525, |
| "learning_rate": 1.2061475842818337e-06, |
| "loss": 0.1785, |
| "step": 1755 |
| }, |
| { |
| "epoch": 4.4939219449776076, |
| "grad_norm": 0.2624221347168147, |
| "learning_rate": 1.1939328250921278e-06, |
| "loss": 0.1804, |
| "step": 1756 |
| }, |
| { |
| "epoch": 4.496481126039667, |
| "grad_norm": 0.27670735162368, |
| "learning_rate": 1.1817783278788042e-06, |
| "loss": 0.1534, |
| "step": 1757 |
| }, |
| { |
| "epoch": 4.499040307101727, |
| "grad_norm": 0.2755467325350106, |
| "learning_rate": 1.169684131589608e-06, |
| "loss": 0.1791, |
| "step": 1758 |
| }, |
| { |
| "epoch": 4.501599488163787, |
| "grad_norm": 0.2750192315338786, |
| "learning_rate": 1.1576502749790608e-06, |
| "loss": 0.1721, |
| "step": 1759 |
| }, |
| { |
| "epoch": 4.5041586692258475, |
| "grad_norm": 0.26825366429953873, |
| "learning_rate": 1.1456767966083393e-06, |
| "loss": 0.1739, |
| "step": 1760 |
| }, |
| { |
| "epoch": 4.506717850287908, |
| "grad_norm": 0.26461882189193386, |
| "learning_rate": 1.1337637348451369e-06, |
| "loss": 0.1836, |
| "step": 1761 |
| }, |
| { |
| "epoch": 4.509277031349968, |
| "grad_norm": 0.27170229996613754, |
| "learning_rate": 1.1219111278635575e-06, |
| "loss": 0.1746, |
| "step": 1762 |
| }, |
| { |
| "epoch": 4.511836212412028, |
| "grad_norm": 0.28612289439672206, |
| "learning_rate": 1.1101190136439689e-06, |
| "loss": 0.1664, |
| "step": 1763 |
| }, |
| { |
| "epoch": 4.514395393474088, |
| "grad_norm": 0.2814719237385938, |
| "learning_rate": 1.0983874299729092e-06, |
| "loss": 0.1552, |
| "step": 1764 |
| }, |
| { |
| "epoch": 4.516954574536149, |
| "grad_norm": 0.27224408532725913, |
| "learning_rate": 1.086716414442952e-06, |
| "loss": 0.155, |
| "step": 1765 |
| }, |
| { |
| "epoch": 4.519513755598209, |
| "grad_norm": 0.2683837888920839, |
| "learning_rate": 1.0751060044525797e-06, |
| "loss": 0.1947, |
| "step": 1766 |
| }, |
| { |
| "epoch": 4.522072936660269, |
| "grad_norm": 0.266405093166955, |
| "learning_rate": 1.0635562372060825e-06, |
| "loss": 0.179, |
| "step": 1767 |
| }, |
| { |
| "epoch": 4.524632117722329, |
| "grad_norm": 0.26568191781978007, |
| "learning_rate": 1.052067149713416e-06, |
| "loss": 0.1595, |
| "step": 1768 |
| }, |
| { |
| "epoch": 4.527191298784389, |
| "grad_norm": 0.27613787388854283, |
| "learning_rate": 1.0406387787900974e-06, |
| "loss": 0.2022, |
| "step": 1769 |
| }, |
| { |
| "epoch": 4.529750479846449, |
| "grad_norm": 0.2783446591602134, |
| "learning_rate": 1.0292711610570904e-06, |
| "loss": 0.1965, |
| "step": 1770 |
| }, |
| { |
| "epoch": 4.532309660908509, |
| "grad_norm": 0.2754628182404677, |
| "learning_rate": 1.0179643329406752e-06, |
| "loss": 0.1796, |
| "step": 1771 |
| }, |
| { |
| "epoch": 4.534868841970569, |
| "grad_norm": 0.2717991423747503, |
| "learning_rate": 1.0067183306723384e-06, |
| "loss": 0.1872, |
| "step": 1772 |
| }, |
| { |
| "epoch": 4.537428023032629, |
| "grad_norm": 0.26023938540588254, |
| "learning_rate": 9.955331902886645e-07, |
| "loss": 0.1645, |
| "step": 1773 |
| }, |
| { |
| "epoch": 4.53998720409469, |
| "grad_norm": 0.2697243580148783, |
| "learning_rate": 9.844089476312035e-07, |
| "loss": 0.1736, |
| "step": 1774 |
| }, |
| { |
| "epoch": 4.54254638515675, |
| "grad_norm": 0.27089652411524956, |
| "learning_rate": 9.733456383463658e-07, |
| "loss": 0.156, |
| "step": 1775 |
| }, |
| { |
| "epoch": 4.54510556621881, |
| "grad_norm": 0.2625263168411182, |
| "learning_rate": 9.62343297885313e-07, |
| "loss": 0.1709, |
| "step": 1776 |
| }, |
| { |
| "epoch": 4.54766474728087, |
| "grad_norm": 0.2720147925441457, |
| "learning_rate": 9.514019615038395e-07, |
| "loss": 0.1609, |
| "step": 1777 |
| }, |
| { |
| "epoch": 4.55022392834293, |
| "grad_norm": 0.26862738106885103, |
| "learning_rate": 9.40521664262255e-07, |
| "loss": 0.1823, |
| "step": 1778 |
| }, |
| { |
| "epoch": 4.552783109404991, |
| "grad_norm": 0.2858477259373205, |
| "learning_rate": 9.297024410252753e-07, |
| "loss": 0.1719, |
| "step": 1779 |
| }, |
| { |
| "epoch": 4.555342290467051, |
| "grad_norm": 0.2792231695337476, |
| "learning_rate": 9.189443264619102e-07, |
| "loss": 0.2187, |
| "step": 1780 |
| }, |
| { |
| "epoch": 4.557901471529111, |
| "grad_norm": 0.2722587468079133, |
| "learning_rate": 9.082473550453619e-07, |
| "loss": 0.1581, |
| "step": 1781 |
| }, |
| { |
| "epoch": 4.560460652591171, |
| "grad_norm": 0.25994947891197123, |
| "learning_rate": 8.976115610528957e-07, |
| "loss": 0.1813, |
| "step": 1782 |
| }, |
| { |
| "epoch": 4.563019833653231, |
| "grad_norm": 0.2685596280130304, |
| "learning_rate": 8.870369785657451e-07, |
| "loss": 0.1637, |
| "step": 1783 |
| }, |
| { |
| "epoch": 4.565579014715291, |
| "grad_norm": 0.2624613954232775, |
| "learning_rate": 8.765236414690026e-07, |
| "loss": 0.1867, |
| "step": 1784 |
| }, |
| { |
| "epoch": 4.568138195777351, |
| "grad_norm": 0.26985980601394455, |
| "learning_rate": 8.660715834514977e-07, |
| "loss": 0.1812, |
| "step": 1785 |
| }, |
| { |
| "epoch": 4.570697376839411, |
| "grad_norm": 0.2782580432929674, |
| "learning_rate": 8.556808380057013e-07, |
| "loss": 0.1551, |
| "step": 1786 |
| }, |
| { |
| "epoch": 4.5732565579014715, |
| "grad_norm": 0.3229561788111089, |
| "learning_rate": 8.453514384276196e-07, |
| "loss": 0.1665, |
| "step": 1787 |
| }, |
| { |
| "epoch": 4.575815738963532, |
| "grad_norm": 0.26676739208882927, |
| "learning_rate": 8.350834178166755e-07, |
| "loss": 0.2019, |
| "step": 1788 |
| }, |
| { |
| "epoch": 4.578374920025592, |
| "grad_norm": 0.25638061426053027, |
| "learning_rate": 8.248768090756143e-07, |
| "loss": 0.1623, |
| "step": 1789 |
| }, |
| { |
| "epoch": 4.580934101087652, |
| "grad_norm": 0.2769760353268046, |
| "learning_rate": 8.147316449103959e-07, |
| "loss": 0.193, |
| "step": 1790 |
| }, |
| { |
| "epoch": 4.583493282149712, |
| "grad_norm": 0.2827217753260577, |
| "learning_rate": 8.046479578300803e-07, |
| "loss": 0.1573, |
| "step": 1791 |
| }, |
| { |
| "epoch": 4.5860524632117725, |
| "grad_norm": 0.267728272299288, |
| "learning_rate": 7.946257801467339e-07, |
| "loss": 0.1534, |
| "step": 1792 |
| }, |
| { |
| "epoch": 4.588611644273833, |
| "grad_norm": 0.26899124519431056, |
| "learning_rate": 7.846651439753273e-07, |
| "loss": 0.1785, |
| "step": 1793 |
| }, |
| { |
| "epoch": 4.591170825335892, |
| "grad_norm": 0.2655562652017706, |
| "learning_rate": 7.747660812336221e-07, |
| "loss": 0.1632, |
| "step": 1794 |
| }, |
| { |
| "epoch": 4.593730006397953, |
| "grad_norm": 0.2912418780943663, |
| "learning_rate": 7.649286236420806e-07, |
| "loss": 0.1664, |
| "step": 1795 |
| }, |
| { |
| "epoch": 4.5962891874600125, |
| "grad_norm": 0.2773582855251603, |
| "learning_rate": 7.551528027237553e-07, |
| "loss": 0.1649, |
| "step": 1796 |
| }, |
| { |
| "epoch": 4.598848368522073, |
| "grad_norm": 0.2706350032862212, |
| "learning_rate": 7.454386498041865e-07, |
| "loss": 0.1897, |
| "step": 1797 |
| }, |
| { |
| "epoch": 4.601407549584133, |
| "grad_norm": 0.27987597117336843, |
| "learning_rate": 7.357861960113121e-07, |
| "loss": 0.1806, |
| "step": 1798 |
| }, |
| { |
| "epoch": 4.603966730646193, |
| "grad_norm": 0.2554799929519513, |
| "learning_rate": 7.261954722753595e-07, |
| "loss": 0.1454, |
| "step": 1799 |
| }, |
| { |
| "epoch": 4.606525911708253, |
| "grad_norm": 0.28194077726489003, |
| "learning_rate": 7.166665093287539e-07, |
| "loss": 0.1956, |
| "step": 1800 |
| }, |
| { |
| "epoch": 4.6090850927703135, |
| "grad_norm": 0.27206485970301414, |
| "learning_rate": 7.071993377060038e-07, |
| "loss": 0.1813, |
| "step": 1801 |
| }, |
| { |
| "epoch": 4.611644273832374, |
| "grad_norm": 0.27639368969275124, |
| "learning_rate": 6.977939877436224e-07, |
| "loss": 0.1937, |
| "step": 1802 |
| }, |
| { |
| "epoch": 4.614203454894434, |
| "grad_norm": 0.26700294636297844, |
| "learning_rate": 6.884504895800237e-07, |
| "loss": 0.159, |
| "step": 1803 |
| }, |
| { |
| "epoch": 4.616762635956494, |
| "grad_norm": 0.2715005815453172, |
| "learning_rate": 6.791688731554158e-07, |
| "loss": 0.1608, |
| "step": 1804 |
| }, |
| { |
| "epoch": 4.619321817018554, |
| "grad_norm": 0.27127828240291824, |
| "learning_rate": 6.69949168211721e-07, |
| "loss": 0.1857, |
| "step": 1805 |
| }, |
| { |
| "epoch": 4.621880998080615, |
| "grad_norm": 0.28402081462443657, |
| "learning_rate": 6.607914042924756e-07, |
| "loss": 0.1918, |
| "step": 1806 |
| }, |
| { |
| "epoch": 4.624440179142674, |
| "grad_norm": 0.26263908410916775, |
| "learning_rate": 6.516956107427241e-07, |
| "loss": 0.1569, |
| "step": 1807 |
| }, |
| { |
| "epoch": 4.626999360204734, |
| "grad_norm": 0.27371755225997646, |
| "learning_rate": 6.426618167089338e-07, |
| "loss": 0.1557, |
| "step": 1808 |
| }, |
| { |
| "epoch": 4.629558541266794, |
| "grad_norm": 0.26959266513847036, |
| "learning_rate": 6.336900511389133e-07, |
| "loss": 0.1733, |
| "step": 1809 |
| }, |
| { |
| "epoch": 4.6321177223288545, |
| "grad_norm": 0.27453758652223553, |
| "learning_rate": 6.247803427816945e-07, |
| "loss": 0.1635, |
| "step": 1810 |
| }, |
| { |
| "epoch": 4.634676903390915, |
| "grad_norm": 0.2673151789681698, |
| "learning_rate": 6.159327201874598e-07, |
| "loss": 0.1709, |
| "step": 1811 |
| }, |
| { |
| "epoch": 4.637236084452975, |
| "grad_norm": 0.2702926085830735, |
| "learning_rate": 6.071472117074462e-07, |
| "loss": 0.1815, |
| "step": 1812 |
| }, |
| { |
| "epoch": 4.639795265515035, |
| "grad_norm": 0.2788070786022333, |
| "learning_rate": 5.984238454938496e-07, |
| "loss": 0.1527, |
| "step": 1813 |
| }, |
| { |
| "epoch": 4.642354446577095, |
| "grad_norm": 0.27358568856995236, |
| "learning_rate": 5.897626494997366e-07, |
| "loss": 0.1785, |
| "step": 1814 |
| }, |
| { |
| "epoch": 4.644913627639156, |
| "grad_norm": 0.2718095549716457, |
| "learning_rate": 5.811636514789598e-07, |
| "loss": 0.1853, |
| "step": 1815 |
| }, |
| { |
| "epoch": 4.647472808701216, |
| "grad_norm": 0.27759832517042105, |
| "learning_rate": 5.726268789860645e-07, |
| "loss": 0.1646, |
| "step": 1816 |
| }, |
| { |
| "epoch": 4.650031989763276, |
| "grad_norm": 0.26320625609355736, |
| "learning_rate": 5.641523593761977e-07, |
| "loss": 0.1723, |
| "step": 1817 |
| }, |
| { |
| "epoch": 4.652591170825336, |
| "grad_norm": 0.27780583001556897, |
| "learning_rate": 5.557401198050327e-07, |
| "loss": 0.184, |
| "step": 1818 |
| }, |
| { |
| "epoch": 4.6551503518873965, |
| "grad_norm": 0.27504183175562963, |
| "learning_rate": 5.473901872286602e-07, |
| "loss": 0.1712, |
| "step": 1819 |
| }, |
| { |
| "epoch": 4.657709532949456, |
| "grad_norm": 0.2774680321446144, |
| "learning_rate": 5.391025884035239e-07, |
| "loss": 0.1817, |
| "step": 1820 |
| }, |
| { |
| "epoch": 4.660268714011516, |
| "grad_norm": 0.26555781569772313, |
| "learning_rate": 5.308773498863251e-07, |
| "loss": 0.1576, |
| "step": 1821 |
| }, |
| { |
| "epoch": 4.662827895073576, |
| "grad_norm": 0.33797160433489215, |
| "learning_rate": 5.22714498033936e-07, |
| "loss": 0.1929, |
| "step": 1822 |
| }, |
| { |
| "epoch": 4.665387076135636, |
| "grad_norm": 0.28245555374717063, |
| "learning_rate": 5.146140590033199e-07, |
| "loss": 0.1869, |
| "step": 1823 |
| }, |
| { |
| "epoch": 4.667946257197697, |
| "grad_norm": 0.27464067240369455, |
| "learning_rate": 5.065760587514446e-07, |
| "loss": 0.1902, |
| "step": 1824 |
| }, |
| { |
| "epoch": 4.670505438259757, |
| "grad_norm": 0.2672943490021358, |
| "learning_rate": 4.986005230351954e-07, |
| "loss": 0.188, |
| "step": 1825 |
| }, |
| { |
| "epoch": 4.673064619321817, |
| "grad_norm": 0.2657663358279065, |
| "learning_rate": 4.906874774113024e-07, |
| "loss": 0.184, |
| "step": 1826 |
| }, |
| { |
| "epoch": 4.675623800383877, |
| "grad_norm": 0.2674714454963707, |
| "learning_rate": 4.828369472362493e-07, |
| "loss": 0.1469, |
| "step": 1827 |
| }, |
| { |
| "epoch": 4.6781829814459375, |
| "grad_norm": 0.2882898088898947, |
| "learning_rate": 4.750489576662021e-07, |
| "loss": 0.162, |
| "step": 1828 |
| }, |
| { |
| "epoch": 4.680742162507998, |
| "grad_norm": 0.27727023737142387, |
| "learning_rate": 4.6732353365691374e-07, |
| "loss": 0.1543, |
| "step": 1829 |
| }, |
| { |
| "epoch": 4.683301343570058, |
| "grad_norm": 0.2636650641131126, |
| "learning_rate": 4.5966069996365993e-07, |
| "loss": 0.1561, |
| "step": 1830 |
| }, |
| { |
| "epoch": 4.685860524632118, |
| "grad_norm": 0.2682699322744399, |
| "learning_rate": 4.5206048114114775e-07, |
| "loss": 0.1673, |
| "step": 1831 |
| }, |
| { |
| "epoch": 4.688419705694178, |
| "grad_norm": 0.2743352871936966, |
| "learning_rate": 4.4452290154344046e-07, |
| "loss": 0.1807, |
| "step": 1832 |
| }, |
| { |
| "epoch": 4.690978886756238, |
| "grad_norm": 0.2770317090716807, |
| "learning_rate": 4.3704798532388624e-07, |
| "loss": 0.2129, |
| "step": 1833 |
| }, |
| { |
| "epoch": 4.693538067818298, |
| "grad_norm": 0.280836808159879, |
| "learning_rate": 4.296357564350362e-07, |
| "loss": 0.1604, |
| "step": 1834 |
| }, |
| { |
| "epoch": 4.696097248880358, |
| "grad_norm": 0.26525175245500215, |
| "learning_rate": 4.22286238628562e-07, |
| "loss": 0.1763, |
| "step": 1835 |
| }, |
| { |
| "epoch": 4.698656429942418, |
| "grad_norm": 0.2700214270271814, |
| "learning_rate": 4.1499945545518283e-07, |
| "loss": 0.154, |
| "step": 1836 |
| }, |
| { |
| "epoch": 4.7012156110044785, |
| "grad_norm": 0.24327556501731853, |
| "learning_rate": 4.077754302645964e-07, |
| "loss": 0.1616, |
| "step": 1837 |
| }, |
| { |
| "epoch": 4.703774792066539, |
| "grad_norm": 0.2652109590879505, |
| "learning_rate": 4.006141862054014e-07, |
| "loss": 0.1809, |
| "step": 1838 |
| }, |
| { |
| "epoch": 4.706333973128599, |
| "grad_norm": 0.26799443888146, |
| "learning_rate": 3.935157462250128e-07, |
| "loss": 0.1799, |
| "step": 1839 |
| }, |
| { |
| "epoch": 4.708893154190659, |
| "grad_norm": 0.26417342730564347, |
| "learning_rate": 3.8648013306960664e-07, |
| "loss": 0.1697, |
| "step": 1840 |
| }, |
| { |
| "epoch": 4.711452335252719, |
| "grad_norm": 0.2655166510116645, |
| "learning_rate": 3.7950736928402674e-07, |
| "loss": 0.1354, |
| "step": 1841 |
| }, |
| { |
| "epoch": 4.7140115163147795, |
| "grad_norm": 0.27018720652857536, |
| "learning_rate": 3.7259747721173134e-07, |
| "loss": 0.1568, |
| "step": 1842 |
| }, |
| { |
| "epoch": 4.71657069737684, |
| "grad_norm": 0.26944814432184416, |
| "learning_rate": 3.6575047899471085e-07, |
| "loss": 0.1539, |
| "step": 1843 |
| }, |
| { |
| "epoch": 4.719129878438899, |
| "grad_norm": 0.273820427322093, |
| "learning_rate": 3.5896639657342134e-07, |
| "loss": 0.1566, |
| "step": 1844 |
| }, |
| { |
| "epoch": 4.72168905950096, |
| "grad_norm": 0.25408644087595794, |
| "learning_rate": 3.522452516867048e-07, |
| "loss": 0.1751, |
| "step": 1845 |
| }, |
| { |
| "epoch": 4.7242482405630195, |
| "grad_norm": 0.282638324070005, |
| "learning_rate": 3.455870658717353e-07, |
| "loss": 0.1788, |
| "step": 1846 |
| }, |
| { |
| "epoch": 4.72680742162508, |
| "grad_norm": 0.27645267654347633, |
| "learning_rate": 3.3899186046393526e-07, |
| "loss": 0.1856, |
| "step": 1847 |
| }, |
| { |
| "epoch": 4.72936660268714, |
| "grad_norm": 0.28435322569370036, |
| "learning_rate": 3.324596565969174e-07, |
| "loss": 0.1903, |
| "step": 1848 |
| }, |
| { |
| "epoch": 4.7319257837492, |
| "grad_norm": 0.2665475946312596, |
| "learning_rate": 3.2599047520241123e-07, |
| "loss": 0.1625, |
| "step": 1849 |
| }, |
| { |
| "epoch": 4.73448496481126, |
| "grad_norm": 0.2848927094305906, |
| "learning_rate": 3.1958433701019697e-07, |
| "loss": 0.2058, |
| "step": 1850 |
| }, |
| { |
| "epoch": 4.737044145873321, |
| "grad_norm": 0.2704317535606738, |
| "learning_rate": 3.1324126254804524e-07, |
| "loss": 0.1868, |
| "step": 1851 |
| }, |
| { |
| "epoch": 4.739603326935381, |
| "grad_norm": 0.2800291692162128, |
| "learning_rate": 3.069612721416371e-07, |
| "loss": 0.1794, |
| "step": 1852 |
| }, |
| { |
| "epoch": 4.742162507997441, |
| "grad_norm": 0.27622038521669706, |
| "learning_rate": 3.007443859145087e-07, |
| "loss": 0.1701, |
| "step": 1853 |
| }, |
| { |
| "epoch": 4.744721689059501, |
| "grad_norm": 0.28051196613093177, |
| "learning_rate": 2.9459062378799806e-07, |
| "loss": 0.193, |
| "step": 1854 |
| }, |
| { |
| "epoch": 4.747280870121561, |
| "grad_norm": 0.26937886948023293, |
| "learning_rate": 2.8850000548115155e-07, |
| "loss": 0.1645, |
| "step": 1855 |
| }, |
| { |
| "epoch": 4.749840051183622, |
| "grad_norm": 0.2574745053917364, |
| "learning_rate": 2.8247255051068845e-07, |
| "loss": 0.1711, |
| "step": 1856 |
| }, |
| { |
| "epoch": 4.752399232245681, |
| "grad_norm": 0.2716275327438086, |
| "learning_rate": 2.7650827819093005e-07, |
| "loss": 0.1699, |
| "step": 1857 |
| }, |
| { |
| "epoch": 4.754958413307741, |
| "grad_norm": 0.2590049108849854, |
| "learning_rate": 2.706072076337285e-07, |
| "loss": 0.1648, |
| "step": 1858 |
| }, |
| { |
| "epoch": 4.757517594369801, |
| "grad_norm": 0.27267669634639347, |
| "learning_rate": 2.647693577484156e-07, |
| "loss": 0.1887, |
| "step": 1859 |
| }, |
| { |
| "epoch": 4.760076775431862, |
| "grad_norm": 0.27747570018737217, |
| "learning_rate": 2.5899474724174313e-07, |
| "loss": 0.1822, |
| "step": 1860 |
| }, |
| { |
| "epoch": 4.762635956493922, |
| "grad_norm": 0.27256177644643004, |
| "learning_rate": 2.532833946178137e-07, |
| "loss": 0.1833, |
| "step": 1861 |
| }, |
| { |
| "epoch": 4.765195137555982, |
| "grad_norm": 0.26875210471690963, |
| "learning_rate": 2.4763531817802777e-07, |
| "loss": 0.1634, |
| "step": 1862 |
| }, |
| { |
| "epoch": 4.767754318618042, |
| "grad_norm": 0.2841356724669023, |
| "learning_rate": 2.4205053602103015e-07, |
| "loss": 0.1716, |
| "step": 1863 |
| }, |
| { |
| "epoch": 4.770313499680102, |
| "grad_norm": 0.28261688509298977, |
| "learning_rate": 2.365290660426389e-07, |
| "loss": 0.1804, |
| "step": 1864 |
| }, |
| { |
| "epoch": 4.772872680742163, |
| "grad_norm": 0.2673985339554513, |
| "learning_rate": 2.3107092593579905e-07, |
| "loss": 0.17, |
| "step": 1865 |
| }, |
| { |
| "epoch": 4.775431861804223, |
| "grad_norm": 0.2644854641715479, |
| "learning_rate": 2.2567613319051997e-07, |
| "loss": 0.1624, |
| "step": 1866 |
| }, |
| { |
| "epoch": 4.777991042866283, |
| "grad_norm": 0.2657701185136481, |
| "learning_rate": 2.2034470509382234e-07, |
| "loss": 0.1967, |
| "step": 1867 |
| }, |
| { |
| "epoch": 4.780550223928343, |
| "grad_norm": 0.26830383283496656, |
| "learning_rate": 2.1507665872968264e-07, |
| "loss": 0.1743, |
| "step": 1868 |
| }, |
| { |
| "epoch": 4.7831094049904035, |
| "grad_norm": 0.26825661403886375, |
| "learning_rate": 2.0987201097897757e-07, |
| "loss": 0.1697, |
| "step": 1869 |
| }, |
| { |
| "epoch": 4.785668586052463, |
| "grad_norm": 0.25032287801653486, |
| "learning_rate": 2.0473077851942858e-07, |
| "loss": 0.1692, |
| "step": 1870 |
| }, |
| { |
| "epoch": 4.788227767114523, |
| "grad_norm": 0.2540438269392125, |
| "learning_rate": 1.9965297782554848e-07, |
| "loss": 0.1594, |
| "step": 1871 |
| }, |
| { |
| "epoch": 4.790786948176583, |
| "grad_norm": 0.25648856154725486, |
| "learning_rate": 1.9463862516859277e-07, |
| "loss": 0.1862, |
| "step": 1872 |
| }, |
| { |
| "epoch": 4.7933461292386434, |
| "grad_norm": 0.27520225342246696, |
| "learning_rate": 1.896877366165062e-07, |
| "loss": 0.1625, |
| "step": 1873 |
| }, |
| { |
| "epoch": 4.795905310300704, |
| "grad_norm": 0.2879456654071117, |
| "learning_rate": 1.8480032803386505e-07, |
| "loss": 0.1647, |
| "step": 1874 |
| }, |
| { |
| "epoch": 4.798464491362764, |
| "grad_norm": 0.26113619907647856, |
| "learning_rate": 1.799764150818306e-07, |
| "loss": 0.1556, |
| "step": 1875 |
| }, |
| { |
| "epoch": 4.801023672424824, |
| "grad_norm": 0.2684336534763457, |
| "learning_rate": 1.7521601321810687e-07, |
| "loss": 0.1686, |
| "step": 1876 |
| }, |
| { |
| "epoch": 4.803582853486884, |
| "grad_norm": 0.287932659535979, |
| "learning_rate": 1.7051913769687623e-07, |
| "loss": 0.1549, |
| "step": 1877 |
| }, |
| { |
| "epoch": 4.8061420345489445, |
| "grad_norm": 0.26143215817508003, |
| "learning_rate": 1.658858035687594e-07, |
| "loss": 0.1796, |
| "step": 1878 |
| }, |
| { |
| "epoch": 4.808701215611005, |
| "grad_norm": 0.26742321309706935, |
| "learning_rate": 1.6131602568076887e-07, |
| "loss": 0.1723, |
| "step": 1879 |
| }, |
| { |
| "epoch": 4.811260396673065, |
| "grad_norm": 0.26866517810628654, |
| "learning_rate": 1.5680981867625566e-07, |
| "loss": 0.1631, |
| "step": 1880 |
| }, |
| { |
| "epoch": 4.813819577735125, |
| "grad_norm": 0.2664673668196368, |
| "learning_rate": 1.5236719699486256e-07, |
| "loss": 0.1595, |
| "step": 1881 |
| }, |
| { |
| "epoch": 4.816378758797185, |
| "grad_norm": 0.26624359763267497, |
| "learning_rate": 1.479881748724865e-07, |
| "loss": 0.174, |
| "step": 1882 |
| }, |
| { |
| "epoch": 4.818937939859245, |
| "grad_norm": 0.26586320390850965, |
| "learning_rate": 1.4367276634122073e-07, |
| "loss": 0.1733, |
| "step": 1883 |
| }, |
| { |
| "epoch": 4.821497120921305, |
| "grad_norm": 0.2734079332163835, |
| "learning_rate": 1.3942098522931491e-07, |
| "loss": 0.1524, |
| "step": 1884 |
| }, |
| { |
| "epoch": 4.824056301983365, |
| "grad_norm": 0.26860707446851667, |
| "learning_rate": 1.3523284516113955e-07, |
| "loss": 0.1801, |
| "step": 1885 |
| }, |
| { |
| "epoch": 4.826615483045425, |
| "grad_norm": 0.26293219622877667, |
| "learning_rate": 1.3110835955712831e-07, |
| "loss": 0.1789, |
| "step": 1886 |
| }, |
| { |
| "epoch": 4.8291746641074855, |
| "grad_norm": 0.26548180261766674, |
| "learning_rate": 1.2704754163374022e-07, |
| "loss": 0.1643, |
| "step": 1887 |
| }, |
| { |
| "epoch": 4.831733845169546, |
| "grad_norm": 0.27466550793343814, |
| "learning_rate": 1.2305040440342198e-07, |
| "loss": 0.1417, |
| "step": 1888 |
| }, |
| { |
| "epoch": 4.834293026231606, |
| "grad_norm": 0.27044679875019695, |
| "learning_rate": 1.1911696067455902e-07, |
| "loss": 0.1862, |
| "step": 1889 |
| }, |
| { |
| "epoch": 4.836852207293666, |
| "grad_norm": 0.2667382729572236, |
| "learning_rate": 1.1524722305144231e-07, |
| "loss": 0.1671, |
| "step": 1890 |
| }, |
| { |
| "epoch": 4.839411388355726, |
| "grad_norm": 0.26077082259648754, |
| "learning_rate": 1.114412039342172e-07, |
| "loss": 0.1583, |
| "step": 1891 |
| }, |
| { |
| "epoch": 4.841970569417787, |
| "grad_norm": 0.2683982318957702, |
| "learning_rate": 1.0769891551885903e-07, |
| "loss": 0.1689, |
| "step": 1892 |
| }, |
| { |
| "epoch": 4.844529750479847, |
| "grad_norm": 0.26442562426087834, |
| "learning_rate": 1.0402036979711317e-07, |
| "loss": 0.1901, |
| "step": 1893 |
| }, |
| { |
| "epoch": 4.847088931541906, |
| "grad_norm": 0.2628418022630685, |
| "learning_rate": 1.0040557855648169e-07, |
| "loss": 0.1628, |
| "step": 1894 |
| }, |
| { |
| "epoch": 4.849648112603967, |
| "grad_norm": 0.2643402568604478, |
| "learning_rate": 9.685455338016347e-08, |
| "loss": 0.1769, |
| "step": 1895 |
| }, |
| { |
| "epoch": 4.8522072936660265, |
| "grad_norm": 0.27073662782178587, |
| "learning_rate": 9.336730564702745e-08, |
| "loss": 0.163, |
| "step": 1896 |
| }, |
| { |
| "epoch": 4.854766474728087, |
| "grad_norm": 0.2592523308295433, |
| "learning_rate": 8.994384653157718e-08, |
| "loss": 0.1748, |
| "step": 1897 |
| }, |
| { |
| "epoch": 4.857325655790147, |
| "grad_norm": 0.26364468600135543, |
| "learning_rate": 8.658418700391302e-08, |
| "loss": 0.1658, |
| "step": 1898 |
| }, |
| { |
| "epoch": 4.859884836852207, |
| "grad_norm": 0.26261286147824464, |
| "learning_rate": 8.328833782969003e-08, |
| "loss": 0.166, |
| "step": 1899 |
| }, |
| { |
| "epoch": 4.862444017914267, |
| "grad_norm": 0.2752608130158951, |
| "learning_rate": 8.005630957010014e-08, |
| "loss": 0.1832, |
| "step": 1900 |
| }, |
| { |
| "epoch": 4.865003198976328, |
| "grad_norm": 0.27593480590108715, |
| "learning_rate": 7.688811258181883e-08, |
| "loss": 0.1838, |
| "step": 1901 |
| }, |
| { |
| "epoch": 4.867562380038388, |
| "grad_norm": 0.27236472207879586, |
| "learning_rate": 7.378375701698748e-08, |
| "loss": 0.1898, |
| "step": 1902 |
| }, |
| { |
| "epoch": 4.870121561100448, |
| "grad_norm": 0.25195081103744715, |
| "learning_rate": 7.074325282317329e-08, |
| "loss": 0.1754, |
| "step": 1903 |
| }, |
| { |
| "epoch": 4.872680742162508, |
| "grad_norm": 0.2869783000463503, |
| "learning_rate": 6.776660974333605e-08, |
| "loss": 0.1572, |
| "step": 1904 |
| }, |
| { |
| "epoch": 4.8752399232245685, |
| "grad_norm": 0.27441367536012884, |
| "learning_rate": 6.485383731580142e-08, |
| "loss": 0.1766, |
| "step": 1905 |
| }, |
| { |
| "epoch": 4.877799104286629, |
| "grad_norm": 0.28772532733219014, |
| "learning_rate": 6.200494487422771e-08, |
| "loss": 0.1794, |
| "step": 1906 |
| }, |
| { |
| "epoch": 4.880358285348688, |
| "grad_norm": 0.27889648675155043, |
| "learning_rate": 5.921994154758137e-08, |
| "loss": 0.17, |
| "step": 1907 |
| }, |
| { |
| "epoch": 4.882917466410748, |
| "grad_norm": 0.25762876059244744, |
| "learning_rate": 5.649883626009933e-08, |
| "loss": 0.1415, |
| "step": 1908 |
| }, |
| { |
| "epoch": 4.885476647472808, |
| "grad_norm": 0.27418736611080513, |
| "learning_rate": 5.3841637731260054e-08, |
| "loss": 0.1637, |
| "step": 1909 |
| }, |
| { |
| "epoch": 4.888035828534869, |
| "grad_norm": 0.2644534739010174, |
| "learning_rate": 5.1248354475768034e-08, |
| "loss": 0.1856, |
| "step": 1910 |
| }, |
| { |
| "epoch": 4.890595009596929, |
| "grad_norm": 0.26943083477416585, |
| "learning_rate": 4.871899480351605e-08, |
| "loss": 0.1833, |
| "step": 1911 |
| }, |
| { |
| "epoch": 4.893154190658989, |
| "grad_norm": 0.27096897776649476, |
| "learning_rate": 4.6253566819554066e-08, |
| "loss": 0.1646, |
| "step": 1912 |
| }, |
| { |
| "epoch": 4.895713371721049, |
| "grad_norm": 0.2810986979004101, |
| "learning_rate": 4.385207842407813e-08, |
| "loss": 0.1688, |
| "step": 1913 |
| }, |
| { |
| "epoch": 4.8982725527831095, |
| "grad_norm": 0.2780463267788789, |
| "learning_rate": 4.151453731239707e-08, |
| "loss": 0.1889, |
| "step": 1914 |
| }, |
| { |
| "epoch": 4.90083173384517, |
| "grad_norm": 0.2740470136715985, |
| "learning_rate": 3.924095097489922e-08, |
| "loss": 0.1771, |
| "step": 1915 |
| }, |
| { |
| "epoch": 4.90339091490723, |
| "grad_norm": 0.2670609796470983, |
| "learning_rate": 3.703132669704568e-08, |
| "loss": 0.1767, |
| "step": 1916 |
| }, |
| { |
| "epoch": 4.90595009596929, |
| "grad_norm": 0.276634001409615, |
| "learning_rate": 3.4885671559332645e-08, |
| "loss": 0.1698, |
| "step": 1917 |
| }, |
| { |
| "epoch": 4.90850927703135, |
| "grad_norm": 0.2836253559347624, |
| "learning_rate": 3.280399243727806e-08, |
| "loss": 0.1434, |
| "step": 1918 |
| }, |
| { |
| "epoch": 4.9110684580934105, |
| "grad_norm": 0.2596326694355661, |
| "learning_rate": 3.078629600139271e-08, |
| "loss": 0.1738, |
| "step": 1919 |
| }, |
| { |
| "epoch": 4.91362763915547, |
| "grad_norm": 0.26138854550688545, |
| "learning_rate": 2.8832588717164766e-08, |
| "loss": 0.1698, |
| "step": 1920 |
| }, |
| { |
| "epoch": 4.91618682021753, |
| "grad_norm": 0.2629381551374675, |
| "learning_rate": 2.694287684503083e-08, |
| "loss": 0.1803, |
| "step": 1921 |
| }, |
| { |
| "epoch": 4.91874600127959, |
| "grad_norm": 0.2707296291029451, |
| "learning_rate": 2.511716644036932e-08, |
| "loss": 0.2076, |
| "step": 1922 |
| }, |
| { |
| "epoch": 4.9213051823416505, |
| "grad_norm": 0.2643795979993967, |
| "learning_rate": 2.3355463353467168e-08, |
| "loss": 0.1737, |
| "step": 1923 |
| }, |
| { |
| "epoch": 4.923864363403711, |
| "grad_norm": 0.2687386354716593, |
| "learning_rate": 2.1657773229508684e-08, |
| "loss": 0.1525, |
| "step": 1924 |
| }, |
| { |
| "epoch": 4.926423544465771, |
| "grad_norm": 0.25773983452315097, |
| "learning_rate": 2.0024101508555604e-08, |
| "loss": 0.1611, |
| "step": 1925 |
| }, |
| { |
| "epoch": 4.928982725527831, |
| "grad_norm": 0.2759068990299683, |
| "learning_rate": 1.8454453425527098e-08, |
| "loss": 0.149, |
| "step": 1926 |
| }, |
| { |
| "epoch": 4.931541906589891, |
| "grad_norm": 0.2658729323365574, |
| "learning_rate": 1.6948834010190874e-08, |
| "loss": 0.1928, |
| "step": 1927 |
| }, |
| { |
| "epoch": 4.9341010876519515, |
| "grad_norm": 0.28178214512822436, |
| "learning_rate": 1.550724808713877e-08, |
| "loss": 0.1885, |
| "step": 1928 |
| }, |
| { |
| "epoch": 4.936660268714012, |
| "grad_norm": 0.28860699599060785, |
| "learning_rate": 1.4129700275771208e-08, |
| "loss": 0.1466, |
| "step": 1929 |
| }, |
| { |
| "epoch": 4.939219449776072, |
| "grad_norm": 0.2584677694793919, |
| "learning_rate": 1.281619499029274e-08, |
| "loss": 0.1844, |
| "step": 1930 |
| }, |
| { |
| "epoch": 4.941778630838132, |
| "grad_norm": 0.2704355939225655, |
| "learning_rate": 1.1566736439685422e-08, |
| "loss": 0.1687, |
| "step": 1931 |
| }, |
| { |
| "epoch": 4.944337811900192, |
| "grad_norm": 0.2647721632155255, |
| "learning_rate": 1.0381328627702136e-08, |
| "loss": 0.1398, |
| "step": 1932 |
| }, |
| { |
| "epoch": 4.946896992962252, |
| "grad_norm": 0.26415602194940657, |
| "learning_rate": 9.259975352848838e-09, |
| "loss": 0.1665, |
| "step": 1933 |
| }, |
| { |
| "epoch": 4.949456174024312, |
| "grad_norm": 0.26483516576483956, |
| "learning_rate": 8.20268020838455e-09, |
| "loss": 0.1632, |
| "step": 1934 |
| }, |
| { |
| "epoch": 4.952015355086372, |
| "grad_norm": 0.26643152935588643, |
| "learning_rate": 7.209446582292501e-09, |
| "loss": 0.1563, |
| "step": 1935 |
| }, |
| { |
| "epoch": 4.954574536148432, |
| "grad_norm": 0.25910553240725315, |
| "learning_rate": 6.2802776572779005e-09, |
| "loss": 0.1467, |
| "step": 1936 |
| }, |
| { |
| "epoch": 4.957133717210493, |
| "grad_norm": 0.25618053832139825, |
| "learning_rate": 5.415176410765721e-09, |
| "loss": 0.1586, |
| "step": 1937 |
| }, |
| { |
| "epoch": 4.959692898272553, |
| "grad_norm": 0.2519718295297774, |
| "learning_rate": 4.614145614876275e-09, |
| "loss": 0.175, |
| "step": 1938 |
| }, |
| { |
| "epoch": 4.962252079334613, |
| "grad_norm": 0.266276772335997, |
| "learning_rate": 3.877187836422991e-09, |
| "loss": 0.1893, |
| "step": 1939 |
| }, |
| { |
| "epoch": 4.964811260396673, |
| "grad_norm": 0.27130663744108346, |
| "learning_rate": 3.2043054369057523e-09, |
| "loss": 0.1928, |
| "step": 1940 |
| }, |
| { |
| "epoch": 4.967370441458733, |
| "grad_norm": 0.2716068333936989, |
| "learning_rate": 2.5955005725064597e-09, |
| "loss": 0.1714, |
| "step": 1941 |
| }, |
| { |
| "epoch": 4.969929622520794, |
| "grad_norm": 0.26663386055671573, |
| "learning_rate": 2.0507751940690434e-09, |
| "loss": 0.1648, |
| "step": 1942 |
| }, |
| { |
| "epoch": 4.972488803582854, |
| "grad_norm": 0.26494109005360517, |
| "learning_rate": 1.5701310471083476e-09, |
| "loss": 0.1591, |
| "step": 1943 |
| }, |
| { |
| "epoch": 4.975047984644913, |
| "grad_norm": 0.2655087142061252, |
| "learning_rate": 1.1535696717945855e-09, |
| "loss": 0.1437, |
| "step": 1944 |
| }, |
| { |
| "epoch": 4.977607165706974, |
| "grad_norm": 0.27477087152296875, |
| "learning_rate": 8.010924029533406e-10, |
| "loss": 0.1491, |
| "step": 1945 |
| }, |
| { |
| "epoch": 4.980166346769034, |
| "grad_norm": 0.2590976709734058, |
| "learning_rate": 5.127003700589051e-10, |
| "loss": 0.1584, |
| "step": 1946 |
| }, |
| { |
| "epoch": 4.982725527831094, |
| "grad_norm": 0.267447188689341, |
| "learning_rate": 2.8839449723205847e-10, |
| "loss": 0.1975, |
| "step": 1947 |
| }, |
| { |
| "epoch": 4.985284708893154, |
| "grad_norm": 0.2598735449781636, |
| "learning_rate": 1.2817550323784843e-10, |
| "loss": 0.2008, |
| "step": 1948 |
| }, |
| { |
| "epoch": 4.987843889955214, |
| "grad_norm": 0.26368976010696143, |
| "learning_rate": 3.2043901478928666e-11, |
| "loss": 0.1795, |
| "step": 1949 |
| }, |
| { |
| "epoch": 4.990403071017274, |
| "grad_norm": 0.2645697161648628, |
| "learning_rate": 0.0, |
| "loss": 0.1707, |
| "step": 1950 |
| }, |
| { |
| "epoch": 4.990403071017274, |
| "step": 1950, |
| "total_flos": 3.115960359367213e+18, |
| "train_loss": 0.34092234334120386, |
| "train_runtime": 73926.9587, |
| "train_samples_per_second": 3.382, |
| "train_steps_per_second": 0.026 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 1950, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.115960359367213e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |