kcgpt2 / trainer_state.json
beomi's picture
Newly trained kcgpt2
d484d83
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.2272680646532144,
"global_step": 1340000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 9.997710320774901e-05,
"loss": 8.4343,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 9.995420641549801e-05,
"loss": 7.6761,
"step": 1000
},
{
"epoch": 0.0,
"learning_rate": 9.993130962324703e-05,
"loss": 7.2953,
"step": 1500
},
{
"epoch": 0.0,
"learning_rate": 9.990841283099604e-05,
"loss": 7.1186,
"step": 2000
},
{
"epoch": 0.0,
"learning_rate": 9.988551603874504e-05,
"loss": 6.9563,
"step": 2500
},
{
"epoch": 0.0,
"learning_rate": 9.986261924649405e-05,
"loss": 6.8496,
"step": 3000
},
{
"epoch": 0.0,
"learning_rate": 9.983972245424305e-05,
"loss": 6.7726,
"step": 3500
},
{
"epoch": 0.0,
"learning_rate": 9.981682566199206e-05,
"loss": 6.6604,
"step": 4000
},
{
"epoch": 0.0,
"learning_rate": 9.979392886974108e-05,
"loss": 6.5838,
"step": 4500
},
{
"epoch": 0.0,
"learning_rate": 9.977103207749008e-05,
"loss": 6.5021,
"step": 5000
},
{
"epoch": 0.01,
"learning_rate": 9.974813528523909e-05,
"loss": 6.4388,
"step": 5500
},
{
"epoch": 0.01,
"learning_rate": 9.972523849298809e-05,
"loss": 6.3715,
"step": 6000
},
{
"epoch": 0.01,
"learning_rate": 9.97023874943216e-05,
"loss": 6.3208,
"step": 6500
},
{
"epoch": 0.01,
"learning_rate": 9.967953649565512e-05,
"loss": 6.2523,
"step": 7000
},
{
"epoch": 0.01,
"learning_rate": 9.965663970340412e-05,
"loss": 6.2069,
"step": 7500
},
{
"epoch": 0.01,
"learning_rate": 9.963374291115313e-05,
"loss": 6.1543,
"step": 8000
},
{
"epoch": 0.01,
"learning_rate": 9.961084611890213e-05,
"loss": 6.1284,
"step": 8500
},
{
"epoch": 0.01,
"learning_rate": 9.958794932665114e-05,
"loss": 6.0781,
"step": 9000
},
{
"epoch": 0.01,
"learning_rate": 9.956509832798466e-05,
"loss": 6.0411,
"step": 9500
},
{
"epoch": 0.01,
"learning_rate": 9.954220153573366e-05,
"loss": 5.9981,
"step": 10000
},
{
"epoch": 0.01,
"learning_rate": 9.951930474348267e-05,
"loss": 5.9761,
"step": 10500
},
{
"epoch": 0.01,
"learning_rate": 9.949640795123167e-05,
"loss": 5.9395,
"step": 11000
},
{
"epoch": 0.01,
"learning_rate": 9.947351115898068e-05,
"loss": 5.8878,
"step": 11500
},
{
"epoch": 0.01,
"learning_rate": 9.94506143667297e-05,
"loss": 5.894,
"step": 12000
},
{
"epoch": 0.01,
"learning_rate": 9.94277175744787e-05,
"loss": 5.8519,
"step": 12500
},
{
"epoch": 0.01,
"learning_rate": 9.940486657581219e-05,
"loss": 5.8273,
"step": 13000
},
{
"epoch": 0.01,
"learning_rate": 9.93819697835612e-05,
"loss": 5.8008,
"step": 13500
},
{
"epoch": 0.01,
"learning_rate": 9.93590729913102e-05,
"loss": 5.7704,
"step": 14000
},
{
"epoch": 0.01,
"learning_rate": 9.933617619905922e-05,
"loss": 5.7637,
"step": 14500
},
{
"epoch": 0.01,
"learning_rate": 9.931327940680823e-05,
"loss": 5.7453,
"step": 15000
},
{
"epoch": 0.01,
"learning_rate": 9.929038261455723e-05,
"loss": 5.7167,
"step": 15500
},
{
"epoch": 0.01,
"learning_rate": 9.926748582230624e-05,
"loss": 5.6919,
"step": 16000
},
{
"epoch": 0.02,
"learning_rate": 9.924458903005524e-05,
"loss": 5.6712,
"step": 16500
},
{
"epoch": 0.02,
"learning_rate": 9.922173803138876e-05,
"loss": 5.6498,
"step": 17000
},
{
"epoch": 0.02,
"learning_rate": 9.919884123913777e-05,
"loss": 5.62,
"step": 17500
},
{
"epoch": 0.02,
"learning_rate": 9.917599024047127e-05,
"loss": 5.61,
"step": 18000
},
{
"epoch": 0.02,
"learning_rate": 9.915309344822028e-05,
"loss": 5.5865,
"step": 18500
},
{
"epoch": 0.02,
"learning_rate": 9.913019665596928e-05,
"loss": 5.5761,
"step": 19000
},
{
"epoch": 0.02,
"learning_rate": 9.91072998637183e-05,
"loss": 5.5614,
"step": 19500
},
{
"epoch": 0.02,
"learning_rate": 9.90844488650518e-05,
"loss": 5.5512,
"step": 20000
},
{
"epoch": 0.02,
"learning_rate": 9.906155207280081e-05,
"loss": 5.5278,
"step": 20500
},
{
"epoch": 0.02,
"learning_rate": 9.903865528054982e-05,
"loss": 5.5045,
"step": 21000
},
{
"epoch": 0.02,
"learning_rate": 9.901575848829882e-05,
"loss": 5.4876,
"step": 21500
},
{
"epoch": 0.02,
"learning_rate": 9.899286169604784e-05,
"loss": 5.4824,
"step": 22000
},
{
"epoch": 0.02,
"learning_rate": 9.896996490379684e-05,
"loss": 5.4701,
"step": 22500
},
{
"epoch": 0.02,
"learning_rate": 9.894706811154585e-05,
"loss": 5.4463,
"step": 23000
},
{
"epoch": 0.02,
"learning_rate": 9.892417131929485e-05,
"loss": 5.4411,
"step": 23500
},
{
"epoch": 0.02,
"learning_rate": 9.890127452704386e-05,
"loss": 5.4202,
"step": 24000
},
{
"epoch": 0.02,
"learning_rate": 9.887846932196188e-05,
"loss": 5.4271,
"step": 24500
},
{
"epoch": 0.02,
"learning_rate": 9.885557252971088e-05,
"loss": 5.4054,
"step": 25000
},
{
"epoch": 0.02,
"learning_rate": 9.883267573745989e-05,
"loss": 5.3969,
"step": 25500
},
{
"epoch": 0.02,
"learning_rate": 9.88097789452089e-05,
"loss": 5.3873,
"step": 26000
},
{
"epoch": 0.02,
"learning_rate": 9.87868821529579e-05,
"loss": 5.378,
"step": 26500
},
{
"epoch": 0.02,
"learning_rate": 9.876403115429142e-05,
"loss": 5.3648,
"step": 27000
},
{
"epoch": 0.03,
"learning_rate": 9.874113436204042e-05,
"loss": 5.3609,
"step": 27500
},
{
"epoch": 0.03,
"learning_rate": 9.871823756978943e-05,
"loss": 5.3499,
"step": 28000
},
{
"epoch": 0.03,
"learning_rate": 9.869534077753843e-05,
"loss": 5.3344,
"step": 28500
},
{
"epoch": 0.03,
"learning_rate": 9.867244398528744e-05,
"loss": 5.3369,
"step": 29000
},
{
"epoch": 0.03,
"learning_rate": 9.864954719303646e-05,
"loss": 5.3206,
"step": 29500
},
{
"epoch": 0.03,
"learning_rate": 9.862665040078546e-05,
"loss": 5.3247,
"step": 30000
},
{
"epoch": 0.03,
"learning_rate": 9.860375360853447e-05,
"loss": 5.3145,
"step": 30500
},
{
"epoch": 0.03,
"learning_rate": 9.858085681628347e-05,
"loss": 5.298,
"step": 31000
},
{
"epoch": 0.03,
"learning_rate": 9.855805161120148e-05,
"loss": 5.2957,
"step": 31500
},
{
"epoch": 0.03,
"learning_rate": 9.85351548189505e-05,
"loss": 5.2907,
"step": 32000
},
{
"epoch": 0.03,
"learning_rate": 9.85122580266995e-05,
"loss": 5.2881,
"step": 32500
},
{
"epoch": 0.03,
"learning_rate": 9.848936123444851e-05,
"loss": 5.2615,
"step": 33000
},
{
"epoch": 0.03,
"learning_rate": 9.846651023578201e-05,
"loss": 5.2705,
"step": 33500
},
{
"epoch": 0.03,
"learning_rate": 9.844361344353102e-05,
"loss": 5.2556,
"step": 34000
},
{
"epoch": 0.03,
"learning_rate": 9.842071665128004e-05,
"loss": 5.2577,
"step": 34500
},
{
"epoch": 0.03,
"learning_rate": 9.839781985902904e-05,
"loss": 5.2384,
"step": 35000
},
{
"epoch": 0.03,
"learning_rate": 9.837492306677805e-05,
"loss": 5.2469,
"step": 35500
},
{
"epoch": 0.03,
"learning_rate": 9.835202627452705e-05,
"loss": 5.2298,
"step": 36000
},
{
"epoch": 0.03,
"learning_rate": 9.832912948227606e-05,
"loss": 5.2368,
"step": 36500
},
{
"epoch": 0.03,
"learning_rate": 9.830623269002506e-05,
"loss": 5.2303,
"step": 37000
},
{
"epoch": 0.03,
"learning_rate": 9.828333589777407e-05,
"loss": 5.2134,
"step": 37500
},
{
"epoch": 0.03,
"learning_rate": 9.826048489910757e-05,
"loss": 5.222,
"step": 38000
},
{
"epoch": 0.04,
"learning_rate": 9.823767969402559e-05,
"loss": 5.1964,
"step": 38500
},
{
"epoch": 0.04,
"learning_rate": 9.82147829017746e-05,
"loss": 5.2077,
"step": 39000
},
{
"epoch": 0.04,
"learning_rate": 9.81918861095236e-05,
"loss": 5.1936,
"step": 39500
},
{
"epoch": 0.04,
"learning_rate": 9.816898931727261e-05,
"loss": 5.2013,
"step": 40000
},
{
"epoch": 0.04,
"learning_rate": 9.814609252502161e-05,
"loss": 5.1999,
"step": 40500
},
{
"epoch": 0.04,
"learning_rate": 9.812319573277062e-05,
"loss": 5.1802,
"step": 41000
},
{
"epoch": 0.04,
"learning_rate": 9.810029894051964e-05,
"loss": 5.1736,
"step": 41500
},
{
"epoch": 0.04,
"learning_rate": 9.807740214826864e-05,
"loss": 5.1556,
"step": 42000
},
{
"epoch": 0.04,
"learning_rate": 9.805450535601765e-05,
"loss": 5.167,
"step": 42500
},
{
"epoch": 0.04,
"learning_rate": 9.803165435735115e-05,
"loss": 5.1708,
"step": 43000
},
{
"epoch": 0.04,
"learning_rate": 9.800875756510016e-05,
"loss": 5.1625,
"step": 43500
},
{
"epoch": 0.04,
"learning_rate": 9.798586077284918e-05,
"loss": 5.1446,
"step": 44000
},
{
"epoch": 0.04,
"learning_rate": 9.796296398059818e-05,
"loss": 5.1512,
"step": 44500
},
{
"epoch": 0.04,
"learning_rate": 9.794006718834719e-05,
"loss": 5.1609,
"step": 45000
},
{
"epoch": 0.04,
"learning_rate": 9.791717039609619e-05,
"loss": 5.1432,
"step": 45500
},
{
"epoch": 0.04,
"learning_rate": 9.78943193974297e-05,
"loss": 5.1449,
"step": 46000
},
{
"epoch": 0.04,
"learning_rate": 9.787142260517871e-05,
"loss": 5.1338,
"step": 46500
},
{
"epoch": 0.04,
"learning_rate": 9.784852581292772e-05,
"loss": 5.1284,
"step": 47000
},
{
"epoch": 0.04,
"learning_rate": 9.782562902067672e-05,
"loss": 5.1237,
"step": 47500
},
{
"epoch": 0.04,
"learning_rate": 9.780273222842573e-05,
"loss": 5.1232,
"step": 48000
},
{
"epoch": 0.04,
"learning_rate": 9.777983543617473e-05,
"loss": 5.1129,
"step": 48500
},
{
"epoch": 0.04,
"learning_rate": 9.775693864392374e-05,
"loss": 5.1148,
"step": 49000
},
{
"epoch": 0.05,
"learning_rate": 9.773408764525726e-05,
"loss": 5.1125,
"step": 49500
},
{
"epoch": 0.05,
"learning_rate": 9.771119085300626e-05,
"loss": 5.0908,
"step": 50000
},
{
"epoch": 0.05,
"learning_rate": 9.768829406075527e-05,
"loss": 5.1003,
"step": 50500
},
{
"epoch": 0.05,
"learning_rate": 9.766539726850427e-05,
"loss": 5.1157,
"step": 51000
},
{
"epoch": 0.05,
"learning_rate": 9.764250047625328e-05,
"loss": 5.0904,
"step": 51500
},
{
"epoch": 0.05,
"learning_rate": 9.76196494775868e-05,
"loss": 5.095,
"step": 52000
},
{
"epoch": 0.05,
"learning_rate": 9.75967526853358e-05,
"loss": 5.0809,
"step": 52500
},
{
"epoch": 0.05,
"learning_rate": 9.757385589308481e-05,
"loss": 5.0771,
"step": 53000
},
{
"epoch": 0.05,
"learning_rate": 9.755100489441831e-05,
"loss": 5.0863,
"step": 53500
},
{
"epoch": 0.05,
"learning_rate": 9.752810810216733e-05,
"loss": 5.081,
"step": 54000
},
{
"epoch": 0.05,
"learning_rate": 9.750521130991634e-05,
"loss": 5.0804,
"step": 54500
},
{
"epoch": 0.05,
"learning_rate": 9.748231451766534e-05,
"loss": 5.0933,
"step": 55000
},
{
"epoch": 0.05,
"learning_rate": 9.745941772541435e-05,
"loss": 5.0863,
"step": 55500
},
{
"epoch": 0.05,
"learning_rate": 9.743652093316335e-05,
"loss": 5.0636,
"step": 56000
},
{
"epoch": 0.05,
"learning_rate": 9.741362414091236e-05,
"loss": 5.0717,
"step": 56500
},
{
"epoch": 0.05,
"learning_rate": 9.739072734866138e-05,
"loss": 5.0522,
"step": 57000
},
{
"epoch": 0.05,
"learning_rate": 9.736783055641038e-05,
"loss": 5.0556,
"step": 57500
},
{
"epoch": 0.05,
"learning_rate": 9.734497955774389e-05,
"loss": 5.0544,
"step": 58000
},
{
"epoch": 0.05,
"learning_rate": 9.732208276549289e-05,
"loss": 5.0498,
"step": 58500
},
{
"epoch": 0.05,
"learning_rate": 9.72991859732419e-05,
"loss": 5.0535,
"step": 59000
},
{
"epoch": 0.05,
"learning_rate": 9.727628918099092e-05,
"loss": 5.0444,
"step": 59500
},
{
"epoch": 0.05,
"learning_rate": 9.725339238873992e-05,
"loss": 5.0377,
"step": 60000
},
{
"epoch": 0.06,
"learning_rate": 9.723049559648893e-05,
"loss": 5.0399,
"step": 60500
},
{
"epoch": 0.06,
"learning_rate": 9.720759880423793e-05,
"loss": 5.0243,
"step": 61000
},
{
"epoch": 0.06,
"learning_rate": 9.718470201198692e-05,
"loss": 5.0291,
"step": 61500
},
{
"epoch": 0.06,
"learning_rate": 9.716180521973594e-05,
"loss": 5.0342,
"step": 62000
},
{
"epoch": 0.06,
"learning_rate": 9.713895422106945e-05,
"loss": 5.035,
"step": 62500
},
{
"epoch": 0.06,
"learning_rate": 9.711610322240295e-05,
"loss": 5.0205,
"step": 63000
},
{
"epoch": 0.06,
"learning_rate": 9.709320643015196e-05,
"loss": 5.0258,
"step": 63500
},
{
"epoch": 0.06,
"learning_rate": 9.707030963790096e-05,
"loss": 5.0201,
"step": 64000
},
{
"epoch": 0.06,
"learning_rate": 9.704741284564998e-05,
"loss": 5.0222,
"step": 64500
},
{
"epoch": 0.06,
"learning_rate": 9.702456184698349e-05,
"loss": 5.0088,
"step": 65000
},
{
"epoch": 0.06,
"learning_rate": 9.700166505473249e-05,
"loss": 5.0124,
"step": 65500
},
{
"epoch": 0.06,
"learning_rate": 9.69787682624815e-05,
"loss": 5.0043,
"step": 66000
},
{
"epoch": 0.06,
"learning_rate": 9.69558714702305e-05,
"loss": 5.004,
"step": 66500
},
{
"epoch": 0.06,
"learning_rate": 9.693297467797952e-05,
"loss": 5.0045,
"step": 67000
},
{
"epoch": 0.06,
"learning_rate": 9.691007788572853e-05,
"loss": 4.9946,
"step": 67500
},
{
"epoch": 0.06,
"learning_rate": 9.688718109347753e-05,
"loss": 5.0021,
"step": 68000
},
{
"epoch": 0.06,
"learning_rate": 9.686433009481104e-05,
"loss": 5.0055,
"step": 68500
},
{
"epoch": 0.06,
"learning_rate": 9.684143330256004e-05,
"loss": 4.9827,
"step": 69000
},
{
"epoch": 0.06,
"learning_rate": 9.681853651030906e-05,
"loss": 4.9951,
"step": 69500
},
{
"epoch": 0.06,
"learning_rate": 9.679563971805806e-05,
"loss": 5.0007,
"step": 70000
},
{
"epoch": 0.06,
"learning_rate": 9.677274292580707e-05,
"loss": 4.9938,
"step": 70500
},
{
"epoch": 0.07,
"learning_rate": 9.674984613355607e-05,
"loss": 4.9775,
"step": 71000
},
{
"epoch": 0.07,
"learning_rate": 9.672699513488958e-05,
"loss": 4.9694,
"step": 71500
},
{
"epoch": 0.07,
"learning_rate": 9.67040983426386e-05,
"loss": 4.9858,
"step": 72000
},
{
"epoch": 0.07,
"learning_rate": 9.66812015503876e-05,
"loss": 4.9756,
"step": 72500
},
{
"epoch": 0.07,
"learning_rate": 9.665830475813661e-05,
"loss": 4.9821,
"step": 73000
},
{
"epoch": 0.07,
"learning_rate": 9.663545375947011e-05,
"loss": 4.9886,
"step": 73500
},
{
"epoch": 0.07,
"learning_rate": 9.661260276080363e-05,
"loss": 4.9627,
"step": 74000
},
{
"epoch": 0.07,
"learning_rate": 9.658970596855264e-05,
"loss": 4.9704,
"step": 74500
},
{
"epoch": 0.07,
"learning_rate": 9.656680917630164e-05,
"loss": 4.9523,
"step": 75000
},
{
"epoch": 0.07,
"learning_rate": 9.654391238405065e-05,
"loss": 4.9592,
"step": 75500
},
{
"epoch": 0.07,
"learning_rate": 9.652101559179965e-05,
"loss": 4.974,
"step": 76000
},
{
"epoch": 0.07,
"learning_rate": 9.649811879954866e-05,
"loss": 4.961,
"step": 76500
},
{
"epoch": 0.07,
"learning_rate": 9.647522200729768e-05,
"loss": 4.9724,
"step": 77000
},
{
"epoch": 0.07,
"learning_rate": 9.645232521504668e-05,
"loss": 4.9627,
"step": 77500
},
{
"epoch": 0.07,
"learning_rate": 9.642942842279569e-05,
"loss": 4.9562,
"step": 78000
},
{
"epoch": 0.07,
"learning_rate": 9.640653163054469e-05,
"loss": 4.9546,
"step": 78500
},
{
"epoch": 0.07,
"learning_rate": 9.63836806318782e-05,
"loss": 4.9543,
"step": 79000
},
{
"epoch": 0.07,
"learning_rate": 9.636078383962722e-05,
"loss": 4.9458,
"step": 79500
},
{
"epoch": 0.07,
"learning_rate": 9.633788704737622e-05,
"loss": 4.9604,
"step": 80000
},
{
"epoch": 0.07,
"learning_rate": 9.631499025512523e-05,
"loss": 4.9354,
"step": 80500
},
{
"epoch": 0.07,
"learning_rate": 9.629209346287423e-05,
"loss": 4.9336,
"step": 81000
},
{
"epoch": 0.07,
"learning_rate": 9.626919667062324e-05,
"loss": 4.9344,
"step": 81500
},
{
"epoch": 0.08,
"learning_rate": 9.624634567195676e-05,
"loss": 4.9516,
"step": 82000
},
{
"epoch": 0.08,
"learning_rate": 9.622344887970576e-05,
"loss": 4.9416,
"step": 82500
},
{
"epoch": 0.08,
"learning_rate": 9.620055208745477e-05,
"loss": 4.948,
"step": 83000
},
{
"epoch": 0.08,
"learning_rate": 9.617765529520377e-05,
"loss": 4.9374,
"step": 83500
},
{
"epoch": 0.08,
"learning_rate": 9.615475850295278e-05,
"loss": 4.9447,
"step": 84000
},
{
"epoch": 0.08,
"learning_rate": 9.61319075042863e-05,
"loss": 4.9397,
"step": 84500
},
{
"epoch": 0.08,
"learning_rate": 9.61090107120353e-05,
"loss": 4.9379,
"step": 85000
},
{
"epoch": 0.08,
"learning_rate": 9.608611391978429e-05,
"loss": 4.9344,
"step": 85500
},
{
"epoch": 0.08,
"learning_rate": 9.60632171275333e-05,
"loss": 4.9262,
"step": 86000
},
{
"epoch": 0.08,
"learning_rate": 9.604036612886682e-05,
"loss": 4.9254,
"step": 86500
},
{
"epoch": 0.08,
"learning_rate": 9.601746933661582e-05,
"loss": 4.9279,
"step": 87000
},
{
"epoch": 0.08,
"learning_rate": 9.599457254436483e-05,
"loss": 4.9221,
"step": 87500
},
{
"epoch": 0.08,
"learning_rate": 9.597167575211383e-05,
"loss": 4.9334,
"step": 88000
},
{
"epoch": 0.08,
"learning_rate": 9.594877895986284e-05,
"loss": 4.9259,
"step": 88500
},
{
"epoch": 0.08,
"learning_rate": 9.592588216761184e-05,
"loss": 4.9217,
"step": 89000
},
{
"epoch": 0.08,
"learning_rate": 9.590298537536086e-05,
"loss": 4.9246,
"step": 89500
},
{
"epoch": 0.08,
"learning_rate": 9.588013437669437e-05,
"loss": 4.9131,
"step": 90000
},
{
"epoch": 0.08,
"learning_rate": 9.585723758444337e-05,
"loss": 4.9184,
"step": 90500
},
{
"epoch": 0.08,
"learning_rate": 9.583434079219238e-05,
"loss": 4.8998,
"step": 91000
},
{
"epoch": 0.08,
"learning_rate": 9.581144399994138e-05,
"loss": 4.9117,
"step": 91500
},
{
"epoch": 0.08,
"learning_rate": 9.57885472076904e-05,
"loss": 4.9162,
"step": 92000
},
{
"epoch": 0.08,
"learning_rate": 9.57656504154394e-05,
"loss": 4.901,
"step": 92500
},
{
"epoch": 0.09,
"learning_rate": 9.574275362318841e-05,
"loss": 4.9029,
"step": 93000
},
{
"epoch": 0.09,
"learning_rate": 9.571985683093741e-05,
"loss": 4.9123,
"step": 93500
},
{
"epoch": 0.09,
"learning_rate": 9.569696003868642e-05,
"loss": 4.9024,
"step": 94000
},
{
"epoch": 0.09,
"learning_rate": 9.567406324643542e-05,
"loss": 4.8946,
"step": 94500
},
{
"epoch": 0.09,
"learning_rate": 9.565121224776894e-05,
"loss": 4.8973,
"step": 95000
},
{
"epoch": 0.09,
"learning_rate": 9.562836124910245e-05,
"loss": 4.9087,
"step": 95500
},
{
"epoch": 0.09,
"learning_rate": 9.560546445685145e-05,
"loss": 4.9024,
"step": 96000
},
{
"epoch": 0.09,
"learning_rate": 9.558256766460046e-05,
"loss": 4.906,
"step": 96500
},
{
"epoch": 0.09,
"learning_rate": 9.555967087234946e-05,
"loss": 4.9054,
"step": 97000
},
{
"epoch": 0.09,
"learning_rate": 9.553677408009848e-05,
"loss": 4.9136,
"step": 97500
},
{
"epoch": 0.09,
"learning_rate": 9.551387728784749e-05,
"loss": 4.9013,
"step": 98000
},
{
"epoch": 0.09,
"learning_rate": 9.54909804955965e-05,
"loss": 4.8808,
"step": 98500
},
{
"epoch": 0.09,
"learning_rate": 9.54680837033455e-05,
"loss": 4.8892,
"step": 99000
},
{
"epoch": 0.09,
"learning_rate": 9.54451869110945e-05,
"loss": 4.8987,
"step": 99500
},
{
"epoch": 0.09,
"learning_rate": 9.542229011884352e-05,
"loss": 4.89,
"step": 100000
},
{
"epoch": 0.09,
"learning_rate": 9.539943912017703e-05,
"loss": 4.8891,
"step": 100500
},
{
"epoch": 0.09,
"learning_rate": 9.537654232792603e-05,
"loss": 4.8871,
"step": 101000
},
{
"epoch": 0.09,
"learning_rate": 9.535364553567504e-05,
"loss": 4.881,
"step": 101500
},
{
"epoch": 0.09,
"learning_rate": 9.533079453700854e-05,
"loss": 4.8753,
"step": 102000
},
{
"epoch": 0.09,
"learning_rate": 9.530789774475756e-05,
"loss": 4.8809,
"step": 102500
},
{
"epoch": 0.09,
"learning_rate": 9.528500095250657e-05,
"loss": 4.8657,
"step": 103000
},
{
"epoch": 0.09,
"learning_rate": 9.526210416025557e-05,
"loss": 4.8868,
"step": 103500
},
{
"epoch": 0.1,
"learning_rate": 9.523920736800458e-05,
"loss": 4.8808,
"step": 104000
},
{
"epoch": 0.1,
"learning_rate": 9.521635636933808e-05,
"loss": 4.8774,
"step": 104500
},
{
"epoch": 0.1,
"learning_rate": 9.51934595770871e-05,
"loss": 4.8726,
"step": 105000
},
{
"epoch": 0.1,
"learning_rate": 9.51705627848361e-05,
"loss": 4.863,
"step": 105500
},
{
"epoch": 0.1,
"learning_rate": 9.514766599258511e-05,
"loss": 4.871,
"step": 106000
},
{
"epoch": 0.1,
"learning_rate": 9.512476920033412e-05,
"loss": 4.866,
"step": 106500
},
{
"epoch": 0.1,
"learning_rate": 9.510187240808312e-05,
"loss": 4.8688,
"step": 107000
},
{
"epoch": 0.1,
"learning_rate": 9.507902140941664e-05,
"loss": 4.8668,
"step": 107500
},
{
"epoch": 0.1,
"learning_rate": 9.505612461716565e-05,
"loss": 4.8611,
"step": 108000
},
{
"epoch": 0.1,
"learning_rate": 9.503322782491465e-05,
"loss": 4.8612,
"step": 108500
},
{
"epoch": 0.1,
"learning_rate": 9.501033103266366e-05,
"loss": 4.8643,
"step": 109000
},
{
"epoch": 0.1,
"learning_rate": 9.498743424041266e-05,
"loss": 4.8752,
"step": 109500
},
{
"epoch": 0.1,
"learning_rate": 9.496453744816167e-05,
"loss": 4.8657,
"step": 110000
},
{
"epoch": 0.1,
"learning_rate": 9.494164065591067e-05,
"loss": 4.8758,
"step": 110500
},
{
"epoch": 0.1,
"learning_rate": 9.491874386365968e-05,
"loss": 4.8615,
"step": 111000
},
{
"epoch": 0.1,
"learning_rate": 9.489589286499318e-05,
"loss": 4.864,
"step": 111500
},
{
"epoch": 0.1,
"learning_rate": 9.487299607274219e-05,
"loss": 4.8481,
"step": 112000
},
{
"epoch": 0.1,
"learning_rate": 9.48500992804912e-05,
"loss": 4.8423,
"step": 112500
},
{
"epoch": 0.1,
"learning_rate": 9.482720248824021e-05,
"loss": 4.8536,
"step": 113000
},
{
"epoch": 0.1,
"learning_rate": 9.480430569598922e-05,
"loss": 4.8663,
"step": 113500
},
{
"epoch": 0.1,
"learning_rate": 9.478140890373822e-05,
"loss": 4.8439,
"step": 114000
},
{
"epoch": 0.1,
"learning_rate": 9.475851211148723e-05,
"loss": 4.8723,
"step": 114500
},
{
"epoch": 0.11,
"learning_rate": 9.473561531923623e-05,
"loss": 4.8474,
"step": 115000
},
{
"epoch": 0.11,
"learning_rate": 9.471276432056975e-05,
"loss": 4.8478,
"step": 115500
},
{
"epoch": 0.11,
"learning_rate": 9.468986752831875e-05,
"loss": 4.8462,
"step": 116000
},
{
"epoch": 0.11,
"learning_rate": 9.466697073606776e-05,
"loss": 4.8397,
"step": 116500
},
{
"epoch": 0.11,
"learning_rate": 9.464411973740127e-05,
"loss": 4.8486,
"step": 117000
},
{
"epoch": 0.11,
"learning_rate": 9.462126873873478e-05,
"loss": 4.8517,
"step": 117500
},
{
"epoch": 0.11,
"learning_rate": 9.459837194648379e-05,
"loss": 4.8274,
"step": 118000
},
{
"epoch": 0.11,
"learning_rate": 9.45754751542328e-05,
"loss": 4.8447,
"step": 118500
},
{
"epoch": 0.11,
"learning_rate": 9.45525783619818e-05,
"loss": 4.8462,
"step": 119000
},
{
"epoch": 0.11,
"learning_rate": 9.452972736331532e-05,
"loss": 4.8347,
"step": 119500
},
{
"epoch": 0.11,
"learning_rate": 9.450683057106432e-05,
"loss": 4.8413,
"step": 120000
},
{
"epoch": 0.11,
"learning_rate": 9.448393377881333e-05,
"loss": 4.8332,
"step": 120500
},
{
"epoch": 0.11,
"learning_rate": 9.446103698656233e-05,
"loss": 4.829,
"step": 121000
},
{
"epoch": 0.11,
"learning_rate": 9.443818598789584e-05,
"loss": 4.8338,
"step": 121500
},
{
"epoch": 0.11,
"learning_rate": 9.441528919564484e-05,
"loss": 4.849,
"step": 122000
},
{
"epoch": 0.11,
"learning_rate": 9.439239240339386e-05,
"loss": 4.8397,
"step": 122500
},
{
"epoch": 0.11,
"learning_rate": 9.436949561114287e-05,
"loss": 4.8379,
"step": 123000
},
{
"epoch": 0.11,
"learning_rate": 9.434659881889187e-05,
"loss": 4.8388,
"step": 123500
},
{
"epoch": 0.11,
"learning_rate": 9.432370202664088e-05,
"loss": 4.8316,
"step": 124000
},
{
"epoch": 0.11,
"learning_rate": 9.430080523438988e-05,
"loss": 4.8345,
"step": 124500
},
{
"epoch": 0.11,
"learning_rate": 9.42779084421389e-05,
"loss": 4.8384,
"step": 125000
},
{
"epoch": 0.11,
"learning_rate": 9.425501164988791e-05,
"loss": 4.8245,
"step": 125500
},
{
"epoch": 0.12,
"learning_rate": 9.423211485763691e-05,
"loss": 4.8235,
"step": 126000
},
{
"epoch": 0.12,
"learning_rate": 9.420921806538592e-05,
"loss": 4.8223,
"step": 126500
},
{
"epoch": 0.12,
"learning_rate": 9.418632127313492e-05,
"loss": 4.8159,
"step": 127000
},
{
"epoch": 0.12,
"learning_rate": 9.416342448088393e-05,
"loss": 4.8264,
"step": 127500
},
{
"epoch": 0.12,
"learning_rate": 9.414066506938645e-05,
"loss": 4.819,
"step": 128000
},
{
"epoch": 0.12,
"learning_rate": 9.411776827713545e-05,
"loss": 4.8198,
"step": 128500
},
{
"epoch": 0.12,
"learning_rate": 9.409487148488446e-05,
"loss": 4.8202,
"step": 129000
},
{
"epoch": 0.12,
"learning_rate": 9.407197469263346e-05,
"loss": 4.8242,
"step": 129500
},
{
"epoch": 0.12,
"learning_rate": 9.404907790038248e-05,
"loss": 4.8091,
"step": 130000
},
{
"epoch": 0.12,
"learning_rate": 9.402618110813149e-05,
"loss": 4.8118,
"step": 130500
},
{
"epoch": 0.12,
"learning_rate": 9.400328431588049e-05,
"loss": 4.8133,
"step": 131000
},
{
"epoch": 0.12,
"learning_rate": 9.39803875236295e-05,
"loss": 4.8093,
"step": 131500
},
{
"epoch": 0.12,
"learning_rate": 9.39574907313785e-05,
"loss": 4.8075,
"step": 132000
},
{
"epoch": 0.12,
"learning_rate": 9.39345939391275e-05,
"loss": 4.8173,
"step": 132500
},
{
"epoch": 0.12,
"learning_rate": 9.391169714687653e-05,
"loss": 4.8075,
"step": 133000
},
{
"epoch": 0.12,
"learning_rate": 9.388880035462553e-05,
"loss": 4.8097,
"step": 133500
},
{
"epoch": 0.12,
"learning_rate": 9.386590356237454e-05,
"loss": 4.8085,
"step": 134000
},
{
"epoch": 0.12,
"learning_rate": 9.384300677012354e-05,
"loss": 4.819,
"step": 134500
},
{
"epoch": 0.12,
"learning_rate": 9.382010997787255e-05,
"loss": 4.8082,
"step": 135000
},
{
"epoch": 0.12,
"learning_rate": 9.379721318562155e-05,
"loss": 4.8021,
"step": 135500
},
{
"epoch": 0.12,
"learning_rate": 9.377431639337056e-05,
"loss": 4.8132,
"step": 136000
},
{
"epoch": 0.13,
"learning_rate": 9.375146539470406e-05,
"loss": 4.8171,
"step": 136500
},
{
"epoch": 0.13,
"learning_rate": 9.372861439603757e-05,
"loss": 4.8076,
"step": 137000
},
{
"epoch": 0.13,
"learning_rate": 9.370571760378659e-05,
"loss": 4.8095,
"step": 137500
},
{
"epoch": 0.13,
"learning_rate": 9.368286660512009e-05,
"loss": 4.8058,
"step": 138000
},
{
"epoch": 0.13,
"learning_rate": 9.36599698128691e-05,
"loss": 4.7895,
"step": 138500
},
{
"epoch": 0.13,
"learning_rate": 9.36370730206181e-05,
"loss": 4.8062,
"step": 139000
},
{
"epoch": 0.13,
"learning_rate": 9.36141762283671e-05,
"loss": 4.796,
"step": 139500
},
{
"epoch": 0.13,
"learning_rate": 9.359127943611612e-05,
"loss": 4.798,
"step": 140000
},
{
"epoch": 0.13,
"learning_rate": 9.356838264386513e-05,
"loss": 4.7977,
"step": 140500
},
{
"epoch": 0.13,
"learning_rate": 9.354553164519864e-05,
"loss": 4.799,
"step": 141000
},
{
"epoch": 0.13,
"learning_rate": 9.352263485294764e-05,
"loss": 4.8073,
"step": 141500
},
{
"epoch": 0.13,
"learning_rate": 9.349973806069665e-05,
"loss": 4.7919,
"step": 142000
},
{
"epoch": 0.13,
"learning_rate": 9.347684126844566e-05,
"loss": 4.7796,
"step": 142500
},
{
"epoch": 0.13,
"learning_rate": 9.345394447619467e-05,
"loss": 4.7973,
"step": 143000
},
{
"epoch": 0.13,
"learning_rate": 9.343104768394367e-05,
"loss": 4.7906,
"step": 143500
},
{
"epoch": 0.13,
"learning_rate": 9.340815089169268e-05,
"loss": 4.8024,
"step": 144000
},
{
"epoch": 0.13,
"learning_rate": 9.338525409944168e-05,
"loss": 4.7982,
"step": 144500
},
{
"epoch": 0.13,
"learning_rate": 9.336235730719069e-05,
"loss": 4.7862,
"step": 145000
},
{
"epoch": 0.13,
"learning_rate": 9.333946051493971e-05,
"loss": 4.7838,
"step": 145500
},
{
"epoch": 0.13,
"learning_rate": 9.331656372268871e-05,
"loss": 4.7872,
"step": 146000
},
{
"epoch": 0.13,
"learning_rate": 9.329371272402222e-05,
"loss": 4.7776,
"step": 146500
},
{
"epoch": 0.13,
"learning_rate": 9.327081593177122e-05,
"loss": 4.7827,
"step": 147000
},
{
"epoch": 0.14,
"learning_rate": 9.324791913952023e-05,
"loss": 4.7906,
"step": 147500
},
{
"epoch": 0.14,
"learning_rate": 9.322502234726925e-05,
"loss": 4.7795,
"step": 148000
},
{
"epoch": 0.14,
"learning_rate": 9.320212555501825e-05,
"loss": 4.7873,
"step": 148500
},
{
"epoch": 0.14,
"learning_rate": 9.317932034993626e-05,
"loss": 4.786,
"step": 149000
},
{
"epoch": 0.14,
"learning_rate": 9.315642355768526e-05,
"loss": 4.7833,
"step": 149500
},
{
"epoch": 0.14,
"learning_rate": 9.313352676543428e-05,
"loss": 4.7912,
"step": 150000
},
{
"epoch": 0.14,
"learning_rate": 9.311062997318329e-05,
"loss": 4.7711,
"step": 150500
},
{
"epoch": 0.14,
"learning_rate": 9.308773318093229e-05,
"loss": 4.775,
"step": 151000
},
{
"epoch": 0.14,
"learning_rate": 9.30648363886813e-05,
"loss": 4.7816,
"step": 151500
},
{
"epoch": 0.14,
"learning_rate": 9.30420311835993e-05,
"loss": 4.769,
"step": 152000
},
{
"epoch": 0.14,
"learning_rate": 9.301913439134832e-05,
"loss": 4.7854,
"step": 152500
},
{
"epoch": 0.14,
"learning_rate": 9.299623759909733e-05,
"loss": 4.7933,
"step": 153000
},
{
"epoch": 0.14,
"learning_rate": 9.297334080684633e-05,
"loss": 4.7785,
"step": 153500
},
{
"epoch": 0.14,
"learning_rate": 9.295044401459534e-05,
"loss": 4.765,
"step": 154000
},
{
"epoch": 0.14,
"learning_rate": 9.292754722234434e-05,
"loss": 4.7787,
"step": 154500
},
{
"epoch": 0.14,
"learning_rate": 9.290465043009335e-05,
"loss": 4.7839,
"step": 155000
},
{
"epoch": 0.14,
"learning_rate": 9.288175363784237e-05,
"loss": 4.7812,
"step": 155500
},
{
"epoch": 0.14,
"learning_rate": 9.285885684559137e-05,
"loss": 4.7723,
"step": 156000
},
{
"epoch": 0.14,
"learning_rate": 9.283596005334038e-05,
"loss": 4.787,
"step": 156500
},
{
"epoch": 0.14,
"learning_rate": 9.281306326108938e-05,
"loss": 4.7707,
"step": 157000
},
{
"epoch": 0.14,
"learning_rate": 9.279016646883839e-05,
"loss": 4.7779,
"step": 157500
},
{
"epoch": 0.14,
"learning_rate": 9.27672696765874e-05,
"loss": 4.7703,
"step": 158000
},
{
"epoch": 0.15,
"learning_rate": 9.274441867792091e-05,
"loss": 4.7606,
"step": 158500
},
{
"epoch": 0.15,
"learning_rate": 9.27215218856699e-05,
"loss": 4.7575,
"step": 159000
},
{
"epoch": 0.15,
"learning_rate": 9.269862509341891e-05,
"loss": 4.7708,
"step": 159500
},
{
"epoch": 0.15,
"learning_rate": 9.267572830116793e-05,
"loss": 4.7602,
"step": 160000
},
{
"epoch": 0.15,
"learning_rate": 9.265287730250143e-05,
"loss": 4.7595,
"step": 160500
},
{
"epoch": 0.15,
"learning_rate": 9.262998051025044e-05,
"loss": 4.7712,
"step": 161000
},
{
"epoch": 0.15,
"learning_rate": 9.260708371799944e-05,
"loss": 4.7699,
"step": 161500
},
{
"epoch": 0.15,
"learning_rate": 9.258418692574845e-05,
"loss": 4.7676,
"step": 162000
},
{
"epoch": 0.15,
"learning_rate": 9.256133592708197e-05,
"loss": 4.7723,
"step": 162500
},
{
"epoch": 0.15,
"learning_rate": 9.253843913483097e-05,
"loss": 4.7604,
"step": 163000
},
{
"epoch": 0.15,
"learning_rate": 9.251558813616448e-05,
"loss": 4.7622,
"step": 163500
},
{
"epoch": 0.15,
"learning_rate": 9.249269134391348e-05,
"loss": 4.774,
"step": 164000
},
{
"epoch": 0.15,
"learning_rate": 9.246979455166249e-05,
"loss": 4.7561,
"step": 164500
},
{
"epoch": 0.15,
"learning_rate": 9.24468977594115e-05,
"loss": 4.7561,
"step": 165000
},
{
"epoch": 0.15,
"learning_rate": 9.242400096716051e-05,
"loss": 4.755,
"step": 165500
},
{
"epoch": 0.15,
"learning_rate": 9.240114996849402e-05,
"loss": 4.7644,
"step": 166000
},
{
"epoch": 0.15,
"learning_rate": 9.237825317624302e-05,
"loss": 4.7578,
"step": 166500
},
{
"epoch": 0.15,
"learning_rate": 9.235535638399203e-05,
"loss": 4.7633,
"step": 167000
},
{
"epoch": 0.15,
"learning_rate": 9.233250538532554e-05,
"loss": 4.7604,
"step": 167500
},
{
"epoch": 0.15,
"learning_rate": 9.230960859307455e-05,
"loss": 4.7465,
"step": 168000
},
{
"epoch": 0.15,
"learning_rate": 9.228671180082355e-05,
"loss": 4.7608,
"step": 168500
},
{
"epoch": 0.15,
"learning_rate": 9.226381500857256e-05,
"loss": 4.7431,
"step": 169000
},
{
"epoch": 0.16,
"learning_rate": 9.224096400990608e-05,
"loss": 4.7622,
"step": 169500
},
{
"epoch": 0.16,
"learning_rate": 9.221806721765508e-05,
"loss": 4.7538,
"step": 170000
},
{
"epoch": 0.16,
"learning_rate": 9.219521621898859e-05,
"loss": 4.742,
"step": 170500
},
{
"epoch": 0.16,
"learning_rate": 9.21723194267376e-05,
"loss": 4.7637,
"step": 171000
},
{
"epoch": 0.16,
"learning_rate": 9.21494226344866e-05,
"loss": 4.7577,
"step": 171500
},
{
"epoch": 0.16,
"learning_rate": 9.21265258422356e-05,
"loss": 4.7585,
"step": 172000
},
{
"epoch": 0.16,
"learning_rate": 9.210367484356912e-05,
"loss": 4.7495,
"step": 172500
},
{
"epoch": 0.16,
"learning_rate": 9.208077805131813e-05,
"loss": 4.7448,
"step": 173000
},
{
"epoch": 0.16,
"learning_rate": 9.205788125906713e-05,
"loss": 4.7499,
"step": 173500
},
{
"epoch": 0.16,
"learning_rate": 9.203498446681614e-05,
"loss": 4.7594,
"step": 174000
},
{
"epoch": 0.16,
"learning_rate": 9.201208767456514e-05,
"loss": 4.735,
"step": 174500
},
{
"epoch": 0.16,
"learning_rate": 9.198919088231416e-05,
"loss": 4.7598,
"step": 175000
},
{
"epoch": 0.16,
"learning_rate": 9.196629409006317e-05,
"loss": 4.7507,
"step": 175500
},
{
"epoch": 0.16,
"learning_rate": 9.194339729781217e-05,
"loss": 4.7464,
"step": 176000
},
{
"epoch": 0.16,
"learning_rate": 9.192050050556118e-05,
"loss": 4.7455,
"step": 176500
},
{
"epoch": 0.16,
"learning_rate": 9.189760371331018e-05,
"loss": 4.7331,
"step": 177000
},
{
"epoch": 0.16,
"learning_rate": 9.18747069210592e-05,
"loss": 4.7467,
"step": 177500
},
{
"epoch": 0.16,
"learning_rate": 9.18518101288082e-05,
"loss": 4.7463,
"step": 178000
},
{
"epoch": 0.16,
"learning_rate": 9.182891333655721e-05,
"loss": 4.7451,
"step": 178500
},
{
"epoch": 0.16,
"learning_rate": 9.180601654430622e-05,
"loss": 4.748,
"step": 179000
},
{
"epoch": 0.16,
"learning_rate": 9.178316554563972e-05,
"loss": 4.7535,
"step": 179500
},
{
"epoch": 0.16,
"learning_rate": 9.176026875338873e-05,
"loss": 4.7316,
"step": 180000
},
{
"epoch": 0.17,
"learning_rate": 9.173737196113775e-05,
"loss": 4.7333,
"step": 180500
},
{
"epoch": 0.17,
"learning_rate": 9.171452096247125e-05,
"loss": 4.7415,
"step": 181000
},
{
"epoch": 0.17,
"learning_rate": 9.169162417022026e-05,
"loss": 4.7351,
"step": 181500
},
{
"epoch": 0.17,
"learning_rate": 9.166872737796926e-05,
"loss": 4.7343,
"step": 182000
},
{
"epoch": 0.17,
"learning_rate": 9.164583058571827e-05,
"loss": 4.7392,
"step": 182500
},
{
"epoch": 0.17,
"learning_rate": 9.162293379346727e-05,
"loss": 4.727,
"step": 183000
},
{
"epoch": 0.17,
"learning_rate": 9.160003700121628e-05,
"loss": 4.7467,
"step": 183500
},
{
"epoch": 0.17,
"learning_rate": 9.157714020896528e-05,
"loss": 4.7399,
"step": 184000
},
{
"epoch": 0.17,
"learning_rate": 9.155424341671429e-05,
"loss": 4.7443,
"step": 184500
},
{
"epoch": 0.17,
"learning_rate": 9.15313466244633e-05,
"loss": 4.7317,
"step": 185000
},
{
"epoch": 0.17,
"learning_rate": 9.150844983221231e-05,
"loss": 4.7463,
"step": 185500
},
{
"epoch": 0.17,
"learning_rate": 9.148555303996132e-05,
"loss": 4.7344,
"step": 186000
},
{
"epoch": 0.17,
"learning_rate": 9.146265624771032e-05,
"loss": 4.7399,
"step": 186500
},
{
"epoch": 0.17,
"learning_rate": 9.143975945545933e-05,
"loss": 4.7438,
"step": 187000
},
{
"epoch": 0.17,
"learning_rate": 9.141690845679284e-05,
"loss": 4.7464,
"step": 187500
},
{
"epoch": 0.17,
"learning_rate": 9.139401166454185e-05,
"loss": 4.7286,
"step": 188000
},
{
"epoch": 0.17,
"learning_rate": 9.137116066587536e-05,
"loss": 4.7342,
"step": 188500
},
{
"epoch": 0.17,
"learning_rate": 9.134826387362436e-05,
"loss": 4.7316,
"step": 189000
},
{
"epoch": 0.17,
"learning_rate": 9.132536708137337e-05,
"loss": 4.7212,
"step": 189500
},
{
"epoch": 0.17,
"learning_rate": 9.130247028912237e-05,
"loss": 4.7231,
"step": 190000
},
{
"epoch": 0.17,
"learning_rate": 9.127957349687139e-05,
"loss": 4.7126,
"step": 190500
},
{
"epoch": 0.17,
"learning_rate": 9.12566767046204e-05,
"loss": 4.7251,
"step": 191000
},
{
"epoch": 0.18,
"learning_rate": 9.12337799123694e-05,
"loss": 4.7373,
"step": 191500
},
{
"epoch": 0.18,
"learning_rate": 9.12108831201184e-05,
"loss": 4.7227,
"step": 192000
},
{
"epoch": 0.18,
"learning_rate": 9.118798632786741e-05,
"loss": 4.729,
"step": 192500
},
{
"epoch": 0.18,
"learning_rate": 9.116518112278543e-05,
"loss": 4.7204,
"step": 193000
},
{
"epoch": 0.18,
"learning_rate": 9.114228433053443e-05,
"loss": 4.7389,
"step": 193500
},
{
"epoch": 0.18,
"learning_rate": 9.111938753828344e-05,
"loss": 4.7267,
"step": 194000
},
{
"epoch": 0.18,
"learning_rate": 9.109649074603244e-05,
"loss": 4.7145,
"step": 194500
},
{
"epoch": 0.18,
"learning_rate": 9.107359395378145e-05,
"loss": 4.7421,
"step": 195000
},
{
"epoch": 0.18,
"learning_rate": 9.105074295511497e-05,
"loss": 4.727,
"step": 195500
},
{
"epoch": 0.18,
"learning_rate": 9.102784616286397e-05,
"loss": 4.7267,
"step": 196000
},
{
"epoch": 0.18,
"learning_rate": 9.100494937061298e-05,
"loss": 4.7183,
"step": 196500
},
{
"epoch": 0.18,
"learning_rate": 9.098205257836198e-05,
"loss": 4.7246,
"step": 197000
},
{
"epoch": 0.18,
"learning_rate": 9.095915578611099e-05,
"loss": 4.7332,
"step": 197500
},
{
"epoch": 0.18,
"learning_rate": 9.093625899386001e-05,
"loss": 4.7065,
"step": 198000
},
{
"epoch": 0.18,
"learning_rate": 9.091336220160901e-05,
"loss": 4.7115,
"step": 198500
},
{
"epoch": 0.18,
"learning_rate": 9.089046540935802e-05,
"loss": 4.7332,
"step": 199000
},
{
"epoch": 0.18,
"learning_rate": 9.086756861710702e-05,
"loss": 4.7145,
"step": 199500
},
{
"epoch": 0.18,
"learning_rate": 9.084471761844053e-05,
"loss": 4.7091,
"step": 200000
},
{
"epoch": 0.18,
"learning_rate": 9.082186661977405e-05,
"loss": 4.7069,
"step": 200500
},
{
"epoch": 0.18,
"learning_rate": 9.079901562110755e-05,
"loss": 4.7214,
"step": 201000
},
{
"epoch": 0.18,
"learning_rate": 9.077611882885656e-05,
"loss": 4.7218,
"step": 201500
},
{
"epoch": 0.19,
"learning_rate": 9.075322203660556e-05,
"loss": 4.7131,
"step": 202000
},
{
"epoch": 0.19,
"learning_rate": 9.073032524435458e-05,
"loss": 4.7241,
"step": 202500
},
{
"epoch": 0.19,
"learning_rate": 9.070742845210359e-05,
"loss": 4.7084,
"step": 203000
},
{
"epoch": 0.19,
"learning_rate": 9.068457745343709e-05,
"loss": 4.7187,
"step": 203500
},
{
"epoch": 0.19,
"learning_rate": 9.06616806611861e-05,
"loss": 4.7114,
"step": 204000
},
{
"epoch": 0.19,
"learning_rate": 9.06387838689351e-05,
"loss": 4.7137,
"step": 204500
},
{
"epoch": 0.19,
"learning_rate": 9.061588707668411e-05,
"loss": 4.7077,
"step": 205000
},
{
"epoch": 0.19,
"learning_rate": 9.059299028443313e-05,
"loss": 4.7174,
"step": 205500
},
{
"epoch": 0.19,
"learning_rate": 9.057009349218213e-05,
"loss": 4.7118,
"step": 206000
},
{
"epoch": 0.19,
"learning_rate": 9.054719669993114e-05,
"loss": 4.7051,
"step": 206500
},
{
"epoch": 0.19,
"learning_rate": 9.052429990768014e-05,
"loss": 4.712,
"step": 207000
},
{
"epoch": 0.19,
"learning_rate": 9.050144890901365e-05,
"loss": 4.7138,
"step": 207500
},
{
"epoch": 0.19,
"learning_rate": 9.047855211676265e-05,
"loss": 4.7041,
"step": 208000
},
{
"epoch": 0.19,
"learning_rate": 9.045565532451166e-05,
"loss": 4.7185,
"step": 208500
},
{
"epoch": 0.19,
"learning_rate": 9.043275853226066e-05,
"loss": 4.7227,
"step": 209000
},
{
"epoch": 0.19,
"learning_rate": 9.040986174000967e-05,
"loss": 4.7194,
"step": 209500
},
{
"epoch": 0.19,
"learning_rate": 9.038696494775869e-05,
"loss": 4.7187,
"step": 210000
},
{
"epoch": 0.19,
"learning_rate": 9.036406815550769e-05,
"loss": 4.7064,
"step": 210500
},
{
"epoch": 0.19,
"learning_rate": 9.03411713632567e-05,
"loss": 4.7025,
"step": 211000
},
{
"epoch": 0.19,
"learning_rate": 9.03182745710057e-05,
"loss": 4.7065,
"step": 211500
},
{
"epoch": 0.19,
"learning_rate": 9.02954235723392e-05,
"loss": 4.7148,
"step": 212000
},
{
"epoch": 0.19,
"learning_rate": 9.027252678008822e-05,
"loss": 4.7038,
"step": 212500
},
{
"epoch": 0.2,
"learning_rate": 9.024962998783723e-05,
"loss": 4.6974,
"step": 213000
},
{
"epoch": 0.2,
"learning_rate": 9.022677898917074e-05,
"loss": 4.7006,
"step": 213500
},
{
"epoch": 0.2,
"learning_rate": 9.020388219691974e-05,
"loss": 4.7143,
"step": 214000
},
{
"epoch": 0.2,
"learning_rate": 9.018098540466875e-05,
"loss": 4.7062,
"step": 214500
},
{
"epoch": 0.2,
"learning_rate": 9.015808861241775e-05,
"loss": 4.6983,
"step": 215000
},
{
"epoch": 0.2,
"learning_rate": 9.013519182016677e-05,
"loss": 4.7196,
"step": 215500
},
{
"epoch": 0.2,
"learning_rate": 9.011229502791577e-05,
"loss": 4.7019,
"step": 216000
},
{
"epoch": 0.2,
"learning_rate": 9.008939823566478e-05,
"loss": 4.6808,
"step": 216500
},
{
"epoch": 0.2,
"learning_rate": 9.006650144341378e-05,
"loss": 4.7091,
"step": 217000
},
{
"epoch": 0.2,
"learning_rate": 9.00436962383318e-05,
"loss": 4.6953,
"step": 217500
},
{
"epoch": 0.2,
"learning_rate": 9.002079944608081e-05,
"loss": 4.6965,
"step": 218000
},
{
"epoch": 0.2,
"learning_rate": 8.999790265382981e-05,
"loss": 4.7058,
"step": 218500
},
{
"epoch": 0.2,
"learning_rate": 8.997500586157882e-05,
"loss": 4.7001,
"step": 219000
},
{
"epoch": 0.2,
"learning_rate": 8.995210906932782e-05,
"loss": 4.7157,
"step": 219500
},
{
"epoch": 0.2,
"learning_rate": 8.992921227707683e-05,
"loss": 4.6924,
"step": 220000
},
{
"epoch": 0.2,
"learning_rate": 8.990631548482585e-05,
"loss": 4.6984,
"step": 220500
},
{
"epoch": 0.2,
"learning_rate": 8.988341869257485e-05,
"loss": 4.7011,
"step": 221000
},
{
"epoch": 0.2,
"learning_rate": 8.986056769390836e-05,
"loss": 4.6908,
"step": 221500
},
{
"epoch": 0.2,
"learning_rate": 8.983767090165736e-05,
"loss": 4.7007,
"step": 222000
},
{
"epoch": 0.2,
"learning_rate": 8.981477410940637e-05,
"loss": 4.6884,
"step": 222500
},
{
"epoch": 0.2,
"learning_rate": 8.979187731715539e-05,
"loss": 4.6917,
"step": 223000
},
{
"epoch": 0.2,
"learning_rate": 8.976898052490439e-05,
"loss": 4.6879,
"step": 223500
},
{
"epoch": 0.21,
"learning_rate": 8.97460837326534e-05,
"loss": 4.7008,
"step": 224000
},
{
"epoch": 0.21,
"learning_rate": 8.97231869404024e-05,
"loss": 4.6931,
"step": 224500
},
{
"epoch": 0.21,
"learning_rate": 8.970029014815141e-05,
"loss": 4.7007,
"step": 225000
},
{
"epoch": 0.21,
"learning_rate": 8.967743914948493e-05,
"loss": 4.6898,
"step": 225500
},
{
"epoch": 0.21,
"learning_rate": 8.965454235723393e-05,
"loss": 4.6936,
"step": 226000
},
{
"epoch": 0.21,
"learning_rate": 8.963164556498294e-05,
"loss": 4.6953,
"step": 226500
},
{
"epoch": 0.21,
"learning_rate": 8.960879456631644e-05,
"loss": 4.6686,
"step": 227000
},
{
"epoch": 0.21,
"learning_rate": 8.958589777406545e-05,
"loss": 4.6965,
"step": 227500
},
{
"epoch": 0.21,
"learning_rate": 8.956300098181447e-05,
"loss": 4.7081,
"step": 228000
},
{
"epoch": 0.21,
"learning_rate": 8.954010418956347e-05,
"loss": 4.69,
"step": 228500
},
{
"epoch": 0.21,
"learning_rate": 8.951720739731248e-05,
"loss": 4.6863,
"step": 229000
},
{
"epoch": 0.21,
"learning_rate": 8.949431060506148e-05,
"loss": 4.69,
"step": 229500
},
{
"epoch": 0.21,
"learning_rate": 8.947141381281049e-05,
"loss": 4.69,
"step": 230000
},
{
"epoch": 0.21,
"learning_rate": 8.944851702055949e-05,
"loss": 4.6866,
"step": 230500
},
{
"epoch": 0.21,
"learning_rate": 8.942562022830851e-05,
"loss": 4.6946,
"step": 231000
},
{
"epoch": 0.21,
"learning_rate": 8.940276922964202e-05,
"loss": 4.6808,
"step": 231500
},
{
"epoch": 0.21,
"learning_rate": 8.9379872437391e-05,
"loss": 4.6888,
"step": 232000
},
{
"epoch": 0.21,
"learning_rate": 8.935697564514001e-05,
"loss": 4.6793,
"step": 232500
},
{
"epoch": 0.21,
"learning_rate": 8.933412464647353e-05,
"loss": 4.6792,
"step": 233000
},
{
"epoch": 0.21,
"learning_rate": 8.931122785422254e-05,
"loss": 4.6837,
"step": 233500
},
{
"epoch": 0.21,
"learning_rate": 8.928833106197154e-05,
"loss": 4.7022,
"step": 234000
},
{
"epoch": 0.21,
"learning_rate": 8.926543426972055e-05,
"loss": 4.6798,
"step": 234500
},
{
"epoch": 0.22,
"learning_rate": 8.924258327105407e-05,
"loss": 4.6858,
"step": 235000
},
{
"epoch": 0.22,
"learning_rate": 8.921968647880307e-05,
"loss": 4.674,
"step": 235500
},
{
"epoch": 0.22,
"learning_rate": 8.919683548013658e-05,
"loss": 4.6769,
"step": 236000
},
{
"epoch": 0.22,
"learning_rate": 8.917393868788558e-05,
"loss": 4.688,
"step": 236500
},
{
"epoch": 0.22,
"learning_rate": 8.915104189563459e-05,
"loss": 4.6883,
"step": 237000
},
{
"epoch": 0.22,
"learning_rate": 8.91281451033836e-05,
"loss": 4.6925,
"step": 237500
},
{
"epoch": 0.22,
"learning_rate": 8.910524831113261e-05,
"loss": 4.6764,
"step": 238000
},
{
"epoch": 0.22,
"learning_rate": 8.908235151888161e-05,
"loss": 4.6804,
"step": 238500
},
{
"epoch": 0.22,
"learning_rate": 8.905945472663062e-05,
"loss": 4.6807,
"step": 239000
},
{
"epoch": 0.22,
"learning_rate": 8.903660372796413e-05,
"loss": 4.693,
"step": 239500
},
{
"epoch": 0.22,
"learning_rate": 8.901370693571313e-05,
"loss": 4.6767,
"step": 240000
},
{
"epoch": 0.22,
"learning_rate": 8.899081014346215e-05,
"loss": 4.6675,
"step": 240500
},
{
"epoch": 0.22,
"learning_rate": 8.896791335121115e-05,
"loss": 4.6842,
"step": 241000
},
{
"epoch": 0.22,
"learning_rate": 8.894501655896016e-05,
"loss": 4.6811,
"step": 241500
},
{
"epoch": 0.22,
"learning_rate": 8.892211976670916e-05,
"loss": 4.6867,
"step": 242000
},
{
"epoch": 0.22,
"learning_rate": 8.889922297445817e-05,
"loss": 4.6791,
"step": 242500
},
{
"epoch": 0.22,
"learning_rate": 8.887637197579169e-05,
"loss": 4.672,
"step": 243000
},
{
"epoch": 0.22,
"learning_rate": 8.88534751835407e-05,
"loss": 4.6772,
"step": 243500
},
{
"epoch": 0.22,
"learning_rate": 8.88305783912897e-05,
"loss": 4.6779,
"step": 244000
},
{
"epoch": 0.22,
"learning_rate": 8.88076815990387e-05,
"loss": 4.6865,
"step": 244500
},
{
"epoch": 0.22,
"learning_rate": 8.878478480678771e-05,
"loss": 4.6762,
"step": 245000
},
{
"epoch": 0.22,
"learning_rate": 8.876188801453673e-05,
"loss": 4.6774,
"step": 245500
},
{
"epoch": 0.23,
"learning_rate": 8.873899122228573e-05,
"loss": 4.6884,
"step": 246000
},
{
"epoch": 0.23,
"learning_rate": 8.871614022361924e-05,
"loss": 4.6745,
"step": 246500
},
{
"epoch": 0.23,
"learning_rate": 8.869324343136824e-05,
"loss": 4.6841,
"step": 247000
},
{
"epoch": 0.23,
"learning_rate": 8.867034663911725e-05,
"loss": 4.6687,
"step": 247500
},
{
"epoch": 0.23,
"learning_rate": 8.864744984686625e-05,
"loss": 4.6751,
"step": 248000
},
{
"epoch": 0.23,
"learning_rate": 8.862459884819977e-05,
"loss": 4.6713,
"step": 248500
},
{
"epoch": 0.23,
"learning_rate": 8.860170205594878e-05,
"loss": 4.6759,
"step": 249000
},
{
"epoch": 0.23,
"learning_rate": 8.857880526369778e-05,
"loss": 4.6654,
"step": 249500
},
{
"epoch": 0.23,
"learning_rate": 8.855590847144679e-05,
"loss": 4.6717,
"step": 250000
},
{
"epoch": 0.23,
"learning_rate": 8.853301167919579e-05,
"loss": 4.6809,
"step": 250500
},
{
"epoch": 0.23,
"learning_rate": 8.851011488694481e-05,
"loss": 4.6718,
"step": 251000
},
{
"epoch": 0.23,
"learning_rate": 8.848721809469382e-05,
"loss": 4.6737,
"step": 251500
},
{
"epoch": 0.23,
"learning_rate": 8.846432130244282e-05,
"loss": 4.6611,
"step": 252000
},
{
"epoch": 0.23,
"learning_rate": 8.844142451019183e-05,
"loss": 4.6653,
"step": 252500
},
{
"epoch": 0.23,
"learning_rate": 8.841852771794083e-05,
"loss": 4.6504,
"step": 253000
},
{
"epoch": 0.23,
"learning_rate": 8.839563092568985e-05,
"loss": 4.6703,
"step": 253500
},
{
"epoch": 0.23,
"learning_rate": 8.837273413343885e-05,
"loss": 4.6781,
"step": 254000
},
{
"epoch": 0.23,
"learning_rate": 8.834988313477236e-05,
"loss": 4.6634,
"step": 254500
},
{
"epoch": 0.23,
"learning_rate": 8.832698634252137e-05,
"loss": 4.6633,
"step": 255000
},
{
"epoch": 0.23,
"learning_rate": 8.830408955027037e-05,
"loss": 4.6825,
"step": 255500
},
{
"epoch": 0.23,
"learning_rate": 8.828119275801938e-05,
"loss": 4.6819,
"step": 256000
},
{
"epoch": 0.23,
"learning_rate": 8.825834175935288e-05,
"loss": 4.665,
"step": 256500
},
{
"epoch": 0.24,
"learning_rate": 8.823544496710189e-05,
"loss": 4.6716,
"step": 257000
},
{
"epoch": 0.24,
"learning_rate": 8.821254817485089e-05,
"loss": 4.666,
"step": 257500
},
{
"epoch": 0.24,
"learning_rate": 8.81896513825999e-05,
"loss": 4.6797,
"step": 258000
},
{
"epoch": 0.24,
"learning_rate": 8.816675459034891e-05,
"loss": 4.6855,
"step": 258500
},
{
"epoch": 0.24,
"learning_rate": 8.814385779809792e-05,
"loss": 4.6556,
"step": 259000
},
{
"epoch": 0.24,
"learning_rate": 8.812100679943143e-05,
"loss": 4.6546,
"step": 259500
},
{
"epoch": 0.24,
"learning_rate": 8.809811000718043e-05,
"loss": 4.6736,
"step": 260000
},
{
"epoch": 0.24,
"learning_rate": 8.807521321492944e-05,
"loss": 4.6548,
"step": 260500
},
{
"epoch": 0.24,
"learning_rate": 8.805231642267845e-05,
"loss": 4.6674,
"step": 261000
},
{
"epoch": 0.24,
"learning_rate": 8.802941963042746e-05,
"loss": 4.6686,
"step": 261500
},
{
"epoch": 0.24,
"learning_rate": 8.800652283817646e-05,
"loss": 4.654,
"step": 262000
},
{
"epoch": 0.24,
"learning_rate": 8.798362604592547e-05,
"loss": 4.6613,
"step": 262500
},
{
"epoch": 0.24,
"learning_rate": 8.796072925367447e-05,
"loss": 4.6579,
"step": 263000
},
{
"epoch": 0.24,
"learning_rate": 8.793783246142348e-05,
"loss": 4.6778,
"step": 263500
},
{
"epoch": 0.24,
"learning_rate": 8.79149356691725e-05,
"loss": 4.6662,
"step": 264000
},
{
"epoch": 0.24,
"learning_rate": 8.78920388769215e-05,
"loss": 4.6614,
"step": 264500
},
{
"epoch": 0.24,
"learning_rate": 8.786914208467051e-05,
"loss": 4.6636,
"step": 265000
},
{
"epoch": 0.24,
"learning_rate": 8.784629108600401e-05,
"loss": 4.6584,
"step": 265500
},
{
"epoch": 0.24,
"learning_rate": 8.782339429375302e-05,
"loss": 4.6654,
"step": 266000
},
{
"epoch": 0.24,
"learning_rate": 8.780049750150204e-05,
"loss": 4.6383,
"step": 266500
},
{
"epoch": 0.24,
"learning_rate": 8.777760070925104e-05,
"loss": 4.6584,
"step": 267000
},
{
"epoch": 0.24,
"learning_rate": 8.775474971058455e-05,
"loss": 4.6792,
"step": 267500
},
{
"epoch": 0.25,
"learning_rate": 8.773185291833355e-05,
"loss": 4.6607,
"step": 268000
},
{
"epoch": 0.25,
"learning_rate": 8.770895612608256e-05,
"loss": 4.6578,
"step": 268500
},
{
"epoch": 0.25,
"learning_rate": 8.768605933383158e-05,
"loss": 4.6616,
"step": 269000
},
{
"epoch": 0.25,
"learning_rate": 8.766316254158058e-05,
"loss": 4.6598,
"step": 269500
},
{
"epoch": 0.25,
"learning_rate": 8.764031154291409e-05,
"loss": 4.6522,
"step": 270000
},
{
"epoch": 0.25,
"learning_rate": 8.761741475066309e-05,
"loss": 4.6665,
"step": 270500
},
{
"epoch": 0.25,
"learning_rate": 8.75945179584121e-05,
"loss": 4.6521,
"step": 271000
},
{
"epoch": 0.25,
"learning_rate": 8.757162116616112e-05,
"loss": 4.6564,
"step": 271500
},
{
"epoch": 0.25,
"learning_rate": 8.754872437391012e-05,
"loss": 4.6587,
"step": 272000
},
{
"epoch": 0.25,
"learning_rate": 8.752587337524363e-05,
"loss": 4.6491,
"step": 272500
},
{
"epoch": 0.25,
"learning_rate": 8.750297658299263e-05,
"loss": 4.6492,
"step": 273000
},
{
"epoch": 0.25,
"learning_rate": 8.748012558432615e-05,
"loss": 4.6553,
"step": 273500
},
{
"epoch": 0.25,
"learning_rate": 8.745722879207516e-05,
"loss": 4.652,
"step": 274000
},
{
"epoch": 0.25,
"learning_rate": 8.743433199982416e-05,
"loss": 4.6655,
"step": 274500
},
{
"epoch": 0.25,
"learning_rate": 8.741143520757317e-05,
"loss": 4.6577,
"step": 275000
},
{
"epoch": 0.25,
"learning_rate": 8.738853841532217e-05,
"loss": 4.6514,
"step": 275500
},
{
"epoch": 0.25,
"learning_rate": 8.736564162307118e-05,
"loss": 4.6559,
"step": 276000
},
{
"epoch": 0.25,
"learning_rate": 8.73427448308202e-05,
"loss": 4.6471,
"step": 276500
},
{
"epoch": 0.25,
"learning_rate": 8.73198480385692e-05,
"loss": 4.6578,
"step": 277000
},
{
"epoch": 0.25,
"learning_rate": 8.72969512463182e-05,
"loss": 4.6565,
"step": 277500
},
{
"epoch": 0.25,
"learning_rate": 8.727405445406721e-05,
"loss": 4.6543,
"step": 278000
},
{
"epoch": 0.26,
"learning_rate": 8.725120345540072e-05,
"loss": 4.6495,
"step": 278500
},
{
"epoch": 0.26,
"learning_rate": 8.722830666314973e-05,
"loss": 4.6636,
"step": 279000
},
{
"epoch": 0.26,
"learning_rate": 8.720540987089874e-05,
"loss": 4.6437,
"step": 279500
},
{
"epoch": 0.26,
"learning_rate": 8.718251307864774e-05,
"loss": 4.6527,
"step": 280000
},
{
"epoch": 0.26,
"learning_rate": 8.715966207998125e-05,
"loss": 4.6434,
"step": 280500
},
{
"epoch": 0.26,
"learning_rate": 8.713681108131476e-05,
"loss": 4.6564,
"step": 281000
},
{
"epoch": 0.26,
"learning_rate": 8.711391428906376e-05,
"loss": 4.6476,
"step": 281500
},
{
"epoch": 0.26,
"learning_rate": 8.709101749681277e-05,
"loss": 4.6617,
"step": 282000
},
{
"epoch": 0.26,
"learning_rate": 8.706812070456177e-05,
"loss": 4.6642,
"step": 282500
},
{
"epoch": 0.26,
"learning_rate": 8.704522391231078e-05,
"loss": 4.646,
"step": 283000
},
{
"epoch": 0.26,
"learning_rate": 8.70223271200598e-05,
"loss": 4.6217,
"step": 283500
},
{
"epoch": 0.26,
"learning_rate": 8.69994303278088e-05,
"loss": 4.656,
"step": 284000
},
{
"epoch": 0.26,
"learning_rate": 8.69765335355578e-05,
"loss": 4.6464,
"step": 284500
},
{
"epoch": 0.26,
"learning_rate": 8.695363674330681e-05,
"loss": 4.6532,
"step": 285000
},
{
"epoch": 0.26,
"learning_rate": 8.693073995105581e-05,
"loss": 4.6377,
"step": 285500
},
{
"epoch": 0.26,
"learning_rate": 8.690784315880482e-05,
"loss": 4.6475,
"step": 286000
},
{
"epoch": 0.26,
"learning_rate": 8.688494636655384e-05,
"loss": 4.6538,
"step": 286500
},
{
"epoch": 0.26,
"learning_rate": 8.686204957430284e-05,
"loss": 4.6526,
"step": 287000
},
{
"epoch": 0.26,
"learning_rate": 8.683915278205185e-05,
"loss": 4.6565,
"step": 287500
},
{
"epoch": 0.26,
"learning_rate": 8.681625598980085e-05,
"loss": 4.6445,
"step": 288000
},
{
"epoch": 0.26,
"learning_rate": 8.679340499113436e-05,
"loss": 4.6463,
"step": 288500
},
{
"epoch": 0.26,
"learning_rate": 8.677050819888338e-05,
"loss": 4.6335,
"step": 289000
},
{
"epoch": 0.27,
"learning_rate": 8.674761140663238e-05,
"loss": 4.634,
"step": 289500
},
{
"epoch": 0.27,
"learning_rate": 8.672471461438139e-05,
"loss": 4.6498,
"step": 290000
},
{
"epoch": 0.27,
"learning_rate": 8.670181782213039e-05,
"loss": 4.6323,
"step": 290500
},
{
"epoch": 0.27,
"learning_rate": 8.66789210298794e-05,
"loss": 4.6375,
"step": 291000
},
{
"epoch": 0.27,
"learning_rate": 8.665607003121292e-05,
"loss": 4.6396,
"step": 291500
},
{
"epoch": 0.27,
"learning_rate": 8.663317323896192e-05,
"loss": 4.6407,
"step": 292000
},
{
"epoch": 0.27,
"learning_rate": 8.661027644671093e-05,
"loss": 4.6481,
"step": 292500
},
{
"epoch": 0.27,
"learning_rate": 8.658737965445993e-05,
"loss": 4.638,
"step": 293000
},
{
"epoch": 0.27,
"learning_rate": 8.656448286220894e-05,
"loss": 4.6296,
"step": 293500
},
{
"epoch": 0.27,
"learning_rate": 8.654158606995794e-05,
"loss": 4.6407,
"step": 294000
},
{
"epoch": 0.27,
"learning_rate": 8.651868927770696e-05,
"loss": 4.6418,
"step": 294500
},
{
"epoch": 0.27,
"learning_rate": 8.649579248545597e-05,
"loss": 4.6554,
"step": 295000
},
{
"epoch": 0.27,
"learning_rate": 8.647289569320497e-05,
"loss": 4.6521,
"step": 295500
},
{
"epoch": 0.27,
"learning_rate": 8.645013628170748e-05,
"loss": 4.653,
"step": 296000
},
{
"epoch": 0.27,
"learning_rate": 8.64272394894565e-05,
"loss": 4.6392,
"step": 296500
},
{
"epoch": 0.27,
"learning_rate": 8.64043426972055e-05,
"loss": 4.6322,
"step": 297000
},
{
"epoch": 0.27,
"learning_rate": 8.63814459049545e-05,
"loss": 4.6315,
"step": 297500
},
{
"epoch": 0.27,
"learning_rate": 8.635854911270351e-05,
"loss": 4.6496,
"step": 298000
},
{
"epoch": 0.27,
"learning_rate": 8.633565232045252e-05,
"loss": 4.6368,
"step": 298500
},
{
"epoch": 0.27,
"learning_rate": 8.631275552820152e-05,
"loss": 4.6261,
"step": 299000
},
{
"epoch": 0.27,
"learning_rate": 8.628985873595054e-05,
"loss": 4.6399,
"step": 299500
},
{
"epoch": 0.27,
"learning_rate": 8.626696194369954e-05,
"loss": 4.6411,
"step": 300000
},
{
"epoch": 0.28,
"learning_rate": 8.624406515144855e-05,
"loss": 4.6408,
"step": 300500
},
{
"epoch": 0.28,
"learning_rate": 8.622121415278206e-05,
"loss": 4.6405,
"step": 301000
},
{
"epoch": 0.28,
"learning_rate": 8.619831736053106e-05,
"loss": 4.6487,
"step": 301500
},
{
"epoch": 0.28,
"learning_rate": 8.617542056828008e-05,
"loss": 4.6273,
"step": 302000
},
{
"epoch": 0.28,
"learning_rate": 8.615252377602908e-05,
"loss": 4.6293,
"step": 302500
},
{
"epoch": 0.28,
"learning_rate": 8.612962698377809e-05,
"loss": 4.6174,
"step": 303000
},
{
"epoch": 0.28,
"learning_rate": 8.61067301915271e-05,
"loss": 4.6405,
"step": 303500
},
{
"epoch": 0.28,
"learning_rate": 8.60838333992761e-05,
"loss": 4.6293,
"step": 304000
},
{
"epoch": 0.28,
"learning_rate": 8.60609366070251e-05,
"loss": 4.6402,
"step": 304500
},
{
"epoch": 0.28,
"learning_rate": 8.603808560835862e-05,
"loss": 4.6362,
"step": 305000
},
{
"epoch": 0.28,
"learning_rate": 8.601523460969213e-05,
"loss": 4.6163,
"step": 305500
},
{
"epoch": 0.28,
"learning_rate": 8.599233781744112e-05,
"loss": 4.6326,
"step": 306000
},
{
"epoch": 0.28,
"learning_rate": 8.596944102519014e-05,
"loss": 4.6327,
"step": 306500
},
{
"epoch": 0.28,
"learning_rate": 8.594654423293914e-05,
"loss": 4.6481,
"step": 307000
},
{
"epoch": 0.28,
"learning_rate": 8.592364744068815e-05,
"loss": 4.6308,
"step": 307500
},
{
"epoch": 0.28,
"learning_rate": 8.590075064843715e-05,
"loss": 4.6336,
"step": 308000
},
{
"epoch": 0.28,
"learning_rate": 8.587789964977066e-05,
"loss": 4.6358,
"step": 308500
},
{
"epoch": 0.28,
"learning_rate": 8.585500285751968e-05,
"loss": 4.636,
"step": 309000
},
{
"epoch": 0.28,
"learning_rate": 8.583210606526868e-05,
"loss": 4.6205,
"step": 309500
},
{
"epoch": 0.28,
"learning_rate": 8.580920927301769e-05,
"loss": 4.6351,
"step": 310000
},
{
"epoch": 0.28,
"learning_rate": 8.57863124807667e-05,
"loss": 4.6474,
"step": 310500
},
{
"epoch": 0.28,
"learning_rate": 8.57634614821002e-05,
"loss": 4.6371,
"step": 311000
},
{
"epoch": 0.29,
"learning_rate": 8.574056468984922e-05,
"loss": 4.6363,
"step": 311500
},
{
"epoch": 0.29,
"learning_rate": 8.571771369118272e-05,
"loss": 4.6324,
"step": 312000
},
{
"epoch": 0.29,
"learning_rate": 8.569481689893173e-05,
"loss": 4.6251,
"step": 312500
},
{
"epoch": 0.29,
"learning_rate": 8.567192010668073e-05,
"loss": 4.626,
"step": 313000
},
{
"epoch": 0.29,
"learning_rate": 8.564902331442974e-05,
"loss": 4.6245,
"step": 313500
},
{
"epoch": 0.29,
"learning_rate": 8.562612652217876e-05,
"loss": 4.6382,
"step": 314000
},
{
"epoch": 0.29,
"learning_rate": 8.560322972992776e-05,
"loss": 4.6282,
"step": 314500
},
{
"epoch": 0.29,
"learning_rate": 8.558037873126127e-05,
"loss": 4.6226,
"step": 315000
},
{
"epoch": 0.29,
"learning_rate": 8.555748193901027e-05,
"loss": 4.6274,
"step": 315500
},
{
"epoch": 0.29,
"learning_rate": 8.553463094034378e-05,
"loss": 4.6222,
"step": 316000
},
{
"epoch": 0.29,
"learning_rate": 8.55117341480928e-05,
"loss": 4.6261,
"step": 316500
},
{
"epoch": 0.29,
"learning_rate": 8.54888373558418e-05,
"loss": 4.6291,
"step": 317000
},
{
"epoch": 0.29,
"learning_rate": 8.546594056359081e-05,
"loss": 4.6235,
"step": 317500
},
{
"epoch": 0.29,
"learning_rate": 8.544304377133981e-05,
"loss": 4.6252,
"step": 318000
},
{
"epoch": 0.29,
"learning_rate": 8.542014697908882e-05,
"loss": 4.6219,
"step": 318500
},
{
"epoch": 0.29,
"learning_rate": 8.539725018683784e-05,
"loss": 4.6212,
"step": 319000
},
{
"epoch": 0.29,
"learning_rate": 8.537435339458684e-05,
"loss": 4.6196,
"step": 319500
},
{
"epoch": 0.29,
"learning_rate": 8.535145660233585e-05,
"loss": 4.6138,
"step": 320000
},
{
"epoch": 0.29,
"learning_rate": 8.532855981008485e-05,
"loss": 4.6091,
"step": 320500
},
{
"epoch": 0.29,
"learning_rate": 8.530570881141836e-05,
"loss": 4.6195,
"step": 321000
},
{
"epoch": 0.29,
"learning_rate": 8.528281201916736e-05,
"loss": 4.6347,
"step": 321500
},
{
"epoch": 0.29,
"learning_rate": 8.525991522691638e-05,
"loss": 4.6343,
"step": 322000
},
{
"epoch": 0.3,
"learning_rate": 8.523701843466539e-05,
"loss": 4.6422,
"step": 322500
},
{
"epoch": 0.3,
"learning_rate": 8.521412164241439e-05,
"loss": 4.6266,
"step": 323000
},
{
"epoch": 0.3,
"learning_rate": 8.51912248501634e-05,
"loss": 4.6285,
"step": 323500
},
{
"epoch": 0.3,
"learning_rate": 8.51683280579124e-05,
"loss": 4.626,
"step": 324000
},
{
"epoch": 0.3,
"learning_rate": 8.514543126566142e-05,
"loss": 4.6172,
"step": 324500
},
{
"epoch": 0.3,
"learning_rate": 8.512253447341042e-05,
"loss": 4.6202,
"step": 325000
},
{
"epoch": 0.3,
"learning_rate": 8.509963768115943e-05,
"loss": 4.6116,
"step": 325500
},
{
"epoch": 0.3,
"learning_rate": 8.507678668249293e-05,
"loss": 4.6084,
"step": 326000
},
{
"epoch": 0.3,
"learning_rate": 8.505393568382644e-05,
"loss": 4.6178,
"step": 326500
},
{
"epoch": 0.3,
"learning_rate": 8.503103889157546e-05,
"loss": 4.6282,
"step": 327000
},
{
"epoch": 0.3,
"learning_rate": 8.500814209932446e-05,
"loss": 4.6111,
"step": 327500
},
{
"epoch": 0.3,
"learning_rate": 8.498524530707347e-05,
"loss": 4.618,
"step": 328000
},
{
"epoch": 0.3,
"learning_rate": 8.496234851482247e-05,
"loss": 4.6023,
"step": 328500
},
{
"epoch": 0.3,
"learning_rate": 8.493945172257148e-05,
"loss": 4.6387,
"step": 329000
},
{
"epoch": 0.3,
"learning_rate": 8.491655493032048e-05,
"loss": 4.6187,
"step": 329500
},
{
"epoch": 0.3,
"learning_rate": 8.48936581380695e-05,
"loss": 4.6279,
"step": 330000
},
{
"epoch": 0.3,
"learning_rate": 8.4870807139403e-05,
"loss": 4.6116,
"step": 330500
},
{
"epoch": 0.3,
"learning_rate": 8.4847910347152e-05,
"loss": 4.6325,
"step": 331000
},
{
"epoch": 0.3,
"learning_rate": 8.4825013554901e-05,
"loss": 4.6297,
"step": 331500
},
{
"epoch": 0.3,
"learning_rate": 8.480211676265002e-05,
"loss": 4.6239,
"step": 332000
},
{
"epoch": 0.3,
"learning_rate": 8.477921997039903e-05,
"loss": 4.6297,
"step": 332500
},
{
"epoch": 0.3,
"learning_rate": 8.475632317814803e-05,
"loss": 4.6087,
"step": 333000
},
{
"epoch": 0.31,
"learning_rate": 8.473342638589704e-05,
"loss": 4.615,
"step": 333500
},
{
"epoch": 0.31,
"learning_rate": 8.471052959364604e-05,
"loss": 4.6136,
"step": 334000
},
{
"epoch": 0.31,
"learning_rate": 8.468767859497956e-05,
"loss": 4.6153,
"step": 334500
},
{
"epoch": 0.31,
"learning_rate": 8.466478180272857e-05,
"loss": 4.6311,
"step": 335000
},
{
"epoch": 0.31,
"learning_rate": 8.464188501047757e-05,
"loss": 4.6269,
"step": 335500
},
{
"epoch": 0.31,
"learning_rate": 8.461898821822658e-05,
"loss": 4.6227,
"step": 336000
},
{
"epoch": 0.31,
"learning_rate": 8.459609142597558e-05,
"loss": 4.618,
"step": 336500
},
{
"epoch": 0.31,
"learning_rate": 8.45731946337246e-05,
"loss": 4.6152,
"step": 337000
},
{
"epoch": 0.31,
"learning_rate": 8.455029784147361e-05,
"loss": 4.6142,
"step": 337500
},
{
"epoch": 0.31,
"learning_rate": 8.452740104922261e-05,
"loss": 4.6308,
"step": 338000
},
{
"epoch": 0.31,
"learning_rate": 8.450455005055612e-05,
"loss": 4.6287,
"step": 338500
},
{
"epoch": 0.31,
"learning_rate": 8.448165325830512e-05,
"loss": 4.6283,
"step": 339000
},
{
"epoch": 0.31,
"learning_rate": 8.445875646605413e-05,
"loss": 4.619,
"step": 339500
},
{
"epoch": 0.31,
"learning_rate": 8.443590546738765e-05,
"loss": 4.6181,
"step": 340000
},
{
"epoch": 0.31,
"learning_rate": 8.441300867513665e-05,
"loss": 4.6102,
"step": 340500
},
{
"epoch": 0.31,
"learning_rate": 8.439011188288566e-05,
"loss": 4.607,
"step": 341000
},
{
"epoch": 0.31,
"learning_rate": 8.436721509063466e-05,
"loss": 4.6017,
"step": 341500
},
{
"epoch": 0.31,
"learning_rate": 8.434431829838367e-05,
"loss": 4.617,
"step": 342000
},
{
"epoch": 0.31,
"learning_rate": 8.432146729971719e-05,
"loss": 4.6041,
"step": 342500
},
{
"epoch": 0.31,
"learning_rate": 8.429857050746619e-05,
"loss": 4.6075,
"step": 343000
},
{
"epoch": 0.31,
"learning_rate": 8.42756737152152e-05,
"loss": 4.6157,
"step": 343500
},
{
"epoch": 0.32,
"learning_rate": 8.42527769229642e-05,
"loss": 4.6175,
"step": 344000
},
{
"epoch": 0.32,
"learning_rate": 8.42298801307132e-05,
"loss": 4.6139,
"step": 344500
},
{
"epoch": 0.32,
"learning_rate": 8.420698333846222e-05,
"loss": 4.6043,
"step": 345000
},
{
"epoch": 0.32,
"learning_rate": 8.418408654621123e-05,
"loss": 4.6386,
"step": 345500
},
{
"epoch": 0.32,
"learning_rate": 8.416118975396023e-05,
"loss": 4.6122,
"step": 346000
},
{
"epoch": 0.32,
"learning_rate": 8.413833875529374e-05,
"loss": 4.6106,
"step": 346500
},
{
"epoch": 0.32,
"learning_rate": 8.411544196304275e-05,
"loss": 4.6218,
"step": 347000
},
{
"epoch": 0.32,
"learning_rate": 8.409254517079176e-05,
"loss": 4.6195,
"step": 347500
},
{
"epoch": 0.32,
"learning_rate": 8.406964837854077e-05,
"loss": 4.628,
"step": 348000
},
{
"epoch": 0.32,
"learning_rate": 8.404675158628977e-05,
"loss": 4.608,
"step": 348500
},
{
"epoch": 0.32,
"learning_rate": 8.402385479403878e-05,
"loss": 4.6176,
"step": 349000
},
{
"epoch": 0.32,
"learning_rate": 8.400095800178778e-05,
"loss": 4.6119,
"step": 349500
},
{
"epoch": 0.32,
"learning_rate": 8.397806120953679e-05,
"loss": 4.6197,
"step": 350000
},
{
"epoch": 0.32,
"learning_rate": 8.395521021087031e-05,
"loss": 4.6084,
"step": 350500
},
{
"epoch": 0.32,
"learning_rate": 8.393231341861931e-05,
"loss": 4.6051,
"step": 351000
},
{
"epoch": 0.32,
"learning_rate": 8.390946241995282e-05,
"loss": 4.6159,
"step": 351500
},
{
"epoch": 0.32,
"learning_rate": 8.388656562770182e-05,
"loss": 4.6103,
"step": 352000
},
{
"epoch": 0.32,
"learning_rate": 8.386366883545084e-05,
"loss": 4.6261,
"step": 352500
},
{
"epoch": 0.32,
"learning_rate": 8.384077204319985e-05,
"loss": 4.6091,
"step": 353000
},
{
"epoch": 0.32,
"learning_rate": 8.381787525094885e-05,
"loss": 4.6031,
"step": 353500
},
{
"epoch": 0.32,
"learning_rate": 8.379497845869786e-05,
"loss": 4.6035,
"step": 354000
},
{
"epoch": 0.32,
"learning_rate": 8.377208166644686e-05,
"loss": 4.5985,
"step": 354500
},
{
"epoch": 0.33,
"learning_rate": 8.374918487419587e-05,
"loss": 4.622,
"step": 355000
},
{
"epoch": 0.33,
"learning_rate": 8.372628808194489e-05,
"loss": 4.6241,
"step": 355500
},
{
"epoch": 0.33,
"learning_rate": 8.370339128969388e-05,
"loss": 4.6098,
"step": 356000
},
{
"epoch": 0.33,
"learning_rate": 8.368054029102738e-05,
"loss": 4.6032,
"step": 356500
},
{
"epoch": 0.33,
"learning_rate": 8.365764349877639e-05,
"loss": 4.6162,
"step": 357000
},
{
"epoch": 0.33,
"learning_rate": 8.363474670652541e-05,
"loss": 4.6138,
"step": 357500
},
{
"epoch": 0.33,
"learning_rate": 8.361184991427441e-05,
"loss": 4.614,
"step": 358000
},
{
"epoch": 0.33,
"learning_rate": 8.358895312202342e-05,
"loss": 4.5968,
"step": 358500
},
{
"epoch": 0.33,
"learning_rate": 8.356610212335692e-05,
"loss": 4.5913,
"step": 359000
},
{
"epoch": 0.33,
"learning_rate": 8.354320533110593e-05,
"loss": 4.6181,
"step": 359500
},
{
"epoch": 0.33,
"learning_rate": 8.352030853885495e-05,
"loss": 4.6084,
"step": 360000
},
{
"epoch": 0.33,
"learning_rate": 8.349741174660395e-05,
"loss": 4.6179,
"step": 360500
},
{
"epoch": 0.33,
"learning_rate": 8.347451495435296e-05,
"loss": 4.6327,
"step": 361000
},
{
"epoch": 0.33,
"learning_rate": 8.345166395568646e-05,
"loss": 4.5855,
"step": 361500
},
{
"epoch": 0.33,
"learning_rate": 8.342876716343547e-05,
"loss": 4.6119,
"step": 362000
},
{
"epoch": 0.33,
"learning_rate": 8.340587037118449e-05,
"loss": 4.6073,
"step": 362500
},
{
"epoch": 0.33,
"learning_rate": 8.338297357893349e-05,
"loss": 4.617,
"step": 363000
},
{
"epoch": 0.33,
"learning_rate": 8.33600767866825e-05,
"loss": 4.5957,
"step": 363500
},
{
"epoch": 0.33,
"learning_rate": 8.33371799944315e-05,
"loss": 4.6044,
"step": 364000
},
{
"epoch": 0.33,
"learning_rate": 8.33142832021805e-05,
"loss": 4.6085,
"step": 364500
},
{
"epoch": 0.33,
"learning_rate": 8.329138640992951e-05,
"loss": 4.5961,
"step": 365000
},
{
"epoch": 0.33,
"learning_rate": 8.326853541126303e-05,
"loss": 4.5876,
"step": 365500
},
{
"epoch": 0.34,
"learning_rate": 8.324563861901204e-05,
"loss": 4.592,
"step": 366000
},
{
"epoch": 0.34,
"learning_rate": 8.322274182676104e-05,
"loss": 4.5889,
"step": 366500
},
{
"epoch": 0.34,
"learning_rate": 8.319984503451005e-05,
"loss": 4.5954,
"step": 367000
},
{
"epoch": 0.34,
"learning_rate": 8.317694824225905e-05,
"loss": 4.6036,
"step": 367500
},
{
"epoch": 0.34,
"learning_rate": 8.315405145000807e-05,
"loss": 4.5898,
"step": 368000
},
{
"epoch": 0.34,
"learning_rate": 8.313120045134157e-05,
"loss": 4.6,
"step": 368500
},
{
"epoch": 0.34,
"learning_rate": 8.310834945267508e-05,
"loss": 4.5979,
"step": 369000
},
{
"epoch": 0.34,
"learning_rate": 8.308545266042409e-05,
"loss": 4.5892,
"step": 369500
},
{
"epoch": 0.34,
"learning_rate": 8.30625558681731e-05,
"loss": 4.6034,
"step": 370000
},
{
"epoch": 0.34,
"learning_rate": 8.303965907592211e-05,
"loss": 4.5901,
"step": 370500
},
{
"epoch": 0.34,
"learning_rate": 8.301676228367111e-05,
"loss": 4.5885,
"step": 371000
},
{
"epoch": 0.34,
"learning_rate": 8.299391128500462e-05,
"loss": 4.5984,
"step": 371500
},
{
"epoch": 0.34,
"learning_rate": 8.297101449275362e-05,
"loss": 4.5994,
"step": 372000
},
{
"epoch": 0.34,
"learning_rate": 8.294811770050263e-05,
"loss": 4.6071,
"step": 372500
},
{
"epoch": 0.34,
"learning_rate": 8.292522090825165e-05,
"loss": 4.5987,
"step": 373000
},
{
"epoch": 0.34,
"learning_rate": 8.290232411600065e-05,
"loss": 4.5988,
"step": 373500
},
{
"epoch": 0.34,
"learning_rate": 8.287942732374966e-05,
"loss": 4.5998,
"step": 374000
},
{
"epoch": 0.34,
"learning_rate": 8.285653053149866e-05,
"loss": 4.5928,
"step": 374500
},
{
"epoch": 0.34,
"learning_rate": 8.283363373924767e-05,
"loss": 4.5908,
"step": 375000
},
{
"epoch": 0.34,
"learning_rate": 8.281073694699669e-05,
"loss": 4.6028,
"step": 375500
},
{
"epoch": 0.34,
"learning_rate": 8.278784015474569e-05,
"loss": 4.596,
"step": 376000
},
{
"epoch": 0.34,
"learning_rate": 8.27649891560792e-05,
"loss": 4.5929,
"step": 376500
},
{
"epoch": 0.35,
"learning_rate": 8.27420923638282e-05,
"loss": 4.5929,
"step": 377000
},
{
"epoch": 0.35,
"learning_rate": 8.271919557157721e-05,
"loss": 4.589,
"step": 377500
},
{
"epoch": 0.35,
"learning_rate": 8.269634457291073e-05,
"loss": 4.6042,
"step": 378000
},
{
"epoch": 0.35,
"learning_rate": 8.267344778065973e-05,
"loss": 4.5838,
"step": 378500
},
{
"epoch": 0.35,
"learning_rate": 8.265055098840874e-05,
"loss": 4.6129,
"step": 379000
},
{
"epoch": 0.35,
"learning_rate": 8.262765419615774e-05,
"loss": 4.5982,
"step": 379500
},
{
"epoch": 0.35,
"learning_rate": 8.260475740390675e-05,
"loss": 4.5797,
"step": 380000
},
{
"epoch": 0.35,
"learning_rate": 8.258186061165575e-05,
"loss": 4.588,
"step": 380500
},
{
"epoch": 0.35,
"learning_rate": 8.255896381940476e-05,
"loss": 4.5937,
"step": 381000
},
{
"epoch": 0.35,
"learning_rate": 8.253606702715376e-05,
"loss": 4.5889,
"step": 381500
},
{
"epoch": 0.35,
"learning_rate": 8.251317023490277e-05,
"loss": 4.5966,
"step": 382000
},
{
"epoch": 0.35,
"learning_rate": 8.249027344265177e-05,
"loss": 4.6009,
"step": 382500
},
{
"epoch": 0.35,
"learning_rate": 8.246742244398529e-05,
"loss": 4.5838,
"step": 383000
},
{
"epoch": 0.35,
"learning_rate": 8.24445256517343e-05,
"loss": 4.6107,
"step": 383500
},
{
"epoch": 0.35,
"learning_rate": 8.24216288594833e-05,
"loss": 4.6128,
"step": 384000
},
{
"epoch": 0.35,
"learning_rate": 8.239873206723231e-05,
"loss": 4.5935,
"step": 384500
},
{
"epoch": 0.35,
"learning_rate": 8.237583527498131e-05,
"loss": 4.5895,
"step": 385000
},
{
"epoch": 0.35,
"learning_rate": 8.235298427631483e-05,
"loss": 4.6011,
"step": 385500
},
{
"epoch": 0.35,
"learning_rate": 8.233008748406384e-05,
"loss": 4.5992,
"step": 386000
},
{
"epoch": 0.35,
"learning_rate": 8.230719069181284e-05,
"loss": 4.6073,
"step": 386500
},
{
"epoch": 0.35,
"learning_rate": 8.228429389956185e-05,
"loss": 4.5914,
"step": 387000
},
{
"epoch": 0.35,
"learning_rate": 8.226139710731085e-05,
"loss": 4.5839,
"step": 387500
},
{
"epoch": 0.36,
"learning_rate": 8.223854610864437e-05,
"loss": 4.5873,
"step": 388000
},
{
"epoch": 0.36,
"learning_rate": 8.221569510997788e-05,
"loss": 4.5894,
"step": 388500
},
{
"epoch": 0.36,
"learning_rate": 8.219279831772688e-05,
"loss": 4.5976,
"step": 389000
},
{
"epoch": 0.36,
"learning_rate": 8.216990152547589e-05,
"loss": 4.6015,
"step": 389500
},
{
"epoch": 0.36,
"learning_rate": 8.214700473322489e-05,
"loss": 4.5936,
"step": 390000
},
{
"epoch": 0.36,
"learning_rate": 8.212410794097391e-05,
"loss": 4.586,
"step": 390500
},
{
"epoch": 0.36,
"learning_rate": 8.210125694230742e-05,
"loss": 4.5947,
"step": 391000
},
{
"epoch": 0.36,
"learning_rate": 8.207836015005642e-05,
"loss": 4.5821,
"step": 391500
},
{
"epoch": 0.36,
"learning_rate": 8.205546335780543e-05,
"loss": 4.5971,
"step": 392000
},
{
"epoch": 0.36,
"learning_rate": 8.203256656555443e-05,
"loss": 4.5956,
"step": 392500
},
{
"epoch": 0.36,
"learning_rate": 8.200966977330345e-05,
"loss": 4.5958,
"step": 393000
},
{
"epoch": 0.36,
"learning_rate": 8.198681877463695e-05,
"loss": 4.604,
"step": 393500
},
{
"epoch": 0.36,
"learning_rate": 8.196392198238596e-05,
"loss": 4.5874,
"step": 394000
},
{
"epoch": 0.36,
"learning_rate": 8.194102519013496e-05,
"loss": 4.5954,
"step": 394500
},
{
"epoch": 0.36,
"learning_rate": 8.191812839788397e-05,
"loss": 4.5907,
"step": 395000
},
{
"epoch": 0.36,
"learning_rate": 8.189523160563299e-05,
"loss": 4.5896,
"step": 395500
},
{
"epoch": 0.36,
"learning_rate": 8.1872334813382e-05,
"loss": 4.5966,
"step": 396000
},
{
"epoch": 0.36,
"learning_rate": 8.1849438021131e-05,
"loss": 4.5887,
"step": 396500
},
{
"epoch": 0.36,
"learning_rate": 8.182654122888e-05,
"loss": 4.5907,
"step": 397000
},
{
"epoch": 0.36,
"learning_rate": 8.180369023021351e-05,
"loss": 4.5904,
"step": 397500
},
{
"epoch": 0.36,
"learning_rate": 8.178079343796253e-05,
"loss": 4.5911,
"step": 398000
},
{
"epoch": 0.36,
"learning_rate": 8.175789664571153e-05,
"loss": 4.5894,
"step": 398500
},
{
"epoch": 0.37,
"learning_rate": 8.173499985346054e-05,
"loss": 4.5895,
"step": 399000
},
{
"epoch": 0.37,
"learning_rate": 8.171210306120954e-05,
"loss": 4.5827,
"step": 399500
},
{
"epoch": 0.37,
"learning_rate": 8.168925206254305e-05,
"loss": 4.5808,
"step": 400000
},
{
"epoch": 0.37,
"learning_rate": 8.166640106387657e-05,
"loss": 4.5848,
"step": 400500
},
{
"epoch": 0.37,
"learning_rate": 8.164350427162557e-05,
"loss": 4.5932,
"step": 401000
},
{
"epoch": 0.37,
"learning_rate": 8.162060747937458e-05,
"loss": 4.5781,
"step": 401500
},
{
"epoch": 0.37,
"learning_rate": 8.159771068712358e-05,
"loss": 4.5793,
"step": 402000
},
{
"epoch": 0.37,
"learning_rate": 8.157481389487259e-05,
"loss": 4.5755,
"step": 402500
},
{
"epoch": 0.37,
"learning_rate": 8.155196289620611e-05,
"loss": 4.586,
"step": 403000
},
{
"epoch": 0.37,
"learning_rate": 8.152906610395511e-05,
"loss": 4.5719,
"step": 403500
},
{
"epoch": 0.37,
"learning_rate": 8.15061693117041e-05,
"loss": 4.5961,
"step": 404000
},
{
"epoch": 0.37,
"learning_rate": 8.148327251945311e-05,
"loss": 4.5871,
"step": 404500
},
{
"epoch": 0.37,
"learning_rate": 8.146037572720211e-05,
"loss": 4.5826,
"step": 405000
},
{
"epoch": 0.37,
"learning_rate": 8.143747893495113e-05,
"loss": 4.5868,
"step": 405500
},
{
"epoch": 0.37,
"learning_rate": 8.141462793628464e-05,
"loss": 4.5922,
"step": 406000
},
{
"epoch": 0.37,
"learning_rate": 8.139173114403364e-05,
"loss": 4.5829,
"step": 406500
},
{
"epoch": 0.37,
"learning_rate": 8.136883435178265e-05,
"loss": 4.5958,
"step": 407000
},
{
"epoch": 0.37,
"learning_rate": 8.134593755953165e-05,
"loss": 4.5815,
"step": 407500
},
{
"epoch": 0.37,
"learning_rate": 8.132304076728067e-05,
"loss": 4.5791,
"step": 408000
},
{
"epoch": 0.37,
"learning_rate": 8.130014397502968e-05,
"loss": 4.5957,
"step": 408500
},
{
"epoch": 0.37,
"learning_rate": 8.127724718277868e-05,
"loss": 4.5963,
"step": 409000
},
{
"epoch": 0.38,
"learning_rate": 8.125435039052769e-05,
"loss": 4.589,
"step": 409500
},
{
"epoch": 0.38,
"learning_rate": 8.123145359827669e-05,
"loss": 4.5776,
"step": 410000
},
{
"epoch": 0.38,
"learning_rate": 8.120855680602571e-05,
"loss": 4.58,
"step": 410500
},
{
"epoch": 0.38,
"learning_rate": 8.118566001377472e-05,
"loss": 4.5863,
"step": 411000
},
{
"epoch": 0.38,
"learning_rate": 8.116276322152372e-05,
"loss": 4.5816,
"step": 411500
},
{
"epoch": 0.38,
"learning_rate": 8.113991222285723e-05,
"loss": 4.5746,
"step": 412000
},
{
"epoch": 0.38,
"learning_rate": 8.111706122419073e-05,
"loss": 4.5765,
"step": 412500
},
{
"epoch": 0.38,
"learning_rate": 8.109416443193975e-05,
"loss": 4.5712,
"step": 413000
},
{
"epoch": 0.38,
"learning_rate": 8.107126763968876e-05,
"loss": 4.5776,
"step": 413500
},
{
"epoch": 0.38,
"learning_rate": 8.104837084743776e-05,
"loss": 4.584,
"step": 414000
},
{
"epoch": 0.38,
"learning_rate": 8.102547405518677e-05,
"loss": 4.5958,
"step": 414500
},
{
"epoch": 0.38,
"learning_rate": 8.100257726293577e-05,
"loss": 4.5776,
"step": 415000
},
{
"epoch": 0.38,
"learning_rate": 8.097968047068478e-05,
"loss": 4.5665,
"step": 415500
},
{
"epoch": 0.38,
"learning_rate": 8.09567836784338e-05,
"loss": 4.584,
"step": 416000
},
{
"epoch": 0.38,
"learning_rate": 8.09339326797673e-05,
"loss": 4.5678,
"step": 416500
},
{
"epoch": 0.38,
"learning_rate": 8.09110358875163e-05,
"loss": 4.5839,
"step": 417000
},
{
"epoch": 0.38,
"learning_rate": 8.088813909526531e-05,
"loss": 4.5762,
"step": 417500
},
{
"epoch": 0.38,
"learning_rate": 8.086524230301432e-05,
"loss": 4.5892,
"step": 418000
},
{
"epoch": 0.38,
"learning_rate": 8.084234551076333e-05,
"loss": 4.5718,
"step": 418500
},
{
"epoch": 0.38,
"learning_rate": 8.081949451209684e-05,
"loss": 4.574,
"step": 419000
},
{
"epoch": 0.38,
"learning_rate": 8.079659771984584e-05,
"loss": 4.5815,
"step": 419500
},
{
"epoch": 0.38,
"learning_rate": 8.077370092759485e-05,
"loss": 4.5789,
"step": 420000
},
{
"epoch": 0.39,
"learning_rate": 8.075080413534385e-05,
"loss": 4.5798,
"step": 420500
},
{
"epoch": 0.39,
"learning_rate": 8.072795313667737e-05,
"loss": 4.5852,
"step": 421000
},
{
"epoch": 0.39,
"learning_rate": 8.070505634442638e-05,
"loss": 4.5742,
"step": 421500
},
{
"epoch": 0.39,
"learning_rate": 8.068215955217538e-05,
"loss": 4.5804,
"step": 422000
},
{
"epoch": 0.39,
"learning_rate": 8.065926275992439e-05,
"loss": 4.5866,
"step": 422500
},
{
"epoch": 0.39,
"learning_rate": 8.06363659676734e-05,
"loss": 4.5731,
"step": 423000
},
{
"epoch": 0.39,
"learning_rate": 8.061346917542241e-05,
"loss": 4.5768,
"step": 423500
},
{
"epoch": 0.39,
"learning_rate": 8.059066397034042e-05,
"loss": 4.5762,
"step": 424000
},
{
"epoch": 0.39,
"learning_rate": 8.056776717808942e-05,
"loss": 4.5823,
"step": 424500
},
{
"epoch": 0.39,
"learning_rate": 8.054487038583843e-05,
"loss": 4.5757,
"step": 425000
},
{
"epoch": 0.39,
"learning_rate": 8.052197359358745e-05,
"loss": 4.5723,
"step": 425500
},
{
"epoch": 0.39,
"learning_rate": 8.049907680133645e-05,
"loss": 4.5979,
"step": 426000
},
{
"epoch": 0.39,
"learning_rate": 8.047618000908546e-05,
"loss": 4.5851,
"step": 426500
},
{
"epoch": 0.39,
"learning_rate": 8.045328321683446e-05,
"loss": 4.5632,
"step": 427000
},
{
"epoch": 0.39,
"learning_rate": 8.043038642458347e-05,
"loss": 4.5904,
"step": 427500
},
{
"epoch": 0.39,
"learning_rate": 8.040753542591699e-05,
"loss": 4.581,
"step": 428000
},
{
"epoch": 0.39,
"learning_rate": 8.038463863366598e-05,
"loss": 4.5808,
"step": 428500
},
{
"epoch": 0.39,
"learning_rate": 8.036174184141498e-05,
"loss": 4.5756,
"step": 429000
},
{
"epoch": 0.39,
"learning_rate": 8.033884504916399e-05,
"loss": 4.5658,
"step": 429500
},
{
"epoch": 0.39,
"learning_rate": 8.03159940504975e-05,
"loss": 4.5819,
"step": 430000
},
{
"epoch": 0.39,
"learning_rate": 8.029309725824651e-05,
"loss": 4.5888,
"step": 430500
},
{
"epoch": 0.39,
"learning_rate": 8.027020046599552e-05,
"loss": 4.5719,
"step": 431000
},
{
"epoch": 0.4,
"learning_rate": 8.024734946732902e-05,
"loss": 4.574,
"step": 431500
},
{
"epoch": 0.4,
"learning_rate": 8.022445267507803e-05,
"loss": 4.5736,
"step": 432000
},
{
"epoch": 0.4,
"learning_rate": 8.020155588282703e-05,
"loss": 4.5823,
"step": 432500
},
{
"epoch": 0.4,
"learning_rate": 8.017865909057605e-05,
"loss": 4.5895,
"step": 433000
},
{
"epoch": 0.4,
"learning_rate": 8.015576229832506e-05,
"loss": 4.5695,
"step": 433500
},
{
"epoch": 0.4,
"learning_rate": 8.013286550607406e-05,
"loss": 4.5843,
"step": 434000
},
{
"epoch": 0.4,
"learning_rate": 8.010996871382307e-05,
"loss": 4.5808,
"step": 434500
},
{
"epoch": 0.4,
"learning_rate": 8.008707192157207e-05,
"loss": 4.5852,
"step": 435000
},
{
"epoch": 0.4,
"learning_rate": 8.006417512932109e-05,
"loss": 4.5786,
"step": 435500
},
{
"epoch": 0.4,
"learning_rate": 8.00412783370701e-05,
"loss": 4.5881,
"step": 436000
},
{
"epoch": 0.4,
"learning_rate": 8.00183815448191e-05,
"loss": 4.5682,
"step": 436500
},
{
"epoch": 0.4,
"learning_rate": 7.99954847525681e-05,
"loss": 4.5615,
"step": 437000
},
{
"epoch": 0.4,
"learning_rate": 7.997258796031711e-05,
"loss": 4.5721,
"step": 437500
},
{
"epoch": 0.4,
"learning_rate": 7.994973696165063e-05,
"loss": 4.5786,
"step": 438000
},
{
"epoch": 0.4,
"learning_rate": 7.992693175656864e-05,
"loss": 4.575,
"step": 438500
},
{
"epoch": 0.4,
"learning_rate": 7.990403496431764e-05,
"loss": 4.5544,
"step": 439000
},
{
"epoch": 0.4,
"learning_rate": 7.988113817206665e-05,
"loss": 4.565,
"step": 439500
},
{
"epoch": 0.4,
"learning_rate": 7.985824137981565e-05,
"loss": 4.5568,
"step": 440000
},
{
"epoch": 0.4,
"learning_rate": 7.983534458756467e-05,
"loss": 4.5711,
"step": 440500
},
{
"epoch": 0.4,
"learning_rate": 7.981244779531367e-05,
"loss": 4.5771,
"step": 441000
},
{
"epoch": 0.4,
"learning_rate": 7.978955100306268e-05,
"loss": 4.5804,
"step": 441500
},
{
"epoch": 0.4,
"learning_rate": 7.976665421081168e-05,
"loss": 4.573,
"step": 442000
},
{
"epoch": 0.41,
"learning_rate": 7.974375741856069e-05,
"loss": 4.5795,
"step": 442500
},
{
"epoch": 0.41,
"learning_rate": 7.97208606263097e-05,
"loss": 4.5832,
"step": 443000
},
{
"epoch": 0.41,
"learning_rate": 7.969800962764321e-05,
"loss": 4.5672,
"step": 443500
},
{
"epoch": 0.41,
"learning_rate": 7.967511283539222e-05,
"loss": 4.5591,
"step": 444000
},
{
"epoch": 0.41,
"learning_rate": 7.965221604314122e-05,
"loss": 4.5673,
"step": 444500
},
{
"epoch": 0.41,
"learning_rate": 7.962931925089023e-05,
"loss": 4.5655,
"step": 445000
},
{
"epoch": 0.41,
"learning_rate": 7.960642245863923e-05,
"loss": 4.5721,
"step": 445500
},
{
"epoch": 0.41,
"learning_rate": 7.958352566638825e-05,
"loss": 4.5587,
"step": 446000
},
{
"epoch": 0.41,
"learning_rate": 7.956062887413726e-05,
"loss": 4.5627,
"step": 446500
},
{
"epoch": 0.41,
"learning_rate": 7.953773208188626e-05,
"loss": 4.5877,
"step": 447000
},
{
"epoch": 0.41,
"learning_rate": 7.951483528963527e-05,
"loss": 4.5733,
"step": 447500
},
{
"epoch": 0.41,
"learning_rate": 7.949193849738427e-05,
"loss": 4.5746,
"step": 448000
},
{
"epoch": 0.41,
"learning_rate": 7.946904170513328e-05,
"loss": 4.5715,
"step": 448500
},
{
"epoch": 0.41,
"learning_rate": 7.94461449128823e-05,
"loss": 4.5766,
"step": 449000
},
{
"epoch": 0.41,
"learning_rate": 7.94232939142158e-05,
"loss": 4.5699,
"step": 449500
},
{
"epoch": 0.41,
"learning_rate": 7.940039712196481e-05,
"loss": 4.5695,
"step": 450000
},
{
"epoch": 0.41,
"learning_rate": 7.937750032971381e-05,
"loss": 4.5614,
"step": 450500
},
{
"epoch": 0.41,
"learning_rate": 7.935460353746282e-05,
"loss": 4.5627,
"step": 451000
},
{
"epoch": 0.41,
"learning_rate": 7.933175253879634e-05,
"loss": 4.5673,
"step": 451500
},
{
"epoch": 0.41,
"learning_rate": 7.930885574654534e-05,
"loss": 4.569,
"step": 452000
},
{
"epoch": 0.41,
"learning_rate": 7.928595895429435e-05,
"loss": 4.5614,
"step": 452500
},
{
"epoch": 0.41,
"learning_rate": 7.926310795562785e-05,
"loss": 4.5561,
"step": 453000
},
{
"epoch": 0.42,
"learning_rate": 7.924021116337686e-05,
"loss": 4.5649,
"step": 453500
},
{
"epoch": 0.42,
"learning_rate": 7.921731437112586e-05,
"loss": 4.5602,
"step": 454000
},
{
"epoch": 0.42,
"learning_rate": 7.919441757887487e-05,
"loss": 4.5516,
"step": 454500
},
{
"epoch": 0.42,
"learning_rate": 7.917152078662387e-05,
"loss": 4.5816,
"step": 455000
},
{
"epoch": 0.42,
"learning_rate": 7.914862399437288e-05,
"loss": 4.575,
"step": 455500
},
{
"epoch": 0.42,
"learning_rate": 7.91257272021219e-05,
"loss": 4.5682,
"step": 456000
},
{
"epoch": 0.42,
"learning_rate": 7.91028304098709e-05,
"loss": 4.5717,
"step": 456500
},
{
"epoch": 0.42,
"learning_rate": 7.907997941120441e-05,
"loss": 4.5566,
"step": 457000
},
{
"epoch": 0.42,
"learning_rate": 7.905708261895341e-05,
"loss": 4.5753,
"step": 457500
},
{
"epoch": 0.42,
"learning_rate": 7.903418582670242e-05,
"loss": 4.5744,
"step": 458000
},
{
"epoch": 0.42,
"learning_rate": 7.901133482803594e-05,
"loss": 4.5645,
"step": 458500
},
{
"epoch": 0.42,
"learning_rate": 7.898843803578494e-05,
"loss": 4.5685,
"step": 459000
},
{
"epoch": 0.42,
"learning_rate": 7.896554124353395e-05,
"loss": 4.5628,
"step": 459500
},
{
"epoch": 0.42,
"learning_rate": 7.894264445128295e-05,
"loss": 4.5802,
"step": 460000
},
{
"epoch": 0.42,
"learning_rate": 7.891979345261647e-05,
"loss": 4.5691,
"step": 460500
},
{
"epoch": 0.42,
"learning_rate": 7.889689666036548e-05,
"loss": 4.5733,
"step": 461000
},
{
"epoch": 0.42,
"learning_rate": 7.887399986811448e-05,
"loss": 4.5671,
"step": 461500
},
{
"epoch": 0.42,
"learning_rate": 7.885110307586349e-05,
"loss": 4.5643,
"step": 462000
},
{
"epoch": 0.42,
"learning_rate": 7.882820628361249e-05,
"loss": 4.564,
"step": 462500
},
{
"epoch": 0.42,
"learning_rate": 7.88053094913615e-05,
"loss": 4.5507,
"step": 463000
},
{
"epoch": 0.42,
"learning_rate": 7.878241269911051e-05,
"loss": 4.5662,
"step": 463500
},
{
"epoch": 0.42,
"learning_rate": 7.875951590685952e-05,
"loss": 4.5726,
"step": 464000
},
{
"epoch": 0.43,
"learning_rate": 7.873666490819302e-05,
"loss": 4.5638,
"step": 464500
},
{
"epoch": 0.43,
"learning_rate": 7.871376811594203e-05,
"loss": 4.5496,
"step": 465000
},
{
"epoch": 0.43,
"learning_rate": 7.869087132369104e-05,
"loss": 4.5691,
"step": 465500
},
{
"epoch": 0.43,
"learning_rate": 7.866797453144005e-05,
"loss": 4.5621,
"step": 466000
},
{
"epoch": 0.43,
"learning_rate": 7.864507773918906e-05,
"loss": 4.5756,
"step": 466500
},
{
"epoch": 0.43,
"learning_rate": 7.862222674052256e-05,
"loss": 4.5697,
"step": 467000
},
{
"epoch": 0.43,
"learning_rate": 7.859932994827157e-05,
"loss": 4.5553,
"step": 467500
},
{
"epoch": 0.43,
"learning_rate": 7.857643315602057e-05,
"loss": 4.5675,
"step": 468000
},
{
"epoch": 0.43,
"learning_rate": 7.855353636376959e-05,
"loss": 4.5694,
"step": 468500
},
{
"epoch": 0.43,
"learning_rate": 7.85306395715186e-05,
"loss": 4.5648,
"step": 469000
},
{
"epoch": 0.43,
"learning_rate": 7.85077427792676e-05,
"loss": 4.5601,
"step": 469500
},
{
"epoch": 0.43,
"learning_rate": 7.848489178060111e-05,
"loss": 4.5576,
"step": 470000
},
{
"epoch": 0.43,
"learning_rate": 7.846199498835011e-05,
"loss": 4.557,
"step": 470500
},
{
"epoch": 0.43,
"learning_rate": 7.843909819609913e-05,
"loss": 4.5717,
"step": 471000
},
{
"epoch": 0.43,
"learning_rate": 7.841624719743264e-05,
"loss": 4.5558,
"step": 471500
},
{
"epoch": 0.43,
"learning_rate": 7.839335040518164e-05,
"loss": 4.5685,
"step": 472000
},
{
"epoch": 0.43,
"learning_rate": 7.837045361293065e-05,
"loss": 4.5667,
"step": 472500
},
{
"epoch": 0.43,
"learning_rate": 7.834755682067965e-05,
"loss": 4.5577,
"step": 473000
},
{
"epoch": 0.43,
"learning_rate": 7.832466002842866e-05,
"loss": 4.5643,
"step": 473500
},
{
"epoch": 0.43,
"learning_rate": 7.830176323617768e-05,
"loss": 4.5669,
"step": 474000
},
{
"epoch": 0.43,
"learning_rate": 7.827886644392668e-05,
"loss": 4.5641,
"step": 474500
},
{
"epoch": 0.44,
"learning_rate": 7.825596965167569e-05,
"loss": 4.5535,
"step": 475000
},
{
"epoch": 0.44,
"learning_rate": 7.823307285942469e-05,
"loss": 4.5644,
"step": 475500
},
{
"epoch": 0.44,
"learning_rate": 7.82102218607582e-05,
"loss": 4.5601,
"step": 476000
},
{
"epoch": 0.44,
"learning_rate": 7.818732506850722e-05,
"loss": 4.5687,
"step": 476500
},
{
"epoch": 0.44,
"learning_rate": 7.816442827625622e-05,
"loss": 4.5485,
"step": 477000
},
{
"epoch": 0.44,
"learning_rate": 7.814153148400523e-05,
"loss": 4.5558,
"step": 477500
},
{
"epoch": 0.44,
"learning_rate": 7.811868048533872e-05,
"loss": 4.5549,
"step": 478000
},
{
"epoch": 0.44,
"learning_rate": 7.809578369308774e-05,
"loss": 4.5604,
"step": 478500
},
{
"epoch": 0.44,
"learning_rate": 7.807288690083674e-05,
"loss": 4.55,
"step": 479000
},
{
"epoch": 0.44,
"learning_rate": 7.804999010858575e-05,
"loss": 4.5581,
"step": 479500
},
{
"epoch": 0.44,
"learning_rate": 7.802709331633475e-05,
"loss": 4.5707,
"step": 480000
},
{
"epoch": 0.44,
"learning_rate": 7.800424231766826e-05,
"loss": 4.5598,
"step": 480500
},
{
"epoch": 0.44,
"learning_rate": 7.798134552541728e-05,
"loss": 4.5585,
"step": 481000
},
{
"epoch": 0.44,
"learning_rate": 7.795844873316628e-05,
"loss": 4.5602,
"step": 481500
},
{
"epoch": 0.44,
"learning_rate": 7.793555194091529e-05,
"loss": 4.5559,
"step": 482000
},
{
"epoch": 0.44,
"learning_rate": 7.791270094224879e-05,
"loss": 4.5481,
"step": 482500
},
{
"epoch": 0.44,
"learning_rate": 7.78898041499978e-05,
"loss": 4.5692,
"step": 483000
},
{
"epoch": 0.44,
"learning_rate": 7.786690735774682e-05,
"loss": 4.5554,
"step": 483500
},
{
"epoch": 0.44,
"learning_rate": 7.784401056549582e-05,
"loss": 4.5558,
"step": 484000
},
{
"epoch": 0.44,
"learning_rate": 7.782111377324483e-05,
"loss": 4.5503,
"step": 484500
},
{
"epoch": 0.44,
"learning_rate": 7.779826277457833e-05,
"loss": 4.5467,
"step": 485000
},
{
"epoch": 0.44,
"learning_rate": 7.777536598232734e-05,
"loss": 4.5649,
"step": 485500
},
{
"epoch": 0.45,
"learning_rate": 7.775246919007635e-05,
"loss": 4.5644,
"step": 486000
},
{
"epoch": 0.45,
"learning_rate": 7.772961819140986e-05,
"loss": 4.5455,
"step": 486500
},
{
"epoch": 0.45,
"learning_rate": 7.770672139915887e-05,
"loss": 4.5709,
"step": 487000
},
{
"epoch": 0.45,
"learning_rate": 7.768382460690787e-05,
"loss": 4.5536,
"step": 487500
},
{
"epoch": 0.45,
"learning_rate": 7.766097360824139e-05,
"loss": 4.5614,
"step": 488000
},
{
"epoch": 0.45,
"learning_rate": 7.76380768159904e-05,
"loss": 4.5719,
"step": 488500
},
{
"epoch": 0.45,
"learning_rate": 7.76151800237394e-05,
"loss": 4.5571,
"step": 489000
},
{
"epoch": 0.45,
"learning_rate": 7.75922832314884e-05,
"loss": 4.5477,
"step": 489500
},
{
"epoch": 0.45,
"learning_rate": 7.756938643923741e-05,
"loss": 4.5465,
"step": 490000
},
{
"epoch": 0.45,
"learning_rate": 7.754648964698641e-05,
"loss": 4.5567,
"step": 490500
},
{
"epoch": 0.45,
"learning_rate": 7.752359285473543e-05,
"loss": 4.5757,
"step": 491000
},
{
"epoch": 0.45,
"learning_rate": 7.750069606248444e-05,
"loss": 4.5574,
"step": 491500
},
{
"epoch": 0.45,
"learning_rate": 7.747779927023344e-05,
"loss": 4.5572,
"step": 492000
},
{
"epoch": 0.45,
"learning_rate": 7.745490247798245e-05,
"loss": 4.5575,
"step": 492500
},
{
"epoch": 0.45,
"learning_rate": 7.743200568573145e-05,
"loss": 4.565,
"step": 493000
},
{
"epoch": 0.45,
"learning_rate": 7.740910889348046e-05,
"loss": 4.5519,
"step": 493500
},
{
"epoch": 0.45,
"learning_rate": 7.738625789481398e-05,
"loss": 4.5427,
"step": 494000
},
{
"epoch": 0.45,
"learning_rate": 7.736336110256298e-05,
"loss": 4.5671,
"step": 494500
},
{
"epoch": 0.45,
"learning_rate": 7.734051010389649e-05,
"loss": 4.5689,
"step": 495000
},
{
"epoch": 0.45,
"learning_rate": 7.73176133116455e-05,
"loss": 4.5583,
"step": 495500
},
{
"epoch": 0.45,
"learning_rate": 7.729471651939451e-05,
"loss": 4.549,
"step": 496000
},
{
"epoch": 0.45,
"learning_rate": 7.727181972714352e-05,
"loss": 4.5552,
"step": 496500
},
{
"epoch": 0.46,
"learning_rate": 7.724892293489252e-05,
"loss": 4.5654,
"step": 497000
},
{
"epoch": 0.46,
"learning_rate": 7.722602614264153e-05,
"loss": 4.5525,
"step": 497500
},
{
"epoch": 0.46,
"learning_rate": 7.720312935039053e-05,
"loss": 4.5559,
"step": 498000
},
{
"epoch": 0.46,
"learning_rate": 7.718023255813954e-05,
"loss": 4.5545,
"step": 498500
},
{
"epoch": 0.46,
"learning_rate": 7.715733576588856e-05,
"loss": 4.5493,
"step": 499000
},
{
"epoch": 0.46,
"learning_rate": 7.713448476722206e-05,
"loss": 4.5495,
"step": 499500
},
{
"epoch": 0.46,
"learning_rate": 7.711158797497107e-05,
"loss": 4.5593,
"step": 500000
},
{
"epoch": 0.46,
"learning_rate": 7.708869118272007e-05,
"loss": 4.5554,
"step": 500500
},
{
"epoch": 0.46,
"learning_rate": 7.706579439046908e-05,
"loss": 4.5548,
"step": 501000
},
{
"epoch": 0.46,
"learning_rate": 7.70428975982181e-05,
"loss": 4.5456,
"step": 501500
},
{
"epoch": 0.46,
"learning_rate": 7.702004659955159e-05,
"loss": 4.5564,
"step": 502000
},
{
"epoch": 0.46,
"learning_rate": 7.699714980730059e-05,
"loss": 4.5602,
"step": 502500
},
{
"epoch": 0.46,
"learning_rate": 7.69742988086341e-05,
"loss": 4.5634,
"step": 503000
},
{
"epoch": 0.46,
"learning_rate": 7.695140201638312e-05,
"loss": 4.563,
"step": 503500
},
{
"epoch": 0.46,
"learning_rate": 7.692850522413212e-05,
"loss": 4.5379,
"step": 504000
},
{
"epoch": 0.46,
"learning_rate": 7.690560843188113e-05,
"loss": 4.5451,
"step": 504500
},
{
"epoch": 0.46,
"learning_rate": 7.688271163963013e-05,
"loss": 4.5509,
"step": 505000
},
{
"epoch": 0.46,
"learning_rate": 7.685981484737914e-05,
"loss": 4.5551,
"step": 505500
},
{
"epoch": 0.46,
"learning_rate": 7.683691805512814e-05,
"loss": 4.5557,
"step": 506000
},
{
"epoch": 0.46,
"learning_rate": 7.681406705646166e-05,
"loss": 4.5332,
"step": 506500
},
{
"epoch": 0.46,
"learning_rate": 7.679117026421067e-05,
"loss": 4.5512,
"step": 507000
},
{
"epoch": 0.46,
"learning_rate": 7.676827347195967e-05,
"loss": 4.543,
"step": 507500
},
{
"epoch": 0.47,
"learning_rate": 7.674537667970868e-05,
"loss": 4.5435,
"step": 508000
},
{
"epoch": 0.47,
"learning_rate": 7.672247988745768e-05,
"loss": 4.543,
"step": 508500
},
{
"epoch": 0.47,
"learning_rate": 7.66995830952067e-05,
"loss": 4.5551,
"step": 509000
},
{
"epoch": 0.47,
"learning_rate": 7.66766863029557e-05,
"loss": 4.5523,
"step": 509500
},
{
"epoch": 0.47,
"learning_rate": 7.665378951070471e-05,
"loss": 4.5601,
"step": 510000
},
{
"epoch": 0.47,
"learning_rate": 7.663089271845372e-05,
"loss": 4.5537,
"step": 510500
},
{
"epoch": 0.47,
"learning_rate": 7.660804171978722e-05,
"loss": 4.5377,
"step": 511000
},
{
"epoch": 0.47,
"learning_rate": 7.658514492753624e-05,
"loss": 4.5348,
"step": 511500
},
{
"epoch": 0.47,
"learning_rate": 7.656224813528524e-05,
"loss": 4.5546,
"step": 512000
},
{
"epoch": 0.47,
"learning_rate": 7.653935134303425e-05,
"loss": 4.5421,
"step": 512500
},
{
"epoch": 0.47,
"learning_rate": 7.651650034436776e-05,
"loss": 4.5403,
"step": 513000
},
{
"epoch": 0.47,
"learning_rate": 7.649364934570127e-05,
"loss": 4.5399,
"step": 513500
},
{
"epoch": 0.47,
"learning_rate": 7.647075255345028e-05,
"loss": 4.5485,
"step": 514000
},
{
"epoch": 0.47,
"learning_rate": 7.644785576119928e-05,
"loss": 4.5399,
"step": 514500
},
{
"epoch": 0.47,
"learning_rate": 7.642495896894829e-05,
"loss": 4.5458,
"step": 515000
},
{
"epoch": 0.47,
"learning_rate": 7.64020621766973e-05,
"loss": 4.5466,
"step": 515500
},
{
"epoch": 0.47,
"learning_rate": 7.63791653844463e-05,
"loss": 4.558,
"step": 516000
},
{
"epoch": 0.47,
"learning_rate": 7.635626859219532e-05,
"loss": 4.5493,
"step": 516500
},
{
"epoch": 0.47,
"learning_rate": 7.633337179994432e-05,
"loss": 4.5411,
"step": 517000
},
{
"epoch": 0.47,
"learning_rate": 7.631052080127783e-05,
"loss": 4.5522,
"step": 517500
},
{
"epoch": 0.47,
"learning_rate": 7.628762400902683e-05,
"loss": 4.538,
"step": 518000
},
{
"epoch": 0.47,
"learning_rate": 7.626472721677584e-05,
"loss": 4.5591,
"step": 518500
},
{
"epoch": 0.48,
"learning_rate": 7.624183042452486e-05,
"loss": 4.538,
"step": 519000
},
{
"epoch": 0.48,
"learning_rate": 7.621893363227386e-05,
"loss": 4.5448,
"step": 519500
},
{
"epoch": 0.48,
"learning_rate": 7.619603684002287e-05,
"loss": 4.5493,
"step": 520000
},
{
"epoch": 0.48,
"learning_rate": 7.617314004777187e-05,
"loss": 4.5587,
"step": 520500
},
{
"epoch": 0.48,
"learning_rate": 7.615024325552088e-05,
"loss": 4.5315,
"step": 521000
},
{
"epoch": 0.48,
"learning_rate": 7.61273922568544e-05,
"loss": 4.5492,
"step": 521500
},
{
"epoch": 0.48,
"learning_rate": 7.61044954646034e-05,
"loss": 4.5423,
"step": 522000
},
{
"epoch": 0.48,
"learning_rate": 7.608159867235241e-05,
"loss": 4.555,
"step": 522500
},
{
"epoch": 0.48,
"learning_rate": 7.605870188010141e-05,
"loss": 4.539,
"step": 523000
},
{
"epoch": 0.48,
"learning_rate": 7.603580508785042e-05,
"loss": 4.5549,
"step": 523500
},
{
"epoch": 0.48,
"learning_rate": 7.601290829559942e-05,
"loss": 4.5437,
"step": 524000
},
{
"epoch": 0.48,
"learning_rate": 7.599001150334844e-05,
"loss": 4.5429,
"step": 524500
},
{
"epoch": 0.48,
"learning_rate": 7.596716050468195e-05,
"loss": 4.5537,
"step": 525000
},
{
"epoch": 0.48,
"learning_rate": 7.594426371243095e-05,
"loss": 4.5464,
"step": 525500
},
{
"epoch": 0.48,
"learning_rate": 7.592136692017996e-05,
"loss": 4.5455,
"step": 526000
},
{
"epoch": 0.48,
"learning_rate": 7.589847012792896e-05,
"loss": 4.55,
"step": 526500
},
{
"epoch": 0.48,
"learning_rate": 7.587557333567797e-05,
"loss": 4.5448,
"step": 527000
},
{
"epoch": 0.48,
"learning_rate": 7.585267654342697e-05,
"loss": 4.5453,
"step": 527500
},
{
"epoch": 0.48,
"learning_rate": 7.582977975117598e-05,
"loss": 4.5503,
"step": 528000
},
{
"epoch": 0.48,
"learning_rate": 7.580688295892498e-05,
"loss": 4.5302,
"step": 528500
},
{
"epoch": 0.48,
"learning_rate": 7.578398616667399e-05,
"loss": 4.5426,
"step": 529000
},
{
"epoch": 0.48,
"learning_rate": 7.5761089374423e-05,
"loss": 4.5538,
"step": 529500
},
{
"epoch": 0.49,
"learning_rate": 7.573823837575651e-05,
"loss": 4.5406,
"step": 530000
},
{
"epoch": 0.49,
"learning_rate": 7.571534158350552e-05,
"loss": 4.5467,
"step": 530500
},
{
"epoch": 0.49,
"learning_rate": 7.569249058483902e-05,
"loss": 4.5473,
"step": 531000
},
{
"epoch": 0.49,
"learning_rate": 7.566959379258804e-05,
"loss": 4.5513,
"step": 531500
},
{
"epoch": 0.49,
"learning_rate": 7.564669700033705e-05,
"loss": 4.5551,
"step": 532000
},
{
"epoch": 0.49,
"learning_rate": 7.562380020808605e-05,
"loss": 4.5501,
"step": 532500
},
{
"epoch": 0.49,
"learning_rate": 7.560090341583506e-05,
"loss": 4.5249,
"step": 533000
},
{
"epoch": 0.49,
"learning_rate": 7.557800662358406e-05,
"loss": 4.5449,
"step": 533500
},
{
"epoch": 0.49,
"learning_rate": 7.555510983133307e-05,
"loss": 4.5346,
"step": 534000
},
{
"epoch": 0.49,
"learning_rate": 7.553221303908208e-05,
"loss": 4.5552,
"step": 534500
},
{
"epoch": 0.49,
"learning_rate": 7.550931624683109e-05,
"loss": 4.5346,
"step": 535000
},
{
"epoch": 0.49,
"learning_rate": 7.54864652481646e-05,
"loss": 4.5524,
"step": 535500
},
{
"epoch": 0.49,
"learning_rate": 7.54635684559136e-05,
"loss": 4.5393,
"step": 536000
},
{
"epoch": 0.49,
"learning_rate": 7.54406716636626e-05,
"loss": 4.5409,
"step": 536500
},
{
"epoch": 0.49,
"learning_rate": 7.541782066499612e-05,
"loss": 4.5425,
"step": 537000
},
{
"epoch": 0.49,
"learning_rate": 7.539492387274513e-05,
"loss": 4.5398,
"step": 537500
},
{
"epoch": 0.49,
"learning_rate": 7.537202708049413e-05,
"loss": 4.5412,
"step": 538000
},
{
"epoch": 0.49,
"learning_rate": 7.534913028824314e-05,
"loss": 4.5556,
"step": 538500
},
{
"epoch": 0.49,
"learning_rate": 7.532627928957664e-05,
"loss": 4.5316,
"step": 539000
},
{
"epoch": 0.49,
"learning_rate": 7.530338249732566e-05,
"loss": 4.546,
"step": 539500
},
{
"epoch": 0.49,
"learning_rate": 7.528048570507467e-05,
"loss": 4.5482,
"step": 540000
},
{
"epoch": 0.5,
"learning_rate": 7.525758891282367e-05,
"loss": 4.5461,
"step": 540500
},
{
"epoch": 0.5,
"learning_rate": 7.523469212057268e-05,
"loss": 4.5496,
"step": 541000
},
{
"epoch": 0.5,
"learning_rate": 7.521184112190618e-05,
"loss": 4.5432,
"step": 541500
},
{
"epoch": 0.5,
"learning_rate": 7.51889443296552e-05,
"loss": 4.5257,
"step": 542000
},
{
"epoch": 0.5,
"learning_rate": 7.516604753740421e-05,
"loss": 4.5619,
"step": 542500
},
{
"epoch": 0.5,
"learning_rate": 7.514315074515321e-05,
"loss": 4.5253,
"step": 543000
},
{
"epoch": 0.5,
"learning_rate": 7.512029974648672e-05,
"loss": 4.538,
"step": 543500
},
{
"epoch": 0.5,
"learning_rate": 7.509740295423572e-05,
"loss": 4.5265,
"step": 544000
},
{
"epoch": 0.5,
"learning_rate": 7.507450616198474e-05,
"loss": 4.5397,
"step": 544500
},
{
"epoch": 0.5,
"learning_rate": 7.505160936973375e-05,
"loss": 4.538,
"step": 545000
},
{
"epoch": 0.5,
"learning_rate": 7.502875837106725e-05,
"loss": 4.5379,
"step": 545500
},
{
"epoch": 0.5,
"learning_rate": 7.500586157881626e-05,
"loss": 4.5465,
"step": 546000
},
{
"epoch": 0.5,
"learning_rate": 7.498296478656526e-05,
"loss": 4.5483,
"step": 546500
},
{
"epoch": 0.5,
"learning_rate": 7.496006799431428e-05,
"loss": 4.5478,
"step": 547000
},
{
"epoch": 0.5,
"learning_rate": 7.493717120206329e-05,
"loss": 4.5451,
"step": 547500
},
{
"epoch": 0.5,
"learning_rate": 7.491427440981229e-05,
"loss": 4.5371,
"step": 548000
},
{
"epoch": 0.5,
"learning_rate": 7.48914234111458e-05,
"loss": 4.5331,
"step": 548500
},
{
"epoch": 0.5,
"learning_rate": 7.48685266188948e-05,
"loss": 4.5357,
"step": 549000
},
{
"epoch": 0.5,
"learning_rate": 7.484562982664382e-05,
"loss": 4.5315,
"step": 549500
},
{
"epoch": 0.5,
"learning_rate": 7.482273303439283e-05,
"loss": 4.5457,
"step": 550000
},
{
"epoch": 0.5,
"learning_rate": 7.479988203572633e-05,
"loss": 4.536,
"step": 550500
},
{
"epoch": 0.5,
"learning_rate": 7.477698524347532e-05,
"loss": 4.538,
"step": 551000
},
{
"epoch": 0.51,
"learning_rate": 7.475408845122434e-05,
"loss": 4.5409,
"step": 551500
},
{
"epoch": 0.51,
"learning_rate": 7.473119165897335e-05,
"loss": 4.5533,
"step": 552000
},
{
"epoch": 0.51,
"learning_rate": 7.470829486672235e-05,
"loss": 4.5393,
"step": 552500
},
{
"epoch": 0.51,
"learning_rate": 7.468539807447136e-05,
"loss": 4.5363,
"step": 553000
},
{
"epoch": 0.51,
"learning_rate": 7.466254707580486e-05,
"loss": 4.5348,
"step": 553500
},
{
"epoch": 0.51,
"learning_rate": 7.463965028355388e-05,
"loss": 4.5322,
"step": 554000
},
{
"epoch": 0.51,
"learning_rate": 7.461675349130289e-05,
"loss": 4.533,
"step": 554500
},
{
"epoch": 0.51,
"learning_rate": 7.459385669905189e-05,
"loss": 4.5346,
"step": 555000
},
{
"epoch": 0.51,
"learning_rate": 7.45709599068009e-05,
"loss": 4.5383,
"step": 555500
},
{
"epoch": 0.51,
"learning_rate": 7.45480631145499e-05,
"loss": 4.5306,
"step": 556000
},
{
"epoch": 0.51,
"learning_rate": 7.45251663222989e-05,
"loss": 4.5262,
"step": 556500
},
{
"epoch": 0.51,
"learning_rate": 7.450226953004792e-05,
"loss": 4.5397,
"step": 557000
},
{
"epoch": 0.51,
"learning_rate": 7.447941853138143e-05,
"loss": 4.5211,
"step": 557500
},
{
"epoch": 0.51,
"learning_rate": 7.445652173913044e-05,
"loss": 4.5246,
"step": 558000
},
{
"epoch": 0.51,
"learning_rate": 7.443362494687944e-05,
"loss": 4.5353,
"step": 558500
},
{
"epoch": 0.51,
"learning_rate": 7.441072815462845e-05,
"loss": 4.5254,
"step": 559000
},
{
"epoch": 0.51,
"learning_rate": 7.438783136237746e-05,
"loss": 4.5233,
"step": 559500
},
{
"epoch": 0.51,
"learning_rate": 7.436498036371097e-05,
"loss": 4.5454,
"step": 560000
},
{
"epoch": 0.51,
"learning_rate": 7.434208357145997e-05,
"loss": 4.5399,
"step": 560500
},
{
"epoch": 0.51,
"learning_rate": 7.431923257279348e-05,
"loss": 4.5343,
"step": 561000
},
{
"epoch": 0.51,
"learning_rate": 7.42963357805425e-05,
"loss": 4.5307,
"step": 561500
},
{
"epoch": 0.51,
"learning_rate": 7.42734389882915e-05,
"loss": 4.5268,
"step": 562000
},
{
"epoch": 0.52,
"learning_rate": 7.425054219604051e-05,
"loss": 4.5428,
"step": 562500
},
{
"epoch": 0.52,
"learning_rate": 7.422764540378951e-05,
"loss": 4.5391,
"step": 563000
},
{
"epoch": 0.52,
"learning_rate": 7.420474861153852e-05,
"loss": 4.5445,
"step": 563500
},
{
"epoch": 0.52,
"learning_rate": 7.418185181928752e-05,
"loss": 4.5354,
"step": 564000
},
{
"epoch": 0.52,
"learning_rate": 7.415895502703654e-05,
"loss": 4.5395,
"step": 564500
},
{
"epoch": 0.52,
"learning_rate": 7.413610402837005e-05,
"loss": 4.5385,
"step": 565000
},
{
"epoch": 0.52,
"learning_rate": 7.411320723611905e-05,
"loss": 4.5325,
"step": 565500
},
{
"epoch": 0.52,
"learning_rate": 7.409031044386806e-05,
"loss": 4.5407,
"step": 566000
},
{
"epoch": 0.52,
"learning_rate": 7.406741365161706e-05,
"loss": 4.5347,
"step": 566500
},
{
"epoch": 0.52,
"learning_rate": 7.404451685936608e-05,
"loss": 4.538,
"step": 567000
},
{
"epoch": 0.52,
"learning_rate": 7.402166586069959e-05,
"loss": 4.5477,
"step": 567500
},
{
"epoch": 0.52,
"learning_rate": 7.399876906844859e-05,
"loss": 4.5279,
"step": 568000
},
{
"epoch": 0.52,
"learning_rate": 7.39758722761976e-05,
"loss": 4.5261,
"step": 568500
},
{
"epoch": 0.52,
"learning_rate": 7.39529754839466e-05,
"loss": 4.5406,
"step": 569000
},
{
"epoch": 0.52,
"learning_rate": 7.393012448528012e-05,
"loss": 4.5364,
"step": 569500
},
{
"epoch": 0.52,
"learning_rate": 7.390722769302913e-05,
"loss": 4.5423,
"step": 570000
},
{
"epoch": 0.52,
"learning_rate": 7.388433090077813e-05,
"loss": 4.5374,
"step": 570500
},
{
"epoch": 0.52,
"learning_rate": 7.386143410852714e-05,
"loss": 4.5263,
"step": 571000
},
{
"epoch": 0.52,
"learning_rate": 7.383858310986064e-05,
"loss": 4.5312,
"step": 571500
},
{
"epoch": 0.52,
"learning_rate": 7.381568631760966e-05,
"loss": 4.5237,
"step": 572000
},
{
"epoch": 0.52,
"learning_rate": 7.379278952535867e-05,
"loss": 4.5272,
"step": 572500
},
{
"epoch": 0.52,
"learning_rate": 7.376989273310767e-05,
"loss": 4.5512,
"step": 573000
},
{
"epoch": 0.53,
"learning_rate": 7.374704173444118e-05,
"loss": 4.543,
"step": 573500
},
{
"epoch": 0.53,
"learning_rate": 7.372414494219018e-05,
"loss": 4.5244,
"step": 574000
},
{
"epoch": 0.53,
"learning_rate": 7.37012939435237e-05,
"loss": 4.5209,
"step": 574500
},
{
"epoch": 0.53,
"learning_rate": 7.367839715127269e-05,
"loss": 4.5363,
"step": 575000
},
{
"epoch": 0.53,
"learning_rate": 7.36555003590217e-05,
"loss": 4.5286,
"step": 575500
},
{
"epoch": 0.53,
"learning_rate": 7.36326035667707e-05,
"loss": 4.54,
"step": 576000
},
{
"epoch": 0.53,
"learning_rate": 7.360970677451972e-05,
"loss": 4.5356,
"step": 576500
},
{
"epoch": 0.53,
"learning_rate": 7.358680998226873e-05,
"loss": 4.5347,
"step": 577000
},
{
"epoch": 0.53,
"learning_rate": 7.356391319001773e-05,
"loss": 4.5134,
"step": 577500
},
{
"epoch": 0.53,
"learning_rate": 7.354101639776674e-05,
"loss": 4.5245,
"step": 578000
},
{
"epoch": 0.53,
"learning_rate": 7.351816539910024e-05,
"loss": 4.5239,
"step": 578500
},
{
"epoch": 0.53,
"learning_rate": 7.349526860684926e-05,
"loss": 4.535,
"step": 579000
},
{
"epoch": 0.53,
"learning_rate": 7.347237181459827e-05,
"loss": 4.5285,
"step": 579500
},
{
"epoch": 0.53,
"learning_rate": 7.344947502234727e-05,
"loss": 4.541,
"step": 580000
},
{
"epoch": 0.53,
"learning_rate": 7.342657823009628e-05,
"loss": 4.5242,
"step": 580500
},
{
"epoch": 0.53,
"learning_rate": 7.340368143784528e-05,
"loss": 4.5236,
"step": 581000
},
{
"epoch": 0.53,
"learning_rate": 7.338078464559429e-05,
"loss": 4.5274,
"step": 581500
},
{
"epoch": 0.53,
"learning_rate": 7.33578878533433e-05,
"loss": 4.5321,
"step": 582000
},
{
"epoch": 0.53,
"learning_rate": 7.333503685467681e-05,
"loss": 4.5282,
"step": 582500
},
{
"epoch": 0.53,
"learning_rate": 7.331214006242581e-05,
"loss": 4.54,
"step": 583000
},
{
"epoch": 0.53,
"learning_rate": 7.328928906375932e-05,
"loss": 4.5283,
"step": 583500
},
{
"epoch": 0.53,
"learning_rate": 7.326639227150834e-05,
"loss": 4.5289,
"step": 584000
},
{
"epoch": 0.54,
"learning_rate": 7.324349547925734e-05,
"loss": 4.539,
"step": 584500
},
{
"epoch": 0.54,
"learning_rate": 7.322059868700635e-05,
"loss": 4.5127,
"step": 585000
},
{
"epoch": 0.54,
"learning_rate": 7.319770189475535e-05,
"loss": 4.5306,
"step": 585500
},
{
"epoch": 0.54,
"learning_rate": 7.317480510250436e-05,
"loss": 4.5276,
"step": 586000
},
{
"epoch": 0.54,
"learning_rate": 7.315190831025336e-05,
"loss": 4.5339,
"step": 586500
},
{
"epoch": 0.54,
"learning_rate": 7.312901151800238e-05,
"loss": 4.5273,
"step": 587000
},
{
"epoch": 0.54,
"learning_rate": 7.310616051933589e-05,
"loss": 4.5375,
"step": 587500
},
{
"epoch": 0.54,
"learning_rate": 7.30832637270849e-05,
"loss": 4.5251,
"step": 588000
},
{
"epoch": 0.54,
"learning_rate": 7.30603669348339e-05,
"loss": 4.5265,
"step": 588500
},
{
"epoch": 0.54,
"learning_rate": 7.30374701425829e-05,
"loss": 4.5367,
"step": 589000
},
{
"epoch": 0.54,
"learning_rate": 7.301457335033192e-05,
"loss": 4.5165,
"step": 589500
},
{
"epoch": 0.54,
"learning_rate": 7.299176814524993e-05,
"loss": 4.5261,
"step": 590000
},
{
"epoch": 0.54,
"learning_rate": 7.296887135299893e-05,
"loss": 4.531,
"step": 590500
},
{
"epoch": 0.54,
"learning_rate": 7.294597456074794e-05,
"loss": 4.5387,
"step": 591000
},
{
"epoch": 0.54,
"learning_rate": 7.292307776849694e-05,
"loss": 4.5228,
"step": 591500
},
{
"epoch": 0.54,
"learning_rate": 7.290018097624596e-05,
"loss": 4.5218,
"step": 592000
},
{
"epoch": 0.54,
"learning_rate": 7.287728418399497e-05,
"loss": 4.5279,
"step": 592500
},
{
"epoch": 0.54,
"learning_rate": 7.285438739174397e-05,
"loss": 4.5236,
"step": 593000
},
{
"epoch": 0.54,
"learning_rate": 7.283149059949298e-05,
"loss": 4.5273,
"step": 593500
},
{
"epoch": 0.54,
"learning_rate": 7.280859380724198e-05,
"loss": 4.5143,
"step": 594000
},
{
"epoch": 0.54,
"learning_rate": 7.27857428085755e-05,
"loss": 4.5206,
"step": 594500
},
{
"epoch": 0.54,
"learning_rate": 7.27628460163245e-05,
"loss": 4.5198,
"step": 595000
},
{
"epoch": 0.55,
"learning_rate": 7.273999501765801e-05,
"loss": 4.5313,
"step": 595500
},
{
"epoch": 0.55,
"learning_rate": 7.271709822540702e-05,
"loss": 4.5313,
"step": 596000
},
{
"epoch": 0.55,
"learning_rate": 7.269420143315602e-05,
"loss": 4.512,
"step": 596500
},
{
"epoch": 0.55,
"learning_rate": 7.267130464090504e-05,
"loss": 4.5199,
"step": 597000
},
{
"epoch": 0.55,
"learning_rate": 7.264840784865405e-05,
"loss": 4.5128,
"step": 597500
},
{
"epoch": 0.55,
"learning_rate": 7.262551105640305e-05,
"loss": 4.52,
"step": 598000
},
{
"epoch": 0.55,
"learning_rate": 7.260261426415206e-05,
"loss": 4.5229,
"step": 598500
},
{
"epoch": 0.55,
"learning_rate": 7.257971747190106e-05,
"loss": 4.5219,
"step": 599000
},
{
"epoch": 0.55,
"learning_rate": 7.255682067965007e-05,
"loss": 4.526,
"step": 599500
},
{
"epoch": 0.55,
"learning_rate": 7.253396968098357e-05,
"loss": 4.5289,
"step": 600000
},
{
"epoch": 0.55,
"learning_rate": 7.251111868231708e-05,
"loss": 4.5284,
"step": 600500
},
{
"epoch": 0.55,
"learning_rate": 7.248822189006608e-05,
"loss": 4.5254,
"step": 601000
},
{
"epoch": 0.55,
"learning_rate": 7.24653250978151e-05,
"loss": 4.5253,
"step": 601500
},
{
"epoch": 0.55,
"learning_rate": 7.24424283055641e-05,
"loss": 4.5205,
"step": 602000
},
{
"epoch": 0.55,
"learning_rate": 7.241953151331311e-05,
"loss": 4.5154,
"step": 602500
},
{
"epoch": 0.55,
"learning_rate": 7.239668051464662e-05,
"loss": 4.5257,
"step": 603000
},
{
"epoch": 0.55,
"learning_rate": 7.237378372239562e-05,
"loss": 4.5307,
"step": 603500
},
{
"epoch": 0.55,
"learning_rate": 7.235088693014464e-05,
"loss": 4.519,
"step": 604000
},
{
"epoch": 0.55,
"learning_rate": 7.232799013789365e-05,
"loss": 4.5236,
"step": 604500
},
{
"epoch": 0.55,
"learning_rate": 7.230509334564265e-05,
"loss": 4.5399,
"step": 605000
},
{
"epoch": 0.55,
"learning_rate": 7.228219655339166e-05,
"loss": 4.511,
"step": 605500
},
{
"epoch": 0.56,
"learning_rate": 7.225929976114066e-05,
"loss": 4.5265,
"step": 606000
},
{
"epoch": 0.56,
"learning_rate": 7.223644876247418e-05,
"loss": 4.5211,
"step": 606500
},
{
"epoch": 0.56,
"learning_rate": 7.221355197022318e-05,
"loss": 4.5213,
"step": 607000
},
{
"epoch": 0.56,
"learning_rate": 7.219065517797219e-05,
"loss": 4.5197,
"step": 607500
},
{
"epoch": 0.56,
"learning_rate": 7.21677583857212e-05,
"loss": 4.5213,
"step": 608000
},
{
"epoch": 0.56,
"learning_rate": 7.21448615934702e-05,
"loss": 4.52,
"step": 608500
},
{
"epoch": 0.56,
"learning_rate": 7.21219648012192e-05,
"loss": 4.5163,
"step": 609000
},
{
"epoch": 0.56,
"learning_rate": 7.209906800896822e-05,
"loss": 4.5028,
"step": 609500
},
{
"epoch": 0.56,
"learning_rate": 7.207617121671723e-05,
"loss": 4.5231,
"step": 610000
},
{
"epoch": 0.56,
"learning_rate": 7.205332021805073e-05,
"loss": 4.5349,
"step": 610500
},
{
"epoch": 0.56,
"learning_rate": 7.203042342579974e-05,
"loss": 4.5125,
"step": 611000
},
{
"epoch": 0.56,
"learning_rate": 7.200752663354874e-05,
"loss": 4.53,
"step": 611500
},
{
"epoch": 0.56,
"learning_rate": 7.198462984129776e-05,
"loss": 4.5175,
"step": 612000
},
{
"epoch": 0.56,
"learning_rate": 7.196177884263127e-05,
"loss": 4.5107,
"step": 612500
},
{
"epoch": 0.56,
"learning_rate": 7.193888205038027e-05,
"loss": 4.5227,
"step": 613000
},
{
"epoch": 0.56,
"learning_rate": 7.191598525812928e-05,
"loss": 4.5187,
"step": 613500
},
{
"epoch": 0.56,
"learning_rate": 7.189308846587828e-05,
"loss": 4.5076,
"step": 614000
},
{
"epoch": 0.56,
"learning_rate": 7.18701916736273e-05,
"loss": 4.5177,
"step": 614500
},
{
"epoch": 0.56,
"learning_rate": 7.184729488137631e-05,
"loss": 4.5217,
"step": 615000
},
{
"epoch": 0.56,
"learning_rate": 7.182439808912531e-05,
"loss": 4.5233,
"step": 615500
},
{
"epoch": 0.56,
"learning_rate": 7.180150129687432e-05,
"loss": 4.517,
"step": 616000
},
{
"epoch": 0.56,
"learning_rate": 7.177865029820782e-05,
"loss": 4.5305,
"step": 616500
},
{
"epoch": 0.57,
"learning_rate": 7.175575350595684e-05,
"loss": 4.5206,
"step": 617000
},
{
"epoch": 0.57,
"learning_rate": 7.173285671370585e-05,
"loss": 4.532,
"step": 617500
},
{
"epoch": 0.57,
"learning_rate": 7.170995992145485e-05,
"loss": 4.5226,
"step": 618000
},
{
"epoch": 0.57,
"learning_rate": 7.168710892278836e-05,
"loss": 4.5306,
"step": 618500
},
{
"epoch": 0.57,
"learning_rate": 7.166421213053736e-05,
"loss": 4.5267,
"step": 619000
},
{
"epoch": 0.57,
"learning_rate": 7.164131533828638e-05,
"loss": 4.5171,
"step": 619500
},
{
"epoch": 0.57,
"learning_rate": 7.161841854603539e-05,
"loss": 4.5136,
"step": 620000
},
{
"epoch": 0.57,
"learning_rate": 7.159552175378439e-05,
"loss": 4.5101,
"step": 620500
},
{
"epoch": 0.57,
"learning_rate": 7.15726707551179e-05,
"loss": 4.538,
"step": 621000
},
{
"epoch": 0.57,
"learning_rate": 7.15497739628669e-05,
"loss": 4.5257,
"step": 621500
},
{
"epoch": 0.57,
"learning_rate": 7.152687717061591e-05,
"loss": 4.5231,
"step": 622000
},
{
"epoch": 0.57,
"learning_rate": 7.150398037836493e-05,
"loss": 4.5128,
"step": 622500
},
{
"epoch": 0.57,
"learning_rate": 7.148112937969843e-05,
"loss": 4.5193,
"step": 623000
},
{
"epoch": 0.57,
"learning_rate": 7.145823258744744e-05,
"loss": 4.5111,
"step": 623500
},
{
"epoch": 0.57,
"learning_rate": 7.143533579519643e-05,
"loss": 4.5255,
"step": 624000
},
{
"epoch": 0.57,
"learning_rate": 7.141243900294545e-05,
"loss": 4.5311,
"step": 624500
},
{
"epoch": 0.57,
"learning_rate": 7.138958800427895e-05,
"loss": 4.5211,
"step": 625000
},
{
"epoch": 0.57,
"learning_rate": 7.136669121202796e-05,
"loss": 4.5225,
"step": 625500
},
{
"epoch": 0.57,
"learning_rate": 7.134379441977696e-05,
"loss": 4.5101,
"step": 626000
},
{
"epoch": 0.57,
"learning_rate": 7.132089762752597e-05,
"loss": 4.517,
"step": 626500
},
{
"epoch": 0.57,
"learning_rate": 7.129800083527499e-05,
"loss": 4.5263,
"step": 627000
},
{
"epoch": 0.57,
"learning_rate": 7.127514983660849e-05,
"loss": 4.5069,
"step": 627500
},
{
"epoch": 0.58,
"learning_rate": 7.12522530443575e-05,
"loss": 4.5215,
"step": 628000
},
{
"epoch": 0.58,
"learning_rate": 7.12293562521065e-05,
"loss": 4.543,
"step": 628500
},
{
"epoch": 0.58,
"learning_rate": 7.12064594598555e-05,
"loss": 4.5266,
"step": 629000
},
{
"epoch": 0.58,
"learning_rate": 7.118356266760452e-05,
"loss": 4.5198,
"step": 629500
},
{
"epoch": 0.58,
"learning_rate": 7.116071166893803e-05,
"loss": 4.504,
"step": 630000
},
{
"epoch": 0.58,
"learning_rate": 7.113781487668704e-05,
"loss": 4.5098,
"step": 630500
},
{
"epoch": 0.58,
"learning_rate": 7.111491808443604e-05,
"loss": 4.5104,
"step": 631000
},
{
"epoch": 0.58,
"learning_rate": 7.109202129218505e-05,
"loss": 4.5162,
"step": 631500
},
{
"epoch": 0.58,
"learning_rate": 7.106912449993406e-05,
"loss": 4.5249,
"step": 632000
},
{
"epoch": 0.58,
"learning_rate": 7.104627350126757e-05,
"loss": 4.5211,
"step": 632500
},
{
"epoch": 0.58,
"learning_rate": 7.102337670901657e-05,
"loss": 4.5093,
"step": 633000
},
{
"epoch": 0.58,
"learning_rate": 7.100047991676558e-05,
"loss": 4.5225,
"step": 633500
},
{
"epoch": 0.58,
"learning_rate": 7.097758312451458e-05,
"loss": 4.5136,
"step": 634000
},
{
"epoch": 0.58,
"learning_rate": 7.09546863322636e-05,
"loss": 4.5198,
"step": 634500
},
{
"epoch": 0.58,
"learning_rate": 7.093178954001261e-05,
"loss": 4.513,
"step": 635000
},
{
"epoch": 0.58,
"learning_rate": 7.090893854134611e-05,
"loss": 4.5039,
"step": 635500
},
{
"epoch": 0.58,
"learning_rate": 7.088604174909512e-05,
"loss": 4.5081,
"step": 636000
},
{
"epoch": 0.58,
"learning_rate": 7.086314495684412e-05,
"loss": 4.5174,
"step": 636500
},
{
"epoch": 0.58,
"learning_rate": 7.084024816459314e-05,
"loss": 4.5198,
"step": 637000
},
{
"epoch": 0.58,
"learning_rate": 7.081739716592665e-05,
"loss": 4.4917,
"step": 637500
},
{
"epoch": 0.58,
"learning_rate": 7.079454616726015e-05,
"loss": 4.5467,
"step": 638000
},
{
"epoch": 0.58,
"learning_rate": 7.077164937500916e-05,
"loss": 4.5143,
"step": 638500
},
{
"epoch": 0.59,
"learning_rate": 7.074875258275816e-05,
"loss": 4.5066,
"step": 639000
},
{
"epoch": 0.59,
"learning_rate": 7.072585579050718e-05,
"loss": 4.5082,
"step": 639500
},
{
"epoch": 0.59,
"learning_rate": 7.070300479184069e-05,
"loss": 4.5381,
"step": 640000
},
{
"epoch": 0.59,
"learning_rate": 7.06801079995897e-05,
"loss": 4.5149,
"step": 640500
},
{
"epoch": 0.59,
"learning_rate": 7.06572112073387e-05,
"loss": 4.5167,
"step": 641000
},
{
"epoch": 0.59,
"learning_rate": 7.06343144150877e-05,
"loss": 4.5193,
"step": 641500
},
{
"epoch": 0.59,
"learning_rate": 7.061141762283672e-05,
"loss": 4.512,
"step": 642000
},
{
"epoch": 0.59,
"learning_rate": 7.058852083058573e-05,
"loss": 4.5206,
"step": 642500
},
{
"epoch": 0.59,
"learning_rate": 7.056566983191923e-05,
"loss": 4.5208,
"step": 643000
},
{
"epoch": 0.59,
"learning_rate": 7.054277303966824e-05,
"loss": 4.5358,
"step": 643500
},
{
"epoch": 0.59,
"learning_rate": 7.051987624741724e-05,
"loss": 4.5192,
"step": 644000
},
{
"epoch": 0.59,
"learning_rate": 7.049697945516626e-05,
"loss": 4.5143,
"step": 644500
},
{
"epoch": 0.59,
"learning_rate": 7.047408266291527e-05,
"loss": 4.5162,
"step": 645000
},
{
"epoch": 0.59,
"learning_rate": 7.045118587066427e-05,
"loss": 4.5074,
"step": 645500
},
{
"epoch": 0.59,
"learning_rate": 7.042828907841328e-05,
"loss": 4.5297,
"step": 646000
},
{
"epoch": 0.59,
"learning_rate": 7.040539228616228e-05,
"loss": 4.5012,
"step": 646500
},
{
"epoch": 0.59,
"learning_rate": 7.038249549391129e-05,
"loss": 4.5117,
"step": 647000
},
{
"epoch": 0.59,
"learning_rate": 7.03596444952448e-05,
"loss": 4.5197,
"step": 647500
},
{
"epoch": 0.59,
"learning_rate": 7.03367477029938e-05,
"loss": 4.5106,
"step": 648000
},
{
"epoch": 0.59,
"learning_rate": 7.03138967043273e-05,
"loss": 4.4993,
"step": 648500
},
{
"epoch": 0.59,
"learning_rate": 7.029099991207632e-05,
"loss": 4.5277,
"step": 649000
},
{
"epoch": 0.59,
"learning_rate": 7.026810311982533e-05,
"loss": 4.5132,
"step": 649500
},
{
"epoch": 0.6,
"learning_rate": 7.024520632757433e-05,
"loss": 4.53,
"step": 650000
},
{
"epoch": 0.6,
"learning_rate": 7.022230953532334e-05,
"loss": 4.5219,
"step": 650500
},
{
"epoch": 0.6,
"learning_rate": 7.019941274307234e-05,
"loss": 4.5085,
"step": 651000
},
{
"epoch": 0.6,
"learning_rate": 7.017651595082135e-05,
"loss": 4.5004,
"step": 651500
},
{
"epoch": 0.6,
"learning_rate": 7.015361915857037e-05,
"loss": 4.5118,
"step": 652000
},
{
"epoch": 0.6,
"learning_rate": 7.013072236631937e-05,
"loss": 4.4977,
"step": 652500
},
{
"epoch": 0.6,
"learning_rate": 7.010787136765288e-05,
"loss": 4.5102,
"step": 653000
},
{
"epoch": 0.6,
"learning_rate": 7.008502036898638e-05,
"loss": 4.5065,
"step": 653500
},
{
"epoch": 0.6,
"learning_rate": 7.00621235767354e-05,
"loss": 4.5124,
"step": 654000
},
{
"epoch": 0.6,
"learning_rate": 7.00392267844844e-05,
"loss": 4.5077,
"step": 654500
},
{
"epoch": 0.6,
"learning_rate": 7.001632999223341e-05,
"loss": 4.5166,
"step": 655000
},
{
"epoch": 0.6,
"learning_rate": 6.999343319998242e-05,
"loss": 4.51,
"step": 655500
},
{
"epoch": 0.6,
"learning_rate": 6.997053640773142e-05,
"loss": 4.512,
"step": 656000
},
{
"epoch": 0.6,
"learning_rate": 6.994763961548043e-05,
"loss": 4.5031,
"step": 656500
},
{
"epoch": 0.6,
"learning_rate": 6.992474282322944e-05,
"loss": 4.5069,
"step": 657000
},
{
"epoch": 0.6,
"learning_rate": 6.990184603097845e-05,
"loss": 4.5101,
"step": 657500
},
{
"epoch": 0.6,
"learning_rate": 6.987894923872745e-05,
"loss": 4.5034,
"step": 658000
},
{
"epoch": 0.6,
"learning_rate": 6.985605244647646e-05,
"loss": 4.5065,
"step": 658500
},
{
"epoch": 0.6,
"learning_rate": 6.983315565422546e-05,
"loss": 4.5145,
"step": 659000
},
{
"epoch": 0.6,
"learning_rate": 6.981030465555898e-05,
"loss": 4.4976,
"step": 659500
},
{
"epoch": 0.6,
"learning_rate": 6.978740786330799e-05,
"loss": 4.5073,
"step": 660000
},
{
"epoch": 0.6,
"learning_rate": 6.9764511071057e-05,
"loss": 4.5103,
"step": 660500
},
{
"epoch": 0.61,
"learning_rate": 6.97416600723905e-05,
"loss": 4.5107,
"step": 661000
},
{
"epoch": 0.61,
"learning_rate": 6.97187632801395e-05,
"loss": 4.5172,
"step": 661500
},
{
"epoch": 0.61,
"learning_rate": 6.969586648788852e-05,
"loss": 4.5104,
"step": 662000
},
{
"epoch": 0.61,
"learning_rate": 6.967296969563753e-05,
"loss": 4.5057,
"step": 662500
},
{
"epoch": 0.61,
"learning_rate": 6.965007290338653e-05,
"loss": 4.5057,
"step": 663000
},
{
"epoch": 0.61,
"learning_rate": 6.962717611113554e-05,
"loss": 4.5067,
"step": 663500
},
{
"epoch": 0.61,
"learning_rate": 6.960427931888454e-05,
"loss": 4.5054,
"step": 664000
},
{
"epoch": 0.61,
"learning_rate": 6.958138252663355e-05,
"loss": 4.5127,
"step": 664500
},
{
"epoch": 0.61,
"learning_rate": 6.955853152796707e-05,
"loss": 4.5075,
"step": 665000
},
{
"epoch": 0.61,
"learning_rate": 6.953563473571607e-05,
"loss": 4.5255,
"step": 665500
},
{
"epoch": 0.61,
"learning_rate": 6.951278373704958e-05,
"loss": 4.5209,
"step": 666000
},
{
"epoch": 0.61,
"learning_rate": 6.948988694479858e-05,
"loss": 4.5176,
"step": 666500
},
{
"epoch": 0.61,
"learning_rate": 6.94669901525476e-05,
"loss": 4.5111,
"step": 667000
},
{
"epoch": 0.61,
"learning_rate": 6.94440933602966e-05,
"loss": 4.5078,
"step": 667500
},
{
"epoch": 0.61,
"learning_rate": 6.942124236163011e-05,
"loss": 4.5194,
"step": 668000
},
{
"epoch": 0.61,
"learning_rate": 6.939843715654812e-05,
"loss": 4.5099,
"step": 668500
},
{
"epoch": 0.61,
"learning_rate": 6.937554036429714e-05,
"loss": 4.5164,
"step": 669000
},
{
"epoch": 0.61,
"learning_rate": 6.935264357204614e-05,
"loss": 4.5084,
"step": 669500
},
{
"epoch": 0.61,
"learning_rate": 6.932974677979515e-05,
"loss": 4.5003,
"step": 670000
},
{
"epoch": 0.61,
"learning_rate": 6.930684998754415e-05,
"loss": 4.5066,
"step": 670500
},
{
"epoch": 0.61,
"learning_rate": 6.928395319529316e-05,
"loss": 4.5052,
"step": 671000
},
{
"epoch": 0.62,
"learning_rate": 6.926105640304216e-05,
"loss": 4.5154,
"step": 671500
},
{
"epoch": 0.62,
"learning_rate": 6.923815961079117e-05,
"loss": 4.5067,
"step": 672000
},
{
"epoch": 0.62,
"learning_rate": 6.921526281854017e-05,
"loss": 4.5109,
"step": 672500
},
{
"epoch": 0.62,
"learning_rate": 6.919236602628918e-05,
"loss": 4.5085,
"step": 673000
},
{
"epoch": 0.62,
"learning_rate": 6.916946923403818e-05,
"loss": 4.5122,
"step": 673500
},
{
"epoch": 0.62,
"learning_rate": 6.914657244178719e-05,
"loss": 4.5164,
"step": 674000
},
{
"epoch": 0.62,
"learning_rate": 6.91236756495362e-05,
"loss": 4.5074,
"step": 674500
},
{
"epoch": 0.62,
"learning_rate": 6.910077885728521e-05,
"loss": 4.5002,
"step": 675000
},
{
"epoch": 0.62,
"learning_rate": 6.907792785861872e-05,
"loss": 4.5122,
"step": 675500
},
{
"epoch": 0.62,
"learning_rate": 6.905503106636772e-05,
"loss": 4.5123,
"step": 676000
},
{
"epoch": 0.62,
"learning_rate": 6.903213427411673e-05,
"loss": 4.4983,
"step": 676500
},
{
"epoch": 0.62,
"learning_rate": 6.900923748186575e-05,
"loss": 4.4989,
"step": 677000
},
{
"epoch": 0.62,
"learning_rate": 6.898634068961475e-05,
"loss": 4.5122,
"step": 677500
},
{
"epoch": 0.62,
"learning_rate": 6.896344389736376e-05,
"loss": 4.5116,
"step": 678000
},
{
"epoch": 0.62,
"learning_rate": 6.894054710511276e-05,
"loss": 4.4961,
"step": 678500
},
{
"epoch": 0.62,
"learning_rate": 6.891765031286177e-05,
"loss": 4.5088,
"step": 679000
},
{
"epoch": 0.62,
"learning_rate": 6.889475352061078e-05,
"loss": 4.5033,
"step": 679500
},
{
"epoch": 0.62,
"learning_rate": 6.887190252194429e-05,
"loss": 4.5126,
"step": 680000
},
{
"epoch": 0.62,
"learning_rate": 6.88490057296933e-05,
"loss": 4.4891,
"step": 680500
},
{
"epoch": 0.62,
"learning_rate": 6.88261089374423e-05,
"loss": 4.5024,
"step": 681000
},
{
"epoch": 0.62,
"learning_rate": 6.88032121451913e-05,
"loss": 4.5105,
"step": 681500
},
{
"epoch": 0.62,
"learning_rate": 6.878031535294031e-05,
"loss": 4.5002,
"step": 682000
},
{
"epoch": 0.63,
"learning_rate": 6.875746435427383e-05,
"loss": 4.5085,
"step": 682500
},
{
"epoch": 0.63,
"learning_rate": 6.873456756202283e-05,
"loss": 4.5095,
"step": 683000
},
{
"epoch": 0.63,
"learning_rate": 6.871171656335634e-05,
"loss": 4.5004,
"step": 683500
},
{
"epoch": 0.63,
"learning_rate": 6.868881977110534e-05,
"loss": 4.4968,
"step": 684000
},
{
"epoch": 0.63,
"learning_rate": 6.866592297885436e-05,
"loss": 4.5241,
"step": 684500
},
{
"epoch": 0.63,
"learning_rate": 6.864302618660337e-05,
"loss": 4.5098,
"step": 685000
},
{
"epoch": 0.63,
"learning_rate": 6.862012939435237e-05,
"loss": 4.5173,
"step": 685500
},
{
"epoch": 0.63,
"learning_rate": 6.859723260210138e-05,
"loss": 4.5197,
"step": 686000
},
{
"epoch": 0.63,
"learning_rate": 6.857433580985038e-05,
"loss": 4.5042,
"step": 686500
},
{
"epoch": 0.63,
"learning_rate": 6.85514848111839e-05,
"loss": 4.4914,
"step": 687000
},
{
"epoch": 0.63,
"learning_rate": 6.852858801893291e-05,
"loss": 4.5056,
"step": 687500
},
{
"epoch": 0.63,
"learning_rate": 6.850569122668191e-05,
"loss": 4.5033,
"step": 688000
},
{
"epoch": 0.63,
"learning_rate": 6.848279443443092e-05,
"loss": 4.5088,
"step": 688500
},
{
"epoch": 0.63,
"learning_rate": 6.845989764217992e-05,
"loss": 4.5133,
"step": 689000
},
{
"epoch": 0.63,
"learning_rate": 6.843704664351344e-05,
"loss": 4.4881,
"step": 689500
},
{
"epoch": 0.63,
"learning_rate": 6.841414985126245e-05,
"loss": 4.5163,
"step": 690000
},
{
"epoch": 0.63,
"learning_rate": 6.839125305901145e-05,
"loss": 4.4941,
"step": 690500
},
{
"epoch": 0.63,
"learning_rate": 6.836835626676046e-05,
"loss": 4.5006,
"step": 691000
},
{
"epoch": 0.63,
"learning_rate": 6.834545947450946e-05,
"loss": 4.5095,
"step": 691500
},
{
"epoch": 0.63,
"learning_rate": 6.832256268225847e-05,
"loss": 4.5001,
"step": 692000
},
{
"epoch": 0.63,
"learning_rate": 6.829966589000749e-05,
"loss": 4.5061,
"step": 692500
},
{
"epoch": 0.63,
"learning_rate": 6.827676909775649e-05,
"loss": 4.4967,
"step": 693000
},
{
"epoch": 0.64,
"learning_rate": 6.825391809909e-05,
"loss": 4.4954,
"step": 693500
},
{
"epoch": 0.64,
"learning_rate": 6.8231021306839e-05,
"loss": 4.505,
"step": 694000
},
{
"epoch": 0.64,
"learning_rate": 6.8208124514588e-05,
"loss": 4.5094,
"step": 694500
},
{
"epoch": 0.64,
"learning_rate": 6.818522772233703e-05,
"loss": 4.5081,
"step": 695000
},
{
"epoch": 0.64,
"learning_rate": 6.816233093008603e-05,
"loss": 4.5247,
"step": 695500
},
{
"epoch": 0.64,
"learning_rate": 6.813943413783504e-05,
"loss": 4.5025,
"step": 696000
},
{
"epoch": 0.64,
"learning_rate": 6.811653734558404e-05,
"loss": 4.4861,
"step": 696500
},
{
"epoch": 0.64,
"learning_rate": 6.809364055333305e-05,
"loss": 4.5081,
"step": 697000
},
{
"epoch": 0.64,
"learning_rate": 6.807074376108205e-05,
"loss": 4.4994,
"step": 697500
},
{
"epoch": 0.64,
"learning_rate": 6.804789276241556e-05,
"loss": 4.4941,
"step": 698000
},
{
"epoch": 0.64,
"learning_rate": 6.802499597016456e-05,
"loss": 4.5034,
"step": 698500
},
{
"epoch": 0.64,
"learning_rate": 6.800214497149807e-05,
"loss": 4.4934,
"step": 699000
},
{
"epoch": 0.64,
"learning_rate": 6.797924817924709e-05,
"loss": 4.5041,
"step": 699500
},
{
"epoch": 0.64,
"learning_rate": 6.795635138699609e-05,
"loss": 4.5051,
"step": 700000
},
{
"epoch": 0.64,
"learning_rate": 6.79334545947451e-05,
"loss": 4.5111,
"step": 700500
},
{
"epoch": 0.64,
"learning_rate": 6.79105578024941e-05,
"loss": 4.4981,
"step": 701000
},
{
"epoch": 0.64,
"learning_rate": 6.78876610102431e-05,
"loss": 4.5036,
"step": 701500
},
{
"epoch": 0.64,
"learning_rate": 6.786476421799211e-05,
"loss": 4.5069,
"step": 702000
},
{
"epoch": 0.64,
"learning_rate": 6.784191321932563e-05,
"loss": 4.4999,
"step": 702500
},
{
"epoch": 0.64,
"learning_rate": 6.781901642707463e-05,
"loss": 4.5134,
"step": 703000
},
{
"epoch": 0.64,
"learning_rate": 6.779611963482364e-05,
"loss": 4.4886,
"step": 703500
},
{
"epoch": 0.64,
"learning_rate": 6.777322284257264e-05,
"loss": 4.5002,
"step": 704000
},
{
"epoch": 0.65,
"learning_rate": 6.775032605032165e-05,
"loss": 4.5067,
"step": 704500
},
{
"epoch": 0.65,
"learning_rate": 6.772742925807067e-05,
"loss": 4.5094,
"step": 705000
},
{
"epoch": 0.65,
"learning_rate": 6.770453246581967e-05,
"loss": 4.5038,
"step": 705500
},
{
"epoch": 0.65,
"learning_rate": 6.768163567356868e-05,
"loss": 4.5023,
"step": 706000
},
{
"epoch": 0.65,
"learning_rate": 6.765873888131768e-05,
"loss": 4.4943,
"step": 706500
},
{
"epoch": 0.65,
"learning_rate": 6.763588788265119e-05,
"loss": 4.5032,
"step": 707000
},
{
"epoch": 0.65,
"learning_rate": 6.761303688398471e-05,
"loss": 4.5062,
"step": 707500
},
{
"epoch": 0.65,
"learning_rate": 6.759014009173371e-05,
"loss": 4.5056,
"step": 708000
},
{
"epoch": 0.65,
"learning_rate": 6.756724329948272e-05,
"loss": 4.4967,
"step": 708500
},
{
"epoch": 0.65,
"learning_rate": 6.754434650723172e-05,
"loss": 4.4952,
"step": 709000
},
{
"epoch": 0.65,
"learning_rate": 6.752144971498073e-05,
"loss": 4.4906,
"step": 709500
},
{
"epoch": 0.65,
"learning_rate": 6.749855292272975e-05,
"loss": 4.4871,
"step": 710000
},
{
"epoch": 0.65,
"learning_rate": 6.747565613047875e-05,
"loss": 4.5065,
"step": 710500
},
{
"epoch": 0.65,
"learning_rate": 6.745280513181226e-05,
"loss": 4.5009,
"step": 711000
},
{
"epoch": 0.65,
"learning_rate": 6.742990833956126e-05,
"loss": 4.486,
"step": 711500
},
{
"epoch": 0.65,
"learning_rate": 6.740701154731027e-05,
"loss": 4.4875,
"step": 712000
},
{
"epoch": 0.65,
"learning_rate": 6.738411475505929e-05,
"loss": 4.5009,
"step": 712500
},
{
"epoch": 0.65,
"learning_rate": 6.736121796280829e-05,
"loss": 4.4956,
"step": 713000
},
{
"epoch": 0.65,
"learning_rate": 6.73383211705573e-05,
"loss": 4.5123,
"step": 713500
},
{
"epoch": 0.65,
"learning_rate": 6.73154701718908e-05,
"loss": 4.4769,
"step": 714000
},
{
"epoch": 0.65,
"learning_rate": 6.729257337963981e-05,
"loss": 4.4848,
"step": 714500
},
{
"epoch": 0.65,
"learning_rate": 6.726967658738881e-05,
"loss": 4.5009,
"step": 715000
},
{
"epoch": 0.66,
"learning_rate": 6.724677979513783e-05,
"loss": 4.5041,
"step": 715500
},
{
"epoch": 0.66,
"learning_rate": 6.722388300288684e-05,
"loss": 4.5043,
"step": 716000
},
{
"epoch": 0.66,
"learning_rate": 6.720103200422034e-05,
"loss": 4.5016,
"step": 716500
},
{
"epoch": 0.66,
"learning_rate": 6.717818100555385e-05,
"loss": 4.509,
"step": 717000
},
{
"epoch": 0.66,
"learning_rate": 6.715528421330287e-05,
"loss": 4.4817,
"step": 717500
},
{
"epoch": 0.66,
"learning_rate": 6.713238742105187e-05,
"loss": 4.4877,
"step": 718000
},
{
"epoch": 0.66,
"learning_rate": 6.710949062880088e-05,
"loss": 4.5078,
"step": 718500
},
{
"epoch": 0.66,
"learning_rate": 6.708659383654988e-05,
"loss": 4.5087,
"step": 719000
},
{
"epoch": 0.66,
"learning_rate": 6.706369704429889e-05,
"loss": 4.4927,
"step": 719500
},
{
"epoch": 0.66,
"learning_rate": 6.704080025204789e-05,
"loss": 4.4916,
"step": 720000
},
{
"epoch": 0.66,
"learning_rate": 6.701790345979691e-05,
"loss": 4.4967,
"step": 720500
},
{
"epoch": 0.66,
"learning_rate": 6.699500666754591e-05,
"loss": 4.5194,
"step": 721000
},
{
"epoch": 0.66,
"learning_rate": 6.697210987529492e-05,
"loss": 4.4931,
"step": 721500
},
{
"epoch": 0.66,
"learning_rate": 6.694921308304391e-05,
"loss": 4.5015,
"step": 722000
},
{
"epoch": 0.66,
"learning_rate": 6.692631629079292e-05,
"loss": 4.4921,
"step": 722500
},
{
"epoch": 0.66,
"learning_rate": 6.690346529212644e-05,
"loss": 4.4961,
"step": 723000
},
{
"epoch": 0.66,
"learning_rate": 6.688056849987544e-05,
"loss": 4.5106,
"step": 723500
},
{
"epoch": 0.66,
"learning_rate": 6.685767170762445e-05,
"loss": 4.5038,
"step": 724000
},
{
"epoch": 0.66,
"learning_rate": 6.683477491537345e-05,
"loss": 4.4953,
"step": 724500
},
{
"epoch": 0.66,
"learning_rate": 6.681192391670697e-05,
"loss": 4.5079,
"step": 725000
},
{
"epoch": 0.66,
"learning_rate": 6.678907291804048e-05,
"loss": 4.4942,
"step": 725500
},
{
"epoch": 0.66,
"learning_rate": 6.676617612578948e-05,
"loss": 4.4954,
"step": 726000
},
{
"epoch": 0.67,
"learning_rate": 6.674327933353849e-05,
"loss": 4.5005,
"step": 726500
},
{
"epoch": 0.67,
"learning_rate": 6.672038254128749e-05,
"loss": 4.4861,
"step": 727000
},
{
"epoch": 0.67,
"learning_rate": 6.669748574903651e-05,
"loss": 4.5135,
"step": 727500
},
{
"epoch": 0.67,
"learning_rate": 6.667463475037001e-05,
"loss": 4.5118,
"step": 728000
},
{
"epoch": 0.67,
"learning_rate": 6.665173795811902e-05,
"loss": 4.4936,
"step": 728500
},
{
"epoch": 0.67,
"learning_rate": 6.662884116586802e-05,
"loss": 4.4923,
"step": 729000
},
{
"epoch": 0.67,
"learning_rate": 6.660594437361703e-05,
"loss": 4.509,
"step": 729500
},
{
"epoch": 0.67,
"learning_rate": 6.658304758136605e-05,
"loss": 4.5036,
"step": 730000
},
{
"epoch": 0.67,
"learning_rate": 6.656015078911505e-05,
"loss": 4.4942,
"step": 730500
},
{
"epoch": 0.67,
"learning_rate": 6.653725399686406e-05,
"loss": 4.4916,
"step": 731000
},
{
"epoch": 0.67,
"learning_rate": 6.651444879178206e-05,
"loss": 4.4935,
"step": 731500
},
{
"epoch": 0.67,
"learning_rate": 6.649155199953107e-05,
"loss": 4.5004,
"step": 732000
},
{
"epoch": 0.67,
"learning_rate": 6.646865520728009e-05,
"loss": 4.4926,
"step": 732500
},
{
"epoch": 0.67,
"learning_rate": 6.64457584150291e-05,
"loss": 4.4886,
"step": 733000
},
{
"epoch": 0.67,
"learning_rate": 6.64228616227781e-05,
"loss": 4.4934,
"step": 733500
},
{
"epoch": 0.67,
"learning_rate": 6.63999648305271e-05,
"loss": 4.4819,
"step": 734000
},
{
"epoch": 0.67,
"learning_rate": 6.637706803827611e-05,
"loss": 4.4935,
"step": 734500
},
{
"epoch": 0.67,
"learning_rate": 6.635417124602513e-05,
"loss": 4.4962,
"step": 735000
},
{
"epoch": 0.67,
"learning_rate": 6.633127445377413e-05,
"loss": 4.4903,
"step": 735500
},
{
"epoch": 0.67,
"learning_rate": 6.630837766152314e-05,
"loss": 4.5081,
"step": 736000
},
{
"epoch": 0.67,
"learning_rate": 6.628552666285664e-05,
"loss": 4.4942,
"step": 736500
},
{
"epoch": 0.67,
"learning_rate": 6.626262987060565e-05,
"loss": 4.5001,
"step": 737000
},
{
"epoch": 0.68,
"learning_rate": 6.623973307835467e-05,
"loss": 4.4875,
"step": 737500
},
{
"epoch": 0.68,
"learning_rate": 6.621683628610367e-05,
"loss": 4.4911,
"step": 738000
},
{
"epoch": 0.68,
"learning_rate": 6.619398528743718e-05,
"loss": 4.4985,
"step": 738500
},
{
"epoch": 0.68,
"learning_rate": 6.617108849518618e-05,
"loss": 4.4968,
"step": 739000
},
{
"epoch": 0.68,
"learning_rate": 6.614819170293519e-05,
"loss": 4.4834,
"step": 739500
},
{
"epoch": 0.68,
"learning_rate": 6.612529491068419e-05,
"loss": 4.4906,
"step": 740000
},
{
"epoch": 0.68,
"learning_rate": 6.610239811843321e-05,
"loss": 4.4881,
"step": 740500
},
{
"epoch": 0.68,
"learning_rate": 6.607950132618222e-05,
"loss": 4.4965,
"step": 741000
},
{
"epoch": 0.68,
"learning_rate": 6.605660453393122e-05,
"loss": 4.5056,
"step": 741500
},
{
"epoch": 0.68,
"learning_rate": 6.603370774168023e-05,
"loss": 4.5031,
"step": 742000
},
{
"epoch": 0.68,
"learning_rate": 6.601081094942923e-05,
"loss": 4.509,
"step": 742500
},
{
"epoch": 0.68,
"learning_rate": 6.598795995076275e-05,
"loss": 4.5112,
"step": 743000
},
{
"epoch": 0.68,
"learning_rate": 6.596506315851176e-05,
"loss": 4.5029,
"step": 743500
},
{
"epoch": 0.68,
"learning_rate": 6.594216636626076e-05,
"loss": 4.4892,
"step": 744000
},
{
"epoch": 0.68,
"learning_rate": 6.591926957400977e-05,
"loss": 4.4848,
"step": 744500
},
{
"epoch": 0.68,
"learning_rate": 6.589641857534327e-05,
"loss": 4.4839,
"step": 745000
},
{
"epoch": 0.68,
"learning_rate": 6.587352178309228e-05,
"loss": 4.4962,
"step": 745500
},
{
"epoch": 0.68,
"learning_rate": 6.585062499084128e-05,
"loss": 4.4872,
"step": 746000
},
{
"epoch": 0.68,
"learning_rate": 6.582772819859029e-05,
"loss": 4.4931,
"step": 746500
},
{
"epoch": 0.68,
"learning_rate": 6.580487719992379e-05,
"loss": 4.4849,
"step": 747000
},
{
"epoch": 0.68,
"learning_rate": 6.578202620125731e-05,
"loss": 4.4843,
"step": 747500
},
{
"epoch": 0.69,
"learning_rate": 6.575912940900632e-05,
"loss": 4.4984,
"step": 748000
},
{
"epoch": 0.69,
"learning_rate": 6.573623261675532e-05,
"loss": 4.4924,
"step": 748500
},
{
"epoch": 0.69,
"learning_rate": 6.571333582450433e-05,
"loss": 4.4967,
"step": 749000
},
{
"epoch": 0.69,
"learning_rate": 6.569043903225333e-05,
"loss": 4.4844,
"step": 749500
},
{
"epoch": 0.69,
"learning_rate": 6.566754224000235e-05,
"loss": 4.4798,
"step": 750000
},
{
"epoch": 0.69,
"learning_rate": 6.564464544775135e-05,
"loss": 4.498,
"step": 750500
},
{
"epoch": 0.69,
"learning_rate": 6.562174865550036e-05,
"loss": 4.486,
"step": 751000
},
{
"epoch": 0.69,
"learning_rate": 6.559885186324936e-05,
"loss": 4.4853,
"step": 751500
},
{
"epoch": 0.69,
"learning_rate": 6.557600086458287e-05,
"loss": 4.4833,
"step": 752000
},
{
"epoch": 0.69,
"learning_rate": 6.555310407233189e-05,
"loss": 4.4927,
"step": 752500
},
{
"epoch": 0.69,
"learning_rate": 6.55302072800809e-05,
"loss": 4.4805,
"step": 753000
},
{
"epoch": 0.69,
"learning_rate": 6.55073104878299e-05,
"loss": 4.4906,
"step": 753500
},
{
"epoch": 0.69,
"learning_rate": 6.54844136955789e-05,
"loss": 4.487,
"step": 754000
},
{
"epoch": 0.69,
"learning_rate": 6.546151690332791e-05,
"loss": 4.4895,
"step": 754500
},
{
"epoch": 0.69,
"learning_rate": 6.543862011107691e-05,
"loss": 4.4913,
"step": 755000
},
{
"epoch": 0.69,
"learning_rate": 6.541572331882593e-05,
"loss": 4.5076,
"step": 755500
},
{
"epoch": 0.69,
"learning_rate": 6.539282652657494e-05,
"loss": 4.4928,
"step": 756000
},
{
"epoch": 0.69,
"learning_rate": 6.536997552790844e-05,
"loss": 4.4865,
"step": 756500
},
{
"epoch": 0.69,
"learning_rate": 6.534707873565745e-05,
"loss": 4.4695,
"step": 757000
},
{
"epoch": 0.69,
"learning_rate": 6.532418194340645e-05,
"loss": 4.4996,
"step": 757500
},
{
"epoch": 0.69,
"learning_rate": 6.530128515115547e-05,
"loss": 4.5003,
"step": 758000
},
{
"epoch": 0.69,
"learning_rate": 6.527838835890448e-05,
"loss": 4.4859,
"step": 758500
},
{
"epoch": 0.7,
"learning_rate": 6.525553736023798e-05,
"loss": 4.4938,
"step": 759000
},
{
"epoch": 0.7,
"learning_rate": 6.523264056798699e-05,
"loss": 4.4893,
"step": 759500
},
{
"epoch": 0.7,
"learning_rate": 6.520974377573599e-05,
"loss": 4.4752,
"step": 760000
},
{
"epoch": 0.7,
"learning_rate": 6.518684698348501e-05,
"loss": 4.4843,
"step": 760500
},
{
"epoch": 0.7,
"learning_rate": 6.516404177840302e-05,
"loss": 4.4878,
"step": 761000
},
{
"epoch": 0.7,
"learning_rate": 6.514114498615202e-05,
"loss": 4.4936,
"step": 761500
},
{
"epoch": 0.7,
"learning_rate": 6.511824819390103e-05,
"loss": 4.4863,
"step": 762000
},
{
"epoch": 0.7,
"learning_rate": 6.509535140165005e-05,
"loss": 4.4873,
"step": 762500
},
{
"epoch": 0.7,
"learning_rate": 6.507245460939905e-05,
"loss": 4.4913,
"step": 763000
},
{
"epoch": 0.7,
"learning_rate": 6.504955781714806e-05,
"loss": 4.4785,
"step": 763500
},
{
"epoch": 0.7,
"learning_rate": 6.502670681848156e-05,
"loss": 4.4888,
"step": 764000
},
{
"epoch": 0.7,
"learning_rate": 6.500381002623057e-05,
"loss": 4.4967,
"step": 764500
},
{
"epoch": 0.7,
"learning_rate": 6.498091323397957e-05,
"loss": 4.4912,
"step": 765000
},
{
"epoch": 0.7,
"learning_rate": 6.495801644172859e-05,
"loss": 4.4846,
"step": 765500
},
{
"epoch": 0.7,
"learning_rate": 6.49351196494776e-05,
"loss": 4.4954,
"step": 766000
},
{
"epoch": 0.7,
"learning_rate": 6.49122228572266e-05,
"loss": 4.5018,
"step": 766500
},
{
"epoch": 0.7,
"learning_rate": 6.488941765214461e-05,
"loss": 4.4878,
"step": 767000
},
{
"epoch": 0.7,
"learning_rate": 6.486652085989363e-05,
"loss": 4.4775,
"step": 767500
},
{
"epoch": 0.7,
"learning_rate": 6.484362406764263e-05,
"loss": 4.488,
"step": 768000
},
{
"epoch": 0.7,
"learning_rate": 6.482072727539164e-05,
"loss": 4.5008,
"step": 768500
},
{
"epoch": 0.7,
"learning_rate": 6.479783048314064e-05,
"loss": 4.4915,
"step": 769000
},
{
"epoch": 0.7,
"learning_rate": 6.477493369088963e-05,
"loss": 4.4881,
"step": 769500
},
{
"epoch": 0.71,
"learning_rate": 6.475203689863865e-05,
"loss": 4.4886,
"step": 770000
},
{
"epoch": 0.71,
"learning_rate": 6.472914010638766e-05,
"loss": 4.4732,
"step": 770500
},
{
"epoch": 0.71,
"learning_rate": 6.470624331413666e-05,
"loss": 4.4746,
"step": 771000
},
{
"epoch": 0.71,
"learning_rate": 6.468334652188567e-05,
"loss": 4.4896,
"step": 771500
},
{
"epoch": 0.71,
"learning_rate": 6.466044972963467e-05,
"loss": 4.4905,
"step": 772000
},
{
"epoch": 0.71,
"learning_rate": 6.463755293738368e-05,
"loss": 4.4831,
"step": 772500
},
{
"epoch": 0.71,
"learning_rate": 6.46147019387172e-05,
"loss": 4.4831,
"step": 773000
},
{
"epoch": 0.71,
"learning_rate": 6.45918051464662e-05,
"loss": 4.5024,
"step": 773500
},
{
"epoch": 0.71,
"learning_rate": 6.45689083542152e-05,
"loss": 4.4825,
"step": 774000
},
{
"epoch": 0.71,
"learning_rate": 6.454601156196421e-05,
"loss": 4.4903,
"step": 774500
},
{
"epoch": 0.71,
"learning_rate": 6.452311476971322e-05,
"loss": 4.4875,
"step": 775000
},
{
"epoch": 0.71,
"learning_rate": 6.450021797746223e-05,
"loss": 4.4994,
"step": 775500
},
{
"epoch": 0.71,
"learning_rate": 6.447732118521124e-05,
"loss": 4.4986,
"step": 776000
},
{
"epoch": 0.71,
"learning_rate": 6.445442439296024e-05,
"loss": 4.5064,
"step": 776500
},
{
"epoch": 0.71,
"learning_rate": 6.443157339429375e-05,
"loss": 4.4808,
"step": 777000
},
{
"epoch": 0.71,
"learning_rate": 6.440867660204275e-05,
"loss": 4.4892,
"step": 777500
},
{
"epoch": 0.71,
"learning_rate": 6.438577980979177e-05,
"loss": 4.4802,
"step": 778000
},
{
"epoch": 0.71,
"learning_rate": 6.436288301754078e-05,
"loss": 4.4764,
"step": 778500
},
{
"epoch": 0.71,
"learning_rate": 6.433998622528978e-05,
"loss": 4.5041,
"step": 779000
},
{
"epoch": 0.71,
"learning_rate": 6.431713522662329e-05,
"loss": 4.4847,
"step": 779500
},
{
"epoch": 0.71,
"learning_rate": 6.42942384343723e-05,
"loss": 4.4783,
"step": 780000
},
{
"epoch": 0.71,
"learning_rate": 6.427134164212131e-05,
"loss": 4.4806,
"step": 780500
},
{
"epoch": 0.72,
"learning_rate": 6.424844484987032e-05,
"loss": 4.4777,
"step": 781000
},
{
"epoch": 0.72,
"learning_rate": 6.422554805761932e-05,
"loss": 4.4846,
"step": 781500
},
{
"epoch": 0.72,
"learning_rate": 6.420265126536833e-05,
"loss": 4.477,
"step": 782000
},
{
"epoch": 0.72,
"learning_rate": 6.417980026670183e-05,
"loss": 4.4904,
"step": 782500
},
{
"epoch": 0.72,
"learning_rate": 6.415690347445085e-05,
"loss": 4.498,
"step": 783000
},
{
"epoch": 0.72,
"learning_rate": 6.413400668219986e-05,
"loss": 4.4892,
"step": 783500
},
{
"epoch": 0.72,
"learning_rate": 6.411115568353336e-05,
"loss": 4.4924,
"step": 784000
},
{
"epoch": 0.72,
"learning_rate": 6.408825889128237e-05,
"loss": 4.4845,
"step": 784500
},
{
"epoch": 0.72,
"learning_rate": 6.406536209903137e-05,
"loss": 4.4904,
"step": 785000
},
{
"epoch": 0.72,
"learning_rate": 6.404246530678039e-05,
"loss": 4.4758,
"step": 785500
},
{
"epoch": 0.72,
"learning_rate": 6.40195685145294e-05,
"loss": 4.4753,
"step": 786000
},
{
"epoch": 0.72,
"learning_rate": 6.39967175158629e-05,
"loss": 4.488,
"step": 786500
},
{
"epoch": 0.72,
"learning_rate": 6.397382072361191e-05,
"loss": 4.4859,
"step": 787000
},
{
"epoch": 0.72,
"learning_rate": 6.395092393136091e-05,
"loss": 4.4974,
"step": 787500
},
{
"epoch": 0.72,
"learning_rate": 6.392802713910993e-05,
"loss": 4.4878,
"step": 788000
},
{
"epoch": 0.72,
"learning_rate": 6.390513034685894e-05,
"loss": 4.4861,
"step": 788500
},
{
"epoch": 0.72,
"learning_rate": 6.388223355460794e-05,
"loss": 4.4854,
"step": 789000
},
{
"epoch": 0.72,
"learning_rate": 6.385933676235695e-05,
"loss": 4.4921,
"step": 789500
},
{
"epoch": 0.72,
"learning_rate": 6.383643997010595e-05,
"loss": 4.494,
"step": 790000
},
{
"epoch": 0.72,
"learning_rate": 6.381358897143947e-05,
"loss": 4.4677,
"step": 790500
},
{
"epoch": 0.72,
"learning_rate": 6.379069217918848e-05,
"loss": 4.475,
"step": 791000
},
{
"epoch": 0.72,
"learning_rate": 6.376784118052198e-05,
"loss": 4.4795,
"step": 791500
},
{
"epoch": 0.73,
"learning_rate": 6.374494438827099e-05,
"loss": 4.4806,
"step": 792000
},
{
"epoch": 0.73,
"learning_rate": 6.372204759601999e-05,
"loss": 4.4931,
"step": 792500
},
{
"epoch": 0.73,
"learning_rate": 6.369915080376901e-05,
"loss": 4.4876,
"step": 793000
},
{
"epoch": 0.73,
"learning_rate": 6.367625401151801e-05,
"loss": 4.4863,
"step": 793500
},
{
"epoch": 0.73,
"learning_rate": 6.365335721926702e-05,
"loss": 4.4915,
"step": 794000
},
{
"epoch": 0.73,
"learning_rate": 6.363046042701601e-05,
"loss": 4.4881,
"step": 794500
},
{
"epoch": 0.73,
"learning_rate": 6.360756363476502e-05,
"loss": 4.4988,
"step": 795000
},
{
"epoch": 0.73,
"learning_rate": 6.358471263609854e-05,
"loss": 4.4825,
"step": 795500
},
{
"epoch": 0.73,
"learning_rate": 6.356181584384754e-05,
"loss": 4.4789,
"step": 796000
},
{
"epoch": 0.73,
"learning_rate": 6.353896484518105e-05,
"loss": 4.4815,
"step": 796500
},
{
"epoch": 0.73,
"learning_rate": 6.351606805293005e-05,
"loss": 4.4787,
"step": 797000
},
{
"epoch": 0.73,
"learning_rate": 6.349317126067906e-05,
"loss": 4.4824,
"step": 797500
},
{
"epoch": 0.73,
"learning_rate": 6.347027446842807e-05,
"loss": 4.4764,
"step": 798000
},
{
"epoch": 0.73,
"learning_rate": 6.344737767617708e-05,
"loss": 4.4851,
"step": 798500
},
{
"epoch": 0.73,
"learning_rate": 6.342448088392608e-05,
"loss": 4.4698,
"step": 799000
},
{
"epoch": 0.73,
"learning_rate": 6.340158409167509e-05,
"loss": 4.493,
"step": 799500
},
{
"epoch": 0.73,
"learning_rate": 6.33786872994241e-05,
"loss": 4.4822,
"step": 800000
},
{
"epoch": 0.73,
"learning_rate": 6.335583630075761e-05,
"loss": 4.4791,
"step": 800500
},
{
"epoch": 0.73,
"learning_rate": 6.333293950850662e-05,
"loss": 4.5043,
"step": 801000
},
{
"epoch": 0.73,
"learning_rate": 6.331004271625562e-05,
"loss": 4.4898,
"step": 801500
},
{
"epoch": 0.73,
"learning_rate": 6.328714592400463e-05,
"loss": 4.4771,
"step": 802000
},
{
"epoch": 0.73,
"learning_rate": 6.326429492533813e-05,
"loss": 4.4837,
"step": 802500
},
{
"epoch": 0.74,
"learning_rate": 6.324139813308715e-05,
"loss": 4.482,
"step": 803000
},
{
"epoch": 0.74,
"learning_rate": 6.321850134083616e-05,
"loss": 4.4697,
"step": 803500
},
{
"epoch": 0.74,
"learning_rate": 6.319560454858516e-05,
"loss": 4.4837,
"step": 804000
},
{
"epoch": 0.74,
"learning_rate": 6.317279934350317e-05,
"loss": 4.482,
"step": 804500
},
{
"epoch": 0.74,
"learning_rate": 6.314990255125219e-05,
"loss": 4.4807,
"step": 805000
},
{
"epoch": 0.74,
"learning_rate": 6.31270057590012e-05,
"loss": 4.4791,
"step": 805500
},
{
"epoch": 0.74,
"learning_rate": 6.31041089667502e-05,
"loss": 4.4841,
"step": 806000
},
{
"epoch": 0.74,
"learning_rate": 6.30812121744992e-05,
"loss": 4.4914,
"step": 806500
},
{
"epoch": 0.74,
"learning_rate": 6.305831538224821e-05,
"loss": 4.4678,
"step": 807000
},
{
"epoch": 0.74,
"learning_rate": 6.303541858999721e-05,
"loss": 4.4876,
"step": 807500
},
{
"epoch": 0.74,
"learning_rate": 6.301252179774623e-05,
"loss": 4.4782,
"step": 808000
},
{
"epoch": 0.74,
"learning_rate": 6.298962500549524e-05,
"loss": 4.4799,
"step": 808500
},
{
"epoch": 0.74,
"learning_rate": 6.296672821324424e-05,
"loss": 4.4772,
"step": 809000
},
{
"epoch": 0.74,
"learning_rate": 6.294383142099325e-05,
"loss": 4.4773,
"step": 809500
},
{
"epoch": 0.74,
"learning_rate": 6.292098042232675e-05,
"loss": 4.4862,
"step": 810000
},
{
"epoch": 0.74,
"learning_rate": 6.289812942366027e-05,
"loss": 4.4821,
"step": 810500
},
{
"epoch": 0.74,
"learning_rate": 6.287523263140928e-05,
"loss": 4.481,
"step": 811000
},
{
"epoch": 0.74,
"learning_rate": 6.285233583915828e-05,
"loss": 4.4859,
"step": 811500
},
{
"epoch": 0.74,
"learning_rate": 6.282943904690729e-05,
"loss": 4.4867,
"step": 812000
},
{
"epoch": 0.74,
"learning_rate": 6.280654225465629e-05,
"loss": 4.4705,
"step": 812500
},
{
"epoch": 0.74,
"learning_rate": 6.278364546240531e-05,
"loss": 4.4807,
"step": 813000
},
{
"epoch": 0.75,
"learning_rate": 6.276074867015432e-05,
"loss": 4.498,
"step": 813500
},
{
"epoch": 0.75,
"learning_rate": 6.273785187790332e-05,
"loss": 4.49,
"step": 814000
},
{
"epoch": 0.75,
"learning_rate": 6.271495508565233e-05,
"loss": 4.4779,
"step": 814500
},
{
"epoch": 0.75,
"learning_rate": 6.269210408698583e-05,
"loss": 4.483,
"step": 815000
},
{
"epoch": 0.75,
"learning_rate": 6.266920729473485e-05,
"loss": 4.4744,
"step": 815500
},
{
"epoch": 0.75,
"learning_rate": 6.264631050248386e-05,
"loss": 4.4862,
"step": 816000
},
{
"epoch": 0.75,
"learning_rate": 6.262341371023286e-05,
"loss": 4.4767,
"step": 816500
},
{
"epoch": 0.75,
"learning_rate": 6.260051691798187e-05,
"loss": 4.4874,
"step": 817000
},
{
"epoch": 0.75,
"learning_rate": 6.257762012573087e-05,
"loss": 4.4852,
"step": 817500
},
{
"epoch": 0.75,
"learning_rate": 6.255476912706439e-05,
"loss": 4.4666,
"step": 818000
},
{
"epoch": 0.75,
"learning_rate": 6.253191812839788e-05,
"loss": 4.4738,
"step": 818500
},
{
"epoch": 0.75,
"learning_rate": 6.250902133614689e-05,
"loss": 4.473,
"step": 819000
},
{
"epoch": 0.75,
"learning_rate": 6.248612454389589e-05,
"loss": 4.48,
"step": 819500
},
{
"epoch": 0.75,
"learning_rate": 6.246322775164491e-05,
"loss": 4.4844,
"step": 820000
},
{
"epoch": 0.75,
"learning_rate": 6.244033095939392e-05,
"loss": 4.4782,
"step": 820500
},
{
"epoch": 0.75,
"learning_rate": 6.241743416714292e-05,
"loss": 4.485,
"step": 821000
},
{
"epoch": 0.75,
"learning_rate": 6.239453737489193e-05,
"loss": 4.4845,
"step": 821500
},
{
"epoch": 0.75,
"learning_rate": 6.237168637622543e-05,
"loss": 4.4637,
"step": 822000
},
{
"epoch": 0.75,
"learning_rate": 6.234878958397445e-05,
"loss": 4.495,
"step": 822500
},
{
"epoch": 0.75,
"learning_rate": 6.232589279172345e-05,
"loss": 4.4822,
"step": 823000
},
{
"epoch": 0.75,
"learning_rate": 6.230299599947246e-05,
"loss": 4.4852,
"step": 823500
},
{
"epoch": 0.75,
"learning_rate": 6.228009920722146e-05,
"loss": 4.4744,
"step": 824000
},
{
"epoch": 0.76,
"learning_rate": 6.225724820855497e-05,
"loss": 4.4668,
"step": 824500
},
{
"epoch": 0.76,
"learning_rate": 6.223435141630398e-05,
"loss": 4.4675,
"step": 825000
},
{
"epoch": 0.76,
"learning_rate": 6.2211454624053e-05,
"loss": 4.4824,
"step": 825500
},
{
"epoch": 0.76,
"learning_rate": 6.2188557831802e-05,
"loss": 4.4708,
"step": 826000
},
{
"epoch": 0.76,
"learning_rate": 6.2165661039551e-05,
"loss": 4.4755,
"step": 826500
},
{
"epoch": 0.76,
"learning_rate": 6.214276424730001e-05,
"loss": 4.4785,
"step": 827000
},
{
"epoch": 0.76,
"learning_rate": 6.211986745504901e-05,
"loss": 4.4769,
"step": 827500
},
{
"epoch": 0.76,
"learning_rate": 6.209697066279803e-05,
"loss": 4.4802,
"step": 828000
},
{
"epoch": 0.76,
"learning_rate": 6.207407387054704e-05,
"loss": 4.4965,
"step": 828500
},
{
"epoch": 0.76,
"learning_rate": 6.205117707829604e-05,
"loss": 4.4981,
"step": 829000
},
{
"epoch": 0.76,
"learning_rate": 6.202828028604505e-05,
"loss": 4.4855,
"step": 829500
},
{
"epoch": 0.76,
"learning_rate": 6.200538349379405e-05,
"loss": 4.4754,
"step": 830000
},
{
"epoch": 0.76,
"learning_rate": 6.198248670154306e-05,
"loss": 4.4844,
"step": 830500
},
{
"epoch": 0.76,
"learning_rate": 6.195963570287658e-05,
"loss": 4.4691,
"step": 831000
},
{
"epoch": 0.76,
"learning_rate": 6.193673891062558e-05,
"loss": 4.4681,
"step": 831500
},
{
"epoch": 0.76,
"learning_rate": 6.191384211837459e-05,
"loss": 4.4673,
"step": 832000
},
{
"epoch": 0.76,
"learning_rate": 6.189099111970809e-05,
"loss": 4.4818,
"step": 832500
},
{
"epoch": 0.76,
"learning_rate": 6.18680943274571e-05,
"loss": 4.4755,
"step": 833000
},
{
"epoch": 0.76,
"learning_rate": 6.184519753520612e-05,
"loss": 4.4854,
"step": 833500
},
{
"epoch": 0.76,
"learning_rate": 6.182230074295512e-05,
"loss": 4.4871,
"step": 834000
},
{
"epoch": 0.76,
"learning_rate": 6.179940395070413e-05,
"loss": 4.4703,
"step": 834500
},
{
"epoch": 0.76,
"learning_rate": 6.177650715845313e-05,
"loss": 4.496,
"step": 835000
},
{
"epoch": 0.77,
"learning_rate": 6.175361036620214e-05,
"loss": 4.4752,
"step": 835500
},
{
"epoch": 0.77,
"learning_rate": 6.173071357395116e-05,
"loss": 4.4765,
"step": 836000
},
{
"epoch": 0.77,
"learning_rate": 6.170781678170016e-05,
"loss": 4.4596,
"step": 836500
},
{
"epoch": 0.77,
"learning_rate": 6.168501157661817e-05,
"loss": 4.4809,
"step": 837000
},
{
"epoch": 0.77,
"learning_rate": 6.166211478436717e-05,
"loss": 4.4796,
"step": 837500
},
{
"epoch": 0.77,
"learning_rate": 6.163921799211618e-05,
"loss": 4.4686,
"step": 838000
},
{
"epoch": 0.77,
"learning_rate": 6.16163211998652e-05,
"loss": 4.4818,
"step": 838500
},
{
"epoch": 0.77,
"learning_rate": 6.15934244076142e-05,
"loss": 4.4796,
"step": 839000
},
{
"epoch": 0.77,
"learning_rate": 6.15705276153632e-05,
"loss": 4.4713,
"step": 839500
},
{
"epoch": 0.77,
"learning_rate": 6.154763082311221e-05,
"loss": 4.4912,
"step": 840000
},
{
"epoch": 0.77,
"learning_rate": 6.152473403086122e-05,
"loss": 4.4758,
"step": 840500
},
{
"epoch": 0.77,
"learning_rate": 6.150183723861022e-05,
"loss": 4.478,
"step": 841000
},
{
"epoch": 0.77,
"learning_rate": 6.147898623994374e-05,
"loss": 4.4693,
"step": 841500
},
{
"epoch": 0.77,
"learning_rate": 6.145608944769274e-05,
"loss": 4.4646,
"step": 842000
},
{
"epoch": 0.77,
"learning_rate": 6.143319265544175e-05,
"loss": 4.4822,
"step": 842500
},
{
"epoch": 0.77,
"learning_rate": 6.141029586319075e-05,
"loss": 4.4715,
"step": 843000
},
{
"epoch": 0.77,
"learning_rate": 6.138739907093976e-05,
"loss": 4.4706,
"step": 843500
},
{
"epoch": 0.77,
"learning_rate": 6.136454807227327e-05,
"loss": 4.4766,
"step": 844000
},
{
"epoch": 0.77,
"learning_rate": 6.134165128002227e-05,
"loss": 4.4681,
"step": 844500
},
{
"epoch": 0.77,
"learning_rate": 6.131880028135578e-05,
"loss": 4.4651,
"step": 845000
},
{
"epoch": 0.77,
"learning_rate": 6.12959034891048e-05,
"loss": 4.4819,
"step": 845500
},
{
"epoch": 0.77,
"learning_rate": 6.12730524904383e-05,
"loss": 4.4691,
"step": 846000
},
{
"epoch": 0.78,
"learning_rate": 6.12501556981873e-05,
"loss": 4.4718,
"step": 846500
},
{
"epoch": 0.78,
"learning_rate": 6.122725890593631e-05,
"loss": 4.4699,
"step": 847000
},
{
"epoch": 0.78,
"learning_rate": 6.120436211368532e-05,
"loss": 4.4549,
"step": 847500
},
{
"epoch": 0.78,
"learning_rate": 6.118146532143433e-05,
"loss": 4.4691,
"step": 848000
},
{
"epoch": 0.78,
"learning_rate": 6.115856852918334e-05,
"loss": 4.4604,
"step": 848500
},
{
"epoch": 0.78,
"learning_rate": 6.113567173693234e-05,
"loss": 4.4737,
"step": 849000
},
{
"epoch": 0.78,
"learning_rate": 6.111277494468135e-05,
"loss": 4.4751,
"step": 849500
},
{
"epoch": 0.78,
"learning_rate": 6.108987815243035e-05,
"loss": 4.4875,
"step": 850000
},
{
"epoch": 0.78,
"learning_rate": 6.106698136017936e-05,
"loss": 4.4808,
"step": 850500
},
{
"epoch": 0.78,
"learning_rate": 6.104408456792838e-05,
"loss": 4.4795,
"step": 851000
},
{
"epoch": 0.78,
"learning_rate": 6.1021233569261884e-05,
"loss": 4.4743,
"step": 851500
},
{
"epoch": 0.78,
"learning_rate": 6.099833677701089e-05,
"loss": 4.479,
"step": 852000
},
{
"epoch": 0.78,
"learning_rate": 6.0975439984759894e-05,
"loss": 4.4732,
"step": 852500
},
{
"epoch": 0.78,
"learning_rate": 6.0952543192508905e-05,
"loss": 4.4726,
"step": 853000
},
{
"epoch": 0.78,
"learning_rate": 6.092964640025791e-05,
"loss": 4.4756,
"step": 853500
},
{
"epoch": 0.78,
"learning_rate": 6.0906749608006916e-05,
"loss": 4.4693,
"step": 854000
},
{
"epoch": 0.78,
"learning_rate": 6.088385281575593e-05,
"loss": 4.4672,
"step": 854500
},
{
"epoch": 0.78,
"learning_rate": 6.086095602350493e-05,
"loss": 4.4678,
"step": 855000
},
{
"epoch": 0.78,
"learning_rate": 6.0838105024838445e-05,
"loss": 4.4858,
"step": 855500
},
{
"epoch": 0.78,
"learning_rate": 6.081525402617195e-05,
"loss": 4.4714,
"step": 856000
},
{
"epoch": 0.78,
"learning_rate": 6.079235723392096e-05,
"loss": 4.4786,
"step": 856500
},
{
"epoch": 0.78,
"learning_rate": 6.076946044166997e-05,
"loss": 4.4755,
"step": 857000
},
{
"epoch": 0.79,
"learning_rate": 6.074656364941897e-05,
"loss": 4.4815,
"step": 857500
},
{
"epoch": 0.79,
"learning_rate": 6.0723712650752485e-05,
"loss": 4.4703,
"step": 858000
},
{
"epoch": 0.79,
"learning_rate": 6.070081585850149e-05,
"loss": 4.4696,
"step": 858500
},
{
"epoch": 0.79,
"learning_rate": 6.06779190662505e-05,
"loss": 4.4768,
"step": 859000
},
{
"epoch": 0.79,
"learning_rate": 6.065502227399951e-05,
"loss": 4.4688,
"step": 859500
},
{
"epoch": 0.79,
"learning_rate": 6.063212548174851e-05,
"loss": 4.4697,
"step": 860000
},
{
"epoch": 0.79,
"learning_rate": 6.0609228689497523e-05,
"loss": 4.4655,
"step": 860500
},
{
"epoch": 0.79,
"learning_rate": 6.058637769083103e-05,
"loss": 4.4768,
"step": 861000
},
{
"epoch": 0.79,
"learning_rate": 6.056348089858004e-05,
"loss": 4.481,
"step": 861500
},
{
"epoch": 0.79,
"learning_rate": 6.0540584106329046e-05,
"loss": 4.4708,
"step": 862000
},
{
"epoch": 0.79,
"learning_rate": 6.051768731407805e-05,
"loss": 4.47,
"step": 862500
},
{
"epoch": 0.79,
"learning_rate": 6.049479052182706e-05,
"loss": 4.4781,
"step": 863000
},
{
"epoch": 0.79,
"learning_rate": 6.047189372957607e-05,
"loss": 4.4652,
"step": 863500
},
{
"epoch": 0.79,
"learning_rate": 6.044899693732507e-05,
"loss": 4.4646,
"step": 864000
},
{
"epoch": 0.79,
"learning_rate": 6.0426145938658585e-05,
"loss": 4.4735,
"step": 864500
},
{
"epoch": 0.79,
"learning_rate": 6.040324914640759e-05,
"loss": 4.4904,
"step": 865000
},
{
"epoch": 0.79,
"learning_rate": 6.03803523541566e-05,
"loss": 4.4646,
"step": 865500
},
{
"epoch": 0.79,
"learning_rate": 6.035745556190561e-05,
"loss": 4.4856,
"step": 866000
},
{
"epoch": 0.79,
"learning_rate": 6.033455876965461e-05,
"loss": 4.4743,
"step": 866500
},
{
"epoch": 0.79,
"learning_rate": 6.0311661977403624e-05,
"loss": 4.4914,
"step": 867000
},
{
"epoch": 0.79,
"learning_rate": 6.028876518515263e-05,
"loss": 4.4764,
"step": 867500
},
{
"epoch": 0.79,
"learning_rate": 6.026591418648613e-05,
"loss": 4.4819,
"step": 868000
},
{
"epoch": 0.8,
"learning_rate": 6.024301739423513e-05,
"loss": 4.4666,
"step": 868500
},
{
"epoch": 0.8,
"learning_rate": 6.0220120601984145e-05,
"loss": 4.4538,
"step": 869000
},
{
"epoch": 0.8,
"learning_rate": 6.019722380973315e-05,
"loss": 4.4857,
"step": 869500
},
{
"epoch": 0.8,
"learning_rate": 6.0174327017482155e-05,
"loss": 4.4711,
"step": 870000
},
{
"epoch": 0.8,
"learning_rate": 6.015143022523117e-05,
"loss": 4.4813,
"step": 870500
},
{
"epoch": 0.8,
"learning_rate": 6.012853343298017e-05,
"loss": 4.4773,
"step": 871000
},
{
"epoch": 0.8,
"learning_rate": 6.010563664072918e-05,
"loss": 4.4837,
"step": 871500
},
{
"epoch": 0.8,
"learning_rate": 6.008278564206269e-05,
"loss": 4.4824,
"step": 872000
},
{
"epoch": 0.8,
"learning_rate": 6.00599804369807e-05,
"loss": 4.4751,
"step": 872500
},
{
"epoch": 0.8,
"learning_rate": 6.003708364472971e-05,
"loss": 4.4766,
"step": 873000
},
{
"epoch": 0.8,
"learning_rate": 6.001418685247871e-05,
"loss": 4.4763,
"step": 873500
},
{
"epoch": 0.8,
"learning_rate": 5.9991290060227724e-05,
"loss": 4.4768,
"step": 874000
},
{
"epoch": 0.8,
"learning_rate": 5.996839326797673e-05,
"loss": 4.4807,
"step": 874500
},
{
"epoch": 0.8,
"learning_rate": 5.9945496475725734e-05,
"loss": 4.4645,
"step": 875000
},
{
"epoch": 0.8,
"learning_rate": 5.9922599683474746e-05,
"loss": 4.47,
"step": 875500
},
{
"epoch": 0.8,
"learning_rate": 5.989970289122375e-05,
"loss": 4.4806,
"step": 876000
},
{
"epoch": 0.8,
"learning_rate": 5.987680609897276e-05,
"loss": 4.4734,
"step": 876500
},
{
"epoch": 0.8,
"learning_rate": 5.985390930672177e-05,
"loss": 4.4678,
"step": 877000
},
{
"epoch": 0.8,
"learning_rate": 5.983101251447077e-05,
"loss": 4.4711,
"step": 877500
},
{
"epoch": 0.8,
"learning_rate": 5.9808161515804285e-05,
"loss": 4.461,
"step": 878000
},
{
"epoch": 0.8,
"learning_rate": 5.978526472355329e-05,
"loss": 4.4693,
"step": 878500
},
{
"epoch": 0.81,
"learning_rate": 5.9762367931302295e-05,
"loss": 4.4749,
"step": 879000
},
{
"epoch": 0.81,
"learning_rate": 5.973947113905131e-05,
"loss": 4.4531,
"step": 879500
},
{
"epoch": 0.81,
"learning_rate": 5.971657434680031e-05,
"loss": 4.481,
"step": 880000
},
{
"epoch": 0.81,
"learning_rate": 5.9693723348133825e-05,
"loss": 4.4606,
"step": 880500
},
{
"epoch": 0.81,
"learning_rate": 5.967082655588283e-05,
"loss": 4.4707,
"step": 881000
},
{
"epoch": 0.81,
"learning_rate": 5.9647929763631835e-05,
"loss": 4.4672,
"step": 881500
},
{
"epoch": 0.81,
"learning_rate": 5.962503297138085e-05,
"loss": 4.4826,
"step": 882000
},
{
"epoch": 0.81,
"learning_rate": 5.960213617912985e-05,
"loss": 4.4719,
"step": 882500
},
{
"epoch": 0.81,
"learning_rate": 5.957923938687886e-05,
"loss": 4.4696,
"step": 883000
},
{
"epoch": 0.81,
"learning_rate": 5.955638838821237e-05,
"loss": 4.4569,
"step": 883500
},
{
"epoch": 0.81,
"learning_rate": 5.9533491595961374e-05,
"loss": 4.4778,
"step": 884000
},
{
"epoch": 0.81,
"learning_rate": 5.9510594803710386e-05,
"loss": 4.4643,
"step": 884500
},
{
"epoch": 0.81,
"learning_rate": 5.948769801145939e-05,
"loss": 4.4609,
"step": 885000
},
{
"epoch": 0.81,
"learning_rate": 5.9464801219208396e-05,
"loss": 4.4698,
"step": 885500
},
{
"epoch": 0.81,
"learning_rate": 5.944190442695741e-05,
"loss": 4.4731,
"step": 886000
},
{
"epoch": 0.81,
"learning_rate": 5.941900763470641e-05,
"loss": 4.471,
"step": 886500
},
{
"epoch": 0.81,
"learning_rate": 5.939611084245542e-05,
"loss": 4.4805,
"step": 887000
},
{
"epoch": 0.81,
"learning_rate": 5.937325984378893e-05,
"loss": 4.4689,
"step": 887500
},
{
"epoch": 0.81,
"learning_rate": 5.9350363051537935e-05,
"loss": 4.4522,
"step": 888000
},
{
"epoch": 0.81,
"learning_rate": 5.932746625928695e-05,
"loss": 4.4632,
"step": 888500
},
{
"epoch": 0.81,
"learning_rate": 5.930456946703595e-05,
"loss": 4.4881,
"step": 889000
},
{
"epoch": 0.81,
"learning_rate": 5.928167267478496e-05,
"loss": 4.4617,
"step": 889500
},
{
"epoch": 0.82,
"learning_rate": 5.925882167611847e-05,
"loss": 4.4716,
"step": 890000
},
{
"epoch": 0.82,
"learning_rate": 5.9235924883867475e-05,
"loss": 4.4492,
"step": 890500
},
{
"epoch": 0.82,
"learning_rate": 5.9213028091616487e-05,
"loss": 4.4725,
"step": 891000
},
{
"epoch": 0.82,
"learning_rate": 5.919013129936549e-05,
"loss": 4.4615,
"step": 891500
},
{
"epoch": 0.82,
"learning_rate": 5.916728030069899e-05,
"loss": 4.4577,
"step": 892000
},
{
"epoch": 0.82,
"learning_rate": 5.9144383508447996e-05,
"loss": 4.4576,
"step": 892500
},
{
"epoch": 0.82,
"learning_rate": 5.912148671619701e-05,
"loss": 4.4684,
"step": 893000
},
{
"epoch": 0.82,
"learning_rate": 5.909858992394601e-05,
"loss": 4.4504,
"step": 893500
},
{
"epoch": 0.82,
"learning_rate": 5.907569313169502e-05,
"loss": 4.4747,
"step": 894000
},
{
"epoch": 0.82,
"learning_rate": 5.905279633944403e-05,
"loss": 4.4656,
"step": 894500
},
{
"epoch": 0.82,
"learning_rate": 5.9029945340777535e-05,
"loss": 4.4636,
"step": 895000
},
{
"epoch": 0.82,
"learning_rate": 5.900704854852655e-05,
"loss": 4.4705,
"step": 895500
},
{
"epoch": 0.82,
"learning_rate": 5.898415175627555e-05,
"loss": 4.4647,
"step": 896000
},
{
"epoch": 0.82,
"learning_rate": 5.896125496402456e-05,
"loss": 4.465,
"step": 896500
},
{
"epoch": 0.82,
"learning_rate": 5.893840396535807e-05,
"loss": 4.4671,
"step": 897000
},
{
"epoch": 0.82,
"learning_rate": 5.891555296669158e-05,
"loss": 4.4645,
"step": 897500
},
{
"epoch": 0.82,
"learning_rate": 5.889270196802509e-05,
"loss": 4.4556,
"step": 898000
},
{
"epoch": 0.82,
"learning_rate": 5.886980517577409e-05,
"loss": 4.4744,
"step": 898500
},
{
"epoch": 0.82,
"learning_rate": 5.8846908383523104e-05,
"loss": 4.4529,
"step": 899000
},
{
"epoch": 0.82,
"learning_rate": 5.882401159127211e-05,
"loss": 4.4646,
"step": 899500
},
{
"epoch": 0.82,
"learning_rate": 5.8801114799021114e-05,
"loss": 4.4742,
"step": 900000
},
{
"epoch": 0.82,
"learning_rate": 5.8778218006770126e-05,
"loss": 4.4663,
"step": 900500
},
{
"epoch": 0.83,
"learning_rate": 5.875532121451913e-05,
"loss": 4.4505,
"step": 901000
},
{
"epoch": 0.83,
"learning_rate": 5.8732470215852643e-05,
"loss": 4.4694,
"step": 901500
},
{
"epoch": 0.83,
"learning_rate": 5.870957342360165e-05,
"loss": 4.468,
"step": 902000
},
{
"epoch": 0.83,
"learning_rate": 5.8686676631350654e-05,
"loss": 4.4629,
"step": 902500
},
{
"epoch": 0.83,
"learning_rate": 5.8663779839099665e-05,
"loss": 4.4696,
"step": 903000
},
{
"epoch": 0.83,
"learning_rate": 5.864088304684867e-05,
"loss": 4.4589,
"step": 903500
},
{
"epoch": 0.83,
"learning_rate": 5.8617986254597675e-05,
"loss": 4.4719,
"step": 904000
},
{
"epoch": 0.83,
"learning_rate": 5.859508946234669e-05,
"loss": 4.4714,
"step": 904500
},
{
"epoch": 0.83,
"learning_rate": 5.857219267009569e-05,
"loss": 4.4543,
"step": 905000
},
{
"epoch": 0.83,
"learning_rate": 5.8549341671429205e-05,
"loss": 4.4537,
"step": 905500
},
{
"epoch": 0.83,
"learning_rate": 5.852644487917821e-05,
"loss": 4.4706,
"step": 906000
},
{
"epoch": 0.83,
"learning_rate": 5.8503548086927215e-05,
"loss": 4.4554,
"step": 906500
},
{
"epoch": 0.83,
"learning_rate": 5.8480651294676227e-05,
"loss": 4.4572,
"step": 907000
},
{
"epoch": 0.83,
"learning_rate": 5.845775450242523e-05,
"loss": 4.473,
"step": 907500
},
{
"epoch": 0.83,
"learning_rate": 5.8434903503758744e-05,
"loss": 4.4681,
"step": 908000
},
{
"epoch": 0.83,
"learning_rate": 5.841200671150775e-05,
"loss": 4.4623,
"step": 908500
},
{
"epoch": 0.83,
"learning_rate": 5.8389109919256754e-05,
"loss": 4.4483,
"step": 909000
},
{
"epoch": 0.83,
"learning_rate": 5.8366213127005766e-05,
"loss": 4.462,
"step": 909500
},
{
"epoch": 0.83,
"learning_rate": 5.834331633475477e-05,
"loss": 4.4732,
"step": 910000
},
{
"epoch": 0.83,
"learning_rate": 5.8320419542503776e-05,
"loss": 4.4515,
"step": 910500
},
{
"epoch": 0.83,
"learning_rate": 5.829752275025279e-05,
"loss": 4.4718,
"step": 911000
},
{
"epoch": 0.83,
"learning_rate": 5.827462595800179e-05,
"loss": 4.4965,
"step": 911500
},
{
"epoch": 0.84,
"learning_rate": 5.8251774959335305e-05,
"loss": 4.464,
"step": 912000
},
{
"epoch": 0.84,
"learning_rate": 5.822887816708431e-05,
"loss": 4.4782,
"step": 912500
},
{
"epoch": 0.84,
"learning_rate": 5.8205981374833315e-05,
"loss": 4.4587,
"step": 913000
},
{
"epoch": 0.84,
"learning_rate": 5.818308458258233e-05,
"loss": 4.4595,
"step": 913500
},
{
"epoch": 0.84,
"learning_rate": 5.816023358391583e-05,
"loss": 4.4743,
"step": 914000
},
{
"epoch": 0.84,
"learning_rate": 5.8137336791664845e-05,
"loss": 4.4763,
"step": 914500
},
{
"epoch": 0.84,
"learning_rate": 5.811443999941385e-05,
"loss": 4.4679,
"step": 915000
},
{
"epoch": 0.84,
"learning_rate": 5.809158900074736e-05,
"loss": 4.462,
"step": 915500
},
{
"epoch": 0.84,
"learning_rate": 5.8068692208496354e-05,
"loss": 4.4646,
"step": 916000
},
{
"epoch": 0.84,
"learning_rate": 5.8045795416245365e-05,
"loss": 4.4671,
"step": 916500
},
{
"epoch": 0.84,
"learning_rate": 5.802289862399437e-05,
"loss": 4.4587,
"step": 917000
},
{
"epoch": 0.84,
"learning_rate": 5.8000001831743375e-05,
"loss": 4.4642,
"step": 917500
},
{
"epoch": 0.84,
"learning_rate": 5.797710503949239e-05,
"loss": 4.4569,
"step": 918000
},
{
"epoch": 0.84,
"learning_rate": 5.795420824724139e-05,
"loss": 4.476,
"step": 918500
},
{
"epoch": 0.84,
"learning_rate": 5.7931403042159405e-05,
"loss": 4.4579,
"step": 919000
},
{
"epoch": 0.84,
"learning_rate": 5.790850624990841e-05,
"loss": 4.4603,
"step": 919500
},
{
"epoch": 0.84,
"learning_rate": 5.788560945765742e-05,
"loss": 4.4582,
"step": 920000
},
{
"epoch": 0.84,
"learning_rate": 5.786271266540643e-05,
"loss": 4.4747,
"step": 920500
},
{
"epoch": 0.84,
"learning_rate": 5.783981587315543e-05,
"loss": 4.47,
"step": 921000
},
{
"epoch": 0.84,
"learning_rate": 5.7816919080904444e-05,
"loss": 4.4554,
"step": 921500
},
{
"epoch": 0.84,
"learning_rate": 5.779402228865345e-05,
"loss": 4.4655,
"step": 922000
},
{
"epoch": 0.84,
"learning_rate": 5.7771125496402454e-05,
"loss": 4.4533,
"step": 922500
},
{
"epoch": 0.85,
"learning_rate": 5.7748228704151466e-05,
"loss": 4.4714,
"step": 923000
},
{
"epoch": 0.85,
"learning_rate": 5.772533191190047e-05,
"loss": 4.4669,
"step": 923500
},
{
"epoch": 0.85,
"learning_rate": 5.7702435119649476e-05,
"loss": 4.465,
"step": 924000
},
{
"epoch": 0.85,
"learning_rate": 5.767958412098299e-05,
"loss": 4.4707,
"step": 924500
},
{
"epoch": 0.85,
"learning_rate": 5.7656687328731994e-05,
"loss": 4.4746,
"step": 925000
},
{
"epoch": 0.85,
"learning_rate": 5.7633790536481005e-05,
"loss": 4.4606,
"step": 925500
},
{
"epoch": 0.85,
"learning_rate": 5.761089374423001e-05,
"loss": 4.458,
"step": 926000
},
{
"epoch": 0.85,
"learning_rate": 5.7587996951979015e-05,
"loss": 4.4583,
"step": 926500
},
{
"epoch": 0.85,
"learning_rate": 5.756510015972803e-05,
"loss": 4.4605,
"step": 927000
},
{
"epoch": 0.85,
"learning_rate": 5.754220336747703e-05,
"loss": 4.4662,
"step": 927500
},
{
"epoch": 0.85,
"learning_rate": 5.7519352368810545e-05,
"loss": 4.4753,
"step": 928000
},
{
"epoch": 0.85,
"learning_rate": 5.749645557655955e-05,
"loss": 4.462,
"step": 928500
},
{
"epoch": 0.85,
"learning_rate": 5.7473558784308555e-05,
"loss": 4.4548,
"step": 929000
},
{
"epoch": 0.85,
"learning_rate": 5.7450661992057567e-05,
"loss": 4.4704,
"step": 929500
},
{
"epoch": 0.85,
"learning_rate": 5.742776519980657e-05,
"loss": 4.4667,
"step": 930000
},
{
"epoch": 0.85,
"learning_rate": 5.740486840755558e-05,
"loss": 4.4559,
"step": 930500
},
{
"epoch": 0.85,
"learning_rate": 5.738197161530459e-05,
"loss": 4.4784,
"step": 931000
},
{
"epoch": 0.85,
"learning_rate": 5.7359074823053594e-05,
"loss": 4.4705,
"step": 931500
},
{
"epoch": 0.85,
"learning_rate": 5.7336223824387106e-05,
"loss": 4.4697,
"step": 932000
},
{
"epoch": 0.85,
"learning_rate": 5.731332703213611e-05,
"loss": 4.4533,
"step": 932500
},
{
"epoch": 0.85,
"learning_rate": 5.7290430239885116e-05,
"loss": 4.4718,
"step": 933000
},
{
"epoch": 0.85,
"learning_rate": 5.726753344763413e-05,
"loss": 4.4582,
"step": 933500
},
{
"epoch": 0.86,
"learning_rate": 5.7244682448967633e-05,
"loss": 4.4423,
"step": 934000
},
{
"epoch": 0.86,
"learning_rate": 5.7221785656716645e-05,
"loss": 4.4568,
"step": 934500
},
{
"epoch": 0.86,
"learning_rate": 5.719888886446565e-05,
"loss": 4.4628,
"step": 935000
},
{
"epoch": 0.86,
"learning_rate": 5.7175992072214655e-05,
"loss": 4.4638,
"step": 935500
},
{
"epoch": 0.86,
"learning_rate": 5.715318686713267e-05,
"loss": 4.4489,
"step": 936000
},
{
"epoch": 0.86,
"learning_rate": 5.7130290074881673e-05,
"loss": 4.4605,
"step": 936500
},
{
"epoch": 0.86,
"learning_rate": 5.7107439076215186e-05,
"loss": 4.4577,
"step": 937000
},
{
"epoch": 0.86,
"learning_rate": 5.708454228396419e-05,
"loss": 4.4657,
"step": 937500
},
{
"epoch": 0.86,
"learning_rate": 5.70616454917132e-05,
"loss": 4.4626,
"step": 938000
},
{
"epoch": 0.86,
"learning_rate": 5.703874869946221e-05,
"loss": 4.459,
"step": 938500
},
{
"epoch": 0.86,
"learning_rate": 5.701589770079572e-05,
"loss": 4.4595,
"step": 939000
},
{
"epoch": 0.86,
"learning_rate": 5.6993000908544725e-05,
"loss": 4.4684,
"step": 939500
},
{
"epoch": 0.86,
"learning_rate": 5.6970149909878224e-05,
"loss": 4.4629,
"step": 940000
},
{
"epoch": 0.86,
"learning_rate": 5.694725311762723e-05,
"loss": 4.4678,
"step": 940500
},
{
"epoch": 0.86,
"learning_rate": 5.692435632537624e-05,
"loss": 4.4511,
"step": 941000
},
{
"epoch": 0.86,
"learning_rate": 5.6901459533125246e-05,
"loss": 4.4574,
"step": 941500
},
{
"epoch": 0.86,
"learning_rate": 5.687856274087425e-05,
"loss": 4.4715,
"step": 942000
},
{
"epoch": 0.86,
"learning_rate": 5.685566594862326e-05,
"loss": 4.4559,
"step": 942500
},
{
"epoch": 0.86,
"learning_rate": 5.683276915637227e-05,
"loss": 4.4523,
"step": 943000
},
{
"epoch": 0.86,
"learning_rate": 5.680987236412127e-05,
"loss": 4.4599,
"step": 943500
},
{
"epoch": 0.86,
"learning_rate": 5.6786975571870285e-05,
"loss": 4.4656,
"step": 944000
},
{
"epoch": 0.87,
"learning_rate": 5.676407877961929e-05,
"loss": 4.4454,
"step": 944500
},
{
"epoch": 0.87,
"learning_rate": 5.6741181987368295e-05,
"loss": 4.462,
"step": 945000
},
{
"epoch": 0.87,
"learning_rate": 5.6718285195117307e-05,
"loss": 4.463,
"step": 945500
},
{
"epoch": 0.87,
"learning_rate": 5.669538840286631e-05,
"loss": 4.464,
"step": 946000
},
{
"epoch": 0.87,
"learning_rate": 5.6672537404199824e-05,
"loss": 4.4504,
"step": 946500
},
{
"epoch": 0.87,
"learning_rate": 5.664964061194883e-05,
"loss": 4.4638,
"step": 947000
},
{
"epoch": 0.87,
"learning_rate": 5.6626743819697834e-05,
"loss": 4.4574,
"step": 947500
},
{
"epoch": 0.87,
"learning_rate": 5.6603847027446846e-05,
"loss": 4.4484,
"step": 948000
},
{
"epoch": 0.87,
"learning_rate": 5.658095023519585e-05,
"loss": 4.4596,
"step": 948500
},
{
"epoch": 0.87,
"learning_rate": 5.6558053442944856e-05,
"loss": 4.4733,
"step": 949000
},
{
"epoch": 0.87,
"learning_rate": 5.653515665069387e-05,
"loss": 4.4529,
"step": 949500
},
{
"epoch": 0.87,
"learning_rate": 5.6512305652027373e-05,
"loss": 4.4576,
"step": 950000
},
{
"epoch": 0.87,
"learning_rate": 5.6489408859776385e-05,
"loss": 4.462,
"step": 950500
},
{
"epoch": 0.87,
"learning_rate": 5.646651206752539e-05,
"loss": 4.4558,
"step": 951000
},
{
"epoch": 0.87,
"learning_rate": 5.6443615275274395e-05,
"loss": 4.4614,
"step": 951500
},
{
"epoch": 0.87,
"learning_rate": 5.642071848302341e-05,
"loss": 4.4527,
"step": 952000
},
{
"epoch": 0.87,
"learning_rate": 5.639782169077241e-05,
"loss": 4.4569,
"step": 952500
},
{
"epoch": 0.87,
"learning_rate": 5.637492489852142e-05,
"loss": 4.4526,
"step": 953000
},
{
"epoch": 0.87,
"learning_rate": 5.635202810627043e-05,
"loss": 4.4473,
"step": 953500
},
{
"epoch": 0.87,
"learning_rate": 5.6329177107603935e-05,
"loss": 4.4534,
"step": 954000
},
{
"epoch": 0.87,
"learning_rate": 5.6306280315352947e-05,
"loss": 4.468,
"step": 954500
},
{
"epoch": 0.87,
"learning_rate": 5.628338352310195e-05,
"loss": 4.4482,
"step": 955000
},
{
"epoch": 0.88,
"learning_rate": 5.626048673085096e-05,
"loss": 4.4639,
"step": 955500
},
{
"epoch": 0.88,
"learning_rate": 5.623758993859997e-05,
"loss": 4.4607,
"step": 956000
},
{
"epoch": 0.88,
"learning_rate": 5.6214784733517975e-05,
"loss": 4.4634,
"step": 956500
},
{
"epoch": 0.88,
"learning_rate": 5.6191887941266986e-05,
"loss": 4.4818,
"step": 957000
},
{
"epoch": 0.88,
"learning_rate": 5.616899114901599e-05,
"loss": 4.456,
"step": 957500
},
{
"epoch": 0.88,
"learning_rate": 5.6146094356764997e-05,
"loss": 4.4769,
"step": 958000
},
{
"epoch": 0.88,
"learning_rate": 5.612319756451401e-05,
"loss": 4.4402,
"step": 958500
},
{
"epoch": 0.88,
"learning_rate": 5.6100300772263013e-05,
"loss": 4.4515,
"step": 959000
},
{
"epoch": 0.88,
"learning_rate": 5.6077403980012025e-05,
"loss": 4.4555,
"step": 959500
},
{
"epoch": 0.88,
"learning_rate": 5.605450718776103e-05,
"loss": 4.4631,
"step": 960000
},
{
"epoch": 0.88,
"learning_rate": 5.603170198267904e-05,
"loss": 4.4536,
"step": 960500
},
{
"epoch": 0.88,
"learning_rate": 5.600880519042805e-05,
"loss": 4.4558,
"step": 961000
},
{
"epoch": 0.88,
"learning_rate": 5.598590839817705e-05,
"loss": 4.4659,
"step": 961500
},
{
"epoch": 0.88,
"learning_rate": 5.5963011605926065e-05,
"loss": 4.4566,
"step": 962000
},
{
"epoch": 0.88,
"learning_rate": 5.594011481367507e-05,
"loss": 4.4548,
"step": 962500
},
{
"epoch": 0.88,
"learning_rate": 5.5917218021424075e-05,
"loss": 4.4508,
"step": 963000
},
{
"epoch": 0.88,
"learning_rate": 5.589436702275759e-05,
"loss": 4.4575,
"step": 963500
},
{
"epoch": 0.88,
"learning_rate": 5.587147023050659e-05,
"loss": 4.4541,
"step": 964000
},
{
"epoch": 0.88,
"learning_rate": 5.584857343825559e-05,
"loss": 4.4524,
"step": 964500
},
{
"epoch": 0.88,
"learning_rate": 5.5825676646004596e-05,
"loss": 4.4607,
"step": 965000
},
{
"epoch": 0.88,
"learning_rate": 5.580277985375361e-05,
"loss": 4.4457,
"step": 965500
},
{
"epoch": 0.88,
"learning_rate": 5.577988306150261e-05,
"loss": 4.4556,
"step": 966000
},
{
"epoch": 0.89,
"learning_rate": 5.575698626925162e-05,
"loss": 4.465,
"step": 966500
},
{
"epoch": 0.89,
"learning_rate": 5.573408947700063e-05,
"loss": 4.4595,
"step": 967000
},
{
"epoch": 0.89,
"learning_rate": 5.5711192684749635e-05,
"loss": 4.4535,
"step": 967500
},
{
"epoch": 0.89,
"learning_rate": 5.568829589249864e-05,
"loss": 4.4625,
"step": 968000
},
{
"epoch": 0.89,
"learning_rate": 5.566539910024765e-05,
"loss": 4.4656,
"step": 968500
},
{
"epoch": 0.89,
"learning_rate": 5.564254810158116e-05,
"loss": 4.4637,
"step": 969000
},
{
"epoch": 0.89,
"learning_rate": 5.561965130933017e-05,
"loss": 4.4502,
"step": 969500
},
{
"epoch": 0.89,
"learning_rate": 5.5596754517079174e-05,
"loss": 4.4569,
"step": 970000
},
{
"epoch": 0.89,
"learning_rate": 5.557385772482818e-05,
"loss": 4.4592,
"step": 970500
},
{
"epoch": 0.89,
"learning_rate": 5.555096093257719e-05,
"loss": 4.4659,
"step": 971000
},
{
"epoch": 0.89,
"learning_rate": 5.55281099339107e-05,
"loss": 4.4468,
"step": 971500
},
{
"epoch": 0.89,
"learning_rate": 5.550521314165971e-05,
"loss": 4.4562,
"step": 972000
},
{
"epoch": 0.89,
"learning_rate": 5.5482316349408713e-05,
"loss": 4.4603,
"step": 972500
},
{
"epoch": 0.89,
"learning_rate": 5.545941955715772e-05,
"loss": 4.4577,
"step": 973000
},
{
"epoch": 0.89,
"learning_rate": 5.543652276490673e-05,
"loss": 4.4514,
"step": 973500
},
{
"epoch": 0.89,
"learning_rate": 5.5413671766240236e-05,
"loss": 4.4476,
"step": 974000
},
{
"epoch": 0.89,
"learning_rate": 5.539077497398925e-05,
"loss": 4.4569,
"step": 974500
},
{
"epoch": 0.89,
"learning_rate": 5.536787818173825e-05,
"loss": 4.4554,
"step": 975000
},
{
"epoch": 0.89,
"learning_rate": 5.5345027183071765e-05,
"loss": 4.4596,
"step": 975500
},
{
"epoch": 0.89,
"learning_rate": 5.532213039082077e-05,
"loss": 4.4515,
"step": 976000
},
{
"epoch": 0.89,
"learning_rate": 5.5299233598569775e-05,
"loss": 4.4624,
"step": 976500
},
{
"epoch": 0.89,
"learning_rate": 5.527633680631879e-05,
"loss": 4.4495,
"step": 977000
},
{
"epoch": 0.9,
"learning_rate": 5.525344001406779e-05,
"loss": 4.4549,
"step": 977500
},
{
"epoch": 0.9,
"learning_rate": 5.52305432218168e-05,
"loss": 4.4513,
"step": 978000
},
{
"epoch": 0.9,
"learning_rate": 5.520769222315031e-05,
"loss": 4.4609,
"step": 978500
},
{
"epoch": 0.9,
"learning_rate": 5.5184795430899315e-05,
"loss": 4.4639,
"step": 979000
},
{
"epoch": 0.9,
"learning_rate": 5.5161898638648327e-05,
"loss": 4.4492,
"step": 979500
},
{
"epoch": 0.9,
"learning_rate": 5.513904763998183e-05,
"loss": 4.4472,
"step": 980000
},
{
"epoch": 0.9,
"learning_rate": 5.5116150847730844e-05,
"loss": 4.4602,
"step": 980500
},
{
"epoch": 0.9,
"learning_rate": 5.509325405547985e-05,
"loss": 4.4486,
"step": 981000
},
{
"epoch": 0.9,
"learning_rate": 5.5070357263228854e-05,
"loss": 4.4524,
"step": 981500
},
{
"epoch": 0.9,
"learning_rate": 5.5047460470977866e-05,
"loss": 4.4546,
"step": 982000
},
{
"epoch": 0.9,
"learning_rate": 5.502456367872687e-05,
"loss": 4.4671,
"step": 982500
},
{
"epoch": 0.9,
"learning_rate": 5.5001666886475876e-05,
"loss": 4.4674,
"step": 983000
},
{
"epoch": 0.9,
"learning_rate": 5.497877009422489e-05,
"loss": 4.4553,
"step": 983500
},
{
"epoch": 0.9,
"learning_rate": 5.495587330197389e-05,
"loss": 4.4482,
"step": 984000
},
{
"epoch": 0.9,
"learning_rate": 5.49329765097229e-05,
"loss": 4.4641,
"step": 984500
},
{
"epoch": 0.9,
"learning_rate": 5.491007971747191e-05,
"loss": 4.4395,
"step": 985000
},
{
"epoch": 0.9,
"learning_rate": 5.4887228718805415e-05,
"loss": 4.4431,
"step": 985500
},
{
"epoch": 0.9,
"learning_rate": 5.486433192655443e-05,
"loss": 4.4617,
"step": 986000
},
{
"epoch": 0.9,
"learning_rate": 5.484143513430343e-05,
"loss": 4.4611,
"step": 986500
},
{
"epoch": 0.9,
"learning_rate": 5.481853834205244e-05,
"loss": 4.4528,
"step": 987000
},
{
"epoch": 0.9,
"learning_rate": 5.479564154980145e-05,
"loss": 4.4518,
"step": 987500
},
{
"epoch": 0.9,
"learning_rate": 5.4772790551134955e-05,
"loss": 4.4576,
"step": 988000
},
{
"epoch": 0.91,
"learning_rate": 5.4749893758883966e-05,
"loss": 4.4528,
"step": 988500
},
{
"epoch": 0.91,
"learning_rate": 5.472699696663297e-05,
"loss": 4.4476,
"step": 989000
},
{
"epoch": 0.91,
"learning_rate": 5.470410017438197e-05,
"loss": 4.4697,
"step": 989500
},
{
"epoch": 0.91,
"learning_rate": 5.4681203382130975e-05,
"loss": 4.4576,
"step": 990000
},
{
"epoch": 0.91,
"learning_rate": 5.465830658987998e-05,
"loss": 4.446,
"step": 990500
},
{
"epoch": 0.91,
"learning_rate": 5.463540979762899e-05,
"loss": 4.4554,
"step": 991000
},
{
"epoch": 0.91,
"learning_rate": 5.46125587989625e-05,
"loss": 4.4598,
"step": 991500
},
{
"epoch": 0.91,
"learning_rate": 5.458966200671151e-05,
"loss": 4.4521,
"step": 992000
},
{
"epoch": 0.91,
"learning_rate": 5.4566765214460514e-05,
"loss": 4.4632,
"step": 992500
},
{
"epoch": 0.91,
"learning_rate": 5.454386842220952e-05,
"loss": 4.4533,
"step": 993000
},
{
"epoch": 0.91,
"learning_rate": 5.452097162995853e-05,
"loss": 4.4442,
"step": 993500
},
{
"epoch": 0.91,
"learning_rate": 5.4498074837707536e-05,
"loss": 4.462,
"step": 994000
},
{
"epoch": 0.91,
"learning_rate": 5.447517804545654e-05,
"loss": 4.4586,
"step": 994500
},
{
"epoch": 0.91,
"learning_rate": 5.445228125320555e-05,
"loss": 4.4369,
"step": 995000
},
{
"epoch": 0.91,
"learning_rate": 5.442938446095456e-05,
"loss": 4.4454,
"step": 995500
},
{
"epoch": 0.91,
"learning_rate": 5.440657925587257e-05,
"loss": 4.4423,
"step": 996000
},
{
"epoch": 0.91,
"learning_rate": 5.4383682463621576e-05,
"loss": 4.4569,
"step": 996500
},
{
"epoch": 0.91,
"learning_rate": 5.436078567137058e-05,
"loss": 4.4554,
"step": 997000
},
{
"epoch": 0.91,
"learning_rate": 5.433788887911959e-05,
"loss": 4.469,
"step": 997500
},
{
"epoch": 0.91,
"learning_rate": 5.43149920868686e-05,
"loss": 4.4432,
"step": 998000
},
{
"epoch": 0.91,
"learning_rate": 5.429209529461761e-05,
"loss": 4.4512,
"step": 998500
},
{
"epoch": 0.91,
"learning_rate": 5.4269198502366615e-05,
"loss": 4.4535,
"step": 999000
},
{
"epoch": 0.92,
"learning_rate": 5.424634750370012e-05,
"loss": 4.4543,
"step": 999500
},
{
"epoch": 0.92,
"learning_rate": 5.422345071144913e-05,
"loss": 4.4488,
"step": 1000000
},
{
"epoch": 0.92,
"learning_rate": 5.420055391919814e-05,
"loss": 4.4444,
"step": 1000500
},
{
"epoch": 0.92,
"learning_rate": 5.417765712694714e-05,
"loss": 4.4342,
"step": 1001000
},
{
"epoch": 0.92,
"learning_rate": 5.4154760334696154e-05,
"loss": 4.4604,
"step": 1001500
},
{
"epoch": 0.92,
"learning_rate": 5.413190933602966e-05,
"loss": 4.4585,
"step": 1002000
},
{
"epoch": 0.92,
"learning_rate": 5.410901254377867e-05,
"loss": 4.4475,
"step": 1002500
},
{
"epoch": 0.92,
"learning_rate": 5.4086115751527677e-05,
"loss": 4.464,
"step": 1003000
},
{
"epoch": 0.92,
"learning_rate": 5.406321895927668e-05,
"loss": 4.443,
"step": 1003500
},
{
"epoch": 0.92,
"learning_rate": 5.4040322167025693e-05,
"loss": 4.4428,
"step": 1004000
},
{
"epoch": 0.92,
"learning_rate": 5.40174253747747e-05,
"loss": 4.4498,
"step": 1004500
},
{
"epoch": 0.92,
"learning_rate": 5.3994528582523704e-05,
"loss": 4.4552,
"step": 1005000
},
{
"epoch": 0.92,
"learning_rate": 5.3971631790272715e-05,
"loss": 4.4661,
"step": 1005500
},
{
"epoch": 0.92,
"learning_rate": 5.394878079160622e-05,
"loss": 4.4469,
"step": 1006000
},
{
"epoch": 0.92,
"learning_rate": 5.392588399935523e-05,
"loss": 4.4539,
"step": 1006500
},
{
"epoch": 0.92,
"learning_rate": 5.390298720710424e-05,
"loss": 4.4516,
"step": 1007000
},
{
"epoch": 0.92,
"learning_rate": 5.388009041485324e-05,
"loss": 4.4669,
"step": 1007500
},
{
"epoch": 0.92,
"learning_rate": 5.3857285209771256e-05,
"loss": 4.4489,
"step": 1008000
},
{
"epoch": 0.92,
"learning_rate": 5.383438841752027e-05,
"loss": 4.4536,
"step": 1008500
},
{
"epoch": 0.92,
"learning_rate": 5.381149162526927e-05,
"loss": 4.444,
"step": 1009000
},
{
"epoch": 0.92,
"learning_rate": 5.378859483301828e-05,
"loss": 4.4644,
"step": 1009500
},
{
"epoch": 0.93,
"learning_rate": 5.376574383435179e-05,
"loss": 4.4426,
"step": 1010000
},
{
"epoch": 0.93,
"learning_rate": 5.3742847042100795e-05,
"loss": 4.462,
"step": 1010500
},
{
"epoch": 0.93,
"learning_rate": 5.371999604343431e-05,
"loss": 4.4525,
"step": 1011000
},
{
"epoch": 0.93,
"learning_rate": 5.369709925118331e-05,
"loss": 4.4567,
"step": 1011500
},
{
"epoch": 0.93,
"learning_rate": 5.3674202458932325e-05,
"loss": 4.4598,
"step": 1012000
},
{
"epoch": 0.93,
"learning_rate": 5.365130566668133e-05,
"loss": 4.4422,
"step": 1012500
},
{
"epoch": 0.93,
"learning_rate": 5.3628408874430335e-05,
"loss": 4.4479,
"step": 1013000
},
{
"epoch": 0.93,
"learning_rate": 5.360551208217933e-05,
"loss": 4.4411,
"step": 1013500
},
{
"epoch": 0.93,
"learning_rate": 5.358261528992834e-05,
"loss": 4.4426,
"step": 1014000
},
{
"epoch": 0.93,
"learning_rate": 5.355971849767735e-05,
"loss": 4.4391,
"step": 1014500
},
{
"epoch": 0.93,
"learning_rate": 5.3536821705426355e-05,
"loss": 4.4611,
"step": 1015000
},
{
"epoch": 0.93,
"learning_rate": 5.351392491317536e-05,
"loss": 4.4463,
"step": 1015500
},
{
"epoch": 0.93,
"learning_rate": 5.349107391450887e-05,
"loss": 4.4561,
"step": 1016000
},
{
"epoch": 0.93,
"learning_rate": 5.346822291584238e-05,
"loss": 4.4545,
"step": 1016500
},
{
"epoch": 0.93,
"learning_rate": 5.344532612359139e-05,
"loss": 4.4427,
"step": 1017000
},
{
"epoch": 0.93,
"learning_rate": 5.3422429331340395e-05,
"loss": 4.4508,
"step": 1017500
},
{
"epoch": 0.93,
"learning_rate": 5.33995325390894e-05,
"loss": 4.4424,
"step": 1018000
},
{
"epoch": 0.93,
"learning_rate": 5.337663574683841e-05,
"loss": 4.4367,
"step": 1018500
},
{
"epoch": 0.93,
"learning_rate": 5.3353738954587417e-05,
"loss": 4.4444,
"step": 1019000
},
{
"epoch": 0.93,
"learning_rate": 5.333084216233643e-05,
"loss": 4.4531,
"step": 1019500
},
{
"epoch": 0.93,
"learning_rate": 5.3307945370085433e-05,
"loss": 4.4539,
"step": 1020000
},
{
"epoch": 0.93,
"learning_rate": 5.328504857783444e-05,
"loss": 4.4498,
"step": 1020500
},
{
"epoch": 0.94,
"learning_rate": 5.326215178558345e-05,
"loss": 4.4505,
"step": 1021000
},
{
"epoch": 0.94,
"learning_rate": 5.3239300786916956e-05,
"loss": 4.4424,
"step": 1021500
},
{
"epoch": 0.94,
"learning_rate": 5.321640399466596e-05,
"loss": 4.4595,
"step": 1022000
},
{
"epoch": 0.94,
"learning_rate": 5.319350720241497e-05,
"loss": 4.4449,
"step": 1022500
},
{
"epoch": 0.94,
"learning_rate": 5.317061041016398e-05,
"loss": 4.4428,
"step": 1023000
},
{
"epoch": 0.94,
"learning_rate": 5.314771361791299e-05,
"loss": 4.4465,
"step": 1023500
},
{
"epoch": 0.94,
"learning_rate": 5.3124816825661995e-05,
"loss": 4.4509,
"step": 1024000
},
{
"epoch": 0.94,
"learning_rate": 5.3101920033411e-05,
"loss": 4.4414,
"step": 1024500
},
{
"epoch": 0.94,
"learning_rate": 5.307906903474451e-05,
"loss": 4.452,
"step": 1025000
},
{
"epoch": 0.94,
"learning_rate": 5.305617224249352e-05,
"loss": 4.4565,
"step": 1025500
},
{
"epoch": 0.94,
"learning_rate": 5.303327545024252e-05,
"loss": 4.4598,
"step": 1026000
},
{
"epoch": 0.94,
"learning_rate": 5.3010378657991534e-05,
"loss": 4.4436,
"step": 1026500
},
{
"epoch": 0.94,
"learning_rate": 5.298748186574054e-05,
"loss": 4.4554,
"step": 1027000
},
{
"epoch": 0.94,
"learning_rate": 5.296458507348955e-05,
"loss": 4.4454,
"step": 1027500
},
{
"epoch": 0.94,
"learning_rate": 5.2941734074823057e-05,
"loss": 4.4602,
"step": 1028000
},
{
"epoch": 0.94,
"learning_rate": 5.291883728257206e-05,
"loss": 4.4431,
"step": 1028500
},
{
"epoch": 0.94,
"learning_rate": 5.2895940490321073e-05,
"loss": 4.4464,
"step": 1029000
},
{
"epoch": 0.94,
"learning_rate": 5.287304369807008e-05,
"loss": 4.4425,
"step": 1029500
},
{
"epoch": 0.94,
"learning_rate": 5.2850146905819084e-05,
"loss": 4.4396,
"step": 1030000
},
{
"epoch": 0.94,
"learning_rate": 5.2827250113568095e-05,
"loss": 4.4515,
"step": 1030500
},
{
"epoch": 0.94,
"learning_rate": 5.28043991149016e-05,
"loss": 4.4395,
"step": 1031000
},
{
"epoch": 0.94,
"learning_rate": 5.278150232265061e-05,
"loss": 4.453,
"step": 1031500
},
{
"epoch": 0.95,
"learning_rate": 5.275860553039962e-05,
"loss": 4.4417,
"step": 1032000
},
{
"epoch": 0.95,
"learning_rate": 5.273570873814862e-05,
"loss": 4.4546,
"step": 1032500
},
{
"epoch": 0.95,
"learning_rate": 5.2712811945897635e-05,
"loss": 4.4428,
"step": 1033000
},
{
"epoch": 0.95,
"learning_rate": 5.268996094723114e-05,
"loss": 4.4452,
"step": 1033500
},
{
"epoch": 0.95,
"learning_rate": 5.266706415498015e-05,
"loss": 4.4428,
"step": 1034000
},
{
"epoch": 0.95,
"learning_rate": 5.264416736272916e-05,
"loss": 4.445,
"step": 1034500
},
{
"epoch": 0.95,
"learning_rate": 5.262127057047816e-05,
"loss": 4.4376,
"step": 1035000
},
{
"epoch": 0.95,
"learning_rate": 5.2598373778227174e-05,
"loss": 4.4404,
"step": 1035500
},
{
"epoch": 0.95,
"learning_rate": 5.257547698597618e-05,
"loss": 4.4561,
"step": 1036000
},
{
"epoch": 0.95,
"learning_rate": 5.2552580193725184e-05,
"loss": 4.4414,
"step": 1036500
},
{
"epoch": 0.95,
"learning_rate": 5.2529683401474196e-05,
"loss": 4.4452,
"step": 1037000
},
{
"epoch": 0.95,
"learning_rate": 5.25067866092232e-05,
"loss": 4.4396,
"step": 1037500
},
{
"epoch": 0.95,
"learning_rate": 5.248393561055671e-05,
"loss": 4.4573,
"step": 1038000
},
{
"epoch": 0.95,
"learning_rate": 5.246108461189021e-05,
"loss": 4.4485,
"step": 1038500
},
{
"epoch": 0.95,
"learning_rate": 5.243818781963922e-05,
"loss": 4.4586,
"step": 1039000
},
{
"epoch": 0.95,
"learning_rate": 5.241529102738822e-05,
"loss": 4.4462,
"step": 1039500
},
{
"epoch": 0.95,
"learning_rate": 5.2392394235137234e-05,
"loss": 4.4549,
"step": 1040000
},
{
"epoch": 0.95,
"learning_rate": 5.236949744288624e-05,
"loss": 4.4559,
"step": 1040500
},
{
"epoch": 0.95,
"learning_rate": 5.2346600650635244e-05,
"loss": 4.4475,
"step": 1041000
},
{
"epoch": 0.95,
"learning_rate": 5.2323703858384256e-05,
"loss": 4.438,
"step": 1041500
},
{
"epoch": 0.95,
"learning_rate": 5.230080706613326e-05,
"loss": 4.4259,
"step": 1042000
},
{
"epoch": 0.95,
"learning_rate": 5.2277956067466773e-05,
"loss": 4.4481,
"step": 1042500
},
{
"epoch": 0.96,
"learning_rate": 5.225505927521578e-05,
"loss": 4.4393,
"step": 1043000
},
{
"epoch": 0.96,
"learning_rate": 5.2232162482964784e-05,
"loss": 4.4439,
"step": 1043500
},
{
"epoch": 0.96,
"learning_rate": 5.2209311484298296e-05,
"loss": 4.4451,
"step": 1044000
},
{
"epoch": 0.96,
"learning_rate": 5.21864146920473e-05,
"loss": 4.46,
"step": 1044500
},
{
"epoch": 0.96,
"learning_rate": 5.216351789979631e-05,
"loss": 4.4622,
"step": 1045000
},
{
"epoch": 0.96,
"learning_rate": 5.214062110754532e-05,
"loss": 4.4422,
"step": 1045500
},
{
"epoch": 0.96,
"learning_rate": 5.211777010887883e-05,
"loss": 4.4416,
"step": 1046000
},
{
"epoch": 0.96,
"learning_rate": 5.2094873316627835e-05,
"loss": 4.4496,
"step": 1046500
},
{
"epoch": 0.96,
"learning_rate": 5.207197652437684e-05,
"loss": 4.4364,
"step": 1047000
},
{
"epoch": 0.96,
"learning_rate": 5.204907973212585e-05,
"loss": 4.4405,
"step": 1047500
},
{
"epoch": 0.96,
"learning_rate": 5.202618293987486e-05,
"loss": 4.4379,
"step": 1048000
},
{
"epoch": 0.96,
"learning_rate": 5.200328614762386e-05,
"loss": 4.4444,
"step": 1048500
},
{
"epoch": 0.96,
"learning_rate": 5.1980389355372874e-05,
"loss": 4.4372,
"step": 1049000
},
{
"epoch": 0.96,
"learning_rate": 5.195749256312188e-05,
"loss": 4.448,
"step": 1049500
},
{
"epoch": 0.96,
"learning_rate": 5.1934595770870884e-05,
"loss": 4.4378,
"step": 1050000
},
{
"epoch": 0.96,
"learning_rate": 5.1911698978619896e-05,
"loss": 4.4294,
"step": 1050500
},
{
"epoch": 0.96,
"learning_rate": 5.18888479799534e-05,
"loss": 4.4293,
"step": 1051000
},
{
"epoch": 0.96,
"learning_rate": 5.1865951187702413e-05,
"loss": 4.4448,
"step": 1051500
},
{
"epoch": 0.96,
"learning_rate": 5.184305439545142e-05,
"loss": 4.4359,
"step": 1052000
},
{
"epoch": 0.96,
"learning_rate": 5.1820157603200424e-05,
"loss": 4.4554,
"step": 1052500
},
{
"epoch": 0.96,
"learning_rate": 5.1797260810949435e-05,
"loss": 4.433,
"step": 1053000
},
{
"epoch": 0.96,
"learning_rate": 5.177436401869844e-05,
"loss": 4.434,
"step": 1053500
},
{
"epoch": 0.97,
"learning_rate": 5.1751467226447445e-05,
"loss": 4.4396,
"step": 1054000
},
{
"epoch": 0.97,
"learning_rate": 5.172861622778096e-05,
"loss": 4.4493,
"step": 1054500
},
{
"epoch": 0.97,
"learning_rate": 5.170571943552996e-05,
"loss": 4.4457,
"step": 1055000
},
{
"epoch": 0.97,
"learning_rate": 5.1682822643278975e-05,
"loss": 4.4411,
"step": 1055500
},
{
"epoch": 0.97,
"learning_rate": 5.165992585102798e-05,
"loss": 4.4528,
"step": 1056000
},
{
"epoch": 0.97,
"learning_rate": 5.1637029058776985e-05,
"loss": 4.4448,
"step": 1056500
},
{
"epoch": 0.97,
"learning_rate": 5.16141780601105e-05,
"loss": 4.4453,
"step": 1057000
},
{
"epoch": 0.97,
"learning_rate": 5.15912812678595e-05,
"loss": 4.4505,
"step": 1057500
},
{
"epoch": 0.97,
"learning_rate": 5.1568384475608514e-05,
"loss": 4.4449,
"step": 1058000
},
{
"epoch": 0.97,
"learning_rate": 5.154548768335752e-05,
"loss": 4.4395,
"step": 1058500
},
{
"epoch": 0.97,
"learning_rate": 5.1522636684691025e-05,
"loss": 4.4428,
"step": 1059000
},
{
"epoch": 0.97,
"learning_rate": 5.1499739892440037e-05,
"loss": 4.4515,
"step": 1059500
},
{
"epoch": 0.97,
"learning_rate": 5.147684310018904e-05,
"loss": 4.4485,
"step": 1060000
},
{
"epoch": 0.97,
"learning_rate": 5.1453946307938053e-05,
"loss": 4.4432,
"step": 1060500
},
{
"epoch": 0.97,
"learning_rate": 5.143104951568706e-05,
"loss": 4.4421,
"step": 1061000
},
{
"epoch": 0.97,
"learning_rate": 5.1408152723436063e-05,
"loss": 4.4543,
"step": 1061500
},
{
"epoch": 0.97,
"learning_rate": 5.1385255931185075e-05,
"loss": 4.4538,
"step": 1062000
},
{
"epoch": 0.97,
"learning_rate": 5.136235913893408e-05,
"loss": 4.4346,
"step": 1062500
},
{
"epoch": 0.97,
"learning_rate": 5.133950814026758e-05,
"loss": 4.44,
"step": 1063000
},
{
"epoch": 0.97,
"learning_rate": 5.1316611348016584e-05,
"loss": 4.4439,
"step": 1063500
},
{
"epoch": 0.97,
"learning_rate": 5.1293714555765596e-05,
"loss": 4.4431,
"step": 1064000
},
{
"epoch": 0.97,
"learning_rate": 5.12708177635146e-05,
"loss": 4.4528,
"step": 1064500
},
{
"epoch": 0.98,
"learning_rate": 5.1247920971263606e-05,
"loss": 4.4507,
"step": 1065000
},
{
"epoch": 0.98,
"learning_rate": 5.122506997259712e-05,
"loss": 4.4394,
"step": 1065500
},
{
"epoch": 0.98,
"learning_rate": 5.1202173180346124e-05,
"loss": 4.4437,
"step": 1066000
},
{
"epoch": 0.98,
"learning_rate": 5.1179276388095135e-05,
"loss": 4.4177,
"step": 1066500
},
{
"epoch": 0.98,
"learning_rate": 5.115637959584414e-05,
"loss": 4.4535,
"step": 1067000
},
{
"epoch": 0.98,
"learning_rate": 5.1133482803593145e-05,
"loss": 4.438,
"step": 1067500
},
{
"epoch": 0.98,
"learning_rate": 5.111063180492666e-05,
"loss": 4.4439,
"step": 1068000
},
{
"epoch": 0.98,
"learning_rate": 5.108773501267566e-05,
"loss": 4.4428,
"step": 1068500
},
{
"epoch": 0.98,
"learning_rate": 5.106483822042467e-05,
"loss": 4.4386,
"step": 1069000
},
{
"epoch": 0.98,
"learning_rate": 5.104194142817368e-05,
"loss": 4.4486,
"step": 1069500
},
{
"epoch": 0.98,
"learning_rate": 5.1019044635922685e-05,
"loss": 4.434,
"step": 1070000
},
{
"epoch": 0.98,
"learning_rate": 5.09961478436717e-05,
"loss": 4.4438,
"step": 1070500
},
{
"epoch": 0.98,
"learning_rate": 5.09732510514207e-05,
"loss": 4.4502,
"step": 1071000
},
{
"epoch": 0.98,
"learning_rate": 5.095035425916971e-05,
"loss": 4.4403,
"step": 1071500
},
{
"epoch": 0.98,
"learning_rate": 5.092754905408772e-05,
"loss": 4.4406,
"step": 1072000
},
{
"epoch": 0.98,
"learning_rate": 5.0904652261836725e-05,
"loss": 4.4258,
"step": 1072500
},
{
"epoch": 0.98,
"learning_rate": 5.0881755469585737e-05,
"loss": 4.4436,
"step": 1073000
},
{
"epoch": 0.98,
"learning_rate": 5.085885867733474e-05,
"loss": 4.4419,
"step": 1073500
},
{
"epoch": 0.98,
"learning_rate": 5.083596188508375e-05,
"loss": 4.439,
"step": 1074000
},
{
"epoch": 0.98,
"learning_rate": 5.081311088641726e-05,
"loss": 4.4464,
"step": 1074500
},
{
"epoch": 0.98,
"learning_rate": 5.0790214094166264e-05,
"loss": 4.4401,
"step": 1075000
},
{
"epoch": 0.99,
"learning_rate": 5.0767317301915276e-05,
"loss": 4.4437,
"step": 1075500
},
{
"epoch": 0.99,
"learning_rate": 5.074442050966428e-05,
"loss": 4.4422,
"step": 1076000
},
{
"epoch": 0.99,
"learning_rate": 5.0721523717413286e-05,
"loss": 4.4427,
"step": 1076500
},
{
"epoch": 0.99,
"learning_rate": 5.06986269251623e-05,
"loss": 4.4321,
"step": 1077000
},
{
"epoch": 0.99,
"learning_rate": 5.06757301329113e-05,
"loss": 4.4389,
"step": 1077500
},
{
"epoch": 0.99,
"learning_rate": 5.065283334066031e-05,
"loss": 4.445,
"step": 1078000
},
{
"epoch": 0.99,
"learning_rate": 5.062993654840932e-05,
"loss": 4.4274,
"step": 1078500
},
{
"epoch": 0.99,
"learning_rate": 5.0607085549742825e-05,
"loss": 4.4375,
"step": 1079000
},
{
"epoch": 0.99,
"learning_rate": 5.058418875749184e-05,
"loss": 4.4407,
"step": 1079500
},
{
"epoch": 0.99,
"learning_rate": 5.056129196524084e-05,
"loss": 4.4448,
"step": 1080000
},
{
"epoch": 0.99,
"learning_rate": 5.0538440966574355e-05,
"loss": 4.4387,
"step": 1080500
},
{
"epoch": 0.99,
"learning_rate": 5.051554417432336e-05,
"loss": 4.4379,
"step": 1081000
},
{
"epoch": 0.99,
"learning_rate": 5.0492647382072365e-05,
"loss": 4.429,
"step": 1081500
},
{
"epoch": 0.99,
"learning_rate": 5.0469750589821377e-05,
"loss": 4.4272,
"step": 1082000
},
{
"epoch": 0.99,
"learning_rate": 5.044685379757038e-05,
"loss": 4.4356,
"step": 1082500
},
{
"epoch": 0.99,
"learning_rate": 5.042395700531939e-05,
"loss": 4.4389,
"step": 1083000
},
{
"epoch": 0.99,
"learning_rate": 5.04011060066529e-05,
"loss": 4.4396,
"step": 1083500
},
{
"epoch": 0.99,
"learning_rate": 5.0378209214401904e-05,
"loss": 4.4335,
"step": 1084000
},
{
"epoch": 0.99,
"learning_rate": 5.0355312422150916e-05,
"loss": 4.4456,
"step": 1084500
},
{
"epoch": 0.99,
"learning_rate": 5.033241562989992e-05,
"loss": 4.4385,
"step": 1085000
},
{
"epoch": 0.99,
"learning_rate": 5.0309518837648926e-05,
"loss": 4.4353,
"step": 1085500
},
{
"epoch": 0.99,
"learning_rate": 5.028666783898244e-05,
"loss": 4.4481,
"step": 1086000
},
{
"epoch": 1.0,
"learning_rate": 5.0263771046731443e-05,
"loss": 4.4259,
"step": 1086500
},
{
"epoch": 1.0,
"learning_rate": 5.0240874254480455e-05,
"loss": 4.4314,
"step": 1087000
},
{
"epoch": 1.0,
"learning_rate": 5.021797746222945e-05,
"loss": 4.4511,
"step": 1087500
},
{
"epoch": 1.0,
"learning_rate": 5.019508066997846e-05,
"loss": 4.437,
"step": 1088000
},
{
"epoch": 1.0,
"learning_rate": 5.0172183877727464e-05,
"loss": 4.4348,
"step": 1088500
},
{
"epoch": 1.0,
"learning_rate": 5.014928708547647e-05,
"loss": 4.4439,
"step": 1089000
},
{
"epoch": 1.0,
"learning_rate": 5.012639029322548e-05,
"loss": 4.4313,
"step": 1089500
},
{
"epoch": 1.0,
"learning_rate": 5.0103539294558986e-05,
"loss": 4.4349,
"step": 1090000
},
{
"epoch": 1.0,
"learning_rate": 5.0080642502308e-05,
"loss": 4.4445,
"step": 1090500
},
{
"epoch": 1.0,
"learning_rate": 5.0057745710057e-05,
"loss": 4.4503,
"step": 1091000
},
{
"epoch": 1.0,
"learning_rate": 5.003484891780601e-05,
"loss": 4.4407,
"step": 1091500
},
{
"epoch": 1.0,
"learning_rate": 5.001195212555502e-05,
"loss": 4.4341,
"step": 1092000
},
{
"epoch": 1.0,
"learning_rate": 4.998910112688853e-05,
"loss": 4.4384,
"step": 1092500
},
{
"epoch": 1.0,
"learning_rate": 4.996620433463754e-05,
"loss": 4.4309,
"step": 1093000
},
{
"epoch": 1.0,
"learning_rate": 4.994335333597104e-05,
"loss": 4.426,
"step": 1093500
},
{
"epoch": 1.0,
"learning_rate": 4.9920502337304555e-05,
"loss": 4.4174,
"step": 1094000
},
{
"epoch": 1.0,
"learning_rate": 4.989760554505356e-05,
"loss": 4.4254,
"step": 1094500
},
{
"epoch": 1.0,
"learning_rate": 4.9874708752802565e-05,
"loss": 4.4383,
"step": 1095000
},
{
"epoch": 1.0,
"learning_rate": 4.985181196055158e-05,
"loss": 4.4323,
"step": 1095500
},
{
"epoch": 1.0,
"learning_rate": 4.982891516830058e-05,
"loss": 4.4192,
"step": 1096000
},
{
"epoch": 1.0,
"learning_rate": 4.980601837604959e-05,
"loss": 4.4205,
"step": 1096500
},
{
"epoch": 1.0,
"learning_rate": 4.97831215837986e-05,
"loss": 4.4331,
"step": 1097000
},
{
"epoch": 1.01,
"learning_rate": 4.9760224791547604e-05,
"loss": 4.4381,
"step": 1097500
},
{
"epoch": 1.01,
"learning_rate": 4.973732799929661e-05,
"loss": 4.4344,
"step": 1098000
},
{
"epoch": 1.01,
"learning_rate": 4.971443120704562e-05,
"loss": 4.42,
"step": 1098500
},
{
"epoch": 1.01,
"learning_rate": 4.9691534414794626e-05,
"loss": 4.4287,
"step": 1099000
},
{
"epoch": 1.01,
"learning_rate": 4.966868341612814e-05,
"loss": 4.4309,
"step": 1099500
},
{
"epoch": 1.01,
"learning_rate": 4.9645832417461644e-05,
"loss": 4.4361,
"step": 1100000
},
{
"epoch": 1.01,
"learning_rate": 4.9622935625210656e-05,
"loss": 4.4172,
"step": 1100500
},
{
"epoch": 1.01,
"learning_rate": 4.960003883295966e-05,
"loss": 4.4123,
"step": 1101000
},
{
"epoch": 1.01,
"learning_rate": 4.9577142040708666e-05,
"loss": 4.4322,
"step": 1101500
},
{
"epoch": 1.01,
"learning_rate": 4.955424524845768e-05,
"loss": 4.4322,
"step": 1102000
},
{
"epoch": 1.01,
"learning_rate": 4.953134845620668e-05,
"loss": 4.4305,
"step": 1102500
},
{
"epoch": 1.01,
"learning_rate": 4.950845166395569e-05,
"loss": 4.4269,
"step": 1103000
},
{
"epoch": 1.01,
"learning_rate": 4.94855548717047e-05,
"loss": 4.4299,
"step": 1103500
},
{
"epoch": 1.01,
"learning_rate": 4.9462658079453705e-05,
"loss": 4.4283,
"step": 1104000
},
{
"epoch": 1.01,
"learning_rate": 4.943976128720271e-05,
"loss": 4.4226,
"step": 1104500
},
{
"epoch": 1.01,
"learning_rate": 4.941686449495172e-05,
"loss": 4.4302,
"step": 1105000
},
{
"epoch": 1.01,
"learning_rate": 4.939401349628523e-05,
"loss": 4.4315,
"step": 1105500
},
{
"epoch": 1.01,
"learning_rate": 4.937111670403423e-05,
"loss": 4.4141,
"step": 1106000
},
{
"epoch": 1.01,
"learning_rate": 4.934821991178324e-05,
"loss": 4.4294,
"step": 1106500
},
{
"epoch": 1.01,
"learning_rate": 4.932532311953225e-05,
"loss": 4.4393,
"step": 1107000
},
{
"epoch": 1.01,
"learning_rate": 4.9302426327281254e-05,
"loss": 4.4281,
"step": 1107500
},
{
"epoch": 1.01,
"learning_rate": 4.9279575328614767e-05,
"loss": 4.4285,
"step": 1108000
},
{
"epoch": 1.02,
"learning_rate": 4.925667853636377e-05,
"loss": 4.4436,
"step": 1108500
},
{
"epoch": 1.02,
"learning_rate": 4.923378174411278e-05,
"loss": 4.4207,
"step": 1109000
},
{
"epoch": 1.02,
"learning_rate": 4.921093074544629e-05,
"loss": 4.4205,
"step": 1109500
},
{
"epoch": 1.02,
"learning_rate": 4.9188033953195294e-05,
"loss": 4.421,
"step": 1110000
},
{
"epoch": 1.02,
"learning_rate": 4.91651371609443e-05,
"loss": 4.4407,
"step": 1110500
},
{
"epoch": 1.02,
"learning_rate": 4.914224036869331e-05,
"loss": 4.429,
"step": 1111000
},
{
"epoch": 1.02,
"learning_rate": 4.9119389370026817e-05,
"loss": 4.4245,
"step": 1111500
},
{
"epoch": 1.02,
"learning_rate": 4.909649257777583e-05,
"loss": 4.4209,
"step": 1112000
},
{
"epoch": 1.02,
"learning_rate": 4.9073595785524833e-05,
"loss": 4.4226,
"step": 1112500
},
{
"epoch": 1.02,
"learning_rate": 4.905069899327384e-05,
"loss": 4.4327,
"step": 1113000
},
{
"epoch": 1.02,
"learning_rate": 4.902780220102285e-05,
"loss": 4.4178,
"step": 1113500
},
{
"epoch": 1.02,
"learning_rate": 4.9004905408771855e-05,
"loss": 4.428,
"step": 1114000
},
{
"epoch": 1.02,
"learning_rate": 4.898200861652086e-05,
"loss": 4.4205,
"step": 1114500
},
{
"epoch": 1.02,
"learning_rate": 4.895911182426987e-05,
"loss": 4.4159,
"step": 1115000
},
{
"epoch": 1.02,
"learning_rate": 4.893621503201888e-05,
"loss": 4.4191,
"step": 1115500
},
{
"epoch": 1.02,
"learning_rate": 4.891331823976789e-05,
"loss": 4.4299,
"step": 1116000
},
{
"epoch": 1.02,
"learning_rate": 4.8890421447516894e-05,
"loss": 4.4295,
"step": 1116500
},
{
"epoch": 1.02,
"learning_rate": 4.88675246552659e-05,
"loss": 4.4262,
"step": 1117000
},
{
"epoch": 1.02,
"learning_rate": 4.884462786301491e-05,
"loss": 4.43,
"step": 1117500
},
{
"epoch": 1.02,
"learning_rate": 4.882182265793292e-05,
"loss": 4.4329,
"step": 1118000
},
{
"epoch": 1.02,
"learning_rate": 4.879892586568192e-05,
"loss": 4.4202,
"step": 1118500
},
{
"epoch": 1.02,
"learning_rate": 4.877607486701543e-05,
"loss": 4.4186,
"step": 1119000
},
{
"epoch": 1.03,
"learning_rate": 4.875317807476444e-05,
"loss": 4.4274,
"step": 1119500
},
{
"epoch": 1.03,
"learning_rate": 4.8730281282513445e-05,
"loss": 4.4316,
"step": 1120000
},
{
"epoch": 1.03,
"learning_rate": 4.8707384490262457e-05,
"loss": 4.4219,
"step": 1120500
},
{
"epoch": 1.03,
"learning_rate": 4.868448769801146e-05,
"loss": 4.416,
"step": 1121000
},
{
"epoch": 1.03,
"learning_rate": 4.866159090576047e-05,
"loss": 4.405,
"step": 1121500
},
{
"epoch": 1.03,
"learning_rate": 4.863869411350948e-05,
"loss": 4.4241,
"step": 1122000
},
{
"epoch": 1.03,
"learning_rate": 4.8615797321258484e-05,
"loss": 4.4155,
"step": 1122500
},
{
"epoch": 1.03,
"learning_rate": 4.859290052900749e-05,
"loss": 4.4291,
"step": 1123000
},
{
"epoch": 1.03,
"learning_rate": 4.85700953239255e-05,
"loss": 4.4275,
"step": 1123500
},
{
"epoch": 1.03,
"learning_rate": 4.8547198531674507e-05,
"loss": 4.4237,
"step": 1124000
},
{
"epoch": 1.03,
"learning_rate": 4.852430173942352e-05,
"loss": 4.4343,
"step": 1124500
},
{
"epoch": 1.03,
"learning_rate": 4.8501404947172523e-05,
"loss": 4.4315,
"step": 1125000
},
{
"epoch": 1.03,
"learning_rate": 4.847850815492153e-05,
"loss": 4.4408,
"step": 1125500
},
{
"epoch": 1.03,
"learning_rate": 4.845561136267054e-05,
"loss": 4.4295,
"step": 1126000
},
{
"epoch": 1.03,
"learning_rate": 4.8432714570419545e-05,
"loss": 4.4234,
"step": 1126500
},
{
"epoch": 1.03,
"learning_rate": 4.840981777816855e-05,
"loss": 4.4427,
"step": 1127000
},
{
"epoch": 1.03,
"learning_rate": 4.838692098591756e-05,
"loss": 4.422,
"step": 1127500
},
{
"epoch": 1.03,
"learning_rate": 4.836402419366657e-05,
"loss": 4.4238,
"step": 1128000
},
{
"epoch": 1.03,
"learning_rate": 4.834112740141558e-05,
"loss": 4.416,
"step": 1128500
},
{
"epoch": 1.03,
"learning_rate": 4.8318230609164584e-05,
"loss": 4.4296,
"step": 1129000
},
{
"epoch": 1.03,
"learning_rate": 4.829537961049809e-05,
"loss": 4.4239,
"step": 1129500
},
{
"epoch": 1.03,
"learning_rate": 4.8272482818247095e-05,
"loss": 4.4246,
"step": 1130000
},
{
"epoch": 1.04,
"learning_rate": 4.82495860259961e-05,
"loss": 4.4209,
"step": 1130500
},
{
"epoch": 1.04,
"learning_rate": 4.822668923374511e-05,
"loss": 4.4298,
"step": 1131000
},
{
"epoch": 1.04,
"learning_rate": 4.820379244149412e-05,
"loss": 4.4386,
"step": 1131500
},
{
"epoch": 1.04,
"learning_rate": 4.818089564924312e-05,
"loss": 4.4331,
"step": 1132000
},
{
"epoch": 1.04,
"learning_rate": 4.8157998856992134e-05,
"loss": 4.4278,
"step": 1132500
},
{
"epoch": 1.04,
"learning_rate": 4.813510206474114e-05,
"loss": 4.413,
"step": 1133000
},
{
"epoch": 1.04,
"learning_rate": 4.8112205272490144e-05,
"loss": 4.4205,
"step": 1133500
},
{
"epoch": 1.04,
"learning_rate": 4.8089354273823656e-05,
"loss": 4.417,
"step": 1134000
},
{
"epoch": 1.04,
"learning_rate": 4.806645748157266e-05,
"loss": 4.4304,
"step": 1134500
},
{
"epoch": 1.04,
"learning_rate": 4.804356068932167e-05,
"loss": 4.4277,
"step": 1135000
},
{
"epoch": 1.04,
"learning_rate": 4.802066389707068e-05,
"loss": 4.4096,
"step": 1135500
},
{
"epoch": 1.04,
"learning_rate": 4.799785869198869e-05,
"loss": 4.42,
"step": 1136000
},
{
"epoch": 1.04,
"learning_rate": 4.7974961899737696e-05,
"loss": 4.4103,
"step": 1136500
},
{
"epoch": 1.04,
"learning_rate": 4.795206510748671e-05,
"loss": 4.4283,
"step": 1137000
},
{
"epoch": 1.04,
"learning_rate": 4.792916831523571e-05,
"loss": 4.4172,
"step": 1137500
},
{
"epoch": 1.04,
"learning_rate": 4.790627152298472e-05,
"loss": 4.4106,
"step": 1138000
},
{
"epoch": 1.04,
"learning_rate": 4.788337473073373e-05,
"loss": 4.4371,
"step": 1138500
},
{
"epoch": 1.04,
"learning_rate": 4.7860477938482735e-05,
"loss": 4.4183,
"step": 1139000
},
{
"epoch": 1.04,
"learning_rate": 4.783758114623174e-05,
"loss": 4.4177,
"step": 1139500
},
{
"epoch": 1.04,
"learning_rate": 4.781468435398075e-05,
"loss": 4.425,
"step": 1140000
},
{
"epoch": 1.04,
"learning_rate": 4.779187914889876e-05,
"loss": 4.4133,
"step": 1140500
},
{
"epoch": 1.05,
"learning_rate": 4.776898235664777e-05,
"loss": 4.4149,
"step": 1141000
},
{
"epoch": 1.05,
"learning_rate": 4.7746085564396775e-05,
"loss": 4.426,
"step": 1141500
},
{
"epoch": 1.05,
"learning_rate": 4.772318877214578e-05,
"loss": 4.4455,
"step": 1142000
},
{
"epoch": 1.05,
"learning_rate": 4.7700291979894785e-05,
"loss": 4.4368,
"step": 1142500
},
{
"epoch": 1.05,
"learning_rate": 4.767739518764379e-05,
"loss": 4.4111,
"step": 1143000
},
{
"epoch": 1.05,
"learning_rate": 4.76544983953928e-05,
"loss": 4.4284,
"step": 1143500
},
{
"epoch": 1.05,
"learning_rate": 4.763160160314181e-05,
"loss": 4.4216,
"step": 1144000
},
{
"epoch": 1.05,
"learning_rate": 4.760870481089081e-05,
"loss": 4.4241,
"step": 1144500
},
{
"epoch": 1.05,
"learning_rate": 4.7585853812224324e-05,
"loss": 4.4331,
"step": 1145000
},
{
"epoch": 1.05,
"learning_rate": 4.756295701997333e-05,
"loss": 4.4347,
"step": 1145500
},
{
"epoch": 1.05,
"learning_rate": 4.754006022772234e-05,
"loss": 4.4134,
"step": 1146000
},
{
"epoch": 1.05,
"learning_rate": 4.7517163435471346e-05,
"loss": 4.4208,
"step": 1146500
},
{
"epoch": 1.05,
"learning_rate": 4.749426664322035e-05,
"loss": 4.4185,
"step": 1147000
},
{
"epoch": 1.05,
"learning_rate": 4.7471461438138364e-05,
"loss": 4.4217,
"step": 1147500
},
{
"epoch": 1.05,
"learning_rate": 4.744856464588737e-05,
"loss": 4.4251,
"step": 1148000
},
{
"epoch": 1.05,
"learning_rate": 4.742566785363638e-05,
"loss": 4.4322,
"step": 1148500
},
{
"epoch": 1.05,
"learning_rate": 4.7402771061385386e-05,
"loss": 4.4324,
"step": 1149000
},
{
"epoch": 1.05,
"learning_rate": 4.73798742691344e-05,
"loss": 4.4213,
"step": 1149500
},
{
"epoch": 1.05,
"learning_rate": 4.73570232704679e-05,
"loss": 4.4069,
"step": 1150000
},
{
"epoch": 1.05,
"learning_rate": 4.733412647821691e-05,
"loss": 4.4303,
"step": 1150500
},
{
"epoch": 1.05,
"learning_rate": 4.731122968596592e-05,
"loss": 4.4337,
"step": 1151000
},
{
"epoch": 1.05,
"learning_rate": 4.7288332893714925e-05,
"loss": 4.4252,
"step": 1151500
},
{
"epoch": 1.06,
"learning_rate": 4.726543610146393e-05,
"loss": 4.4379,
"step": 1152000
},
{
"epoch": 1.06,
"learning_rate": 4.724253930921294e-05,
"loss": 4.4189,
"step": 1152500
},
{
"epoch": 1.06,
"learning_rate": 4.721964251696195e-05,
"loss": 4.4295,
"step": 1153000
},
{
"epoch": 1.06,
"learning_rate": 4.719679151829546e-05,
"loss": 4.4232,
"step": 1153500
},
{
"epoch": 1.06,
"learning_rate": 4.717389472604446e-05,
"loss": 4.4268,
"step": 1154000
},
{
"epoch": 1.06,
"learning_rate": 4.715099793379347e-05,
"loss": 4.4354,
"step": 1154500
},
{
"epoch": 1.06,
"learning_rate": 4.7128101141542475e-05,
"loss": 4.4245,
"step": 1155000
},
{
"epoch": 1.06,
"learning_rate": 4.710520434929148e-05,
"loss": 4.416,
"step": 1155500
},
{
"epoch": 1.06,
"learning_rate": 4.708230755704049e-05,
"loss": 4.4372,
"step": 1156000
},
{
"epoch": 1.06,
"learning_rate": 4.70594107647895e-05,
"loss": 4.4236,
"step": 1156500
},
{
"epoch": 1.06,
"learning_rate": 4.70365139725385e-05,
"loss": 4.4334,
"step": 1157000
},
{
"epoch": 1.06,
"learning_rate": 4.7013617180287513e-05,
"loss": 4.4148,
"step": 1157500
},
{
"epoch": 1.06,
"learning_rate": 4.699076618162102e-05,
"loss": 4.4291,
"step": 1158000
},
{
"epoch": 1.06,
"learning_rate": 4.696786938937003e-05,
"loss": 4.4202,
"step": 1158500
},
{
"epoch": 1.06,
"learning_rate": 4.6944972597119036e-05,
"loss": 4.4285,
"step": 1159000
},
{
"epoch": 1.06,
"learning_rate": 4.692207580486804e-05,
"loss": 4.434,
"step": 1159500
},
{
"epoch": 1.06,
"learning_rate": 4.6899224806201553e-05,
"loss": 4.4177,
"step": 1160000
},
{
"epoch": 1.06,
"learning_rate": 4.687632801395056e-05,
"loss": 4.4184,
"step": 1160500
},
{
"epoch": 1.06,
"learning_rate": 4.685343122169957e-05,
"loss": 4.428,
"step": 1161000
},
{
"epoch": 1.06,
"learning_rate": 4.6830534429448575e-05,
"loss": 4.4265,
"step": 1161500
},
{
"epoch": 1.06,
"learning_rate": 4.680763763719758e-05,
"loss": 4.4404,
"step": 1162000
},
{
"epoch": 1.06,
"learning_rate": 4.678478663853109e-05,
"loss": 4.4393,
"step": 1162500
},
{
"epoch": 1.07,
"learning_rate": 4.67618898462801e-05,
"loss": 4.4265,
"step": 1163000
},
{
"epoch": 1.07,
"learning_rate": 4.673899305402911e-05,
"loss": 4.4366,
"step": 1163500
},
{
"epoch": 1.07,
"learning_rate": 4.6716096261778115e-05,
"loss": 4.4259,
"step": 1164000
},
{
"epoch": 1.07,
"learning_rate": 4.669324526311162e-05,
"loss": 4.4267,
"step": 1164500
},
{
"epoch": 1.07,
"learning_rate": 4.667034847086063e-05,
"loss": 4.4295,
"step": 1165000
},
{
"epoch": 1.07,
"learning_rate": 4.664745167860964e-05,
"loss": 4.4271,
"step": 1165500
},
{
"epoch": 1.07,
"learning_rate": 4.662455488635865e-05,
"loss": 4.4142,
"step": 1166000
},
{
"epoch": 1.07,
"learning_rate": 4.660165809410765e-05,
"loss": 4.4077,
"step": 1166500
},
{
"epoch": 1.07,
"learning_rate": 4.657876130185665e-05,
"loss": 4.4272,
"step": 1167000
},
{
"epoch": 1.07,
"learning_rate": 4.6555864509605664e-05,
"loss": 4.4279,
"step": 1167500
},
{
"epoch": 1.07,
"learning_rate": 4.653301351093917e-05,
"loss": 4.4359,
"step": 1168000
},
{
"epoch": 1.07,
"learning_rate": 4.651016251227268e-05,
"loss": 4.4342,
"step": 1168500
},
{
"epoch": 1.07,
"learning_rate": 4.648726572002169e-05,
"loss": 4.4272,
"step": 1169000
},
{
"epoch": 1.07,
"learning_rate": 4.64643689277707e-05,
"loss": 4.4162,
"step": 1169500
},
{
"epoch": 1.07,
"learning_rate": 4.6441472135519704e-05,
"loss": 4.4308,
"step": 1170000
},
{
"epoch": 1.07,
"learning_rate": 4.6418621136853216e-05,
"loss": 4.4263,
"step": 1170500
},
{
"epoch": 1.07,
"learning_rate": 4.639572434460222e-05,
"loss": 4.4255,
"step": 1171000
},
{
"epoch": 1.07,
"learning_rate": 4.6372827552351227e-05,
"loss": 4.415,
"step": 1171500
},
{
"epoch": 1.07,
"learning_rate": 4.634993076010024e-05,
"loss": 4.4248,
"step": 1172000
},
{
"epoch": 1.07,
"learning_rate": 4.6327033967849243e-05,
"loss": 4.4267,
"step": 1172500
},
{
"epoch": 1.07,
"learning_rate": 4.630413717559825e-05,
"loss": 4.4303,
"step": 1173000
},
{
"epoch": 1.07,
"learning_rate": 4.628124038334726e-05,
"loss": 4.4196,
"step": 1173500
},
{
"epoch": 1.08,
"learning_rate": 4.6258343591096265e-05,
"loss": 4.4202,
"step": 1174000
},
{
"epoch": 1.08,
"learning_rate": 4.623544679884527e-05,
"loss": 4.4065,
"step": 1174500
},
{
"epoch": 1.08,
"learning_rate": 4.621259580017878e-05,
"loss": 4.4157,
"step": 1175000
},
{
"epoch": 1.08,
"learning_rate": 4.618969900792779e-05,
"loss": 4.4063,
"step": 1175500
},
{
"epoch": 1.08,
"learning_rate": 4.61668022156768e-05,
"loss": 4.4162,
"step": 1176000
},
{
"epoch": 1.08,
"learning_rate": 4.6143905423425805e-05,
"loss": 4.4216,
"step": 1176500
},
{
"epoch": 1.08,
"learning_rate": 4.612100863117481e-05,
"loss": 4.4339,
"step": 1177000
},
{
"epoch": 1.08,
"learning_rate": 4.609811183892382e-05,
"loss": 4.4183,
"step": 1177500
},
{
"epoch": 1.08,
"learning_rate": 4.6075215046672827e-05,
"loss": 4.4241,
"step": 1178000
},
{
"epoch": 1.08,
"learning_rate": 4.605236404800633e-05,
"loss": 4.419,
"step": 1178500
},
{
"epoch": 1.08,
"learning_rate": 4.602946725575534e-05,
"loss": 4.4324,
"step": 1179000
},
{
"epoch": 1.08,
"learning_rate": 4.600657046350434e-05,
"loss": 4.4204,
"step": 1179500
},
{
"epoch": 1.08,
"learning_rate": 4.5983673671253354e-05,
"loss": 4.408,
"step": 1180000
},
{
"epoch": 1.08,
"learning_rate": 4.596077687900236e-05,
"loss": 4.4356,
"step": 1180500
},
{
"epoch": 1.08,
"learning_rate": 4.5937880086751364e-05,
"loss": 4.4245,
"step": 1181000
},
{
"epoch": 1.08,
"learning_rate": 4.5914983294500376e-05,
"loss": 4.4294,
"step": 1181500
},
{
"epoch": 1.08,
"learning_rate": 4.589213229583388e-05,
"loss": 4.419,
"step": 1182000
},
{
"epoch": 1.08,
"learning_rate": 4.5869235503582893e-05,
"loss": 4.4198,
"step": 1182500
},
{
"epoch": 1.08,
"learning_rate": 4.58463387113319e-05,
"loss": 4.4133,
"step": 1183000
},
{
"epoch": 1.08,
"learning_rate": 4.5823441919080904e-05,
"loss": 4.4243,
"step": 1183500
},
{
"epoch": 1.08,
"learning_rate": 4.5800590920414416e-05,
"loss": 4.4237,
"step": 1184000
},
{
"epoch": 1.08,
"learning_rate": 4.577769412816342e-05,
"loss": 4.4182,
"step": 1184500
},
{
"epoch": 1.09,
"learning_rate": 4.575479733591243e-05,
"loss": 4.4384,
"step": 1185000
},
{
"epoch": 1.09,
"learning_rate": 4.573190054366144e-05,
"loss": 4.4233,
"step": 1185500
},
{
"epoch": 1.09,
"learning_rate": 4.570900375141044e-05,
"loss": 4.4167,
"step": 1186000
},
{
"epoch": 1.09,
"learning_rate": 4.5686106959159455e-05,
"loss": 4.4184,
"step": 1186500
},
{
"epoch": 1.09,
"learning_rate": 4.566321016690846e-05,
"loss": 4.4281,
"step": 1187000
},
{
"epoch": 1.09,
"learning_rate": 4.5640313374657465e-05,
"loss": 4.4333,
"step": 1187500
},
{
"epoch": 1.09,
"learning_rate": 4.5617416582406477e-05,
"loss": 4.4293,
"step": 1188000
},
{
"epoch": 1.09,
"learning_rate": 4.559456558373998e-05,
"loss": 4.4253,
"step": 1188500
},
{
"epoch": 1.09,
"learning_rate": 4.5571668791488994e-05,
"loss": 4.4119,
"step": 1189000
},
{
"epoch": 1.09,
"learning_rate": 4.5548771999238e-05,
"loss": 4.427,
"step": 1189500
},
{
"epoch": 1.09,
"learning_rate": 4.5525875206987004e-05,
"loss": 4.4202,
"step": 1190000
},
{
"epoch": 1.09,
"learning_rate": 4.5502978414736016e-05,
"loss": 4.4252,
"step": 1190500
},
{
"epoch": 1.09,
"learning_rate": 4.5480127416069515e-05,
"loss": 4.4156,
"step": 1191000
},
{
"epoch": 1.09,
"learning_rate": 4.545723062381853e-05,
"loss": 4.4146,
"step": 1191500
},
{
"epoch": 1.09,
"learning_rate": 4.543437962515203e-05,
"loss": 4.4198,
"step": 1192000
},
{
"epoch": 1.09,
"learning_rate": 4.5411482832901044e-05,
"loss": 4.4293,
"step": 1192500
},
{
"epoch": 1.09,
"learning_rate": 4.538858604065005e-05,
"loss": 4.4095,
"step": 1193000
},
{
"epoch": 1.09,
"learning_rate": 4.5365689248399054e-05,
"loss": 4.4179,
"step": 1193500
},
{
"epoch": 1.09,
"learning_rate": 4.5342792456148066e-05,
"loss": 4.4147,
"step": 1194000
},
{
"epoch": 1.09,
"learning_rate": 4.531989566389707e-05,
"loss": 4.4145,
"step": 1194500
},
{
"epoch": 1.09,
"learning_rate": 4.5296998871646076e-05,
"loss": 4.4324,
"step": 1195000
},
{
"epoch": 1.09,
"learning_rate": 4.527410207939509e-05,
"loss": 4.4262,
"step": 1195500
},
{
"epoch": 1.1,
"learning_rate": 4.5251251080728594e-05,
"loss": 4.4298,
"step": 1196000
},
{
"epoch": 1.1,
"learning_rate": 4.5228354288477605e-05,
"loss": 4.414,
"step": 1196500
},
{
"epoch": 1.1,
"learning_rate": 4.520545749622661e-05,
"loss": 4.4033,
"step": 1197000
},
{
"epoch": 1.1,
"learning_rate": 4.5182560703975615e-05,
"loss": 4.4217,
"step": 1197500
},
{
"epoch": 1.1,
"learning_rate": 4.515966391172463e-05,
"loss": 4.431,
"step": 1198000
},
{
"epoch": 1.1,
"learning_rate": 4.513676711947363e-05,
"loss": 4.4146,
"step": 1198500
},
{
"epoch": 1.1,
"learning_rate": 4.511387032722264e-05,
"loss": 4.4151,
"step": 1199000
},
{
"epoch": 1.1,
"learning_rate": 4.509097353497165e-05,
"loss": 4.4123,
"step": 1199500
},
{
"epoch": 1.1,
"learning_rate": 4.5068122536305155e-05,
"loss": 4.4217,
"step": 1200000
},
{
"epoch": 1.1,
"learning_rate": 4.5045225744054167e-05,
"loss": 4.416,
"step": 1200500
},
{
"epoch": 1.1,
"learning_rate": 4.502232895180317e-05,
"loss": 4.4282,
"step": 1201000
},
{
"epoch": 1.1,
"learning_rate": 4.4999477953136684e-05,
"loss": 4.4162,
"step": 1201500
},
{
"epoch": 1.1,
"learning_rate": 4.497658116088569e-05,
"loss": 4.4245,
"step": 1202000
},
{
"epoch": 1.1,
"learning_rate": 4.4953684368634694e-05,
"loss": 4.4191,
"step": 1202500
},
{
"epoch": 1.1,
"learning_rate": 4.4930787576383706e-05,
"loss": 4.4111,
"step": 1203000
},
{
"epoch": 1.1,
"learning_rate": 4.4907890784132704e-05,
"loss": 4.4138,
"step": 1203500
},
{
"epoch": 1.1,
"learning_rate": 4.4884993991881716e-05,
"loss": 4.4241,
"step": 1204000
},
{
"epoch": 1.1,
"learning_rate": 4.486209719963072e-05,
"loss": 4.4195,
"step": 1204500
},
{
"epoch": 1.1,
"learning_rate": 4.4839200407379726e-05,
"loss": 4.4175,
"step": 1205000
},
{
"epoch": 1.1,
"learning_rate": 4.481634940871324e-05,
"loss": 4.428,
"step": 1205500
},
{
"epoch": 1.1,
"learning_rate": 4.4793452616462244e-05,
"loss": 4.4139,
"step": 1206000
},
{
"epoch": 1.1,
"learning_rate": 4.4770555824211255e-05,
"loss": 4.4214,
"step": 1206500
},
{
"epoch": 1.11,
"learning_rate": 4.474765903196026e-05,
"loss": 4.4192,
"step": 1207000
},
{
"epoch": 1.11,
"learning_rate": 4.4724762239709265e-05,
"loss": 4.4175,
"step": 1207500
},
{
"epoch": 1.11,
"learning_rate": 4.470186544745828e-05,
"loss": 4.4212,
"step": 1208000
},
{
"epoch": 1.11,
"learning_rate": 4.467896865520728e-05,
"loss": 4.4198,
"step": 1208500
},
{
"epoch": 1.11,
"learning_rate": 4.465607186295629e-05,
"loss": 4.4215,
"step": 1209000
},
{
"epoch": 1.11,
"learning_rate": 4.46332208642898e-05,
"loss": 4.426,
"step": 1209500
},
{
"epoch": 1.11,
"learning_rate": 4.4610324072038805e-05,
"loss": 4.4308,
"step": 1210000
},
{
"epoch": 1.11,
"learning_rate": 4.458742727978782e-05,
"loss": 4.4127,
"step": 1210500
},
{
"epoch": 1.11,
"learning_rate": 4.456457628112132e-05,
"loss": 4.42,
"step": 1211000
},
{
"epoch": 1.11,
"learning_rate": 4.454167948887033e-05,
"loss": 4.4039,
"step": 1211500
},
{
"epoch": 1.11,
"learning_rate": 4.451878269661934e-05,
"loss": 4.4301,
"step": 1212000
},
{
"epoch": 1.11,
"learning_rate": 4.4495885904368344e-05,
"loss": 4.4123,
"step": 1212500
},
{
"epoch": 1.11,
"learning_rate": 4.447298911211735e-05,
"loss": 4.4219,
"step": 1213000
},
{
"epoch": 1.11,
"learning_rate": 4.445013811345086e-05,
"loss": 4.4147,
"step": 1213500
},
{
"epoch": 1.11,
"learning_rate": 4.442724132119987e-05,
"loss": 4.4074,
"step": 1214000
},
{
"epoch": 1.11,
"learning_rate": 4.440434452894888e-05,
"loss": 4.4272,
"step": 1214500
},
{
"epoch": 1.11,
"learning_rate": 4.4381447736697883e-05,
"loss": 4.4219,
"step": 1215000
},
{
"epoch": 1.11,
"learning_rate": 4.435859673803139e-05,
"loss": 4.4198,
"step": 1215500
},
{
"epoch": 1.11,
"learning_rate": 4.4335699945780394e-05,
"loss": 4.4101,
"step": 1216000
},
{
"epoch": 1.11,
"learning_rate": 4.4312803153529406e-05,
"loss": 4.4224,
"step": 1216500
},
{
"epoch": 1.11,
"learning_rate": 4.428990636127841e-05,
"loss": 4.4223,
"step": 1217000
},
{
"epoch": 1.12,
"learning_rate": 4.4267009569027416e-05,
"loss": 4.4283,
"step": 1217500
},
{
"epoch": 1.12,
"learning_rate": 4.424411277677643e-05,
"loss": 4.4193,
"step": 1218000
},
{
"epoch": 1.12,
"learning_rate": 4.4221261778109934e-05,
"loss": 4.4093,
"step": 1218500
},
{
"epoch": 1.12,
"learning_rate": 4.4198364985858945e-05,
"loss": 4.4228,
"step": 1219000
},
{
"epoch": 1.12,
"learning_rate": 4.417546819360795e-05,
"loss": 4.4155,
"step": 1219500
},
{
"epoch": 1.12,
"learning_rate": 4.4152571401356955e-05,
"loss": 4.4238,
"step": 1220000
},
{
"epoch": 1.12,
"learning_rate": 4.412967460910597e-05,
"loss": 4.4198,
"step": 1220500
},
{
"epoch": 1.12,
"learning_rate": 4.410682361043947e-05,
"loss": 4.4244,
"step": 1221000
},
{
"epoch": 1.12,
"learning_rate": 4.408392681818848e-05,
"loss": 4.4016,
"step": 1221500
},
{
"epoch": 1.12,
"learning_rate": 4.406103002593749e-05,
"loss": 4.4192,
"step": 1222000
},
{
"epoch": 1.12,
"learning_rate": 4.4038133233686495e-05,
"loss": 4.4114,
"step": 1222500
},
{
"epoch": 1.12,
"learning_rate": 4.4015236441435507e-05,
"loss": 4.4138,
"step": 1223000
},
{
"epoch": 1.12,
"learning_rate": 4.399238544276901e-05,
"loss": 4.4168,
"step": 1223500
},
{
"epoch": 1.12,
"learning_rate": 4.396948865051802e-05,
"loss": 4.4065,
"step": 1224000
},
{
"epoch": 1.12,
"learning_rate": 4.394659185826703e-05,
"loss": 4.4125,
"step": 1224500
},
{
"epoch": 1.12,
"learning_rate": 4.3923695066016034e-05,
"loss": 4.4133,
"step": 1225000
},
{
"epoch": 1.12,
"learning_rate": 4.390079827376504e-05,
"loss": 4.4053,
"step": 1225500
},
{
"epoch": 1.12,
"learning_rate": 4.387790148151405e-05,
"loss": 4.4275,
"step": 1226000
},
{
"epoch": 1.12,
"learning_rate": 4.3855004689263056e-05,
"loss": 4.4123,
"step": 1226500
},
{
"epoch": 1.12,
"learning_rate": 4.383215369059657e-05,
"loss": 4.4192,
"step": 1227000
},
{
"epoch": 1.12,
"learning_rate": 4.3809256898345573e-05,
"loss": 4.4068,
"step": 1227500
},
{
"epoch": 1.12,
"learning_rate": 4.378636010609458e-05,
"loss": 4.4228,
"step": 1228000
},
{
"epoch": 1.13,
"learning_rate": 4.3763463313843584e-05,
"loss": 4.4234,
"step": 1228500
},
{
"epoch": 1.13,
"learning_rate": 4.374056652159259e-05,
"loss": 4.4086,
"step": 1229000
},
{
"epoch": 1.13,
"learning_rate": 4.37176697293416e-05,
"loss": 4.4194,
"step": 1229500
},
{
"epoch": 1.13,
"learning_rate": 4.3694772937090605e-05,
"loss": 4.4046,
"step": 1230000
},
{
"epoch": 1.13,
"learning_rate": 4.367187614483961e-05,
"loss": 4.41,
"step": 1230500
},
{
"epoch": 1.13,
"learning_rate": 4.364902514617312e-05,
"loss": 4.4214,
"step": 1231000
},
{
"epoch": 1.13,
"learning_rate": 4.3626174147506635e-05,
"loss": 4.423,
"step": 1231500
},
{
"epoch": 1.13,
"learning_rate": 4.360327735525564e-05,
"loss": 4.4175,
"step": 1232000
},
{
"epoch": 1.13,
"learning_rate": 4.3580380563004645e-05,
"loss": 4.4147,
"step": 1232500
},
{
"epoch": 1.13,
"learning_rate": 4.355748377075366e-05,
"loss": 4.4287,
"step": 1233000
},
{
"epoch": 1.13,
"learning_rate": 4.353458697850266e-05,
"loss": 4.4051,
"step": 1233500
},
{
"epoch": 1.13,
"learning_rate": 4.351169018625167e-05,
"loss": 4.412,
"step": 1234000
},
{
"epoch": 1.13,
"learning_rate": 4.348879339400068e-05,
"loss": 4.4247,
"step": 1234500
},
{
"epoch": 1.13,
"learning_rate": 4.3465896601749684e-05,
"loss": 4.4078,
"step": 1235000
},
{
"epoch": 1.13,
"learning_rate": 4.3443045603083197e-05,
"loss": 4.417,
"step": 1235500
},
{
"epoch": 1.13,
"learning_rate": 4.34201488108322e-05,
"loss": 4.4091,
"step": 1236000
},
{
"epoch": 1.13,
"learning_rate": 4.339725201858121e-05,
"loss": 4.4144,
"step": 1236500
},
{
"epoch": 1.13,
"learning_rate": 4.337435522633022e-05,
"loss": 4.4227,
"step": 1237000
},
{
"epoch": 1.13,
"learning_rate": 4.3351504227663724e-05,
"loss": 4.4054,
"step": 1237500
},
{
"epoch": 1.13,
"learning_rate": 4.332860743541273e-05,
"loss": 4.4112,
"step": 1238000
},
{
"epoch": 1.13,
"learning_rate": 4.330571064316174e-05,
"loss": 4.4232,
"step": 1238500
},
{
"epoch": 1.13,
"learning_rate": 4.3282813850910746e-05,
"loss": 4.4275,
"step": 1239000
},
{
"epoch": 1.14,
"learning_rate": 4.325996285224426e-05,
"loss": 4.4174,
"step": 1239500
},
{
"epoch": 1.14,
"learning_rate": 4.323706605999326e-05,
"loss": 4.4184,
"step": 1240000
},
{
"epoch": 1.14,
"learning_rate": 4.321416926774227e-05,
"loss": 4.4228,
"step": 1240500
},
{
"epoch": 1.14,
"learning_rate": 4.3191272475491274e-05,
"loss": 4.411,
"step": 1241000
},
{
"epoch": 1.14,
"learning_rate": 4.3168421476824786e-05,
"loss": 4.4123,
"step": 1241500
},
{
"epoch": 1.14,
"learning_rate": 4.314552468457379e-05,
"loss": 4.4046,
"step": 1242000
},
{
"epoch": 1.14,
"learning_rate": 4.3122627892322796e-05,
"loss": 4.4148,
"step": 1242500
},
{
"epoch": 1.14,
"learning_rate": 4.309973110007181e-05,
"loss": 4.3986,
"step": 1243000
},
{
"epoch": 1.14,
"learning_rate": 4.307683430782081e-05,
"loss": 4.3993,
"step": 1243500
},
{
"epoch": 1.14,
"learning_rate": 4.3053983309154325e-05,
"loss": 4.4199,
"step": 1244000
},
{
"epoch": 1.14,
"learning_rate": 4.303108651690333e-05,
"loss": 4.4159,
"step": 1244500
},
{
"epoch": 1.14,
"learning_rate": 4.3008189724652335e-05,
"loss": 4.4054,
"step": 1245000
},
{
"epoch": 1.14,
"learning_rate": 4.298529293240135e-05,
"loss": 4.4143,
"step": 1245500
},
{
"epoch": 1.14,
"learning_rate": 4.296244193373485e-05,
"loss": 4.3965,
"step": 1246000
},
{
"epoch": 1.14,
"learning_rate": 4.293954514148386e-05,
"loss": 4.4208,
"step": 1246500
},
{
"epoch": 1.14,
"learning_rate": 4.291664834923287e-05,
"loss": 4.4093,
"step": 1247000
},
{
"epoch": 1.14,
"learning_rate": 4.2893751556981875e-05,
"loss": 4.419,
"step": 1247500
},
{
"epoch": 1.14,
"learning_rate": 4.287090055831539e-05,
"loss": 4.4177,
"step": 1248000
},
{
"epoch": 1.14,
"learning_rate": 4.284804955964889e-05,
"loss": 4.407,
"step": 1248500
},
{
"epoch": 1.14,
"learning_rate": 4.2825152767397905e-05,
"loss": 4.4258,
"step": 1249000
},
{
"epoch": 1.14,
"learning_rate": 4.280225597514691e-05,
"loss": 4.4172,
"step": 1249500
},
{
"epoch": 1.14,
"learning_rate": 4.2779359182895915e-05,
"loss": 4.4124,
"step": 1250000
},
{
"epoch": 1.15,
"learning_rate": 4.2756462390644926e-05,
"loss": 4.4203,
"step": 1250500
},
{
"epoch": 1.15,
"learning_rate": 4.273356559839393e-05,
"loss": 4.4225,
"step": 1251000
},
{
"epoch": 1.15,
"learning_rate": 4.271071459972744e-05,
"loss": 4.4185,
"step": 1251500
},
{
"epoch": 1.15,
"learning_rate": 4.268781780747644e-05,
"loss": 4.4053,
"step": 1252000
},
{
"epoch": 1.15,
"learning_rate": 4.2664921015225454e-05,
"loss": 4.3984,
"step": 1252500
},
{
"epoch": 1.15,
"learning_rate": 4.264202422297446e-05,
"loss": 4.4282,
"step": 1253000
},
{
"epoch": 1.15,
"learning_rate": 4.2619173224307965e-05,
"loss": 4.4156,
"step": 1253500
},
{
"epoch": 1.15,
"learning_rate": 4.2596276432056976e-05,
"loss": 4.4123,
"step": 1254000
},
{
"epoch": 1.15,
"learning_rate": 4.257342543339048e-05,
"loss": 4.423,
"step": 1254500
},
{
"epoch": 1.15,
"learning_rate": 4.2550528641139494e-05,
"loss": 4.4145,
"step": 1255000
},
{
"epoch": 1.15,
"learning_rate": 4.25276318488885e-05,
"loss": 4.4287,
"step": 1255500
},
{
"epoch": 1.15,
"learning_rate": 4.2504735056637504e-05,
"loss": 4.4119,
"step": 1256000
},
{
"epoch": 1.15,
"learning_rate": 4.2481838264386516e-05,
"loss": 4.4238,
"step": 1256500
},
{
"epoch": 1.15,
"learning_rate": 4.245894147213552e-05,
"loss": 4.4202,
"step": 1257000
},
{
"epoch": 1.15,
"learning_rate": 4.2436044679884526e-05,
"loss": 4.4131,
"step": 1257500
},
{
"epoch": 1.15,
"learning_rate": 4.241314788763354e-05,
"loss": 4.4012,
"step": 1258000
},
{
"epoch": 1.15,
"learning_rate": 4.239025109538254e-05,
"loss": 4.4177,
"step": 1258500
},
{
"epoch": 1.15,
"learning_rate": 4.236735430313155e-05,
"loss": 4.4116,
"step": 1259000
},
{
"epoch": 1.15,
"learning_rate": 4.234445751088056e-05,
"loss": 4.4183,
"step": 1259500
},
{
"epoch": 1.15,
"learning_rate": 4.2321606512214065e-05,
"loss": 4.4069,
"step": 1260000
},
{
"epoch": 1.15,
"learning_rate": 4.229870971996308e-05,
"loss": 4.4023,
"step": 1260500
},
{
"epoch": 1.15,
"learning_rate": 4.227581292771208e-05,
"loss": 4.4078,
"step": 1261000
},
{
"epoch": 1.16,
"learning_rate": 4.225291613546109e-05,
"loss": 4.4133,
"step": 1261500
},
{
"epoch": 1.16,
"learning_rate": 4.22300193432101e-05,
"loss": 4.413,
"step": 1262000
},
{
"epoch": 1.16,
"learning_rate": 4.2207122550959104e-05,
"loss": 4.4157,
"step": 1262500
},
{
"epoch": 1.16,
"learning_rate": 4.2184271552292616e-05,
"loss": 4.4108,
"step": 1263000
},
{
"epoch": 1.16,
"learning_rate": 4.2161374760041615e-05,
"loss": 4.414,
"step": 1263500
},
{
"epoch": 1.16,
"learning_rate": 4.2138477967790627e-05,
"loss": 4.4172,
"step": 1264000
},
{
"epoch": 1.16,
"learning_rate": 4.211558117553963e-05,
"loss": 4.4207,
"step": 1264500
},
{
"epoch": 1.16,
"learning_rate": 4.209268438328864e-05,
"loss": 4.4053,
"step": 1265000
},
{
"epoch": 1.16,
"learning_rate": 4.206978759103765e-05,
"loss": 4.39,
"step": 1265500
},
{
"epoch": 1.16,
"learning_rate": 4.2046936592371154e-05,
"loss": 4.4118,
"step": 1266000
},
{
"epoch": 1.16,
"learning_rate": 4.2024039800120166e-05,
"loss": 4.4236,
"step": 1266500
},
{
"epoch": 1.16,
"learning_rate": 4.200114300786917e-05,
"loss": 4.4064,
"step": 1267000
},
{
"epoch": 1.16,
"learning_rate": 4.1978246215618176e-05,
"loss": 4.4153,
"step": 1267500
},
{
"epoch": 1.16,
"learning_rate": 4.195534942336719e-05,
"loss": 4.4091,
"step": 1268000
},
{
"epoch": 1.16,
"learning_rate": 4.1932498424700693e-05,
"loss": 4.4176,
"step": 1268500
},
{
"epoch": 1.16,
"learning_rate": 4.1909601632449705e-05,
"loss": 4.423,
"step": 1269000
},
{
"epoch": 1.16,
"learning_rate": 4.188675063378321e-05,
"loss": 4.3981,
"step": 1269500
},
{
"epoch": 1.16,
"learning_rate": 4.1863853841532216e-05,
"loss": 4.4033,
"step": 1270000
},
{
"epoch": 1.16,
"learning_rate": 4.184095704928123e-05,
"loss": 4.414,
"step": 1270500
},
{
"epoch": 1.16,
"learning_rate": 4.181806025703023e-05,
"loss": 4.4281,
"step": 1271000
},
{
"epoch": 1.16,
"learning_rate": 4.179516346477924e-05,
"loss": 4.4202,
"step": 1271500
},
{
"epoch": 1.16,
"learning_rate": 4.177226667252825e-05,
"loss": 4.4161,
"step": 1272000
},
{
"epoch": 1.17,
"learning_rate": 4.1749369880277255e-05,
"loss": 4.4082,
"step": 1272500
},
{
"epoch": 1.17,
"learning_rate": 4.1726473088026266e-05,
"loss": 4.4117,
"step": 1273000
},
{
"epoch": 1.17,
"learning_rate": 4.170357629577527e-05,
"loss": 4.4115,
"step": 1273500
},
{
"epoch": 1.17,
"learning_rate": 4.1680679503524277e-05,
"loss": 4.4065,
"step": 1274000
},
{
"epoch": 1.17,
"learning_rate": 4.165778271127329e-05,
"loss": 4.4226,
"step": 1274500
},
{
"epoch": 1.17,
"learning_rate": 4.1634931712606794e-05,
"loss": 4.4186,
"step": 1275000
},
{
"epoch": 1.17,
"learning_rate": 4.16120349203558e-05,
"loss": 4.4263,
"step": 1275500
},
{
"epoch": 1.17,
"learning_rate": 4.1589138128104804e-05,
"loss": 4.4077,
"step": 1276000
},
{
"epoch": 1.17,
"learning_rate": 4.156624133585381e-05,
"loss": 4.4245,
"step": 1276500
},
{
"epoch": 1.17,
"learning_rate": 4.154334454360282e-05,
"loss": 4.4106,
"step": 1277000
},
{
"epoch": 1.17,
"learning_rate": 4.1520447751351826e-05,
"loss": 4.4088,
"step": 1277500
},
{
"epoch": 1.17,
"learning_rate": 4.149759675268534e-05,
"loss": 4.4037,
"step": 1278000
},
{
"epoch": 1.17,
"learning_rate": 4.1474699960434343e-05,
"loss": 4.4139,
"step": 1278500
},
{
"epoch": 1.17,
"learning_rate": 4.145180316818335e-05,
"loss": 4.4032,
"step": 1279000
},
{
"epoch": 1.17,
"learning_rate": 4.142890637593236e-05,
"loss": 4.406,
"step": 1279500
},
{
"epoch": 1.17,
"learning_rate": 4.1406009583681365e-05,
"loss": 4.4106,
"step": 1280000
},
{
"epoch": 1.17,
"learning_rate": 4.138315858501488e-05,
"loss": 4.4053,
"step": 1280500
},
{
"epoch": 1.17,
"learning_rate": 4.136026179276388e-05,
"loss": 4.4091,
"step": 1281000
},
{
"epoch": 1.17,
"learning_rate": 4.133736500051289e-05,
"loss": 4.4148,
"step": 1281500
},
{
"epoch": 1.17,
"learning_rate": 4.13144682082619e-05,
"loss": 4.4165,
"step": 1282000
},
{
"epoch": 1.17,
"learning_rate": 4.1291617209595405e-05,
"loss": 4.4131,
"step": 1282500
},
{
"epoch": 1.18,
"learning_rate": 4.126872041734442e-05,
"loss": 4.4066,
"step": 1283000
},
{
"epoch": 1.18,
"learning_rate": 4.124582362509342e-05,
"loss": 4.4173,
"step": 1283500
},
{
"epoch": 1.18,
"learning_rate": 4.122292683284243e-05,
"loss": 4.4218,
"step": 1284000
},
{
"epoch": 1.18,
"learning_rate": 4.120003004059144e-05,
"loss": 4.4075,
"step": 1284500
},
{
"epoch": 1.18,
"learning_rate": 4.1177133248340444e-05,
"loss": 4.4196,
"step": 1285000
},
{
"epoch": 1.18,
"learning_rate": 4.115423645608945e-05,
"loss": 4.3996,
"step": 1285500
},
{
"epoch": 1.18,
"learning_rate": 4.113138545742296e-05,
"loss": 4.4027,
"step": 1286000
},
{
"epoch": 1.18,
"learning_rate": 4.1108488665171967e-05,
"loss": 4.3836,
"step": 1286500
},
{
"epoch": 1.18,
"learning_rate": 4.108559187292098e-05,
"loss": 4.4282,
"step": 1287000
},
{
"epoch": 1.18,
"learning_rate": 4.1062695080669983e-05,
"loss": 4.4009,
"step": 1287500
},
{
"epoch": 1.18,
"learning_rate": 4.103984408200349e-05,
"loss": 4.4014,
"step": 1288000
},
{
"epoch": 1.18,
"learning_rate": 4.1016947289752494e-05,
"loss": 4.4169,
"step": 1288500
},
{
"epoch": 1.18,
"learning_rate": 4.09940504975015e-05,
"loss": 4.4006,
"step": 1289000
},
{
"epoch": 1.18,
"learning_rate": 4.097115370525051e-05,
"loss": 4.3996,
"step": 1289500
},
{
"epoch": 1.18,
"learning_rate": 4.0948302706584017e-05,
"loss": 4.4154,
"step": 1290000
},
{
"epoch": 1.18,
"learning_rate": 4.092540591433303e-05,
"loss": 4.4105,
"step": 1290500
},
{
"epoch": 1.18,
"learning_rate": 4.0902509122082033e-05,
"loss": 4.4156,
"step": 1291000
},
{
"epoch": 1.18,
"learning_rate": 4.087961232983104e-05,
"loss": 4.4038,
"step": 1291500
},
{
"epoch": 1.18,
"learning_rate": 4.085671553758005e-05,
"loss": 4.3958,
"step": 1292000
},
{
"epoch": 1.18,
"learning_rate": 4.0833818745329055e-05,
"loss": 4.3943,
"step": 1292500
},
{
"epoch": 1.18,
"learning_rate": 4.081092195307806e-05,
"loss": 4.3942,
"step": 1293000
},
{
"epoch": 1.18,
"learning_rate": 4.078802516082707e-05,
"loss": 4.4054,
"step": 1293500
},
{
"epoch": 1.19,
"learning_rate": 4.076512836857608e-05,
"loss": 4.4086,
"step": 1294000
},
{
"epoch": 1.19,
"learning_rate": 4.074227736990959e-05,
"loss": 4.4123,
"step": 1294500
},
{
"epoch": 1.19,
"learning_rate": 4.0719380577658595e-05,
"loss": 4.4005,
"step": 1295000
},
{
"epoch": 1.19,
"learning_rate": 4.06964837854076e-05,
"loss": 4.4144,
"step": 1295500
},
{
"epoch": 1.19,
"learning_rate": 4.067358699315661e-05,
"loss": 4.4121,
"step": 1296000
},
{
"epoch": 1.19,
"learning_rate": 4.065073599449012e-05,
"loss": 4.4154,
"step": 1296500
},
{
"epoch": 1.19,
"learning_rate": 4.062788499582363e-05,
"loss": 4.4156,
"step": 1297000
},
{
"epoch": 1.19,
"learning_rate": 4.0604988203572635e-05,
"loss": 4.401,
"step": 1297500
},
{
"epoch": 1.19,
"learning_rate": 4.0582091411321646e-05,
"loss": 4.4075,
"step": 1298000
},
{
"epoch": 1.19,
"learning_rate": 4.055919461907065e-05,
"loss": 4.4088,
"step": 1298500
},
{
"epoch": 1.19,
"learning_rate": 4.053634362040416e-05,
"loss": 4.4148,
"step": 1299000
},
{
"epoch": 1.19,
"learning_rate": 4.051344682815317e-05,
"loss": 4.4076,
"step": 1299500
},
{
"epoch": 1.19,
"learning_rate": 4.049055003590217e-05,
"loss": 4.4173,
"step": 1300000
},
{
"epoch": 1.19,
"learning_rate": 4.046765324365118e-05,
"loss": 4.4068,
"step": 1300500
},
{
"epoch": 1.19,
"learning_rate": 4.0444756451400184e-05,
"loss": 4.4093,
"step": 1301000
},
{
"epoch": 1.19,
"learning_rate": 4.042185965914919e-05,
"loss": 4.4133,
"step": 1301500
},
{
"epoch": 1.19,
"learning_rate": 4.03989628668982e-05,
"loss": 4.4225,
"step": 1302000
},
{
"epoch": 1.19,
"learning_rate": 4.0376066074647206e-05,
"loss": 4.4042,
"step": 1302500
},
{
"epoch": 1.19,
"learning_rate": 4.035316928239621e-05,
"loss": 4.4035,
"step": 1303000
},
{
"epoch": 1.19,
"learning_rate": 4.033027249014522e-05,
"loss": 4.4114,
"step": 1303500
},
{
"epoch": 1.19,
"learning_rate": 4.030737569789423e-05,
"loss": 4.4059,
"step": 1304000
},
{
"epoch": 1.19,
"learning_rate": 4.028447890564323e-05,
"loss": 4.4109,
"step": 1304500
},
{
"epoch": 1.2,
"learning_rate": 4.0261582113392245e-05,
"loss": 4.4062,
"step": 1305000
},
{
"epoch": 1.2,
"learning_rate": 4.023868532114125e-05,
"loss": 4.4088,
"step": 1305500
},
{
"epoch": 1.2,
"learning_rate": 4.021583432247476e-05,
"loss": 4.4145,
"step": 1306000
},
{
"epoch": 1.2,
"learning_rate": 4.019293753022377e-05,
"loss": 4.3966,
"step": 1306500
},
{
"epoch": 1.2,
"learning_rate": 4.017004073797277e-05,
"loss": 4.4004,
"step": 1307000
},
{
"epoch": 1.2,
"learning_rate": 4.0147143945721784e-05,
"loss": 4.4122,
"step": 1307500
},
{
"epoch": 1.2,
"learning_rate": 4.012424715347079e-05,
"loss": 4.4059,
"step": 1308000
},
{
"epoch": 1.2,
"learning_rate": 4.0101350361219794e-05,
"loss": 4.4172,
"step": 1308500
},
{
"epoch": 1.2,
"learning_rate": 4.0078453568968806e-05,
"loss": 4.4086,
"step": 1309000
},
{
"epoch": 1.2,
"learning_rate": 4.005555677671781e-05,
"loss": 4.4079,
"step": 1309500
},
{
"epoch": 1.2,
"learning_rate": 4.0032705778051323e-05,
"loss": 4.4064,
"step": 1310000
},
{
"epoch": 1.2,
"learning_rate": 4.000985477938483e-05,
"loss": 4.412,
"step": 1310500
},
{
"epoch": 1.2,
"learning_rate": 3.998695798713384e-05,
"loss": 4.4017,
"step": 1311000
},
{
"epoch": 1.2,
"learning_rate": 3.9964061194882846e-05,
"loss": 4.4107,
"step": 1311500
},
{
"epoch": 1.2,
"learning_rate": 3.994116440263185e-05,
"loss": 4.3993,
"step": 1312000
},
{
"epoch": 1.2,
"learning_rate": 3.991826761038086e-05,
"loss": 4.4027,
"step": 1312500
},
{
"epoch": 1.2,
"learning_rate": 3.989537081812986e-05,
"loss": 4.4156,
"step": 1313000
},
{
"epoch": 1.2,
"learning_rate": 3.987247402587887e-05,
"loss": 4.4105,
"step": 1313500
},
{
"epoch": 1.2,
"learning_rate": 3.984957723362788e-05,
"loss": 4.4152,
"step": 1314000
},
{
"epoch": 1.2,
"learning_rate": 3.982668044137688e-05,
"loss": 4.4081,
"step": 1314500
},
{
"epoch": 1.2,
"learning_rate": 3.9803829442710395e-05,
"loss": 4.4055,
"step": 1315000
},
{
"epoch": 1.2,
"learning_rate": 3.97809326504594e-05,
"loss": 4.414,
"step": 1315500
},
{
"epoch": 1.21,
"learning_rate": 3.975808165179291e-05,
"loss": 4.4089,
"step": 1316000
},
{
"epoch": 1.21,
"learning_rate": 3.973518485954192e-05,
"loss": 4.4154,
"step": 1316500
},
{
"epoch": 1.21,
"learning_rate": 3.971228806729092e-05,
"loss": 4.41,
"step": 1317000
},
{
"epoch": 1.21,
"learning_rate": 3.9689391275039935e-05,
"loss": 4.4175,
"step": 1317500
},
{
"epoch": 1.21,
"learning_rate": 3.966654027637344e-05,
"loss": 4.407,
"step": 1318000
},
{
"epoch": 1.21,
"learning_rate": 3.964364348412245e-05,
"loss": 4.407,
"step": 1318500
},
{
"epoch": 1.21,
"learning_rate": 3.962074669187146e-05,
"loss": 4.4015,
"step": 1319000
},
{
"epoch": 1.21,
"learning_rate": 3.959784989962046e-05,
"loss": 4.4082,
"step": 1319500
},
{
"epoch": 1.21,
"learning_rate": 3.9574953107369474e-05,
"loss": 4.4005,
"step": 1320000
},
{
"epoch": 1.21,
"learning_rate": 3.955210210870298e-05,
"loss": 4.3903,
"step": 1320500
},
{
"epoch": 1.21,
"learning_rate": 3.952920531645199e-05,
"loss": 4.4045,
"step": 1321000
},
{
"epoch": 1.21,
"learning_rate": 3.9506308524200997e-05,
"loss": 4.4022,
"step": 1321500
},
{
"epoch": 1.21,
"learning_rate": 3.948341173195e-05,
"loss": 4.3928,
"step": 1322000
},
{
"epoch": 1.21,
"learning_rate": 3.9460514939699013e-05,
"loss": 4.3919,
"step": 1322500
},
{
"epoch": 1.21,
"learning_rate": 3.943761814744802e-05,
"loss": 4.4096,
"step": 1323000
},
{
"epoch": 1.21,
"learning_rate": 3.941476714878153e-05,
"loss": 4.4024,
"step": 1323500
},
{
"epoch": 1.21,
"learning_rate": 3.9391870356530536e-05,
"loss": 4.4062,
"step": 1324000
},
{
"epoch": 1.21,
"learning_rate": 3.936897356427954e-05,
"loss": 4.4008,
"step": 1324500
},
{
"epoch": 1.21,
"learning_rate": 3.9346076772028546e-05,
"loss": 4.4064,
"step": 1325000
},
{
"epoch": 1.21,
"learning_rate": 3.932317997977755e-05,
"loss": 4.3958,
"step": 1325500
},
{
"epoch": 1.21,
"learning_rate": 3.930028318752656e-05,
"loss": 4.3997,
"step": 1326000
},
{
"epoch": 1.21,
"learning_rate": 3.927738639527557e-05,
"loss": 4.393,
"step": 1326500
},
{
"epoch": 1.22,
"learning_rate": 3.9254535396609074e-05,
"loss": 4.4047,
"step": 1327000
},
{
"epoch": 1.22,
"learning_rate": 3.9231638604358085e-05,
"loss": 4.4087,
"step": 1327500
},
{
"epoch": 1.22,
"learning_rate": 3.920874181210709e-05,
"loss": 4.4069,
"step": 1328000
},
{
"epoch": 1.22,
"learning_rate": 3.91858908134406e-05,
"loss": 4.3996,
"step": 1328500
},
{
"epoch": 1.22,
"learning_rate": 3.916299402118961e-05,
"loss": 4.4074,
"step": 1329000
},
{
"epoch": 1.22,
"learning_rate": 3.914009722893861e-05,
"loss": 4.4063,
"step": 1329500
},
{
"epoch": 1.22,
"learning_rate": 3.9117200436687625e-05,
"loss": 4.4098,
"step": 1330000
},
{
"epoch": 1.22,
"learning_rate": 3.909434943802113e-05,
"loss": 4.3964,
"step": 1330500
},
{
"epoch": 1.22,
"learning_rate": 3.907145264577014e-05,
"loss": 4.4158,
"step": 1331000
},
{
"epoch": 1.22,
"learning_rate": 3.904855585351915e-05,
"loss": 4.4161,
"step": 1331500
},
{
"epoch": 1.22,
"learning_rate": 3.902565906126815e-05,
"loss": 4.4159,
"step": 1332000
},
{
"epoch": 1.22,
"learning_rate": 3.9002762269017164e-05,
"loss": 4.3829,
"step": 1332500
},
{
"epoch": 1.22,
"learning_rate": 3.897986547676617e-05,
"loss": 4.3922,
"step": 1333000
},
{
"epoch": 1.22,
"learning_rate": 3.8956968684515174e-05,
"loss": 4.4018,
"step": 1333500
},
{
"epoch": 1.22,
"learning_rate": 3.8934071892264186e-05,
"loss": 4.4068,
"step": 1334000
},
{
"epoch": 1.22,
"learning_rate": 3.891117510001319e-05,
"loss": 4.3906,
"step": 1334500
},
{
"epoch": 1.22,
"learning_rate": 3.8888278307762196e-05,
"loss": 4.4004,
"step": 1335000
},
{
"epoch": 1.22,
"learning_rate": 3.886542730909571e-05,
"loss": 4.4055,
"step": 1335500
},
{
"epoch": 1.22,
"learning_rate": 3.8842530516844713e-05,
"loss": 4.4105,
"step": 1336000
},
{
"epoch": 1.22,
"learning_rate": 3.8819633724593725e-05,
"loss": 4.4105,
"step": 1336500
},
{
"epoch": 1.22,
"learning_rate": 3.879673693234273e-05,
"loss": 4.3913,
"step": 1337000
},
{
"epoch": 1.22,
"learning_rate": 3.8773840140091735e-05,
"loss": 4.4093,
"step": 1337500
},
{
"epoch": 1.23,
"learning_rate": 3.875094334784074e-05,
"loss": 4.4066,
"step": 1338000
},
{
"epoch": 1.23,
"learning_rate": 3.8728046555589745e-05,
"loss": 4.4003,
"step": 1338500
},
{
"epoch": 1.23,
"learning_rate": 3.870514976333876e-05,
"loss": 4.3933,
"step": 1339000
},
{
"epoch": 1.23,
"learning_rate": 3.868229876467226e-05,
"loss": 4.3956,
"step": 1339500
},
{
"epoch": 1.23,
"learning_rate": 3.8659401972421275e-05,
"loss": 4.4029,
"step": 1340000
}
],
"max_steps": 2183712,
"num_train_epochs": 2,
"total_flos": 2.801049015287808e+18,
"trial_name": null,
"trial_params": null
}