{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 2058,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0029154518950437317,
"grad_norm": 0.08236155658960342,
"learning_rate": 1.4563106796116505e-07,
"loss": 1.120621681213379,
"step": 2
},
{
"epoch": 0.0058309037900874635,
"grad_norm": 0.5269019603729248,
"learning_rate": 4.368932038834952e-07,
"loss": 1.9105433225631714,
"step": 4
},
{
"epoch": 0.008746355685131196,
"grad_norm": 0.3449331521987915,
"learning_rate": 7.281553398058253e-07,
"loss": 1.8805404901504517,
"step": 6
},
{
"epoch": 0.011661807580174927,
"grad_norm": 0.10853756964206696,
"learning_rate": 1.0194174757281554e-06,
"loss": 1.5699371099472046,
"step": 8
},
{
"epoch": 0.014577259475218658,
"grad_norm": 1.1428029537200928,
"learning_rate": 1.3106796116504856e-06,
"loss": 1.4362584352493286,
"step": 10
},
{
"epoch": 0.01749271137026239,
"grad_norm": 0.5868045091629028,
"learning_rate": 1.6019417475728156e-06,
"loss": 2.0035324096679688,
"step": 12
},
{
"epoch": 0.02040816326530612,
"grad_norm": 0.08258485049009323,
"learning_rate": 1.8932038834951458e-06,
"loss": 1.5183849334716797,
"step": 14
},
{
"epoch": 0.023323615160349854,
"grad_norm": 2.6764633655548096,
"learning_rate": 2.1844660194174755e-06,
"loss": 1.8052839040756226,
"step": 16
},
{
"epoch": 0.026239067055393587,
"grad_norm": 0.342227965593338,
"learning_rate": 2.475728155339806e-06,
"loss": 1.8929893970489502,
"step": 18
},
{
"epoch": 0.029154518950437316,
"grad_norm": 0.1563744992017746,
"learning_rate": 2.766990291262136e-06,
"loss": 1.7904902696609497,
"step": 20
},
{
"epoch": 0.03206997084548105,
"grad_norm": 0.1933348923921585,
"learning_rate": 3.058252427184466e-06,
"loss": 1.4513907432556152,
"step": 22
},
{
"epoch": 0.03498542274052478,
"grad_norm": 0.5887855291366577,
"learning_rate": 3.3495145631067963e-06,
"loss": 2.2697947025299072,
"step": 24
},
{
"epoch": 0.037900874635568516,
"grad_norm": 0.33995822072029114,
"learning_rate": 3.6407766990291263e-06,
"loss": 1.7317644357681274,
"step": 26
},
{
"epoch": 0.04081632653061224,
"grad_norm": 0.6888100504875183,
"learning_rate": 3.932038834951457e-06,
"loss": 1.8117475509643555,
"step": 28
},
{
"epoch": 0.043731778425655975,
"grad_norm": 0.7777397036552429,
"learning_rate": 4.223300970873786e-06,
"loss": 1.8055756092071533,
"step": 30
},
{
"epoch": 0.04664723032069971,
"grad_norm": 0.4058018624782562,
"learning_rate": 4.514563106796117e-06,
"loss": 1.9432220458984375,
"step": 32
},
{
"epoch": 0.04956268221574344,
"grad_norm": 0.22737905383110046,
"learning_rate": 4.805825242718447e-06,
"loss": 1.6058305501937866,
"step": 34
},
{
"epoch": 0.052478134110787174,
"grad_norm": 0.3183080852031708,
"learning_rate": 5.097087378640777e-06,
"loss": 1.8658274412155151,
"step": 36
},
{
"epoch": 0.05539358600583091,
"grad_norm": 0.17585590481758118,
"learning_rate": 5.388349514563107e-06,
"loss": 2.2423486709594727,
"step": 38
},
{
"epoch": 0.05830903790087463,
"grad_norm": 0.10230281203985214,
"learning_rate": 5.679611650485437e-06,
"loss": 1.5302915573120117,
"step": 40
},
{
"epoch": 0.061224489795918366,
"grad_norm": 0.4932399392127991,
"learning_rate": 5.970873786407767e-06,
"loss": 1.813106656074524,
"step": 42
},
{
"epoch": 0.0641399416909621,
"grad_norm": 0.15170824527740479,
"learning_rate": 6.262135922330097e-06,
"loss": 1.6509969234466553,
"step": 44
},
{
"epoch": 0.06705539358600583,
"grad_norm": 0.1539481282234192,
"learning_rate": 6.553398058252427e-06,
"loss": 1.7683537006378174,
"step": 46
},
{
"epoch": 0.06997084548104957,
"grad_norm": 0.6599376201629639,
"learning_rate": 6.844660194174757e-06,
"loss": 2.1816630363464355,
"step": 48
},
{
"epoch": 0.0728862973760933,
"grad_norm": 0.24105864763259888,
"learning_rate": 7.135922330097088e-06,
"loss": 1.910886526107788,
"step": 50
},
{
"epoch": 0.07580174927113703,
"grad_norm": 0.09656477719545364,
"learning_rate": 7.427184466019417e-06,
"loss": 1.199069857597351,
"step": 52
},
{
"epoch": 0.07871720116618076,
"grad_norm": 0.3129803240299225,
"learning_rate": 7.718446601941748e-06,
"loss": 1.7870614528656006,
"step": 54
},
{
"epoch": 0.08163265306122448,
"grad_norm": 0.253489226102829,
"learning_rate": 8.009708737864077e-06,
"loss": 2.0801727771759033,
"step": 56
},
{
"epoch": 0.08454810495626822,
"grad_norm": 0.12343698740005493,
"learning_rate": 8.300970873786407e-06,
"loss": 1.4909915924072266,
"step": 58
},
{
"epoch": 0.08746355685131195,
"grad_norm": 0.19224074482917786,
"learning_rate": 8.592233009708738e-06,
"loss": 2.0119330883026123,
"step": 60
},
{
"epoch": 0.09037900874635568,
"grad_norm": 0.2891639471054077,
"learning_rate": 8.883495145631068e-06,
"loss": 1.9431190490722656,
"step": 62
},
{
"epoch": 0.09329446064139942,
"grad_norm": 0.8908348083496094,
"learning_rate": 9.174757281553397e-06,
"loss": 1.8723704814910889,
"step": 64
},
{
"epoch": 0.09620991253644315,
"grad_norm": 0.09907913953065872,
"learning_rate": 9.466019417475729e-06,
"loss": 1.556423306465149,
"step": 66
},
{
"epoch": 0.09912536443148688,
"grad_norm": 0.18893972039222717,
"learning_rate": 9.75728155339806e-06,
"loss": 1.8031634092330933,
"step": 68
},
{
"epoch": 0.10204081632653061,
"grad_norm": 0.3021998107433319,
"learning_rate": 1.004854368932039e-05,
"loss": 1.6836217641830444,
"step": 70
},
{
"epoch": 0.10495626822157435,
"grad_norm": 0.19465358555316925,
"learning_rate": 1.0339805825242719e-05,
"loss": 1.3162983655929565,
"step": 72
},
{
"epoch": 0.10787172011661808,
"grad_norm": 0.35194098949432373,
"learning_rate": 1.0631067961165048e-05,
"loss": 1.6223976612091064,
"step": 74
},
{
"epoch": 0.11078717201166181,
"grad_norm": 0.11608141660690308,
"learning_rate": 1.092233009708738e-05,
"loss": 1.5001176595687866,
"step": 76
},
{
"epoch": 0.11370262390670553,
"grad_norm": 0.17615102231502533,
"learning_rate": 1.121359223300971e-05,
"loss": 1.6835155487060547,
"step": 78
},
{
"epoch": 0.11661807580174927,
"grad_norm": 0.10972107201814651,
"learning_rate": 1.1504854368932039e-05,
"loss": 1.0958292484283447,
"step": 80
},
{
"epoch": 0.119533527696793,
"grad_norm": 0.2486797422170639,
"learning_rate": 1.1796116504854368e-05,
"loss": 1.5743815898895264,
"step": 82
},
{
"epoch": 0.12244897959183673,
"grad_norm": 0.6029097437858582,
"learning_rate": 1.20873786407767e-05,
"loss": 1.4908254146575928,
"step": 84
},
{
"epoch": 0.12536443148688048,
"grad_norm": 0.47159314155578613,
"learning_rate": 1.237864077669903e-05,
"loss": 1.3921440839767456,
"step": 86
},
{
"epoch": 0.1282798833819242,
"grad_norm": 0.23478780686855316,
"learning_rate": 1.2669902912621359e-05,
"loss": 1.60302734375,
"step": 88
},
{
"epoch": 0.13119533527696792,
"grad_norm": 0.06909849494695663,
"learning_rate": 1.2961165048543688e-05,
"loss": 1.3646469116210938,
"step": 90
},
{
"epoch": 0.13411078717201166,
"grad_norm": 0.12045982480049133,
"learning_rate": 1.3252427184466021e-05,
"loss": 1.3031418323516846,
"step": 92
},
{
"epoch": 0.13702623906705538,
"grad_norm": 0.2616878151893616,
"learning_rate": 1.3543689320388351e-05,
"loss": 1.4213391542434692,
"step": 94
},
{
"epoch": 0.13994169096209913,
"grad_norm": 0.19713328778743744,
"learning_rate": 1.383495145631068e-05,
"loss": 1.8326067924499512,
"step": 96
},
{
"epoch": 0.14285714285714285,
"grad_norm": 0.42456164956092834,
"learning_rate": 1.412621359223301e-05,
"loss": 2.064007043838501,
"step": 98
},
{
"epoch": 0.1457725947521866,
"grad_norm": 0.1171143651008606,
"learning_rate": 1.4417475728155341e-05,
"loss": 1.3881018161773682,
"step": 100
},
{
"epoch": 0.14868804664723032,
"grad_norm": 0.5466513633728027,
"learning_rate": 1.470873786407767e-05,
"loss": 1.7975414991378784,
"step": 102
},
{
"epoch": 0.15160349854227406,
"grad_norm": 0.2429724484682083,
"learning_rate": 1.5e-05,
"loss": 1.581913709640503,
"step": 104
},
{
"epoch": 0.15451895043731778,
"grad_norm": 0.16082407534122467,
"learning_rate": 1.4999965139018001e-05,
"loss": 1.6313072443008423,
"step": 106
},
{
"epoch": 0.15743440233236153,
"grad_norm": 0.20626085996627808,
"learning_rate": 1.4999860556432087e-05,
"loss": 1.4128293991088867,
"step": 108
},
{
"epoch": 0.16034985422740525,
"grad_norm": 0.08978555351495743,
"learning_rate": 1.4999686253322514e-05,
"loss": 1.6325119733810425,
"step": 110
},
{
"epoch": 0.16326530612244897,
"grad_norm": 0.17410112917423248,
"learning_rate": 1.4999442231489687e-05,
"loss": 1.6410691738128662,
"step": 112
},
{
"epoch": 0.1661807580174927,
"grad_norm": 0.11147186905145645,
"learning_rate": 1.4999128493454151e-05,
"loss": 1.3302874565124512,
"step": 114
},
{
"epoch": 0.16909620991253643,
"grad_norm": 0.44229331612586975,
"learning_rate": 1.4998745042456563e-05,
"loss": 1.6997064352035522,
"step": 116
},
{
"epoch": 0.17201166180758018,
"grad_norm": 0.1818253993988037,
"learning_rate": 1.499829188245766e-05,
"loss": 1.3123167753219604,
"step": 118
},
{
"epoch": 0.1749271137026239,
"grad_norm": 0.15915799140930176,
"learning_rate": 1.4997769018138212e-05,
"loss": 1.6660683155059814,
"step": 120
},
{
"epoch": 0.17784256559766765,
"grad_norm": 0.2367630898952484,
"learning_rate": 1.4997176454898977e-05,
"loss": 1.4073443412780762,
"step": 122
},
{
"epoch": 0.18075801749271136,
"grad_norm": 0.653868556022644,
"learning_rate": 1.4996514198860649e-05,
"loss": 1.351149082183838,
"step": 124
},
{
"epoch": 0.1836734693877551,
"grad_norm": 0.08681757003068924,
"learning_rate": 1.4995782256863785e-05,
"loss": 1.3422613143920898,
"step": 126
},
{
"epoch": 0.18658892128279883,
"grad_norm": 0.06514488905668259,
"learning_rate": 1.4994980636468756e-05,
"loss": 1.3343521356582642,
"step": 128
},
{
"epoch": 0.18950437317784258,
"grad_norm": 0.9027652740478516,
"learning_rate": 1.4994109345955632e-05,
"loss": 1.4679464101791382,
"step": 130
},
{
"epoch": 0.1924198250728863,
"grad_norm": 0.35018599033355713,
"learning_rate": 1.4993168394324137e-05,
"loss": 1.1963084936141968,
"step": 132
},
{
"epoch": 0.19533527696793002,
"grad_norm": 0.13998304307460785,
"learning_rate": 1.4992157791293523e-05,
"loss": 1.333540678024292,
"step": 134
},
{
"epoch": 0.19825072886297376,
"grad_norm": 0.11608795821666718,
"learning_rate": 1.4991077547302497e-05,
"loss": 1.5141417980194092,
"step": 136
},
{
"epoch": 0.20116618075801748,
"grad_norm": 0.08046405762434006,
"learning_rate": 1.4989927673509089e-05,
"loss": 1.3266879320144653,
"step": 138
},
{
"epoch": 0.20408163265306123,
"grad_norm": 0.1371355652809143,
"learning_rate": 1.4988708181790555e-05,
"loss": 1.2892866134643555,
"step": 140
},
{
"epoch": 0.20699708454810495,
"grad_norm": 0.1368686705827713,
"learning_rate": 1.4987419084743244e-05,
"loss": 1.0467798709869385,
"step": 142
},
{
"epoch": 0.2099125364431487,
"grad_norm": 0.23302382230758667,
"learning_rate": 1.4986060395682469e-05,
"loss": 1.1930760145187378,
"step": 144
},
{
"epoch": 0.21282798833819241,
"grad_norm": 1.9061791896820068,
"learning_rate": 1.4984632128642375e-05,
"loss": 1.4475537538528442,
"step": 146
},
{
"epoch": 0.21574344023323616,
"grad_norm": 0.18942643702030182,
"learning_rate": 1.4983134298375787e-05,
"loss": 1.376928448677063,
"step": 148
},
{
"epoch": 0.21865889212827988,
"grad_norm": 0.21135789155960083,
"learning_rate": 1.498156692035407e-05,
"loss": 1.5480635166168213,
"step": 150
},
{
"epoch": 0.22157434402332363,
"grad_norm": 0.13644421100616455,
"learning_rate": 1.4979930010766947e-05,
"loss": 1.7161264419555664,
"step": 152
},
{
"epoch": 0.22448979591836735,
"grad_norm": 0.12430273741483688,
"learning_rate": 1.4978223586522351e-05,
"loss": 1.242932677268982,
"step": 154
},
{
"epoch": 0.22740524781341107,
"grad_norm": 0.7622217535972595,
"learning_rate": 1.4976447665246251e-05,
"loss": 0.5300056338310242,
"step": 156
},
{
"epoch": 0.2303206997084548,
"grad_norm": 0.13458958268165588,
"learning_rate": 1.4974602265282451e-05,
"loss": 1.571650743484497,
"step": 158
},
{
"epoch": 0.23323615160349853,
"grad_norm": 0.2972854673862457,
"learning_rate": 1.4972687405692425e-05,
"loss": 1.2033076286315918,
"step": 160
},
{
"epoch": 0.23615160349854228,
"grad_norm": 0.29232847690582275,
"learning_rate": 1.4970703106255095e-05,
"loss": 1.4756550788879395,
"step": 162
},
{
"epoch": 0.239067055393586,
"grad_norm": 0.07210766524076462,
"learning_rate": 1.4968649387466655e-05,
"loss": 1.3033177852630615,
"step": 164
},
{
"epoch": 0.24198250728862974,
"grad_norm": 0.5424373745918274,
"learning_rate": 1.4966526270540327e-05,
"loss": 1.0460329055786133,
"step": 166
},
{
"epoch": 0.24489795918367346,
"grad_norm": 0.28463321924209595,
"learning_rate": 1.4964333777406174e-05,
"loss": 1.250373363494873,
"step": 168
},
{
"epoch": 0.2478134110787172,
"grad_norm": 0.3408065140247345,
"learning_rate": 1.496207193071085e-05,
"loss": 0.8593610525131226,
"step": 170
},
{
"epoch": 0.25072886297376096,
"grad_norm": 0.14829058945178986,
"learning_rate": 1.4959740753817374e-05,
"loss": 1.304344892501831,
"step": 172
},
{
"epoch": 0.2536443148688047,
"grad_norm": 0.8436731696128845,
"learning_rate": 1.4957340270804896e-05,
"loss": 1.2743805646896362,
"step": 174
},
{
"epoch": 0.2565597667638484,
"grad_norm": 0.11323361843824387,
"learning_rate": 1.4954870506468434e-05,
"loss": 1.329984188079834,
"step": 176
},
{
"epoch": 0.2594752186588921,
"grad_norm": 0.09321129322052002,
"learning_rate": 1.4952331486318626e-05,
"loss": 1.2258719205856323,
"step": 178
},
{
"epoch": 0.26239067055393583,
"grad_norm": 0.37252843379974365,
"learning_rate": 1.4949723236581472e-05,
"loss": 1.0671582221984863,
"step": 180
},
{
"epoch": 0.2653061224489796,
"grad_norm": 0.3797838091850281,
"learning_rate": 1.4947045784198052e-05,
"loss": 1.2696138620376587,
"step": 182
},
{
"epoch": 0.26822157434402333,
"grad_norm": 0.16805821657180786,
"learning_rate": 1.4944299156824251e-05,
"loss": 1.4738816022872925,
"step": 184
},
{
"epoch": 0.27113702623906705,
"grad_norm": 0.2671731114387512,
"learning_rate": 1.4941483382830475e-05,
"loss": 1.3171305656433105,
"step": 186
},
{
"epoch": 0.27405247813411077,
"grad_norm": 0.07962363958358765,
"learning_rate": 1.4938598491301369e-05,
"loss": 1.2901722192764282,
"step": 188
},
{
"epoch": 0.27696793002915454,
"grad_norm": 0.280506432056427,
"learning_rate": 1.4935644512035486e-05,
"loss": 1.3184595108032227,
"step": 190
},
{
"epoch": 0.27988338192419826,
"grad_norm": 0.13458193838596344,
"learning_rate": 1.4932621475545014e-05,
"loss": 1.1937448978424072,
"step": 192
},
{
"epoch": 0.282798833819242,
"grad_norm": 0.7079519033432007,
"learning_rate": 1.4929529413055442e-05,
"loss": 1.1439327001571655,
"step": 194
},
{
"epoch": 0.2857142857142857,
"grad_norm": 0.18462230265140533,
"learning_rate": 1.4926368356505236e-05,
"loss": 1.5497668981552124,
"step": 196
},
{
"epoch": 0.2886297376093295,
"grad_norm": 0.16043758392333984,
"learning_rate": 1.492313833854552e-05,
"loss": 1.4568783044815063,
"step": 198
},
{
"epoch": 0.2915451895043732,
"grad_norm": 0.42396068572998047,
"learning_rate": 1.491983939253973e-05,
"loss": 1.6005096435546875,
"step": 200
},
{
"epoch": 0.2944606413994169,
"grad_norm": 0.21155761182308197,
"learning_rate": 1.4916471552563272e-05,
"loss": 1.3397752046585083,
"step": 202
},
{
"epoch": 0.29737609329446063,
"grad_norm": 0.17219677567481995,
"learning_rate": 1.4913034853403173e-05,
"loss": 1.3317774534225464,
"step": 204
},
{
"epoch": 0.30029154518950435,
"grad_norm": 0.12617312371730804,
"learning_rate": 1.4909529330557714e-05,
"loss": 1.2119510173797607,
"step": 206
},
{
"epoch": 0.3032069970845481,
"grad_norm": 0.14850527048110962,
"learning_rate": 1.4905955020236072e-05,
"loss": 1.385998010635376,
"step": 208
},
{
"epoch": 0.30612244897959184,
"grad_norm": 0.1191219687461853,
"learning_rate": 1.490231195935794e-05,
"loss": 1.5534725189208984,
"step": 210
},
{
"epoch": 0.30903790087463556,
"grad_norm": 0.06989572942256927,
"learning_rate": 1.4898600185553152e-05,
"loss": 1.4775235652923584,
"step": 212
},
{
"epoch": 0.3119533527696793,
"grad_norm": 0.08547376841306686,
"learning_rate": 1.4894819737161285e-05,
"loss": 1.033743977546692,
"step": 214
},
{
"epoch": 0.31486880466472306,
"grad_norm": 0.11992272734642029,
"learning_rate": 1.489097065323127e-05,
"loss": 1.0980379581451416,
"step": 216
},
{
"epoch": 0.3177842565597668,
"grad_norm": 0.30880632996559143,
"learning_rate": 1.488705297352099e-05,
"loss": 1.317891001701355,
"step": 218
},
{
"epoch": 0.3206997084548105,
"grad_norm": 0.6510909795761108,
"learning_rate": 1.4883066738496858e-05,
"loss": 0.9413776993751526,
"step": 220
},
{
"epoch": 0.3236151603498542,
"grad_norm": 0.43388184905052185,
"learning_rate": 1.4879011989333418e-05,
"loss": 1.381697177886963,
"step": 222
},
{
"epoch": 0.32653061224489793,
"grad_norm": 0.21984761953353882,
"learning_rate": 1.4874888767912902e-05,
"loss": 1.2626378536224365,
"step": 224
},
{
"epoch": 0.3294460641399417,
"grad_norm": 0.2687482237815857,
"learning_rate": 1.48706971168248e-05,
"loss": 1.2034857273101807,
"step": 226
},
{
"epoch": 0.3323615160349854,
"grad_norm": 0.08195902407169342,
"learning_rate": 1.4866437079365439e-05,
"loss": 1.2773680686950684,
"step": 228
},
{
"epoch": 0.33527696793002915,
"grad_norm": 0.1009335145354271,
"learning_rate": 1.4862108699537504e-05,
"loss": 1.0853190422058105,
"step": 230
},
{
"epoch": 0.33819241982507287,
"grad_norm": 0.3376968204975128,
"learning_rate": 1.4857712022049617e-05,
"loss": 1.5481150150299072,
"step": 232
},
{
"epoch": 0.34110787172011664,
"grad_norm": 0.7441994547843933,
"learning_rate": 1.4853247092315843e-05,
"loss": 0.9510725140571594,
"step": 234
},
{
"epoch": 0.34402332361516036,
"grad_norm": 0.04717664048075676,
"learning_rate": 1.484871395645525e-05,
"loss": 1.4734127521514893,
"step": 236
},
{
"epoch": 0.3469387755102041,
"grad_norm": 0.7886844873428345,
"learning_rate": 1.4844112661291409e-05,
"loss": 1.3192212581634521,
"step": 238
},
{
"epoch": 0.3498542274052478,
"grad_norm": 0.4841660261154175,
"learning_rate": 1.4839443254351925e-05,
"loss": 1.691177487373352,
"step": 240
},
{
"epoch": 0.35276967930029157,
"grad_norm": 0.06492076069116592,
"learning_rate": 1.4834705783867948e-05,
"loss": 1.329490065574646,
"step": 242
},
{
"epoch": 0.3556851311953353,
"grad_norm": 0.13113148510456085,
"learning_rate": 1.4829900298773655e-05,
"loss": 1.4308984279632568,
"step": 244
},
{
"epoch": 0.358600583090379,
"grad_norm": 0.2137414813041687,
"learning_rate": 1.4825026848705774e-05,
"loss": 1.5191004276275635,
"step": 246
},
{
"epoch": 0.36151603498542273,
"grad_norm": 0.1302558183670044,
"learning_rate": 1.482008548400304e-05,
"loss": 1.1112821102142334,
"step": 248
},
{
"epoch": 0.36443148688046645,
"grad_norm": 0.24971581995487213,
"learning_rate": 1.4815076255705704e-05,
"loss": 1.2628142833709717,
"step": 250
},
{
"epoch": 0.3673469387755102,
"grad_norm": 0.06637357920408249,
"learning_rate": 1.4809999215554978e-05,
"loss": 1.0483888387680054,
"step": 252
},
{
"epoch": 0.37026239067055394,
"grad_norm": 0.16134153306484222,
"learning_rate": 1.4804854415992531e-05,
"loss": 0.8284896612167358,
"step": 254
},
{
"epoch": 0.37317784256559766,
"grad_norm": 0.22190812230110168,
"learning_rate": 1.479964191015992e-05,
"loss": 1.228007197380066,
"step": 256
},
{
"epoch": 0.3760932944606414,
"grad_norm": 0.3965594172477722,
"learning_rate": 1.4794361751898052e-05,
"loss": 1.461411952972412,
"step": 258
},
{
"epoch": 0.37900874635568516,
"grad_norm": 0.08565931022167206,
"learning_rate": 1.4789013995746636e-05,
"loss": 1.33036208152771,
"step": 260
},
{
"epoch": 0.3819241982507289,
"grad_norm": 0.11709296703338623,
"learning_rate": 1.4783598696943603e-05,
"loss": 1.1803240776062012,
"step": 262
},
{
"epoch": 0.3848396501457726,
"grad_norm": 0.15489286184310913,
"learning_rate": 1.4778115911424552e-05,
"loss": 1.234659194946289,
"step": 264
},
{
"epoch": 0.3877551020408163,
"grad_norm": 0.19184595346450806,
"learning_rate": 1.4772565695822158e-05,
"loss": 1.2707804441452026,
"step": 266
},
{
"epoch": 0.39067055393586003,
"grad_norm": 0.1356089860200882,
"learning_rate": 1.4766948107465598e-05,
"loss": 1.192071557044983,
"step": 268
},
{
"epoch": 0.3935860058309038,
"grad_norm": 0.11989542841911316,
"learning_rate": 1.476126320437995e-05,
"loss": 1.391566276550293,
"step": 270
},
{
"epoch": 0.3965014577259475,
"grad_norm": 0.47645920515060425,
"learning_rate": 1.4755511045285605e-05,
"loss": 1.1564279794692993,
"step": 272
},
{
"epoch": 0.39941690962099125,
"grad_norm": 0.4125911593437195,
"learning_rate": 1.4749691689597646e-05,
"loss": 1.536888599395752,
"step": 274
},
{
"epoch": 0.40233236151603496,
"grad_norm": 0.08971330523490906,
"learning_rate": 1.4743805197425243e-05,
"loss": 1.2086325883865356,
"step": 276
},
{
"epoch": 0.40524781341107874,
"grad_norm": 0.08347416669130325,
"learning_rate": 1.4737851629571035e-05,
"loss": 1.190657615661621,
"step": 278
},
{
"epoch": 0.40816326530612246,
"grad_norm": 0.20587654411792755,
"learning_rate": 1.4731831047530493e-05,
"loss": 1.3656525611877441,
"step": 280
},
{
"epoch": 0.4110787172011662,
"grad_norm": 0.22432878613471985,
"learning_rate": 1.4725743513491294e-05,
"loss": 1.1042253971099854,
"step": 282
},
{
"epoch": 0.4139941690962099,
"grad_norm": 0.26549288630485535,
"learning_rate": 1.471958909033267e-05,
"loss": 1.3797943592071533,
"step": 284
},
{
"epoch": 0.41690962099125367,
"grad_norm": 0.15680500864982605,
"learning_rate": 1.4713367841624764e-05,
"loss": 1.3377087116241455,
"step": 286
},
{
"epoch": 0.4198250728862974,
"grad_norm": 0.4737466275691986,
"learning_rate": 1.4707079831627975e-05,
"loss": 1.3034449815750122,
"step": 288
},
{
"epoch": 0.4227405247813411,
"grad_norm": 0.1271553486585617,
"learning_rate": 1.4700725125292288e-05,
"loss": 1.1474194526672363,
"step": 290
},
{
"epoch": 0.42565597667638483,
"grad_norm": 0.06102332845330238,
"learning_rate": 1.469430378825661e-05,
"loss": 1.1918046474456787,
"step": 292
},
{
"epoch": 0.42857142857142855,
"grad_norm": 0.15844929218292236,
"learning_rate": 1.4687815886848083e-05,
"loss": 1.206626296043396,
"step": 294
},
{
"epoch": 0.4314868804664723,
"grad_norm": 0.24055972695350647,
"learning_rate": 1.4681261488081409e-05,
"loss": 1.5187625885009766,
"step": 296
},
{
"epoch": 0.43440233236151604,
"grad_norm": 0.7840580344200134,
"learning_rate": 1.4674640659658149e-05,
"loss": 1.0932797193527222,
"step": 298
},
{
"epoch": 0.43731778425655976,
"grad_norm": 0.10844213515520096,
"learning_rate": 1.4667953469966035e-05,
"loss": 1.1951229572296143,
"step": 300
},
{
"epoch": 0.4402332361516035,
"grad_norm": 0.11183289438486099,
"learning_rate": 1.466119998807825e-05,
"loss": 1.1717019081115723,
"step": 302
},
{
"epoch": 0.44314868804664725,
"grad_norm": 0.30403003096580505,
"learning_rate": 1.4654380283752722e-05,
"loss": 1.4022222757339478,
"step": 304
},
{
"epoch": 0.446064139941691,
"grad_norm": 0.13156169652938843,
"learning_rate": 1.4647494427431404e-05,
"loss": 1.4486730098724365,
"step": 306
},
{
"epoch": 0.4489795918367347,
"grad_norm": 0.1186894103884697,
"learning_rate": 1.4640542490239546e-05,
"loss": 1.2088007926940918,
"step": 308
},
{
"epoch": 0.4518950437317784,
"grad_norm": 0.3326444625854492,
"learning_rate": 1.4633524543984956e-05,
"loss": 1.3544650077819824,
"step": 310
},
{
"epoch": 0.45481049562682213,
"grad_norm": 0.1379825323820114,
"learning_rate": 1.4626440661157263e-05,
"loss": 1.330404281616211,
"step": 312
},
{
"epoch": 0.4577259475218659,
"grad_norm": 0.1476340889930725,
"learning_rate": 1.4619290914927168e-05,
"loss": 1.3507134914398193,
"step": 314
},
{
"epoch": 0.4606413994169096,
"grad_norm": 0.1802261918783188,
"learning_rate": 1.4612075379145683e-05,
"loss": 1.2097649574279785,
"step": 316
},
{
"epoch": 0.46355685131195334,
"grad_norm": 0.12077829986810684,
"learning_rate": 1.460479412834338e-05,
"loss": 1.3490198850631714,
"step": 318
},
{
"epoch": 0.46647230320699706,
"grad_norm": 0.22901231050491333,
"learning_rate": 1.4597447237729602e-05,
"loss": 1.3041571378707886,
"step": 320
},
{
"epoch": 0.46938775510204084,
"grad_norm": 0.1394783854484558,
"learning_rate": 1.4590034783191705e-05,
"loss": 1.3151127099990845,
"step": 322
},
{
"epoch": 0.47230320699708456,
"grad_norm": 0.15815502405166626,
"learning_rate": 1.4582556841294272e-05,
"loss": 1.4624110460281372,
"step": 324
},
{
"epoch": 0.4752186588921283,
"grad_norm": 0.2137562483549118,
"learning_rate": 1.45750134892783e-05,
"loss": 1.4430997371673584,
"step": 326
},
{
"epoch": 0.478134110787172,
"grad_norm": 0.3299601376056671,
"learning_rate": 1.4567404805060432e-05,
"loss": 1.3537228107452393,
"step": 328
},
{
"epoch": 0.48104956268221577,
"grad_norm": 0.21562345325946808,
"learning_rate": 1.4559730867232141e-05,
"loss": 1.169204592704773,
"step": 330
},
{
"epoch": 0.4839650145772595,
"grad_norm": 0.1736089438199997,
"learning_rate": 1.4551991755058902e-05,
"loss": 1.1071885824203491,
"step": 332
},
{
"epoch": 0.4868804664723032,
"grad_norm": 0.1834300458431244,
"learning_rate": 1.45441875484794e-05,
"loss": 1.5676034688949585,
"step": 334
},
{
"epoch": 0.4897959183673469,
"grad_norm": 0.0843748077750206,
"learning_rate": 1.4536318328104693e-05,
"loss": 1.2121503353118896,
"step": 336
},
{
"epoch": 0.49271137026239065,
"grad_norm": 0.36758843064308167,
"learning_rate": 1.452838417521737e-05,
"loss": 1.1275235414505005,
"step": 338
},
{
"epoch": 0.4956268221574344,
"grad_norm": 0.18445612490177155,
"learning_rate": 1.452038517177072e-05,
"loss": 1.3472223281860352,
"step": 340
},
{
"epoch": 0.49854227405247814,
"grad_norm": 0.05781463533639908,
"learning_rate": 1.4512321400387896e-05,
"loss": 1.0872787237167358,
"step": 342
},
{
"epoch": 0.5014577259475219,
"grad_norm": 0.19518744945526123,
"learning_rate": 1.4504192944361035e-05,
"loss": 1.1387406587600708,
"step": 344
},
{
"epoch": 0.5043731778425656,
"grad_norm": 0.12471595406532288,
"learning_rate": 1.4495999887650425e-05,
"loss": 1.2551310062408447,
"step": 346
},
{
"epoch": 0.5072886297376094,
"grad_norm": 0.21368560194969177,
"learning_rate": 1.4487742314883622e-05,
"loss": 1.4745806455612183,
"step": 348
},
{
"epoch": 0.5102040816326531,
"grad_norm": 0.20728199183940887,
"learning_rate": 1.447942031135458e-05,
"loss": 1.3776572942733765,
"step": 350
},
{
"epoch": 0.5131195335276968,
"grad_norm": 0.3676038384437561,
"learning_rate": 1.447103396302277e-05,
"loss": 1.393446922302246,
"step": 352
},
{
"epoch": 0.5160349854227405,
"grad_norm": 0.4812930226325989,
"learning_rate": 1.4462583356512293e-05,
"loss": 1.6455305814743042,
"step": 354
},
{
"epoch": 0.5189504373177842,
"grad_norm": 0.14569929242134094,
"learning_rate": 1.4454068579110982e-05,
"loss": 1.1214039325714111,
"step": 356
},
{
"epoch": 0.521865889212828,
"grad_norm": 0.08566080778837204,
"learning_rate": 1.4445489718769505e-05,
"loss": 1.0862312316894531,
"step": 358
},
{
"epoch": 0.5247813411078717,
"grad_norm": 0.1737866848707199,
"learning_rate": 1.4436846864100454e-05,
"loss": 1.4677766561508179,
"step": 360
},
{
"epoch": 0.5276967930029155,
"grad_norm": 0.24478068947792053,
"learning_rate": 1.4428140104377428e-05,
"loss": 1.4088914394378662,
"step": 362
},
{
"epoch": 0.5306122448979592,
"grad_norm": 0.07167135179042816,
"learning_rate": 1.4419369529534117e-05,
"loss": 1.0589109659194946,
"step": 364
},
{
"epoch": 0.5335276967930029,
"grad_norm": 0.4344414472579956,
"learning_rate": 1.4410535230163361e-05,
"loss": 1.0916839838027954,
"step": 366
},
{
"epoch": 0.5364431486880467,
"grad_norm": 0.1588602066040039,
"learning_rate": 1.440163729751623e-05,
"loss": 1.2339898347854614,
"step": 368
},
{
"epoch": 0.5393586005830904,
"grad_norm": 0.08355646580457687,
"learning_rate": 1.4392675823501075e-05,
"loss": 1.0559823513031006,
"step": 370
},
{
"epoch": 0.5422740524781341,
"grad_norm": 0.09950409084558487,
"learning_rate": 1.4383650900682563e-05,
"loss": 1.1664844751358032,
"step": 372
},
{
"epoch": 0.5451895043731778,
"grad_norm": 0.21663829684257507,
"learning_rate": 1.4374562622280753e-05,
"loss": 1.2800816297531128,
"step": 374
},
{
"epoch": 0.5481049562682215,
"grad_norm": 0.45721420645713806,
"learning_rate": 1.4365411082170105e-05,
"loss": 1.0968526601791382,
"step": 376
},
{
"epoch": 0.5510204081632653,
"grad_norm": 0.34029263257980347,
"learning_rate": 1.435619637487852e-05,
"loss": 1.4795793294906616,
"step": 378
},
{
"epoch": 0.5539358600583091,
"grad_norm": 0.07205039262771606,
"learning_rate": 1.4346918595586371e-05,
"loss": 0.8370588421821594,
"step": 380
},
{
"epoch": 0.5568513119533528,
"grad_norm": 0.12168021500110626,
"learning_rate": 1.4337577840125506e-05,
"loss": 1.2106021642684937,
"step": 382
},
{
"epoch": 0.5597667638483965,
"grad_norm": 0.32209160923957825,
"learning_rate": 1.4328174204978268e-05,
"loss": 1.321066975593567,
"step": 384
},
{
"epoch": 0.5626822157434402,
"grad_norm": 0.2250237762928009,
"learning_rate": 1.4318707787276499e-05,
"loss": 1.292655348777771,
"step": 386
},
{
"epoch": 0.565597667638484,
"grad_norm": 0.2742823362350464,
"learning_rate": 1.4309178684800527e-05,
"loss": 1.2520337104797363,
"step": 388
},
{
"epoch": 0.5685131195335277,
"grad_norm": 0.27688226103782654,
"learning_rate": 1.4299586995978166e-05,
"loss": 1.38676917552948,
"step": 390
},
{
"epoch": 0.5714285714285714,
"grad_norm": 0.2949990928173065,
"learning_rate": 1.4289932819883696e-05,
"loss": 0.8451089262962341,
"step": 392
},
{
"epoch": 0.5743440233236151,
"grad_norm": 0.1089571937918663,
"learning_rate": 1.4280216256236834e-05,
"loss": 1.2847154140472412,
"step": 394
},
{
"epoch": 0.577259475218659,
"grad_norm": 0.19184090197086334,
"learning_rate": 1.427043740540172e-05,
"loss": 1.387587547302246,
"step": 396
},
{
"epoch": 0.5801749271137027,
"grad_norm": 0.54814612865448,
"learning_rate": 1.4260596368385856e-05,
"loss": 1.3909755945205688,
"step": 398
},
{
"epoch": 0.5830903790087464,
"grad_norm": 0.12275420129299164,
"learning_rate": 1.4250693246839092e-05,
"loss": 1.2625775337219238,
"step": 400
},
{
"epoch": 0.5860058309037901,
"grad_norm": 0.7932881712913513,
"learning_rate": 1.4240728143052544e-05,
"loss": 1.2152988910675049,
"step": 402
},
{
"epoch": 0.5889212827988338,
"grad_norm": 0.37155717611312866,
"learning_rate": 1.4230701159957563e-05,
"loss": 1.3423740863800049,
"step": 404
},
{
"epoch": 0.5918367346938775,
"grad_norm": 0.18500366806983948,
"learning_rate": 1.4220612401124663e-05,
"loss": 1.3449385166168213,
"step": 406
},
{
"epoch": 0.5947521865889213,
"grad_norm": 0.11731770634651184,
"learning_rate": 1.4210461970762447e-05,
"loss": 1.1119245290756226,
"step": 408
},
{
"epoch": 0.597667638483965,
"grad_norm": 0.10353056341409683,
"learning_rate": 1.4200249973716534e-05,
"loss": 1.263884425163269,
"step": 410
},
{
"epoch": 0.6005830903790087,
"grad_norm": 0.14419683814048767,
"learning_rate": 1.418997651546848e-05,
"loss": 1.307144284248352,
"step": 412
},
{
"epoch": 0.6034985422740525,
"grad_norm": 0.10403470695018768,
"learning_rate": 1.4179641702134683e-05,
"loss": 1.1156686544418335,
"step": 414
},
{
"epoch": 0.6064139941690962,
"grad_norm": 0.14356708526611328,
"learning_rate": 1.4169245640465292e-05,
"loss": 1.1539418697357178,
"step": 416
},
{
"epoch": 0.60932944606414,
"grad_norm": 0.20612405240535736,
"learning_rate": 1.415878843784309e-05,
"loss": 1.2595444917678833,
"step": 418
},
{
"epoch": 0.6122448979591837,
"grad_norm": 0.11746654659509659,
"learning_rate": 1.414827020228241e-05,
"loss": 1.2829625606536865,
"step": 420
},
{
"epoch": 0.6151603498542274,
"grad_norm": 0.16831901669502258,
"learning_rate": 1.4137691042427996e-05,
"loss": 1.3437942266464233,
"step": 422
},
{
"epoch": 0.6180758017492711,
"grad_norm": 0.35040462017059326,
"learning_rate": 1.4127051067553895e-05,
"loss": 1.4076067209243774,
"step": 424
},
{
"epoch": 0.6209912536443148,
"grad_norm": 0.061461448669433594,
"learning_rate": 1.4116350387562316e-05,
"loss": 1.0884675979614258,
"step": 426
},
{
"epoch": 0.6239067055393586,
"grad_norm": 0.15810243785381317,
"learning_rate": 1.4105589112982514e-05,
"loss": 1.2547569274902344,
"step": 428
},
{
"epoch": 0.6268221574344023,
"grad_norm": 0.8622474074363708,
"learning_rate": 1.4094767354969625e-05,
"loss": 1.3274284601211548,
"step": 430
},
{
"epoch": 0.6297376093294461,
"grad_norm": 0.13593973219394684,
"learning_rate": 1.4083885225303535e-05,
"loss": 1.2320295572280884,
"step": 432
},
{
"epoch": 0.6326530612244898,
"grad_norm": 0.07243333756923676,
"learning_rate": 1.407294283638772e-05,
"loss": 1.4667418003082275,
"step": 434
},
{
"epoch": 0.6355685131195336,
"grad_norm": 0.07801775634288788,
"learning_rate": 1.406194030124808e-05,
"loss": 1.3038822412490845,
"step": 436
},
{
"epoch": 0.6384839650145773,
"grad_norm": 0.304385781288147,
"learning_rate": 1.4050877733531783e-05,
"loss": 1.3447275161743164,
"step": 438
},
{
"epoch": 0.641399416909621,
"grad_norm": 0.10865950584411621,
"learning_rate": 1.4039755247506077e-05,
"loss": 0.6549509167671204,
"step": 440
},
{
"epoch": 0.6443148688046647,
"grad_norm": 0.28575700521469116,
"learning_rate": 1.4028572958057122e-05,
"loss": 1.1795369386672974,
"step": 442
},
{
"epoch": 0.6472303206997084,
"grad_norm": 0.5246424078941345,
"learning_rate": 1.4017330980688798e-05,
"loss": 1.1711264848709106,
"step": 444
},
{
"epoch": 0.6501457725947521,
"grad_norm": 0.18553860485553741,
"learning_rate": 1.400602943152151e-05,
"loss": 1.2232381105422974,
"step": 446
},
{
"epoch": 0.6530612244897959,
"grad_norm": 0.12490701675415039,
"learning_rate": 1.3994668427290992e-05,
"loss": 1.3382079601287842,
"step": 448
},
{
"epoch": 0.6559766763848397,
"grad_norm": 0.22397291660308838,
"learning_rate": 1.3983248085347099e-05,
"loss": 1.3612568378448486,
"step": 450
},
{
"epoch": 0.6588921282798834,
"grad_norm": 0.35306331515312195,
"learning_rate": 1.3971768523652598e-05,
"loss": 1.2464739084243774,
"step": 452
},
{
"epoch": 0.6618075801749271,
"grad_norm": 0.2772669494152069,
"learning_rate": 1.3960229860781952e-05,
"loss": 1.2844020128250122,
"step": 454
},
{
"epoch": 0.6647230320699709,
"grad_norm": 0.10081592947244644,
"learning_rate": 1.3948632215920074e-05,
"loss": 1.2844829559326172,
"step": 456
},
{
"epoch": 0.6676384839650146,
"grad_norm": 0.4896067678928375,
"learning_rate": 1.3936975708861129e-05,
"loss": 1.2661151885986328,
"step": 458
},
{
"epoch": 0.6705539358600583,
"grad_norm": 0.09726856648921967,
"learning_rate": 1.3925260460007276e-05,
"loss": 1.3103440999984741,
"step": 460
},
{
"epoch": 0.673469387755102,
"grad_norm": 0.15830014646053314,
"learning_rate": 1.3913486590367426e-05,
"loss": 1.2458621263504028,
"step": 462
},
{
"epoch": 0.6763848396501457,
"grad_norm": 0.3230348229408264,
"learning_rate": 1.3901654221555998e-05,
"loss": 1.534423589706421,
"step": 464
},
{
"epoch": 0.6793002915451894,
"grad_norm": 0.1509629487991333,
"learning_rate": 1.3889763475791653e-05,
"loss": 1.2820494174957275,
"step": 466
},
{
"epoch": 0.6822157434402333,
"grad_norm": 0.34530624747276306,
"learning_rate": 1.3877814475896049e-05,
"loss": 1.2601618766784668,
"step": 468
},
{
"epoch": 0.685131195335277,
"grad_norm": 0.10481005907058716,
"learning_rate": 1.3865807345292548e-05,
"loss": 1.1044316291809082,
"step": 470
},
{
"epoch": 0.6880466472303207,
"grad_norm": 0.07815049588680267,
"learning_rate": 1.3853742208004967e-05,
"loss": 0.741702139377594,
"step": 472
},
{
"epoch": 0.6909620991253644,
"grad_norm": 0.22590938210487366,
"learning_rate": 1.3841619188656277e-05,
"loss": 1.2955025434494019,
"step": 474
},
{
"epoch": 0.6938775510204082,
"grad_norm": 0.08640377968549728,
"learning_rate": 1.3829438412467324e-05,
"loss": 1.1016216278076172,
"step": 476
},
{
"epoch": 0.6967930029154519,
"grad_norm": 0.09496122598648071,
"learning_rate": 1.3817200005255538e-05,
"loss": 1.1232506036758423,
"step": 478
},
{
"epoch": 0.6997084548104956,
"grad_norm": 0.07495642453432083,
"learning_rate": 1.380490409343363e-05,
"loss": 1.2044416666030884,
"step": 480
},
{
"epoch": 0.7026239067055393,
"grad_norm": 0.339239239692688,
"learning_rate": 1.3792550804008275e-05,
"loss": 1.2485543489456177,
"step": 482
},
{
"epoch": 0.7055393586005831,
"grad_norm": 0.17572255432605743,
"learning_rate": 1.3780140264578833e-05,
"loss": 1.2681964635849,
"step": 484
},
{
"epoch": 0.7084548104956269,
"grad_norm": 0.16934579610824585,
"learning_rate": 1.3767672603335994e-05,
"loss": 1.4810711145401,
"step": 486
},
{
"epoch": 0.7113702623906706,
"grad_norm": 0.04486797749996185,
"learning_rate": 1.375514794906047e-05,
"loss": 1.046045184135437,
"step": 488
},
{
"epoch": 0.7142857142857143,
"grad_norm": 0.700762927532196,
"learning_rate": 1.374256643112167e-05,
"loss": 1.0363354682922363,
"step": 490
},
{
"epoch": 0.717201166180758,
"grad_norm": 0.2569397985935211,
"learning_rate": 1.3729928179476355e-05,
"loss": 1.3074244260787964,
"step": 492
},
{
"epoch": 0.7201166180758017,
"grad_norm": 0.20563913881778717,
"learning_rate": 1.3717233324667303e-05,
"loss": 1.1921494007110596,
"step": 494
},
{
"epoch": 0.7230320699708455,
"grad_norm": 0.201784148812294,
"learning_rate": 1.3704481997821944e-05,
"loss": 1.3657381534576416,
"step": 496
},
{
"epoch": 0.7259475218658892,
"grad_norm": 0.20627616345882416,
"learning_rate": 1.3691674330651038e-05,
"loss": 1.062203288078308,
"step": 498
},
{
"epoch": 0.7288629737609329,
"grad_norm": 0.04925013706088066,
"learning_rate": 1.3678810455447272e-05,
"loss": 1.0565184354782104,
"step": 500
},
{
"epoch": 0.7317784256559767,
"grad_norm": 0.2994559407234192,
"learning_rate": 1.3665890505083932e-05,
"loss": 0.7342221140861511,
"step": 502
},
{
"epoch": 0.7346938775510204,
"grad_norm": 0.2312147170305252,
"learning_rate": 1.365291461301351e-05,
"loss": 1.1462215185165405,
"step": 504
},
{
"epoch": 0.7376093294460642,
"grad_norm": 0.1264645904302597,
"learning_rate": 1.3639882913266321e-05,
"loss": 1.2779966592788696,
"step": 506
},
{
"epoch": 0.7405247813411079,
"grad_norm": 0.09908440709114075,
"learning_rate": 1.3626795540449146e-05,
"loss": 1.0050630569458008,
"step": 508
},
{
"epoch": 0.7434402332361516,
"grad_norm": 0.0948040708899498,
"learning_rate": 1.3613652629743807e-05,
"loss": 0.9955649375915527,
"step": 510
},
{
"epoch": 0.7463556851311953,
"grad_norm": 0.442697137594223,
"learning_rate": 1.3600454316905794e-05,
"loss": 1.2189491987228394,
"step": 512
},
{
"epoch": 0.749271137026239,
"grad_norm": 0.08219840377569199,
"learning_rate": 1.3587200738262852e-05,
"loss": 1.2169828414916992,
"step": 514
},
{
"epoch": 0.7521865889212828,
"grad_norm": 0.39055153727531433,
"learning_rate": 1.3573892030713581e-05,
"loss": 1.1840598583221436,
"step": 516
},
{
"epoch": 0.7551020408163265,
"grad_norm": 0.16979742050170898,
"learning_rate": 1.3560528331726012e-05,
"loss": 1.2608612775802612,
"step": 518
},
{
"epoch": 0.7580174927113703,
"grad_norm": 0.18750780820846558,
"learning_rate": 1.3547109779336198e-05,
"loss": 1.0730546712875366,
"step": 520
},
{
"epoch": 0.760932944606414,
"grad_norm": 0.16917291283607483,
"learning_rate": 1.3533636512146778e-05,
"loss": 0.8358052968978882,
"step": 522
},
{
"epoch": 0.7638483965014577,
"grad_norm": 0.21615351736545563,
"learning_rate": 1.3520108669325555e-05,
"loss": 1.2778382301330566,
"step": 524
},
{
"epoch": 0.7667638483965015,
"grad_norm": 0.2199150174856186,
"learning_rate": 1.350652639060405e-05,
"loss": 1.3584939241409302,
"step": 526
},
{
"epoch": 0.7696793002915452,
"grad_norm": 0.12701602280139923,
"learning_rate": 1.3492889816276057e-05,
"loss": 1.2652432918548584,
"step": 528
},
{
"epoch": 0.7725947521865889,
"grad_norm": 0.2043219953775406,
"learning_rate": 1.3479199087196211e-05,
"loss": 0.9363166093826294,
"step": 530
},
{
"epoch": 0.7755102040816326,
"grad_norm": 0.25679811835289,
"learning_rate": 1.3465454344778514e-05,
"loss": 1.30280601978302,
"step": 532
},
{
"epoch": 0.7784256559766763,
"grad_norm": 0.1782459169626236,
"learning_rate": 1.3451655730994879e-05,
"loss": 0.8852262496948242,
"step": 534
},
{
"epoch": 0.7813411078717201,
"grad_norm": 0.15585428476333618,
"learning_rate": 1.3437803388373673e-05,
"loss": 1.2652050256729126,
"step": 536
},
{
"epoch": 0.7842565597667639,
"grad_norm": 0.28724268078804016,
"learning_rate": 1.3423897459998234e-05,
"loss": 1.5547116994857788,
"step": 538
},
{
"epoch": 0.7871720116618076,
"grad_norm": 0.2500779628753662,
"learning_rate": 1.3409938089505396e-05,
"loss": 1.2525265216827393,
"step": 540
},
{
"epoch": 0.7900874635568513,
"grad_norm": 0.45470234751701355,
"learning_rate": 1.3395925421084008e-05,
"loss": 1.2771704196929932,
"step": 542
},
{
"epoch": 0.793002915451895,
"grad_norm": 0.29030269384384155,
"learning_rate": 1.3381859599473444e-05,
"loss": 1.17940354347229,
"step": 544
},
{
"epoch": 0.7959183673469388,
"grad_norm": 0.49152040481567383,
"learning_rate": 1.3367740769962097e-05,
"loss": 1.2586897611618042,
"step": 546
},
{
"epoch": 0.7988338192419825,
"grad_norm": 0.6251534819602966,
"learning_rate": 1.335356907838591e-05,
"loss": 1.15794837474823,
"step": 548
},
{
"epoch": 0.8017492711370262,
"grad_norm": 1.237188696861267,
"learning_rate": 1.3339344671126823e-05,
"loss": 1.2396069765090942,
"step": 550
},
{
"epoch": 0.8046647230320699,
"grad_norm": 0.18844130635261536,
"learning_rate": 1.3325067695111302e-05,
"loss": 1.3848127126693726,
"step": 552
},
{
"epoch": 0.8075801749271136,
"grad_norm": 0.0720212385058403,
"learning_rate": 1.3310738297808797e-05,
"loss": 1.2827481031417847,
"step": 554
},
{
"epoch": 0.8104956268221575,
"grad_norm": 0.30795788764953613,
"learning_rate": 1.3296356627230233e-05,
"loss": 1.2539678812026978,
"step": 556
},
{
"epoch": 0.8134110787172012,
"grad_norm": 0.12987054884433746,
"learning_rate": 1.328192283192647e-05,
"loss": 1.1838477849960327,
"step": 558
},
{
"epoch": 0.8163265306122449,
"grad_norm": 0.11866369843482971,
"learning_rate": 1.3267437060986776e-05,
"loss": 1.2138683795928955,
"step": 560
},
{
"epoch": 0.8192419825072886,
"grad_norm": 1.3589751720428467,
"learning_rate": 1.3252899464037285e-05,
"loss": 1.241382122039795,
"step": 562
},
{
"epoch": 0.8221574344023324,
"grad_norm": 0.11315155029296875,
"learning_rate": 1.3238310191239449e-05,
"loss": 1.2092612981796265,
"step": 564
},
{
"epoch": 0.8250728862973761,
"grad_norm": 0.16663309931755066,
"learning_rate": 1.3223669393288492e-05,
"loss": 1.3294919729232788,
"step": 566
},
{
"epoch": 0.8279883381924198,
"grad_norm": 0.18580849468708038,
"learning_rate": 1.320897722141185e-05,
"loss": 1.165387749671936,
"step": 568
},
{
"epoch": 0.8309037900874635,
"grad_norm": 0.14969834685325623,
"learning_rate": 1.3194233827367605e-05,
"loss": 1.1585993766784668,
"step": 570
},
{
"epoch": 0.8338192419825073,
"grad_norm": 0.18476836383342743,
"learning_rate": 1.317943936344293e-05,
"loss": 1.2080127000808716,
"step": 572
},
{
"epoch": 0.8367346938775511,
"grad_norm": 0.19693532586097717,
"learning_rate": 1.3164593982452502e-05,
"loss": 1.4070855379104614,
"step": 574
},
{
"epoch": 0.8396501457725948,
"grad_norm": 0.3612503111362457,
"learning_rate": 1.3149697837736932e-05,
"loss": 1.375995397567749,
"step": 576
},
{
"epoch": 0.8425655976676385,
"grad_norm": 0.2689799964427948,
"learning_rate": 1.3134751083161177e-05,
"loss": 1.5882023572921753,
"step": 578
},
{
"epoch": 0.8454810495626822,
"grad_norm": 0.45044106245040894,
"learning_rate": 1.3119753873112952e-05,
"loss": 1.530938744544983,
"step": 580
},
{
"epoch": 0.8483965014577259,
"grad_norm": 0.15131127834320068,
"learning_rate": 1.3104706362501138e-05,
"loss": 1.1275839805603027,
"step": 582
},
{
"epoch": 0.8513119533527697,
"grad_norm": 0.12577542662620544,
"learning_rate": 1.3089608706754179e-05,
"loss": 1.4129434823989868,
"step": 584
},
{
"epoch": 0.8542274052478134,
"grad_norm": 0.2110750824213028,
"learning_rate": 1.3074461061818475e-05,
"loss": 1.1559196710586548,
"step": 586
},
{
"epoch": 0.8571428571428571,
"grad_norm": 0.21649499237537384,
"learning_rate": 1.3059263584156778e-05,
"loss": 1.3160138130187988,
"step": 588
},
{
"epoch": 0.8600583090379009,
"grad_norm": 0.24884088337421417,
"learning_rate": 1.3044016430746563e-05,
"loss": 1.362827181816101,
"step": 590
},
{
"epoch": 0.8629737609329446,
"grad_norm": 0.13489077985286713,
"learning_rate": 1.3028719759078428e-05,
"loss": 0.9931049942970276,
"step": 592
},
{
"epoch": 0.8658892128279884,
"grad_norm": 0.09495119750499725,
"learning_rate": 1.3013373727154437e-05,
"loss": 1.088317632675171,
"step": 594
},
{
"epoch": 0.8688046647230321,
"grad_norm": 0.08689741790294647,
"learning_rate": 1.2997978493486516e-05,
"loss": 1.135114312171936,
"step": 596
},
{
"epoch": 0.8717201166180758,
"grad_norm": 0.11740924417972565,
"learning_rate": 1.2982534217094805e-05,
"loss": 1.1683244705200195,
"step": 598
},
{
"epoch": 0.8746355685131195,
"grad_norm": 0.19883382320404053,
"learning_rate": 1.2967041057506012e-05,
"loss": 1.200365662574768,
"step": 600
},
{
"epoch": 0.8775510204081632,
"grad_norm": 0.1676117181777954,
"learning_rate": 1.2951499174751767e-05,
"loss": 1.17380952835083,
"step": 602
},
{
"epoch": 0.880466472303207,
"grad_norm": 0.10896378010511398,
"learning_rate": 1.2935908729366975e-05,
"loss": 1.1691476106643677,
"step": 604
},
{
"epoch": 0.8833819241982507,
"grad_norm": 0.48385846614837646,
"learning_rate": 1.2920269882388147e-05,
"loss": 1.2547780275344849,
"step": 606
},
{
"epoch": 0.8862973760932945,
"grad_norm": 0.5236583352088928,
"learning_rate": 1.290458279535175e-05,
"loss": 0.9720197916030884,
"step": 608
},
{
"epoch": 0.8892128279883382,
"grad_norm": 0.14302794635295868,
"learning_rate": 1.2888847630292523e-05,
"loss": 0.7114431858062744,
"step": 610
},
{
"epoch": 0.892128279883382,
"grad_norm": 0.24016736447811127,
"learning_rate": 1.287306454974182e-05,
"loss": 1.1511893272399902,
"step": 612
},
{
"epoch": 0.8950437317784257,
"grad_norm": 0.23368032276630402,
"learning_rate": 1.2857233716725915e-05,
"loss": 1.270735740661621,
"step": 614
},
{
"epoch": 0.8979591836734694,
"grad_norm": 0.31318148970603943,
"learning_rate": 1.2841355294764332e-05,
"loss": 0.9339938163757324,
"step": 616
},
{
"epoch": 0.9008746355685131,
"grad_norm": 0.14631935954093933,
"learning_rate": 1.2825429447868144e-05,
"loss": 1.0888878107070923,
"step": 618
},
{
"epoch": 0.9037900874635568,
"grad_norm": 0.05644264817237854,
"learning_rate": 1.2809456340538295e-05,
"loss": 0.6944148540496826,
"step": 620
},
{
"epoch": 0.9067055393586005,
"grad_norm": 0.5780438780784607,
"learning_rate": 1.2793436137763877e-05,
"loss": 1.4030423164367676,
"step": 622
},
{
"epoch": 0.9096209912536443,
"grad_norm": 0.25053542852401733,
"learning_rate": 1.2777369005020443e-05,
"loss": 1.366930603981018,
"step": 624
},
{
"epoch": 0.9125364431486881,
"grad_norm": 0.668838381767273,
"learning_rate": 1.2761255108268305e-05,
"loss": 1.4005160331726074,
"step": 626
},
{
"epoch": 0.9154518950437318,
"grad_norm": 0.39348724484443665,
"learning_rate": 1.2745094613950798e-05,
"loss": 1.3920326232910156,
"step": 628
},
{
"epoch": 0.9183673469387755,
"grad_norm": 0.21188022196292877,
"learning_rate": 1.2728887688992571e-05,
"loss": 1.2693376541137695,
"step": 630
},
{
"epoch": 0.9212827988338192,
"grad_norm": 0.13943858444690704,
"learning_rate": 1.2712634500797868e-05,
"loss": 1.3852614164352417,
"step": 632
},
{
"epoch": 0.924198250728863,
"grad_norm": 0.09973420947790146,
"learning_rate": 1.2696335217248797e-05,
"loss": 1.0728514194488525,
"step": 634
},
{
"epoch": 0.9271137026239067,
"grad_norm": 0.0977744311094284,
"learning_rate": 1.2679990006703583e-05,
"loss": 1.1080187559127808,
"step": 636
},
{
"epoch": 0.9300291545189504,
"grad_norm": 0.09669560194015503,
"learning_rate": 1.2663599037994848e-05,
"loss": 1.101372480392456,
"step": 638
},
{
"epoch": 0.9329446064139941,
"grad_norm": 0.2537369430065155,
"learning_rate": 1.264716248042786e-05,
"loss": 1.2607650756835938,
"step": 640
},
{
"epoch": 0.9358600583090378,
"grad_norm": 0.10567066818475723,
"learning_rate": 1.263068050377877e-05,
"loss": 1.176032304763794,
"step": 642
},
{
"epoch": 0.9387755102040817,
"grad_norm": 0.23190894722938538,
"learning_rate": 1.2614153278292888e-05,
"loss": 1.569797158241272,
"step": 644
},
{
"epoch": 0.9416909620991254,
"grad_norm": 0.11260157078504562,
"learning_rate": 1.259758097468289e-05,
"loss": 1.124619960784912,
"step": 646
},
{
"epoch": 0.9446064139941691,
"grad_norm": 0.10838615894317627,
"learning_rate": 1.2580963764127086e-05,
"loss": 1.0758150815963745,
"step": 648
},
{
"epoch": 0.9475218658892128,
"grad_norm": 0.862457275390625,
"learning_rate": 1.2564301818267634e-05,
"loss": 0.809301495552063,
"step": 650
},
{
"epoch": 0.9504373177842566,
"grad_norm": 0.13666097819805145,
"learning_rate": 1.2547595309208762e-05,
"loss": 1.1373188495635986,
"step": 652
},
{
"epoch": 0.9533527696793003,
"grad_norm": 0.14616422355175018,
"learning_rate": 1.2530844409515015e-05,
"loss": 1.0827115774154663,
"step": 654
},
{
"epoch": 0.956268221574344,
"grad_norm": 0.10559694468975067,
"learning_rate": 1.2514049292209443e-05,
"loss": 0.9751679301261902,
"step": 656
},
{
"epoch": 0.9591836734693877,
"grad_norm": 0.08088317513465881,
"learning_rate": 1.2497210130771838e-05,
"loss": 1.495046854019165,
"step": 658
},
{
"epoch": 0.9620991253644315,
"grad_norm": 0.6228170990943909,
"learning_rate": 1.2480327099136921e-05,
"loss": 1.2217864990234375,
"step": 660
},
{
"epoch": 0.9650145772594753,
"grad_norm": 0.29220765829086304,
"learning_rate": 1.2463400371692567e-05,
"loss": 1.3038297891616821,
"step": 662
},
{
"epoch": 0.967930029154519,
"grad_norm": 0.1476386934518814,
"learning_rate": 1.2446430123277989e-05,
"loss": 1.0814988613128662,
"step": 664
},
{
"epoch": 0.9708454810495627,
"grad_norm": 0.5601685643196106,
"learning_rate": 1.2429416529181928e-05,
"loss": 1.3198177814483643,
"step": 666
},
{
"epoch": 0.9737609329446064,
"grad_norm": 0.11794130504131317,
"learning_rate": 1.2412359765140863e-05,
"loss": 1.2900370359420776,
"step": 668
},
{
"epoch": 0.9766763848396501,
"grad_norm": 0.1333070546388626,
"learning_rate": 1.2395260007337178e-05,
"loss": 1.0969475507736206,
"step": 670
},
{
"epoch": 0.9795918367346939,
"grad_norm": 0.2164296805858612,
"learning_rate": 1.2378117432397344e-05,
"loss": 1.3217947483062744,
"step": 672
},
{
"epoch": 0.9825072886297376,
"grad_norm": 0.1207147017121315,
"learning_rate": 1.2360932217390101e-05,
"loss": 1.1721763610839844,
"step": 674
},
{
"epoch": 0.9854227405247813,
"grad_norm": 0.19854536652565002,
"learning_rate": 1.2343704539824629e-05,
"loss": 0.8384242057800293,
"step": 676
},
{
"epoch": 0.9883381924198251,
"grad_norm": 0.11634889990091324,
"learning_rate": 1.2326434577648703e-05,
"loss": 0.5937544107437134,
"step": 678
},
{
"epoch": 0.9912536443148688,
"grad_norm": 0.21319809556007385,
"learning_rate": 1.2309122509246873e-05,
"loss": 1.211629033088684,
"step": 680
},
{
"epoch": 0.9941690962099126,
"grad_norm": 0.0654364675283432,
"learning_rate": 1.2291768513438603e-05,
"loss": 1.155535340309143,
"step": 682
},
{
"epoch": 0.9970845481049563,
"grad_norm": 0.25669339299201965,
"learning_rate": 1.2274372769476438e-05,
"loss": 1.164899230003357,
"step": 684
},
{
"epoch": 1.0,
"grad_norm": 0.12079296261072159,
"learning_rate": 1.2256935457044149e-05,
"loss": 1.3323872089385986,
"step": 686
},
{
"epoch": 1.0029154518950438,
"grad_norm": 0.15898126363754272,
"learning_rate": 1.223945675625487e-05,
"loss": 0.9407209753990173,
"step": 688
},
{
"epoch": 1.0058309037900874,
"grad_norm": 0.27969345450401306,
"learning_rate": 1.2221936847649244e-05,
"loss": 1.1378577947616577,
"step": 690
},
{
"epoch": 1.0087463556851313,
"grad_norm": 0.25754043459892273,
"learning_rate": 1.220437591219356e-05,
"loss": 1.4397190809249878,
"step": 692
},
{
"epoch": 1.0116618075801749,
"grad_norm": 0.10848913341760635,
"learning_rate": 1.2186774131277878e-05,
"loss": 1.1280958652496338,
"step": 694
},
{
"epoch": 1.0145772594752187,
"grad_norm": 0.1306256800889969,
"learning_rate": 1.2169131686714156e-05,
"loss": 1.099426031112671,
"step": 696
},
{
"epoch": 1.0174927113702623,
"grad_norm": 0.4202571511268616,
"learning_rate": 1.2151448760734381e-05,
"loss": 1.1389104127883911,
"step": 698
},
{
"epoch": 1.0204081632653061,
"grad_norm": 0.24799339473247528,
"learning_rate": 1.2133725535988675e-05,
"loss": 1.1550320386886597,
"step": 700
},
{
"epoch": 1.0233236151603498,
"grad_norm": 0.3226027190685272,
"learning_rate": 1.211596219554341e-05,
"loss": 1.3826884031295776,
"step": 702
},
{
"epoch": 1.0262390670553936,
"grad_norm": 0.16781915724277496,
"learning_rate": 1.209815892287933e-05,
"loss": 1.2842170000076294,
"step": 704
},
{
"epoch": 1.0291545189504374,
"grad_norm": 0.08502925932407379,
"learning_rate": 1.2080315901889638e-05,
"loss": 1.3487895727157593,
"step": 706
},
{
"epoch": 1.032069970845481,
"grad_norm": 0.16372652351856232,
"learning_rate": 1.2062433316878107e-05,
"loss": 1.0846039056777954,
"step": 708
},
{
"epoch": 1.0349854227405249,
"grad_norm": 0.2926742434501648,
"learning_rate": 1.204451135255717e-05,
"loss": 1.3418132066726685,
"step": 710
},
{
"epoch": 1.0379008746355685,
"grad_norm": 0.13081398606300354,
"learning_rate": 1.2026550194046027e-05,
"loss": 1.2699744701385498,
"step": 712
},
{
"epoch": 1.0408163265306123,
"grad_norm": 0.3602919578552246,
"learning_rate": 1.2008550026868707e-05,
"loss": 1.1103326082229614,
"step": 714
},
{
"epoch": 1.043731778425656,
"grad_norm": 0.24668650329113007,
"learning_rate": 1.1990511036952182e-05,
"loss": 1.1811496019363403,
"step": 716
},
{
"epoch": 1.0466472303206997,
"grad_norm": 0.2009333372116089,
"learning_rate": 1.1972433410624415e-05,
"loss": 1.3141359090805054,
"step": 718
},
{
"epoch": 1.0495626822157433,
"grad_norm": 0.4131545126438141,
"learning_rate": 1.1954317334612466e-05,
"loss": 1.1311266422271729,
"step": 720
},
{
"epoch": 1.0524781341107872,
"grad_norm": 0.26808369159698486,
"learning_rate": 1.193616299604054e-05,
"loss": 1.2641208171844482,
"step": 722
},
{
"epoch": 1.055393586005831,
"grad_norm": 0.18929173052310944,
"learning_rate": 1.1917970582428065e-05,
"loss": 1.022256851196289,
"step": 724
},
{
"epoch": 1.0583090379008746,
"grad_norm": 0.07950548082590103,
"learning_rate": 1.1899740281687752e-05,
"loss": 1.1594070196151733,
"step": 726
},
{
"epoch": 1.0612244897959184,
"grad_norm": 0.3975690007209778,
"learning_rate": 1.1881472282123659e-05,
"loss": 1.09200918674469,
"step": 728
},
{
"epoch": 1.064139941690962,
"grad_norm": 0.1322367936372757,
"learning_rate": 1.1863166772429237e-05,
"loss": 1.144595980644226,
"step": 730
},
{
"epoch": 1.0670553935860059,
"grad_norm": 0.13084831833839417,
"learning_rate": 1.1844823941685388e-05,
"loss": 1.233044981956482,
"step": 732
},
{
"epoch": 1.0699708454810495,
"grad_norm": 0.17538310587406158,
"learning_rate": 1.1826443979358511e-05,
"loss": 0.648325502872467,
"step": 734
},
{
"epoch": 1.0728862973760933,
"grad_norm": 0.1613551825284958,
"learning_rate": 1.1808027075298542e-05,
"loss": 1.339321255683899,
"step": 736
},
{
"epoch": 1.075801749271137,
"grad_norm": 0.062147416174411774,
"learning_rate": 1.1789573419736995e-05,
"loss": 1.0158833265304565,
"step": 738
},
{
"epoch": 1.0787172011661808,
"grad_norm": 0.2725241184234619,
"learning_rate": 1.1771083203284994e-05,
"loss": 1.049664855003357,
"step": 740
},
{
"epoch": 1.0816326530612246,
"grad_norm": 0.14118708670139313,
"learning_rate": 1.1752556616931319e-05,
"loss": 1.4558746814727783,
"step": 742
},
{
"epoch": 1.0845481049562682,
"grad_norm": 0.12485146522521973,
"learning_rate": 1.17339938520404e-05,
"loss": 1.067897081375122,
"step": 744
},
{
"epoch": 1.087463556851312,
"grad_norm": 0.14729249477386475,
"learning_rate": 1.1715395100350386e-05,
"loss": 1.2803950309753418,
"step": 746
},
{
"epoch": 1.0903790087463556,
"grad_norm": 0.2967908978462219,
"learning_rate": 1.1696760553971122e-05,
"loss": 1.4100807905197144,
"step": 748
},
{
"epoch": 1.0932944606413995,
"grad_norm": 0.18390890955924988,
"learning_rate": 1.1678090405382191e-05,
"loss": 1.0381572246551514,
"step": 750
},
{
"epoch": 1.096209912536443,
"grad_norm": 0.08851258456707001,
"learning_rate": 1.1659384847430916e-05,
"loss": 1.2206934690475464,
"step": 752
},
{
"epoch": 1.099125364431487,
"grad_norm": 0.1275774985551834,
"learning_rate": 1.1640644073330365e-05,
"loss": 1.258091688156128,
"step": 754
},
{
"epoch": 1.1020408163265305,
"grad_norm": 0.3569571077823639,
"learning_rate": 1.1621868276657371e-05,
"loss": 1.2325845956802368,
"step": 756
},
{
"epoch": 1.1049562682215743,
"grad_norm": 0.2721734642982483,
"learning_rate": 1.1603057651350508e-05,
"loss": 1.0642601251602173,
"step": 758
},
{
"epoch": 1.1078717201166182,
"grad_norm": 0.2617255449295044,
"learning_rate": 1.158421239170811e-05,
"loss": 1.3023701906204224,
"step": 760
},
{
"epoch": 1.1107871720116618,
"grad_norm": 0.1031145453453064,
"learning_rate": 1.156533269238626e-05,
"loss": 0.8144070506095886,
"step": 762
},
{
"epoch": 1.1137026239067056,
"grad_norm": 0.1646541804075241,
"learning_rate": 1.1546418748396758e-05,
"loss": 1.0213180780410767,
"step": 764
},
{
"epoch": 1.1166180758017492,
"grad_norm": 0.3250854015350342,
"learning_rate": 1.1527470755105138e-05,
"loss": 0.9498108625411987,
"step": 766
},
{
"epoch": 1.119533527696793,
"grad_norm": 0.10029526799917221,
"learning_rate": 1.1508488908228629e-05,
"loss": 1.1771409511566162,
"step": 768
},
{
"epoch": 1.1224489795918366,
"grad_norm": 0.09416939318180084,
"learning_rate": 1.1489473403834142e-05,
"loss": 0.5949094891548157,
"step": 770
},
{
"epoch": 1.1253644314868805,
"grad_norm": 0.20775017142295837,
"learning_rate": 1.1470424438336244e-05,
"loss": 0.8676192760467529,
"step": 772
},
{
"epoch": 1.128279883381924,
"grad_norm": 0.24049599468708038,
"learning_rate": 1.145134220849512e-05,
"loss": 1.1979655027389526,
"step": 774
},
{
"epoch": 1.131195335276968,
"grad_norm": 0.320576548576355,
"learning_rate": 1.1432226911414561e-05,
"loss": 1.150422215461731,
"step": 776
},
{
"epoch": 1.1341107871720117,
"grad_norm": 0.08741223067045212,
"learning_rate": 1.1413078744539906e-05,
"loss": 1.1655181646347046,
"step": 778
},
{
"epoch": 1.1370262390670554,
"grad_norm": 0.13662189245224,
"learning_rate": 1.139389790565601e-05,
"loss": 1.1560207605361938,
"step": 780
},
{
"epoch": 1.1399416909620992,
"grad_norm": 0.1589939296245575,
"learning_rate": 1.1374684592885214e-05,
"loss": 1.3467984199523926,
"step": 782
},
{
"epoch": 1.1428571428571428,
"grad_norm": 0.29279693961143494,
"learning_rate": 1.1355439004685278e-05,
"loss": 1.0917768478393555,
"step": 784
},
{
"epoch": 1.1457725947521866,
"grad_norm": 0.5396981835365295,
"learning_rate": 1.1336161339847343e-05,
"loss": 1.131831169128418,
"step": 786
},
{
"epoch": 1.1486880466472302,
"grad_norm": 1.319527506828308,
"learning_rate": 1.1316851797493877e-05,
"loss": 1.287348747253418,
"step": 788
},
{
"epoch": 1.151603498542274,
"grad_norm": 0.24090451002120972,
"learning_rate": 1.1297510577076617e-05,
"loss": 1.196481466293335,
"step": 790
},
{
"epoch": 1.1545189504373177,
"grad_norm": 0.15632812678813934,
"learning_rate": 1.1278137878374507e-05,
"loss": 1.2842094898223877,
"step": 792
},
{
"epoch": 1.1574344023323615,
"grad_norm": 0.1558282971382141,
"learning_rate": 1.1258733901491634e-05,
"loss": 1.160306453704834,
"step": 794
},
{
"epoch": 1.1603498542274053,
"grad_norm": 0.0693809762597084,
"learning_rate": 1.1239298846855166e-05,
"loss": 1.3671103715896606,
"step": 796
},
{
"epoch": 1.163265306122449,
"grad_norm": 0.11606906354427338,
"learning_rate": 1.121983291521328e-05,
"loss": 1.2540158033370972,
"step": 798
},
{
"epoch": 1.1661807580174928,
"grad_norm": 0.5656346082687378,
"learning_rate": 1.1200336307633083e-05,
"loss": 1.095619797706604,
"step": 800
},
{
"epoch": 1.1690962099125364,
"grad_norm": 0.3416520953178406,
"learning_rate": 1.1180809225498542e-05,
"loss": 1.33209228515625,
"step": 802
},
{
"epoch": 1.1720116618075802,
"grad_norm": 0.14092491567134857,
"learning_rate": 1.11612518705084e-05,
"loss": 1.121877670288086,
"step": 804
},
{
"epoch": 1.1749271137026238,
"grad_norm": 0.26185205578804016,
"learning_rate": 1.1141664444674091e-05,
"loss": 1.3565205335617065,
"step": 806
},
{
"epoch": 1.1778425655976676,
"grad_norm": 0.15331599116325378,
"learning_rate": 1.1122047150317665e-05,
"loss": 0.7860437631607056,
"step": 808
},
{
"epoch": 1.1807580174927113,
"grad_norm": 0.25274330377578735,
"learning_rate": 1.110240019006968e-05,
"loss": 0.7633789777755737,
"step": 810
},
{
"epoch": 1.183673469387755,
"grad_norm": 0.1963554322719574,
"learning_rate": 1.1082723766867123e-05,
"loss": 1.133277177810669,
"step": 812
},
{
"epoch": 1.186588921282799,
"grad_norm": 0.33926016092300415,
"learning_rate": 1.1063018083951309e-05,
"loss": 1.0211750268936157,
"step": 814
},
{
"epoch": 1.1895043731778425,
"grad_norm": 0.23344306647777557,
"learning_rate": 1.1043283344865776e-05,
"loss": 1.1373283863067627,
"step": 816
},
{
"epoch": 1.1924198250728864,
"grad_norm": 0.2557908594608307,
"learning_rate": 1.1023519753454203e-05,
"loss": 0.9404536485671997,
"step": 818
},
{
"epoch": 1.19533527696793,
"grad_norm": 1.4168596267700195,
"learning_rate": 1.1003727513858268e-05,
"loss": 1.1765224933624268,
"step": 820
},
{
"epoch": 1.1982507288629738,
"grad_norm": 0.13063687086105347,
"learning_rate": 1.0983906830515584e-05,
"loss": 1.222176432609558,
"step": 822
},
{
"epoch": 1.2011661807580174,
"grad_norm": 0.07739931344985962,
"learning_rate": 1.0964057908157548e-05,
"loss": 1.151648998260498,
"step": 824
},
{
"epoch": 1.2040816326530612,
"grad_norm": 0.07822076976299286,
"learning_rate": 1.094418095180725e-05,
"loss": 1.061394453048706,
"step": 826
},
{
"epoch": 1.2069970845481048,
"grad_norm": 0.14568239450454712,
"learning_rate": 1.0924276166777349e-05,
"loss": 0.7191852927207947,
"step": 828
},
{
"epoch": 1.2099125364431487,
"grad_norm": 0.30981016159057617,
"learning_rate": 1.090434375866795e-05,
"loss": 0.9558042287826538,
"step": 830
},
{
"epoch": 1.2128279883381925,
"grad_norm": 0.2437950074672699,
"learning_rate": 1.0884383933364477e-05,
"loss": 1.1506716012954712,
"step": 832
},
{
"epoch": 1.215743440233236,
"grad_norm": 0.24170175194740295,
"learning_rate": 1.0864396897035558e-05,
"loss": 1.1895190477371216,
"step": 834
},
{
"epoch": 1.21865889212828,
"grad_norm": 0.1518929898738861,
"learning_rate": 1.0844382856130886e-05,
"loss": 1.2491060495376587,
"step": 836
},
{
"epoch": 1.2215743440233235,
"grad_norm": 0.14055992662906647,
"learning_rate": 1.0824342017379089e-05,
"loss": 1.4196858406066895,
"step": 838
},
{
"epoch": 1.2244897959183674,
"grad_norm": 0.18487177789211273,
"learning_rate": 1.0804274587785595e-05,
"loss": 1.0294526815414429,
"step": 840
},
{
"epoch": 1.227405247813411,
"grad_norm": 0.6372827887535095,
"learning_rate": 1.0784180774630495e-05,
"loss": 0.26844465732574463,
"step": 842
},
{
"epoch": 1.2303206997084548,
"grad_norm": 0.15034730732440948,
"learning_rate": 1.0764060785466391e-05,
"loss": 1.2424967288970947,
"step": 844
},
{
"epoch": 1.2332361516034984,
"grad_norm": 0.16668657958507538,
"learning_rate": 1.0743914828116281e-05,
"loss": 1.0989577770233154,
"step": 846
},
{
"epoch": 1.2361516034985423,
"grad_norm": 0.15799511969089508,
"learning_rate": 1.0723743110671378e-05,
"loss": 1.2244020700454712,
"step": 848
},
{
"epoch": 1.239067055393586,
"grad_norm": 0.09745261073112488,
"learning_rate": 1.0703545841488974e-05,
"loss": 1.1401562690734863,
"step": 850
},
{
"epoch": 1.2419825072886297,
"grad_norm": 0.5921195149421692,
"learning_rate": 1.06833232291903e-05,
"loss": 0.7718449234962463,
"step": 852
},
{
"epoch": 1.2448979591836735,
"grad_norm": 0.08858446776866913,
"learning_rate": 1.0663075482658355e-05,
"loss": 1.074745774269104,
"step": 854
},
{
"epoch": 1.2478134110787171,
"grad_norm": 0.22339816391468048,
"learning_rate": 1.0642802811035753e-05,
"loss": 0.6682339310646057,
"step": 856
},
{
"epoch": 1.250728862973761,
"grad_norm": 0.22134488821029663,
"learning_rate": 1.0622505423722566e-05,
"loss": 1.1483386754989624,
"step": 858
},
{
"epoch": 1.2536443148688048,
"grad_norm": 0.34351247549057007,
"learning_rate": 1.0602183530374159e-05,
"loss": 0.9953691959381104,
"step": 860
},
{
"epoch": 1.2565597667638484,
"grad_norm": 0.1252131313085556,
"learning_rate": 1.0581837340899022e-05,
"loss": 1.152267575263977,
"step": 862
},
{
"epoch": 1.259475218658892,
"grad_norm": 0.10258015990257263,
"learning_rate": 1.0561467065456607e-05,
"loss": 1.0798017978668213,
"step": 864
},
{
"epoch": 1.2623906705539358,
"grad_norm": 0.3338652551174164,
"learning_rate": 1.0541072914455152e-05,
"loss": 0.6286276578903198,
"step": 866
},
{
"epoch": 1.2653061224489797,
"grad_norm": 0.18449436128139496,
"learning_rate": 1.0520655098549508e-05,
"loss": 1.1572736501693726,
"step": 868
},
{
"epoch": 1.2682215743440233,
"grad_norm": 0.1656051129102707,
"learning_rate": 1.0500213828638972e-05,
"loss": 1.2729966640472412,
"step": 870
},
{
"epoch": 1.271137026239067,
"grad_norm": 0.1694529801607132,
"learning_rate": 1.0479749315865093e-05,
"loss": 1.1974416971206665,
"step": 872
},
{
"epoch": 1.2740524781341107,
"grad_norm": 0.07350558042526245,
"learning_rate": 1.045926177160951e-05,
"loss": 1.127896785736084,
"step": 874
},
{
"epoch": 1.2769679300291545,
"grad_norm": 0.1753559112548828,
"learning_rate": 1.0438751407491745e-05,
"loss": 1.1373307704925537,
"step": 876
},
{
"epoch": 1.2798833819241984,
"grad_norm": 0.16192442178726196,
"learning_rate": 1.0418218435367043e-05,
"loss": 1.0873537063598633,
"step": 878
},
{
"epoch": 1.282798833819242,
"grad_norm": 0.2647189497947693,
"learning_rate": 1.0397663067324163e-05,
"loss": 0.8994747400283813,
"step": 880
},
{
"epoch": 1.2857142857142856,
"grad_norm": 0.16055135428905487,
"learning_rate": 1.03770855156832e-05,
"loss": 1.1629761457443237,
"step": 882
},
{
"epoch": 1.2886297376093294,
"grad_norm": 0.1312457174062729,
"learning_rate": 1.0356485992993386e-05,
"loss": 1.2289665937423706,
"step": 884
},
{
"epoch": 1.2915451895043732,
"grad_norm": 0.3237832486629486,
"learning_rate": 1.0335864712030895e-05,
"loss": 1.3477158546447754,
"step": 886
},
{
"epoch": 1.2944606413994169,
"grad_norm": 0.11200102418661118,
"learning_rate": 1.0315221885796648e-05,
"loss": 1.1597537994384766,
"step": 888
},
{
"epoch": 1.2973760932944607,
"grad_norm": 0.1582571268081665,
"learning_rate": 1.029455772751411e-05,
"loss": 1.0584282875061035,
"step": 890
},
{
"epoch": 1.3002915451895043,
"grad_norm": 0.2713635563850403,
"learning_rate": 1.0273872450627086e-05,
"loss": 1.065276026725769,
"step": 892
},
{
"epoch": 1.3032069970845481,
"grad_norm": 0.617933988571167,
"learning_rate": 1.025316626879752e-05,
"loss": 1.1870301961898804,
"step": 894
},
{
"epoch": 1.306122448979592,
"grad_norm": 0.24628496170043945,
"learning_rate": 1.0232439395903295e-05,
"loss": 1.3716992139816284,
"step": 896
},
{
"epoch": 1.3090379008746356,
"grad_norm": 0.07092081010341644,
"learning_rate": 1.0211692046036002e-05,
"loss": 1.2022879123687744,
"step": 898
},
{
"epoch": 1.3119533527696792,
"grad_norm": 0.07380987703800201,
"learning_rate": 1.019092443349875e-05,
"loss": 0.9747592806816101,
"step": 900
},
{
"epoch": 1.314868804664723,
"grad_norm": 0.07589751482009888,
"learning_rate": 1.0170136772803948e-05,
"loss": 1.033135175704956,
"step": 902
},
{
"epoch": 1.3177842565597668,
"grad_norm": 0.12000124901533127,
"learning_rate": 1.0149329278671082e-05,
"loss": 1.1944102048873901,
"step": 904
},
{
"epoch": 1.3206997084548104,
"grad_norm": 0.24365442991256714,
"learning_rate": 1.0128502166024497e-05,
"loss": 0.7611994743347168,
"step": 906
},
{
"epoch": 1.3236151603498543,
"grad_norm": 0.5757351517677307,
"learning_rate": 1.0107655649991186e-05,
"loss": 1.0334023237228394,
"step": 908
},
{
"epoch": 1.3265306122448979,
"grad_norm": 0.09015009552240372,
"learning_rate": 1.0086789945898568e-05,
"loss": 1.1387327909469604,
"step": 910
},
{
"epoch": 1.3294460641399417,
"grad_norm": 0.6966755390167236,
"learning_rate": 1.0065905269272245e-05,
"loss": 1.0652743577957153,
"step": 912
},
{
"epoch": 1.3323615160349855,
"grad_norm": 0.08158166706562042,
"learning_rate": 1.0045001835833804e-05,
"loss": 1.154505968093872,
"step": 914
},
{
"epoch": 1.3352769679300291,
"grad_norm": 0.17343761026859283,
"learning_rate": 1.0024079861498566e-05,
"loss": 1.0197257995605469,
"step": 916
},
{
"epoch": 1.3381924198250728,
"grad_norm": 0.3027811050415039,
"learning_rate": 1.0003139562373365e-05,
"loss": 1.3120397329330444,
"step": 918
},
{
"epoch": 1.3411078717201166,
"grad_norm": 0.7201161980628967,
"learning_rate": 9.982181154754323e-06,
"loss": 0.6248821020126343,
"step": 920
},
{
"epoch": 1.3440233236151604,
"grad_norm": 0.06654369831085205,
"learning_rate": 9.961204855124595e-06,
"loss": 1.3484827280044556,
"step": 922
},
{
"epoch": 1.346938775510204,
"grad_norm": 0.3403482437133789,
"learning_rate": 9.940210880152157e-06,
"loss": 1.023748517036438,
"step": 924
},
{
"epoch": 1.3498542274052479,
"grad_norm": 0.3134101629257202,
"learning_rate": 9.91919944668755e-06,
"loss": 1.462807536125183,
"step": 926
},
{
"epoch": 1.3527696793002915,
"grad_norm": 0.12223192304372787,
"learning_rate": 9.89817077176165e-06,
"loss": 1.0908539295196533,
"step": 928
},
{
"epoch": 1.3556851311953353,
"grad_norm": 0.14625874161720276,
"learning_rate": 9.877125072583421e-06,
"loss": 1.2502838373184204,
"step": 930
},
{
"epoch": 1.3586005830903791,
"grad_norm": 0.2647968828678131,
"learning_rate": 9.856062566537677e-06,
"loss": 1.3731303215026855,
"step": 932
},
{
"epoch": 1.3615160349854227,
"grad_norm": 0.14242695271968842,
"learning_rate": 9.834983471182831e-06,
"loss": 1.0232398509979248,
"step": 934
},
{
"epoch": 1.3644314868804663,
"grad_norm": 0.22755105793476105,
"learning_rate": 9.813888004248648e-06,
"loss": 1.1105183362960815,
"step": 936
},
{
"epoch": 1.3673469387755102,
"grad_norm": 0.10210377722978592,
"learning_rate": 9.792776383634002e-06,
"loss": 0.9822967648506165,
"step": 938
},
{
"epoch": 1.370262390670554,
"grad_norm": 0.2081102728843689,
"learning_rate": 9.771648827404617e-06,
"loss": 0.6831743121147156,
"step": 940
},
{
"epoch": 1.3731778425655976,
"grad_norm": 0.195752814412117,
"learning_rate": 9.750505553790823e-06,
"loss": 1.017356514930725,
"step": 942
},
{
"epoch": 1.3760932944606414,
"grad_norm": 0.149446040391922,
"learning_rate": 9.729346781185295e-06,
"loss": 1.2844679355621338,
"step": 944
},
{
"epoch": 1.379008746355685,
"grad_norm": 0.08231537789106369,
"learning_rate": 9.708172728140804e-06,
"loss": 1.2107067108154297,
"step": 946
},
{
"epoch": 1.3819241982507289,
"grad_norm": 0.1436920166015625,
"learning_rate": 9.686983613367947e-06,
"loss": 0.9730831384658813,
"step": 948
},
{
"epoch": 1.3848396501457727,
"grad_norm": 0.13865897059440613,
"learning_rate": 9.665779655732905e-06,
"loss": 1.134727954864502,
"step": 950
},
{
"epoch": 1.3877551020408163,
"grad_norm": 0.1278238445520401,
"learning_rate": 9.644561074255168e-06,
"loss": 1.1596717834472656,
"step": 952
},
{
"epoch": 1.39067055393586,
"grad_norm": 0.13528533279895782,
"learning_rate": 9.62332808810528e-06,
"loss": 1.0845617055892944,
"step": 954
},
{
"epoch": 1.3935860058309038,
"grad_norm": 0.14649415016174316,
"learning_rate": 9.602080916602573e-06,
"loss": 1.223073124885559,
"step": 956
},
{
"epoch": 1.3965014577259476,
"grad_norm": 0.1999201625585556,
"learning_rate": 9.580819779212905e-06,
"loss": 1.0572779178619385,
"step": 958
},
{
"epoch": 1.3994169096209912,
"grad_norm": 0.42912936210632324,
"learning_rate": 9.559544895546393e-06,
"loss": 1.211446762084961,
"step": 960
},
{
"epoch": 1.402332361516035,
"grad_norm": 0.3703382611274719,
"learning_rate": 9.538256485355125e-06,
"loss": 1.1024117469787598,
"step": 962
},
{
"epoch": 1.4052478134110786,
"grad_norm": 0.09566738456487656,
"learning_rate": 9.516954768530924e-06,
"loss": 1.0713633298873901,
"step": 964
},
{
"epoch": 1.4081632653061225,
"grad_norm": 0.13610726594924927,
"learning_rate": 9.49563996510306e-06,
"loss": 1.2085410356521606,
"step": 966
},
{
"epoch": 1.4110787172011663,
"grad_norm": 0.19745762646198273,
"learning_rate": 9.47431229523596e-06,
"loss": 1.0144951343536377,
"step": 968
},
{
"epoch": 1.41399416909621,
"grad_norm": 0.41680532693862915,
"learning_rate": 9.452971979226972e-06,
"loss": 1.0802420377731323,
"step": 970
},
{
"epoch": 1.4169096209912537,
"grad_norm": 0.18726322054862976,
"learning_rate": 9.431619237504052e-06,
"loss": 1.2159126996994019,
"step": 972
},
{
"epoch": 1.4198250728862973,
"grad_norm": 0.4570455551147461,
"learning_rate": 9.410254290623512e-06,
"loss": 1.1028673648834229,
"step": 974
},
{
"epoch": 1.4227405247813412,
"grad_norm": 0.1720321923494339,
"learning_rate": 9.388877359267732e-06,
"loss": 1.053758978843689,
"step": 976
},
{
"epoch": 1.4256559766763848,
"grad_norm": 0.7719082832336426,
"learning_rate": 9.367488664242878e-06,
"loss": 1.0918673276901245,
"step": 978
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.11719834804534912,
"learning_rate": 9.346088426476627e-06,
"loss": 1.1107982397079468,
"step": 980
},
{
"epoch": 1.4314868804664722,
"grad_norm": 0.26357176899909973,
"learning_rate": 9.32467686701589e-06,
"loss": 1.3265354633331299,
"step": 982
},
{
"epoch": 1.434402332361516,
"grad_norm": 0.7194681167602539,
"learning_rate": 9.303254207024509e-06,
"loss": 0.6845600605010986,
"step": 984
},
{
"epoch": 1.4373177842565599,
"grad_norm": 0.19328005611896515,
"learning_rate": 9.28182066778099e-06,
"loss": 1.1066367626190186,
"step": 986
},
{
"epoch": 1.4402332361516035,
"grad_norm": 0.3166584372520447,
"learning_rate": 9.260376470676225e-06,
"loss": 1.0711687803268433,
"step": 988
},
{
"epoch": 1.4431486880466473,
"grad_norm": 0.20059515535831451,
"learning_rate": 9.238921837211175e-06,
"loss": 1.2519899606704712,
"step": 990
},
{
"epoch": 1.446064139941691,
"grad_norm": 0.15826623141765594,
"learning_rate": 9.217456988994608e-06,
"loss": 1.3235565423965454,
"step": 992
},
{
"epoch": 1.4489795918367347,
"grad_norm": 0.19210676848888397,
"learning_rate": 9.1959821477408e-06,
"loss": 1.0224212408065796,
"step": 994
},
{
"epoch": 1.4518950437317784,
"grad_norm": 0.26280826330184937,
"learning_rate": 9.174497535267257e-06,
"loss": 1.1540876626968384,
"step": 996
},
{
"epoch": 1.4548104956268222,
"grad_norm": 0.09911534935235977,
"learning_rate": 9.153003373492395e-06,
"loss": 1.197079062461853,
"step": 998
},
{
"epoch": 1.4577259475218658,
"grad_norm": 0.15191975235939026,
"learning_rate": 9.131499884433285e-06,
"loss": 1.2020612955093384,
"step": 1000
},
{
"epoch": 1.4606413994169096,
"grad_norm": 0.1272922158241272,
"learning_rate": 9.109987290203325e-06,
"loss": 1.1222330331802368,
"step": 1002
},
{
"epoch": 1.4635568513119535,
"grad_norm": 0.17026354372501373,
"learning_rate": 9.088465813009979e-06,
"loss": 1.2111908197402954,
"step": 1004
},
{
"epoch": 1.466472303206997,
"grad_norm": 0.1192101240158081,
"learning_rate": 9.06693567515245e-06,
"loss": 1.186848759651184,
"step": 1006
},
{
"epoch": 1.469387755102041,
"grad_norm": 0.5374306440353394,
"learning_rate": 9.045397099019405e-06,
"loss": 1.1735105514526367,
"step": 1008
},
{
"epoch": 1.4723032069970845,
"grad_norm": 0.14989781379699707,
"learning_rate": 9.02385030708667e-06,
"loss": 1.3269665241241455,
"step": 1010
},
{
"epoch": 1.4752186588921283,
"grad_norm": 0.23181524872779846,
"learning_rate": 9.002295521914934e-06,
"loss": 1.234397292137146,
"step": 1012
},
{
"epoch": 1.478134110787172,
"grad_norm": 0.8318726420402527,
"learning_rate": 8.980732966147451e-06,
"loss": 1.2126901149749756,
"step": 1014
},
{
"epoch": 1.4810495626822158,
"grad_norm": 0.2093929797410965,
"learning_rate": 8.959162862507738e-06,
"loss": 1.0737382173538208,
"step": 1016
},
{
"epoch": 1.4839650145772594,
"grad_norm": 0.2963290214538574,
"learning_rate": 8.937585433797273e-06,
"loss": 0.9138633012771606,
"step": 1018
},
{
"epoch": 1.4868804664723032,
"grad_norm": 0.2868603467941284,
"learning_rate": 8.916000902893199e-06,
"loss": 1.3595247268676758,
"step": 1020
},
{
"epoch": 1.489795918367347,
"grad_norm": 0.11513882875442505,
"learning_rate": 8.894409492746018e-06,
"loss": 1.0969007015228271,
"step": 1022
},
{
"epoch": 1.4927113702623906,
"grad_norm": 0.15273737907409668,
"learning_rate": 8.87281142637729e-06,
"loss": 1.0396068096160889,
"step": 1024
},
{
"epoch": 1.4956268221574345,
"grad_norm": 0.12743119895458221,
"learning_rate": 8.851206926877325e-06,
"loss": 1.21293306350708,
"step": 1026
},
{
"epoch": 1.498542274052478,
"grad_norm": 0.07293698191642761,
"learning_rate": 8.82959621740288e-06,
"loss": 0.8554050922393799,
"step": 1028
},
{
"epoch": 1.501457725947522,
"grad_norm": 0.1396367996931076,
"learning_rate": 8.807979521174866e-06,
"loss": 0.8444166779518127,
"step": 1030
},
{
"epoch": 1.5043731778425657,
"grad_norm": 0.34662795066833496,
"learning_rate": 8.786357061476029e-06,
"loss": 1.1405446529388428,
"step": 1032
},
{
"epoch": 1.5072886297376094,
"grad_norm": 0.2602401673793793,
"learning_rate": 8.764729061648632e-06,
"loss": 1.2988492250442505,
"step": 1034
},
{
"epoch": 1.510204081632653,
"grad_norm": 0.19908583164215088,
"learning_rate": 8.743095745092185e-06,
"loss": 1.2301197052001953,
"step": 1036
},
{
"epoch": 1.5131195335276968,
"grad_norm": 0.20294634997844696,
"learning_rate": 8.721457335261104e-06,
"loss": 0.9326356053352356,
"step": 1038
},
{
"epoch": 1.5160349854227406,
"grad_norm": 0.5687612295150757,
"learning_rate": 8.699814055662417e-06,
"loss": 1.187393069267273,
"step": 1040
},
{
"epoch": 1.5189504373177842,
"grad_norm": 0.27902352809906006,
"learning_rate": 8.678166129853442e-06,
"loss": 1.0565565824508667,
"step": 1042
},
{
"epoch": 1.5218658892128278,
"grad_norm": 0.06307139247655869,
"learning_rate": 8.656513781439512e-06,
"loss": 1.0471357107162476,
"step": 1044
},
{
"epoch": 1.5247813411078717,
"grad_norm": 0.3132034242153168,
"learning_rate": 8.634857234071619e-06,
"loss": 1.3265520334243774,
"step": 1046
},
{
"epoch": 1.5276967930029155,
"grad_norm": 0.25837764143943787,
"learning_rate": 8.613196711444138e-06,
"loss": 1.1429646015167236,
"step": 1048
},
{
"epoch": 1.5306122448979593,
"grad_norm": 0.08677840977907181,
"learning_rate": 8.591532437292502e-06,
"loss": 0.9910908937454224,
"step": 1050
},
{
"epoch": 1.533527696793003,
"grad_norm": 0.283247172832489,
"learning_rate": 8.5698646353909e-06,
"loss": 0.8875013589859009,
"step": 1052
},
{
"epoch": 1.5364431486880465,
"grad_norm": 0.16179129481315613,
"learning_rate": 8.548193529549947e-06,
"loss": 1.1073272228240967,
"step": 1054
},
{
"epoch": 1.5393586005830904,
"grad_norm": 0.12490551173686981,
"learning_rate": 8.526519343614398e-06,
"loss": 0.9769071340560913,
"step": 1056
},
{
"epoch": 1.5422740524781342,
"grad_norm": 0.25089073181152344,
"learning_rate": 8.504842301460815e-06,
"loss": 1.069384217262268,
"step": 1058
},
{
"epoch": 1.5451895043731778,
"grad_norm": 0.22324740886688232,
"learning_rate": 8.483162626995268e-06,
"loss": 1.0800434350967407,
"step": 1060
},
{
"epoch": 1.5481049562682214,
"grad_norm": 0.358711302280426,
"learning_rate": 8.461480544151012e-06,
"loss": 0.8311281204223633,
"step": 1062
},
{
"epoch": 1.5510204081632653,
"grad_norm": 0.35619816184043884,
"learning_rate": 8.439796276886177e-06,
"loss": 1.378959059715271,
"step": 1064
},
{
"epoch": 1.553935860058309,
"grad_norm": 0.07740774750709534,
"learning_rate": 8.418110049181464e-06,
"loss": 0.7135167121887207,
"step": 1066
},
{
"epoch": 1.556851311953353,
"grad_norm": 0.11709576100111008,
"learning_rate": 8.396422085037822e-06,
"loss": 1.1297550201416016,
"step": 1068
},
{
"epoch": 1.5597667638483965,
"grad_norm": 0.1865878850221634,
"learning_rate": 8.374732608474128e-06,
"loss": 1.1906490325927734,
"step": 1070
},
{
"epoch": 1.5626822157434401,
"grad_norm": 0.16431988775730133,
"learning_rate": 8.353041843524886e-06,
"loss": 1.1722774505615234,
"step": 1072
},
{
"epoch": 1.565597667638484,
"grad_norm": 0.36135971546173096,
"learning_rate": 8.331350014237912e-06,
"loss": 1.1067001819610596,
"step": 1074
},
{
"epoch": 1.5685131195335278,
"grad_norm": 0.3832073211669922,
"learning_rate": 8.30965734467201e-06,
"loss": 1.2439948320388794,
"step": 1076
},
{
"epoch": 1.5714285714285714,
"grad_norm": 0.2755753993988037,
"learning_rate": 8.28796405889466e-06,
"loss": 0.6848400831222534,
"step": 1078
},
{
"epoch": 1.574344023323615,
"grad_norm": 0.07128661125898361,
"learning_rate": 8.266270380979723e-06,
"loss": 1.2033002376556396,
"step": 1080
},
{
"epoch": 1.5772594752186588,
"grad_norm": 0.16955770552158356,
"learning_rate": 8.244576535005093e-06,
"loss": 1.2546216249465942,
"step": 1082
},
{
"epoch": 1.5801749271137027,
"grad_norm": 0.702198326587677,
"learning_rate": 8.22288274505041e-06,
"loss": 1.0031241178512573,
"step": 1084
},
{
"epoch": 1.5830903790087465,
"grad_norm": 0.09851932525634766,
"learning_rate": 8.201189235194729e-06,
"loss": 1.171536922454834,
"step": 1086
},
{
"epoch": 1.58600583090379,
"grad_norm": 0.5338625907897949,
"learning_rate": 8.179496229514217e-06,
"loss": 1.0307410955429077,
"step": 1088
},
{
"epoch": 1.5889212827988337,
"grad_norm": 0.17403900623321533,
"learning_rate": 8.157803952079832e-06,
"loss": 1.2256954908370972,
"step": 1090
},
{
"epoch": 1.5918367346938775,
"grad_norm": 0.1747167557477951,
"learning_rate": 8.136112626955005e-06,
"loss": 1.2137948274612427,
"step": 1092
},
{
"epoch": 1.5947521865889214,
"grad_norm": 0.07115664333105087,
"learning_rate": 8.114422478193336e-06,
"loss": 1.0697215795516968,
"step": 1094
},
{
"epoch": 1.597667638483965,
"grad_norm": 0.12972617149353027,
"learning_rate": 8.09273372983628e-06,
"loss": 1.1039892435073853,
"step": 1096
},
{
"epoch": 1.6005830903790086,
"grad_norm": 0.13853909075260162,
"learning_rate": 8.071046605910804e-06,
"loss": 1.186689853668213,
"step": 1098
},
{
"epoch": 1.6034985422740524,
"grad_norm": 0.1802920252084732,
"learning_rate": 8.049361330427129e-06,
"loss": 1.047842025756836,
"step": 1100
},
{
"epoch": 1.6064139941690962,
"grad_norm": 0.15627241134643555,
"learning_rate": 8.027678127376353e-06,
"loss": 1.081397294998169,
"step": 1102
},
{
"epoch": 1.60932944606414,
"grad_norm": 0.13871587812900543,
"learning_rate": 8.005997220728181e-06,
"loss": 1.129719614982605,
"step": 1104
},
{
"epoch": 1.6122448979591837,
"grad_norm": 20.326587677001953,
"learning_rate": 7.984318834428607e-06,
"loss": 1.1785022020339966,
"step": 1106
},
{
"epoch": 1.6151603498542273,
"grad_norm": 0.13852129876613617,
"learning_rate": 7.962643192397574e-06,
"loss": 1.0734182596206665,
"step": 1108
},
{
"epoch": 1.6180758017492711,
"grad_norm": 0.6223950982093811,
"learning_rate": 7.940970518526686e-06,
"loss": 1.1438935995101929,
"step": 1110
},
{
"epoch": 1.620991253644315,
"grad_norm": 0.0528414323925972,
"learning_rate": 7.919301036676892e-06,
"loss": 0.9696015119552612,
"step": 1112
},
{
"epoch": 1.6239067055393586,
"grad_norm": 0.13710257411003113,
"learning_rate": 7.897634970676166e-06,
"loss": 1.1505471467971802,
"step": 1114
},
{
"epoch": 1.6268221574344022,
"grad_norm": 0.16004100441932678,
"learning_rate": 7.875972544317203e-06,
"loss": 1.2167091369628906,
"step": 1116
},
{
"epoch": 1.629737609329446,
"grad_norm": 0.45379891991615295,
"learning_rate": 7.854313981355101e-06,
"loss": 1.131983757019043,
"step": 1118
},
{
"epoch": 1.6326530612244898,
"grad_norm": 0.13307584822177887,
"learning_rate": 7.832659505505048e-06,
"loss": 1.1805908679962158,
"step": 1120
},
{
"epoch": 1.6355685131195337,
"grad_norm": 0.2649403214454651,
"learning_rate": 7.811009340440022e-06,
"loss": 1.2160626649856567,
"step": 1122
},
{
"epoch": 1.6384839650145773,
"grad_norm": 0.16499841213226318,
"learning_rate": 7.789363709788472e-06,
"loss": 1.2312496900558472,
"step": 1124
},
{
"epoch": 1.6413994169096209,
"grad_norm": 0.14581745862960815,
"learning_rate": 7.767722837132008e-06,
"loss": 0.5785539150238037,
"step": 1126
},
{
"epoch": 1.6443148688046647,
"grad_norm": 0.40138673782348633,
"learning_rate": 7.746086946003103e-06,
"loss": 1.102718472480774,
"step": 1128
},
{
"epoch": 1.6472303206997085,
"grad_norm": 0.39575713872909546,
"learning_rate": 7.724456259882758e-06,
"loss": 0.9496442675590515,
"step": 1130
},
{
"epoch": 1.6501457725947521,
"grad_norm": 0.16450181603431702,
"learning_rate": 7.702831002198225e-06,
"loss": 1.1438281536102295,
"step": 1132
},
{
"epoch": 1.6530612244897958,
"grad_norm": 0.10068156570196152,
"learning_rate": 7.68121139632068e-06,
"loss": 1.2390490770339966,
"step": 1134
},
{
"epoch": 1.6559766763848396,
"grad_norm": 0.25964057445526123,
"learning_rate": 7.65959766556292e-06,
"loss": 1.0381125211715698,
"step": 1136
},
{
"epoch": 1.6588921282798834,
"grad_norm": 0.43424177169799805,
"learning_rate": 7.637990033177057e-06,
"loss": 1.109690546989441,
"step": 1138
},
{
"epoch": 1.6618075801749272,
"grad_norm": 0.21539334952831268,
"learning_rate": 7.616388722352214e-06,
"loss": 1.2123034000396729,
"step": 1140
},
{
"epoch": 1.6647230320699709,
"grad_norm": 0.20255622267723083,
"learning_rate": 7.594793956212212e-06,
"loss": 1.217584490776062,
"step": 1142
},
{
"epoch": 1.6676384839650145,
"grad_norm": 0.47754237055778503,
"learning_rate": 7.573205957813276e-06,
"loss": 0.9803376197814941,
"step": 1144
},
{
"epoch": 1.6705539358600583,
"grad_norm": 0.09026843309402466,
"learning_rate": 7.551624950141726e-06,
"loss": 1.1912260055541992,
"step": 1146
},
{
"epoch": 1.6734693877551021,
"grad_norm": 0.11982105672359467,
"learning_rate": 7.530051156111669e-06,
"loss": 1.1396859884262085,
"step": 1148
},
{
"epoch": 1.6763848396501457,
"grad_norm": 0.42154011130332947,
"learning_rate": 7.508484798562707e-06,
"loss": 1.3917794227600098,
"step": 1150
},
{
"epoch": 1.6793002915451893,
"grad_norm": 0.34086376428604126,
"learning_rate": 7.486926100257621e-06,
"loss": 1.1625425815582275,
"step": 1152
},
{
"epoch": 1.6822157434402332,
"grad_norm": 0.33954572677612305,
"learning_rate": 7.465375283880084e-06,
"loss": 1.1317555904388428,
"step": 1154
},
{
"epoch": 1.685131195335277,
"grad_norm": 0.15621435642242432,
"learning_rate": 7.44383257203236e-06,
"loss": 1.0376930236816406,
"step": 1156
},
{
"epoch": 1.6880466472303208,
"grad_norm": 0.16445010900497437,
"learning_rate": 7.422298187232988e-06,
"loss": 0.6347440481185913,
"step": 1158
},
{
"epoch": 1.6909620991253644,
"grad_norm": 0.11221948266029358,
"learning_rate": 7.4007723519145005e-06,
"loss": 1.2130205631256104,
"step": 1160
},
{
"epoch": 1.693877551020408,
"grad_norm": 0.10298870503902435,
"learning_rate": 7.37925528842113e-06,
"loss": 1.0703403949737549,
"step": 1162
},
{
"epoch": 1.6967930029154519,
"grad_norm": 0.05989653244614601,
"learning_rate": 7.357747219006487e-06,
"loss": 1.0500437021255493,
"step": 1164
},
{
"epoch": 1.6997084548104957,
"grad_norm": 0.18388091027736664,
"learning_rate": 7.336248365831293e-06,
"loss": 1.0820516347885132,
"step": 1166
},
{
"epoch": 1.7026239067055393,
"grad_norm": 0.30676501989364624,
"learning_rate": 7.314758950961069e-06,
"loss": 0.8827295303344727,
"step": 1168
},
{
"epoch": 1.7055393586005831,
"grad_norm": 0.1762169450521469,
"learning_rate": 7.293279196363844e-06,
"loss": 1.1642931699752808,
"step": 1170
},
{
"epoch": 1.7084548104956268,
"grad_norm": 0.138104647397995,
"learning_rate": 7.271809323907868e-06,
"loss": 1.3497681617736816,
"step": 1172
},
{
"epoch": 1.7113702623906706,
"grad_norm": 0.04815658926963806,
"learning_rate": 7.250349555359316e-06,
"loss": 0.9686152935028076,
"step": 1174
},
{
"epoch": 1.7142857142857144,
"grad_norm": 0.4449727535247803,
"learning_rate": 7.228900112379993e-06,
"loss": 0.8205754160881042,
"step": 1176
},
{
"epoch": 1.717201166180758,
"grad_norm": 0.19454075396060944,
"learning_rate": 7.2074612165250596e-06,
"loss": 1.1948063373565674,
"step": 1178
},
{
"epoch": 1.7201166180758016,
"grad_norm": 0.1630457043647766,
"learning_rate": 7.18603308924072e-06,
"loss": 1.122542381286621,
"step": 1180
},
{
"epoch": 1.7230320699708455,
"grad_norm": 0.2632548213005066,
"learning_rate": 7.164615951861958e-06,
"loss": 1.2288137674331665,
"step": 1182
},
{
"epoch": 1.7259475218658893,
"grad_norm": 0.185108482837677,
"learning_rate": 7.143210025610238e-06,
"loss": 1.029456615447998,
"step": 1184
},
{
"epoch": 1.728862973760933,
"grad_norm": 0.06753533333539963,
"learning_rate": 7.121815531591222e-06,
"loss": 0.9876729846000671,
"step": 1186
},
{
"epoch": 1.7317784256559767,
"grad_norm": 0.16401244699954987,
"learning_rate": 7.100432690792484e-06,
"loss": 0.6059045791625977,
"step": 1188
},
{
"epoch": 1.7346938775510203,
"grad_norm": 0.2957839369773865,
"learning_rate": 7.0790617240812374e-06,
"loss": 1.0509564876556396,
"step": 1190
},
{
"epoch": 1.7376093294460642,
"grad_norm": 0.13618314266204834,
"learning_rate": 7.057702852202037e-06,
"loss": 1.1775768995285034,
"step": 1192
},
{
"epoch": 1.740524781341108,
"grad_norm": 0.171565443277359,
"learning_rate": 7.0363562957745105e-06,
"loss": 0.9801825881004333,
"step": 1194
},
{
"epoch": 1.7434402332361516,
"grad_norm": 0.09507802128791809,
"learning_rate": 7.015022275291084e-06,
"loss": 0.969845175743103,
"step": 1196
},
{
"epoch": 1.7463556851311952,
"grad_norm": 0.49828192591667175,
"learning_rate": 6.993701011114686e-06,
"loss": 0.9284896850585938,
"step": 1198
},
{
"epoch": 1.749271137026239,
"grad_norm": 0.10986272245645523,
"learning_rate": 6.972392723476494e-06,
"loss": 1.1610954999923706,
"step": 1200
},
{
"epoch": 1.7521865889212829,
"grad_norm": 0.36414283514022827,
"learning_rate": 6.9510976324736415e-06,
"loss": 0.9902899861335754,
"step": 1202
},
{
"epoch": 1.7551020408163265,
"grad_norm": 0.15007393062114716,
"learning_rate": 6.929815958066951e-06,
"loss": 1.1686747074127197,
"step": 1204
},
{
"epoch": 1.7580174927113703,
"grad_norm": 0.09150854498147964,
"learning_rate": 6.908547920078671e-06,
"loss": 0.9296596050262451,
"step": 1206
},
{
"epoch": 1.760932944606414,
"grad_norm": 0.13725019991397858,
"learning_rate": 6.887293738190183e-06,
"loss": 0.6867948174476624,
"step": 1208
},
{
"epoch": 1.7638483965014577,
"grad_norm": 0.2506777346134186,
"learning_rate": 6.866053631939756e-06,
"loss": 1.1812880039215088,
"step": 1210
},
{
"epoch": 1.7667638483965016,
"grad_norm": 0.24459925293922424,
"learning_rate": 6.844827820720275e-06,
"loss": 1.233087420463562,
"step": 1212
},
{
"epoch": 1.7696793002915452,
"grad_norm": 0.18725088238716125,
"learning_rate": 6.8236165237769555e-06,
"loss": 1.0703694820404053,
"step": 1214
},
{
"epoch": 1.7725947521865888,
"grad_norm": 0.08817660808563232,
"learning_rate": 6.802419960205095e-06,
"loss": 0.9150586724281311,
"step": 1216
},
{
"epoch": 1.7755102040816326,
"grad_norm": 0.24206826090812683,
"learning_rate": 6.7812383489478216e-06,
"loss": 1.2116329669952393,
"step": 1218
},
{
"epoch": 1.7784256559766765,
"grad_norm": 0.13627009093761444,
"learning_rate": 6.760071908793796e-06,
"loss": 0.6978607177734375,
"step": 1220
},
{
"epoch": 1.78134110787172,
"grad_norm": 0.19865363836288452,
"learning_rate": 6.738920858374991e-06,
"loss": 1.0590617656707764,
"step": 1222
},
{
"epoch": 1.784256559766764,
"grad_norm": 0.4059164524078369,
"learning_rate": 6.717785416164414e-06,
"loss": 1.38783860206604,
"step": 1224
},
{
"epoch": 1.7871720116618075,
"grad_norm": 0.2919604480266571,
"learning_rate": 6.696665800473842e-06,
"loss": 1.1487404108047485,
"step": 1226
},
{
"epoch": 1.7900874635568513,
"grad_norm": 0.1517525017261505,
"learning_rate": 6.675562229451589e-06,
"loss": 1.206036925315857,
"step": 1228
},
{
"epoch": 1.7930029154518952,
"grad_norm": 0.2847557067871094,
"learning_rate": 6.6544749210802305e-06,
"loss": 0.8351743817329407,
"step": 1230
},
{
"epoch": 1.7959183673469388,
"grad_norm": 0.2792437672615051,
"learning_rate": 6.633404093174371e-06,
"loss": 0.9937669634819031,
"step": 1232
},
{
"epoch": 1.7988338192419824,
"grad_norm": 0.39450135827064514,
"learning_rate": 6.612349963378381e-06,
"loss": 0.9253970980644226,
"step": 1234
},
{
"epoch": 1.8017492711370262,
"grad_norm": 0.26529014110565186,
"learning_rate": 6.591312749164154e-06,
"loss": 1.1452049016952515,
"step": 1236
},
{
"epoch": 1.80466472303207,
"grad_norm": 0.23458294570446014,
"learning_rate": 6.570292667828856e-06,
"loss": 1.2078217267990112,
"step": 1238
},
{
"epoch": 1.8075801749271136,
"grad_norm": 0.13832348585128784,
"learning_rate": 6.549289936492693e-06,
"loss": 1.2237412929534912,
"step": 1240
},
{
"epoch": 1.8104956268221575,
"grad_norm": 0.08728086948394775,
"learning_rate": 6.5283047720966505e-06,
"loss": 1.1127595901489258,
"step": 1242
},
{
"epoch": 1.813411078717201,
"grad_norm": 0.2100764364004135,
"learning_rate": 6.5073373914002656e-06,
"loss": 1.0868037939071655,
"step": 1244
},
{
"epoch": 1.816326530612245,
"grad_norm": 0.13499869406223297,
"learning_rate": 6.486388010979388e-06,
"loss": 1.119627833366394,
"step": 1246
},
{
"epoch": 1.8192419825072887,
"grad_norm": 0.34346649050712585,
"learning_rate": 6.465456847223932e-06,
"loss": 1.0318715572357178,
"step": 1248
},
{
"epoch": 1.8221574344023324,
"grad_norm": 0.07944006472826004,
"learning_rate": 6.444544116335655e-06,
"loss": 1.1757546663284302,
"step": 1250
},
{
"epoch": 1.825072886297376,
"grad_norm": 0.2944159209728241,
"learning_rate": 6.423650034325915e-06,
"loss": 1.2396355867385864,
"step": 1252
},
{
"epoch": 1.8279883381924198,
"grad_norm": 0.18287204205989838,
"learning_rate": 6.402774817013442e-06,
"loss": 1.097105860710144,
"step": 1254
},
{
"epoch": 1.8309037900874636,
"grad_norm": 0.141254261136055,
"learning_rate": 6.381918680022112e-06,
"loss": 1.0068081617355347,
"step": 1256
},
{
"epoch": 1.8338192419825075,
"grad_norm": 0.17386725544929504,
"learning_rate": 6.36108183877871e-06,
"loss": 1.1032158136367798,
"step": 1258
},
{
"epoch": 1.836734693877551,
"grad_norm": 0.22268234193325043,
"learning_rate": 6.3402645085107224e-06,
"loss": 1.2912282943725586,
"step": 1260
},
{
"epoch": 1.8396501457725947,
"grad_norm": 0.411150723695755,
"learning_rate": 6.3194669042440976e-06,
"loss": 1.129095196723938,
"step": 1262
},
{
"epoch": 1.8425655976676385,
"grad_norm": 0.3001119792461395,
"learning_rate": 6.298689240801026e-06,
"loss": 1.365820050239563,
"step": 1264
},
{
"epoch": 1.8454810495626823,
"grad_norm": 0.36252474784851074,
"learning_rate": 6.277931732797732e-06,
"loss": 1.3998820781707764,
"step": 1266
},
{
"epoch": 1.848396501457726,
"grad_norm": 0.29093074798583984,
"learning_rate": 6.257194594642254e-06,
"loss": 1.0682395696640015,
"step": 1268
},
{
"epoch": 1.8513119533527695,
"grad_norm": 0.13126376271247864,
"learning_rate": 6.236478040532214e-06,
"loss": 1.0302337408065796,
"step": 1270
},
{
"epoch": 1.8542274052478134,
"grad_norm": 0.1628250777721405,
"learning_rate": 6.215782284452628e-06,
"loss": 1.098158359527588,
"step": 1272
},
{
"epoch": 1.8571428571428572,
"grad_norm": 0.20393933355808258,
"learning_rate": 6.195107540173687e-06,
"loss": 1.1833226680755615,
"step": 1274
},
{
"epoch": 1.860058309037901,
"grad_norm": 0.2242426872253418,
"learning_rate": 6.174454021248537e-06,
"loss": 1.2466531991958618,
"step": 1276
},
{
"epoch": 1.8629737609329446,
"grad_norm": 0.1543884128332138,
"learning_rate": 6.15382194101109e-06,
"loss": 0.9692124724388123,
"step": 1278
},
{
"epoch": 1.8658892128279883,
"grad_norm": 0.10594581812620163,
"learning_rate": 6.133211512573819e-06,
"loss": 1.0277884006500244,
"step": 1280
},
{
"epoch": 1.868804664723032,
"grad_norm": 0.1760384440422058,
"learning_rate": 6.1126229488255416e-06,
"loss": 1.0745232105255127,
"step": 1282
},
{
"epoch": 1.871720116618076,
"grad_norm": 0.11243575066328049,
"learning_rate": 6.092056462429238e-06,
"loss": 1.11955988407135,
"step": 1284
},
{
"epoch": 1.8746355685131195,
"grad_norm": 0.3004339337348938,
"learning_rate": 6.071512265819841e-06,
"loss": 1.1129993200302124,
"step": 1286
},
{
"epoch": 1.8775510204081631,
"grad_norm": 0.1870323270559311,
"learning_rate": 6.0509905712020554e-06,
"loss": 1.1004483699798584,
"step": 1288
},
{
"epoch": 1.880466472303207,
"grad_norm": 0.15390393137931824,
"learning_rate": 6.030491590548157e-06,
"loss": 1.1051290035247803,
"step": 1290
},
{
"epoch": 1.8833819241982508,
"grad_norm": 0.17591705918312073,
"learning_rate": 6.010015535595802e-06,
"loss": 1.19423246383667,
"step": 1292
},
{
"epoch": 1.8862973760932946,
"grad_norm": 0.517492413520813,
"learning_rate": 5.989562617845843e-06,
"loss": 0.7528221011161804,
"step": 1294
},
{
"epoch": 1.8892128279883382,
"grad_norm": 0.2763058543205261,
"learning_rate": 5.969133048560151e-06,
"loss": 0.6028561592102051,
"step": 1296
},
{
"epoch": 1.8921282798833818,
"grad_norm": 0.1741061955690384,
"learning_rate": 5.948727038759415e-06,
"loss": 0.9944829344749451,
"step": 1298
},
{
"epoch": 1.8950437317784257,
"grad_norm": 0.3421262204647064,
"learning_rate": 5.928344799220985e-06,
"loss": 1.118728756904602,
"step": 1300
},
{
"epoch": 1.8979591836734695,
"grad_norm": 0.42300957441329956,
"learning_rate": 5.907986540476678e-06,
"loss": 0.7158623337745667,
"step": 1302
},
{
"epoch": 1.900874635568513,
"grad_norm": 0.14869055151939392,
"learning_rate": 5.887652472810609e-06,
"loss": 1.0393644571304321,
"step": 1304
},
{
"epoch": 1.9037900874635567,
"grad_norm": 0.07201150804758072,
"learning_rate": 5.86734280625702e-06,
"loss": 0.5461652874946594,
"step": 1306
},
{
"epoch": 1.9067055393586005,
"grad_norm": 0.6429765820503235,
"learning_rate": 5.847057750598111e-06,
"loss": 1.1324551105499268,
"step": 1308
},
{
"epoch": 1.9096209912536444,
"grad_norm": 0.18680232763290405,
"learning_rate": 5.826797515361868e-06,
"loss": 1.274292230606079,
"step": 1310
},
{
"epoch": 1.9125364431486882,
"grad_norm": 0.1953829973936081,
"learning_rate": 5.806562309819909e-06,
"loss": 1.2884361743927002,
"step": 1312
},
{
"epoch": 1.9154518950437318,
"grad_norm": 0.28342682123184204,
"learning_rate": 5.7863523429853055e-06,
"loss": 1.279549479484558,
"step": 1314
},
{
"epoch": 1.9183673469387754,
"grad_norm": 0.45169350504875183,
"learning_rate": 5.766167823610443e-06,
"loss": 1.074336051940918,
"step": 1316
},
{
"epoch": 1.9212827988338192,
"grad_norm": 0.18884071707725525,
"learning_rate": 5.746008960184852e-06,
"loss": 1.262738585472107,
"step": 1318
},
{
"epoch": 1.924198250728863,
"grad_norm": 0.059031542390584946,
"learning_rate": 5.725875960933058e-06,
"loss": 1.0195709466934204,
"step": 1320
},
{
"epoch": 1.9271137026239067,
"grad_norm": 0.11774204671382904,
"learning_rate": 5.705769033812431e-06,
"loss": 1.04592764377594,
"step": 1322
},
{
"epoch": 1.9300291545189503,
"grad_norm": 0.13104864954948425,
"learning_rate": 5.685688386511041e-06,
"loss": 1.0482321977615356,
"step": 1324
},
{
"epoch": 1.9329446064139941,
"grad_norm": 0.15567655861377716,
"learning_rate": 5.665634226445501e-06,
"loss": 1.2044618129730225,
"step": 1326
},
{
"epoch": 1.935860058309038,
"grad_norm": 0.14479920268058777,
"learning_rate": 5.645606760758836e-06,
"loss": 1.0985395908355713,
"step": 1328
},
{
"epoch": 1.9387755102040818,
"grad_norm": 0.1920030266046524,
"learning_rate": 5.625606196318347e-06,
"loss": 1.4523109197616577,
"step": 1330
},
{
"epoch": 1.9416909620991254,
"grad_norm": 0.2637879252433777,
"learning_rate": 5.605632739713456e-06,
"loss": 1.0658267736434937,
"step": 1332
},
{
"epoch": 1.944606413994169,
"grad_norm": 0.08796999603509903,
"learning_rate": 5.585686597253593e-06,
"loss": 1.0220710039138794,
"step": 1334
},
{
"epoch": 1.9475218658892128,
"grad_norm": 0.4936763644218445,
"learning_rate": 5.5657679749660455e-06,
"loss": 0.5359926223754883,
"step": 1336
},
{
"epoch": 1.9504373177842567,
"grad_norm": 0.25524938106536865,
"learning_rate": 5.545877078593849e-06,
"loss": 1.0832246541976929,
"step": 1338
},
{
"epoch": 1.9533527696793003,
"grad_norm": 0.3815828263759613,
"learning_rate": 5.52601411359365e-06,
"loss": 1.0333139896392822,
"step": 1340
},
{
"epoch": 1.9562682215743439,
"grad_norm": 0.1364160180091858,
"learning_rate": 5.506179285133582e-06,
"loss": 0.8447660207748413,
"step": 1342
},
{
"epoch": 1.9591836734693877,
"grad_norm": 0.22036899626255035,
"learning_rate": 5.486372798091161e-06,
"loss": 1.4143515825271606,
"step": 1344
},
{
"epoch": 1.9620991253644315,
"grad_norm": 0.4314256012439728,
"learning_rate": 5.466594857051153e-06,
"loss": 0.9990249276161194,
"step": 1346
},
{
"epoch": 1.9650145772594754,
"grad_norm": 0.15996676683425903,
"learning_rate": 5.4468456663034635e-06,
"loss": 1.2198452949523926,
"step": 1348
},
{
"epoch": 1.967930029154519,
"grad_norm": 0.19972719252109528,
"learning_rate": 5.427125429841039e-06,
"loss": 1.0296826362609863,
"step": 1350
},
{
"epoch": 1.9708454810495626,
"grad_norm": 0.1828991174697876,
"learning_rate": 5.4074343513577536e-06,
"loss": 1.2304623126983643,
"step": 1352
},
{
"epoch": 1.9737609329446064,
"grad_norm": 0.2502359449863434,
"learning_rate": 5.387772634246287e-06,
"loss": 1.1169551610946655,
"step": 1354
},
{
"epoch": 1.9766763848396502,
"grad_norm": 0.1563616245985031,
"learning_rate": 5.36814048159606e-06,
"loss": 0.818549633026123,
"step": 1356
},
{
"epoch": 1.9795918367346939,
"grad_norm": 0.08790906518697739,
"learning_rate": 5.348538096191109e-06,
"loss": 1.2132847309112549,
"step": 1358
},
{
"epoch": 1.9825072886297375,
"grad_norm": 0.3884468376636505,
"learning_rate": 5.328965680507991e-06,
"loss": 1.1513258218765259,
"step": 1360
},
{
"epoch": 1.9854227405247813,
"grad_norm": 0.24757881462574005,
"learning_rate": 5.309423436713714e-06,
"loss": 0.6811099052429199,
"step": 1362
},
{
"epoch": 1.9883381924198251,
"grad_norm": 0.0917486697435379,
"learning_rate": 5.289911566663626e-06,
"loss": 0.5249199271202087,
"step": 1364
},
{
"epoch": 1.991253644314869,
"grad_norm": 0.3590066432952881,
"learning_rate": 5.270430271899342e-06,
"loss": 1.1386462450027466,
"step": 1366
},
{
"epoch": 1.9941690962099126,
"grad_norm": 0.0781368613243103,
"learning_rate": 5.250979753646664e-06,
"loss": 1.0840882062911987,
"step": 1368
},
{
"epoch": 1.9970845481049562,
"grad_norm": 0.3470701277256012,
"learning_rate": 5.231560212813487e-06,
"loss": 1.0490968227386475,
"step": 1370
},
{
"epoch": 2.0,
"grad_norm": 0.13662609457969666,
"learning_rate": 5.212171849987743e-06,
"loss": 1.1986355781555176,
"step": 1372
},
{
"epoch": 2.002915451895044,
"grad_norm": 0.15793374180793762,
"learning_rate": 5.1928148654353196e-06,
"loss": 0.921393871307373,
"step": 1374
},
{
"epoch": 2.0058309037900877,
"grad_norm": 0.4891752600669861,
"learning_rate": 5.17348945909799e-06,
"loss": 0.9690005779266357,
"step": 1376
},
{
"epoch": 2.008746355685131,
"grad_norm": 0.2033310979604721,
"learning_rate": 5.1541958305913536e-06,
"loss": 1.3568806648254395,
"step": 1378
},
{
"epoch": 2.011661807580175,
"grad_norm": 0.1594112515449524,
"learning_rate": 5.134934179202771e-06,
"loss": 1.033390998840332,
"step": 1380
},
{
"epoch": 2.0145772594752187,
"grad_norm": 0.2081524133682251,
"learning_rate": 5.115704703889299e-06,
"loss": 1.0304166078567505,
"step": 1382
},
{
"epoch": 2.0174927113702625,
"grad_norm": 0.38243576884269714,
"learning_rate": 5.096507603275648e-06,
"loss": 0.9502314925193787,
"step": 1384
},
{
"epoch": 2.020408163265306,
"grad_norm": 0.06100543960928917,
"learning_rate": 5.077343075652124e-06,
"loss": 1.1048611402511597,
"step": 1386
},
{
"epoch": 2.0233236151603498,
"grad_norm": 0.386870414018631,
"learning_rate": 5.058211318972581e-06,
"loss": 1.2929866313934326,
"step": 1388
},
{
"epoch": 2.0262390670553936,
"grad_norm": 0.1502365618944168,
"learning_rate": 5.0391125308523744e-06,
"loss": 1.2062195539474487,
"step": 1390
},
{
"epoch": 2.0291545189504374,
"grad_norm": 0.46698620915412903,
"learning_rate": 5.020046908566317e-06,
"loss": 1.2675377130508423,
"step": 1392
},
{
"epoch": 2.0320699708454812,
"grad_norm": 0.2170051783323288,
"learning_rate": 5.001014649046655e-06,
"loss": 1.0185376405715942,
"step": 1394
},
{
"epoch": 2.0349854227405246,
"grad_norm": 0.5570895671844482,
"learning_rate": 4.98201594888102e-06,
"loss": 1.1238821744918823,
"step": 1396
},
{
"epoch": 2.0379008746355685,
"grad_norm": 0.19649037718772888,
"learning_rate": 4.963051004310397e-06,
"loss": 1.1577717065811157,
"step": 1398
},
{
"epoch": 2.0408163265306123,
"grad_norm": 0.3043438494205475,
"learning_rate": 4.944120011227115e-06,
"loss": 0.945805549621582,
"step": 1400
},
{
"epoch": 2.043731778425656,
"grad_norm": 0.8879981637001038,
"learning_rate": 4.925223165172808e-06,
"loss": 1.0322425365447998,
"step": 1402
},
{
"epoch": 2.0466472303206995,
"grad_norm": 0.26241424679756165,
"learning_rate": 4.906360661336394e-06,
"loss": 1.2149442434310913,
"step": 1404
},
{
"epoch": 2.0495626822157433,
"grad_norm": 0.8886216878890991,
"learning_rate": 4.887532694552066e-06,
"loss": 1.0274255275726318,
"step": 1406
},
{
"epoch": 2.052478134110787,
"grad_norm": 0.21257859468460083,
"learning_rate": 4.868739459297286e-06,
"loss": 1.1855621337890625,
"step": 1408
},
{
"epoch": 2.055393586005831,
"grad_norm": 0.14593669772148132,
"learning_rate": 4.8499811496907506e-06,
"loss": 0.7928017377853394,
"step": 1410
},
{
"epoch": 2.058309037900875,
"grad_norm": 0.06642908602952957,
"learning_rate": 4.831257959490425e-06,
"loss": 1.0738983154296875,
"step": 1412
},
{
"epoch": 2.061224489795918,
"grad_norm": 0.3109600841999054,
"learning_rate": 4.812570082091498e-06,
"loss": 0.8972907662391663,
"step": 1414
},
{
"epoch": 2.064139941690962,
"grad_norm": 0.13277745246887207,
"learning_rate": 4.793917710524422e-06,
"loss": 1.0650956630706787,
"step": 1416
},
{
"epoch": 2.067055393586006,
"grad_norm": 0.14433449506759644,
"learning_rate": 4.775301037452898e-06,
"loss": 1.1586172580718994,
"step": 1418
},
{
"epoch": 2.0699708454810497,
"grad_norm": 0.15220968425273895,
"learning_rate": 4.756720255171887e-06,
"loss": 0.5742167234420776,
"step": 1420
},
{
"epoch": 2.072886297376093,
"grad_norm": 0.126608744263649,
"learning_rate": 4.738175555605632e-06,
"loss": 1.242780327796936,
"step": 1422
},
{
"epoch": 2.075801749271137,
"grad_norm": 0.10246127843856812,
"learning_rate": 4.719667130305671e-06,
"loss": 0.9981814622879028,
"step": 1424
},
{
"epoch": 2.0787172011661808,
"grad_norm": 0.2460668534040451,
"learning_rate": 4.701195170448857e-06,
"loss": 0.8302922248840332,
"step": 1426
},
{
"epoch": 2.0816326530612246,
"grad_norm": 0.155581995844841,
"learning_rate": 4.682759866835388e-06,
"loss": 1.3268355131149292,
"step": 1428
},
{
"epoch": 2.0845481049562684,
"grad_norm": 0.10044138133525848,
"learning_rate": 4.664361409886829e-06,
"loss": 0.9983614087104797,
"step": 1430
},
{
"epoch": 2.087463556851312,
"grad_norm": 0.2085467278957367,
"learning_rate": 4.645999989644148e-06,
"loss": 1.1001629829406738,
"step": 1432
},
{
"epoch": 2.0903790087463556,
"grad_norm": 0.33730220794677734,
"learning_rate": 4.627675795765761e-06,
"loss": 1.3111716508865356,
"step": 1434
},
{
"epoch": 2.0932944606413995,
"grad_norm": 0.2143622636795044,
"learning_rate": 4.60938901752556e-06,
"loss": 0.8293286561965942,
"step": 1436
},
{
"epoch": 2.0962099125364433,
"grad_norm": 0.07966610789299011,
"learning_rate": 4.591139843810967e-06,
"loss": 1.1742640733718872,
"step": 1438
},
{
"epoch": 2.0991253644314867,
"grad_norm": 0.18288615345954895,
"learning_rate": 4.572928463120982e-06,
"loss": 1.1798888444900513,
"step": 1440
},
{
"epoch": 2.1020408163265305,
"grad_norm": 0.2549722194671631,
"learning_rate": 4.554755063564226e-06,
"loss": 1.0986790657043457,
"step": 1442
},
{
"epoch": 2.1049562682215743,
"grad_norm": 0.1803271621465683,
"learning_rate": 4.536619832857015e-06,
"loss": 1.0121634006500244,
"step": 1444
},
{
"epoch": 2.107871720116618,
"grad_norm": 0.33244436979293823,
"learning_rate": 4.518522958321409e-06,
"loss": 1.2030587196350098,
"step": 1446
},
{
"epoch": 2.110787172011662,
"grad_norm": 0.07119657844305038,
"learning_rate": 4.500464626883276e-06,
"loss": 0.6789675354957581,
"step": 1448
},
{
"epoch": 2.1137026239067054,
"grad_norm": 0.3919859230518341,
"learning_rate": 4.4824450250703755e-06,
"loss": 0.8600730895996094,
"step": 1450
},
{
"epoch": 2.116618075801749,
"grad_norm": 0.1530391424894333,
"learning_rate": 4.464464339010414e-06,
"loss": 0.9321385622024536,
"step": 1452
},
{
"epoch": 2.119533527696793,
"grad_norm": 0.12812215089797974,
"learning_rate": 4.446522754429127e-06,
"loss": 1.1020374298095703,
"step": 1454
},
{
"epoch": 2.122448979591837,
"grad_norm": 0.2687873840332031,
"learning_rate": 4.4286204566483715e-06,
"loss": 0.548167884349823,
"step": 1456
},
{
"epoch": 2.1253644314868803,
"grad_norm": 0.351572722196579,
"learning_rate": 4.410757630584204e-06,
"loss": 0.671511709690094,
"step": 1458
},
{
"epoch": 2.128279883381924,
"grad_norm": 0.3009466230869293,
"learning_rate": 4.392934460744958e-06,
"loss": 1.0809369087219238,
"step": 1460
},
{
"epoch": 2.131195335276968,
"grad_norm": 0.1647637039422989,
"learning_rate": 4.375151131229369e-06,
"loss": 1.0825597047805786,
"step": 1462
},
{
"epoch": 2.1341107871720117,
"grad_norm": 0.15290948748588562,
"learning_rate": 4.357407825724648e-06,
"loss": 1.132341742515564,
"step": 1464
},
{
"epoch": 2.1370262390670556,
"grad_norm": 0.30983132123947144,
"learning_rate": 4.339704727504581e-06,
"loss": 1.115373969078064,
"step": 1466
},
{
"epoch": 2.139941690962099,
"grad_norm": 0.1616809368133545,
"learning_rate": 4.32204201942766e-06,
"loss": 1.2571251392364502,
"step": 1468
},
{
"epoch": 2.142857142857143,
"grad_norm": 0.44996944069862366,
"learning_rate": 4.304419883935167e-06,
"loss": 0.7702177166938782,
"step": 1470
},
{
"epoch": 2.1457725947521866,
"grad_norm": 0.08497241884469986,
"learning_rate": 4.286838503049309e-06,
"loss": 1.0834498405456543,
"step": 1472
},
{
"epoch": 2.1486880466472305,
"grad_norm": 0.4060671925544739,
"learning_rate": 4.26929805837134e-06,
"loss": 1.1200850009918213,
"step": 1474
},
{
"epoch": 2.151603498542274,
"grad_norm": 0.17709168791770935,
"learning_rate": 4.2517987310796595e-06,
"loss": 1.1172959804534912,
"step": 1476
},
{
"epoch": 2.1545189504373177,
"grad_norm": 0.1522580236196518,
"learning_rate": 4.23434070192797e-06,
"loss": 1.168565034866333,
"step": 1478
},
{
"epoch": 2.1574344023323615,
"grad_norm": 0.1714070737361908,
"learning_rate": 4.216924151243395e-06,
"loss": 1.1115281581878662,
"step": 1480
},
{
"epoch": 2.1603498542274053,
"grad_norm": 0.13482044637203217,
"learning_rate": 4.199549258924615e-06,
"loss": 1.2671080827713013,
"step": 1482
},
{
"epoch": 2.163265306122449,
"grad_norm": 0.1459122747182846,
"learning_rate": 4.18221620444002e-06,
"loss": 1.172806739807129,
"step": 1484
},
{
"epoch": 2.1661807580174925,
"grad_norm": 0.08871738612651825,
"learning_rate": 4.1649251668258475e-06,
"loss": 1.045624852180481,
"step": 1486
},
{
"epoch": 2.1690962099125364,
"grad_norm": 0.3394921123981476,
"learning_rate": 4.147676324684335e-06,
"loss": 1.1889164447784424,
"step": 1488
},
{
"epoch": 2.17201166180758,
"grad_norm": 0.1473836749792099,
"learning_rate": 4.130469856181873e-06,
"loss": 1.079075813293457,
"step": 1490
},
{
"epoch": 2.174927113702624,
"grad_norm": 0.18347686529159546,
"learning_rate": 4.113305939047174e-06,
"loss": 1.2786171436309814,
"step": 1492
},
{
"epoch": 2.1778425655976674,
"grad_norm": 0.16250960528850555,
"learning_rate": 4.096184750569422e-06,
"loss": 0.677879273891449,
"step": 1494
},
{
"epoch": 2.1807580174927113,
"grad_norm": 0.383709192276001,
"learning_rate": 4.07910646759645e-06,
"loss": 0.6416628360748291,
"step": 1496
},
{
"epoch": 2.183673469387755,
"grad_norm": 0.07085460424423218,
"learning_rate": 4.062071266532916e-06,
"loss": 1.0884201526641846,
"step": 1498
},
{
"epoch": 2.186588921282799,
"grad_norm": 0.10339315980672836,
"learning_rate": 4.045079323338477e-06,
"loss": 0.8533938527107239,
"step": 1500
},
{
"epoch": 2.1895043731778427,
"grad_norm": 0.20028476417064667,
"learning_rate": 4.0281308135259705e-06,
"loss": 0.9680588841438293,
"step": 1502
},
{
"epoch": 2.192419825072886,
"grad_norm": 0.3516143560409546,
"learning_rate": 4.0112259121596e-06,
"loss": 0.7940521240234375,
"step": 1504
},
{
"epoch": 2.19533527696793,
"grad_norm": 0.10385473072528839,
"learning_rate": 3.994364793853135e-06,
"loss": 1.1375114917755127,
"step": 1506
},
{
"epoch": 2.198250728862974,
"grad_norm": 0.10895653814077377,
"learning_rate": 3.977547632768095e-06,
"loss": 1.1559362411499023,
"step": 1508
},
{
"epoch": 2.2011661807580176,
"grad_norm": 0.11289890855550766,
"learning_rate": 3.960774602611966e-06,
"loss": 1.1142271757125854,
"step": 1510
},
{
"epoch": 2.204081632653061,
"grad_norm": 0.11957119405269623,
"learning_rate": 3.94404587663639e-06,
"loss": 0.997885525226593,
"step": 1512
},
{
"epoch": 2.206997084548105,
"grad_norm": 0.1454574018716812,
"learning_rate": 3.9273616276353904e-06,
"loss": 0.6211732625961304,
"step": 1514
},
{
"epoch": 2.2099125364431487,
"grad_norm": 0.2732894718647003,
"learning_rate": 3.910722027943569e-06,
"loss": 0.7947649955749512,
"step": 1516
},
{
"epoch": 2.2128279883381925,
"grad_norm": 0.31755542755126953,
"learning_rate": 3.894127249434352e-06,
"loss": 0.9824427366256714,
"step": 1518
},
{
"epoch": 2.2157434402332363,
"grad_norm": 0.31029990315437317,
"learning_rate": 3.877577463518183e-06,
"loss": 1.0954536199569702,
"step": 1520
},
{
"epoch": 2.2186588921282797,
"grad_norm": 0.13882219791412354,
"learning_rate": 3.861072841140779e-06,
"loss": 1.1737290620803833,
"step": 1522
},
{
"epoch": 2.2215743440233235,
"grad_norm": 0.199194073677063,
"learning_rate": 3.8446135527813596e-06,
"loss": 1.2562403678894043,
"step": 1524
},
{
"epoch": 2.2244897959183674,
"grad_norm": 0.09712310880422592,
"learning_rate": 3.828199768450866e-06,
"loss": 0.887328028678894,
"step": 1526
},
{
"epoch": 2.227405247813411,
"grad_norm": 0.3643515110015869,
"learning_rate": 3.8118316576902345e-06,
"loss": 0.13481314480304718,
"step": 1528
},
{
"epoch": 2.2303206997084546,
"grad_norm": 0.4534083604812622,
"learning_rate": 3.7955093895686242e-06,
"loss": 1.0862985849380493,
"step": 1530
},
{
"epoch": 2.2332361516034984,
"grad_norm": 0.15879718959331512,
"learning_rate": 3.779233132681675e-06,
"loss": 1.045498013496399,
"step": 1532
},
{
"epoch": 2.2361516034985423,
"grad_norm": 0.18001393973827362,
"learning_rate": 3.7630030551497728e-06,
"loss": 1.1538960933685303,
"step": 1534
},
{
"epoch": 2.239067055393586,
"grad_norm": 0.08799666166305542,
"learning_rate": 3.746819324616308e-06,
"loss": 1.0975581407546997,
"step": 1536
},
{
"epoch": 2.24198250728863,
"grad_norm": 0.24161297082901,
"learning_rate": 3.730682108245944e-06,
"loss": 0.6484414339065552,
"step": 1538
},
{
"epoch": 2.2448979591836733,
"grad_norm": 0.08378497511148453,
"learning_rate": 3.714591572722891e-06,
"loss": 0.9581442475318909,
"step": 1540
},
{
"epoch": 2.247813411078717,
"grad_norm": 0.10033685714006424,
"learning_rate": 3.698547884249187e-06,
"loss": 0.6113779544830322,
"step": 1542
},
{
"epoch": 2.250728862973761,
"grad_norm": 0.275552362203598,
"learning_rate": 3.6825512085429703e-06,
"loss": 1.1037795543670654,
"step": 1544
},
{
"epoch": 2.253644314868805,
"grad_norm": 0.5268692374229431,
"learning_rate": 3.6666017108367837e-06,
"loss": 0.8392840027809143,
"step": 1546
},
{
"epoch": 2.256559766763848,
"grad_norm": 0.24270810186862946,
"learning_rate": 3.6506995558758586e-06,
"loss": 1.0857195854187012,
"step": 1548
},
{
"epoch": 2.259475218658892,
"grad_norm": 0.11209052801132202,
"learning_rate": 3.6348449079164116e-06,
"loss": 1.0408934354782104,
"step": 1550
},
{
"epoch": 2.262390670553936,
"grad_norm": 0.3595077097415924,
"learning_rate": 3.619037930723958e-06,
"loss": 0.41006362438201904,
"step": 1552
},
{
"epoch": 2.2653061224489797,
"grad_norm": 0.20681369304656982,
"learning_rate": 3.603278787571601e-06,
"loss": 1.08263099193573,
"step": 1554
},
{
"epoch": 2.2682215743440235,
"grad_norm": 0.1791142076253891,
"learning_rate": 3.587567641238369e-06,
"loss": 1.1789532899856567,
"step": 1556
},
{
"epoch": 2.271137026239067,
"grad_norm": 0.15824060142040253,
"learning_rate": 3.5719046540075155e-06,
"loss": 1.138330101966858,
"step": 1558
},
{
"epoch": 2.2740524781341107,
"grad_norm": 0.08995150774717331,
"learning_rate": 3.5562899876648556e-06,
"loss": 1.0861237049102783,
"step": 1560
},
{
"epoch": 2.2769679300291545,
"grad_norm": 0.20422294735908508,
"learning_rate": 3.540723803497084e-06,
"loss": 1.068771481513977,
"step": 1562
},
{
"epoch": 2.2798833819241984,
"grad_norm": 0.29918450117111206,
"learning_rate": 3.5252062622901196e-06,
"loss": 1.0257431268692017,
"step": 1564
},
{
"epoch": 2.2827988338192418,
"grad_norm": 0.2508153021335602,
"learning_rate": 3.5097375243274322e-06,
"loss": 0.7228989601135254,
"step": 1566
},
{
"epoch": 2.2857142857142856,
"grad_norm": 0.20312649011611938,
"learning_rate": 3.494317749388401e-06,
"loss": 0.9408363103866577,
"step": 1568
},
{
"epoch": 2.2886297376093294,
"grad_norm": 0.18280087411403656,
"learning_rate": 3.4789470967466528e-06,
"loss": 1.1609010696411133,
"step": 1570
},
{
"epoch": 2.2915451895043732,
"grad_norm": 0.4031111001968384,
"learning_rate": 3.4636257251684247e-06,
"loss": 1.1523736715316772,
"step": 1572
},
{
"epoch": 2.294460641399417,
"grad_norm": 0.14943495392799377,
"learning_rate": 3.4483537929109212e-06,
"loss": 1.0938516855239868,
"step": 1574
},
{
"epoch": 2.2973760932944605,
"grad_norm": 0.32287096977233887,
"learning_rate": 3.433131457720673e-06,
"loss": 0.8949427604675293,
"step": 1576
},
{
"epoch": 2.3002915451895043,
"grad_norm": 0.13816498219966888,
"learning_rate": 3.4179588768319194e-06,
"loss": 1.004232406616211,
"step": 1578
},
{
"epoch": 2.303206997084548,
"grad_norm": 0.17348824441432953,
"learning_rate": 3.4028362069649807e-06,
"loss": 1.1232084035873413,
"step": 1580
},
{
"epoch": 2.306122448979592,
"grad_norm": 0.2952488362789154,
"learning_rate": 3.387763604324628e-06,
"loss": 1.2846827507019043,
"step": 1582
},
{
"epoch": 2.3090379008746353,
"grad_norm": 0.0930081456899643,
"learning_rate": 3.3727412245984863e-06,
"loss": 1.0255701541900635,
"step": 1584
},
{
"epoch": 2.311953352769679,
"grad_norm": 0.19518348574638367,
"learning_rate": 3.3577692229554225e-06,
"loss": 0.9602378606796265,
"step": 1586
},
{
"epoch": 2.314868804664723,
"grad_norm": 0.08679629117250443,
"learning_rate": 3.3428477540439295e-06,
"loss": 1.0191975831985474,
"step": 1588
},
{
"epoch": 2.317784256559767,
"grad_norm": 0.07790417969226837,
"learning_rate": 3.3279769719905438e-06,
"loss": 1.1509268283843994,
"step": 1590
},
{
"epoch": 2.3206997084548107,
"grad_norm": 0.2912391126155853,
"learning_rate": 3.3131570303982517e-06,
"loss": 0.6687411665916443,
"step": 1592
},
{
"epoch": 2.323615160349854,
"grad_norm": 0.4317520260810852,
"learning_rate": 3.2983880823448896e-06,
"loss": 0.8183987736701965,
"step": 1594
},
{
"epoch": 2.326530612244898,
"grad_norm": 0.11885584890842438,
"learning_rate": 3.283670280381581e-06,
"loss": 1.1012320518493652,
"step": 1596
},
{
"epoch": 2.3294460641399417,
"grad_norm": 0.35252460837364197,
"learning_rate": 3.269003776531148e-06,
"loss": 0.9789476990699768,
"step": 1598
},
{
"epoch": 2.3323615160349855,
"grad_norm": 0.15434707701206207,
"learning_rate": 3.2543887222865496e-06,
"loss": 1.1043654680252075,
"step": 1600
},
{
"epoch": 2.335276967930029,
"grad_norm": 0.16315020620822906,
"learning_rate": 3.239825268609309e-06,
"loss": 1.0038485527038574,
"step": 1602
},
{
"epoch": 2.3381924198250728,
"grad_norm": 0.39029252529144287,
"learning_rate": 3.2253135659279558e-06,
"loss": 1.1852213144302368,
"step": 1604
},
{
"epoch": 2.3411078717201166,
"grad_norm": 0.2913620173931122,
"learning_rate": 3.2108537641364786e-06,
"loss": 0.45255744457244873,
"step": 1606
},
{
"epoch": 2.3440233236151604,
"grad_norm": 0.06582468003034592,
"learning_rate": 3.19644601259277e-06,
"loss": 1.269538402557373,
"step": 1608
},
{
"epoch": 2.3469387755102042,
"grad_norm": 0.5571786761283875,
"learning_rate": 3.1820904601170884e-06,
"loss": 0.8519521355628967,
"step": 1610
},
{
"epoch": 2.3498542274052476,
"grad_norm": 0.31546610593795776,
"learning_rate": 3.1677872549905154e-06,
"loss": 1.3262689113616943,
"step": 1612
},
{
"epoch": 2.3527696793002915,
"grad_norm": 0.09515654295682907,
"learning_rate": 3.153536544953433e-06,
"loss": 0.9249638319015503,
"step": 1614
},
{
"epoch": 2.3556851311953353,
"grad_norm": 0.15578609704971313,
"learning_rate": 3.139338477203983e-06,
"loss": 1.1823093891143799,
"step": 1616
},
{
"epoch": 2.358600583090379,
"grad_norm": 0.2227763533592224,
"learning_rate": 3.125193198396564e-06,
"loss": 1.2877289056777954,
"step": 1618
},
{
"epoch": 2.3615160349854225,
"grad_norm": 0.4745902121067047,
"learning_rate": 3.111100854640303e-06,
"loss": 0.9719488024711609,
"step": 1620
},
{
"epoch": 2.3644314868804663,
"grad_norm": 0.24592548608779907,
"learning_rate": 3.097061591497555e-06,
"loss": 1.0211539268493652,
"step": 1622
},
{
"epoch": 2.36734693877551,
"grad_norm": 0.21700948476791382,
"learning_rate": 3.0830755539823942e-06,
"loss": 0.9550508260726929,
"step": 1624
},
{
"epoch": 2.370262390670554,
"grad_norm": 0.20466458797454834,
"learning_rate": 3.0691428865591153e-06,
"loss": 0.5767884254455566,
"step": 1626
},
{
"epoch": 2.373177842565598,
"grad_norm": 0.14715692400932312,
"learning_rate": 3.0552637331407466e-06,
"loss": 0.894551694393158,
"step": 1628
},
{
"epoch": 2.376093294460641,
"grad_norm": 0.1368647813796997,
"learning_rate": 3.0414382370875628e-06,
"loss": 1.2126644849777222,
"step": 1630
},
{
"epoch": 2.379008746355685,
"grad_norm": 0.2084326297044754,
"learning_rate": 3.027666541205592e-06,
"loss": 1.1460554599761963,
"step": 1632
},
{
"epoch": 2.381924198250729,
"grad_norm": 0.12772594392299652,
"learning_rate": 3.013948787745166e-06,
"loss": 0.8425911664962769,
"step": 1634
},
{
"epoch": 2.3848396501457727,
"grad_norm": 0.21220910549163818,
"learning_rate": 3.000285118399425e-06,
"loss": 1.0760411024093628,
"step": 1636
},
{
"epoch": 2.387755102040816,
"grad_norm": 0.16325032711029053,
"learning_rate": 2.9866756743028644e-06,
"loss": 1.1195225715637207,
"step": 1638
},
{
"epoch": 2.39067055393586,
"grad_norm": 0.1648532897233963,
"learning_rate": 2.973120596029882e-06,
"loss": 1.0467681884765625,
"step": 1640
},
{
"epoch": 2.3935860058309038,
"grad_norm": 0.5487902164459229,
"learning_rate": 2.9596200235933215e-06,
"loss": 1.1597939729690552,
"step": 1642
},
{
"epoch": 2.3965014577259476,
"grad_norm": 0.15476688742637634,
"learning_rate": 2.9461740964430176e-06,
"loss": 1.0105078220367432,
"step": 1644
},
{
"epoch": 2.3994169096209914,
"grad_norm": 1.1137182712554932,
"learning_rate": 2.932782953464373e-06,
"loss": 1.0070343017578125,
"step": 1646
},
{
"epoch": 2.402332361516035,
"grad_norm": 0.3256247043609619,
"learning_rate": 2.9194467329769166e-06,
"loss": 0.9948145151138306,
"step": 1648
},
{
"epoch": 2.4052478134110786,
"grad_norm": 0.14843417704105377,
"learning_rate": 2.9061655727328617e-06,
"loss": 1.0339670181274414,
"step": 1650
},
{
"epoch": 2.4081632653061225,
"grad_norm": 0.14106328785419464,
"learning_rate": 2.8929396099157056e-06,
"loss": 1.149165391921997,
"step": 1652
},
{
"epoch": 2.4110787172011663,
"grad_norm": 0.1781884729862213,
"learning_rate": 2.8797689811387944e-06,
"loss": 0.9708322286605835,
"step": 1654
},
{
"epoch": 2.4139941690962097,
"grad_norm": 0.16324618458747864,
"learning_rate": 2.8666538224439207e-06,
"loss": 0.9147579669952393,
"step": 1656
},
{
"epoch": 2.4169096209912535,
"grad_norm": 0.10199990123510361,
"learning_rate": 2.853594269299919e-06,
"loss": 1.1740384101867676,
"step": 1658
},
{
"epoch": 2.4198250728862973,
"grad_norm": 0.36128106713294983,
"learning_rate": 2.8405904566012634e-06,
"loss": 0.9795001149177551,
"step": 1660
},
{
"epoch": 2.422740524781341,
"grad_norm": 0.11705031245946884,
"learning_rate": 2.827642518666673e-06,
"loss": 1.0222880840301514,
"step": 1662
},
{
"epoch": 2.425655976676385,
"grad_norm": 0.19340762495994568,
"learning_rate": 2.814750589237729e-06,
"loss": 1.0553447008132935,
"step": 1664
},
{
"epoch": 2.4285714285714284,
"grad_norm": 0.09246297180652618,
"learning_rate": 2.8019148014774856e-06,
"loss": 1.0741846561431885,
"step": 1666
},
{
"epoch": 2.431486880466472,
"grad_norm": 0.23843225836753845,
"learning_rate": 2.789135287969106e-06,
"loss": 1.1993522644042969,
"step": 1668
},
{
"epoch": 2.434402332361516,
"grad_norm": 0.7431137561798096,
"learning_rate": 2.7764121807144815e-06,
"loss": 0.42419517040252686,
"step": 1670
},
{
"epoch": 2.43731778425656,
"grad_norm": 0.11922803521156311,
"learning_rate": 2.7637456111328773e-06,
"loss": 1.0701881647109985,
"step": 1672
},
{
"epoch": 2.4402332361516033,
"grad_norm": 0.238107368350029,
"learning_rate": 2.7511357100595675e-06,
"loss": 1.0204083919525146,
"step": 1674
},
{
"epoch": 2.443148688046647,
"grad_norm": 0.18065865337848663,
"learning_rate": 2.738582607744491e-06,
"loss": 1.1767973899841309,
"step": 1676
},
{
"epoch": 2.446064139941691,
"grad_norm": 0.6328040361404419,
"learning_rate": 2.7260864338508944e-06,
"loss": 1.2465075254440308,
"step": 1678
},
{
"epoch": 2.4489795918367347,
"grad_norm": 0.32334592938423157,
"learning_rate": 2.71364731745401e-06,
"loss": 0.9165597558021545,
"step": 1680
},
{
"epoch": 2.4518950437317786,
"grad_norm": 0.29830703139305115,
"learning_rate": 2.701265387039703e-06,
"loss": 1.0425974130630493,
"step": 1682
},
{
"epoch": 2.454810495626822,
"grad_norm": 0.09913703799247742,
"learning_rate": 2.688940770503163e-06,
"loss": 1.1421351432800293,
"step": 1684
},
{
"epoch": 2.457725947521866,
"grad_norm": 0.19002677500247955,
"learning_rate": 2.676673595147574e-06,
"loss": 1.14607572555542,
"step": 1686
},
{
"epoch": 2.4606413994169096,
"grad_norm": 0.17399148643016815,
"learning_rate": 2.6644639876827903e-06,
"loss": 1.0854803323745728,
"step": 1688
},
{
"epoch": 2.4635568513119535,
"grad_norm": 0.18045774102210999,
"learning_rate": 2.6523120742240457e-06,
"loss": 1.156597375869751,
"step": 1690
},
{
"epoch": 2.466472303206997,
"grad_norm": 0.36970221996307373,
"learning_rate": 2.6402179802906417e-06,
"loss": 1.1326744556427002,
"step": 1692
},
{
"epoch": 2.4693877551020407,
"grad_norm": 0.16106556355953217,
"learning_rate": 2.6281818308046466e-06,
"loss": 1.1174097061157227,
"step": 1694
},
{
"epoch": 2.4723032069970845,
"grad_norm": 0.23179616034030914,
"learning_rate": 2.6162037500896134e-06,
"loss": 1.247542381286621,
"step": 1696
},
{
"epoch": 2.4752186588921283,
"grad_norm": 0.20750805735588074,
"learning_rate": 2.6042838618692964e-06,
"loss": 1.120650291442871,
"step": 1698
},
{
"epoch": 2.478134110787172,
"grad_norm": 0.4005797207355499,
"learning_rate": 2.5924222892663607e-06,
"loss": 1.1234309673309326,
"step": 1700
},
{
"epoch": 2.481049562682216,
"grad_norm": 0.11094089597463608,
"learning_rate": 2.580619154801124e-06,
"loss": 1.0382579565048218,
"step": 1702
},
{
"epoch": 2.4839650145772594,
"grad_norm": 0.1598607450723648,
"learning_rate": 2.5688745803902863e-06,
"loss": 0.8054310083389282,
"step": 1704
},
{
"epoch": 2.486880466472303,
"grad_norm": 0.29358312487602234,
"learning_rate": 2.557188687345666e-06,
"loss": 1.2227270603179932,
"step": 1706
},
{
"epoch": 2.489795918367347,
"grad_norm": 0.10478518158197403,
"learning_rate": 2.545561596372957e-06,
"loss": 1.0256011486053467,
"step": 1708
},
{
"epoch": 2.4927113702623904,
"grad_norm": 0.19069114327430725,
"learning_rate": 2.533993427570471e-06,
"loss": 1.003487467765808,
"step": 1710
},
{
"epoch": 2.4956268221574343,
"grad_norm": 0.19944234192371368,
"learning_rate": 2.522484300427905e-06,
"loss": 1.1340402364730835,
"step": 1712
},
{
"epoch": 2.498542274052478,
"grad_norm": 0.206906259059906,
"learning_rate": 2.5110343338251055e-06,
"loss": 0.7293667793273926,
"step": 1714
},
{
"epoch": 2.501457725947522,
"grad_norm": 0.22807729244232178,
"learning_rate": 2.499643646030833e-06,
"loss": 0.6911664009094238,
"step": 1716
},
{
"epoch": 2.5043731778425657,
"grad_norm": 0.12783202528953552,
"learning_rate": 2.488312354701552e-06,
"loss": 1.0861356258392334,
"step": 1718
},
{
"epoch": 2.5072886297376096,
"grad_norm": 0.24884046614170074,
"learning_rate": 2.4770405768802087e-06,
"loss": 1.2009036540985107,
"step": 1720
},
{
"epoch": 2.510204081632653,
"grad_norm": 0.19883911311626434,
"learning_rate": 2.4658284289950235e-06,
"loss": 1.171090006828308,
"step": 1722
},
{
"epoch": 2.513119533527697,
"grad_norm": 0.2198370397090912,
"learning_rate": 2.454676026858288e-06,
"loss": 0.6773008704185486,
"step": 1724
},
{
"epoch": 2.5160349854227406,
"grad_norm": 0.3970673084259033,
"learning_rate": 2.443583485665172e-06,
"loss": 0.9177547693252563,
"step": 1726
},
{
"epoch": 2.518950437317784,
"grad_norm": 0.14196209609508514,
"learning_rate": 2.432550919992524e-06,
"loss": 1.0238224267959595,
"step": 1728
},
{
"epoch": 2.521865889212828,
"grad_norm": 0.08479610830545425,
"learning_rate": 2.4215784437977023e-06,
"loss": 1.0351308584213257,
"step": 1730
},
{
"epoch": 2.5247813411078717,
"grad_norm": 0.2791972756385803,
"learning_rate": 2.4106661704173856e-06,
"loss": 1.2357579469680786,
"step": 1732
},
{
"epoch": 2.5276967930029155,
"grad_norm": 0.300520658493042,
"learning_rate": 2.3998142125664094e-06,
"loss": 0.9955886602401733,
"step": 1734
},
{
"epoch": 2.5306122448979593,
"grad_norm": 0.07155195623636246,
"learning_rate": 2.3890226823365984e-06,
"loss": 0.9533568024635315,
"step": 1736
},
{
"epoch": 2.533527696793003,
"grad_norm": 0.37421008944511414,
"learning_rate": 2.3782916911956072e-06,
"loss": 0.7588440179824829,
"step": 1738
},
{
"epoch": 2.5364431486880465,
"grad_norm": 0.21846982836723328,
"learning_rate": 2.3676213499857742e-06,
"loss": 1.0482406616210938,
"step": 1740
},
{
"epoch": 2.5393586005830904,
"grad_norm": 0.22150775790214539,
"learning_rate": 2.357011768922975e-06,
"loss": 0.9425265789031982,
"step": 1742
},
{
"epoch": 2.542274052478134,
"grad_norm": 0.0946943610906601,
"learning_rate": 2.3464630575954748e-06,
"loss": 1.0236523151397705,
"step": 1744
},
{
"epoch": 2.5451895043731776,
"grad_norm": 0.2336379438638687,
"learning_rate": 2.3359753249628156e-06,
"loss": 0.9605098962783813,
"step": 1746
},
{
"epoch": 2.5481049562682214,
"grad_norm": 0.38517579436302185,
"learning_rate": 2.3255486793546735e-06,
"loss": 0.7055401802062988,
"step": 1748
},
{
"epoch": 2.5510204081632653,
"grad_norm": 0.22488614916801453,
"learning_rate": 2.3151832284697437e-06,
"loss": 1.3222585916519165,
"step": 1750
},
{
"epoch": 2.553935860058309,
"grad_norm": 0.14808881282806396,
"learning_rate": 2.304879079374634e-06,
"loss": 0.6318288445472717,
"step": 1752
},
{
"epoch": 2.556851311953353,
"grad_norm": 0.12122584134340286,
"learning_rate": 2.2946363385027555e-06,
"loss": 1.0979853868484497,
"step": 1754
},
{
"epoch": 2.5597667638483967,
"grad_norm": 0.17218822240829468,
"learning_rate": 2.2844551116532164e-06,
"loss": 1.1333314180374146,
"step": 1756
},
{
"epoch": 2.56268221574344,
"grad_norm": 0.2076103240251541,
"learning_rate": 2.274335503989743e-06,
"loss": 1.1102957725524902,
"step": 1758
},
{
"epoch": 2.565597667638484,
"grad_norm": 0.3147886395454407,
"learning_rate": 2.2642776200395825e-06,
"loss": 1.0110862255096436,
"step": 1760
},
{
"epoch": 2.568513119533528,
"grad_norm": 0.199388787150383,
"learning_rate": 2.2542815636924273e-06,
"loss": 1.1791144609451294,
"step": 1762
},
{
"epoch": 2.571428571428571,
"grad_norm": 0.14399054646492004,
"learning_rate": 2.2443474381993418e-06,
"loss": 0.6136134266853333,
"step": 1764
},
{
"epoch": 2.574344023323615,
"grad_norm": 0.12786594033241272,
"learning_rate": 2.2344753461716924e-06,
"loss": 1.169732928276062,
"step": 1766
},
{
"epoch": 2.577259475218659,
"grad_norm": 0.42270779609680176,
"learning_rate": 2.2246653895800945e-06,
"loss": 1.167303442955017,
"step": 1768
},
{
"epoch": 2.5801749271137027,
"grad_norm": 0.3366575539112091,
"learning_rate": 2.2149176697533547e-06,
"loss": 0.7395915985107422,
"step": 1770
},
{
"epoch": 2.5830903790087465,
"grad_norm": 0.11204802244901657,
"learning_rate": 2.2052322873774243e-06,
"loss": 1.130765676498413,
"step": 1772
},
{
"epoch": 2.5860058309037903,
"grad_norm": 0.40100663900375366,
"learning_rate": 2.195609342494358e-06,
"loss": 0.9160555601119995,
"step": 1774
},
{
"epoch": 2.5889212827988337,
"grad_norm": 0.3878629505634308,
"learning_rate": 2.1860489345012882e-06,
"loss": 1.1737711429595947,
"step": 1776
},
{
"epoch": 2.5918367346938775,
"grad_norm": 0.2504361569881439,
"learning_rate": 2.1765511621493837e-06,
"loss": 1.1497868299484253,
"step": 1778
},
{
"epoch": 2.5947521865889214,
"grad_norm": 0.399038165807724,
"learning_rate": 2.1671161235428466e-06,
"loss": 1.0515235662460327,
"step": 1780
},
{
"epoch": 2.5976676384839648,
"grad_norm": 0.18093329668045044,
"learning_rate": 2.1577439161378857e-06,
"loss": 1.0114405155181885,
"step": 1782
},
{
"epoch": 2.6005830903790086,
"grad_norm": 0.20376266539096832,
"learning_rate": 2.1484346367417174e-06,
"loss": 1.1349772214889526,
"step": 1784
},
{
"epoch": 2.6034985422740524,
"grad_norm": 0.12697869539260864,
"learning_rate": 2.139188381511565e-06,
"loss": 1.0220611095428467,
"step": 1786
},
{
"epoch": 2.6064139941690962,
"grad_norm": 0.17522640526294708,
"learning_rate": 2.1300052459536577e-06,
"loss": 1.04948890209198,
"step": 1788
},
{
"epoch": 2.60932944606414,
"grad_norm": 0.33081164956092834,
"learning_rate": 2.120885324922257e-06,
"loss": 1.067612648010254,
"step": 1790
},
{
"epoch": 2.612244897959184,
"grad_norm": 0.19511879980564117,
"learning_rate": 2.1118287126186663e-06,
"loss": 1.1198432445526123,
"step": 1792
},
{
"epoch": 2.6151603498542273,
"grad_norm": 0.12612418830394745,
"learning_rate": 2.102835502590264e-06,
"loss": 0.9212133884429932,
"step": 1794
},
{
"epoch": 2.618075801749271,
"grad_norm": 1.4945578575134277,
"learning_rate": 2.0939057877295337e-06,
"loss": 0.9755832552909851,
"step": 1796
},
{
"epoch": 2.620991253644315,
"grad_norm": 0.11096255481243134,
"learning_rate": 2.085039660273107e-06,
"loss": 0.8870418071746826,
"step": 1798
},
{
"epoch": 2.6239067055393583,
"grad_norm": 0.16551688313484192,
"learning_rate": 2.076237211800807e-06,
"loss": 1.1013219356536865,
"step": 1800
},
{
"epoch": 2.626822157434402,
"grad_norm": 0.12267225235700607,
"learning_rate": 2.067498533234708e-06,
"loss": 1.1636854410171509,
"step": 1802
},
{
"epoch": 2.629737609329446,
"grad_norm": 0.21022585034370422,
"learning_rate": 2.0588237148381937e-06,
"loss": 1.0870646238327026,
"step": 1804
},
{
"epoch": 2.63265306122449,
"grad_norm": 0.12315444648265839,
"learning_rate": 2.05021284621502e-06,
"loss": 1.0031044483184814,
"step": 1806
},
{
"epoch": 2.6355685131195337,
"grad_norm": 0.08722248673439026,
"learning_rate": 2.0416660163084007e-06,
"loss": 1.1768810749053955,
"step": 1808
},
{
"epoch": 2.6384839650145775,
"grad_norm": 0.14608271420001984,
"learning_rate": 2.0331833134000806e-06,
"loss": 1.1812292337417603,
"step": 1810
},
{
"epoch": 2.641399416909621,
"grad_norm": 0.12209862470626831,
"learning_rate": 2.0247648251094187e-06,
"loss": 0.5496333241462708,
"step": 1812
},
{
"epoch": 2.6443148688046647,
"grad_norm": 0.14420591294765472,
"learning_rate": 2.0164106383924995e-06,
"loss": 1.0734022855758667,
"step": 1814
},
{
"epoch": 2.6472303206997085,
"grad_norm": 0.34557104110717773,
"learning_rate": 2.008120839541217e-06,
"loss": 0.8214896321296692,
"step": 1816
},
{
"epoch": 2.650145772594752,
"grad_norm": 0.19864369928836823,
"learning_rate": 1.9998955141823947e-06,
"loss": 1.1074302196502686,
"step": 1818
},
{
"epoch": 2.6530612244897958,
"grad_norm": 0.1151181161403656,
"learning_rate": 1.9917347472768996e-06,
"loss": 1.1880613565444946,
"step": 1820
},
{
"epoch": 2.6559766763848396,
"grad_norm": 0.3938349783420563,
"learning_rate": 1.983638623118759e-06,
"loss": 0.8221843242645264,
"step": 1822
},
{
"epoch": 2.6588921282798834,
"grad_norm": 0.4980735182762146,
"learning_rate": 1.9756072253342956e-06,
"loss": 1.0243555307388306,
"step": 1824
},
{
"epoch": 2.6618075801749272,
"grad_norm": 0.2903914451599121,
"learning_rate": 1.967640636881263e-06,
"loss": 1.1823608875274658,
"step": 1826
},
{
"epoch": 2.664723032069971,
"grad_norm": 0.1528269499540329,
"learning_rate": 1.9597389400479843e-06,
"loss": 1.1882878541946411,
"step": 1828
},
{
"epoch": 2.6676384839650145,
"grad_norm": 0.37738537788391113,
"learning_rate": 1.9519022164525086e-06,
"loss": 0.8332970142364502,
"step": 1830
},
{
"epoch": 2.6705539358600583,
"grad_norm": 0.10077593475580215,
"learning_rate": 1.9441305470417622e-06,
"loss": 1.1155685186386108,
"step": 1832
},
{
"epoch": 2.673469387755102,
"grad_norm": 0.24888084828853607,
"learning_rate": 1.936424012090716e-06,
"loss": 1.0899043083190918,
"step": 1834
},
{
"epoch": 2.6763848396501455,
"grad_norm": 0.3049887418746948,
"learning_rate": 1.9287826912015588e-06,
"loss": 1.3089343309402466,
"step": 1836
},
{
"epoch": 2.6793002915451893,
"grad_norm": 0.15812550485134125,
"learning_rate": 1.9212066633028635e-06,
"loss": 1.0993826389312744,
"step": 1838
},
{
"epoch": 2.682215743440233,
"grad_norm": 0.265886515378952,
"learning_rate": 1.9136960066487884e-06,
"loss": 1.0602340698242188,
"step": 1840
},
{
"epoch": 2.685131195335277,
"grad_norm": 0.8439386487007141,
"learning_rate": 1.9062507988182545e-06,
"loss": 1.0067952871322632,
"step": 1842
},
{
"epoch": 2.688046647230321,
"grad_norm": 0.45330727100372314,
"learning_rate": 1.8988711167141542e-06,
"loss": 0.5957139134407043,
"step": 1844
},
{
"epoch": 2.6909620991253647,
"grad_norm": 0.14824670553207397,
"learning_rate": 1.8915570365625508e-06,
"loss": 1.1712740659713745,
"step": 1846
},
{
"epoch": 2.693877551020408,
"grad_norm": 0.10511742532253265,
"learning_rate": 1.8843086339118943e-06,
"loss": 1.0602518320083618,
"step": 1848
},
{
"epoch": 2.696793002915452,
"grad_norm": 0.07894819229841232,
"learning_rate": 1.8771259836322376e-06,
"loss": 1.014635682106018,
"step": 1850
},
{
"epoch": 2.6997084548104957,
"grad_norm": 0.10334635525941849,
"learning_rate": 1.8700091599144688e-06,
"loss": 1.0106903314590454,
"step": 1852
},
{
"epoch": 2.702623906705539,
"grad_norm": 0.30136221647262573,
"learning_rate": 1.8629582362695395e-06,
"loss": 0.673401951789856,
"step": 1854
},
{
"epoch": 2.705539358600583,
"grad_norm": 0.5134400129318237,
"learning_rate": 1.8559732855277067e-06,
"loss": 1.1158447265625,
"step": 1856
},
{
"epoch": 2.7084548104956268,
"grad_norm": 0.35808032751083374,
"learning_rate": 1.8490543798377848e-06,
"loss": 1.2872017621994019,
"step": 1858
},
{
"epoch": 2.7113702623906706,
"grad_norm": 0.04801107197999954,
"learning_rate": 1.8422015906663964e-06,
"loss": 0.932016909122467,
"step": 1860
},
{
"epoch": 2.7142857142857144,
"grad_norm": 0.34277820587158203,
"learning_rate": 1.8354149887972297e-06,
"loss": 0.6936520338058472,
"step": 1862
},
{
"epoch": 2.7172011661807582,
"grad_norm": 0.16731053590774536,
"learning_rate": 1.8286946443303187e-06,
"loss": 1.1427615880966187,
"step": 1864
},
{
"epoch": 2.7201166180758016,
"grad_norm": 0.8489914536476135,
"learning_rate": 1.822040626681308e-06,
"loss": 1.0948349237442017,
"step": 1866
},
{
"epoch": 2.7230320699708455,
"grad_norm": 0.41851627826690674,
"learning_rate": 1.8154530045807438e-06,
"loss": 1.157147765159607,
"step": 1868
},
{
"epoch": 2.7259475218658893,
"grad_norm": 0.09261982142925262,
"learning_rate": 1.808931846073361e-06,
"loss": 1.0182065963745117,
"step": 1870
},
{
"epoch": 2.7288629737609327,
"grad_norm": 0.07328807562589645,
"learning_rate": 1.8024772185173758e-06,
"loss": 0.9535019397735596,
"step": 1872
},
{
"epoch": 2.7317784256559765,
"grad_norm": 0.3953118324279785,
"learning_rate": 1.7960891885837988e-06,
"loss": 0.5561579465866089,
"step": 1874
},
{
"epoch": 2.7346938775510203,
"grad_norm": 0.7391979694366455,
"learning_rate": 1.7897678222557402e-06,
"loss": 0.9951037764549255,
"step": 1876
},
{
"epoch": 2.737609329446064,
"grad_norm": 0.16622287034988403,
"learning_rate": 1.7835131848277288e-06,
"loss": 1.129691243171692,
"step": 1878
},
{
"epoch": 2.740524781341108,
"grad_norm": 0.08795658499002457,
"learning_rate": 1.7773253409050398e-06,
"loss": 0.9720866680145264,
"step": 1880
},
{
"epoch": 2.743440233236152,
"grad_norm": 0.10475818812847137,
"learning_rate": 1.7712043544030265e-06,
"loss": 0.9624143242835999,
"step": 1882
},
{
"epoch": 2.746355685131195,
"grad_norm": 0.5169785618782043,
"learning_rate": 1.7651502885464582e-06,
"loss": 0.7830743789672852,
"step": 1884
},
{
"epoch": 2.749271137026239,
"grad_norm": 0.06864479184150696,
"learning_rate": 1.7591632058688719e-06,
"loss": 1.1376532316207886,
"step": 1886
},
{
"epoch": 2.752186588921283,
"grad_norm": 4.637813091278076,
"learning_rate": 1.7532431682119205e-06,
"loss": 0.8696690797805786,
"step": 1888
},
{
"epoch": 2.7551020408163263,
"grad_norm": 0.15929657220840454,
"learning_rate": 1.7473902367247361e-06,
"loss": 1.1236258745193481,
"step": 1890
},
{
"epoch": 2.75801749271137,
"grad_norm": 0.3590356707572937,
"learning_rate": 1.7416044718633025e-06,
"loss": 0.8365395665168762,
"step": 1892
},
{
"epoch": 2.760932944606414,
"grad_norm": 0.1510230451822281,
"learning_rate": 1.735885933389825e-06,
"loss": 0.6292239427566528,
"step": 1894
},
{
"epoch": 2.7638483965014577,
"grad_norm": 0.18348506093025208,
"learning_rate": 1.730234680372116e-06,
"loss": 1.1290793418884277,
"step": 1896
},
{
"epoch": 2.7667638483965016,
"grad_norm": 0.16462060809135437,
"learning_rate": 1.7246507711829852e-06,
"loss": 1.1606987714767456,
"step": 1898
},
{
"epoch": 2.7696793002915454,
"grad_norm": 0.16783565282821655,
"learning_rate": 1.719134263499633e-06,
"loss": 0.9577206373214722,
"step": 1900
},
{
"epoch": 2.772594752186589,
"grad_norm": 0.08972535282373428,
"learning_rate": 1.7136852143030605e-06,
"loss": 0.9086419343948364,
"step": 1902
},
{
"epoch": 2.7755102040816326,
"grad_norm": 0.25966984033584595,
"learning_rate": 1.7083036798774771e-06,
"loss": 1.16250479221344,
"step": 1904
},
{
"epoch": 2.7784256559766765,
"grad_norm": 0.14714005589485168,
"learning_rate": 1.7029897158097191e-06,
"loss": 0.6218932867050171,
"step": 1906
},
{
"epoch": 2.78134110787172,
"grad_norm": 0.1505810022354126,
"learning_rate": 1.6977433769886777e-06,
"loss": 0.9435967206954956,
"step": 1908
},
{
"epoch": 2.7842565597667637,
"grad_norm": 0.5554741621017456,
"learning_rate": 1.6925647176047304e-06,
"loss": 1.2954356670379639,
"step": 1910
},
{
"epoch": 2.7871720116618075,
"grad_norm": 0.7726877331733704,
"learning_rate": 1.6874537911491804e-06,
"loss": 1.100317120552063,
"step": 1912
},
{
"epoch": 2.7900874635568513,
"grad_norm": 0.1900632381439209,
"learning_rate": 1.682410650413707e-06,
"loss": 1.1734505891799927,
"step": 1914
},
{
"epoch": 2.793002915451895,
"grad_norm": 0.2996356189250946,
"learning_rate": 1.6774353474898176e-06,
"loss": 0.6496275067329407,
"step": 1916
},
{
"epoch": 2.795918367346939,
"grad_norm": 0.28916487097740173,
"learning_rate": 1.6725279337683096e-06,
"loss": 0.8404643535614014,
"step": 1918
},
{
"epoch": 2.7988338192419824,
"grad_norm": 0.30399462580680847,
"learning_rate": 1.6676884599387447e-06,
"loss": 0.8097843527793884,
"step": 1920
},
{
"epoch": 2.801749271137026,
"grad_norm": 0.15744291245937347,
"learning_rate": 1.6629169759889167e-06,
"loss": 1.1007176637649536,
"step": 1922
},
{
"epoch": 2.80466472303207,
"grad_norm": 0.22451713681221008,
"learning_rate": 1.6582135312043415e-06,
"loss": 1.1043728590011597,
"step": 1924
},
{
"epoch": 2.8075801749271134,
"grad_norm": 0.16485294699668884,
"learning_rate": 1.6535781741677468e-06,
"loss": 1.1978418827056885,
"step": 1926
},
{
"epoch": 2.8104956268221573,
"grad_norm": 0.11872020363807678,
"learning_rate": 1.6490109527585685e-06,
"loss": 1.0319398641586304,
"step": 1928
},
{
"epoch": 2.813411078717201,
"grad_norm": 0.22041387856006622,
"learning_rate": 1.6445119141524586e-06,
"loss": 1.0383124351501465,
"step": 1930
},
{
"epoch": 2.816326530612245,
"grad_norm": 0.1371716856956482,
"learning_rate": 1.6400811048207957e-06,
"loss": 1.0704172849655151,
"step": 1932
},
{
"epoch": 2.8192419825072887,
"grad_norm": 0.33869630098342896,
"learning_rate": 1.6357185705302059e-06,
"loss": 0.9032880663871765,
"step": 1934
},
{
"epoch": 2.8221574344023326,
"grad_norm": 0.19506464898586273,
"learning_rate": 1.6314243563420908e-06,
"loss": 1.1649752855300903,
"step": 1936
},
{
"epoch": 2.825072886297376,
"grad_norm": 0.16767188906669617,
"learning_rate": 1.627198506612162e-06,
"loss": 1.197486162185669,
"step": 1938
},
{
"epoch": 2.82798833819242,
"grad_norm": 0.17042168974876404,
"learning_rate": 1.62304106498998e-06,
"loss": 1.065731167793274,
"step": 1940
},
{
"epoch": 2.8309037900874636,
"grad_norm": 0.25560781359672546,
"learning_rate": 1.6189520744185072e-06,
"loss": 0.9224144220352173,
"step": 1942
},
{
"epoch": 2.8338192419825075,
"grad_norm": 0.20863035321235657,
"learning_rate": 1.614931577133663e-06,
"loss": 1.0565248727798462,
"step": 1944
},
{
"epoch": 2.836734693877551,
"grad_norm": 0.19189637899398804,
"learning_rate": 1.6109796146638871e-06,
"loss": 1.232025384902954,
"step": 1946
},
{
"epoch": 2.8396501457725947,
"grad_norm": 0.6458204984664917,
"learning_rate": 1.6070962278297113e-06,
"loss": 1.0065245628356934,
"step": 1948
},
{
"epoch": 2.8425655976676385,
"grad_norm": 0.3259865939617157,
"learning_rate": 1.6032814567433348e-06,
"loss": 1.2361031770706177,
"step": 1950
},
{
"epoch": 2.8454810495626823,
"grad_norm": 0.4714111089706421,
"learning_rate": 1.5995353408082157e-06,
"loss": 1.3339447975158691,
"step": 1952
},
{
"epoch": 2.848396501457726,
"grad_norm": 0.16928227245807648,
"learning_rate": 1.5958579187186582e-06,
"loss": 1.0442076921463013,
"step": 1954
},
{
"epoch": 2.8513119533527695,
"grad_norm": 0.3731814920902252,
"learning_rate": 1.5922492284594174e-06,
"loss": 0.878253698348999,
"step": 1956
},
{
"epoch": 2.8542274052478134,
"grad_norm": 0.6527604460716248,
"learning_rate": 1.5887093073053036e-06,
"loss": 1.0772031545639038,
"step": 1958
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.18542839586734772,
"learning_rate": 1.5852381918207995e-06,
"loss": 1.116060733795166,
"step": 1960
},
{
"epoch": 2.860058309037901,
"grad_norm": 0.250535786151886,
"learning_rate": 1.5818359178596806e-06,
"loss": 1.1924026012420654,
"step": 1962
},
{
"epoch": 2.8629737609329444,
"grad_norm": 0.07601413875818253,
"learning_rate": 1.5785025205646468e-06,
"loss": 0.9614888429641724,
"step": 1964
},
{
"epoch": 2.8658892128279883,
"grad_norm": 0.17522846162319183,
"learning_rate": 1.5752380343669574e-06,
"loss": 1.0021862983703613,
"step": 1966
},
{
"epoch": 2.868804664723032,
"grad_norm": 0.22332464158535004,
"learning_rate": 1.5720424929860793e-06,
"loss": 1.0522475242614746,
"step": 1968
},
{
"epoch": 2.871720116618076,
"grad_norm": 0.39566364884376526,
"learning_rate": 1.5689159294293333e-06,
"loss": 1.0991871356964111,
"step": 1970
},
{
"epoch": 2.8746355685131197,
"grad_norm": 0.3006777763366699,
"learning_rate": 1.5658583759915563e-06,
"loss": 1.068638801574707,
"step": 1972
},
{
"epoch": 2.877551020408163,
"grad_norm": 0.18835684657096863,
"learning_rate": 1.5628698642547674e-06,
"loss": 1.0682188272476196,
"step": 1974
},
{
"epoch": 2.880466472303207,
"grad_norm": 0.13527542352676392,
"learning_rate": 1.5599504250878434e-06,
"loss": 1.0796337127685547,
"step": 1976
},
{
"epoch": 2.883381924198251,
"grad_norm": 0.2289610654115677,
"learning_rate": 1.5571000886461946e-06,
"loss": 1.1682178974151611,
"step": 1978
},
{
"epoch": 2.8862973760932946,
"grad_norm": 0.3208562731742859,
"learning_rate": 1.5543188843714597e-06,
"loss": 0.6415768265724182,
"step": 1980
},
{
"epoch": 2.889212827988338,
"grad_norm": 0.2707623541355133,
"learning_rate": 1.551606840991198e-06,
"loss": 0.5584684014320374,
"step": 1982
},
{
"epoch": 2.892128279883382,
"grad_norm": 0.24681639671325684,
"learning_rate": 1.5489639865185929e-06,
"loss": 0.9024500846862793,
"step": 1984
},
{
"epoch": 2.8950437317784257,
"grad_norm": 0.2885083556175232,
"learning_rate": 1.5463903482521637e-06,
"loss": 1.0408830642700195,
"step": 1986
},
{
"epoch": 2.8979591836734695,
"grad_norm": 0.2863474190235138,
"learning_rate": 1.543885952775484e-06,
"loss": 0.5923194289207458,
"step": 1988
},
{
"epoch": 2.9008746355685133,
"grad_norm": 0.13149987161159515,
"learning_rate": 1.5414508259569033e-06,
"loss": 1.0203630924224854,
"step": 1990
},
{
"epoch": 2.9037900874635567,
"grad_norm": 0.08542142808437347,
"learning_rate": 1.5390849929492853e-06,
"loss": 0.4749288260936737,
"step": 1992
},
{
"epoch": 2.9067055393586005,
"grad_norm": 0.39572906494140625,
"learning_rate": 1.5367884781897442e-06,
"loss": 0.9975032210350037,
"step": 1994
},
{
"epoch": 2.9096209912536444,
"grad_norm": 0.3944467604160309,
"learning_rate": 1.5345613053993947e-06,
"loss": 1.2269786596298218,
"step": 1996
},
{
"epoch": 2.912536443148688,
"grad_norm": 0.14900818467140198,
"learning_rate": 1.5324034975831053e-06,
"loss": 1.2356706857681274,
"step": 1998
},
{
"epoch": 2.9154518950437316,
"grad_norm": 0.31048882007598877,
"learning_rate": 1.53031507702926e-06,
"loss": 1.218428611755371,
"step": 2000
},
{
"epoch": 2.9183673469387754,
"grad_norm": 0.1689174771308899,
"learning_rate": 1.5282960653095309e-06,
"loss": 0.9620698094367981,
"step": 2002
},
{
"epoch": 2.9212827988338192,
"grad_norm": 0.2305694818496704,
"learning_rate": 1.5263464832786536e-06,
"loss": 1.2038404941558838,
"step": 2004
},
{
"epoch": 2.924198250728863,
"grad_norm": 0.12036718428134918,
"learning_rate": 1.5244663510742102e-06,
"loss": 0.9968715310096741,
"step": 2006
},
{
"epoch": 2.927113702623907,
"grad_norm": 0.12467171996831894,
"learning_rate": 1.5226556881164256e-06,
"loss": 1.0186277627944946,
"step": 2008
},
{
"epoch": 2.9300291545189503,
"grad_norm": 0.13296104967594147,
"learning_rate": 1.5209145131079634e-06,
"loss": 1.026340365409851,
"step": 2010
},
{
"epoch": 2.932944606413994,
"grad_norm": 0.12233509868383408,
"learning_rate": 1.5192428440337316e-06,
"loss": 1.182348608970642,
"step": 2012
},
{
"epoch": 2.935860058309038,
"grad_norm": 0.1486111879348755,
"learning_rate": 1.5176406981607024e-06,
"loss": 1.0666353702545166,
"step": 2014
},
{
"epoch": 2.938775510204082,
"grad_norm": 0.5397063493728638,
"learning_rate": 1.5161080920377289e-06,
"loss": 1.389245629310608,
"step": 2016
},
{
"epoch": 2.941690962099125,
"grad_norm": 0.15026716887950897,
"learning_rate": 1.5146450414953738e-06,
"loss": 1.0400997400283813,
"step": 2018
},
{
"epoch": 2.944606413994169,
"grad_norm": 0.11009442806243896,
"learning_rate": 1.5132515616457505e-06,
"loss": 1.001649260520935,
"step": 2020
},
{
"epoch": 2.947521865889213,
"grad_norm": 0.7643895745277405,
"learning_rate": 1.5119276668823628e-06,
"loss": 0.37964844703674316,
"step": 2022
},
{
"epoch": 2.9504373177842567,
"grad_norm": 0.2546994984149933,
"learning_rate": 1.510673370879957e-06,
"loss": 1.0618635416030884,
"step": 2024
},
{
"epoch": 2.9533527696793005,
"grad_norm": 0.15609286725521088,
"learning_rate": 1.5094886865943835e-06,
"loss": 1.013123869895935,
"step": 2026
},
{
"epoch": 2.956268221574344,
"grad_norm": 0.09666828066110611,
"learning_rate": 1.5083736262624577e-06,
"loss": 0.7794107794761658,
"step": 2028
},
{
"epoch": 2.9591836734693877,
"grad_norm": 0.07339915633201599,
"learning_rate": 1.5073282014018395e-06,
"loss": 1.3735166788101196,
"step": 2030
},
{
"epoch": 2.9620991253644315,
"grad_norm": 0.6088920831680298,
"learning_rate": 1.5063524228109107e-06,
"loss": 0.8808611035346985,
"step": 2032
},
{
"epoch": 2.9650145772594754,
"grad_norm": 0.1744547188282013,
"learning_rate": 1.5054463005686626e-06,
"loss": 1.1831696033477783,
"step": 2034
},
{
"epoch": 2.9679300291545188,
"grad_norm": 0.24790845811367035,
"learning_rate": 1.5046098440345955e-06,
"loss": 1.00650155544281,
"step": 2036
},
{
"epoch": 2.9708454810495626,
"grad_norm": 0.18026836216449738,
"learning_rate": 1.5038430618486194e-06,
"loss": 1.1893560886383057,
"step": 2038
},
{
"epoch": 2.9737609329446064,
"grad_norm": 0.1259116381406784,
"learning_rate": 1.5031459619309653e-06,
"loss": 1.0219632387161255,
"step": 2040
},
{
"epoch": 2.9766763848396502,
"grad_norm": 0.15073135495185852,
"learning_rate": 1.502518551482103e-06,
"loss": 0.7194128036499023,
"step": 2042
},
{
"epoch": 2.979591836734694,
"grad_norm": 0.05049153417348862,
"learning_rate": 1.5019608369826692e-06,
"loss": 1.1609373092651367,
"step": 2044
},
{
"epoch": 2.9825072886297375,
"grad_norm": 0.11255478858947754,
"learning_rate": 1.501472824193396e-06,
"loss": 1.1452926397323608,
"step": 2046
},
{
"epoch": 2.9854227405247813,
"grad_norm": 0.16929762065410614,
"learning_rate": 1.5010545181550563e-06,
"loss": 0.5922563076019287,
"step": 2048
},
{
"epoch": 2.988338192419825,
"grad_norm": 0.1267116516828537,
"learning_rate": 1.5007059231884077e-06,
"loss": 0.49650248885154724,
"step": 2050
},
{
"epoch": 2.991253644314869,
"grad_norm": 0.1838807910680771,
"learning_rate": 1.5004270428941505e-06,
"loss": 1.1091796159744263,
"step": 2052
},
{
"epoch": 2.9941690962099123,
"grad_norm": 0.08408603817224503,
"learning_rate": 1.500217880152889e-06,
"loss": 1.0519981384277344,
"step": 2054
},
{
"epoch": 2.997084548104956,
"grad_norm": 0.36840710043907166,
"learning_rate": 1.5000784371251037e-06,
"loss": 0.9989621639251709,
"step": 2056
},
{
"epoch": 3.0,
"grad_norm": 0.15688389539718628,
"learning_rate": 1.5000087152511266e-06,
"loss": 1.1339861154556274,
"step": 2058
},
{
"epoch": 3.0,
"step": 2058,
"total_flos": 3.1865440491043553e+18,
"train_loss": 1.1440774658359985,
"train_runtime": 18974.7516,
"train_samples_per_second": 1.735,
"train_steps_per_second": 0.108
}
],
"logging_steps": 2,
"max_steps": 2058,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 9999999,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.1865440491043553e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}