{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 2058, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0029154518950437317, "grad_norm": 0.08236155658960342, "learning_rate": 1.4563106796116505e-07, "loss": 1.120621681213379, "step": 2 }, { "epoch": 0.0058309037900874635, "grad_norm": 0.5269019603729248, "learning_rate": 4.368932038834952e-07, "loss": 1.9105433225631714, "step": 4 }, { "epoch": 0.008746355685131196, "grad_norm": 0.3449331521987915, "learning_rate": 7.281553398058253e-07, "loss": 1.8805404901504517, "step": 6 }, { "epoch": 0.011661807580174927, "grad_norm": 0.10853756964206696, "learning_rate": 1.0194174757281554e-06, "loss": 1.5699371099472046, "step": 8 }, { "epoch": 0.014577259475218658, "grad_norm": 1.1428029537200928, "learning_rate": 1.3106796116504856e-06, "loss": 1.4362584352493286, "step": 10 }, { "epoch": 0.01749271137026239, "grad_norm": 0.5868045091629028, "learning_rate": 1.6019417475728156e-06, "loss": 2.0035324096679688, "step": 12 }, { "epoch": 0.02040816326530612, "grad_norm": 0.08258485049009323, "learning_rate": 1.8932038834951458e-06, "loss": 1.5183849334716797, "step": 14 }, { "epoch": 0.023323615160349854, "grad_norm": 2.6764633655548096, "learning_rate": 2.1844660194174755e-06, "loss": 1.8052839040756226, "step": 16 }, { "epoch": 0.026239067055393587, "grad_norm": 0.342227965593338, "learning_rate": 2.475728155339806e-06, "loss": 1.8929893970489502, "step": 18 }, { "epoch": 0.029154518950437316, "grad_norm": 0.1563744992017746, "learning_rate": 2.766990291262136e-06, "loss": 1.7904902696609497, "step": 20 }, { "epoch": 0.03206997084548105, "grad_norm": 0.1933348923921585, "learning_rate": 3.058252427184466e-06, "loss": 1.4513907432556152, "step": 22 }, { "epoch": 0.03498542274052478, "grad_norm": 0.5887855291366577, "learning_rate": 3.3495145631067963e-06, "loss": 2.2697947025299072, "step": 24 }, { "epoch": 0.037900874635568516, "grad_norm": 0.33995822072029114, "learning_rate": 3.6407766990291263e-06, "loss": 1.7317644357681274, "step": 26 }, { "epoch": 0.04081632653061224, "grad_norm": 0.6888100504875183, "learning_rate": 3.932038834951457e-06, "loss": 1.8117475509643555, "step": 28 }, { "epoch": 0.043731778425655975, "grad_norm": 0.7777397036552429, "learning_rate": 4.223300970873786e-06, "loss": 1.8055756092071533, "step": 30 }, { "epoch": 0.04664723032069971, "grad_norm": 0.4058018624782562, "learning_rate": 4.514563106796117e-06, "loss": 1.9432220458984375, "step": 32 }, { "epoch": 0.04956268221574344, "grad_norm": 0.22737905383110046, "learning_rate": 4.805825242718447e-06, "loss": 1.6058305501937866, "step": 34 }, { "epoch": 0.052478134110787174, "grad_norm": 0.3183080852031708, "learning_rate": 5.097087378640777e-06, "loss": 1.8658274412155151, "step": 36 }, { "epoch": 0.05539358600583091, "grad_norm": 0.17585590481758118, "learning_rate": 5.388349514563107e-06, "loss": 2.2423486709594727, "step": 38 }, { "epoch": 0.05830903790087463, "grad_norm": 0.10230281203985214, "learning_rate": 5.679611650485437e-06, "loss": 1.5302915573120117, "step": 40 }, { "epoch": 0.061224489795918366, "grad_norm": 0.4932399392127991, "learning_rate": 5.970873786407767e-06, "loss": 1.813106656074524, "step": 42 }, { "epoch": 0.0641399416909621, "grad_norm": 0.15170824527740479, "learning_rate": 6.262135922330097e-06, "loss": 1.6509969234466553, "step": 44 }, { "epoch": 0.06705539358600583, "grad_norm": 0.1539481282234192, "learning_rate": 6.553398058252427e-06, "loss": 1.7683537006378174, "step": 46 }, { "epoch": 0.06997084548104957, "grad_norm": 0.6599376201629639, "learning_rate": 6.844660194174757e-06, "loss": 2.1816630363464355, "step": 48 }, { "epoch": 0.0728862973760933, "grad_norm": 0.24105864763259888, "learning_rate": 7.135922330097088e-06, "loss": 1.910886526107788, "step": 50 }, { "epoch": 0.07580174927113703, "grad_norm": 0.09656477719545364, "learning_rate": 7.427184466019417e-06, "loss": 1.199069857597351, "step": 52 }, { "epoch": 0.07871720116618076, "grad_norm": 0.3129803240299225, "learning_rate": 7.718446601941748e-06, "loss": 1.7870614528656006, "step": 54 }, { "epoch": 0.08163265306122448, "grad_norm": 0.253489226102829, "learning_rate": 8.009708737864077e-06, "loss": 2.0801727771759033, "step": 56 }, { "epoch": 0.08454810495626822, "grad_norm": 0.12343698740005493, "learning_rate": 8.300970873786407e-06, "loss": 1.4909915924072266, "step": 58 }, { "epoch": 0.08746355685131195, "grad_norm": 0.19224074482917786, "learning_rate": 8.592233009708738e-06, "loss": 2.0119330883026123, "step": 60 }, { "epoch": 0.09037900874635568, "grad_norm": 0.2891639471054077, "learning_rate": 8.883495145631068e-06, "loss": 1.9431190490722656, "step": 62 }, { "epoch": 0.09329446064139942, "grad_norm": 0.8908348083496094, "learning_rate": 9.174757281553397e-06, "loss": 1.8723704814910889, "step": 64 }, { "epoch": 0.09620991253644315, "grad_norm": 0.09907913953065872, "learning_rate": 9.466019417475729e-06, "loss": 1.556423306465149, "step": 66 }, { "epoch": 0.09912536443148688, "grad_norm": 0.18893972039222717, "learning_rate": 9.75728155339806e-06, "loss": 1.8031634092330933, "step": 68 }, { "epoch": 0.10204081632653061, "grad_norm": 0.3021998107433319, "learning_rate": 1.004854368932039e-05, "loss": 1.6836217641830444, "step": 70 }, { "epoch": 0.10495626822157435, "grad_norm": 0.19465358555316925, "learning_rate": 1.0339805825242719e-05, "loss": 1.3162983655929565, "step": 72 }, { "epoch": 0.10787172011661808, "grad_norm": 0.35194098949432373, "learning_rate": 1.0631067961165048e-05, "loss": 1.6223976612091064, "step": 74 }, { "epoch": 0.11078717201166181, "grad_norm": 0.11608141660690308, "learning_rate": 1.092233009708738e-05, "loss": 1.5001176595687866, "step": 76 }, { "epoch": 0.11370262390670553, "grad_norm": 0.17615102231502533, "learning_rate": 1.121359223300971e-05, "loss": 1.6835155487060547, "step": 78 }, { "epoch": 0.11661807580174927, "grad_norm": 0.10972107201814651, "learning_rate": 1.1504854368932039e-05, "loss": 1.0958292484283447, "step": 80 }, { "epoch": 0.119533527696793, "grad_norm": 0.2486797422170639, "learning_rate": 1.1796116504854368e-05, "loss": 1.5743815898895264, "step": 82 }, { "epoch": 0.12244897959183673, "grad_norm": 0.6029097437858582, "learning_rate": 1.20873786407767e-05, "loss": 1.4908254146575928, "step": 84 }, { "epoch": 0.12536443148688048, "grad_norm": 0.47159314155578613, "learning_rate": 1.237864077669903e-05, "loss": 1.3921440839767456, "step": 86 }, { "epoch": 0.1282798833819242, "grad_norm": 0.23478780686855316, "learning_rate": 1.2669902912621359e-05, "loss": 1.60302734375, "step": 88 }, { "epoch": 0.13119533527696792, "grad_norm": 0.06909849494695663, "learning_rate": 1.2961165048543688e-05, "loss": 1.3646469116210938, "step": 90 }, { "epoch": 0.13411078717201166, "grad_norm": 0.12045982480049133, "learning_rate": 1.3252427184466021e-05, "loss": 1.3031418323516846, "step": 92 }, { "epoch": 0.13702623906705538, "grad_norm": 0.2616878151893616, "learning_rate": 1.3543689320388351e-05, "loss": 1.4213391542434692, "step": 94 }, { "epoch": 0.13994169096209913, "grad_norm": 0.19713328778743744, "learning_rate": 1.383495145631068e-05, "loss": 1.8326067924499512, "step": 96 }, { "epoch": 0.14285714285714285, "grad_norm": 0.42456164956092834, "learning_rate": 1.412621359223301e-05, "loss": 2.064007043838501, "step": 98 }, { "epoch": 0.1457725947521866, "grad_norm": 0.1171143651008606, "learning_rate": 1.4417475728155341e-05, "loss": 1.3881018161773682, "step": 100 }, { "epoch": 0.14868804664723032, "grad_norm": 0.5466513633728027, "learning_rate": 1.470873786407767e-05, "loss": 1.7975414991378784, "step": 102 }, { "epoch": 0.15160349854227406, "grad_norm": 0.2429724484682083, "learning_rate": 1.5e-05, "loss": 1.581913709640503, "step": 104 }, { "epoch": 0.15451895043731778, "grad_norm": 0.16082407534122467, "learning_rate": 1.4999965139018001e-05, "loss": 1.6313072443008423, "step": 106 }, { "epoch": 0.15743440233236153, "grad_norm": 0.20626085996627808, "learning_rate": 1.4999860556432087e-05, "loss": 1.4128293991088867, "step": 108 }, { "epoch": 0.16034985422740525, "grad_norm": 0.08978555351495743, "learning_rate": 1.4999686253322514e-05, "loss": 1.6325119733810425, "step": 110 }, { "epoch": 0.16326530612244897, "grad_norm": 0.17410112917423248, "learning_rate": 1.4999442231489687e-05, "loss": 1.6410691738128662, "step": 112 }, { "epoch": 0.1661807580174927, "grad_norm": 0.11147186905145645, "learning_rate": 1.4999128493454151e-05, "loss": 1.3302874565124512, "step": 114 }, { "epoch": 0.16909620991253643, "grad_norm": 0.44229331612586975, "learning_rate": 1.4998745042456563e-05, "loss": 1.6997064352035522, "step": 116 }, { "epoch": 0.17201166180758018, "grad_norm": 0.1818253993988037, "learning_rate": 1.499829188245766e-05, "loss": 1.3123167753219604, "step": 118 }, { "epoch": 0.1749271137026239, "grad_norm": 0.15915799140930176, "learning_rate": 1.4997769018138212e-05, "loss": 1.6660683155059814, "step": 120 }, { "epoch": 0.17784256559766765, "grad_norm": 0.2367630898952484, "learning_rate": 1.4997176454898977e-05, "loss": 1.4073443412780762, "step": 122 }, { "epoch": 0.18075801749271136, "grad_norm": 0.653868556022644, "learning_rate": 1.4996514198860649e-05, "loss": 1.351149082183838, "step": 124 }, { "epoch": 0.1836734693877551, "grad_norm": 0.08681757003068924, "learning_rate": 1.4995782256863785e-05, "loss": 1.3422613143920898, "step": 126 }, { "epoch": 0.18658892128279883, "grad_norm": 0.06514488905668259, "learning_rate": 1.4994980636468756e-05, "loss": 1.3343521356582642, "step": 128 }, { "epoch": 0.18950437317784258, "grad_norm": 0.9027652740478516, "learning_rate": 1.4994109345955632e-05, "loss": 1.4679464101791382, "step": 130 }, { "epoch": 0.1924198250728863, "grad_norm": 0.35018599033355713, "learning_rate": 1.4993168394324137e-05, "loss": 1.1963084936141968, "step": 132 }, { "epoch": 0.19533527696793002, "grad_norm": 0.13998304307460785, "learning_rate": 1.4992157791293523e-05, "loss": 1.333540678024292, "step": 134 }, { "epoch": 0.19825072886297376, "grad_norm": 0.11608795821666718, "learning_rate": 1.4991077547302497e-05, "loss": 1.5141417980194092, "step": 136 }, { "epoch": 0.20116618075801748, "grad_norm": 0.08046405762434006, "learning_rate": 1.4989927673509089e-05, "loss": 1.3266879320144653, "step": 138 }, { "epoch": 0.20408163265306123, "grad_norm": 0.1371355652809143, "learning_rate": 1.4988708181790555e-05, "loss": 1.2892866134643555, "step": 140 }, { "epoch": 0.20699708454810495, "grad_norm": 0.1368686705827713, "learning_rate": 1.4987419084743244e-05, "loss": 1.0467798709869385, "step": 142 }, { "epoch": 0.2099125364431487, "grad_norm": 0.23302382230758667, "learning_rate": 1.4986060395682469e-05, "loss": 1.1930760145187378, "step": 144 }, { "epoch": 0.21282798833819241, "grad_norm": 1.9061791896820068, "learning_rate": 1.4984632128642375e-05, "loss": 1.4475537538528442, "step": 146 }, { "epoch": 0.21574344023323616, "grad_norm": 0.18942643702030182, "learning_rate": 1.4983134298375787e-05, "loss": 1.376928448677063, "step": 148 }, { "epoch": 0.21865889212827988, "grad_norm": 0.21135789155960083, "learning_rate": 1.498156692035407e-05, "loss": 1.5480635166168213, "step": 150 }, { "epoch": 0.22157434402332363, "grad_norm": 0.13644421100616455, "learning_rate": 1.4979930010766947e-05, "loss": 1.7161264419555664, "step": 152 }, { "epoch": 0.22448979591836735, "grad_norm": 0.12430273741483688, "learning_rate": 1.4978223586522351e-05, "loss": 1.242932677268982, "step": 154 }, { "epoch": 0.22740524781341107, "grad_norm": 0.7622217535972595, "learning_rate": 1.4976447665246251e-05, "loss": 0.5300056338310242, "step": 156 }, { "epoch": 0.2303206997084548, "grad_norm": 0.13458958268165588, "learning_rate": 1.4974602265282451e-05, "loss": 1.571650743484497, "step": 158 }, { "epoch": 0.23323615160349853, "grad_norm": 0.2972854673862457, "learning_rate": 1.4972687405692425e-05, "loss": 1.2033076286315918, "step": 160 }, { "epoch": 0.23615160349854228, "grad_norm": 0.29232847690582275, "learning_rate": 1.4970703106255095e-05, "loss": 1.4756550788879395, "step": 162 }, { "epoch": 0.239067055393586, "grad_norm": 0.07210766524076462, "learning_rate": 1.4968649387466655e-05, "loss": 1.3033177852630615, "step": 164 }, { "epoch": 0.24198250728862974, "grad_norm": 0.5424373745918274, "learning_rate": 1.4966526270540327e-05, "loss": 1.0460329055786133, "step": 166 }, { "epoch": 0.24489795918367346, "grad_norm": 0.28463321924209595, "learning_rate": 1.4964333777406174e-05, "loss": 1.250373363494873, "step": 168 }, { "epoch": 0.2478134110787172, "grad_norm": 0.3408065140247345, "learning_rate": 1.496207193071085e-05, "loss": 0.8593610525131226, "step": 170 }, { "epoch": 0.25072886297376096, "grad_norm": 0.14829058945178986, "learning_rate": 1.4959740753817374e-05, "loss": 1.304344892501831, "step": 172 }, { "epoch": 0.2536443148688047, "grad_norm": 0.8436731696128845, "learning_rate": 1.4957340270804896e-05, "loss": 1.2743805646896362, "step": 174 }, { "epoch": 0.2565597667638484, "grad_norm": 0.11323361843824387, "learning_rate": 1.4954870506468434e-05, "loss": 1.329984188079834, "step": 176 }, { "epoch": 0.2594752186588921, "grad_norm": 0.09321129322052002, "learning_rate": 1.4952331486318626e-05, "loss": 1.2258719205856323, "step": 178 }, { "epoch": 0.26239067055393583, "grad_norm": 0.37252843379974365, "learning_rate": 1.4949723236581472e-05, "loss": 1.0671582221984863, "step": 180 }, { "epoch": 0.2653061224489796, "grad_norm": 0.3797838091850281, "learning_rate": 1.4947045784198052e-05, "loss": 1.2696138620376587, "step": 182 }, { "epoch": 0.26822157434402333, "grad_norm": 0.16805821657180786, "learning_rate": 1.4944299156824251e-05, "loss": 1.4738816022872925, "step": 184 }, { "epoch": 0.27113702623906705, "grad_norm": 0.2671731114387512, "learning_rate": 1.4941483382830475e-05, "loss": 1.3171305656433105, "step": 186 }, { "epoch": 0.27405247813411077, "grad_norm": 0.07962363958358765, "learning_rate": 1.4938598491301369e-05, "loss": 1.2901722192764282, "step": 188 }, { "epoch": 0.27696793002915454, "grad_norm": 0.280506432056427, "learning_rate": 1.4935644512035486e-05, "loss": 1.3184595108032227, "step": 190 }, { "epoch": 0.27988338192419826, "grad_norm": 0.13458193838596344, "learning_rate": 1.4932621475545014e-05, "loss": 1.1937448978424072, "step": 192 }, { "epoch": 0.282798833819242, "grad_norm": 0.7079519033432007, "learning_rate": 1.4929529413055442e-05, "loss": 1.1439327001571655, "step": 194 }, { "epoch": 0.2857142857142857, "grad_norm": 0.18462230265140533, "learning_rate": 1.4926368356505236e-05, "loss": 1.5497668981552124, "step": 196 }, { "epoch": 0.2886297376093295, "grad_norm": 0.16043758392333984, "learning_rate": 1.492313833854552e-05, "loss": 1.4568783044815063, "step": 198 }, { "epoch": 0.2915451895043732, "grad_norm": 0.42396068572998047, "learning_rate": 1.491983939253973e-05, "loss": 1.6005096435546875, "step": 200 }, { "epoch": 0.2944606413994169, "grad_norm": 0.21155761182308197, "learning_rate": 1.4916471552563272e-05, "loss": 1.3397752046585083, "step": 202 }, { "epoch": 0.29737609329446063, "grad_norm": 0.17219677567481995, "learning_rate": 1.4913034853403173e-05, "loss": 1.3317774534225464, "step": 204 }, { "epoch": 0.30029154518950435, "grad_norm": 0.12617312371730804, "learning_rate": 1.4909529330557714e-05, "loss": 1.2119510173797607, "step": 206 }, { "epoch": 0.3032069970845481, "grad_norm": 0.14850527048110962, "learning_rate": 1.4905955020236072e-05, "loss": 1.385998010635376, "step": 208 }, { "epoch": 0.30612244897959184, "grad_norm": 0.1191219687461853, "learning_rate": 1.490231195935794e-05, "loss": 1.5534725189208984, "step": 210 }, { "epoch": 0.30903790087463556, "grad_norm": 0.06989572942256927, "learning_rate": 1.4898600185553152e-05, "loss": 1.4775235652923584, "step": 212 }, { "epoch": 0.3119533527696793, "grad_norm": 0.08547376841306686, "learning_rate": 1.4894819737161285e-05, "loss": 1.033743977546692, "step": 214 }, { "epoch": 0.31486880466472306, "grad_norm": 0.11992272734642029, "learning_rate": 1.489097065323127e-05, "loss": 1.0980379581451416, "step": 216 }, { "epoch": 0.3177842565597668, "grad_norm": 0.30880632996559143, "learning_rate": 1.488705297352099e-05, "loss": 1.317891001701355, "step": 218 }, { "epoch": 0.3206997084548105, "grad_norm": 0.6510909795761108, "learning_rate": 1.4883066738496858e-05, "loss": 0.9413776993751526, "step": 220 }, { "epoch": 0.3236151603498542, "grad_norm": 0.43388184905052185, "learning_rate": 1.4879011989333418e-05, "loss": 1.381697177886963, "step": 222 }, { "epoch": 0.32653061224489793, "grad_norm": 0.21984761953353882, "learning_rate": 1.4874888767912902e-05, "loss": 1.2626378536224365, "step": 224 }, { "epoch": 0.3294460641399417, "grad_norm": 0.2687482237815857, "learning_rate": 1.48706971168248e-05, "loss": 1.2034857273101807, "step": 226 }, { "epoch": 0.3323615160349854, "grad_norm": 0.08195902407169342, "learning_rate": 1.4866437079365439e-05, "loss": 1.2773680686950684, "step": 228 }, { "epoch": 0.33527696793002915, "grad_norm": 0.1009335145354271, "learning_rate": 1.4862108699537504e-05, "loss": 1.0853190422058105, "step": 230 }, { "epoch": 0.33819241982507287, "grad_norm": 0.3376968204975128, "learning_rate": 1.4857712022049617e-05, "loss": 1.5481150150299072, "step": 232 }, { "epoch": 0.34110787172011664, "grad_norm": 0.7441994547843933, "learning_rate": 1.4853247092315843e-05, "loss": 0.9510725140571594, "step": 234 }, { "epoch": 0.34402332361516036, "grad_norm": 0.04717664048075676, "learning_rate": 1.484871395645525e-05, "loss": 1.4734127521514893, "step": 236 }, { "epoch": 0.3469387755102041, "grad_norm": 0.7886844873428345, "learning_rate": 1.4844112661291409e-05, "loss": 1.3192212581634521, "step": 238 }, { "epoch": 0.3498542274052478, "grad_norm": 0.4841660261154175, "learning_rate": 1.4839443254351925e-05, "loss": 1.691177487373352, "step": 240 }, { "epoch": 0.35276967930029157, "grad_norm": 0.06492076069116592, "learning_rate": 1.4834705783867948e-05, "loss": 1.329490065574646, "step": 242 }, { "epoch": 0.3556851311953353, "grad_norm": 0.13113148510456085, "learning_rate": 1.4829900298773655e-05, "loss": 1.4308984279632568, "step": 244 }, { "epoch": 0.358600583090379, "grad_norm": 0.2137414813041687, "learning_rate": 1.4825026848705774e-05, "loss": 1.5191004276275635, "step": 246 }, { "epoch": 0.36151603498542273, "grad_norm": 0.1302558183670044, "learning_rate": 1.482008548400304e-05, "loss": 1.1112821102142334, "step": 248 }, { "epoch": 0.36443148688046645, "grad_norm": 0.24971581995487213, "learning_rate": 1.4815076255705704e-05, "loss": 1.2628142833709717, "step": 250 }, { "epoch": 0.3673469387755102, "grad_norm": 0.06637357920408249, "learning_rate": 1.4809999215554978e-05, "loss": 1.0483888387680054, "step": 252 }, { "epoch": 0.37026239067055394, "grad_norm": 0.16134153306484222, "learning_rate": 1.4804854415992531e-05, "loss": 0.8284896612167358, "step": 254 }, { "epoch": 0.37317784256559766, "grad_norm": 0.22190812230110168, "learning_rate": 1.479964191015992e-05, "loss": 1.228007197380066, "step": 256 }, { "epoch": 0.3760932944606414, "grad_norm": 0.3965594172477722, "learning_rate": 1.4794361751898052e-05, "loss": 1.461411952972412, "step": 258 }, { "epoch": 0.37900874635568516, "grad_norm": 0.08565931022167206, "learning_rate": 1.4789013995746636e-05, "loss": 1.33036208152771, "step": 260 }, { "epoch": 0.3819241982507289, "grad_norm": 0.11709296703338623, "learning_rate": 1.4783598696943603e-05, "loss": 1.1803240776062012, "step": 262 }, { "epoch": 0.3848396501457726, "grad_norm": 0.15489286184310913, "learning_rate": 1.4778115911424552e-05, "loss": 1.234659194946289, "step": 264 }, { "epoch": 0.3877551020408163, "grad_norm": 0.19184595346450806, "learning_rate": 1.4772565695822158e-05, "loss": 1.2707804441452026, "step": 266 }, { "epoch": 0.39067055393586003, "grad_norm": 0.1356089860200882, "learning_rate": 1.4766948107465598e-05, "loss": 1.192071557044983, "step": 268 }, { "epoch": 0.3935860058309038, "grad_norm": 0.11989542841911316, "learning_rate": 1.476126320437995e-05, "loss": 1.391566276550293, "step": 270 }, { "epoch": 0.3965014577259475, "grad_norm": 0.47645920515060425, "learning_rate": 1.4755511045285605e-05, "loss": 1.1564279794692993, "step": 272 }, { "epoch": 0.39941690962099125, "grad_norm": 0.4125911593437195, "learning_rate": 1.4749691689597646e-05, "loss": 1.536888599395752, "step": 274 }, { "epoch": 0.40233236151603496, "grad_norm": 0.08971330523490906, "learning_rate": 1.4743805197425243e-05, "loss": 1.2086325883865356, "step": 276 }, { "epoch": 0.40524781341107874, "grad_norm": 0.08347416669130325, "learning_rate": 1.4737851629571035e-05, "loss": 1.190657615661621, "step": 278 }, { "epoch": 0.40816326530612246, "grad_norm": 0.20587654411792755, "learning_rate": 1.4731831047530493e-05, "loss": 1.3656525611877441, "step": 280 }, { "epoch": 0.4110787172011662, "grad_norm": 0.22432878613471985, "learning_rate": 1.4725743513491294e-05, "loss": 1.1042253971099854, "step": 282 }, { "epoch": 0.4139941690962099, "grad_norm": 0.26549288630485535, "learning_rate": 1.471958909033267e-05, "loss": 1.3797943592071533, "step": 284 }, { "epoch": 0.41690962099125367, "grad_norm": 0.15680500864982605, "learning_rate": 1.4713367841624764e-05, "loss": 1.3377087116241455, "step": 286 }, { "epoch": 0.4198250728862974, "grad_norm": 0.4737466275691986, "learning_rate": 1.4707079831627975e-05, "loss": 1.3034449815750122, "step": 288 }, { "epoch": 0.4227405247813411, "grad_norm": 0.1271553486585617, "learning_rate": 1.4700725125292288e-05, "loss": 1.1474194526672363, "step": 290 }, { "epoch": 0.42565597667638483, "grad_norm": 0.06102332845330238, "learning_rate": 1.469430378825661e-05, "loss": 1.1918046474456787, "step": 292 }, { "epoch": 0.42857142857142855, "grad_norm": 0.15844929218292236, "learning_rate": 1.4687815886848083e-05, "loss": 1.206626296043396, "step": 294 }, { "epoch": 0.4314868804664723, "grad_norm": 0.24055972695350647, "learning_rate": 1.4681261488081409e-05, "loss": 1.5187625885009766, "step": 296 }, { "epoch": 0.43440233236151604, "grad_norm": 0.7840580344200134, "learning_rate": 1.4674640659658149e-05, "loss": 1.0932797193527222, "step": 298 }, { "epoch": 0.43731778425655976, "grad_norm": 0.10844213515520096, "learning_rate": 1.4667953469966035e-05, "loss": 1.1951229572296143, "step": 300 }, { "epoch": 0.4402332361516035, "grad_norm": 0.11183289438486099, "learning_rate": 1.466119998807825e-05, "loss": 1.1717019081115723, "step": 302 }, { "epoch": 0.44314868804664725, "grad_norm": 0.30403003096580505, "learning_rate": 1.4654380283752722e-05, "loss": 1.4022222757339478, "step": 304 }, { "epoch": 0.446064139941691, "grad_norm": 0.13156169652938843, "learning_rate": 1.4647494427431404e-05, "loss": 1.4486730098724365, "step": 306 }, { "epoch": 0.4489795918367347, "grad_norm": 0.1186894103884697, "learning_rate": 1.4640542490239546e-05, "loss": 1.2088007926940918, "step": 308 }, { "epoch": 0.4518950437317784, "grad_norm": 0.3326444625854492, "learning_rate": 1.4633524543984956e-05, "loss": 1.3544650077819824, "step": 310 }, { "epoch": 0.45481049562682213, "grad_norm": 0.1379825323820114, "learning_rate": 1.4626440661157263e-05, "loss": 1.330404281616211, "step": 312 }, { "epoch": 0.4577259475218659, "grad_norm": 0.1476340889930725, "learning_rate": 1.4619290914927168e-05, "loss": 1.3507134914398193, "step": 314 }, { "epoch": 0.4606413994169096, "grad_norm": 0.1802261918783188, "learning_rate": 1.4612075379145683e-05, "loss": 1.2097649574279785, "step": 316 }, { "epoch": 0.46355685131195334, "grad_norm": 0.12077829986810684, "learning_rate": 1.460479412834338e-05, "loss": 1.3490198850631714, "step": 318 }, { "epoch": 0.46647230320699706, "grad_norm": 0.22901231050491333, "learning_rate": 1.4597447237729602e-05, "loss": 1.3041571378707886, "step": 320 }, { "epoch": 0.46938775510204084, "grad_norm": 0.1394783854484558, "learning_rate": 1.4590034783191705e-05, "loss": 1.3151127099990845, "step": 322 }, { "epoch": 0.47230320699708456, "grad_norm": 0.15815502405166626, "learning_rate": 1.4582556841294272e-05, "loss": 1.4624110460281372, "step": 324 }, { "epoch": 0.4752186588921283, "grad_norm": 0.2137562483549118, "learning_rate": 1.45750134892783e-05, "loss": 1.4430997371673584, "step": 326 }, { "epoch": 0.478134110787172, "grad_norm": 0.3299601376056671, "learning_rate": 1.4567404805060432e-05, "loss": 1.3537228107452393, "step": 328 }, { "epoch": 0.48104956268221577, "grad_norm": 0.21562345325946808, "learning_rate": 1.4559730867232141e-05, "loss": 1.169204592704773, "step": 330 }, { "epoch": 0.4839650145772595, "grad_norm": 0.1736089438199997, "learning_rate": 1.4551991755058902e-05, "loss": 1.1071885824203491, "step": 332 }, { "epoch": 0.4868804664723032, "grad_norm": 0.1834300458431244, "learning_rate": 1.45441875484794e-05, "loss": 1.5676034688949585, "step": 334 }, { "epoch": 0.4897959183673469, "grad_norm": 0.0843748077750206, "learning_rate": 1.4536318328104693e-05, "loss": 1.2121503353118896, "step": 336 }, { "epoch": 0.49271137026239065, "grad_norm": 0.36758843064308167, "learning_rate": 1.452838417521737e-05, "loss": 1.1275235414505005, "step": 338 }, { "epoch": 0.4956268221574344, "grad_norm": 0.18445612490177155, "learning_rate": 1.452038517177072e-05, "loss": 1.3472223281860352, "step": 340 }, { "epoch": 0.49854227405247814, "grad_norm": 0.05781463533639908, "learning_rate": 1.4512321400387896e-05, "loss": 1.0872787237167358, "step": 342 }, { "epoch": 0.5014577259475219, "grad_norm": 0.19518744945526123, "learning_rate": 1.4504192944361035e-05, "loss": 1.1387406587600708, "step": 344 }, { "epoch": 0.5043731778425656, "grad_norm": 0.12471595406532288, "learning_rate": 1.4495999887650425e-05, "loss": 1.2551310062408447, "step": 346 }, { "epoch": 0.5072886297376094, "grad_norm": 0.21368560194969177, "learning_rate": 1.4487742314883622e-05, "loss": 1.4745806455612183, "step": 348 }, { "epoch": 0.5102040816326531, "grad_norm": 0.20728199183940887, "learning_rate": 1.447942031135458e-05, "loss": 1.3776572942733765, "step": 350 }, { "epoch": 0.5131195335276968, "grad_norm": 0.3676038384437561, "learning_rate": 1.447103396302277e-05, "loss": 1.393446922302246, "step": 352 }, { "epoch": 0.5160349854227405, "grad_norm": 0.4812930226325989, "learning_rate": 1.4462583356512293e-05, "loss": 1.6455305814743042, "step": 354 }, { "epoch": 0.5189504373177842, "grad_norm": 0.14569929242134094, "learning_rate": 1.4454068579110982e-05, "loss": 1.1214039325714111, "step": 356 }, { "epoch": 0.521865889212828, "grad_norm": 0.08566080778837204, "learning_rate": 1.4445489718769505e-05, "loss": 1.0862312316894531, "step": 358 }, { "epoch": 0.5247813411078717, "grad_norm": 0.1737866848707199, "learning_rate": 1.4436846864100454e-05, "loss": 1.4677766561508179, "step": 360 }, { "epoch": 0.5276967930029155, "grad_norm": 0.24478068947792053, "learning_rate": 1.4428140104377428e-05, "loss": 1.4088914394378662, "step": 362 }, { "epoch": 0.5306122448979592, "grad_norm": 0.07167135179042816, "learning_rate": 1.4419369529534117e-05, "loss": 1.0589109659194946, "step": 364 }, { "epoch": 0.5335276967930029, "grad_norm": 0.4344414472579956, "learning_rate": 1.4410535230163361e-05, "loss": 1.0916839838027954, "step": 366 }, { "epoch": 0.5364431486880467, "grad_norm": 0.1588602066040039, "learning_rate": 1.440163729751623e-05, "loss": 1.2339898347854614, "step": 368 }, { "epoch": 0.5393586005830904, "grad_norm": 0.08355646580457687, "learning_rate": 1.4392675823501075e-05, "loss": 1.0559823513031006, "step": 370 }, { "epoch": 0.5422740524781341, "grad_norm": 0.09950409084558487, "learning_rate": 1.4383650900682563e-05, "loss": 1.1664844751358032, "step": 372 }, { "epoch": 0.5451895043731778, "grad_norm": 0.21663829684257507, "learning_rate": 1.4374562622280753e-05, "loss": 1.2800816297531128, "step": 374 }, { "epoch": 0.5481049562682215, "grad_norm": 0.45721420645713806, "learning_rate": 1.4365411082170105e-05, "loss": 1.0968526601791382, "step": 376 }, { "epoch": 0.5510204081632653, "grad_norm": 0.34029263257980347, "learning_rate": 1.435619637487852e-05, "loss": 1.4795793294906616, "step": 378 }, { "epoch": 0.5539358600583091, "grad_norm": 0.07205039262771606, "learning_rate": 1.4346918595586371e-05, "loss": 0.8370588421821594, "step": 380 }, { "epoch": 0.5568513119533528, "grad_norm": 0.12168021500110626, "learning_rate": 1.4337577840125506e-05, "loss": 1.2106021642684937, "step": 382 }, { "epoch": 0.5597667638483965, "grad_norm": 0.32209160923957825, "learning_rate": 1.4328174204978268e-05, "loss": 1.321066975593567, "step": 384 }, { "epoch": 0.5626822157434402, "grad_norm": 0.2250237762928009, "learning_rate": 1.4318707787276499e-05, "loss": 1.292655348777771, "step": 386 }, { "epoch": 0.565597667638484, "grad_norm": 0.2742823362350464, "learning_rate": 1.4309178684800527e-05, "loss": 1.2520337104797363, "step": 388 }, { "epoch": 0.5685131195335277, "grad_norm": 0.27688226103782654, "learning_rate": 1.4299586995978166e-05, "loss": 1.38676917552948, "step": 390 }, { "epoch": 0.5714285714285714, "grad_norm": 0.2949990928173065, "learning_rate": 1.4289932819883696e-05, "loss": 0.8451089262962341, "step": 392 }, { "epoch": 0.5743440233236151, "grad_norm": 0.1089571937918663, "learning_rate": 1.4280216256236834e-05, "loss": 1.2847154140472412, "step": 394 }, { "epoch": 0.577259475218659, "grad_norm": 0.19184090197086334, "learning_rate": 1.427043740540172e-05, "loss": 1.387587547302246, "step": 396 }, { "epoch": 0.5801749271137027, "grad_norm": 0.54814612865448, "learning_rate": 1.4260596368385856e-05, "loss": 1.3909755945205688, "step": 398 }, { "epoch": 0.5830903790087464, "grad_norm": 0.12275420129299164, "learning_rate": 1.4250693246839092e-05, "loss": 1.2625775337219238, "step": 400 }, { "epoch": 0.5860058309037901, "grad_norm": 0.7932881712913513, "learning_rate": 1.4240728143052544e-05, "loss": 1.2152988910675049, "step": 402 }, { "epoch": 0.5889212827988338, "grad_norm": 0.37155717611312866, "learning_rate": 1.4230701159957563e-05, "loss": 1.3423740863800049, "step": 404 }, { "epoch": 0.5918367346938775, "grad_norm": 0.18500366806983948, "learning_rate": 1.4220612401124663e-05, "loss": 1.3449385166168213, "step": 406 }, { "epoch": 0.5947521865889213, "grad_norm": 0.11731770634651184, "learning_rate": 1.4210461970762447e-05, "loss": 1.1119245290756226, "step": 408 }, { "epoch": 0.597667638483965, "grad_norm": 0.10353056341409683, "learning_rate": 1.4200249973716534e-05, "loss": 1.263884425163269, "step": 410 }, { "epoch": 0.6005830903790087, "grad_norm": 0.14419683814048767, "learning_rate": 1.418997651546848e-05, "loss": 1.307144284248352, "step": 412 }, { "epoch": 0.6034985422740525, "grad_norm": 0.10403470695018768, "learning_rate": 1.4179641702134683e-05, "loss": 1.1156686544418335, "step": 414 }, { "epoch": 0.6064139941690962, "grad_norm": 0.14356708526611328, "learning_rate": 1.4169245640465292e-05, "loss": 1.1539418697357178, "step": 416 }, { "epoch": 0.60932944606414, "grad_norm": 0.20612405240535736, "learning_rate": 1.415878843784309e-05, "loss": 1.2595444917678833, "step": 418 }, { "epoch": 0.6122448979591837, "grad_norm": 0.11746654659509659, "learning_rate": 1.414827020228241e-05, "loss": 1.2829625606536865, "step": 420 }, { "epoch": 0.6151603498542274, "grad_norm": 0.16831901669502258, "learning_rate": 1.4137691042427996e-05, "loss": 1.3437942266464233, "step": 422 }, { "epoch": 0.6180758017492711, "grad_norm": 0.35040462017059326, "learning_rate": 1.4127051067553895e-05, "loss": 1.4076067209243774, "step": 424 }, { "epoch": 0.6209912536443148, "grad_norm": 0.061461448669433594, "learning_rate": 1.4116350387562316e-05, "loss": 1.0884675979614258, "step": 426 }, { "epoch": 0.6239067055393586, "grad_norm": 0.15810243785381317, "learning_rate": 1.4105589112982514e-05, "loss": 1.2547569274902344, "step": 428 }, { "epoch": 0.6268221574344023, "grad_norm": 0.8622474074363708, "learning_rate": 1.4094767354969625e-05, "loss": 1.3274284601211548, "step": 430 }, { "epoch": 0.6297376093294461, "grad_norm": 0.13593973219394684, "learning_rate": 1.4083885225303535e-05, "loss": 1.2320295572280884, "step": 432 }, { "epoch": 0.6326530612244898, "grad_norm": 0.07243333756923676, "learning_rate": 1.407294283638772e-05, "loss": 1.4667418003082275, "step": 434 }, { "epoch": 0.6355685131195336, "grad_norm": 0.07801775634288788, "learning_rate": 1.406194030124808e-05, "loss": 1.3038822412490845, "step": 436 }, { "epoch": 0.6384839650145773, "grad_norm": 0.304385781288147, "learning_rate": 1.4050877733531783e-05, "loss": 1.3447275161743164, "step": 438 }, { "epoch": 0.641399416909621, "grad_norm": 0.10865950584411621, "learning_rate": 1.4039755247506077e-05, "loss": 0.6549509167671204, "step": 440 }, { "epoch": 0.6443148688046647, "grad_norm": 0.28575700521469116, "learning_rate": 1.4028572958057122e-05, "loss": 1.1795369386672974, "step": 442 }, { "epoch": 0.6472303206997084, "grad_norm": 0.5246424078941345, "learning_rate": 1.4017330980688798e-05, "loss": 1.1711264848709106, "step": 444 }, { "epoch": 0.6501457725947521, "grad_norm": 0.18553860485553741, "learning_rate": 1.400602943152151e-05, "loss": 1.2232381105422974, "step": 446 }, { "epoch": 0.6530612244897959, "grad_norm": 0.12490701675415039, "learning_rate": 1.3994668427290992e-05, "loss": 1.3382079601287842, "step": 448 }, { "epoch": 0.6559766763848397, "grad_norm": 0.22397291660308838, "learning_rate": 1.3983248085347099e-05, "loss": 1.3612568378448486, "step": 450 }, { "epoch": 0.6588921282798834, "grad_norm": 0.35306331515312195, "learning_rate": 1.3971768523652598e-05, "loss": 1.2464739084243774, "step": 452 }, { "epoch": 0.6618075801749271, "grad_norm": 0.2772669494152069, "learning_rate": 1.3960229860781952e-05, "loss": 1.2844020128250122, "step": 454 }, { "epoch": 0.6647230320699709, "grad_norm": 0.10081592947244644, "learning_rate": 1.3948632215920074e-05, "loss": 1.2844829559326172, "step": 456 }, { "epoch": 0.6676384839650146, "grad_norm": 0.4896067678928375, "learning_rate": 1.3936975708861129e-05, "loss": 1.2661151885986328, "step": 458 }, { "epoch": 0.6705539358600583, "grad_norm": 0.09726856648921967, "learning_rate": 1.3925260460007276e-05, "loss": 1.3103440999984741, "step": 460 }, { "epoch": 0.673469387755102, "grad_norm": 0.15830014646053314, "learning_rate": 1.3913486590367426e-05, "loss": 1.2458621263504028, "step": 462 }, { "epoch": 0.6763848396501457, "grad_norm": 0.3230348229408264, "learning_rate": 1.3901654221555998e-05, "loss": 1.534423589706421, "step": 464 }, { "epoch": 0.6793002915451894, "grad_norm": 0.1509629487991333, "learning_rate": 1.3889763475791653e-05, "loss": 1.2820494174957275, "step": 466 }, { "epoch": 0.6822157434402333, "grad_norm": 0.34530624747276306, "learning_rate": 1.3877814475896049e-05, "loss": 1.2601618766784668, "step": 468 }, { "epoch": 0.685131195335277, "grad_norm": 0.10481005907058716, "learning_rate": 1.3865807345292548e-05, "loss": 1.1044316291809082, "step": 470 }, { "epoch": 0.6880466472303207, "grad_norm": 0.07815049588680267, "learning_rate": 1.3853742208004967e-05, "loss": 0.741702139377594, "step": 472 }, { "epoch": 0.6909620991253644, "grad_norm": 0.22590938210487366, "learning_rate": 1.3841619188656277e-05, "loss": 1.2955025434494019, "step": 474 }, { "epoch": 0.6938775510204082, "grad_norm": 0.08640377968549728, "learning_rate": 1.3829438412467324e-05, "loss": 1.1016216278076172, "step": 476 }, { "epoch": 0.6967930029154519, "grad_norm": 0.09496122598648071, "learning_rate": 1.3817200005255538e-05, "loss": 1.1232506036758423, "step": 478 }, { "epoch": 0.6997084548104956, "grad_norm": 0.07495642453432083, "learning_rate": 1.380490409343363e-05, "loss": 1.2044416666030884, "step": 480 }, { "epoch": 0.7026239067055393, "grad_norm": 0.339239239692688, "learning_rate": 1.3792550804008275e-05, "loss": 1.2485543489456177, "step": 482 }, { "epoch": 0.7055393586005831, "grad_norm": 0.17572255432605743, "learning_rate": 1.3780140264578833e-05, "loss": 1.2681964635849, "step": 484 }, { "epoch": 0.7084548104956269, "grad_norm": 0.16934579610824585, "learning_rate": 1.3767672603335994e-05, "loss": 1.4810711145401, "step": 486 }, { "epoch": 0.7113702623906706, "grad_norm": 0.04486797749996185, "learning_rate": 1.375514794906047e-05, "loss": 1.046045184135437, "step": 488 }, { "epoch": 0.7142857142857143, "grad_norm": 0.700762927532196, "learning_rate": 1.374256643112167e-05, "loss": 1.0363354682922363, "step": 490 }, { "epoch": 0.717201166180758, "grad_norm": 0.2569397985935211, "learning_rate": 1.3729928179476355e-05, "loss": 1.3074244260787964, "step": 492 }, { "epoch": 0.7201166180758017, "grad_norm": 0.20563913881778717, "learning_rate": 1.3717233324667303e-05, "loss": 1.1921494007110596, "step": 494 }, { "epoch": 0.7230320699708455, "grad_norm": 0.201784148812294, "learning_rate": 1.3704481997821944e-05, "loss": 1.3657381534576416, "step": 496 }, { "epoch": 0.7259475218658892, "grad_norm": 0.20627616345882416, "learning_rate": 1.3691674330651038e-05, "loss": 1.062203288078308, "step": 498 }, { "epoch": 0.7288629737609329, "grad_norm": 0.04925013706088066, "learning_rate": 1.3678810455447272e-05, "loss": 1.0565184354782104, "step": 500 }, { "epoch": 0.7317784256559767, "grad_norm": 0.2994559407234192, "learning_rate": 1.3665890505083932e-05, "loss": 0.7342221140861511, "step": 502 }, { "epoch": 0.7346938775510204, "grad_norm": 0.2312147170305252, "learning_rate": 1.365291461301351e-05, "loss": 1.1462215185165405, "step": 504 }, { "epoch": 0.7376093294460642, "grad_norm": 0.1264645904302597, "learning_rate": 1.3639882913266321e-05, "loss": 1.2779966592788696, "step": 506 }, { "epoch": 0.7405247813411079, "grad_norm": 0.09908440709114075, "learning_rate": 1.3626795540449146e-05, "loss": 1.0050630569458008, "step": 508 }, { "epoch": 0.7434402332361516, "grad_norm": 0.0948040708899498, "learning_rate": 1.3613652629743807e-05, "loss": 0.9955649375915527, "step": 510 }, { "epoch": 0.7463556851311953, "grad_norm": 0.442697137594223, "learning_rate": 1.3600454316905794e-05, "loss": 1.2189491987228394, "step": 512 }, { "epoch": 0.749271137026239, "grad_norm": 0.08219840377569199, "learning_rate": 1.3587200738262852e-05, "loss": 1.2169828414916992, "step": 514 }, { "epoch": 0.7521865889212828, "grad_norm": 0.39055153727531433, "learning_rate": 1.3573892030713581e-05, "loss": 1.1840598583221436, "step": 516 }, { "epoch": 0.7551020408163265, "grad_norm": 0.16979742050170898, "learning_rate": 1.3560528331726012e-05, "loss": 1.2608612775802612, "step": 518 }, { "epoch": 0.7580174927113703, "grad_norm": 0.18750780820846558, "learning_rate": 1.3547109779336198e-05, "loss": 1.0730546712875366, "step": 520 }, { "epoch": 0.760932944606414, "grad_norm": 0.16917291283607483, "learning_rate": 1.3533636512146778e-05, "loss": 0.8358052968978882, "step": 522 }, { "epoch": 0.7638483965014577, "grad_norm": 0.21615351736545563, "learning_rate": 1.3520108669325555e-05, "loss": 1.2778382301330566, "step": 524 }, { "epoch": 0.7667638483965015, "grad_norm": 0.2199150174856186, "learning_rate": 1.350652639060405e-05, "loss": 1.3584939241409302, "step": 526 }, { "epoch": 0.7696793002915452, "grad_norm": 0.12701602280139923, "learning_rate": 1.3492889816276057e-05, "loss": 1.2652432918548584, "step": 528 }, { "epoch": 0.7725947521865889, "grad_norm": 0.2043219953775406, "learning_rate": 1.3479199087196211e-05, "loss": 0.9363166093826294, "step": 530 }, { "epoch": 0.7755102040816326, "grad_norm": 0.25679811835289, "learning_rate": 1.3465454344778514e-05, "loss": 1.30280601978302, "step": 532 }, { "epoch": 0.7784256559766763, "grad_norm": 0.1782459169626236, "learning_rate": 1.3451655730994879e-05, "loss": 0.8852262496948242, "step": 534 }, { "epoch": 0.7813411078717201, "grad_norm": 0.15585428476333618, "learning_rate": 1.3437803388373673e-05, "loss": 1.2652050256729126, "step": 536 }, { "epoch": 0.7842565597667639, "grad_norm": 0.28724268078804016, "learning_rate": 1.3423897459998234e-05, "loss": 1.5547116994857788, "step": 538 }, { "epoch": 0.7871720116618076, "grad_norm": 0.2500779628753662, "learning_rate": 1.3409938089505396e-05, "loss": 1.2525265216827393, "step": 540 }, { "epoch": 0.7900874635568513, "grad_norm": 0.45470234751701355, "learning_rate": 1.3395925421084008e-05, "loss": 1.2771704196929932, "step": 542 }, { "epoch": 0.793002915451895, "grad_norm": 0.29030269384384155, "learning_rate": 1.3381859599473444e-05, "loss": 1.17940354347229, "step": 544 }, { "epoch": 0.7959183673469388, "grad_norm": 0.49152040481567383, "learning_rate": 1.3367740769962097e-05, "loss": 1.2586897611618042, "step": 546 }, { "epoch": 0.7988338192419825, "grad_norm": 0.6251534819602966, "learning_rate": 1.335356907838591e-05, "loss": 1.15794837474823, "step": 548 }, { "epoch": 0.8017492711370262, "grad_norm": 1.237188696861267, "learning_rate": 1.3339344671126823e-05, "loss": 1.2396069765090942, "step": 550 }, { "epoch": 0.8046647230320699, "grad_norm": 0.18844130635261536, "learning_rate": 1.3325067695111302e-05, "loss": 1.3848127126693726, "step": 552 }, { "epoch": 0.8075801749271136, "grad_norm": 0.0720212385058403, "learning_rate": 1.3310738297808797e-05, "loss": 1.2827481031417847, "step": 554 }, { "epoch": 0.8104956268221575, "grad_norm": 0.30795788764953613, "learning_rate": 1.3296356627230233e-05, "loss": 1.2539678812026978, "step": 556 }, { "epoch": 0.8134110787172012, "grad_norm": 0.12987054884433746, "learning_rate": 1.328192283192647e-05, "loss": 1.1838477849960327, "step": 558 }, { "epoch": 0.8163265306122449, "grad_norm": 0.11866369843482971, "learning_rate": 1.3267437060986776e-05, "loss": 1.2138683795928955, "step": 560 }, { "epoch": 0.8192419825072886, "grad_norm": 1.3589751720428467, "learning_rate": 1.3252899464037285e-05, "loss": 1.241382122039795, "step": 562 }, { "epoch": 0.8221574344023324, "grad_norm": 0.11315155029296875, "learning_rate": 1.3238310191239449e-05, "loss": 1.2092612981796265, "step": 564 }, { "epoch": 0.8250728862973761, "grad_norm": 0.16663309931755066, "learning_rate": 1.3223669393288492e-05, "loss": 1.3294919729232788, "step": 566 }, { "epoch": 0.8279883381924198, "grad_norm": 0.18580849468708038, "learning_rate": 1.320897722141185e-05, "loss": 1.165387749671936, "step": 568 }, { "epoch": 0.8309037900874635, "grad_norm": 0.14969834685325623, "learning_rate": 1.3194233827367605e-05, "loss": 1.1585993766784668, "step": 570 }, { "epoch": 0.8338192419825073, "grad_norm": 0.18476836383342743, "learning_rate": 1.317943936344293e-05, "loss": 1.2080127000808716, "step": 572 }, { "epoch": 0.8367346938775511, "grad_norm": 0.19693532586097717, "learning_rate": 1.3164593982452502e-05, "loss": 1.4070855379104614, "step": 574 }, { "epoch": 0.8396501457725948, "grad_norm": 0.3612503111362457, "learning_rate": 1.3149697837736932e-05, "loss": 1.375995397567749, "step": 576 }, { "epoch": 0.8425655976676385, "grad_norm": 0.2689799964427948, "learning_rate": 1.3134751083161177e-05, "loss": 1.5882023572921753, "step": 578 }, { "epoch": 0.8454810495626822, "grad_norm": 0.45044106245040894, "learning_rate": 1.3119753873112952e-05, "loss": 1.530938744544983, "step": 580 }, { "epoch": 0.8483965014577259, "grad_norm": 0.15131127834320068, "learning_rate": 1.3104706362501138e-05, "loss": 1.1275839805603027, "step": 582 }, { "epoch": 0.8513119533527697, "grad_norm": 0.12577542662620544, "learning_rate": 1.3089608706754179e-05, "loss": 1.4129434823989868, "step": 584 }, { "epoch": 0.8542274052478134, "grad_norm": 0.2110750824213028, "learning_rate": 1.3074461061818475e-05, "loss": 1.1559196710586548, "step": 586 }, { "epoch": 0.8571428571428571, "grad_norm": 0.21649499237537384, "learning_rate": 1.3059263584156778e-05, "loss": 1.3160138130187988, "step": 588 }, { "epoch": 0.8600583090379009, "grad_norm": 0.24884088337421417, "learning_rate": 1.3044016430746563e-05, "loss": 1.362827181816101, "step": 590 }, { "epoch": 0.8629737609329446, "grad_norm": 0.13489077985286713, "learning_rate": 1.3028719759078428e-05, "loss": 0.9931049942970276, "step": 592 }, { "epoch": 0.8658892128279884, "grad_norm": 0.09495119750499725, "learning_rate": 1.3013373727154437e-05, "loss": 1.088317632675171, "step": 594 }, { "epoch": 0.8688046647230321, "grad_norm": 0.08689741790294647, "learning_rate": 1.2997978493486516e-05, "loss": 1.135114312171936, "step": 596 }, { "epoch": 0.8717201166180758, "grad_norm": 0.11740924417972565, "learning_rate": 1.2982534217094805e-05, "loss": 1.1683244705200195, "step": 598 }, { "epoch": 0.8746355685131195, "grad_norm": 0.19883382320404053, "learning_rate": 1.2967041057506012e-05, "loss": 1.200365662574768, "step": 600 }, { "epoch": 0.8775510204081632, "grad_norm": 0.1676117181777954, "learning_rate": 1.2951499174751767e-05, "loss": 1.17380952835083, "step": 602 }, { "epoch": 0.880466472303207, "grad_norm": 0.10896378010511398, "learning_rate": 1.2935908729366975e-05, "loss": 1.1691476106643677, "step": 604 }, { "epoch": 0.8833819241982507, "grad_norm": 0.48385846614837646, "learning_rate": 1.2920269882388147e-05, "loss": 1.2547780275344849, "step": 606 }, { "epoch": 0.8862973760932945, "grad_norm": 0.5236583352088928, "learning_rate": 1.290458279535175e-05, "loss": 0.9720197916030884, "step": 608 }, { "epoch": 0.8892128279883382, "grad_norm": 0.14302794635295868, "learning_rate": 1.2888847630292523e-05, "loss": 0.7114431858062744, "step": 610 }, { "epoch": 0.892128279883382, "grad_norm": 0.24016736447811127, "learning_rate": 1.287306454974182e-05, "loss": 1.1511893272399902, "step": 612 }, { "epoch": 0.8950437317784257, "grad_norm": 0.23368032276630402, "learning_rate": 1.2857233716725915e-05, "loss": 1.270735740661621, "step": 614 }, { "epoch": 0.8979591836734694, "grad_norm": 0.31318148970603943, "learning_rate": 1.2841355294764332e-05, "loss": 0.9339938163757324, "step": 616 }, { "epoch": 0.9008746355685131, "grad_norm": 0.14631935954093933, "learning_rate": 1.2825429447868144e-05, "loss": 1.0888878107070923, "step": 618 }, { "epoch": 0.9037900874635568, "grad_norm": 0.05644264817237854, "learning_rate": 1.2809456340538295e-05, "loss": 0.6944148540496826, "step": 620 }, { "epoch": 0.9067055393586005, "grad_norm": 0.5780438780784607, "learning_rate": 1.2793436137763877e-05, "loss": 1.4030423164367676, "step": 622 }, { "epoch": 0.9096209912536443, "grad_norm": 0.25053542852401733, "learning_rate": 1.2777369005020443e-05, "loss": 1.366930603981018, "step": 624 }, { "epoch": 0.9125364431486881, "grad_norm": 0.668838381767273, "learning_rate": 1.2761255108268305e-05, "loss": 1.4005160331726074, "step": 626 }, { "epoch": 0.9154518950437318, "grad_norm": 0.39348724484443665, "learning_rate": 1.2745094613950798e-05, "loss": 1.3920326232910156, "step": 628 }, { "epoch": 0.9183673469387755, "grad_norm": 0.21188022196292877, "learning_rate": 1.2728887688992571e-05, "loss": 1.2693376541137695, "step": 630 }, { "epoch": 0.9212827988338192, "grad_norm": 0.13943858444690704, "learning_rate": 1.2712634500797868e-05, "loss": 1.3852614164352417, "step": 632 }, { "epoch": 0.924198250728863, "grad_norm": 0.09973420947790146, "learning_rate": 1.2696335217248797e-05, "loss": 1.0728514194488525, "step": 634 }, { "epoch": 0.9271137026239067, "grad_norm": 0.0977744311094284, "learning_rate": 1.2679990006703583e-05, "loss": 1.1080187559127808, "step": 636 }, { "epoch": 0.9300291545189504, "grad_norm": 0.09669560194015503, "learning_rate": 1.2663599037994848e-05, "loss": 1.101372480392456, "step": 638 }, { "epoch": 0.9329446064139941, "grad_norm": 0.2537369430065155, "learning_rate": 1.264716248042786e-05, "loss": 1.2607650756835938, "step": 640 }, { "epoch": 0.9358600583090378, "grad_norm": 0.10567066818475723, "learning_rate": 1.263068050377877e-05, "loss": 1.176032304763794, "step": 642 }, { "epoch": 0.9387755102040817, "grad_norm": 0.23190894722938538, "learning_rate": 1.2614153278292888e-05, "loss": 1.569797158241272, "step": 644 }, { "epoch": 0.9416909620991254, "grad_norm": 0.11260157078504562, "learning_rate": 1.259758097468289e-05, "loss": 1.124619960784912, "step": 646 }, { "epoch": 0.9446064139941691, "grad_norm": 0.10838615894317627, "learning_rate": 1.2580963764127086e-05, "loss": 1.0758150815963745, "step": 648 }, { "epoch": 0.9475218658892128, "grad_norm": 0.862457275390625, "learning_rate": 1.2564301818267634e-05, "loss": 0.809301495552063, "step": 650 }, { "epoch": 0.9504373177842566, "grad_norm": 0.13666097819805145, "learning_rate": 1.2547595309208762e-05, "loss": 1.1373188495635986, "step": 652 }, { "epoch": 0.9533527696793003, "grad_norm": 0.14616422355175018, "learning_rate": 1.2530844409515015e-05, "loss": 1.0827115774154663, "step": 654 }, { "epoch": 0.956268221574344, "grad_norm": 0.10559694468975067, "learning_rate": 1.2514049292209443e-05, "loss": 0.9751679301261902, "step": 656 }, { "epoch": 0.9591836734693877, "grad_norm": 0.08088317513465881, "learning_rate": 1.2497210130771838e-05, "loss": 1.495046854019165, "step": 658 }, { "epoch": 0.9620991253644315, "grad_norm": 0.6228170990943909, "learning_rate": 1.2480327099136921e-05, "loss": 1.2217864990234375, "step": 660 }, { "epoch": 0.9650145772594753, "grad_norm": 0.29220765829086304, "learning_rate": 1.2463400371692567e-05, "loss": 1.3038297891616821, "step": 662 }, { "epoch": 0.967930029154519, "grad_norm": 0.1476386934518814, "learning_rate": 1.2446430123277989e-05, "loss": 1.0814988613128662, "step": 664 }, { "epoch": 0.9708454810495627, "grad_norm": 0.5601685643196106, "learning_rate": 1.2429416529181928e-05, "loss": 1.3198177814483643, "step": 666 }, { "epoch": 0.9737609329446064, "grad_norm": 0.11794130504131317, "learning_rate": 1.2412359765140863e-05, "loss": 1.2900370359420776, "step": 668 }, { "epoch": 0.9766763848396501, "grad_norm": 0.1333070546388626, "learning_rate": 1.2395260007337178e-05, "loss": 1.0969475507736206, "step": 670 }, { "epoch": 0.9795918367346939, "grad_norm": 0.2164296805858612, "learning_rate": 1.2378117432397344e-05, "loss": 1.3217947483062744, "step": 672 }, { "epoch": 0.9825072886297376, "grad_norm": 0.1207147017121315, "learning_rate": 1.2360932217390101e-05, "loss": 1.1721763610839844, "step": 674 }, { "epoch": 0.9854227405247813, "grad_norm": 0.19854536652565002, "learning_rate": 1.2343704539824629e-05, "loss": 0.8384242057800293, "step": 676 }, { "epoch": 0.9883381924198251, "grad_norm": 0.11634889990091324, "learning_rate": 1.2326434577648703e-05, "loss": 0.5937544107437134, "step": 678 }, { "epoch": 0.9912536443148688, "grad_norm": 0.21319809556007385, "learning_rate": 1.2309122509246873e-05, "loss": 1.211629033088684, "step": 680 }, { "epoch": 0.9941690962099126, "grad_norm": 0.0654364675283432, "learning_rate": 1.2291768513438603e-05, "loss": 1.155535340309143, "step": 682 }, { "epoch": 0.9970845481049563, "grad_norm": 0.25669339299201965, "learning_rate": 1.2274372769476438e-05, "loss": 1.164899230003357, "step": 684 }, { "epoch": 1.0, "grad_norm": 0.12079296261072159, "learning_rate": 1.2256935457044149e-05, "loss": 1.3323872089385986, "step": 686 }, { "epoch": 1.0029154518950438, "grad_norm": 0.15898126363754272, "learning_rate": 1.223945675625487e-05, "loss": 0.9407209753990173, "step": 688 }, { "epoch": 1.0058309037900874, "grad_norm": 0.27969345450401306, "learning_rate": 1.2221936847649244e-05, "loss": 1.1378577947616577, "step": 690 }, { "epoch": 1.0087463556851313, "grad_norm": 0.25754043459892273, "learning_rate": 1.220437591219356e-05, "loss": 1.4397190809249878, "step": 692 }, { "epoch": 1.0116618075801749, "grad_norm": 0.10848913341760635, "learning_rate": 1.2186774131277878e-05, "loss": 1.1280958652496338, "step": 694 }, { "epoch": 1.0145772594752187, "grad_norm": 0.1306256800889969, "learning_rate": 1.2169131686714156e-05, "loss": 1.099426031112671, "step": 696 }, { "epoch": 1.0174927113702623, "grad_norm": 0.4202571511268616, "learning_rate": 1.2151448760734381e-05, "loss": 1.1389104127883911, "step": 698 }, { "epoch": 1.0204081632653061, "grad_norm": 0.24799339473247528, "learning_rate": 1.2133725535988675e-05, "loss": 1.1550320386886597, "step": 700 }, { "epoch": 1.0233236151603498, "grad_norm": 0.3226027190685272, "learning_rate": 1.211596219554341e-05, "loss": 1.3826884031295776, "step": 702 }, { "epoch": 1.0262390670553936, "grad_norm": 0.16781915724277496, "learning_rate": 1.209815892287933e-05, "loss": 1.2842170000076294, "step": 704 }, { "epoch": 1.0291545189504374, "grad_norm": 0.08502925932407379, "learning_rate": 1.2080315901889638e-05, "loss": 1.3487895727157593, "step": 706 }, { "epoch": 1.032069970845481, "grad_norm": 0.16372652351856232, "learning_rate": 1.2062433316878107e-05, "loss": 1.0846039056777954, "step": 708 }, { "epoch": 1.0349854227405249, "grad_norm": 0.2926742434501648, "learning_rate": 1.204451135255717e-05, "loss": 1.3418132066726685, "step": 710 }, { "epoch": 1.0379008746355685, "grad_norm": 0.13081398606300354, "learning_rate": 1.2026550194046027e-05, "loss": 1.2699744701385498, "step": 712 }, { "epoch": 1.0408163265306123, "grad_norm": 0.3602919578552246, "learning_rate": 1.2008550026868707e-05, "loss": 1.1103326082229614, "step": 714 }, { "epoch": 1.043731778425656, "grad_norm": 0.24668650329113007, "learning_rate": 1.1990511036952182e-05, "loss": 1.1811496019363403, "step": 716 }, { "epoch": 1.0466472303206997, "grad_norm": 0.2009333372116089, "learning_rate": 1.1972433410624415e-05, "loss": 1.3141359090805054, "step": 718 }, { "epoch": 1.0495626822157433, "grad_norm": 0.4131545126438141, "learning_rate": 1.1954317334612466e-05, "loss": 1.1311266422271729, "step": 720 }, { "epoch": 1.0524781341107872, "grad_norm": 0.26808369159698486, "learning_rate": 1.193616299604054e-05, "loss": 1.2641208171844482, "step": 722 }, { "epoch": 1.055393586005831, "grad_norm": 0.18929173052310944, "learning_rate": 1.1917970582428065e-05, "loss": 1.022256851196289, "step": 724 }, { "epoch": 1.0583090379008746, "grad_norm": 0.07950548082590103, "learning_rate": 1.1899740281687752e-05, "loss": 1.1594070196151733, "step": 726 }, { "epoch": 1.0612244897959184, "grad_norm": 0.3975690007209778, "learning_rate": 1.1881472282123659e-05, "loss": 1.09200918674469, "step": 728 }, { "epoch": 1.064139941690962, "grad_norm": 0.1322367936372757, "learning_rate": 1.1863166772429237e-05, "loss": 1.144595980644226, "step": 730 }, { "epoch": 1.0670553935860059, "grad_norm": 0.13084831833839417, "learning_rate": 1.1844823941685388e-05, "loss": 1.233044981956482, "step": 732 }, { "epoch": 1.0699708454810495, "grad_norm": 0.17538310587406158, "learning_rate": 1.1826443979358511e-05, "loss": 0.648325502872467, "step": 734 }, { "epoch": 1.0728862973760933, "grad_norm": 0.1613551825284958, "learning_rate": 1.1808027075298542e-05, "loss": 1.339321255683899, "step": 736 }, { "epoch": 1.075801749271137, "grad_norm": 0.062147416174411774, "learning_rate": 1.1789573419736995e-05, "loss": 1.0158833265304565, "step": 738 }, { "epoch": 1.0787172011661808, "grad_norm": 0.2725241184234619, "learning_rate": 1.1771083203284994e-05, "loss": 1.049664855003357, "step": 740 }, { "epoch": 1.0816326530612246, "grad_norm": 0.14118708670139313, "learning_rate": 1.1752556616931319e-05, "loss": 1.4558746814727783, "step": 742 }, { "epoch": 1.0845481049562682, "grad_norm": 0.12485146522521973, "learning_rate": 1.17339938520404e-05, "loss": 1.067897081375122, "step": 744 }, { "epoch": 1.087463556851312, "grad_norm": 0.14729249477386475, "learning_rate": 1.1715395100350386e-05, "loss": 1.2803950309753418, "step": 746 }, { "epoch": 1.0903790087463556, "grad_norm": 0.2967908978462219, "learning_rate": 1.1696760553971122e-05, "loss": 1.4100807905197144, "step": 748 }, { "epoch": 1.0932944606413995, "grad_norm": 0.18390890955924988, "learning_rate": 1.1678090405382191e-05, "loss": 1.0381572246551514, "step": 750 }, { "epoch": 1.096209912536443, "grad_norm": 0.08851258456707001, "learning_rate": 1.1659384847430916e-05, "loss": 1.2206934690475464, "step": 752 }, { "epoch": 1.099125364431487, "grad_norm": 0.1275774985551834, "learning_rate": 1.1640644073330365e-05, "loss": 1.258091688156128, "step": 754 }, { "epoch": 1.1020408163265305, "grad_norm": 0.3569571077823639, "learning_rate": 1.1621868276657371e-05, "loss": 1.2325845956802368, "step": 756 }, { "epoch": 1.1049562682215743, "grad_norm": 0.2721734642982483, "learning_rate": 1.1603057651350508e-05, "loss": 1.0642601251602173, "step": 758 }, { "epoch": 1.1078717201166182, "grad_norm": 0.2617255449295044, "learning_rate": 1.158421239170811e-05, "loss": 1.3023701906204224, "step": 760 }, { "epoch": 1.1107871720116618, "grad_norm": 0.1031145453453064, "learning_rate": 1.156533269238626e-05, "loss": 0.8144070506095886, "step": 762 }, { "epoch": 1.1137026239067056, "grad_norm": 0.1646541804075241, "learning_rate": 1.1546418748396758e-05, "loss": 1.0213180780410767, "step": 764 }, { "epoch": 1.1166180758017492, "grad_norm": 0.3250854015350342, "learning_rate": 1.1527470755105138e-05, "loss": 0.9498108625411987, "step": 766 }, { "epoch": 1.119533527696793, "grad_norm": 0.10029526799917221, "learning_rate": 1.1508488908228629e-05, "loss": 1.1771409511566162, "step": 768 }, { "epoch": 1.1224489795918366, "grad_norm": 0.09416939318180084, "learning_rate": 1.1489473403834142e-05, "loss": 0.5949094891548157, "step": 770 }, { "epoch": 1.1253644314868805, "grad_norm": 0.20775017142295837, "learning_rate": 1.1470424438336244e-05, "loss": 0.8676192760467529, "step": 772 }, { "epoch": 1.128279883381924, "grad_norm": 0.24049599468708038, "learning_rate": 1.145134220849512e-05, "loss": 1.1979655027389526, "step": 774 }, { "epoch": 1.131195335276968, "grad_norm": 0.320576548576355, "learning_rate": 1.1432226911414561e-05, "loss": 1.150422215461731, "step": 776 }, { "epoch": 1.1341107871720117, "grad_norm": 0.08741223067045212, "learning_rate": 1.1413078744539906e-05, "loss": 1.1655181646347046, "step": 778 }, { "epoch": 1.1370262390670554, "grad_norm": 0.13662189245224, "learning_rate": 1.139389790565601e-05, "loss": 1.1560207605361938, "step": 780 }, { "epoch": 1.1399416909620992, "grad_norm": 0.1589939296245575, "learning_rate": 1.1374684592885214e-05, "loss": 1.3467984199523926, "step": 782 }, { "epoch": 1.1428571428571428, "grad_norm": 0.29279693961143494, "learning_rate": 1.1355439004685278e-05, "loss": 1.0917768478393555, "step": 784 }, { "epoch": 1.1457725947521866, "grad_norm": 0.5396981835365295, "learning_rate": 1.1336161339847343e-05, "loss": 1.131831169128418, "step": 786 }, { "epoch": 1.1486880466472302, "grad_norm": 1.319527506828308, "learning_rate": 1.1316851797493877e-05, "loss": 1.287348747253418, "step": 788 }, { "epoch": 1.151603498542274, "grad_norm": 0.24090451002120972, "learning_rate": 1.1297510577076617e-05, "loss": 1.196481466293335, "step": 790 }, { "epoch": 1.1545189504373177, "grad_norm": 0.15632812678813934, "learning_rate": 1.1278137878374507e-05, "loss": 1.2842094898223877, "step": 792 }, { "epoch": 1.1574344023323615, "grad_norm": 0.1558282971382141, "learning_rate": 1.1258733901491634e-05, "loss": 1.160306453704834, "step": 794 }, { "epoch": 1.1603498542274053, "grad_norm": 0.0693809762597084, "learning_rate": 1.1239298846855166e-05, "loss": 1.3671103715896606, "step": 796 }, { "epoch": 1.163265306122449, "grad_norm": 0.11606906354427338, "learning_rate": 1.121983291521328e-05, "loss": 1.2540158033370972, "step": 798 }, { "epoch": 1.1661807580174928, "grad_norm": 0.5656346082687378, "learning_rate": 1.1200336307633083e-05, "loss": 1.095619797706604, "step": 800 }, { "epoch": 1.1690962099125364, "grad_norm": 0.3416520953178406, "learning_rate": 1.1180809225498542e-05, "loss": 1.33209228515625, "step": 802 }, { "epoch": 1.1720116618075802, "grad_norm": 0.14092491567134857, "learning_rate": 1.11612518705084e-05, "loss": 1.121877670288086, "step": 804 }, { "epoch": 1.1749271137026238, "grad_norm": 0.26185205578804016, "learning_rate": 1.1141664444674091e-05, "loss": 1.3565205335617065, "step": 806 }, { "epoch": 1.1778425655976676, "grad_norm": 0.15331599116325378, "learning_rate": 1.1122047150317665e-05, "loss": 0.7860437631607056, "step": 808 }, { "epoch": 1.1807580174927113, "grad_norm": 0.25274330377578735, "learning_rate": 1.110240019006968e-05, "loss": 0.7633789777755737, "step": 810 }, { "epoch": 1.183673469387755, "grad_norm": 0.1963554322719574, "learning_rate": 1.1082723766867123e-05, "loss": 1.133277177810669, "step": 812 }, { "epoch": 1.186588921282799, "grad_norm": 0.33926016092300415, "learning_rate": 1.1063018083951309e-05, "loss": 1.0211750268936157, "step": 814 }, { "epoch": 1.1895043731778425, "grad_norm": 0.23344306647777557, "learning_rate": 1.1043283344865776e-05, "loss": 1.1373283863067627, "step": 816 }, { "epoch": 1.1924198250728864, "grad_norm": 0.2557908594608307, "learning_rate": 1.1023519753454203e-05, "loss": 0.9404536485671997, "step": 818 }, { "epoch": 1.19533527696793, "grad_norm": 1.4168596267700195, "learning_rate": 1.1003727513858268e-05, "loss": 1.1765224933624268, "step": 820 }, { "epoch": 1.1982507288629738, "grad_norm": 0.13063687086105347, "learning_rate": 1.0983906830515584e-05, "loss": 1.222176432609558, "step": 822 }, { "epoch": 1.2011661807580174, "grad_norm": 0.07739931344985962, "learning_rate": 1.0964057908157548e-05, "loss": 1.151648998260498, "step": 824 }, { "epoch": 1.2040816326530612, "grad_norm": 0.07822076976299286, "learning_rate": 1.094418095180725e-05, "loss": 1.061394453048706, "step": 826 }, { "epoch": 1.2069970845481048, "grad_norm": 0.14568239450454712, "learning_rate": 1.0924276166777349e-05, "loss": 0.7191852927207947, "step": 828 }, { "epoch": 1.2099125364431487, "grad_norm": 0.30981016159057617, "learning_rate": 1.090434375866795e-05, "loss": 0.9558042287826538, "step": 830 }, { "epoch": 1.2128279883381925, "grad_norm": 0.2437950074672699, "learning_rate": 1.0884383933364477e-05, "loss": 1.1506716012954712, "step": 832 }, { "epoch": 1.215743440233236, "grad_norm": 0.24170175194740295, "learning_rate": 1.0864396897035558e-05, "loss": 1.1895190477371216, "step": 834 }, { "epoch": 1.21865889212828, "grad_norm": 0.1518929898738861, "learning_rate": 1.0844382856130886e-05, "loss": 1.2491060495376587, "step": 836 }, { "epoch": 1.2215743440233235, "grad_norm": 0.14055992662906647, "learning_rate": 1.0824342017379089e-05, "loss": 1.4196858406066895, "step": 838 }, { "epoch": 1.2244897959183674, "grad_norm": 0.18487177789211273, "learning_rate": 1.0804274587785595e-05, "loss": 1.0294526815414429, "step": 840 }, { "epoch": 1.227405247813411, "grad_norm": 0.6372827887535095, "learning_rate": 1.0784180774630495e-05, "loss": 0.26844465732574463, "step": 842 }, { "epoch": 1.2303206997084548, "grad_norm": 0.15034730732440948, "learning_rate": 1.0764060785466391e-05, "loss": 1.2424967288970947, "step": 844 }, { "epoch": 1.2332361516034984, "grad_norm": 0.16668657958507538, "learning_rate": 1.0743914828116281e-05, "loss": 1.0989577770233154, "step": 846 }, { "epoch": 1.2361516034985423, "grad_norm": 0.15799511969089508, "learning_rate": 1.0723743110671378e-05, "loss": 1.2244020700454712, "step": 848 }, { "epoch": 1.239067055393586, "grad_norm": 0.09745261073112488, "learning_rate": 1.0703545841488974e-05, "loss": 1.1401562690734863, "step": 850 }, { "epoch": 1.2419825072886297, "grad_norm": 0.5921195149421692, "learning_rate": 1.06833232291903e-05, "loss": 0.7718449234962463, "step": 852 }, { "epoch": 1.2448979591836735, "grad_norm": 0.08858446776866913, "learning_rate": 1.0663075482658355e-05, "loss": 1.074745774269104, "step": 854 }, { "epoch": 1.2478134110787171, "grad_norm": 0.22339816391468048, "learning_rate": 1.0642802811035753e-05, "loss": 0.6682339310646057, "step": 856 }, { "epoch": 1.250728862973761, "grad_norm": 0.22134488821029663, "learning_rate": 1.0622505423722566e-05, "loss": 1.1483386754989624, "step": 858 }, { "epoch": 1.2536443148688048, "grad_norm": 0.34351247549057007, "learning_rate": 1.0602183530374159e-05, "loss": 0.9953691959381104, "step": 860 }, { "epoch": 1.2565597667638484, "grad_norm": 0.1252131313085556, "learning_rate": 1.0581837340899022e-05, "loss": 1.152267575263977, "step": 862 }, { "epoch": 1.259475218658892, "grad_norm": 0.10258015990257263, "learning_rate": 1.0561467065456607e-05, "loss": 1.0798017978668213, "step": 864 }, { "epoch": 1.2623906705539358, "grad_norm": 0.3338652551174164, "learning_rate": 1.0541072914455152e-05, "loss": 0.6286276578903198, "step": 866 }, { "epoch": 1.2653061224489797, "grad_norm": 0.18449436128139496, "learning_rate": 1.0520655098549508e-05, "loss": 1.1572736501693726, "step": 868 }, { "epoch": 1.2682215743440233, "grad_norm": 0.1656051129102707, "learning_rate": 1.0500213828638972e-05, "loss": 1.2729966640472412, "step": 870 }, { "epoch": 1.271137026239067, "grad_norm": 0.1694529801607132, "learning_rate": 1.0479749315865093e-05, "loss": 1.1974416971206665, "step": 872 }, { "epoch": 1.2740524781341107, "grad_norm": 0.07350558042526245, "learning_rate": 1.045926177160951e-05, "loss": 1.127896785736084, "step": 874 }, { "epoch": 1.2769679300291545, "grad_norm": 0.1753559112548828, "learning_rate": 1.0438751407491745e-05, "loss": 1.1373307704925537, "step": 876 }, { "epoch": 1.2798833819241984, "grad_norm": 0.16192442178726196, "learning_rate": 1.0418218435367043e-05, "loss": 1.0873537063598633, "step": 878 }, { "epoch": 1.282798833819242, "grad_norm": 0.2647189497947693, "learning_rate": 1.0397663067324163e-05, "loss": 0.8994747400283813, "step": 880 }, { "epoch": 1.2857142857142856, "grad_norm": 0.16055135428905487, "learning_rate": 1.03770855156832e-05, "loss": 1.1629761457443237, "step": 882 }, { "epoch": 1.2886297376093294, "grad_norm": 0.1312457174062729, "learning_rate": 1.0356485992993386e-05, "loss": 1.2289665937423706, "step": 884 }, { "epoch": 1.2915451895043732, "grad_norm": 0.3237832486629486, "learning_rate": 1.0335864712030895e-05, "loss": 1.3477158546447754, "step": 886 }, { "epoch": 1.2944606413994169, "grad_norm": 0.11200102418661118, "learning_rate": 1.0315221885796648e-05, "loss": 1.1597537994384766, "step": 888 }, { "epoch": 1.2973760932944607, "grad_norm": 0.1582571268081665, "learning_rate": 1.029455772751411e-05, "loss": 1.0584282875061035, "step": 890 }, { "epoch": 1.3002915451895043, "grad_norm": 0.2713635563850403, "learning_rate": 1.0273872450627086e-05, "loss": 1.065276026725769, "step": 892 }, { "epoch": 1.3032069970845481, "grad_norm": 0.617933988571167, "learning_rate": 1.025316626879752e-05, "loss": 1.1870301961898804, "step": 894 }, { "epoch": 1.306122448979592, "grad_norm": 0.24628496170043945, "learning_rate": 1.0232439395903295e-05, "loss": 1.3716992139816284, "step": 896 }, { "epoch": 1.3090379008746356, "grad_norm": 0.07092081010341644, "learning_rate": 1.0211692046036002e-05, "loss": 1.2022879123687744, "step": 898 }, { "epoch": 1.3119533527696792, "grad_norm": 0.07380987703800201, "learning_rate": 1.019092443349875e-05, "loss": 0.9747592806816101, "step": 900 }, { "epoch": 1.314868804664723, "grad_norm": 0.07589751482009888, "learning_rate": 1.0170136772803948e-05, "loss": 1.033135175704956, "step": 902 }, { "epoch": 1.3177842565597668, "grad_norm": 0.12000124901533127, "learning_rate": 1.0149329278671082e-05, "loss": 1.1944102048873901, "step": 904 }, { "epoch": 1.3206997084548104, "grad_norm": 0.24365442991256714, "learning_rate": 1.0128502166024497e-05, "loss": 0.7611994743347168, "step": 906 }, { "epoch": 1.3236151603498543, "grad_norm": 0.5757351517677307, "learning_rate": 1.0107655649991186e-05, "loss": 1.0334023237228394, "step": 908 }, { "epoch": 1.3265306122448979, "grad_norm": 0.09015009552240372, "learning_rate": 1.0086789945898568e-05, "loss": 1.1387327909469604, "step": 910 }, { "epoch": 1.3294460641399417, "grad_norm": 0.6966755390167236, "learning_rate": 1.0065905269272245e-05, "loss": 1.0652743577957153, "step": 912 }, { "epoch": 1.3323615160349855, "grad_norm": 0.08158166706562042, "learning_rate": 1.0045001835833804e-05, "loss": 1.154505968093872, "step": 914 }, { "epoch": 1.3352769679300291, "grad_norm": 0.17343761026859283, "learning_rate": 1.0024079861498566e-05, "loss": 1.0197257995605469, "step": 916 }, { "epoch": 1.3381924198250728, "grad_norm": 0.3027811050415039, "learning_rate": 1.0003139562373365e-05, "loss": 1.3120397329330444, "step": 918 }, { "epoch": 1.3411078717201166, "grad_norm": 0.7201161980628967, "learning_rate": 9.982181154754323e-06, "loss": 0.6248821020126343, "step": 920 }, { "epoch": 1.3440233236151604, "grad_norm": 0.06654369831085205, "learning_rate": 9.961204855124595e-06, "loss": 1.3484827280044556, "step": 922 }, { "epoch": 1.346938775510204, "grad_norm": 0.3403482437133789, "learning_rate": 9.940210880152157e-06, "loss": 1.023748517036438, "step": 924 }, { "epoch": 1.3498542274052479, "grad_norm": 0.3134101629257202, "learning_rate": 9.91919944668755e-06, "loss": 1.462807536125183, "step": 926 }, { "epoch": 1.3527696793002915, "grad_norm": 0.12223192304372787, "learning_rate": 9.89817077176165e-06, "loss": 1.0908539295196533, "step": 928 }, { "epoch": 1.3556851311953353, "grad_norm": 0.14625874161720276, "learning_rate": 9.877125072583421e-06, "loss": 1.2502838373184204, "step": 930 }, { "epoch": 1.3586005830903791, "grad_norm": 0.2647968828678131, "learning_rate": 9.856062566537677e-06, "loss": 1.3731303215026855, "step": 932 }, { "epoch": 1.3615160349854227, "grad_norm": 0.14242695271968842, "learning_rate": 9.834983471182831e-06, "loss": 1.0232398509979248, "step": 934 }, { "epoch": 1.3644314868804663, "grad_norm": 0.22755105793476105, "learning_rate": 9.813888004248648e-06, "loss": 1.1105183362960815, "step": 936 }, { "epoch": 1.3673469387755102, "grad_norm": 0.10210377722978592, "learning_rate": 9.792776383634002e-06, "loss": 0.9822967648506165, "step": 938 }, { "epoch": 1.370262390670554, "grad_norm": 0.2081102728843689, "learning_rate": 9.771648827404617e-06, "loss": 0.6831743121147156, "step": 940 }, { "epoch": 1.3731778425655976, "grad_norm": 0.195752814412117, "learning_rate": 9.750505553790823e-06, "loss": 1.017356514930725, "step": 942 }, { "epoch": 1.3760932944606414, "grad_norm": 0.149446040391922, "learning_rate": 9.729346781185295e-06, "loss": 1.2844679355621338, "step": 944 }, { "epoch": 1.379008746355685, "grad_norm": 0.08231537789106369, "learning_rate": 9.708172728140804e-06, "loss": 1.2107067108154297, "step": 946 }, { "epoch": 1.3819241982507289, "grad_norm": 0.1436920166015625, "learning_rate": 9.686983613367947e-06, "loss": 0.9730831384658813, "step": 948 }, { "epoch": 1.3848396501457727, "grad_norm": 0.13865897059440613, "learning_rate": 9.665779655732905e-06, "loss": 1.134727954864502, "step": 950 }, { "epoch": 1.3877551020408163, "grad_norm": 0.1278238445520401, "learning_rate": 9.644561074255168e-06, "loss": 1.1596717834472656, "step": 952 }, { "epoch": 1.39067055393586, "grad_norm": 0.13528533279895782, "learning_rate": 9.62332808810528e-06, "loss": 1.0845617055892944, "step": 954 }, { "epoch": 1.3935860058309038, "grad_norm": 0.14649415016174316, "learning_rate": 9.602080916602573e-06, "loss": 1.223073124885559, "step": 956 }, { "epoch": 1.3965014577259476, "grad_norm": 0.1999201625585556, "learning_rate": 9.580819779212905e-06, "loss": 1.0572779178619385, "step": 958 }, { "epoch": 1.3994169096209912, "grad_norm": 0.42912936210632324, "learning_rate": 9.559544895546393e-06, "loss": 1.211446762084961, "step": 960 }, { "epoch": 1.402332361516035, "grad_norm": 0.3703382611274719, "learning_rate": 9.538256485355125e-06, "loss": 1.1024117469787598, "step": 962 }, { "epoch": 1.4052478134110786, "grad_norm": 0.09566738456487656, "learning_rate": 9.516954768530924e-06, "loss": 1.0713633298873901, "step": 964 }, { "epoch": 1.4081632653061225, "grad_norm": 0.13610726594924927, "learning_rate": 9.49563996510306e-06, "loss": 1.2085410356521606, "step": 966 }, { "epoch": 1.4110787172011663, "grad_norm": 0.19745762646198273, "learning_rate": 9.47431229523596e-06, "loss": 1.0144951343536377, "step": 968 }, { "epoch": 1.41399416909621, "grad_norm": 0.41680532693862915, "learning_rate": 9.452971979226972e-06, "loss": 1.0802420377731323, "step": 970 }, { "epoch": 1.4169096209912537, "grad_norm": 0.18726322054862976, "learning_rate": 9.431619237504052e-06, "loss": 1.2159126996994019, "step": 972 }, { "epoch": 1.4198250728862973, "grad_norm": 0.4570455551147461, "learning_rate": 9.410254290623512e-06, "loss": 1.1028673648834229, "step": 974 }, { "epoch": 1.4227405247813412, "grad_norm": 0.1720321923494339, "learning_rate": 9.388877359267732e-06, "loss": 1.053758978843689, "step": 976 }, { "epoch": 1.4256559766763848, "grad_norm": 0.7719082832336426, "learning_rate": 9.367488664242878e-06, "loss": 1.0918673276901245, "step": 978 }, { "epoch": 1.4285714285714286, "grad_norm": 0.11719834804534912, "learning_rate": 9.346088426476627e-06, "loss": 1.1107982397079468, "step": 980 }, { "epoch": 1.4314868804664722, "grad_norm": 0.26357176899909973, "learning_rate": 9.32467686701589e-06, "loss": 1.3265354633331299, "step": 982 }, { "epoch": 1.434402332361516, "grad_norm": 0.7194681167602539, "learning_rate": 9.303254207024509e-06, "loss": 0.6845600605010986, "step": 984 }, { "epoch": 1.4373177842565599, "grad_norm": 0.19328005611896515, "learning_rate": 9.28182066778099e-06, "loss": 1.1066367626190186, "step": 986 }, { "epoch": 1.4402332361516035, "grad_norm": 0.3166584372520447, "learning_rate": 9.260376470676225e-06, "loss": 1.0711687803268433, "step": 988 }, { "epoch": 1.4431486880466473, "grad_norm": 0.20059515535831451, "learning_rate": 9.238921837211175e-06, "loss": 1.2519899606704712, "step": 990 }, { "epoch": 1.446064139941691, "grad_norm": 0.15826623141765594, "learning_rate": 9.217456988994608e-06, "loss": 1.3235565423965454, "step": 992 }, { "epoch": 1.4489795918367347, "grad_norm": 0.19210676848888397, "learning_rate": 9.1959821477408e-06, "loss": 1.0224212408065796, "step": 994 }, { "epoch": 1.4518950437317784, "grad_norm": 0.26280826330184937, "learning_rate": 9.174497535267257e-06, "loss": 1.1540876626968384, "step": 996 }, { "epoch": 1.4548104956268222, "grad_norm": 0.09911534935235977, "learning_rate": 9.153003373492395e-06, "loss": 1.197079062461853, "step": 998 }, { "epoch": 1.4577259475218658, "grad_norm": 0.15191975235939026, "learning_rate": 9.131499884433285e-06, "loss": 1.2020612955093384, "step": 1000 }, { "epoch": 1.4606413994169096, "grad_norm": 0.1272922158241272, "learning_rate": 9.109987290203325e-06, "loss": 1.1222330331802368, "step": 1002 }, { "epoch": 1.4635568513119535, "grad_norm": 0.17026354372501373, "learning_rate": 9.088465813009979e-06, "loss": 1.2111908197402954, "step": 1004 }, { "epoch": 1.466472303206997, "grad_norm": 0.1192101240158081, "learning_rate": 9.06693567515245e-06, "loss": 1.186848759651184, "step": 1006 }, { "epoch": 1.469387755102041, "grad_norm": 0.5374306440353394, "learning_rate": 9.045397099019405e-06, "loss": 1.1735105514526367, "step": 1008 }, { "epoch": 1.4723032069970845, "grad_norm": 0.14989781379699707, "learning_rate": 9.02385030708667e-06, "loss": 1.3269665241241455, "step": 1010 }, { "epoch": 1.4752186588921283, "grad_norm": 0.23181524872779846, "learning_rate": 9.002295521914934e-06, "loss": 1.234397292137146, "step": 1012 }, { "epoch": 1.478134110787172, "grad_norm": 0.8318726420402527, "learning_rate": 8.980732966147451e-06, "loss": 1.2126901149749756, "step": 1014 }, { "epoch": 1.4810495626822158, "grad_norm": 0.2093929797410965, "learning_rate": 8.959162862507738e-06, "loss": 1.0737382173538208, "step": 1016 }, { "epoch": 1.4839650145772594, "grad_norm": 0.2963290214538574, "learning_rate": 8.937585433797273e-06, "loss": 0.9138633012771606, "step": 1018 }, { "epoch": 1.4868804664723032, "grad_norm": 0.2868603467941284, "learning_rate": 8.916000902893199e-06, "loss": 1.3595247268676758, "step": 1020 }, { "epoch": 1.489795918367347, "grad_norm": 0.11513882875442505, "learning_rate": 8.894409492746018e-06, "loss": 1.0969007015228271, "step": 1022 }, { "epoch": 1.4927113702623906, "grad_norm": 0.15273737907409668, "learning_rate": 8.87281142637729e-06, "loss": 1.0396068096160889, "step": 1024 }, { "epoch": 1.4956268221574345, "grad_norm": 0.12743119895458221, "learning_rate": 8.851206926877325e-06, "loss": 1.21293306350708, "step": 1026 }, { "epoch": 1.498542274052478, "grad_norm": 0.07293698191642761, "learning_rate": 8.82959621740288e-06, "loss": 0.8554050922393799, "step": 1028 }, { "epoch": 1.501457725947522, "grad_norm": 0.1396367996931076, "learning_rate": 8.807979521174866e-06, "loss": 0.8444166779518127, "step": 1030 }, { "epoch": 1.5043731778425657, "grad_norm": 0.34662795066833496, "learning_rate": 8.786357061476029e-06, "loss": 1.1405446529388428, "step": 1032 }, { "epoch": 1.5072886297376094, "grad_norm": 0.2602401673793793, "learning_rate": 8.764729061648632e-06, "loss": 1.2988492250442505, "step": 1034 }, { "epoch": 1.510204081632653, "grad_norm": 0.19908583164215088, "learning_rate": 8.743095745092185e-06, "loss": 1.2301197052001953, "step": 1036 }, { "epoch": 1.5131195335276968, "grad_norm": 0.20294634997844696, "learning_rate": 8.721457335261104e-06, "loss": 0.9326356053352356, "step": 1038 }, { "epoch": 1.5160349854227406, "grad_norm": 0.5687612295150757, "learning_rate": 8.699814055662417e-06, "loss": 1.187393069267273, "step": 1040 }, { "epoch": 1.5189504373177842, "grad_norm": 0.27902352809906006, "learning_rate": 8.678166129853442e-06, "loss": 1.0565565824508667, "step": 1042 }, { "epoch": 1.5218658892128278, "grad_norm": 0.06307139247655869, "learning_rate": 8.656513781439512e-06, "loss": 1.0471357107162476, "step": 1044 }, { "epoch": 1.5247813411078717, "grad_norm": 0.3132034242153168, "learning_rate": 8.634857234071619e-06, "loss": 1.3265520334243774, "step": 1046 }, { "epoch": 1.5276967930029155, "grad_norm": 0.25837764143943787, "learning_rate": 8.613196711444138e-06, "loss": 1.1429646015167236, "step": 1048 }, { "epoch": 1.5306122448979593, "grad_norm": 0.08677840977907181, "learning_rate": 8.591532437292502e-06, "loss": 0.9910908937454224, "step": 1050 }, { "epoch": 1.533527696793003, "grad_norm": 0.283247172832489, "learning_rate": 8.5698646353909e-06, "loss": 0.8875013589859009, "step": 1052 }, { "epoch": 1.5364431486880465, "grad_norm": 0.16179129481315613, "learning_rate": 8.548193529549947e-06, "loss": 1.1073272228240967, "step": 1054 }, { "epoch": 1.5393586005830904, "grad_norm": 0.12490551173686981, "learning_rate": 8.526519343614398e-06, "loss": 0.9769071340560913, "step": 1056 }, { "epoch": 1.5422740524781342, "grad_norm": 0.25089073181152344, "learning_rate": 8.504842301460815e-06, "loss": 1.069384217262268, "step": 1058 }, { "epoch": 1.5451895043731778, "grad_norm": 0.22324740886688232, "learning_rate": 8.483162626995268e-06, "loss": 1.0800434350967407, "step": 1060 }, { "epoch": 1.5481049562682214, "grad_norm": 0.358711302280426, "learning_rate": 8.461480544151012e-06, "loss": 0.8311281204223633, "step": 1062 }, { "epoch": 1.5510204081632653, "grad_norm": 0.35619816184043884, "learning_rate": 8.439796276886177e-06, "loss": 1.378959059715271, "step": 1064 }, { "epoch": 1.553935860058309, "grad_norm": 0.07740774750709534, "learning_rate": 8.418110049181464e-06, "loss": 0.7135167121887207, "step": 1066 }, { "epoch": 1.556851311953353, "grad_norm": 0.11709576100111008, "learning_rate": 8.396422085037822e-06, "loss": 1.1297550201416016, "step": 1068 }, { "epoch": 1.5597667638483965, "grad_norm": 0.1865878850221634, "learning_rate": 8.374732608474128e-06, "loss": 1.1906490325927734, "step": 1070 }, { "epoch": 1.5626822157434401, "grad_norm": 0.16431988775730133, "learning_rate": 8.353041843524886e-06, "loss": 1.1722774505615234, "step": 1072 }, { "epoch": 1.565597667638484, "grad_norm": 0.36135971546173096, "learning_rate": 8.331350014237912e-06, "loss": 1.1067001819610596, "step": 1074 }, { "epoch": 1.5685131195335278, "grad_norm": 0.3832073211669922, "learning_rate": 8.30965734467201e-06, "loss": 1.2439948320388794, "step": 1076 }, { "epoch": 1.5714285714285714, "grad_norm": 0.2755753993988037, "learning_rate": 8.28796405889466e-06, "loss": 0.6848400831222534, "step": 1078 }, { "epoch": 1.574344023323615, "grad_norm": 0.07128661125898361, "learning_rate": 8.266270380979723e-06, "loss": 1.2033002376556396, "step": 1080 }, { "epoch": 1.5772594752186588, "grad_norm": 0.16955770552158356, "learning_rate": 8.244576535005093e-06, "loss": 1.2546216249465942, "step": 1082 }, { "epoch": 1.5801749271137027, "grad_norm": 0.702198326587677, "learning_rate": 8.22288274505041e-06, "loss": 1.0031241178512573, "step": 1084 }, { "epoch": 1.5830903790087465, "grad_norm": 0.09851932525634766, "learning_rate": 8.201189235194729e-06, "loss": 1.171536922454834, "step": 1086 }, { "epoch": 1.58600583090379, "grad_norm": 0.5338625907897949, "learning_rate": 8.179496229514217e-06, "loss": 1.0307410955429077, "step": 1088 }, { "epoch": 1.5889212827988337, "grad_norm": 0.17403900623321533, "learning_rate": 8.157803952079832e-06, "loss": 1.2256954908370972, "step": 1090 }, { "epoch": 1.5918367346938775, "grad_norm": 0.1747167557477951, "learning_rate": 8.136112626955005e-06, "loss": 1.2137948274612427, "step": 1092 }, { "epoch": 1.5947521865889214, "grad_norm": 0.07115664333105087, "learning_rate": 8.114422478193336e-06, "loss": 1.0697215795516968, "step": 1094 }, { "epoch": 1.597667638483965, "grad_norm": 0.12972617149353027, "learning_rate": 8.09273372983628e-06, "loss": 1.1039892435073853, "step": 1096 }, { "epoch": 1.6005830903790086, "grad_norm": 0.13853909075260162, "learning_rate": 8.071046605910804e-06, "loss": 1.186689853668213, "step": 1098 }, { "epoch": 1.6034985422740524, "grad_norm": 0.1802920252084732, "learning_rate": 8.049361330427129e-06, "loss": 1.047842025756836, "step": 1100 }, { "epoch": 1.6064139941690962, "grad_norm": 0.15627241134643555, "learning_rate": 8.027678127376353e-06, "loss": 1.081397294998169, "step": 1102 }, { "epoch": 1.60932944606414, "grad_norm": 0.13871587812900543, "learning_rate": 8.005997220728181e-06, "loss": 1.129719614982605, "step": 1104 }, { "epoch": 1.6122448979591837, "grad_norm": 20.326587677001953, "learning_rate": 7.984318834428607e-06, "loss": 1.1785022020339966, "step": 1106 }, { "epoch": 1.6151603498542273, "grad_norm": 0.13852129876613617, "learning_rate": 7.962643192397574e-06, "loss": 1.0734182596206665, "step": 1108 }, { "epoch": 1.6180758017492711, "grad_norm": 0.6223950982093811, "learning_rate": 7.940970518526686e-06, "loss": 1.1438935995101929, "step": 1110 }, { "epoch": 1.620991253644315, "grad_norm": 0.0528414323925972, "learning_rate": 7.919301036676892e-06, "loss": 0.9696015119552612, "step": 1112 }, { "epoch": 1.6239067055393586, "grad_norm": 0.13710257411003113, "learning_rate": 7.897634970676166e-06, "loss": 1.1505471467971802, "step": 1114 }, { "epoch": 1.6268221574344022, "grad_norm": 0.16004100441932678, "learning_rate": 7.875972544317203e-06, "loss": 1.2167091369628906, "step": 1116 }, { "epoch": 1.629737609329446, "grad_norm": 0.45379891991615295, "learning_rate": 7.854313981355101e-06, "loss": 1.131983757019043, "step": 1118 }, { "epoch": 1.6326530612244898, "grad_norm": 0.13307584822177887, "learning_rate": 7.832659505505048e-06, "loss": 1.1805908679962158, "step": 1120 }, { "epoch": 1.6355685131195337, "grad_norm": 0.2649403214454651, "learning_rate": 7.811009340440022e-06, "loss": 1.2160626649856567, "step": 1122 }, { "epoch": 1.6384839650145773, "grad_norm": 0.16499841213226318, "learning_rate": 7.789363709788472e-06, "loss": 1.2312496900558472, "step": 1124 }, { "epoch": 1.6413994169096209, "grad_norm": 0.14581745862960815, "learning_rate": 7.767722837132008e-06, "loss": 0.5785539150238037, "step": 1126 }, { "epoch": 1.6443148688046647, "grad_norm": 0.40138673782348633, "learning_rate": 7.746086946003103e-06, "loss": 1.102718472480774, "step": 1128 }, { "epoch": 1.6472303206997085, "grad_norm": 0.39575713872909546, "learning_rate": 7.724456259882758e-06, "loss": 0.9496442675590515, "step": 1130 }, { "epoch": 1.6501457725947521, "grad_norm": 0.16450181603431702, "learning_rate": 7.702831002198225e-06, "loss": 1.1438281536102295, "step": 1132 }, { "epoch": 1.6530612244897958, "grad_norm": 0.10068156570196152, "learning_rate": 7.68121139632068e-06, "loss": 1.2390490770339966, "step": 1134 }, { "epoch": 1.6559766763848396, "grad_norm": 0.25964057445526123, "learning_rate": 7.65959766556292e-06, "loss": 1.0381125211715698, "step": 1136 }, { "epoch": 1.6588921282798834, "grad_norm": 0.43424177169799805, "learning_rate": 7.637990033177057e-06, "loss": 1.109690546989441, "step": 1138 }, { "epoch": 1.6618075801749272, "grad_norm": 0.21539334952831268, "learning_rate": 7.616388722352214e-06, "loss": 1.2123034000396729, "step": 1140 }, { "epoch": 1.6647230320699709, "grad_norm": 0.20255622267723083, "learning_rate": 7.594793956212212e-06, "loss": 1.217584490776062, "step": 1142 }, { "epoch": 1.6676384839650145, "grad_norm": 0.47754237055778503, "learning_rate": 7.573205957813276e-06, "loss": 0.9803376197814941, "step": 1144 }, { "epoch": 1.6705539358600583, "grad_norm": 0.09026843309402466, "learning_rate": 7.551624950141726e-06, "loss": 1.1912260055541992, "step": 1146 }, { "epoch": 1.6734693877551021, "grad_norm": 0.11982105672359467, "learning_rate": 7.530051156111669e-06, "loss": 1.1396859884262085, "step": 1148 }, { "epoch": 1.6763848396501457, "grad_norm": 0.42154011130332947, "learning_rate": 7.508484798562707e-06, "loss": 1.3917794227600098, "step": 1150 }, { "epoch": 1.6793002915451893, "grad_norm": 0.34086376428604126, "learning_rate": 7.486926100257621e-06, "loss": 1.1625425815582275, "step": 1152 }, { "epoch": 1.6822157434402332, "grad_norm": 0.33954572677612305, "learning_rate": 7.465375283880084e-06, "loss": 1.1317555904388428, "step": 1154 }, { "epoch": 1.685131195335277, "grad_norm": 0.15621435642242432, "learning_rate": 7.44383257203236e-06, "loss": 1.0376930236816406, "step": 1156 }, { "epoch": 1.6880466472303208, "grad_norm": 0.16445010900497437, "learning_rate": 7.422298187232988e-06, "loss": 0.6347440481185913, "step": 1158 }, { "epoch": 1.6909620991253644, "grad_norm": 0.11221948266029358, "learning_rate": 7.4007723519145005e-06, "loss": 1.2130205631256104, "step": 1160 }, { "epoch": 1.693877551020408, "grad_norm": 0.10298870503902435, "learning_rate": 7.37925528842113e-06, "loss": 1.0703403949737549, "step": 1162 }, { "epoch": 1.6967930029154519, "grad_norm": 0.05989653244614601, "learning_rate": 7.357747219006487e-06, "loss": 1.0500437021255493, "step": 1164 }, { "epoch": 1.6997084548104957, "grad_norm": 0.18388091027736664, "learning_rate": 7.336248365831293e-06, "loss": 1.0820516347885132, "step": 1166 }, { "epoch": 1.7026239067055393, "grad_norm": 0.30676501989364624, "learning_rate": 7.314758950961069e-06, "loss": 0.8827295303344727, "step": 1168 }, { "epoch": 1.7055393586005831, "grad_norm": 0.1762169450521469, "learning_rate": 7.293279196363844e-06, "loss": 1.1642931699752808, "step": 1170 }, { "epoch": 1.7084548104956268, "grad_norm": 0.138104647397995, "learning_rate": 7.271809323907868e-06, "loss": 1.3497681617736816, "step": 1172 }, { "epoch": 1.7113702623906706, "grad_norm": 0.04815658926963806, "learning_rate": 7.250349555359316e-06, "loss": 0.9686152935028076, "step": 1174 }, { "epoch": 1.7142857142857144, "grad_norm": 0.4449727535247803, "learning_rate": 7.228900112379993e-06, "loss": 0.8205754160881042, "step": 1176 }, { "epoch": 1.717201166180758, "grad_norm": 0.19454075396060944, "learning_rate": 7.2074612165250596e-06, "loss": 1.1948063373565674, "step": 1178 }, { "epoch": 1.7201166180758016, "grad_norm": 0.1630457043647766, "learning_rate": 7.18603308924072e-06, "loss": 1.122542381286621, "step": 1180 }, { "epoch": 1.7230320699708455, "grad_norm": 0.2632548213005066, "learning_rate": 7.164615951861958e-06, "loss": 1.2288137674331665, "step": 1182 }, { "epoch": 1.7259475218658893, "grad_norm": 0.185108482837677, "learning_rate": 7.143210025610238e-06, "loss": 1.029456615447998, "step": 1184 }, { "epoch": 1.728862973760933, "grad_norm": 0.06753533333539963, "learning_rate": 7.121815531591222e-06, "loss": 0.9876729846000671, "step": 1186 }, { "epoch": 1.7317784256559767, "grad_norm": 0.16401244699954987, "learning_rate": 7.100432690792484e-06, "loss": 0.6059045791625977, "step": 1188 }, { "epoch": 1.7346938775510203, "grad_norm": 0.2957839369773865, "learning_rate": 7.0790617240812374e-06, "loss": 1.0509564876556396, "step": 1190 }, { "epoch": 1.7376093294460642, "grad_norm": 0.13618314266204834, "learning_rate": 7.057702852202037e-06, "loss": 1.1775768995285034, "step": 1192 }, { "epoch": 1.740524781341108, "grad_norm": 0.171565443277359, "learning_rate": 7.0363562957745105e-06, "loss": 0.9801825881004333, "step": 1194 }, { "epoch": 1.7434402332361516, "grad_norm": 0.09507802128791809, "learning_rate": 7.015022275291084e-06, "loss": 0.969845175743103, "step": 1196 }, { "epoch": 1.7463556851311952, "grad_norm": 0.49828192591667175, "learning_rate": 6.993701011114686e-06, "loss": 0.9284896850585938, "step": 1198 }, { "epoch": 1.749271137026239, "grad_norm": 0.10986272245645523, "learning_rate": 6.972392723476494e-06, "loss": 1.1610954999923706, "step": 1200 }, { "epoch": 1.7521865889212829, "grad_norm": 0.36414283514022827, "learning_rate": 6.9510976324736415e-06, "loss": 0.9902899861335754, "step": 1202 }, { "epoch": 1.7551020408163265, "grad_norm": 0.15007393062114716, "learning_rate": 6.929815958066951e-06, "loss": 1.1686747074127197, "step": 1204 }, { "epoch": 1.7580174927113703, "grad_norm": 0.09150854498147964, "learning_rate": 6.908547920078671e-06, "loss": 0.9296596050262451, "step": 1206 }, { "epoch": 1.760932944606414, "grad_norm": 0.13725019991397858, "learning_rate": 6.887293738190183e-06, "loss": 0.6867948174476624, "step": 1208 }, { "epoch": 1.7638483965014577, "grad_norm": 0.2506777346134186, "learning_rate": 6.866053631939756e-06, "loss": 1.1812880039215088, "step": 1210 }, { "epoch": 1.7667638483965016, "grad_norm": 0.24459925293922424, "learning_rate": 6.844827820720275e-06, "loss": 1.233087420463562, "step": 1212 }, { "epoch": 1.7696793002915452, "grad_norm": 0.18725088238716125, "learning_rate": 6.8236165237769555e-06, "loss": 1.0703694820404053, "step": 1214 }, { "epoch": 1.7725947521865888, "grad_norm": 0.08817660808563232, "learning_rate": 6.802419960205095e-06, "loss": 0.9150586724281311, "step": 1216 }, { "epoch": 1.7755102040816326, "grad_norm": 0.24206826090812683, "learning_rate": 6.7812383489478216e-06, "loss": 1.2116329669952393, "step": 1218 }, { "epoch": 1.7784256559766765, "grad_norm": 0.13627009093761444, "learning_rate": 6.760071908793796e-06, "loss": 0.6978607177734375, "step": 1220 }, { "epoch": 1.78134110787172, "grad_norm": 0.19865363836288452, "learning_rate": 6.738920858374991e-06, "loss": 1.0590617656707764, "step": 1222 }, { "epoch": 1.784256559766764, "grad_norm": 0.4059164524078369, "learning_rate": 6.717785416164414e-06, "loss": 1.38783860206604, "step": 1224 }, { "epoch": 1.7871720116618075, "grad_norm": 0.2919604480266571, "learning_rate": 6.696665800473842e-06, "loss": 1.1487404108047485, "step": 1226 }, { "epoch": 1.7900874635568513, "grad_norm": 0.1517525017261505, "learning_rate": 6.675562229451589e-06, "loss": 1.206036925315857, "step": 1228 }, { "epoch": 1.7930029154518952, "grad_norm": 0.2847557067871094, "learning_rate": 6.6544749210802305e-06, "loss": 0.8351743817329407, "step": 1230 }, { "epoch": 1.7959183673469388, "grad_norm": 0.2792437672615051, "learning_rate": 6.633404093174371e-06, "loss": 0.9937669634819031, "step": 1232 }, { "epoch": 1.7988338192419824, "grad_norm": 0.39450135827064514, "learning_rate": 6.612349963378381e-06, "loss": 0.9253970980644226, "step": 1234 }, { "epoch": 1.8017492711370262, "grad_norm": 0.26529014110565186, "learning_rate": 6.591312749164154e-06, "loss": 1.1452049016952515, "step": 1236 }, { "epoch": 1.80466472303207, "grad_norm": 0.23458294570446014, "learning_rate": 6.570292667828856e-06, "loss": 1.2078217267990112, "step": 1238 }, { "epoch": 1.8075801749271136, "grad_norm": 0.13832348585128784, "learning_rate": 6.549289936492693e-06, "loss": 1.2237412929534912, "step": 1240 }, { "epoch": 1.8104956268221575, "grad_norm": 0.08728086948394775, "learning_rate": 6.5283047720966505e-06, "loss": 1.1127595901489258, "step": 1242 }, { "epoch": 1.813411078717201, "grad_norm": 0.2100764364004135, "learning_rate": 6.5073373914002656e-06, "loss": 1.0868037939071655, "step": 1244 }, { "epoch": 1.816326530612245, "grad_norm": 0.13499869406223297, "learning_rate": 6.486388010979388e-06, "loss": 1.119627833366394, "step": 1246 }, { "epoch": 1.8192419825072887, "grad_norm": 0.34346649050712585, "learning_rate": 6.465456847223932e-06, "loss": 1.0318715572357178, "step": 1248 }, { "epoch": 1.8221574344023324, "grad_norm": 0.07944006472826004, "learning_rate": 6.444544116335655e-06, "loss": 1.1757546663284302, "step": 1250 }, { "epoch": 1.825072886297376, "grad_norm": 0.2944159209728241, "learning_rate": 6.423650034325915e-06, "loss": 1.2396355867385864, "step": 1252 }, { "epoch": 1.8279883381924198, "grad_norm": 0.18287204205989838, "learning_rate": 6.402774817013442e-06, "loss": 1.097105860710144, "step": 1254 }, { "epoch": 1.8309037900874636, "grad_norm": 0.141254261136055, "learning_rate": 6.381918680022112e-06, "loss": 1.0068081617355347, "step": 1256 }, { "epoch": 1.8338192419825075, "grad_norm": 0.17386725544929504, "learning_rate": 6.36108183877871e-06, "loss": 1.1032158136367798, "step": 1258 }, { "epoch": 1.836734693877551, "grad_norm": 0.22268234193325043, "learning_rate": 6.3402645085107224e-06, "loss": 1.2912282943725586, "step": 1260 }, { "epoch": 1.8396501457725947, "grad_norm": 0.411150723695755, "learning_rate": 6.3194669042440976e-06, "loss": 1.129095196723938, "step": 1262 }, { "epoch": 1.8425655976676385, "grad_norm": 0.3001119792461395, "learning_rate": 6.298689240801026e-06, "loss": 1.365820050239563, "step": 1264 }, { "epoch": 1.8454810495626823, "grad_norm": 0.36252474784851074, "learning_rate": 6.277931732797732e-06, "loss": 1.3998820781707764, "step": 1266 }, { "epoch": 1.848396501457726, "grad_norm": 0.29093074798583984, "learning_rate": 6.257194594642254e-06, "loss": 1.0682395696640015, "step": 1268 }, { "epoch": 1.8513119533527695, "grad_norm": 0.13126376271247864, "learning_rate": 6.236478040532214e-06, "loss": 1.0302337408065796, "step": 1270 }, { "epoch": 1.8542274052478134, "grad_norm": 0.1628250777721405, "learning_rate": 6.215782284452628e-06, "loss": 1.098158359527588, "step": 1272 }, { "epoch": 1.8571428571428572, "grad_norm": 0.20393933355808258, "learning_rate": 6.195107540173687e-06, "loss": 1.1833226680755615, "step": 1274 }, { "epoch": 1.860058309037901, "grad_norm": 0.2242426872253418, "learning_rate": 6.174454021248537e-06, "loss": 1.2466531991958618, "step": 1276 }, { "epoch": 1.8629737609329446, "grad_norm": 0.1543884128332138, "learning_rate": 6.15382194101109e-06, "loss": 0.9692124724388123, "step": 1278 }, { "epoch": 1.8658892128279883, "grad_norm": 0.10594581812620163, "learning_rate": 6.133211512573819e-06, "loss": 1.0277884006500244, "step": 1280 }, { "epoch": 1.868804664723032, "grad_norm": 0.1760384440422058, "learning_rate": 6.1126229488255416e-06, "loss": 1.0745232105255127, "step": 1282 }, { "epoch": 1.871720116618076, "grad_norm": 0.11243575066328049, "learning_rate": 6.092056462429238e-06, "loss": 1.11955988407135, "step": 1284 }, { "epoch": 1.8746355685131195, "grad_norm": 0.3004339337348938, "learning_rate": 6.071512265819841e-06, "loss": 1.1129993200302124, "step": 1286 }, { "epoch": 1.8775510204081631, "grad_norm": 0.1870323270559311, "learning_rate": 6.0509905712020554e-06, "loss": 1.1004483699798584, "step": 1288 }, { "epoch": 1.880466472303207, "grad_norm": 0.15390393137931824, "learning_rate": 6.030491590548157e-06, "loss": 1.1051290035247803, "step": 1290 }, { "epoch": 1.8833819241982508, "grad_norm": 0.17591705918312073, "learning_rate": 6.010015535595802e-06, "loss": 1.19423246383667, "step": 1292 }, { "epoch": 1.8862973760932946, "grad_norm": 0.517492413520813, "learning_rate": 5.989562617845843e-06, "loss": 0.7528221011161804, "step": 1294 }, { "epoch": 1.8892128279883382, "grad_norm": 0.2763058543205261, "learning_rate": 5.969133048560151e-06, "loss": 0.6028561592102051, "step": 1296 }, { "epoch": 1.8921282798833818, "grad_norm": 0.1741061955690384, "learning_rate": 5.948727038759415e-06, "loss": 0.9944829344749451, "step": 1298 }, { "epoch": 1.8950437317784257, "grad_norm": 0.3421262204647064, "learning_rate": 5.928344799220985e-06, "loss": 1.118728756904602, "step": 1300 }, { "epoch": 1.8979591836734695, "grad_norm": 0.42300957441329956, "learning_rate": 5.907986540476678e-06, "loss": 0.7158623337745667, "step": 1302 }, { "epoch": 1.900874635568513, "grad_norm": 0.14869055151939392, "learning_rate": 5.887652472810609e-06, "loss": 1.0393644571304321, "step": 1304 }, { "epoch": 1.9037900874635567, "grad_norm": 0.07201150804758072, "learning_rate": 5.86734280625702e-06, "loss": 0.5461652874946594, "step": 1306 }, { "epoch": 1.9067055393586005, "grad_norm": 0.6429765820503235, "learning_rate": 5.847057750598111e-06, "loss": 1.1324551105499268, "step": 1308 }, { "epoch": 1.9096209912536444, "grad_norm": 0.18680232763290405, "learning_rate": 5.826797515361868e-06, "loss": 1.274292230606079, "step": 1310 }, { "epoch": 1.9125364431486882, "grad_norm": 0.1953829973936081, "learning_rate": 5.806562309819909e-06, "loss": 1.2884361743927002, "step": 1312 }, { "epoch": 1.9154518950437318, "grad_norm": 0.28342682123184204, "learning_rate": 5.7863523429853055e-06, "loss": 1.279549479484558, "step": 1314 }, { "epoch": 1.9183673469387754, "grad_norm": 0.45169350504875183, "learning_rate": 5.766167823610443e-06, "loss": 1.074336051940918, "step": 1316 }, { "epoch": 1.9212827988338192, "grad_norm": 0.18884071707725525, "learning_rate": 5.746008960184852e-06, "loss": 1.262738585472107, "step": 1318 }, { "epoch": 1.924198250728863, "grad_norm": 0.059031542390584946, "learning_rate": 5.725875960933058e-06, "loss": 1.0195709466934204, "step": 1320 }, { "epoch": 1.9271137026239067, "grad_norm": 0.11774204671382904, "learning_rate": 5.705769033812431e-06, "loss": 1.04592764377594, "step": 1322 }, { "epoch": 1.9300291545189503, "grad_norm": 0.13104864954948425, "learning_rate": 5.685688386511041e-06, "loss": 1.0482321977615356, "step": 1324 }, { "epoch": 1.9329446064139941, "grad_norm": 0.15567655861377716, "learning_rate": 5.665634226445501e-06, "loss": 1.2044618129730225, "step": 1326 }, { "epoch": 1.935860058309038, "grad_norm": 0.14479920268058777, "learning_rate": 5.645606760758836e-06, "loss": 1.0985395908355713, "step": 1328 }, { "epoch": 1.9387755102040818, "grad_norm": 0.1920030266046524, "learning_rate": 5.625606196318347e-06, "loss": 1.4523109197616577, "step": 1330 }, { "epoch": 1.9416909620991254, "grad_norm": 0.2637879252433777, "learning_rate": 5.605632739713456e-06, "loss": 1.0658267736434937, "step": 1332 }, { "epoch": 1.944606413994169, "grad_norm": 0.08796999603509903, "learning_rate": 5.585686597253593e-06, "loss": 1.0220710039138794, "step": 1334 }, { "epoch": 1.9475218658892128, "grad_norm": 0.4936763644218445, "learning_rate": 5.5657679749660455e-06, "loss": 0.5359926223754883, "step": 1336 }, { "epoch": 1.9504373177842567, "grad_norm": 0.25524938106536865, "learning_rate": 5.545877078593849e-06, "loss": 1.0832246541976929, "step": 1338 }, { "epoch": 1.9533527696793003, "grad_norm": 0.3815828263759613, "learning_rate": 5.52601411359365e-06, "loss": 1.0333139896392822, "step": 1340 }, { "epoch": 1.9562682215743439, "grad_norm": 0.1364160180091858, "learning_rate": 5.506179285133582e-06, "loss": 0.8447660207748413, "step": 1342 }, { "epoch": 1.9591836734693877, "grad_norm": 0.22036899626255035, "learning_rate": 5.486372798091161e-06, "loss": 1.4143515825271606, "step": 1344 }, { "epoch": 1.9620991253644315, "grad_norm": 0.4314256012439728, "learning_rate": 5.466594857051153e-06, "loss": 0.9990249276161194, "step": 1346 }, { "epoch": 1.9650145772594754, "grad_norm": 0.15996676683425903, "learning_rate": 5.4468456663034635e-06, "loss": 1.2198452949523926, "step": 1348 }, { "epoch": 1.967930029154519, "grad_norm": 0.19972719252109528, "learning_rate": 5.427125429841039e-06, "loss": 1.0296826362609863, "step": 1350 }, { "epoch": 1.9708454810495626, "grad_norm": 0.1828991174697876, "learning_rate": 5.4074343513577536e-06, "loss": 1.2304623126983643, "step": 1352 }, { "epoch": 1.9737609329446064, "grad_norm": 0.2502359449863434, "learning_rate": 5.387772634246287e-06, "loss": 1.1169551610946655, "step": 1354 }, { "epoch": 1.9766763848396502, "grad_norm": 0.1563616245985031, "learning_rate": 5.36814048159606e-06, "loss": 0.818549633026123, "step": 1356 }, { "epoch": 1.9795918367346939, "grad_norm": 0.08790906518697739, "learning_rate": 5.348538096191109e-06, "loss": 1.2132847309112549, "step": 1358 }, { "epoch": 1.9825072886297375, "grad_norm": 0.3884468376636505, "learning_rate": 5.328965680507991e-06, "loss": 1.1513258218765259, "step": 1360 }, { "epoch": 1.9854227405247813, "grad_norm": 0.24757881462574005, "learning_rate": 5.309423436713714e-06, "loss": 0.6811099052429199, "step": 1362 }, { "epoch": 1.9883381924198251, "grad_norm": 0.0917486697435379, "learning_rate": 5.289911566663626e-06, "loss": 0.5249199271202087, "step": 1364 }, { "epoch": 1.991253644314869, "grad_norm": 0.3590066432952881, "learning_rate": 5.270430271899342e-06, "loss": 1.1386462450027466, "step": 1366 }, { "epoch": 1.9941690962099126, "grad_norm": 0.0781368613243103, "learning_rate": 5.250979753646664e-06, "loss": 1.0840882062911987, "step": 1368 }, { "epoch": 1.9970845481049562, "grad_norm": 0.3470701277256012, "learning_rate": 5.231560212813487e-06, "loss": 1.0490968227386475, "step": 1370 }, { "epoch": 2.0, "grad_norm": 0.13662609457969666, "learning_rate": 5.212171849987743e-06, "loss": 1.1986355781555176, "step": 1372 }, { "epoch": 2.002915451895044, "grad_norm": 0.15793374180793762, "learning_rate": 5.1928148654353196e-06, "loss": 0.921393871307373, "step": 1374 }, { "epoch": 2.0058309037900877, "grad_norm": 0.4891752600669861, "learning_rate": 5.17348945909799e-06, "loss": 0.9690005779266357, "step": 1376 }, { "epoch": 2.008746355685131, "grad_norm": 0.2033310979604721, "learning_rate": 5.1541958305913536e-06, "loss": 1.3568806648254395, "step": 1378 }, { "epoch": 2.011661807580175, "grad_norm": 0.1594112515449524, "learning_rate": 5.134934179202771e-06, "loss": 1.033390998840332, "step": 1380 }, { "epoch": 2.0145772594752187, "grad_norm": 0.2081524133682251, "learning_rate": 5.115704703889299e-06, "loss": 1.0304166078567505, "step": 1382 }, { "epoch": 2.0174927113702625, "grad_norm": 0.38243576884269714, "learning_rate": 5.096507603275648e-06, "loss": 0.9502314925193787, "step": 1384 }, { "epoch": 2.020408163265306, "grad_norm": 0.06100543960928917, "learning_rate": 5.077343075652124e-06, "loss": 1.1048611402511597, "step": 1386 }, { "epoch": 2.0233236151603498, "grad_norm": 0.386870414018631, "learning_rate": 5.058211318972581e-06, "loss": 1.2929866313934326, "step": 1388 }, { "epoch": 2.0262390670553936, "grad_norm": 0.1502365618944168, "learning_rate": 5.0391125308523744e-06, "loss": 1.2062195539474487, "step": 1390 }, { "epoch": 2.0291545189504374, "grad_norm": 0.46698620915412903, "learning_rate": 5.020046908566317e-06, "loss": 1.2675377130508423, "step": 1392 }, { "epoch": 2.0320699708454812, "grad_norm": 0.2170051783323288, "learning_rate": 5.001014649046655e-06, "loss": 1.0185376405715942, "step": 1394 }, { "epoch": 2.0349854227405246, "grad_norm": 0.5570895671844482, "learning_rate": 4.98201594888102e-06, "loss": 1.1238821744918823, "step": 1396 }, { "epoch": 2.0379008746355685, "grad_norm": 0.19649037718772888, "learning_rate": 4.963051004310397e-06, "loss": 1.1577717065811157, "step": 1398 }, { "epoch": 2.0408163265306123, "grad_norm": 0.3043438494205475, "learning_rate": 4.944120011227115e-06, "loss": 0.945805549621582, "step": 1400 }, { "epoch": 2.043731778425656, "grad_norm": 0.8879981637001038, "learning_rate": 4.925223165172808e-06, "loss": 1.0322425365447998, "step": 1402 }, { "epoch": 2.0466472303206995, "grad_norm": 0.26241424679756165, "learning_rate": 4.906360661336394e-06, "loss": 1.2149442434310913, "step": 1404 }, { "epoch": 2.0495626822157433, "grad_norm": 0.8886216878890991, "learning_rate": 4.887532694552066e-06, "loss": 1.0274255275726318, "step": 1406 }, { "epoch": 2.052478134110787, "grad_norm": 0.21257859468460083, "learning_rate": 4.868739459297286e-06, "loss": 1.1855621337890625, "step": 1408 }, { "epoch": 2.055393586005831, "grad_norm": 0.14593669772148132, "learning_rate": 4.8499811496907506e-06, "loss": 0.7928017377853394, "step": 1410 }, { "epoch": 2.058309037900875, "grad_norm": 0.06642908602952957, "learning_rate": 4.831257959490425e-06, "loss": 1.0738983154296875, "step": 1412 }, { "epoch": 2.061224489795918, "grad_norm": 0.3109600841999054, "learning_rate": 4.812570082091498e-06, "loss": 0.8972907662391663, "step": 1414 }, { "epoch": 2.064139941690962, "grad_norm": 0.13277745246887207, "learning_rate": 4.793917710524422e-06, "loss": 1.0650956630706787, "step": 1416 }, { "epoch": 2.067055393586006, "grad_norm": 0.14433449506759644, "learning_rate": 4.775301037452898e-06, "loss": 1.1586172580718994, "step": 1418 }, { "epoch": 2.0699708454810497, "grad_norm": 0.15220968425273895, "learning_rate": 4.756720255171887e-06, "loss": 0.5742167234420776, "step": 1420 }, { "epoch": 2.072886297376093, "grad_norm": 0.126608744263649, "learning_rate": 4.738175555605632e-06, "loss": 1.242780327796936, "step": 1422 }, { "epoch": 2.075801749271137, "grad_norm": 0.10246127843856812, "learning_rate": 4.719667130305671e-06, "loss": 0.9981814622879028, "step": 1424 }, { "epoch": 2.0787172011661808, "grad_norm": 0.2460668534040451, "learning_rate": 4.701195170448857e-06, "loss": 0.8302922248840332, "step": 1426 }, { "epoch": 2.0816326530612246, "grad_norm": 0.155581995844841, "learning_rate": 4.682759866835388e-06, "loss": 1.3268355131149292, "step": 1428 }, { "epoch": 2.0845481049562684, "grad_norm": 0.10044138133525848, "learning_rate": 4.664361409886829e-06, "loss": 0.9983614087104797, "step": 1430 }, { "epoch": 2.087463556851312, "grad_norm": 0.2085467278957367, "learning_rate": 4.645999989644148e-06, "loss": 1.1001629829406738, "step": 1432 }, { "epoch": 2.0903790087463556, "grad_norm": 0.33730220794677734, "learning_rate": 4.627675795765761e-06, "loss": 1.3111716508865356, "step": 1434 }, { "epoch": 2.0932944606413995, "grad_norm": 0.2143622636795044, "learning_rate": 4.60938901752556e-06, "loss": 0.8293286561965942, "step": 1436 }, { "epoch": 2.0962099125364433, "grad_norm": 0.07966610789299011, "learning_rate": 4.591139843810967e-06, "loss": 1.1742640733718872, "step": 1438 }, { "epoch": 2.0991253644314867, "grad_norm": 0.18288615345954895, "learning_rate": 4.572928463120982e-06, "loss": 1.1798888444900513, "step": 1440 }, { "epoch": 2.1020408163265305, "grad_norm": 0.2549722194671631, "learning_rate": 4.554755063564226e-06, "loss": 1.0986790657043457, "step": 1442 }, { "epoch": 2.1049562682215743, "grad_norm": 0.1803271621465683, "learning_rate": 4.536619832857015e-06, "loss": 1.0121634006500244, "step": 1444 }, { "epoch": 2.107871720116618, "grad_norm": 0.33244436979293823, "learning_rate": 4.518522958321409e-06, "loss": 1.2030587196350098, "step": 1446 }, { "epoch": 2.110787172011662, "grad_norm": 0.07119657844305038, "learning_rate": 4.500464626883276e-06, "loss": 0.6789675354957581, "step": 1448 }, { "epoch": 2.1137026239067054, "grad_norm": 0.3919859230518341, "learning_rate": 4.4824450250703755e-06, "loss": 0.8600730895996094, "step": 1450 }, { "epoch": 2.116618075801749, "grad_norm": 0.1530391424894333, "learning_rate": 4.464464339010414e-06, "loss": 0.9321385622024536, "step": 1452 }, { "epoch": 2.119533527696793, "grad_norm": 0.12812215089797974, "learning_rate": 4.446522754429127e-06, "loss": 1.1020374298095703, "step": 1454 }, { "epoch": 2.122448979591837, "grad_norm": 0.2687873840332031, "learning_rate": 4.4286204566483715e-06, "loss": 0.548167884349823, "step": 1456 }, { "epoch": 2.1253644314868803, "grad_norm": 0.351572722196579, "learning_rate": 4.410757630584204e-06, "loss": 0.671511709690094, "step": 1458 }, { "epoch": 2.128279883381924, "grad_norm": 0.3009466230869293, "learning_rate": 4.392934460744958e-06, "loss": 1.0809369087219238, "step": 1460 }, { "epoch": 2.131195335276968, "grad_norm": 0.1647637039422989, "learning_rate": 4.375151131229369e-06, "loss": 1.0825597047805786, "step": 1462 }, { "epoch": 2.1341107871720117, "grad_norm": 0.15290948748588562, "learning_rate": 4.357407825724648e-06, "loss": 1.132341742515564, "step": 1464 }, { "epoch": 2.1370262390670556, "grad_norm": 0.30983132123947144, "learning_rate": 4.339704727504581e-06, "loss": 1.115373969078064, "step": 1466 }, { "epoch": 2.139941690962099, "grad_norm": 0.1616809368133545, "learning_rate": 4.32204201942766e-06, "loss": 1.2571251392364502, "step": 1468 }, { "epoch": 2.142857142857143, "grad_norm": 0.44996944069862366, "learning_rate": 4.304419883935167e-06, "loss": 0.7702177166938782, "step": 1470 }, { "epoch": 2.1457725947521866, "grad_norm": 0.08497241884469986, "learning_rate": 4.286838503049309e-06, "loss": 1.0834498405456543, "step": 1472 }, { "epoch": 2.1486880466472305, "grad_norm": 0.4060671925544739, "learning_rate": 4.26929805837134e-06, "loss": 1.1200850009918213, "step": 1474 }, { "epoch": 2.151603498542274, "grad_norm": 0.17709168791770935, "learning_rate": 4.2517987310796595e-06, "loss": 1.1172959804534912, "step": 1476 }, { "epoch": 2.1545189504373177, "grad_norm": 0.1522580236196518, "learning_rate": 4.23434070192797e-06, "loss": 1.168565034866333, "step": 1478 }, { "epoch": 2.1574344023323615, "grad_norm": 0.1714070737361908, "learning_rate": 4.216924151243395e-06, "loss": 1.1115281581878662, "step": 1480 }, { "epoch": 2.1603498542274053, "grad_norm": 0.13482044637203217, "learning_rate": 4.199549258924615e-06, "loss": 1.2671080827713013, "step": 1482 }, { "epoch": 2.163265306122449, "grad_norm": 0.1459122747182846, "learning_rate": 4.18221620444002e-06, "loss": 1.172806739807129, "step": 1484 }, { "epoch": 2.1661807580174925, "grad_norm": 0.08871738612651825, "learning_rate": 4.1649251668258475e-06, "loss": 1.045624852180481, "step": 1486 }, { "epoch": 2.1690962099125364, "grad_norm": 0.3394921123981476, "learning_rate": 4.147676324684335e-06, "loss": 1.1889164447784424, "step": 1488 }, { "epoch": 2.17201166180758, "grad_norm": 0.1473836749792099, "learning_rate": 4.130469856181873e-06, "loss": 1.079075813293457, "step": 1490 }, { "epoch": 2.174927113702624, "grad_norm": 0.18347686529159546, "learning_rate": 4.113305939047174e-06, "loss": 1.2786171436309814, "step": 1492 }, { "epoch": 2.1778425655976674, "grad_norm": 0.16250960528850555, "learning_rate": 4.096184750569422e-06, "loss": 0.677879273891449, "step": 1494 }, { "epoch": 2.1807580174927113, "grad_norm": 0.383709192276001, "learning_rate": 4.07910646759645e-06, "loss": 0.6416628360748291, "step": 1496 }, { "epoch": 2.183673469387755, "grad_norm": 0.07085460424423218, "learning_rate": 4.062071266532916e-06, "loss": 1.0884201526641846, "step": 1498 }, { "epoch": 2.186588921282799, "grad_norm": 0.10339315980672836, "learning_rate": 4.045079323338477e-06, "loss": 0.8533938527107239, "step": 1500 }, { "epoch": 2.1895043731778427, "grad_norm": 0.20028476417064667, "learning_rate": 4.0281308135259705e-06, "loss": 0.9680588841438293, "step": 1502 }, { "epoch": 2.192419825072886, "grad_norm": 0.3516143560409546, "learning_rate": 4.0112259121596e-06, "loss": 0.7940521240234375, "step": 1504 }, { "epoch": 2.19533527696793, "grad_norm": 0.10385473072528839, "learning_rate": 3.994364793853135e-06, "loss": 1.1375114917755127, "step": 1506 }, { "epoch": 2.198250728862974, "grad_norm": 0.10895653814077377, "learning_rate": 3.977547632768095e-06, "loss": 1.1559362411499023, "step": 1508 }, { "epoch": 2.2011661807580176, "grad_norm": 0.11289890855550766, "learning_rate": 3.960774602611966e-06, "loss": 1.1142271757125854, "step": 1510 }, { "epoch": 2.204081632653061, "grad_norm": 0.11957119405269623, "learning_rate": 3.94404587663639e-06, "loss": 0.997885525226593, "step": 1512 }, { "epoch": 2.206997084548105, "grad_norm": 0.1454574018716812, "learning_rate": 3.9273616276353904e-06, "loss": 0.6211732625961304, "step": 1514 }, { "epoch": 2.2099125364431487, "grad_norm": 0.2732894718647003, "learning_rate": 3.910722027943569e-06, "loss": 0.7947649955749512, "step": 1516 }, { "epoch": 2.2128279883381925, "grad_norm": 0.31755542755126953, "learning_rate": 3.894127249434352e-06, "loss": 0.9824427366256714, "step": 1518 }, { "epoch": 2.2157434402332363, "grad_norm": 0.31029990315437317, "learning_rate": 3.877577463518183e-06, "loss": 1.0954536199569702, "step": 1520 }, { "epoch": 2.2186588921282797, "grad_norm": 0.13882219791412354, "learning_rate": 3.861072841140779e-06, "loss": 1.1737290620803833, "step": 1522 }, { "epoch": 2.2215743440233235, "grad_norm": 0.199194073677063, "learning_rate": 3.8446135527813596e-06, "loss": 1.2562403678894043, "step": 1524 }, { "epoch": 2.2244897959183674, "grad_norm": 0.09712310880422592, "learning_rate": 3.828199768450866e-06, "loss": 0.887328028678894, "step": 1526 }, { "epoch": 2.227405247813411, "grad_norm": 0.3643515110015869, "learning_rate": 3.8118316576902345e-06, "loss": 0.13481314480304718, "step": 1528 }, { "epoch": 2.2303206997084546, "grad_norm": 0.4534083604812622, "learning_rate": 3.7955093895686242e-06, "loss": 1.0862985849380493, "step": 1530 }, { "epoch": 2.2332361516034984, "grad_norm": 0.15879718959331512, "learning_rate": 3.779233132681675e-06, "loss": 1.045498013496399, "step": 1532 }, { "epoch": 2.2361516034985423, "grad_norm": 0.18001393973827362, "learning_rate": 3.7630030551497728e-06, "loss": 1.1538960933685303, "step": 1534 }, { "epoch": 2.239067055393586, "grad_norm": 0.08799666166305542, "learning_rate": 3.746819324616308e-06, "loss": 1.0975581407546997, "step": 1536 }, { "epoch": 2.24198250728863, "grad_norm": 0.24161297082901, "learning_rate": 3.730682108245944e-06, "loss": 0.6484414339065552, "step": 1538 }, { "epoch": 2.2448979591836733, "grad_norm": 0.08378497511148453, "learning_rate": 3.714591572722891e-06, "loss": 0.9581442475318909, "step": 1540 }, { "epoch": 2.247813411078717, "grad_norm": 0.10033685714006424, "learning_rate": 3.698547884249187e-06, "loss": 0.6113779544830322, "step": 1542 }, { "epoch": 2.250728862973761, "grad_norm": 0.275552362203598, "learning_rate": 3.6825512085429703e-06, "loss": 1.1037795543670654, "step": 1544 }, { "epoch": 2.253644314868805, "grad_norm": 0.5268692374229431, "learning_rate": 3.6666017108367837e-06, "loss": 0.8392840027809143, "step": 1546 }, { "epoch": 2.256559766763848, "grad_norm": 0.24270810186862946, "learning_rate": 3.6506995558758586e-06, "loss": 1.0857195854187012, "step": 1548 }, { "epoch": 2.259475218658892, "grad_norm": 0.11209052801132202, "learning_rate": 3.6348449079164116e-06, "loss": 1.0408934354782104, "step": 1550 }, { "epoch": 2.262390670553936, "grad_norm": 0.3595077097415924, "learning_rate": 3.619037930723958e-06, "loss": 0.41006362438201904, "step": 1552 }, { "epoch": 2.2653061224489797, "grad_norm": 0.20681369304656982, "learning_rate": 3.603278787571601e-06, "loss": 1.08263099193573, "step": 1554 }, { "epoch": 2.2682215743440235, "grad_norm": 0.1791142076253891, "learning_rate": 3.587567641238369e-06, "loss": 1.1789532899856567, "step": 1556 }, { "epoch": 2.271137026239067, "grad_norm": 0.15824060142040253, "learning_rate": 3.5719046540075155e-06, "loss": 1.138330101966858, "step": 1558 }, { "epoch": 2.2740524781341107, "grad_norm": 0.08995150774717331, "learning_rate": 3.5562899876648556e-06, "loss": 1.0861237049102783, "step": 1560 }, { "epoch": 2.2769679300291545, "grad_norm": 0.20422294735908508, "learning_rate": 3.540723803497084e-06, "loss": 1.068771481513977, "step": 1562 }, { "epoch": 2.2798833819241984, "grad_norm": 0.29918450117111206, "learning_rate": 3.5252062622901196e-06, "loss": 1.0257431268692017, "step": 1564 }, { "epoch": 2.2827988338192418, "grad_norm": 0.2508153021335602, "learning_rate": 3.5097375243274322e-06, "loss": 0.7228989601135254, "step": 1566 }, { "epoch": 2.2857142857142856, "grad_norm": 0.20312649011611938, "learning_rate": 3.494317749388401e-06, "loss": 0.9408363103866577, "step": 1568 }, { "epoch": 2.2886297376093294, "grad_norm": 0.18280087411403656, "learning_rate": 3.4789470967466528e-06, "loss": 1.1609010696411133, "step": 1570 }, { "epoch": 2.2915451895043732, "grad_norm": 0.4031111001968384, "learning_rate": 3.4636257251684247e-06, "loss": 1.1523736715316772, "step": 1572 }, { "epoch": 2.294460641399417, "grad_norm": 0.14943495392799377, "learning_rate": 3.4483537929109212e-06, "loss": 1.0938516855239868, "step": 1574 }, { "epoch": 2.2973760932944605, "grad_norm": 0.32287096977233887, "learning_rate": 3.433131457720673e-06, "loss": 0.8949427604675293, "step": 1576 }, { "epoch": 2.3002915451895043, "grad_norm": 0.13816498219966888, "learning_rate": 3.4179588768319194e-06, "loss": 1.004232406616211, "step": 1578 }, { "epoch": 2.303206997084548, "grad_norm": 0.17348824441432953, "learning_rate": 3.4028362069649807e-06, "loss": 1.1232084035873413, "step": 1580 }, { "epoch": 2.306122448979592, "grad_norm": 0.2952488362789154, "learning_rate": 3.387763604324628e-06, "loss": 1.2846827507019043, "step": 1582 }, { "epoch": 2.3090379008746353, "grad_norm": 0.0930081456899643, "learning_rate": 3.3727412245984863e-06, "loss": 1.0255701541900635, "step": 1584 }, { "epoch": 2.311953352769679, "grad_norm": 0.19518348574638367, "learning_rate": 3.3577692229554225e-06, "loss": 0.9602378606796265, "step": 1586 }, { "epoch": 2.314868804664723, "grad_norm": 0.08679629117250443, "learning_rate": 3.3428477540439295e-06, "loss": 1.0191975831985474, "step": 1588 }, { "epoch": 2.317784256559767, "grad_norm": 0.07790417969226837, "learning_rate": 3.3279769719905438e-06, "loss": 1.1509268283843994, "step": 1590 }, { "epoch": 2.3206997084548107, "grad_norm": 0.2912391126155853, "learning_rate": 3.3131570303982517e-06, "loss": 0.6687411665916443, "step": 1592 }, { "epoch": 2.323615160349854, "grad_norm": 0.4317520260810852, "learning_rate": 3.2983880823448896e-06, "loss": 0.8183987736701965, "step": 1594 }, { "epoch": 2.326530612244898, "grad_norm": 0.11885584890842438, "learning_rate": 3.283670280381581e-06, "loss": 1.1012320518493652, "step": 1596 }, { "epoch": 2.3294460641399417, "grad_norm": 0.35252460837364197, "learning_rate": 3.269003776531148e-06, "loss": 0.9789476990699768, "step": 1598 }, { "epoch": 2.3323615160349855, "grad_norm": 0.15434707701206207, "learning_rate": 3.2543887222865496e-06, "loss": 1.1043654680252075, "step": 1600 }, { "epoch": 2.335276967930029, "grad_norm": 0.16315020620822906, "learning_rate": 3.239825268609309e-06, "loss": 1.0038485527038574, "step": 1602 }, { "epoch": 2.3381924198250728, "grad_norm": 0.39029252529144287, "learning_rate": 3.2253135659279558e-06, "loss": 1.1852213144302368, "step": 1604 }, { "epoch": 2.3411078717201166, "grad_norm": 0.2913620173931122, "learning_rate": 3.2108537641364786e-06, "loss": 0.45255744457244873, "step": 1606 }, { "epoch": 2.3440233236151604, "grad_norm": 0.06582468003034592, "learning_rate": 3.19644601259277e-06, "loss": 1.269538402557373, "step": 1608 }, { "epoch": 2.3469387755102042, "grad_norm": 0.5571786761283875, "learning_rate": 3.1820904601170884e-06, "loss": 0.8519521355628967, "step": 1610 }, { "epoch": 2.3498542274052476, "grad_norm": 0.31546610593795776, "learning_rate": 3.1677872549905154e-06, "loss": 1.3262689113616943, "step": 1612 }, { "epoch": 2.3527696793002915, "grad_norm": 0.09515654295682907, "learning_rate": 3.153536544953433e-06, "loss": 0.9249638319015503, "step": 1614 }, { "epoch": 2.3556851311953353, "grad_norm": 0.15578609704971313, "learning_rate": 3.139338477203983e-06, "loss": 1.1823093891143799, "step": 1616 }, { "epoch": 2.358600583090379, "grad_norm": 0.2227763533592224, "learning_rate": 3.125193198396564e-06, "loss": 1.2877289056777954, "step": 1618 }, { "epoch": 2.3615160349854225, "grad_norm": 0.4745902121067047, "learning_rate": 3.111100854640303e-06, "loss": 0.9719488024711609, "step": 1620 }, { "epoch": 2.3644314868804663, "grad_norm": 0.24592548608779907, "learning_rate": 3.097061591497555e-06, "loss": 1.0211539268493652, "step": 1622 }, { "epoch": 2.36734693877551, "grad_norm": 0.21700948476791382, "learning_rate": 3.0830755539823942e-06, "loss": 0.9550508260726929, "step": 1624 }, { "epoch": 2.370262390670554, "grad_norm": 0.20466458797454834, "learning_rate": 3.0691428865591153e-06, "loss": 0.5767884254455566, "step": 1626 }, { "epoch": 2.373177842565598, "grad_norm": 0.14715692400932312, "learning_rate": 3.0552637331407466e-06, "loss": 0.894551694393158, "step": 1628 }, { "epoch": 2.376093294460641, "grad_norm": 0.1368647813796997, "learning_rate": 3.0414382370875628e-06, "loss": 1.2126644849777222, "step": 1630 }, { "epoch": 2.379008746355685, "grad_norm": 0.2084326297044754, "learning_rate": 3.027666541205592e-06, "loss": 1.1460554599761963, "step": 1632 }, { "epoch": 2.381924198250729, "grad_norm": 0.12772594392299652, "learning_rate": 3.013948787745166e-06, "loss": 0.8425911664962769, "step": 1634 }, { "epoch": 2.3848396501457727, "grad_norm": 0.21220910549163818, "learning_rate": 3.000285118399425e-06, "loss": 1.0760411024093628, "step": 1636 }, { "epoch": 2.387755102040816, "grad_norm": 0.16325032711029053, "learning_rate": 2.9866756743028644e-06, "loss": 1.1195225715637207, "step": 1638 }, { "epoch": 2.39067055393586, "grad_norm": 0.1648532897233963, "learning_rate": 2.973120596029882e-06, "loss": 1.0467681884765625, "step": 1640 }, { "epoch": 2.3935860058309038, "grad_norm": 0.5487902164459229, "learning_rate": 2.9596200235933215e-06, "loss": 1.1597939729690552, "step": 1642 }, { "epoch": 2.3965014577259476, "grad_norm": 0.15476688742637634, "learning_rate": 2.9461740964430176e-06, "loss": 1.0105078220367432, "step": 1644 }, { "epoch": 2.3994169096209914, "grad_norm": 1.1137182712554932, "learning_rate": 2.932782953464373e-06, "loss": 1.0070343017578125, "step": 1646 }, { "epoch": 2.402332361516035, "grad_norm": 0.3256247043609619, "learning_rate": 2.9194467329769166e-06, "loss": 0.9948145151138306, "step": 1648 }, { "epoch": 2.4052478134110786, "grad_norm": 0.14843417704105377, "learning_rate": 2.9061655727328617e-06, "loss": 1.0339670181274414, "step": 1650 }, { "epoch": 2.4081632653061225, "grad_norm": 0.14106328785419464, "learning_rate": 2.8929396099157056e-06, "loss": 1.149165391921997, "step": 1652 }, { "epoch": 2.4110787172011663, "grad_norm": 0.1781884729862213, "learning_rate": 2.8797689811387944e-06, "loss": 0.9708322286605835, "step": 1654 }, { "epoch": 2.4139941690962097, "grad_norm": 0.16324618458747864, "learning_rate": 2.8666538224439207e-06, "loss": 0.9147579669952393, "step": 1656 }, { "epoch": 2.4169096209912535, "grad_norm": 0.10199990123510361, "learning_rate": 2.853594269299919e-06, "loss": 1.1740384101867676, "step": 1658 }, { "epoch": 2.4198250728862973, "grad_norm": 0.36128106713294983, "learning_rate": 2.8405904566012634e-06, "loss": 0.9795001149177551, "step": 1660 }, { "epoch": 2.422740524781341, "grad_norm": 0.11705031245946884, "learning_rate": 2.827642518666673e-06, "loss": 1.0222880840301514, "step": 1662 }, { "epoch": 2.425655976676385, "grad_norm": 0.19340762495994568, "learning_rate": 2.814750589237729e-06, "loss": 1.0553447008132935, "step": 1664 }, { "epoch": 2.4285714285714284, "grad_norm": 0.09246297180652618, "learning_rate": 2.8019148014774856e-06, "loss": 1.0741846561431885, "step": 1666 }, { "epoch": 2.431486880466472, "grad_norm": 0.23843225836753845, "learning_rate": 2.789135287969106e-06, "loss": 1.1993522644042969, "step": 1668 }, { "epoch": 2.434402332361516, "grad_norm": 0.7431137561798096, "learning_rate": 2.7764121807144815e-06, "loss": 0.42419517040252686, "step": 1670 }, { "epoch": 2.43731778425656, "grad_norm": 0.11922803521156311, "learning_rate": 2.7637456111328773e-06, "loss": 1.0701881647109985, "step": 1672 }, { "epoch": 2.4402332361516033, "grad_norm": 0.238107368350029, "learning_rate": 2.7511357100595675e-06, "loss": 1.0204083919525146, "step": 1674 }, { "epoch": 2.443148688046647, "grad_norm": 0.18065865337848663, "learning_rate": 2.738582607744491e-06, "loss": 1.1767973899841309, "step": 1676 }, { "epoch": 2.446064139941691, "grad_norm": 0.6328040361404419, "learning_rate": 2.7260864338508944e-06, "loss": 1.2465075254440308, "step": 1678 }, { "epoch": 2.4489795918367347, "grad_norm": 0.32334592938423157, "learning_rate": 2.71364731745401e-06, "loss": 0.9165597558021545, "step": 1680 }, { "epoch": 2.4518950437317786, "grad_norm": 0.29830703139305115, "learning_rate": 2.701265387039703e-06, "loss": 1.0425974130630493, "step": 1682 }, { "epoch": 2.454810495626822, "grad_norm": 0.09913703799247742, "learning_rate": 2.688940770503163e-06, "loss": 1.1421351432800293, "step": 1684 }, { "epoch": 2.457725947521866, "grad_norm": 0.19002677500247955, "learning_rate": 2.676673595147574e-06, "loss": 1.14607572555542, "step": 1686 }, { "epoch": 2.4606413994169096, "grad_norm": 0.17399148643016815, "learning_rate": 2.6644639876827903e-06, "loss": 1.0854803323745728, "step": 1688 }, { "epoch": 2.4635568513119535, "grad_norm": 0.18045774102210999, "learning_rate": 2.6523120742240457e-06, "loss": 1.156597375869751, "step": 1690 }, { "epoch": 2.466472303206997, "grad_norm": 0.36970221996307373, "learning_rate": 2.6402179802906417e-06, "loss": 1.1326744556427002, "step": 1692 }, { "epoch": 2.4693877551020407, "grad_norm": 0.16106556355953217, "learning_rate": 2.6281818308046466e-06, "loss": 1.1174097061157227, "step": 1694 }, { "epoch": 2.4723032069970845, "grad_norm": 0.23179616034030914, "learning_rate": 2.6162037500896134e-06, "loss": 1.247542381286621, "step": 1696 }, { "epoch": 2.4752186588921283, "grad_norm": 0.20750805735588074, "learning_rate": 2.6042838618692964e-06, "loss": 1.120650291442871, "step": 1698 }, { "epoch": 2.478134110787172, "grad_norm": 0.4005797207355499, "learning_rate": 2.5924222892663607e-06, "loss": 1.1234309673309326, "step": 1700 }, { "epoch": 2.481049562682216, "grad_norm": 0.11094089597463608, "learning_rate": 2.580619154801124e-06, "loss": 1.0382579565048218, "step": 1702 }, { "epoch": 2.4839650145772594, "grad_norm": 0.1598607450723648, "learning_rate": 2.5688745803902863e-06, "loss": 0.8054310083389282, "step": 1704 }, { "epoch": 2.486880466472303, "grad_norm": 0.29358312487602234, "learning_rate": 2.557188687345666e-06, "loss": 1.2227270603179932, "step": 1706 }, { "epoch": 2.489795918367347, "grad_norm": 0.10478518158197403, "learning_rate": 2.545561596372957e-06, "loss": 1.0256011486053467, "step": 1708 }, { "epoch": 2.4927113702623904, "grad_norm": 0.19069114327430725, "learning_rate": 2.533993427570471e-06, "loss": 1.003487467765808, "step": 1710 }, { "epoch": 2.4956268221574343, "grad_norm": 0.19944234192371368, "learning_rate": 2.522484300427905e-06, "loss": 1.1340402364730835, "step": 1712 }, { "epoch": 2.498542274052478, "grad_norm": 0.206906259059906, "learning_rate": 2.5110343338251055e-06, "loss": 0.7293667793273926, "step": 1714 }, { "epoch": 2.501457725947522, "grad_norm": 0.22807729244232178, "learning_rate": 2.499643646030833e-06, "loss": 0.6911664009094238, "step": 1716 }, { "epoch": 2.5043731778425657, "grad_norm": 0.12783202528953552, "learning_rate": 2.488312354701552e-06, "loss": 1.0861356258392334, "step": 1718 }, { "epoch": 2.5072886297376096, "grad_norm": 0.24884046614170074, "learning_rate": 2.4770405768802087e-06, "loss": 1.2009036540985107, "step": 1720 }, { "epoch": 2.510204081632653, "grad_norm": 0.19883911311626434, "learning_rate": 2.4658284289950235e-06, "loss": 1.171090006828308, "step": 1722 }, { "epoch": 2.513119533527697, "grad_norm": 0.2198370397090912, "learning_rate": 2.454676026858288e-06, "loss": 0.6773008704185486, "step": 1724 }, { "epoch": 2.5160349854227406, "grad_norm": 0.3970673084259033, "learning_rate": 2.443583485665172e-06, "loss": 0.9177547693252563, "step": 1726 }, { "epoch": 2.518950437317784, "grad_norm": 0.14196209609508514, "learning_rate": 2.432550919992524e-06, "loss": 1.0238224267959595, "step": 1728 }, { "epoch": 2.521865889212828, "grad_norm": 0.08479610830545425, "learning_rate": 2.4215784437977023e-06, "loss": 1.0351308584213257, "step": 1730 }, { "epoch": 2.5247813411078717, "grad_norm": 0.2791972756385803, "learning_rate": 2.4106661704173856e-06, "loss": 1.2357579469680786, "step": 1732 }, { "epoch": 2.5276967930029155, "grad_norm": 0.300520658493042, "learning_rate": 2.3998142125664094e-06, "loss": 0.9955886602401733, "step": 1734 }, { "epoch": 2.5306122448979593, "grad_norm": 0.07155195623636246, "learning_rate": 2.3890226823365984e-06, "loss": 0.9533568024635315, "step": 1736 }, { "epoch": 2.533527696793003, "grad_norm": 0.37421008944511414, "learning_rate": 2.3782916911956072e-06, "loss": 0.7588440179824829, "step": 1738 }, { "epoch": 2.5364431486880465, "grad_norm": 0.21846982836723328, "learning_rate": 2.3676213499857742e-06, "loss": 1.0482406616210938, "step": 1740 }, { "epoch": 2.5393586005830904, "grad_norm": 0.22150775790214539, "learning_rate": 2.357011768922975e-06, "loss": 0.9425265789031982, "step": 1742 }, { "epoch": 2.542274052478134, "grad_norm": 0.0946943610906601, "learning_rate": 2.3464630575954748e-06, "loss": 1.0236523151397705, "step": 1744 }, { "epoch": 2.5451895043731776, "grad_norm": 0.2336379438638687, "learning_rate": 2.3359753249628156e-06, "loss": 0.9605098962783813, "step": 1746 }, { "epoch": 2.5481049562682214, "grad_norm": 0.38517579436302185, "learning_rate": 2.3255486793546735e-06, "loss": 0.7055401802062988, "step": 1748 }, { "epoch": 2.5510204081632653, "grad_norm": 0.22488614916801453, "learning_rate": 2.3151832284697437e-06, "loss": 1.3222585916519165, "step": 1750 }, { "epoch": 2.553935860058309, "grad_norm": 0.14808881282806396, "learning_rate": 2.304879079374634e-06, "loss": 0.6318288445472717, "step": 1752 }, { "epoch": 2.556851311953353, "grad_norm": 0.12122584134340286, "learning_rate": 2.2946363385027555e-06, "loss": 1.0979853868484497, "step": 1754 }, { "epoch": 2.5597667638483967, "grad_norm": 0.17218822240829468, "learning_rate": 2.2844551116532164e-06, "loss": 1.1333314180374146, "step": 1756 }, { "epoch": 2.56268221574344, "grad_norm": 0.2076103240251541, "learning_rate": 2.274335503989743e-06, "loss": 1.1102957725524902, "step": 1758 }, { "epoch": 2.565597667638484, "grad_norm": 0.3147886395454407, "learning_rate": 2.2642776200395825e-06, "loss": 1.0110862255096436, "step": 1760 }, { "epoch": 2.568513119533528, "grad_norm": 0.199388787150383, "learning_rate": 2.2542815636924273e-06, "loss": 1.1791144609451294, "step": 1762 }, { "epoch": 2.571428571428571, "grad_norm": 0.14399054646492004, "learning_rate": 2.2443474381993418e-06, "loss": 0.6136134266853333, "step": 1764 }, { "epoch": 2.574344023323615, "grad_norm": 0.12786594033241272, "learning_rate": 2.2344753461716924e-06, "loss": 1.169732928276062, "step": 1766 }, { "epoch": 2.577259475218659, "grad_norm": 0.42270779609680176, "learning_rate": 2.2246653895800945e-06, "loss": 1.167303442955017, "step": 1768 }, { "epoch": 2.5801749271137027, "grad_norm": 0.3366575539112091, "learning_rate": 2.2149176697533547e-06, "loss": 0.7395915985107422, "step": 1770 }, { "epoch": 2.5830903790087465, "grad_norm": 0.11204802244901657, "learning_rate": 2.2052322873774243e-06, "loss": 1.130765676498413, "step": 1772 }, { "epoch": 2.5860058309037903, "grad_norm": 0.40100663900375366, "learning_rate": 2.195609342494358e-06, "loss": 0.9160555601119995, "step": 1774 }, { "epoch": 2.5889212827988337, "grad_norm": 0.3878629505634308, "learning_rate": 2.1860489345012882e-06, "loss": 1.1737711429595947, "step": 1776 }, { "epoch": 2.5918367346938775, "grad_norm": 0.2504361569881439, "learning_rate": 2.1765511621493837e-06, "loss": 1.1497868299484253, "step": 1778 }, { "epoch": 2.5947521865889214, "grad_norm": 0.399038165807724, "learning_rate": 2.1671161235428466e-06, "loss": 1.0515235662460327, "step": 1780 }, { "epoch": 2.5976676384839648, "grad_norm": 0.18093329668045044, "learning_rate": 2.1577439161378857e-06, "loss": 1.0114405155181885, "step": 1782 }, { "epoch": 2.6005830903790086, "grad_norm": 0.20376266539096832, "learning_rate": 2.1484346367417174e-06, "loss": 1.1349772214889526, "step": 1784 }, { "epoch": 2.6034985422740524, "grad_norm": 0.12697869539260864, "learning_rate": 2.139188381511565e-06, "loss": 1.0220611095428467, "step": 1786 }, { "epoch": 2.6064139941690962, "grad_norm": 0.17522640526294708, "learning_rate": 2.1300052459536577e-06, "loss": 1.04948890209198, "step": 1788 }, { "epoch": 2.60932944606414, "grad_norm": 0.33081164956092834, "learning_rate": 2.120885324922257e-06, "loss": 1.067612648010254, "step": 1790 }, { "epoch": 2.612244897959184, "grad_norm": 0.19511879980564117, "learning_rate": 2.1118287126186663e-06, "loss": 1.1198432445526123, "step": 1792 }, { "epoch": 2.6151603498542273, "grad_norm": 0.12612418830394745, "learning_rate": 2.102835502590264e-06, "loss": 0.9212133884429932, "step": 1794 }, { "epoch": 2.618075801749271, "grad_norm": 1.4945578575134277, "learning_rate": 2.0939057877295337e-06, "loss": 0.9755832552909851, "step": 1796 }, { "epoch": 2.620991253644315, "grad_norm": 0.11096255481243134, "learning_rate": 2.085039660273107e-06, "loss": 0.8870418071746826, "step": 1798 }, { "epoch": 2.6239067055393583, "grad_norm": 0.16551688313484192, "learning_rate": 2.076237211800807e-06, "loss": 1.1013219356536865, "step": 1800 }, { "epoch": 2.626822157434402, "grad_norm": 0.12267225235700607, "learning_rate": 2.067498533234708e-06, "loss": 1.1636854410171509, "step": 1802 }, { "epoch": 2.629737609329446, "grad_norm": 0.21022585034370422, "learning_rate": 2.0588237148381937e-06, "loss": 1.0870646238327026, "step": 1804 }, { "epoch": 2.63265306122449, "grad_norm": 0.12315444648265839, "learning_rate": 2.05021284621502e-06, "loss": 1.0031044483184814, "step": 1806 }, { "epoch": 2.6355685131195337, "grad_norm": 0.08722248673439026, "learning_rate": 2.0416660163084007e-06, "loss": 1.1768810749053955, "step": 1808 }, { "epoch": 2.6384839650145775, "grad_norm": 0.14608271420001984, "learning_rate": 2.0331833134000806e-06, "loss": 1.1812292337417603, "step": 1810 }, { "epoch": 2.641399416909621, "grad_norm": 0.12209862470626831, "learning_rate": 2.0247648251094187e-06, "loss": 0.5496333241462708, "step": 1812 }, { "epoch": 2.6443148688046647, "grad_norm": 0.14420591294765472, "learning_rate": 2.0164106383924995e-06, "loss": 1.0734022855758667, "step": 1814 }, { "epoch": 2.6472303206997085, "grad_norm": 0.34557104110717773, "learning_rate": 2.008120839541217e-06, "loss": 0.8214896321296692, "step": 1816 }, { "epoch": 2.650145772594752, "grad_norm": 0.19864369928836823, "learning_rate": 1.9998955141823947e-06, "loss": 1.1074302196502686, "step": 1818 }, { "epoch": 2.6530612244897958, "grad_norm": 0.1151181161403656, "learning_rate": 1.9917347472768996e-06, "loss": 1.1880613565444946, "step": 1820 }, { "epoch": 2.6559766763848396, "grad_norm": 0.3938349783420563, "learning_rate": 1.983638623118759e-06, "loss": 0.8221843242645264, "step": 1822 }, { "epoch": 2.6588921282798834, "grad_norm": 0.4980735182762146, "learning_rate": 1.9756072253342956e-06, "loss": 1.0243555307388306, "step": 1824 }, { "epoch": 2.6618075801749272, "grad_norm": 0.2903914451599121, "learning_rate": 1.967640636881263e-06, "loss": 1.1823608875274658, "step": 1826 }, { "epoch": 2.664723032069971, "grad_norm": 0.1528269499540329, "learning_rate": 1.9597389400479843e-06, "loss": 1.1882878541946411, "step": 1828 }, { "epoch": 2.6676384839650145, "grad_norm": 0.37738537788391113, "learning_rate": 1.9519022164525086e-06, "loss": 0.8332970142364502, "step": 1830 }, { "epoch": 2.6705539358600583, "grad_norm": 0.10077593475580215, "learning_rate": 1.9441305470417622e-06, "loss": 1.1155685186386108, "step": 1832 }, { "epoch": 2.673469387755102, "grad_norm": 0.24888084828853607, "learning_rate": 1.936424012090716e-06, "loss": 1.0899043083190918, "step": 1834 }, { "epoch": 2.6763848396501455, "grad_norm": 0.3049887418746948, "learning_rate": 1.9287826912015588e-06, "loss": 1.3089343309402466, "step": 1836 }, { "epoch": 2.6793002915451893, "grad_norm": 0.15812550485134125, "learning_rate": 1.9212066633028635e-06, "loss": 1.0993826389312744, "step": 1838 }, { "epoch": 2.682215743440233, "grad_norm": 0.265886515378952, "learning_rate": 1.9136960066487884e-06, "loss": 1.0602340698242188, "step": 1840 }, { "epoch": 2.685131195335277, "grad_norm": 0.8439386487007141, "learning_rate": 1.9062507988182545e-06, "loss": 1.0067952871322632, "step": 1842 }, { "epoch": 2.688046647230321, "grad_norm": 0.45330727100372314, "learning_rate": 1.8988711167141542e-06, "loss": 0.5957139134407043, "step": 1844 }, { "epoch": 2.6909620991253647, "grad_norm": 0.14824670553207397, "learning_rate": 1.8915570365625508e-06, "loss": 1.1712740659713745, "step": 1846 }, { "epoch": 2.693877551020408, "grad_norm": 0.10511742532253265, "learning_rate": 1.8843086339118943e-06, "loss": 1.0602518320083618, "step": 1848 }, { "epoch": 2.696793002915452, "grad_norm": 0.07894819229841232, "learning_rate": 1.8771259836322376e-06, "loss": 1.014635682106018, "step": 1850 }, { "epoch": 2.6997084548104957, "grad_norm": 0.10334635525941849, "learning_rate": 1.8700091599144688e-06, "loss": 1.0106903314590454, "step": 1852 }, { "epoch": 2.702623906705539, "grad_norm": 0.30136221647262573, "learning_rate": 1.8629582362695395e-06, "loss": 0.673401951789856, "step": 1854 }, { "epoch": 2.705539358600583, "grad_norm": 0.5134400129318237, "learning_rate": 1.8559732855277067e-06, "loss": 1.1158447265625, "step": 1856 }, { "epoch": 2.7084548104956268, "grad_norm": 0.35808032751083374, "learning_rate": 1.8490543798377848e-06, "loss": 1.2872017621994019, "step": 1858 }, { "epoch": 2.7113702623906706, "grad_norm": 0.04801107197999954, "learning_rate": 1.8422015906663964e-06, "loss": 0.932016909122467, "step": 1860 }, { "epoch": 2.7142857142857144, "grad_norm": 0.34277820587158203, "learning_rate": 1.8354149887972297e-06, "loss": 0.6936520338058472, "step": 1862 }, { "epoch": 2.7172011661807582, "grad_norm": 0.16731053590774536, "learning_rate": 1.8286946443303187e-06, "loss": 1.1427615880966187, "step": 1864 }, { "epoch": 2.7201166180758016, "grad_norm": 0.8489914536476135, "learning_rate": 1.822040626681308e-06, "loss": 1.0948349237442017, "step": 1866 }, { "epoch": 2.7230320699708455, "grad_norm": 0.41851627826690674, "learning_rate": 1.8154530045807438e-06, "loss": 1.157147765159607, "step": 1868 }, { "epoch": 2.7259475218658893, "grad_norm": 0.09261982142925262, "learning_rate": 1.808931846073361e-06, "loss": 1.0182065963745117, "step": 1870 }, { "epoch": 2.7288629737609327, "grad_norm": 0.07328807562589645, "learning_rate": 1.8024772185173758e-06, "loss": 0.9535019397735596, "step": 1872 }, { "epoch": 2.7317784256559765, "grad_norm": 0.3953118324279785, "learning_rate": 1.7960891885837988e-06, "loss": 0.5561579465866089, "step": 1874 }, { "epoch": 2.7346938775510203, "grad_norm": 0.7391979694366455, "learning_rate": 1.7897678222557402e-06, "loss": 0.9951037764549255, "step": 1876 }, { "epoch": 2.737609329446064, "grad_norm": 0.16622287034988403, "learning_rate": 1.7835131848277288e-06, "loss": 1.129691243171692, "step": 1878 }, { "epoch": 2.740524781341108, "grad_norm": 0.08795658499002457, "learning_rate": 1.7773253409050398e-06, "loss": 0.9720866680145264, "step": 1880 }, { "epoch": 2.743440233236152, "grad_norm": 0.10475818812847137, "learning_rate": 1.7712043544030265e-06, "loss": 0.9624143242835999, "step": 1882 }, { "epoch": 2.746355685131195, "grad_norm": 0.5169785618782043, "learning_rate": 1.7651502885464582e-06, "loss": 0.7830743789672852, "step": 1884 }, { "epoch": 2.749271137026239, "grad_norm": 0.06864479184150696, "learning_rate": 1.7591632058688719e-06, "loss": 1.1376532316207886, "step": 1886 }, { "epoch": 2.752186588921283, "grad_norm": 4.637813091278076, "learning_rate": 1.7532431682119205e-06, "loss": 0.8696690797805786, "step": 1888 }, { "epoch": 2.7551020408163263, "grad_norm": 0.15929657220840454, "learning_rate": 1.7473902367247361e-06, "loss": 1.1236258745193481, "step": 1890 }, { "epoch": 2.75801749271137, "grad_norm": 0.3590356707572937, "learning_rate": 1.7416044718633025e-06, "loss": 0.8365395665168762, "step": 1892 }, { "epoch": 2.760932944606414, "grad_norm": 0.1510230451822281, "learning_rate": 1.735885933389825e-06, "loss": 0.6292239427566528, "step": 1894 }, { "epoch": 2.7638483965014577, "grad_norm": 0.18348506093025208, "learning_rate": 1.730234680372116e-06, "loss": 1.1290793418884277, "step": 1896 }, { "epoch": 2.7667638483965016, "grad_norm": 0.16462060809135437, "learning_rate": 1.7246507711829852e-06, "loss": 1.1606987714767456, "step": 1898 }, { "epoch": 2.7696793002915454, "grad_norm": 0.16783565282821655, "learning_rate": 1.719134263499633e-06, "loss": 0.9577206373214722, "step": 1900 }, { "epoch": 2.772594752186589, "grad_norm": 0.08972535282373428, "learning_rate": 1.7136852143030605e-06, "loss": 0.9086419343948364, "step": 1902 }, { "epoch": 2.7755102040816326, "grad_norm": 0.25966984033584595, "learning_rate": 1.7083036798774771e-06, "loss": 1.16250479221344, "step": 1904 }, { "epoch": 2.7784256559766765, "grad_norm": 0.14714005589485168, "learning_rate": 1.7029897158097191e-06, "loss": 0.6218932867050171, "step": 1906 }, { "epoch": 2.78134110787172, "grad_norm": 0.1505810022354126, "learning_rate": 1.6977433769886777e-06, "loss": 0.9435967206954956, "step": 1908 }, { "epoch": 2.7842565597667637, "grad_norm": 0.5554741621017456, "learning_rate": 1.6925647176047304e-06, "loss": 1.2954356670379639, "step": 1910 }, { "epoch": 2.7871720116618075, "grad_norm": 0.7726877331733704, "learning_rate": 1.6874537911491804e-06, "loss": 1.100317120552063, "step": 1912 }, { "epoch": 2.7900874635568513, "grad_norm": 0.1900632381439209, "learning_rate": 1.682410650413707e-06, "loss": 1.1734505891799927, "step": 1914 }, { "epoch": 2.793002915451895, "grad_norm": 0.2996356189250946, "learning_rate": 1.6774353474898176e-06, "loss": 0.6496275067329407, "step": 1916 }, { "epoch": 2.795918367346939, "grad_norm": 0.28916487097740173, "learning_rate": 1.6725279337683096e-06, "loss": 0.8404643535614014, "step": 1918 }, { "epoch": 2.7988338192419824, "grad_norm": 0.30399462580680847, "learning_rate": 1.6676884599387447e-06, "loss": 0.8097843527793884, "step": 1920 }, { "epoch": 2.801749271137026, "grad_norm": 0.15744291245937347, "learning_rate": 1.6629169759889167e-06, "loss": 1.1007176637649536, "step": 1922 }, { "epoch": 2.80466472303207, "grad_norm": 0.22451713681221008, "learning_rate": 1.6582135312043415e-06, "loss": 1.1043728590011597, "step": 1924 }, { "epoch": 2.8075801749271134, "grad_norm": 0.16485294699668884, "learning_rate": 1.6535781741677468e-06, "loss": 1.1978418827056885, "step": 1926 }, { "epoch": 2.8104956268221573, "grad_norm": 0.11872020363807678, "learning_rate": 1.6490109527585685e-06, "loss": 1.0319398641586304, "step": 1928 }, { "epoch": 2.813411078717201, "grad_norm": 0.22041387856006622, "learning_rate": 1.6445119141524586e-06, "loss": 1.0383124351501465, "step": 1930 }, { "epoch": 2.816326530612245, "grad_norm": 0.1371716856956482, "learning_rate": 1.6400811048207957e-06, "loss": 1.0704172849655151, "step": 1932 }, { "epoch": 2.8192419825072887, "grad_norm": 0.33869630098342896, "learning_rate": 1.6357185705302059e-06, "loss": 0.9032880663871765, "step": 1934 }, { "epoch": 2.8221574344023326, "grad_norm": 0.19506464898586273, "learning_rate": 1.6314243563420908e-06, "loss": 1.1649752855300903, "step": 1936 }, { "epoch": 2.825072886297376, "grad_norm": 0.16767188906669617, "learning_rate": 1.627198506612162e-06, "loss": 1.197486162185669, "step": 1938 }, { "epoch": 2.82798833819242, "grad_norm": 0.17042168974876404, "learning_rate": 1.62304106498998e-06, "loss": 1.065731167793274, "step": 1940 }, { "epoch": 2.8309037900874636, "grad_norm": 0.25560781359672546, "learning_rate": 1.6189520744185072e-06, "loss": 0.9224144220352173, "step": 1942 }, { "epoch": 2.8338192419825075, "grad_norm": 0.20863035321235657, "learning_rate": 1.614931577133663e-06, "loss": 1.0565248727798462, "step": 1944 }, { "epoch": 2.836734693877551, "grad_norm": 0.19189637899398804, "learning_rate": 1.6109796146638871e-06, "loss": 1.232025384902954, "step": 1946 }, { "epoch": 2.8396501457725947, "grad_norm": 0.6458204984664917, "learning_rate": 1.6070962278297113e-06, "loss": 1.0065245628356934, "step": 1948 }, { "epoch": 2.8425655976676385, "grad_norm": 0.3259865939617157, "learning_rate": 1.6032814567433348e-06, "loss": 1.2361031770706177, "step": 1950 }, { "epoch": 2.8454810495626823, "grad_norm": 0.4714111089706421, "learning_rate": 1.5995353408082157e-06, "loss": 1.3339447975158691, "step": 1952 }, { "epoch": 2.848396501457726, "grad_norm": 0.16928227245807648, "learning_rate": 1.5958579187186582e-06, "loss": 1.0442076921463013, "step": 1954 }, { "epoch": 2.8513119533527695, "grad_norm": 0.3731814920902252, "learning_rate": 1.5922492284594174e-06, "loss": 0.878253698348999, "step": 1956 }, { "epoch": 2.8542274052478134, "grad_norm": 0.6527604460716248, "learning_rate": 1.5887093073053036e-06, "loss": 1.0772031545639038, "step": 1958 }, { "epoch": 2.857142857142857, "grad_norm": 0.18542839586734772, "learning_rate": 1.5852381918207995e-06, "loss": 1.116060733795166, "step": 1960 }, { "epoch": 2.860058309037901, "grad_norm": 0.250535786151886, "learning_rate": 1.5818359178596806e-06, "loss": 1.1924026012420654, "step": 1962 }, { "epoch": 2.8629737609329444, "grad_norm": 0.07601413875818253, "learning_rate": 1.5785025205646468e-06, "loss": 0.9614888429641724, "step": 1964 }, { "epoch": 2.8658892128279883, "grad_norm": 0.17522846162319183, "learning_rate": 1.5752380343669574e-06, "loss": 1.0021862983703613, "step": 1966 }, { "epoch": 2.868804664723032, "grad_norm": 0.22332464158535004, "learning_rate": 1.5720424929860793e-06, "loss": 1.0522475242614746, "step": 1968 }, { "epoch": 2.871720116618076, "grad_norm": 0.39566364884376526, "learning_rate": 1.5689159294293333e-06, "loss": 1.0991871356964111, "step": 1970 }, { "epoch": 2.8746355685131197, "grad_norm": 0.3006777763366699, "learning_rate": 1.5658583759915563e-06, "loss": 1.068638801574707, "step": 1972 }, { "epoch": 2.877551020408163, "grad_norm": 0.18835684657096863, "learning_rate": 1.5628698642547674e-06, "loss": 1.0682188272476196, "step": 1974 }, { "epoch": 2.880466472303207, "grad_norm": 0.13527542352676392, "learning_rate": 1.5599504250878434e-06, "loss": 1.0796337127685547, "step": 1976 }, { "epoch": 2.883381924198251, "grad_norm": 0.2289610654115677, "learning_rate": 1.5571000886461946e-06, "loss": 1.1682178974151611, "step": 1978 }, { "epoch": 2.8862973760932946, "grad_norm": 0.3208562731742859, "learning_rate": 1.5543188843714597e-06, "loss": 0.6415768265724182, "step": 1980 }, { "epoch": 2.889212827988338, "grad_norm": 0.2707623541355133, "learning_rate": 1.551606840991198e-06, "loss": 0.5584684014320374, "step": 1982 }, { "epoch": 2.892128279883382, "grad_norm": 0.24681639671325684, "learning_rate": 1.5489639865185929e-06, "loss": 0.9024500846862793, "step": 1984 }, { "epoch": 2.8950437317784257, "grad_norm": 0.2885083556175232, "learning_rate": 1.5463903482521637e-06, "loss": 1.0408830642700195, "step": 1986 }, { "epoch": 2.8979591836734695, "grad_norm": 0.2863474190235138, "learning_rate": 1.543885952775484e-06, "loss": 0.5923194289207458, "step": 1988 }, { "epoch": 2.9008746355685133, "grad_norm": 0.13149987161159515, "learning_rate": 1.5414508259569033e-06, "loss": 1.0203630924224854, "step": 1990 }, { "epoch": 2.9037900874635567, "grad_norm": 0.08542142808437347, "learning_rate": 1.5390849929492853e-06, "loss": 0.4749288260936737, "step": 1992 }, { "epoch": 2.9067055393586005, "grad_norm": 0.39572906494140625, "learning_rate": 1.5367884781897442e-06, "loss": 0.9975032210350037, "step": 1994 }, { "epoch": 2.9096209912536444, "grad_norm": 0.3944467604160309, "learning_rate": 1.5345613053993947e-06, "loss": 1.2269786596298218, "step": 1996 }, { "epoch": 2.912536443148688, "grad_norm": 0.14900818467140198, "learning_rate": 1.5324034975831053e-06, "loss": 1.2356706857681274, "step": 1998 }, { "epoch": 2.9154518950437316, "grad_norm": 0.31048882007598877, "learning_rate": 1.53031507702926e-06, "loss": 1.218428611755371, "step": 2000 }, { "epoch": 2.9183673469387754, "grad_norm": 0.1689174771308899, "learning_rate": 1.5282960653095309e-06, "loss": 0.9620698094367981, "step": 2002 }, { "epoch": 2.9212827988338192, "grad_norm": 0.2305694818496704, "learning_rate": 1.5263464832786536e-06, "loss": 1.2038404941558838, "step": 2004 }, { "epoch": 2.924198250728863, "grad_norm": 0.12036718428134918, "learning_rate": 1.5244663510742102e-06, "loss": 0.9968715310096741, "step": 2006 }, { "epoch": 2.927113702623907, "grad_norm": 0.12467171996831894, "learning_rate": 1.5226556881164256e-06, "loss": 1.0186277627944946, "step": 2008 }, { "epoch": 2.9300291545189503, "grad_norm": 0.13296104967594147, "learning_rate": 1.5209145131079634e-06, "loss": 1.026340365409851, "step": 2010 }, { "epoch": 2.932944606413994, "grad_norm": 0.12233509868383408, "learning_rate": 1.5192428440337316e-06, "loss": 1.182348608970642, "step": 2012 }, { "epoch": 2.935860058309038, "grad_norm": 0.1486111879348755, "learning_rate": 1.5176406981607024e-06, "loss": 1.0666353702545166, "step": 2014 }, { "epoch": 2.938775510204082, "grad_norm": 0.5397063493728638, "learning_rate": 1.5161080920377289e-06, "loss": 1.389245629310608, "step": 2016 }, { "epoch": 2.941690962099125, "grad_norm": 0.15026716887950897, "learning_rate": 1.5146450414953738e-06, "loss": 1.0400997400283813, "step": 2018 }, { "epoch": 2.944606413994169, "grad_norm": 0.11009442806243896, "learning_rate": 1.5132515616457505e-06, "loss": 1.001649260520935, "step": 2020 }, { "epoch": 2.947521865889213, "grad_norm": 0.7643895745277405, "learning_rate": 1.5119276668823628e-06, "loss": 0.37964844703674316, "step": 2022 }, { "epoch": 2.9504373177842567, "grad_norm": 0.2546994984149933, "learning_rate": 1.510673370879957e-06, "loss": 1.0618635416030884, "step": 2024 }, { "epoch": 2.9533527696793005, "grad_norm": 0.15609286725521088, "learning_rate": 1.5094886865943835e-06, "loss": 1.013123869895935, "step": 2026 }, { "epoch": 2.956268221574344, "grad_norm": 0.09666828066110611, "learning_rate": 1.5083736262624577e-06, "loss": 0.7794107794761658, "step": 2028 }, { "epoch": 2.9591836734693877, "grad_norm": 0.07339915633201599, "learning_rate": 1.5073282014018395e-06, "loss": 1.3735166788101196, "step": 2030 }, { "epoch": 2.9620991253644315, "grad_norm": 0.6088920831680298, "learning_rate": 1.5063524228109107e-06, "loss": 0.8808611035346985, "step": 2032 }, { "epoch": 2.9650145772594754, "grad_norm": 0.1744547188282013, "learning_rate": 1.5054463005686626e-06, "loss": 1.1831696033477783, "step": 2034 }, { "epoch": 2.9679300291545188, "grad_norm": 0.24790845811367035, "learning_rate": 1.5046098440345955e-06, "loss": 1.00650155544281, "step": 2036 }, { "epoch": 2.9708454810495626, "grad_norm": 0.18026836216449738, "learning_rate": 1.5038430618486194e-06, "loss": 1.1893560886383057, "step": 2038 }, { "epoch": 2.9737609329446064, "grad_norm": 0.1259116381406784, "learning_rate": 1.5031459619309653e-06, "loss": 1.0219632387161255, "step": 2040 }, { "epoch": 2.9766763848396502, "grad_norm": 0.15073135495185852, "learning_rate": 1.502518551482103e-06, "loss": 0.7194128036499023, "step": 2042 }, { "epoch": 2.979591836734694, "grad_norm": 0.05049153417348862, "learning_rate": 1.5019608369826692e-06, "loss": 1.1609373092651367, "step": 2044 }, { "epoch": 2.9825072886297375, "grad_norm": 0.11255478858947754, "learning_rate": 1.501472824193396e-06, "loss": 1.1452926397323608, "step": 2046 }, { "epoch": 2.9854227405247813, "grad_norm": 0.16929762065410614, "learning_rate": 1.5010545181550563e-06, "loss": 0.5922563076019287, "step": 2048 }, { "epoch": 2.988338192419825, "grad_norm": 0.1267116516828537, "learning_rate": 1.5007059231884077e-06, "loss": 0.49650248885154724, "step": 2050 }, { "epoch": 2.991253644314869, "grad_norm": 0.1838807910680771, "learning_rate": 1.5004270428941505e-06, "loss": 1.1091796159744263, "step": 2052 }, { "epoch": 2.9941690962099123, "grad_norm": 0.08408603817224503, "learning_rate": 1.500217880152889e-06, "loss": 1.0519981384277344, "step": 2054 }, { "epoch": 2.997084548104956, "grad_norm": 0.36840710043907166, "learning_rate": 1.5000784371251037e-06, "loss": 0.9989621639251709, "step": 2056 }, { "epoch": 3.0, "grad_norm": 0.15688389539718628, "learning_rate": 1.5000087152511266e-06, "loss": 1.1339861154556274, "step": 2058 }, { "epoch": 3.0, "step": 2058, "total_flos": 3.1865440491043553e+18, "train_loss": 1.1440774658359985, "train_runtime": 18974.7516, "train_samples_per_second": 1.735, "train_steps_per_second": 0.108 } ], "logging_steps": 2, "max_steps": 2058, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 9999999, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.1865440491043553e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }