{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 34265, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00014592149423610097, "grad_norm": 3.1947744515847005, "learning_rate": 1.458576429404901e-07, "loss": 0.9761, "step": 5 }, { "epoch": 0.00029184298847220193, "grad_norm": 2.780818532134856, "learning_rate": 2.917152858809802e-07, "loss": 0.9579, "step": 10 }, { "epoch": 0.00043776448270830295, "grad_norm": 2.8959010685272393, "learning_rate": 4.3757292882147023e-07, "loss": 0.9036, "step": 15 }, { "epoch": 0.0005836859769444039, "grad_norm": 2.5371985504370937, "learning_rate": 5.834305717619604e-07, "loss": 0.9564, "step": 20 }, { "epoch": 0.0007296074711805049, "grad_norm": 2.605626399144433, "learning_rate": 7.292882147024504e-07, "loss": 0.87, "step": 25 }, { "epoch": 0.0008755289654166059, "grad_norm": 2.3600499623595086, "learning_rate": 8.751458576429405e-07, "loss": 0.9499, "step": 30 }, { "epoch": 0.0010214504596527069, "grad_norm": 2.3297311105558225, "learning_rate": 1.0210035005834306e-06, "loss": 0.9012, "step": 35 }, { "epoch": 0.0011673719538888077, "grad_norm": 2.1913988052814823, "learning_rate": 1.1668611435239208e-06, "loss": 0.9282, "step": 40 }, { "epoch": 0.0013132934481249088, "grad_norm": 2.105605630758833, "learning_rate": 1.3127187864644107e-06, "loss": 0.8002, "step": 45 }, { "epoch": 0.0014592149423610099, "grad_norm": 1.8307704603554018, "learning_rate": 1.4585764294049009e-06, "loss": 0.864, "step": 50 }, { "epoch": 0.0016051364365971107, "grad_norm": 1.7749968650938042, "learning_rate": 1.604434072345391e-06, "loss": 0.8337, "step": 55 }, { "epoch": 0.0017510579308332118, "grad_norm": 1.8264857625369404, "learning_rate": 1.750291715285881e-06, "loss": 0.8928, "step": 60 }, { "epoch": 0.0018969794250693127, "grad_norm": 1.697528237991233, "learning_rate": 1.8961493582263713e-06, "loss": 0.794, "step": 65 }, { "epoch": 0.0020429009193054137, "grad_norm": 1.5659385030646167, "learning_rate": 2.0420070011668612e-06, "loss": 0.879, "step": 70 }, { "epoch": 0.002188822413541515, "grad_norm": 1.7961674617899717, "learning_rate": 2.187864644107351e-06, "loss": 0.8283, "step": 75 }, { "epoch": 0.0023347439077776155, "grad_norm": 1.6507596906396012, "learning_rate": 2.3337222870478415e-06, "loss": 0.8682, "step": 80 }, { "epoch": 0.0024806654020137165, "grad_norm": 1.4535579925158708, "learning_rate": 2.4795799299883315e-06, "loss": 0.7529, "step": 85 }, { "epoch": 0.0026265868962498176, "grad_norm": 1.698106722774744, "learning_rate": 2.6254375729288214e-06, "loss": 0.8147, "step": 90 }, { "epoch": 0.0027725083904859187, "grad_norm": 1.4680260129152574, "learning_rate": 2.7712952158693118e-06, "loss": 0.7276, "step": 95 }, { "epoch": 0.0029184298847220198, "grad_norm": 1.542895003410177, "learning_rate": 2.9171528588098017e-06, "loss": 0.7615, "step": 100 }, { "epoch": 0.0030643513789581204, "grad_norm": 1.429813215058147, "learning_rate": 3.0630105017502916e-06, "loss": 0.7247, "step": 105 }, { "epoch": 0.0032102728731942215, "grad_norm": 1.5130072783727644, "learning_rate": 3.208868144690782e-06, "loss": 0.7495, "step": 110 }, { "epoch": 0.0033561943674303226, "grad_norm": 1.569844141360473, "learning_rate": 3.3547257876312724e-06, "loss": 0.765, "step": 115 }, { "epoch": 0.0035021158616664236, "grad_norm": 1.6175884841912778, "learning_rate": 3.500583430571762e-06, "loss": 0.7472, "step": 120 }, { "epoch": 0.0036480373559025243, "grad_norm": 1.6904569479496365, "learning_rate": 3.6464410735122522e-06, "loss": 0.7803, "step": 125 }, { "epoch": 0.0037939588501386253, "grad_norm": 1.6251739015186617, "learning_rate": 3.7922987164527426e-06, "loss": 0.7604, "step": 130 }, { "epoch": 0.003939880344374726, "grad_norm": 1.5427918519690202, "learning_rate": 3.938156359393232e-06, "loss": 0.7573, "step": 135 }, { "epoch": 0.0040858018386108275, "grad_norm": 1.5576076657615734, "learning_rate": 4.0840140023337225e-06, "loss": 0.7623, "step": 140 }, { "epoch": 0.004231723332846929, "grad_norm": 1.3686901744336726, "learning_rate": 4.229871645274212e-06, "loss": 0.7286, "step": 145 }, { "epoch": 0.00437764482708303, "grad_norm": 1.4216493984338547, "learning_rate": 4.375729288214702e-06, "loss": 0.7901, "step": 150 }, { "epoch": 0.004523566321319131, "grad_norm": 1.6045464118077426, "learning_rate": 4.521586931155193e-06, "loss": 0.7569, "step": 155 }, { "epoch": 0.004669487815555231, "grad_norm": 1.8627989370501201, "learning_rate": 4.667444574095683e-06, "loss": 0.7453, "step": 160 }, { "epoch": 0.004815409309791332, "grad_norm": 1.4743706942055885, "learning_rate": 4.8133022170361734e-06, "loss": 0.7583, "step": 165 }, { "epoch": 0.004961330804027433, "grad_norm": 1.4542212784423725, "learning_rate": 4.959159859976663e-06, "loss": 0.7329, "step": 170 }, { "epoch": 0.005107252298263534, "grad_norm": 1.712902676622777, "learning_rate": 5.105017502917153e-06, "loss": 0.7729, "step": 175 }, { "epoch": 0.005253173792499635, "grad_norm": 1.4755335061451906, "learning_rate": 5.250875145857643e-06, "loss": 0.7765, "step": 180 }, { "epoch": 0.005399095286735736, "grad_norm": 1.5994272580423068, "learning_rate": 5.396732788798133e-06, "loss": 0.7251, "step": 185 }, { "epoch": 0.005545016780971837, "grad_norm": 1.6354259469081376, "learning_rate": 5.5425904317386235e-06, "loss": 0.7597, "step": 190 }, { "epoch": 0.0056909382752079385, "grad_norm": 1.7814990205632233, "learning_rate": 5.688448074679113e-06, "loss": 0.7613, "step": 195 }, { "epoch": 0.0058368597694440395, "grad_norm": 1.7555434578362554, "learning_rate": 5.834305717619603e-06, "loss": 0.7148, "step": 200 }, { "epoch": 0.00598278126368014, "grad_norm": 1.6267363052677177, "learning_rate": 5.980163360560093e-06, "loss": 0.7144, "step": 205 }, { "epoch": 0.006128702757916241, "grad_norm": 1.4669150349400875, "learning_rate": 6.126021003500583e-06, "loss": 0.7067, "step": 210 }, { "epoch": 0.006274624252152342, "grad_norm": 1.411713064440319, "learning_rate": 6.271878646441074e-06, "loss": 0.7083, "step": 215 }, { "epoch": 0.006420545746388443, "grad_norm": 1.607398390369769, "learning_rate": 6.417736289381564e-06, "loss": 0.7628, "step": 220 }, { "epoch": 0.006566467240624544, "grad_norm": 1.6395778660498548, "learning_rate": 6.563593932322054e-06, "loss": 0.7746, "step": 225 }, { "epoch": 0.006712388734860645, "grad_norm": 1.6890901541211365, "learning_rate": 6.709451575262545e-06, "loss": 0.7222, "step": 230 }, { "epoch": 0.006858310229096746, "grad_norm": 1.5759953543797294, "learning_rate": 6.855309218203035e-06, "loss": 0.7078, "step": 235 }, { "epoch": 0.007004231723332847, "grad_norm": 1.5849245570015338, "learning_rate": 7.001166861143524e-06, "loss": 0.6711, "step": 240 }, { "epoch": 0.007150153217568948, "grad_norm": 1.5803687806173186, "learning_rate": 7.147024504084014e-06, "loss": 0.7133, "step": 245 }, { "epoch": 0.0072960747118050485, "grad_norm": 1.7032674577448692, "learning_rate": 7.2928821470245045e-06, "loss": 0.686, "step": 250 }, { "epoch": 0.00744199620604115, "grad_norm": 1.8734431421404527, "learning_rate": 7.438739789964995e-06, "loss": 0.7024, "step": 255 }, { "epoch": 0.007587917700277251, "grad_norm": 1.8678774528018942, "learning_rate": 7.584597432905485e-06, "loss": 0.7022, "step": 260 }, { "epoch": 0.007733839194513352, "grad_norm": 1.8130034609400518, "learning_rate": 7.730455075845975e-06, "loss": 0.7036, "step": 265 }, { "epoch": 0.007879760688749453, "grad_norm": 1.5904331838236196, "learning_rate": 7.876312718786464e-06, "loss": 0.7502, "step": 270 }, { "epoch": 0.008025682182985553, "grad_norm": 1.530190434975237, "learning_rate": 8.022170361726955e-06, "loss": 0.7003, "step": 275 }, { "epoch": 0.008171603677221655, "grad_norm": 1.4468723229371472, "learning_rate": 8.168028004667445e-06, "loss": 0.6538, "step": 280 }, { "epoch": 0.008317525171457755, "grad_norm": 1.5013623893566552, "learning_rate": 8.313885647607934e-06, "loss": 0.7231, "step": 285 }, { "epoch": 0.008463446665693857, "grad_norm": 1.439329760933021, "learning_rate": 8.459743290548424e-06, "loss": 0.7081, "step": 290 }, { "epoch": 0.008609368159929957, "grad_norm": 1.5440021877487, "learning_rate": 8.605600933488915e-06, "loss": 0.6921, "step": 295 }, { "epoch": 0.00875528965416606, "grad_norm": 1.6291279310781581, "learning_rate": 8.751458576429405e-06, "loss": 0.694, "step": 300 }, { "epoch": 0.00890121114840216, "grad_norm": 1.6431941602450315, "learning_rate": 8.897316219369896e-06, "loss": 0.7175, "step": 305 }, { "epoch": 0.009047132642638261, "grad_norm": 1.7240714682583684, "learning_rate": 9.043173862310385e-06, "loss": 0.7037, "step": 310 }, { "epoch": 0.009193054136874362, "grad_norm": 1.5076816635550303, "learning_rate": 9.189031505250875e-06, "loss": 0.6908, "step": 315 }, { "epoch": 0.009338975631110462, "grad_norm": 1.6421334821435518, "learning_rate": 9.334889148191366e-06, "loss": 0.6826, "step": 320 }, { "epoch": 0.009484897125346564, "grad_norm": 1.567201180354376, "learning_rate": 9.480746791131856e-06, "loss": 0.763, "step": 325 }, { "epoch": 0.009630818619582664, "grad_norm": 1.5537223777920866, "learning_rate": 9.626604434072347e-06, "loss": 0.6731, "step": 330 }, { "epoch": 0.009776740113818766, "grad_norm": 1.5693711908918668, "learning_rate": 9.772462077012835e-06, "loss": 0.71, "step": 335 }, { "epoch": 0.009922661608054866, "grad_norm": 1.500621710642294, "learning_rate": 9.918319719953326e-06, "loss": 0.6813, "step": 340 }, { "epoch": 0.010068583102290968, "grad_norm": 1.51244614882415, "learning_rate": 1.0064177362893815e-05, "loss": 0.696, "step": 345 }, { "epoch": 0.010214504596527068, "grad_norm": 1.6592392474503885, "learning_rate": 1.0210035005834307e-05, "loss": 0.723, "step": 350 }, { "epoch": 0.01036042609076317, "grad_norm": 1.5357571274774384, "learning_rate": 1.0355892648774796e-05, "loss": 0.7266, "step": 355 }, { "epoch": 0.01050634758499927, "grad_norm": 1.7075409579058634, "learning_rate": 1.0501750291715286e-05, "loss": 0.7177, "step": 360 }, { "epoch": 0.01065226907923537, "grad_norm": 1.6769841817579385, "learning_rate": 1.0647607934655777e-05, "loss": 0.6869, "step": 365 }, { "epoch": 0.010798190573471473, "grad_norm": 1.635129245404418, "learning_rate": 1.0793465577596266e-05, "loss": 0.6174, "step": 370 }, { "epoch": 0.010944112067707573, "grad_norm": 1.5277961671404743, "learning_rate": 1.0939323220536758e-05, "loss": 0.6973, "step": 375 }, { "epoch": 0.011090033561943675, "grad_norm": 1.6672105268978017, "learning_rate": 1.1085180863477247e-05, "loss": 0.6885, "step": 380 }, { "epoch": 0.011235955056179775, "grad_norm": 1.5484556976557604, "learning_rate": 1.1231038506417737e-05, "loss": 0.6266, "step": 385 }, { "epoch": 0.011381876550415877, "grad_norm": 1.563661138118462, "learning_rate": 1.1376896149358226e-05, "loss": 0.6736, "step": 390 }, { "epoch": 0.011527798044651977, "grad_norm": 1.3888219387699416, "learning_rate": 1.1522753792298717e-05, "loss": 0.6552, "step": 395 }, { "epoch": 0.011673719538888079, "grad_norm": 1.4148867756265207, "learning_rate": 1.1668611435239207e-05, "loss": 0.6459, "step": 400 }, { "epoch": 0.01181964103312418, "grad_norm": 1.4990728642252718, "learning_rate": 1.1814469078179698e-05, "loss": 0.7266, "step": 405 }, { "epoch": 0.01196556252736028, "grad_norm": 1.5586964890227837, "learning_rate": 1.1960326721120186e-05, "loss": 0.6796, "step": 410 }, { "epoch": 0.012111484021596381, "grad_norm": 1.4423410905481469, "learning_rate": 1.2106184364060677e-05, "loss": 0.6557, "step": 415 }, { "epoch": 0.012257405515832482, "grad_norm": 1.5511893531096423, "learning_rate": 1.2252042007001167e-05, "loss": 0.6625, "step": 420 }, { "epoch": 0.012403327010068584, "grad_norm": 1.5902547361644932, "learning_rate": 1.2397899649941658e-05, "loss": 0.7426, "step": 425 }, { "epoch": 0.012549248504304684, "grad_norm": 1.5111039547721734, "learning_rate": 1.2543757292882147e-05, "loss": 0.6831, "step": 430 }, { "epoch": 0.012695169998540786, "grad_norm": 1.591632736563798, "learning_rate": 1.2689614935822638e-05, "loss": 0.7043, "step": 435 }, { "epoch": 0.012841091492776886, "grad_norm": 1.6357362500630288, "learning_rate": 1.2835472578763128e-05, "loss": 0.7664, "step": 440 }, { "epoch": 0.012987012987012988, "grad_norm": 1.5945529648895056, "learning_rate": 1.298133022170362e-05, "loss": 0.6463, "step": 445 }, { "epoch": 0.013132934481249088, "grad_norm": 1.609309211131705, "learning_rate": 1.3127187864644109e-05, "loss": 0.6818, "step": 450 }, { "epoch": 0.013278855975485188, "grad_norm": 1.5404955167609322, "learning_rate": 1.3273045507584597e-05, "loss": 0.6427, "step": 455 }, { "epoch": 0.01342477746972129, "grad_norm": 1.5181450307496702, "learning_rate": 1.341890315052509e-05, "loss": 0.7149, "step": 460 }, { "epoch": 0.01357069896395739, "grad_norm": 1.839697213371066, "learning_rate": 1.3564760793465577e-05, "loss": 0.7382, "step": 465 }, { "epoch": 0.013716620458193492, "grad_norm": 1.664060733330266, "learning_rate": 1.371061843640607e-05, "loss": 0.6614, "step": 470 }, { "epoch": 0.013862541952429593, "grad_norm": 1.6757405718327731, "learning_rate": 1.3856476079346558e-05, "loss": 0.7493, "step": 475 }, { "epoch": 0.014008463446665695, "grad_norm": 1.6644547957700877, "learning_rate": 1.4002333722287048e-05, "loss": 0.7036, "step": 480 }, { "epoch": 0.014154384940901795, "grad_norm": 1.9023550591981342, "learning_rate": 1.4148191365227539e-05, "loss": 0.7264, "step": 485 }, { "epoch": 0.014300306435137897, "grad_norm": 1.453455421325802, "learning_rate": 1.4294049008168028e-05, "loss": 0.6838, "step": 490 }, { "epoch": 0.014446227929373997, "grad_norm": 1.6908321474248393, "learning_rate": 1.443990665110852e-05, "loss": 0.724, "step": 495 }, { "epoch": 0.014592149423610097, "grad_norm": 1.5833993213625799, "learning_rate": 1.4585764294049009e-05, "loss": 0.7204, "step": 500 }, { "epoch": 0.014738070917846199, "grad_norm": 2.0269188296839427, "learning_rate": 1.4731621936989498e-05, "loss": 0.7267, "step": 505 }, { "epoch": 0.0148839924120823, "grad_norm": 1.5472605771373973, "learning_rate": 1.487747957992999e-05, "loss": 0.7459, "step": 510 }, { "epoch": 0.015029913906318401, "grad_norm": 1.7025139983932893, "learning_rate": 1.502333722287048e-05, "loss": 0.6562, "step": 515 }, { "epoch": 0.015175835400554501, "grad_norm": 1.8673550437347441, "learning_rate": 1.516919486581097e-05, "loss": 0.6948, "step": 520 }, { "epoch": 0.015321756894790603, "grad_norm": 1.3565338880388187, "learning_rate": 1.5315052508751458e-05, "loss": 0.6973, "step": 525 }, { "epoch": 0.015467678389026704, "grad_norm": 1.7590429090976183, "learning_rate": 1.546091015169195e-05, "loss": 0.7272, "step": 530 }, { "epoch": 0.015613599883262804, "grad_norm": 1.7144170173144773, "learning_rate": 1.560676779463244e-05, "loss": 0.7295, "step": 535 }, { "epoch": 0.015759521377498906, "grad_norm": 1.5606571713526964, "learning_rate": 1.575262543757293e-05, "loss": 0.665, "step": 540 }, { "epoch": 0.015905442871735006, "grad_norm": 1.6006824964823971, "learning_rate": 1.589848308051342e-05, "loss": 0.6726, "step": 545 }, { "epoch": 0.016051364365971106, "grad_norm": 2.094293530293622, "learning_rate": 1.604434072345391e-05, "loss": 0.7411, "step": 550 }, { "epoch": 0.01619728586020721, "grad_norm": 1.7078000915064542, "learning_rate": 1.61901983663944e-05, "loss": 0.7587, "step": 555 }, { "epoch": 0.01634320735444331, "grad_norm": 1.4944710379279686, "learning_rate": 1.633605600933489e-05, "loss": 0.6323, "step": 560 }, { "epoch": 0.01648912884867941, "grad_norm": 1.574611787971994, "learning_rate": 1.648191365227538e-05, "loss": 0.6682, "step": 565 }, { "epoch": 0.01663505034291551, "grad_norm": 1.5278996951086554, "learning_rate": 1.662777129521587e-05, "loss": 0.7071, "step": 570 }, { "epoch": 0.016780971837151614, "grad_norm": 1.6822476418518675, "learning_rate": 1.677362893815636e-05, "loss": 0.7017, "step": 575 }, { "epoch": 0.016926893331387714, "grad_norm": 1.7656120267143682, "learning_rate": 1.6919486581096848e-05, "loss": 0.669, "step": 580 }, { "epoch": 0.017072814825623814, "grad_norm": 1.8451054979090913, "learning_rate": 1.7065344224037343e-05, "loss": 0.689, "step": 585 }, { "epoch": 0.017218736319859915, "grad_norm": 1.6107491561470146, "learning_rate": 1.721120186697783e-05, "loss": 0.709, "step": 590 }, { "epoch": 0.017364657814096015, "grad_norm": 1.7631298036500338, "learning_rate": 1.7357059509918318e-05, "loss": 0.752, "step": 595 }, { "epoch": 0.01751057930833212, "grad_norm": 1.9098001836709986, "learning_rate": 1.750291715285881e-05, "loss": 0.774, "step": 600 }, { "epoch": 0.01765650080256822, "grad_norm": 1.6853961941046716, "learning_rate": 1.76487747957993e-05, "loss": 0.6939, "step": 605 }, { "epoch": 0.01780242229680432, "grad_norm": 1.6804282668264998, "learning_rate": 1.7794632438739792e-05, "loss": 0.6952, "step": 610 }, { "epoch": 0.01794834379104042, "grad_norm": 1.5399435158399386, "learning_rate": 1.794049008168028e-05, "loss": 0.6674, "step": 615 }, { "epoch": 0.018094265285276523, "grad_norm": 1.6417423013675476, "learning_rate": 1.808634772462077e-05, "loss": 0.6926, "step": 620 }, { "epoch": 0.018240186779512623, "grad_norm": 1.7183768786991622, "learning_rate": 1.8232205367561262e-05, "loss": 0.729, "step": 625 }, { "epoch": 0.018386108273748723, "grad_norm": 1.7341801849493557, "learning_rate": 1.837806301050175e-05, "loss": 0.6775, "step": 630 }, { "epoch": 0.018532029767984824, "grad_norm": 1.6649539048207478, "learning_rate": 1.852392065344224e-05, "loss": 0.7253, "step": 635 }, { "epoch": 0.018677951262220924, "grad_norm": 1.7590539603786544, "learning_rate": 1.8669778296382732e-05, "loss": 0.6353, "step": 640 }, { "epoch": 0.018823872756457027, "grad_norm": 1.6172500057134809, "learning_rate": 1.881563593932322e-05, "loss": 0.6806, "step": 645 }, { "epoch": 0.018969794250693128, "grad_norm": 1.6803811206114834, "learning_rate": 1.896149358226371e-05, "loss": 0.8018, "step": 650 }, { "epoch": 0.019115715744929228, "grad_norm": 1.5984355067301463, "learning_rate": 1.9107351225204202e-05, "loss": 0.7164, "step": 655 }, { "epoch": 0.019261637239165328, "grad_norm": 1.563429573575732, "learning_rate": 1.9253208868144694e-05, "loss": 0.652, "step": 660 }, { "epoch": 0.01940755873340143, "grad_norm": 1.8095363077898927, "learning_rate": 1.939906651108518e-05, "loss": 0.6753, "step": 665 }, { "epoch": 0.019553480227637532, "grad_norm": 1.5555149820078507, "learning_rate": 1.954492415402567e-05, "loss": 0.7037, "step": 670 }, { "epoch": 0.019699401721873632, "grad_norm": 1.7121310281353082, "learning_rate": 1.9690781796966164e-05, "loss": 0.6707, "step": 675 }, { "epoch": 0.019845323216109732, "grad_norm": 1.4498071021238534, "learning_rate": 1.9836639439906652e-05, "loss": 0.7046, "step": 680 }, { "epoch": 0.019991244710345833, "grad_norm": 1.4466233892468816, "learning_rate": 1.9982497082847143e-05, "loss": 0.6903, "step": 685 }, { "epoch": 0.020137166204581936, "grad_norm": 1.6278206523630134, "learning_rate": 2.012835472578763e-05, "loss": 0.7052, "step": 690 }, { "epoch": 0.020283087698818036, "grad_norm": 1.6981244706447884, "learning_rate": 2.0274212368728122e-05, "loss": 0.7527, "step": 695 }, { "epoch": 0.020429009193054137, "grad_norm": 1.7031385948067341, "learning_rate": 2.0420070011668613e-05, "loss": 0.7486, "step": 700 }, { "epoch": 0.020574930687290237, "grad_norm": 1.5818150882172228, "learning_rate": 2.05659276546091e-05, "loss": 0.6582, "step": 705 }, { "epoch": 0.02072085218152634, "grad_norm": 1.6842467002662005, "learning_rate": 2.0711785297549592e-05, "loss": 0.6885, "step": 710 }, { "epoch": 0.02086677367576244, "grad_norm": 1.4282165804306994, "learning_rate": 2.0857642940490083e-05, "loss": 0.6359, "step": 715 }, { "epoch": 0.02101269516999854, "grad_norm": 1.7032330817338395, "learning_rate": 2.100350058343057e-05, "loss": 0.7242, "step": 720 }, { "epoch": 0.02115861666423464, "grad_norm": 1.538448142519383, "learning_rate": 2.1149358226371062e-05, "loss": 0.6382, "step": 725 }, { "epoch": 0.02130453815847074, "grad_norm": 1.584463731730771, "learning_rate": 2.1295215869311554e-05, "loss": 0.7076, "step": 730 }, { "epoch": 0.021450459652706845, "grad_norm": 1.7489371138422, "learning_rate": 2.1441073512252045e-05, "loss": 0.6995, "step": 735 }, { "epoch": 0.021596381146942945, "grad_norm": 1.6427548661471818, "learning_rate": 2.1586931155192533e-05, "loss": 0.7647, "step": 740 }, { "epoch": 0.021742302641179045, "grad_norm": 1.7376674014421456, "learning_rate": 2.173278879813302e-05, "loss": 0.7063, "step": 745 }, { "epoch": 0.021888224135415146, "grad_norm": 1.7308918478302657, "learning_rate": 2.1878646441073515e-05, "loss": 0.7039, "step": 750 }, { "epoch": 0.02203414562965125, "grad_norm": 1.7009110447382148, "learning_rate": 2.2024504084014003e-05, "loss": 0.7357, "step": 755 }, { "epoch": 0.02218006712388735, "grad_norm": 1.6845153941030226, "learning_rate": 2.2170361726954494e-05, "loss": 0.6467, "step": 760 }, { "epoch": 0.02232598861812345, "grad_norm": 1.827793921542039, "learning_rate": 2.2316219369894982e-05, "loss": 0.7235, "step": 765 }, { "epoch": 0.02247191011235955, "grad_norm": 1.649800671415183, "learning_rate": 2.2462077012835473e-05, "loss": 0.7395, "step": 770 }, { "epoch": 0.02261783160659565, "grad_norm": 1.6690762339397862, "learning_rate": 2.2607934655775964e-05, "loss": 0.7044, "step": 775 }, { "epoch": 0.022763753100831754, "grad_norm": 1.611645621469813, "learning_rate": 2.2753792298716452e-05, "loss": 0.7094, "step": 780 }, { "epoch": 0.022909674595067854, "grad_norm": 1.3988095282507587, "learning_rate": 2.2899649941656943e-05, "loss": 0.7096, "step": 785 }, { "epoch": 0.023055596089303954, "grad_norm": 1.7174132991172002, "learning_rate": 2.3045507584597435e-05, "loss": 0.7229, "step": 790 }, { "epoch": 0.023201517583540054, "grad_norm": 1.3661904097330837, "learning_rate": 2.3191365227537922e-05, "loss": 0.6368, "step": 795 }, { "epoch": 0.023347439077776158, "grad_norm": 1.5937893185090637, "learning_rate": 2.3337222870478414e-05, "loss": 0.6955, "step": 800 }, { "epoch": 0.02349336057201226, "grad_norm": 1.6873174766585761, "learning_rate": 2.3483080513418905e-05, "loss": 0.6511, "step": 805 }, { "epoch": 0.02363928206624836, "grad_norm": 1.607069446791644, "learning_rate": 2.3628938156359396e-05, "loss": 0.718, "step": 810 }, { "epoch": 0.02378520356048446, "grad_norm": 1.7139729288179766, "learning_rate": 2.3774795799299884e-05, "loss": 0.6824, "step": 815 }, { "epoch": 0.02393112505472056, "grad_norm": 1.4872438626984554, "learning_rate": 2.392065344224037e-05, "loss": 0.6344, "step": 820 }, { "epoch": 0.024077046548956663, "grad_norm": 1.5654039723021604, "learning_rate": 2.4066511085180866e-05, "loss": 0.7409, "step": 825 }, { "epoch": 0.024222968043192763, "grad_norm": 1.446173231049819, "learning_rate": 2.4212368728121354e-05, "loss": 0.7065, "step": 830 }, { "epoch": 0.024368889537428863, "grad_norm": 1.7701228013414976, "learning_rate": 2.4358226371061845e-05, "loss": 0.6896, "step": 835 }, { "epoch": 0.024514811031664963, "grad_norm": 1.5583655871026196, "learning_rate": 2.4504084014002333e-05, "loss": 0.7142, "step": 840 }, { "epoch": 0.024660732525901067, "grad_norm": 1.383613673297909, "learning_rate": 2.4649941656942824e-05, "loss": 0.6883, "step": 845 }, { "epoch": 0.024806654020137167, "grad_norm": 1.6319134030710103, "learning_rate": 2.4795799299883316e-05, "loss": 0.7462, "step": 850 }, { "epoch": 0.024952575514373267, "grad_norm": 1.5517947834609296, "learning_rate": 2.4941656942823803e-05, "loss": 0.6795, "step": 855 }, { "epoch": 0.025098497008609368, "grad_norm": 1.6712335021090057, "learning_rate": 2.5087514585764295e-05, "loss": 0.69, "step": 860 }, { "epoch": 0.025244418502845468, "grad_norm": 1.998899182109342, "learning_rate": 2.523337222870479e-05, "loss": 0.747, "step": 865 }, { "epoch": 0.02539033999708157, "grad_norm": 1.458426590820653, "learning_rate": 2.5379229871645277e-05, "loss": 0.668, "step": 870 }, { "epoch": 0.02553626149131767, "grad_norm": 1.6661814738196528, "learning_rate": 2.5525087514585765e-05, "loss": 0.8119, "step": 875 }, { "epoch": 0.025682182985553772, "grad_norm": 1.8043267177876838, "learning_rate": 2.5670945157526256e-05, "loss": 0.7128, "step": 880 }, { "epoch": 0.025828104479789872, "grad_norm": 1.595635424906511, "learning_rate": 2.5816802800466744e-05, "loss": 0.6743, "step": 885 }, { "epoch": 0.025974025974025976, "grad_norm": 1.3210397592732002, "learning_rate": 2.596266044340724e-05, "loss": 0.6914, "step": 890 }, { "epoch": 0.026119947468262076, "grad_norm": 1.5700248005327584, "learning_rate": 2.6108518086347726e-05, "loss": 0.6959, "step": 895 }, { "epoch": 0.026265868962498176, "grad_norm": 1.5136291542399603, "learning_rate": 2.6254375729288217e-05, "loss": 0.7323, "step": 900 }, { "epoch": 0.026411790456734276, "grad_norm": 1.7009729282195039, "learning_rate": 2.6400233372228705e-05, "loss": 0.6918, "step": 905 }, { "epoch": 0.026557711950970377, "grad_norm": 1.4271883652876505, "learning_rate": 2.6546091015169193e-05, "loss": 0.686, "step": 910 }, { "epoch": 0.02670363344520648, "grad_norm": 1.6543368167985741, "learning_rate": 2.6691948658109688e-05, "loss": 0.7205, "step": 915 }, { "epoch": 0.02684955493944258, "grad_norm": 1.4606292966835321, "learning_rate": 2.683780630105018e-05, "loss": 0.6939, "step": 920 }, { "epoch": 0.02699547643367868, "grad_norm": 1.5086639765454437, "learning_rate": 2.6983663943990667e-05, "loss": 0.69, "step": 925 }, { "epoch": 0.02714139792791478, "grad_norm": 1.479890690618661, "learning_rate": 2.7129521586931155e-05, "loss": 0.6813, "step": 930 }, { "epoch": 0.027287319422150885, "grad_norm": 1.8008746804886702, "learning_rate": 2.7275379229871646e-05, "loss": 0.6938, "step": 935 }, { "epoch": 0.027433240916386985, "grad_norm": 1.6557731270365816, "learning_rate": 2.742123687281214e-05, "loss": 0.6724, "step": 940 }, { "epoch": 0.027579162410623085, "grad_norm": 1.7436705568477375, "learning_rate": 2.7567094515752628e-05, "loss": 0.679, "step": 945 }, { "epoch": 0.027725083904859185, "grad_norm": 1.3996057203371555, "learning_rate": 2.7712952158693116e-05, "loss": 0.7129, "step": 950 }, { "epoch": 0.027871005399095285, "grad_norm": 1.4694127082983113, "learning_rate": 2.7858809801633607e-05, "loss": 0.712, "step": 955 }, { "epoch": 0.02801692689333139, "grad_norm": 1.6722150708335481, "learning_rate": 2.8004667444574095e-05, "loss": 0.7106, "step": 960 }, { "epoch": 0.02816284838756749, "grad_norm": 1.3025846309675377, "learning_rate": 2.815052508751459e-05, "loss": 0.7091, "step": 965 }, { "epoch": 0.02830876988180359, "grad_norm": 1.4139616367230547, "learning_rate": 2.8296382730455077e-05, "loss": 0.733, "step": 970 }, { "epoch": 0.02845469137603969, "grad_norm": 1.3737571684570908, "learning_rate": 2.844224037339557e-05, "loss": 0.6835, "step": 975 }, { "epoch": 0.028600612870275793, "grad_norm": 1.6961221498002164, "learning_rate": 2.8588098016336056e-05, "loss": 0.7389, "step": 980 }, { "epoch": 0.028746534364511894, "grad_norm": 1.4455556810839383, "learning_rate": 2.8733955659276544e-05, "loss": 0.7136, "step": 985 }, { "epoch": 0.028892455858747994, "grad_norm": 1.545586528378323, "learning_rate": 2.887981330221704e-05, "loss": 0.7191, "step": 990 }, { "epoch": 0.029038377352984094, "grad_norm": 1.940525043359535, "learning_rate": 2.902567094515753e-05, "loss": 0.7679, "step": 995 }, { "epoch": 0.029184298847220194, "grad_norm": 1.6239557635261068, "learning_rate": 2.9171528588098018e-05, "loss": 0.716, "step": 1000 }, { "epoch": 0.029330220341456298, "grad_norm": 1.9001859265232084, "learning_rate": 2.9317386231038506e-05, "loss": 0.7008, "step": 1005 }, { "epoch": 0.029476141835692398, "grad_norm": 1.6080340776285513, "learning_rate": 2.9463243873978997e-05, "loss": 0.6666, "step": 1010 }, { "epoch": 0.0296220633299285, "grad_norm": 1.510133794582749, "learning_rate": 2.960910151691949e-05, "loss": 0.6858, "step": 1015 }, { "epoch": 0.0297679848241646, "grad_norm": 1.377491998083542, "learning_rate": 2.975495915985998e-05, "loss": 0.7237, "step": 1020 }, { "epoch": 0.0299139063184007, "grad_norm": 1.6423776203754805, "learning_rate": 2.9900816802800467e-05, "loss": 0.7219, "step": 1025 }, { "epoch": 0.030059827812636802, "grad_norm": 1.89278007028969, "learning_rate": 3.004667444574096e-05, "loss": 0.7372, "step": 1030 }, { "epoch": 0.030205749306872903, "grad_norm": 1.466322406875981, "learning_rate": 3.0192532088681446e-05, "loss": 0.7088, "step": 1035 }, { "epoch": 0.030351670801109003, "grad_norm": 1.601207367034692, "learning_rate": 3.033838973162194e-05, "loss": 0.7708, "step": 1040 }, { "epoch": 0.030497592295345103, "grad_norm": 1.552335498453275, "learning_rate": 3.048424737456243e-05, "loss": 0.7334, "step": 1045 }, { "epoch": 0.030643513789581207, "grad_norm": 1.5465196791465428, "learning_rate": 3.0630105017502916e-05, "loss": 0.7422, "step": 1050 }, { "epoch": 0.030789435283817307, "grad_norm": 1.8495863677675473, "learning_rate": 3.077596266044341e-05, "loss": 0.7161, "step": 1055 }, { "epoch": 0.030935356778053407, "grad_norm": 1.4806086724803613, "learning_rate": 3.09218203033839e-05, "loss": 0.6846, "step": 1060 }, { "epoch": 0.031081278272289507, "grad_norm": 1.8485293146616193, "learning_rate": 3.106767794632439e-05, "loss": 0.7296, "step": 1065 }, { "epoch": 0.031227199766525607, "grad_norm": 1.9255223889371524, "learning_rate": 3.121353558926488e-05, "loss": 0.7408, "step": 1070 }, { "epoch": 0.03137312126076171, "grad_norm": 1.423045709866818, "learning_rate": 3.135939323220537e-05, "loss": 0.7017, "step": 1075 }, { "epoch": 0.03151904275499781, "grad_norm": 1.7502566892021552, "learning_rate": 3.150525087514586e-05, "loss": 0.7621, "step": 1080 }, { "epoch": 0.031664964249233915, "grad_norm": 1.5044752857327865, "learning_rate": 3.165110851808635e-05, "loss": 0.7685, "step": 1085 }, { "epoch": 0.03181088574347001, "grad_norm": 1.706014273236418, "learning_rate": 3.179696616102684e-05, "loss": 0.7406, "step": 1090 }, { "epoch": 0.031956807237706115, "grad_norm": 1.3184924575175818, "learning_rate": 3.194282380396733e-05, "loss": 0.6446, "step": 1095 }, { "epoch": 0.03210272873194221, "grad_norm": 1.480355858792458, "learning_rate": 3.208868144690782e-05, "loss": 0.6993, "step": 1100 }, { "epoch": 0.032248650226178316, "grad_norm": 1.59599280364544, "learning_rate": 3.2234539089848306e-05, "loss": 0.7147, "step": 1105 }, { "epoch": 0.03239457172041442, "grad_norm": 1.693123809405065, "learning_rate": 3.23803967327888e-05, "loss": 0.7396, "step": 1110 }, { "epoch": 0.032540493214650516, "grad_norm": 1.2426197431450783, "learning_rate": 3.252625437572929e-05, "loss": 0.7069, "step": 1115 }, { "epoch": 0.03268641470888662, "grad_norm": 1.4824005566342304, "learning_rate": 3.267211201866978e-05, "loss": 0.6977, "step": 1120 }, { "epoch": 0.03283233620312272, "grad_norm": 1.4740320021968292, "learning_rate": 3.281796966161027e-05, "loss": 0.6351, "step": 1125 }, { "epoch": 0.03297825769735882, "grad_norm": 1.5623660740625849, "learning_rate": 3.296382730455076e-05, "loss": 0.7545, "step": 1130 }, { "epoch": 0.033124179191594924, "grad_norm": 1.4295313989283687, "learning_rate": 3.3109684947491247e-05, "loss": 0.729, "step": 1135 }, { "epoch": 0.03327010068583102, "grad_norm": 1.4341872631511632, "learning_rate": 3.325554259043174e-05, "loss": 0.6981, "step": 1140 }, { "epoch": 0.033416022180067124, "grad_norm": 1.5157801689309949, "learning_rate": 3.340140023337223e-05, "loss": 0.6526, "step": 1145 }, { "epoch": 0.03356194367430323, "grad_norm": 1.358575252506013, "learning_rate": 3.354725787631272e-05, "loss": 0.664, "step": 1150 }, { "epoch": 0.033707865168539325, "grad_norm": 1.399471604852333, "learning_rate": 3.369311551925321e-05, "loss": 0.6395, "step": 1155 }, { "epoch": 0.03385378666277543, "grad_norm": 1.6780295418808926, "learning_rate": 3.3838973162193696e-05, "loss": 0.6918, "step": 1160 }, { "epoch": 0.033999708157011525, "grad_norm": 1.8749579850500733, "learning_rate": 3.398483080513419e-05, "loss": 0.7318, "step": 1165 }, { "epoch": 0.03414562965124763, "grad_norm": 1.5350108766982589, "learning_rate": 3.4130688448074685e-05, "loss": 0.7566, "step": 1170 }, { "epoch": 0.03429155114548373, "grad_norm": 1.5655487306424387, "learning_rate": 3.427654609101517e-05, "loss": 0.7289, "step": 1175 }, { "epoch": 0.03443747263971983, "grad_norm": 1.4571372656445885, "learning_rate": 3.442240373395566e-05, "loss": 0.7596, "step": 1180 }, { "epoch": 0.03458339413395593, "grad_norm": 1.6794172098342217, "learning_rate": 3.456826137689615e-05, "loss": 0.6708, "step": 1185 }, { "epoch": 0.03472931562819203, "grad_norm": 1.6805832875498925, "learning_rate": 3.4714119019836636e-05, "loss": 0.8062, "step": 1190 }, { "epoch": 0.034875237122428134, "grad_norm": 1.3146337017621434, "learning_rate": 3.4859976662777134e-05, "loss": 0.7486, "step": 1195 }, { "epoch": 0.03502115861666424, "grad_norm": 1.7545223488891168, "learning_rate": 3.500583430571762e-05, "loss": 0.6879, "step": 1200 }, { "epoch": 0.035167080110900334, "grad_norm": 1.4967220005992836, "learning_rate": 3.515169194865811e-05, "loss": 0.6696, "step": 1205 }, { "epoch": 0.03531300160513644, "grad_norm": 1.345003682960457, "learning_rate": 3.52975495915986e-05, "loss": 0.6719, "step": 1210 }, { "epoch": 0.035458923099372534, "grad_norm": 1.3306498551040196, "learning_rate": 3.5443407234539086e-05, "loss": 0.6595, "step": 1215 }, { "epoch": 0.03560484459360864, "grad_norm": 1.6302230033146294, "learning_rate": 3.5589264877479584e-05, "loss": 0.7375, "step": 1220 }, { "epoch": 0.03575076608784474, "grad_norm": 1.384508228051698, "learning_rate": 3.5735122520420075e-05, "loss": 0.6709, "step": 1225 }, { "epoch": 0.03589668758208084, "grad_norm": 1.3008258698355437, "learning_rate": 3.588098016336056e-05, "loss": 0.6238, "step": 1230 }, { "epoch": 0.03604260907631694, "grad_norm": 1.911916803446253, "learning_rate": 3.602683780630105e-05, "loss": 0.7463, "step": 1235 }, { "epoch": 0.036188530570553046, "grad_norm": 1.4898103249753876, "learning_rate": 3.617269544924154e-05, "loss": 0.7167, "step": 1240 }, { "epoch": 0.03633445206478914, "grad_norm": 1.4667267735145646, "learning_rate": 3.631855309218203e-05, "loss": 0.7409, "step": 1245 }, { "epoch": 0.036480373559025246, "grad_norm": 1.4045462101272668, "learning_rate": 3.6464410735122524e-05, "loss": 0.797, "step": 1250 }, { "epoch": 0.03662629505326134, "grad_norm": 1.5856985921341709, "learning_rate": 3.661026837806301e-05, "loss": 0.7175, "step": 1255 }, { "epoch": 0.03677221654749745, "grad_norm": 1.5243823667462688, "learning_rate": 3.67561260210035e-05, "loss": 0.698, "step": 1260 }, { "epoch": 0.03691813804173355, "grad_norm": 1.6455881879908836, "learning_rate": 3.690198366394399e-05, "loss": 0.7294, "step": 1265 }, { "epoch": 0.03706405953596965, "grad_norm": 1.5669911601188393, "learning_rate": 3.704784130688448e-05, "loss": 0.6779, "step": 1270 }, { "epoch": 0.03720998103020575, "grad_norm": 1.5657409691383104, "learning_rate": 3.719369894982497e-05, "loss": 0.7173, "step": 1275 }, { "epoch": 0.03735590252444185, "grad_norm": 1.4649791712006108, "learning_rate": 3.7339556592765465e-05, "loss": 0.6902, "step": 1280 }, { "epoch": 0.03750182401867795, "grad_norm": 1.5115106621884336, "learning_rate": 3.748541423570595e-05, "loss": 0.796, "step": 1285 }, { "epoch": 0.037647745512914055, "grad_norm": 1.5684923982419243, "learning_rate": 3.763127187864644e-05, "loss": 0.7142, "step": 1290 }, { "epoch": 0.03779366700715015, "grad_norm": 1.5099222776771948, "learning_rate": 3.777712952158693e-05, "loss": 0.6823, "step": 1295 }, { "epoch": 0.037939588501386255, "grad_norm": 1.4243264396360908, "learning_rate": 3.792298716452742e-05, "loss": 0.6916, "step": 1300 }, { "epoch": 0.03808550999562235, "grad_norm": 1.3191401279149118, "learning_rate": 3.8068844807467914e-05, "loss": 0.6742, "step": 1305 }, { "epoch": 0.038231431489858456, "grad_norm": 1.3218496472271208, "learning_rate": 3.8214702450408405e-05, "loss": 0.7052, "step": 1310 }, { "epoch": 0.03837735298409456, "grad_norm": 1.448794701520074, "learning_rate": 3.836056009334889e-05, "loss": 0.7278, "step": 1315 }, { "epoch": 0.038523274478330656, "grad_norm": 1.3899813988499192, "learning_rate": 3.850641773628939e-05, "loss": 0.7099, "step": 1320 }, { "epoch": 0.03866919597256676, "grad_norm": 1.4726248781173996, "learning_rate": 3.865227537922987e-05, "loss": 0.581, "step": 1325 }, { "epoch": 0.03881511746680286, "grad_norm": 1.2626979658752469, "learning_rate": 3.879813302217036e-05, "loss": 0.6695, "step": 1330 }, { "epoch": 0.03896103896103896, "grad_norm": 1.435335222367685, "learning_rate": 3.8943990665110854e-05, "loss": 0.7263, "step": 1335 }, { "epoch": 0.039106960455275064, "grad_norm": 1.3976072965377189, "learning_rate": 3.908984830805134e-05, "loss": 0.7122, "step": 1340 }, { "epoch": 0.03925288194951116, "grad_norm": 1.3097883164112671, "learning_rate": 3.923570595099184e-05, "loss": 0.7605, "step": 1345 }, { "epoch": 0.039398803443747264, "grad_norm": 1.5303588178820082, "learning_rate": 3.938156359393233e-05, "loss": 0.7838, "step": 1350 }, { "epoch": 0.03954472493798337, "grad_norm": 1.6361409986880862, "learning_rate": 3.952742123687281e-05, "loss": 0.769, "step": 1355 }, { "epoch": 0.039690646432219465, "grad_norm": 1.6507215081937656, "learning_rate": 3.9673278879813304e-05, "loss": 0.7495, "step": 1360 }, { "epoch": 0.03983656792645557, "grad_norm": 1.4393876063478734, "learning_rate": 3.9819136522753795e-05, "loss": 0.6859, "step": 1365 }, { "epoch": 0.039982489420691665, "grad_norm": 1.5005476834299698, "learning_rate": 3.9964994165694286e-05, "loss": 0.7226, "step": 1370 }, { "epoch": 0.04012841091492777, "grad_norm": 1.5310678050070499, "learning_rate": 4.011085180863478e-05, "loss": 0.7582, "step": 1375 }, { "epoch": 0.04027433240916387, "grad_norm": 1.3212589600926459, "learning_rate": 4.025670945157526e-05, "loss": 0.6253, "step": 1380 }, { "epoch": 0.04042025390339997, "grad_norm": 1.2668398157226317, "learning_rate": 4.040256709451575e-05, "loss": 0.7296, "step": 1385 }, { "epoch": 0.04056617539763607, "grad_norm": 1.2292822802173848, "learning_rate": 4.0548424737456244e-05, "loss": 0.6372, "step": 1390 }, { "epoch": 0.04071209689187217, "grad_norm": 1.482502641264336, "learning_rate": 4.0694282380396735e-05, "loss": 0.7039, "step": 1395 }, { "epoch": 0.04085801838610827, "grad_norm": 1.3959579669494955, "learning_rate": 4.0840140023337226e-05, "loss": 0.6954, "step": 1400 }, { "epoch": 0.04100393988034438, "grad_norm": 1.7627540859226858, "learning_rate": 4.098599766627772e-05, "loss": 0.6798, "step": 1405 }, { "epoch": 0.041149861374580474, "grad_norm": 2.0770882092602108, "learning_rate": 4.11318553092182e-05, "loss": 0.7118, "step": 1410 }, { "epoch": 0.04129578286881658, "grad_norm": 1.5840491687532077, "learning_rate": 4.127771295215869e-05, "loss": 0.716, "step": 1415 }, { "epoch": 0.04144170436305268, "grad_norm": 1.4297647544021, "learning_rate": 4.1423570595099184e-05, "loss": 0.6965, "step": 1420 }, { "epoch": 0.04158762585728878, "grad_norm": 1.398340378534979, "learning_rate": 4.1569428238039676e-05, "loss": 0.7058, "step": 1425 }, { "epoch": 0.04173354735152488, "grad_norm": 1.3188527713486136, "learning_rate": 4.171528588098017e-05, "loss": 0.7317, "step": 1430 }, { "epoch": 0.04187946884576098, "grad_norm": 1.5069846562860565, "learning_rate": 4.186114352392065e-05, "loss": 0.7435, "step": 1435 }, { "epoch": 0.04202539033999708, "grad_norm": 1.277428473829212, "learning_rate": 4.200700116686114e-05, "loss": 0.704, "step": 1440 }, { "epoch": 0.042171311834233186, "grad_norm": 1.415125736273544, "learning_rate": 4.215285880980164e-05, "loss": 0.6985, "step": 1445 }, { "epoch": 0.04231723332846928, "grad_norm": 1.4305166349397616, "learning_rate": 4.2298716452742125e-05, "loss": 0.6834, "step": 1450 }, { "epoch": 0.042463154822705386, "grad_norm": 1.7288035751579278, "learning_rate": 4.2444574095682616e-05, "loss": 0.7263, "step": 1455 }, { "epoch": 0.04260907631694148, "grad_norm": 1.5886957733523792, "learning_rate": 4.259043173862311e-05, "loss": 0.6945, "step": 1460 }, { "epoch": 0.042754997811177586, "grad_norm": 1.5777879075523655, "learning_rate": 4.273628938156359e-05, "loss": 0.7524, "step": 1465 }, { "epoch": 0.04290091930541369, "grad_norm": 1.277107562463112, "learning_rate": 4.288214702450409e-05, "loss": 0.7441, "step": 1470 }, { "epoch": 0.04304684079964979, "grad_norm": 1.2626646097924976, "learning_rate": 4.3028004667444574e-05, "loss": 0.6478, "step": 1475 }, { "epoch": 0.04319276229388589, "grad_norm": 1.6447873754918085, "learning_rate": 4.3173862310385065e-05, "loss": 0.6455, "step": 1480 }, { "epoch": 0.04333868378812199, "grad_norm": 1.3730093102411656, "learning_rate": 4.3319719953325557e-05, "loss": 0.6718, "step": 1485 }, { "epoch": 0.04348460528235809, "grad_norm": 1.3065791285419022, "learning_rate": 4.346557759626604e-05, "loss": 0.6949, "step": 1490 }, { "epoch": 0.043630526776594195, "grad_norm": 1.4009097173279175, "learning_rate": 4.361143523920654e-05, "loss": 0.7027, "step": 1495 }, { "epoch": 0.04377644827083029, "grad_norm": 1.357777526652003, "learning_rate": 4.375729288214703e-05, "loss": 0.727, "step": 1500 }, { "epoch": 0.043922369765066395, "grad_norm": 1.8183917864134866, "learning_rate": 4.3903150525087515e-05, "loss": 0.7275, "step": 1505 }, { "epoch": 0.0440682912593025, "grad_norm": 1.4573978251867685, "learning_rate": 4.4049008168028006e-05, "loss": 0.8611, "step": 1510 }, { "epoch": 0.044214212753538595, "grad_norm": 1.6232079352018782, "learning_rate": 4.41948658109685e-05, "loss": 0.7801, "step": 1515 }, { "epoch": 0.0443601342477747, "grad_norm": 1.4392573700330675, "learning_rate": 4.434072345390899e-05, "loss": 0.7565, "step": 1520 }, { "epoch": 0.044506055742010796, "grad_norm": 1.5163832264971888, "learning_rate": 4.448658109684948e-05, "loss": 0.6971, "step": 1525 }, { "epoch": 0.0446519772362469, "grad_norm": 2.317978025584996, "learning_rate": 4.4632438739789964e-05, "loss": 0.764, "step": 1530 }, { "epoch": 0.044797898730483, "grad_norm": 1.7525092590216673, "learning_rate": 4.4778296382730455e-05, "loss": 0.7569, "step": 1535 }, { "epoch": 0.0449438202247191, "grad_norm": 1.5015982832337358, "learning_rate": 4.4924154025670946e-05, "loss": 0.6919, "step": 1540 }, { "epoch": 0.045089741718955204, "grad_norm": 1.4924861531543965, "learning_rate": 4.507001166861144e-05, "loss": 0.7925, "step": 1545 }, { "epoch": 0.0452356632131913, "grad_norm": 1.4757138590501715, "learning_rate": 4.521586931155193e-05, "loss": 0.7373, "step": 1550 }, { "epoch": 0.045381584707427404, "grad_norm": 1.2053883351250492, "learning_rate": 4.536172695449242e-05, "loss": 0.6691, "step": 1555 }, { "epoch": 0.04552750620166351, "grad_norm": 1.4506980323554002, "learning_rate": 4.5507584597432904e-05, "loss": 0.7319, "step": 1560 }, { "epoch": 0.045673427695899604, "grad_norm": 1.259230745043163, "learning_rate": 4.5653442240373396e-05, "loss": 0.7161, "step": 1565 }, { "epoch": 0.04581934919013571, "grad_norm": 1.294327365458817, "learning_rate": 4.579929988331389e-05, "loss": 0.74, "step": 1570 }, { "epoch": 0.045965270684371805, "grad_norm": 1.3861326100551372, "learning_rate": 4.594515752625438e-05, "loss": 0.7444, "step": 1575 }, { "epoch": 0.04611119217860791, "grad_norm": 1.258164525124542, "learning_rate": 4.609101516919487e-05, "loss": 0.6658, "step": 1580 }, { "epoch": 0.04625711367284401, "grad_norm": 1.5429846005126808, "learning_rate": 4.6236872812135354e-05, "loss": 0.7618, "step": 1585 }, { "epoch": 0.04640303516708011, "grad_norm": 1.2413030915088856, "learning_rate": 4.6382730455075845e-05, "loss": 0.6828, "step": 1590 }, { "epoch": 0.04654895666131621, "grad_norm": 1.2893065668062218, "learning_rate": 4.652858809801634e-05, "loss": 0.7207, "step": 1595 }, { "epoch": 0.046694878155552316, "grad_norm": 1.534254191324295, "learning_rate": 4.667444574095683e-05, "loss": 0.6597, "step": 1600 }, { "epoch": 0.04684079964978841, "grad_norm": 1.325070694110307, "learning_rate": 4.682030338389732e-05, "loss": 0.7319, "step": 1605 }, { "epoch": 0.04698672114402452, "grad_norm": 1.7421480208019755, "learning_rate": 4.696616102683781e-05, "loss": 0.7613, "step": 1610 }, { "epoch": 0.04713264263826061, "grad_norm": 1.3878222764467754, "learning_rate": 4.7112018669778294e-05, "loss": 0.7144, "step": 1615 }, { "epoch": 0.04727856413249672, "grad_norm": 1.437164477838428, "learning_rate": 4.725787631271879e-05, "loss": 0.7222, "step": 1620 }, { "epoch": 0.04742448562673282, "grad_norm": 1.4216858430458277, "learning_rate": 4.7403733955659277e-05, "loss": 0.7008, "step": 1625 }, { "epoch": 0.04757040712096892, "grad_norm": 1.4633195224395286, "learning_rate": 4.754959159859977e-05, "loss": 0.7669, "step": 1630 }, { "epoch": 0.04771632861520502, "grad_norm": 1.433515409194029, "learning_rate": 4.769544924154026e-05, "loss": 0.75, "step": 1635 }, { "epoch": 0.04786225010944112, "grad_norm": 1.4079939090780536, "learning_rate": 4.784130688448074e-05, "loss": 0.6996, "step": 1640 }, { "epoch": 0.04800817160367722, "grad_norm": 1.3618814112009943, "learning_rate": 4.798716452742124e-05, "loss": 0.7725, "step": 1645 }, { "epoch": 0.048154093097913325, "grad_norm": 1.4928210231146142, "learning_rate": 4.813302217036173e-05, "loss": 0.6997, "step": 1650 }, { "epoch": 0.04830001459214942, "grad_norm": 1.3787922186887145, "learning_rate": 4.827887981330222e-05, "loss": 0.729, "step": 1655 }, { "epoch": 0.048445936086385526, "grad_norm": 1.4613625687495408, "learning_rate": 4.842473745624271e-05, "loss": 0.6909, "step": 1660 }, { "epoch": 0.04859185758062162, "grad_norm": 1.3840626726504617, "learning_rate": 4.85705950991832e-05, "loss": 0.6814, "step": 1665 }, { "epoch": 0.048737779074857726, "grad_norm": 1.2832391941332677, "learning_rate": 4.871645274212369e-05, "loss": 0.7335, "step": 1670 }, { "epoch": 0.04888370056909383, "grad_norm": 1.5278573819228933, "learning_rate": 4.886231038506418e-05, "loss": 0.7602, "step": 1675 }, { "epoch": 0.049029622063329927, "grad_norm": 1.3147818058889946, "learning_rate": 4.9008168028004666e-05, "loss": 0.759, "step": 1680 }, { "epoch": 0.04917554355756603, "grad_norm": 1.4974669874886688, "learning_rate": 4.915402567094516e-05, "loss": 0.7281, "step": 1685 }, { "epoch": 0.049321465051802134, "grad_norm": 1.325413873664663, "learning_rate": 4.929988331388565e-05, "loss": 0.6783, "step": 1690 }, { "epoch": 0.04946738654603823, "grad_norm": 1.6775492170661628, "learning_rate": 4.944574095682614e-05, "loss": 0.7222, "step": 1695 }, { "epoch": 0.049613308040274334, "grad_norm": 1.5164031702499228, "learning_rate": 4.959159859976663e-05, "loss": 0.7444, "step": 1700 }, { "epoch": 0.04975922953451043, "grad_norm": 1.3883945829821218, "learning_rate": 4.973745624270712e-05, "loss": 0.7094, "step": 1705 }, { "epoch": 0.049905151028746535, "grad_norm": 1.4141884125777868, "learning_rate": 4.988331388564761e-05, "loss": 0.7099, "step": 1710 }, { "epoch": 0.05005107252298264, "grad_norm": 1.5489285429991548, "learning_rate": 4.99999998952091e-05, "loss": 0.7698, "step": 1715 }, { "epoch": 0.050196994017218735, "grad_norm": 1.5009385671849187, "learning_rate": 4.999999622752747e-05, "loss": 0.7016, "step": 1720 }, { "epoch": 0.05034291551145484, "grad_norm": 1.1100258836488293, "learning_rate": 4.999998732030149e-05, "loss": 0.7713, "step": 1725 }, { "epoch": 0.050488837005690936, "grad_norm": 1.3623240200429385, "learning_rate": 4.999997317353324e-05, "loss": 0.7417, "step": 1730 }, { "epoch": 0.05063475849992704, "grad_norm": 1.1959053957175283, "learning_rate": 4.999995378722602e-05, "loss": 0.7139, "step": 1735 }, { "epoch": 0.05078067999416314, "grad_norm": 1.621560063903723, "learning_rate": 4.999992916138431e-05, "loss": 0.7428, "step": 1740 }, { "epoch": 0.05092660148839924, "grad_norm": 1.1896886442979866, "learning_rate": 4.999989929601388e-05, "loss": 0.7488, "step": 1745 }, { "epoch": 0.05107252298263534, "grad_norm": 1.4603550657267539, "learning_rate": 4.999986419112167e-05, "loss": 0.7483, "step": 1750 }, { "epoch": 0.05121844447687144, "grad_norm": 1.6414486516516842, "learning_rate": 4.9999823846715864e-05, "loss": 0.748, "step": 1755 }, { "epoch": 0.051364365971107544, "grad_norm": 1.5055996794183943, "learning_rate": 4.999977826280584e-05, "loss": 0.7276, "step": 1760 }, { "epoch": 0.05151028746534365, "grad_norm": 1.3554159340661034, "learning_rate": 4.999972743940223e-05, "loss": 0.7041, "step": 1765 }, { "epoch": 0.051656208959579744, "grad_norm": 1.4503814058183095, "learning_rate": 4.999967137651686e-05, "loss": 0.7733, "step": 1770 }, { "epoch": 0.05180213045381585, "grad_norm": 1.4164752212206861, "learning_rate": 4.999961007416279e-05, "loss": 0.7353, "step": 1775 }, { "epoch": 0.05194805194805195, "grad_norm": 1.6006939733383188, "learning_rate": 4.999954353235429e-05, "loss": 0.7567, "step": 1780 }, { "epoch": 0.05209397344228805, "grad_norm": 1.3758489941956624, "learning_rate": 4.999947175110686e-05, "loss": 0.7446, "step": 1785 }, { "epoch": 0.05223989493652415, "grad_norm": 1.3256297071339445, "learning_rate": 4.999939473043721e-05, "loss": 0.8025, "step": 1790 }, { "epoch": 0.05238581643076025, "grad_norm": 1.3357548954547407, "learning_rate": 4.999931247036329e-05, "loss": 0.7409, "step": 1795 }, { "epoch": 0.05253173792499635, "grad_norm": 1.3184267663106126, "learning_rate": 4.999922497090424e-05, "loss": 0.6972, "step": 1800 }, { "epoch": 0.052677659419232456, "grad_norm": 1.4287090803256959, "learning_rate": 4.999913223208044e-05, "loss": 0.7577, "step": 1805 }, { "epoch": 0.05282358091346855, "grad_norm": 1.4513227193251272, "learning_rate": 4.999903425391349e-05, "loss": 0.7461, "step": 1810 }, { "epoch": 0.052969502407704656, "grad_norm": 1.5577580011424679, "learning_rate": 4.99989310364262e-05, "loss": 0.7457, "step": 1815 }, { "epoch": 0.05311542390194075, "grad_norm": 1.4317513899882508, "learning_rate": 4.9998822579642616e-05, "loss": 0.683, "step": 1820 }, { "epoch": 0.05326134539617686, "grad_norm": 1.3873535818291367, "learning_rate": 4.999870888358799e-05, "loss": 0.7166, "step": 1825 }, { "epoch": 0.05340726689041296, "grad_norm": 1.3423429338740624, "learning_rate": 4.9998589948288786e-05, "loss": 0.7035, "step": 1830 }, { "epoch": 0.05355318838464906, "grad_norm": 1.3402373645243024, "learning_rate": 4.999846577377272e-05, "loss": 0.7836, "step": 1835 }, { "epoch": 0.05369910987888516, "grad_norm": 1.4676081185696548, "learning_rate": 4.9998336360068695e-05, "loss": 0.6916, "step": 1840 }, { "epoch": 0.05384503137312126, "grad_norm": 3.2226642715494096, "learning_rate": 4.999820170720686e-05, "loss": 0.7324, "step": 1845 }, { "epoch": 0.05399095286735736, "grad_norm": 1.5147730323151014, "learning_rate": 4.999806181521855e-05, "loss": 0.775, "step": 1850 }, { "epoch": 0.054136874361593465, "grad_norm": 1.2677879562387062, "learning_rate": 4.9997916684136357e-05, "loss": 0.6843, "step": 1855 }, { "epoch": 0.05428279585582956, "grad_norm": 1.4626998503363509, "learning_rate": 4.9997766313994075e-05, "loss": 0.7271, "step": 1860 }, { "epoch": 0.054428717350065665, "grad_norm": 1.6242490303940638, "learning_rate": 4.999761070482672e-05, "loss": 0.7511, "step": 1865 }, { "epoch": 0.05457463884430177, "grad_norm": 1.471367070391399, "learning_rate": 4.999744985667053e-05, "loss": 0.7383, "step": 1870 }, { "epoch": 0.054720560338537866, "grad_norm": 1.2398458084232888, "learning_rate": 4.9997283769562965e-05, "loss": 0.6543, "step": 1875 }, { "epoch": 0.05486648183277397, "grad_norm": 1.2460398967700295, "learning_rate": 4.9997112443542686e-05, "loss": 0.7728, "step": 1880 }, { "epoch": 0.055012403327010066, "grad_norm": 1.5515635682991524, "learning_rate": 4.9996935878649616e-05, "loss": 0.8184, "step": 1885 }, { "epoch": 0.05515832482124617, "grad_norm": 1.4140897260560117, "learning_rate": 4.999675407492484e-05, "loss": 0.6774, "step": 1890 }, { "epoch": 0.055304246315482274, "grad_norm": 1.2477321550872238, "learning_rate": 4.9996567032410724e-05, "loss": 0.6574, "step": 1895 }, { "epoch": 0.05545016780971837, "grad_norm": 1.3933862786721032, "learning_rate": 4.99963747511508e-05, "loss": 0.7777, "step": 1900 }, { "epoch": 0.055596089303954474, "grad_norm": 1.3336333392863404, "learning_rate": 4.999617723118985e-05, "loss": 0.7539, "step": 1905 }, { "epoch": 0.05574201079819057, "grad_norm": 1.2310526135000395, "learning_rate": 4.999597447257388e-05, "loss": 0.7208, "step": 1910 }, { "epoch": 0.055887932292426674, "grad_norm": 1.195398380972192, "learning_rate": 4.9995766475350106e-05, "loss": 0.6929, "step": 1915 }, { "epoch": 0.05603385378666278, "grad_norm": 1.3763010713502086, "learning_rate": 4.9995553239566956e-05, "loss": 0.7382, "step": 1920 }, { "epoch": 0.056179775280898875, "grad_norm": 1.3502815894725948, "learning_rate": 4.9995334765274093e-05, "loss": 0.7508, "step": 1925 }, { "epoch": 0.05632569677513498, "grad_norm": 1.2630819320313937, "learning_rate": 4.9995111052522374e-05, "loss": 0.7831, "step": 1930 }, { "epoch": 0.056471618269371075, "grad_norm": 1.1598061036629406, "learning_rate": 4.999488210136393e-05, "loss": 0.7027, "step": 1935 }, { "epoch": 0.05661753976360718, "grad_norm": 1.3085030319156963, "learning_rate": 4.999464791185204e-05, "loss": 0.6834, "step": 1940 }, { "epoch": 0.05676346125784328, "grad_norm": 1.189253421244276, "learning_rate": 4.9994408484041266e-05, "loss": 0.6539, "step": 1945 }, { "epoch": 0.05690938275207938, "grad_norm": 1.328377197158246, "learning_rate": 4.999416381798735e-05, "loss": 0.7957, "step": 1950 }, { "epoch": 0.05705530424631548, "grad_norm": 1.3831582959239892, "learning_rate": 4.999391391374726e-05, "loss": 0.7051, "step": 1955 }, { "epoch": 0.05720122574055159, "grad_norm": 1.2371294210719483, "learning_rate": 4.999365877137922e-05, "loss": 0.6856, "step": 1960 }, { "epoch": 0.05734714723478768, "grad_norm": 1.1433664875714422, "learning_rate": 4.9993398390942605e-05, "loss": 0.6796, "step": 1965 }, { "epoch": 0.05749306872902379, "grad_norm": 1.4680255325182232, "learning_rate": 4.9993132772498075e-05, "loss": 0.7548, "step": 1970 }, { "epoch": 0.057638990223259884, "grad_norm": 1.2455145333553008, "learning_rate": 4.999286191610749e-05, "loss": 0.7135, "step": 1975 }, { "epoch": 0.05778491171749599, "grad_norm": 1.8252722413199478, "learning_rate": 4.99925858218339e-05, "loss": 0.7772, "step": 1980 }, { "epoch": 0.05793083321173209, "grad_norm": 1.5337533716975953, "learning_rate": 4.999230448974161e-05, "loss": 0.7922, "step": 1985 }, { "epoch": 0.05807675470596819, "grad_norm": 1.304275618366775, "learning_rate": 4.999201791989614e-05, "loss": 0.698, "step": 1990 }, { "epoch": 0.05822267620020429, "grad_norm": 1.480397438136825, "learning_rate": 4.999172611236422e-05, "loss": 0.7348, "step": 1995 }, { "epoch": 0.05836859769444039, "grad_norm": 1.252769843255006, "learning_rate": 4.99914290672138e-05, "loss": 0.7229, "step": 2000 }, { "epoch": 0.05851451918867649, "grad_norm": 1.1784037490192194, "learning_rate": 4.9991126784514056e-05, "loss": 0.715, "step": 2005 }, { "epoch": 0.058660440682912596, "grad_norm": 1.2044815996761107, "learning_rate": 4.999081926433538e-05, "loss": 0.7012, "step": 2010 }, { "epoch": 0.05880636217714869, "grad_norm": 1.291539137109826, "learning_rate": 4.999050650674937e-05, "loss": 0.7418, "step": 2015 }, { "epoch": 0.058952283671384796, "grad_norm": 1.320349320649597, "learning_rate": 4.9990188511828876e-05, "loss": 0.7824, "step": 2020 }, { "epoch": 0.05909820516562089, "grad_norm": 1.2782542334459186, "learning_rate": 4.998986527964794e-05, "loss": 0.7197, "step": 2025 }, { "epoch": 0.059244126659857, "grad_norm": 1.3127633566021457, "learning_rate": 4.998953681028184e-05, "loss": 0.8242, "step": 2030 }, { "epoch": 0.0593900481540931, "grad_norm": 1.2456638738937682, "learning_rate": 4.998920310380706e-05, "loss": 0.7047, "step": 2035 }, { "epoch": 0.0595359696483292, "grad_norm": 1.5294055379073646, "learning_rate": 4.998886416030131e-05, "loss": 0.7231, "step": 2040 }, { "epoch": 0.0596818911425653, "grad_norm": 1.404503139124834, "learning_rate": 4.9988519979843515e-05, "loss": 0.741, "step": 2045 }, { "epoch": 0.0598278126368014, "grad_norm": 1.4071879656978263, "learning_rate": 4.998817056251383e-05, "loss": 0.7122, "step": 2050 }, { "epoch": 0.0599737341310375, "grad_norm": 1.5919737599471258, "learning_rate": 4.9987815908393624e-05, "loss": 0.6738, "step": 2055 }, { "epoch": 0.060119655625273605, "grad_norm": 1.3510983803352308, "learning_rate": 4.9987456017565486e-05, "loss": 0.8096, "step": 2060 }, { "epoch": 0.0602655771195097, "grad_norm": 1.38060452056238, "learning_rate": 4.998709089011321e-05, "loss": 0.8115, "step": 2065 }, { "epoch": 0.060411498613745805, "grad_norm": 1.5194135746506383, "learning_rate": 4.998672052612185e-05, "loss": 0.7685, "step": 2070 }, { "epoch": 0.06055742010798191, "grad_norm": 1.1273003115312668, "learning_rate": 4.998634492567762e-05, "loss": 0.7174, "step": 2075 }, { "epoch": 0.060703341602218006, "grad_norm": 1.2092491367776603, "learning_rate": 4.998596408886801e-05, "loss": 0.7448, "step": 2080 }, { "epoch": 0.06084926309645411, "grad_norm": 1.290181374820797, "learning_rate": 4.9985578015781685e-05, "loss": 0.7346, "step": 2085 }, { "epoch": 0.060995184590690206, "grad_norm": 1.1652853683145246, "learning_rate": 4.998518670650857e-05, "loss": 0.6608, "step": 2090 }, { "epoch": 0.06114110608492631, "grad_norm": 1.2051801845636834, "learning_rate": 4.998479016113978e-05, "loss": 0.6581, "step": 2095 }, { "epoch": 0.06128702757916241, "grad_norm": 1.3292843794218534, "learning_rate": 4.9984388379767645e-05, "loss": 0.7597, "step": 2100 }, { "epoch": 0.06143294907339851, "grad_norm": 1.1499416491903214, "learning_rate": 4.9983981362485747e-05, "loss": 0.7384, "step": 2105 }, { "epoch": 0.061578870567634614, "grad_norm": 1.493032918350083, "learning_rate": 4.998356910938886e-05, "loss": 0.7258, "step": 2110 }, { "epoch": 0.06172479206187071, "grad_norm": 1.1758083858592523, "learning_rate": 4.998315162057298e-05, "loss": 0.7267, "step": 2115 }, { "epoch": 0.061870713556106814, "grad_norm": 1.1991582459982524, "learning_rate": 4.998272889613534e-05, "loss": 0.685, "step": 2120 }, { "epoch": 0.06201663505034292, "grad_norm": 1.1807827849653567, "learning_rate": 4.998230093617437e-05, "loss": 0.6939, "step": 2125 }, { "epoch": 0.062162556544579015, "grad_norm": 1.3403326608048847, "learning_rate": 4.998186774078972e-05, "loss": 0.7885, "step": 2130 }, { "epoch": 0.06230847803881512, "grad_norm": 1.3819878556600145, "learning_rate": 4.9981429310082284e-05, "loss": 0.8333, "step": 2135 }, { "epoch": 0.062454399533051215, "grad_norm": 1.1141701273740472, "learning_rate": 4.9980985644154144e-05, "loss": 0.7041, "step": 2140 }, { "epoch": 0.06260032102728733, "grad_norm": 1.286037742419842, "learning_rate": 4.998053674310863e-05, "loss": 0.7495, "step": 2145 }, { "epoch": 0.06274624252152342, "grad_norm": 1.3604656301414162, "learning_rate": 4.998008260705027e-05, "loss": 0.7477, "step": 2150 }, { "epoch": 0.06289216401575952, "grad_norm": 1.2711167229338713, "learning_rate": 4.997962323608482e-05, "loss": 0.7133, "step": 2155 }, { "epoch": 0.06303808550999562, "grad_norm": 1.2439249243670005, "learning_rate": 4.997915863031926e-05, "loss": 0.7814, "step": 2160 }, { "epoch": 0.06318400700423173, "grad_norm": 1.0594446077432493, "learning_rate": 4.997868878986176e-05, "loss": 0.6933, "step": 2165 }, { "epoch": 0.06332992849846783, "grad_norm": 1.4538820689363272, "learning_rate": 4.997821371482175e-05, "loss": 0.7247, "step": 2170 }, { "epoch": 0.06347584999270392, "grad_norm": 1.5099163474537787, "learning_rate": 4.997773340530985e-05, "loss": 0.7228, "step": 2175 }, { "epoch": 0.06362177148694002, "grad_norm": 1.5250824525307163, "learning_rate": 4.9977247861437917e-05, "loss": 0.7148, "step": 2180 }, { "epoch": 0.06376769298117613, "grad_norm": 1.4301435712897916, "learning_rate": 4.997675708331902e-05, "loss": 0.7548, "step": 2185 }, { "epoch": 0.06391361447541223, "grad_norm": 1.1769706738775563, "learning_rate": 4.997626107106744e-05, "loss": 0.7117, "step": 2190 }, { "epoch": 0.06405953596964833, "grad_norm": 1.419877004192711, "learning_rate": 4.9975759824798676e-05, "loss": 0.7297, "step": 2195 }, { "epoch": 0.06420545746388442, "grad_norm": 1.4033595729322852, "learning_rate": 4.997525334462948e-05, "loss": 0.6969, "step": 2200 }, { "epoch": 0.06435137895812053, "grad_norm": 1.1946787927906284, "learning_rate": 4.997474163067776e-05, "loss": 0.7459, "step": 2205 }, { "epoch": 0.06449730045235663, "grad_norm": 1.241877789114203, "learning_rate": 4.99742246830627e-05, "loss": 0.768, "step": 2210 }, { "epoch": 0.06464322194659274, "grad_norm": 1.2228153686102359, "learning_rate": 4.997370250190468e-05, "loss": 0.7219, "step": 2215 }, { "epoch": 0.06478914344082884, "grad_norm": 1.3195641442753128, "learning_rate": 4.9973175087325285e-05, "loss": 0.7507, "step": 2220 }, { "epoch": 0.06493506493506493, "grad_norm": 1.4244210629807366, "learning_rate": 4.997264243944735e-05, "loss": 0.721, "step": 2225 }, { "epoch": 0.06508098642930103, "grad_norm": 1.1960041483445072, "learning_rate": 4.997210455839491e-05, "loss": 0.7399, "step": 2230 }, { "epoch": 0.06522690792353714, "grad_norm": 1.1991878426224685, "learning_rate": 4.997156144429321e-05, "loss": 0.7562, "step": 2235 }, { "epoch": 0.06537282941777324, "grad_norm": 1.3054437830587324, "learning_rate": 4.997101309726873e-05, "loss": 0.6905, "step": 2240 }, { "epoch": 0.06551875091200934, "grad_norm": 1.1787769240409458, "learning_rate": 4.997045951744917e-05, "loss": 0.7622, "step": 2245 }, { "epoch": 0.06566467240624543, "grad_norm": 1.3229927030556956, "learning_rate": 4.9969900704963436e-05, "loss": 0.7085, "step": 2250 }, { "epoch": 0.06581059390048154, "grad_norm": 1.1124966798807985, "learning_rate": 4.9969336659941646e-05, "loss": 0.6807, "step": 2255 }, { "epoch": 0.06595651539471764, "grad_norm": 1.2596715345120049, "learning_rate": 4.9968767382515164e-05, "loss": 0.7053, "step": 2260 }, { "epoch": 0.06610243688895374, "grad_norm": 1.5016762619635984, "learning_rate": 4.9968192872816554e-05, "loss": 0.7123, "step": 2265 }, { "epoch": 0.06624835838318985, "grad_norm": 1.4737925975333308, "learning_rate": 4.996761313097959e-05, "loss": 0.7365, "step": 2270 }, { "epoch": 0.06639427987742594, "grad_norm": 1.186690900860536, "learning_rate": 4.9967028157139304e-05, "loss": 0.678, "step": 2275 }, { "epoch": 0.06654020137166204, "grad_norm": 1.1589042008228565, "learning_rate": 4.996643795143189e-05, "loss": 0.6864, "step": 2280 }, { "epoch": 0.06668612286589815, "grad_norm": 1.1064561143791263, "learning_rate": 4.9965842513994795e-05, "loss": 0.6957, "step": 2285 }, { "epoch": 0.06683204436013425, "grad_norm": 1.2865770484963301, "learning_rate": 4.9965241844966674e-05, "loss": 0.706, "step": 2290 }, { "epoch": 0.06697796585437035, "grad_norm": 1.1870033260695017, "learning_rate": 4.9964635944487424e-05, "loss": 0.7526, "step": 2295 }, { "epoch": 0.06712388734860646, "grad_norm": 1.2389259592263213, "learning_rate": 4.996402481269812e-05, "loss": 0.6709, "step": 2300 }, { "epoch": 0.06726980884284255, "grad_norm": 1.2067755710375718, "learning_rate": 4.996340844974108e-05, "loss": 0.7896, "step": 2305 }, { "epoch": 0.06741573033707865, "grad_norm": 1.1379793099244961, "learning_rate": 4.9962786855759843e-05, "loss": 0.7196, "step": 2310 }, { "epoch": 0.06756165183131475, "grad_norm": 1.4337643942611982, "learning_rate": 4.996216003089914e-05, "loss": 0.7121, "step": 2315 }, { "epoch": 0.06770757332555086, "grad_norm": 1.2297091477283193, "learning_rate": 4.996152797530497e-05, "loss": 0.6277, "step": 2320 }, { "epoch": 0.06785349481978696, "grad_norm": 1.336303746163312, "learning_rate": 4.996089068912449e-05, "loss": 0.6736, "step": 2325 }, { "epoch": 0.06799941631402305, "grad_norm": 1.2172510478016814, "learning_rate": 4.9960248172506115e-05, "loss": 0.6564, "step": 2330 }, { "epoch": 0.06814533780825915, "grad_norm": 1.2510885722792702, "learning_rate": 4.995960042559947e-05, "loss": 0.7631, "step": 2335 }, { "epoch": 0.06829125930249526, "grad_norm": 1.086301288205196, "learning_rate": 4.99589474485554e-05, "loss": 0.7352, "step": 2340 }, { "epoch": 0.06843718079673136, "grad_norm": 1.2941420068079943, "learning_rate": 4.9958289241525945e-05, "loss": 0.6949, "step": 2345 }, { "epoch": 0.06858310229096747, "grad_norm": 1.1265269718678437, "learning_rate": 4.9957625804664386e-05, "loss": 0.6864, "step": 2350 }, { "epoch": 0.06872902378520356, "grad_norm": 1.091114609828629, "learning_rate": 4.9956957138125234e-05, "loss": 0.6432, "step": 2355 }, { "epoch": 0.06887494527943966, "grad_norm": 1.521826692370768, "learning_rate": 4.995628324206419e-05, "loss": 0.7355, "step": 2360 }, { "epoch": 0.06902086677367576, "grad_norm": 1.169906654154956, "learning_rate": 4.995560411663817e-05, "loss": 0.6685, "step": 2365 }, { "epoch": 0.06916678826791187, "grad_norm": 1.2552300646284833, "learning_rate": 4.995491976200534e-05, "loss": 0.6765, "step": 2370 }, { "epoch": 0.06931270976214797, "grad_norm": 1.1080565485099827, "learning_rate": 4.995423017832505e-05, "loss": 0.7245, "step": 2375 }, { "epoch": 0.06945863125638406, "grad_norm": 1.3039222055343613, "learning_rate": 4.995353536575789e-05, "loss": 0.7249, "step": 2380 }, { "epoch": 0.06960455275062016, "grad_norm": 1.2252843417382429, "learning_rate": 4.9952835324465663e-05, "loss": 0.7206, "step": 2385 }, { "epoch": 0.06975047424485627, "grad_norm": 1.3286629535986196, "learning_rate": 4.995213005461139e-05, "loss": 0.7701, "step": 2390 }, { "epoch": 0.06989639573909237, "grad_norm": 1.376678831939567, "learning_rate": 4.995141955635928e-05, "loss": 0.6757, "step": 2395 }, { "epoch": 0.07004231723332847, "grad_norm": 1.2135980799250596, "learning_rate": 4.995070382987483e-05, "loss": 0.7356, "step": 2400 }, { "epoch": 0.07018823872756456, "grad_norm": 1.1511071959834587, "learning_rate": 4.9949982875324666e-05, "loss": 0.692, "step": 2405 }, { "epoch": 0.07033416022180067, "grad_norm": 1.1891927187571163, "learning_rate": 4.9949256692876704e-05, "loss": 0.6449, "step": 2410 }, { "epoch": 0.07048008171603677, "grad_norm": 1.3449098804464046, "learning_rate": 4.9948525282700044e-05, "loss": 0.6946, "step": 2415 }, { "epoch": 0.07062600321027288, "grad_norm": 1.467551334116575, "learning_rate": 4.9947788644965e-05, "loss": 0.7251, "step": 2420 }, { "epoch": 0.07077192470450898, "grad_norm": 1.2865829004410783, "learning_rate": 4.9947046779843116e-05, "loss": 0.7013, "step": 2425 }, { "epoch": 0.07091784619874507, "grad_norm": 1.495112540509866, "learning_rate": 4.994629968750715e-05, "loss": 0.7519, "step": 2430 }, { "epoch": 0.07106376769298117, "grad_norm": 1.28058130982145, "learning_rate": 4.994554736813108e-05, "loss": 0.7423, "step": 2435 }, { "epoch": 0.07120968918721728, "grad_norm": 1.041245082212463, "learning_rate": 4.9944789821890095e-05, "loss": 0.7318, "step": 2440 }, { "epoch": 0.07135561068145338, "grad_norm": 1.6895277069336783, "learning_rate": 4.9944027048960594e-05, "loss": 0.7415, "step": 2445 }, { "epoch": 0.07150153217568948, "grad_norm": 1.4092738824681306, "learning_rate": 4.994325904952022e-05, "loss": 0.7828, "step": 2450 }, { "epoch": 0.07164745366992557, "grad_norm": 1.0498275429598172, "learning_rate": 4.9942485823747805e-05, "loss": 0.6679, "step": 2455 }, { "epoch": 0.07179337516416168, "grad_norm": 1.2457136035786602, "learning_rate": 4.994170737182341e-05, "loss": 0.685, "step": 2460 }, { "epoch": 0.07193929665839778, "grad_norm": 1.084800211714994, "learning_rate": 4.994092369392832e-05, "loss": 0.6796, "step": 2465 }, { "epoch": 0.07208521815263388, "grad_norm": 1.3031546895787143, "learning_rate": 4.994013479024501e-05, "loss": 0.717, "step": 2470 }, { "epoch": 0.07223113964686999, "grad_norm": 1.1445623682796333, "learning_rate": 4.993934066095722e-05, "loss": 0.7178, "step": 2475 }, { "epoch": 0.07237706114110609, "grad_norm": 1.7556927538055653, "learning_rate": 4.9938541306249846e-05, "loss": 0.7739, "step": 2480 }, { "epoch": 0.07252298263534218, "grad_norm": 1.153150601321591, "learning_rate": 4.993773672630906e-05, "loss": 0.7342, "step": 2485 }, { "epoch": 0.07266890412957829, "grad_norm": 1.157971600723398, "learning_rate": 4.9936926921322206e-05, "loss": 0.7251, "step": 2490 }, { "epoch": 0.07281482562381439, "grad_norm": 1.2305823435532268, "learning_rate": 4.993611189147788e-05, "loss": 0.6486, "step": 2495 }, { "epoch": 0.07296074711805049, "grad_norm": 1.157643925802279, "learning_rate": 4.9935291636965854e-05, "loss": 0.7137, "step": 2500 }, { "epoch": 0.0731066686122866, "grad_norm": 1.0908201149923213, "learning_rate": 4.993446615797715e-05, "loss": 0.6464, "step": 2505 }, { "epoch": 0.07325259010652269, "grad_norm": 1.319550772585411, "learning_rate": 4.9933635454704e-05, "loss": 0.7119, "step": 2510 }, { "epoch": 0.07339851160075879, "grad_norm": 1.3133574588174883, "learning_rate": 4.993279952733984e-05, "loss": 0.6559, "step": 2515 }, { "epoch": 0.0735444330949949, "grad_norm": 1.3731793545515336, "learning_rate": 4.993195837607935e-05, "loss": 0.757, "step": 2520 }, { "epoch": 0.073690354589231, "grad_norm": 1.3463596177268926, "learning_rate": 4.993111200111838e-05, "loss": 0.7369, "step": 2525 }, { "epoch": 0.0738362760834671, "grad_norm": 1.1720972772559952, "learning_rate": 4.993026040265404e-05, "loss": 0.6913, "step": 2530 }, { "epoch": 0.07398219757770319, "grad_norm": 1.2311212852262081, "learning_rate": 4.992940358088465e-05, "loss": 0.7237, "step": 2535 }, { "epoch": 0.0741281190719393, "grad_norm": 1.2313173688199424, "learning_rate": 4.992854153600972e-05, "loss": 0.7099, "step": 2540 }, { "epoch": 0.0742740405661754, "grad_norm": 1.116975561940082, "learning_rate": 4.992767426823e-05, "loss": 0.7015, "step": 2545 }, { "epoch": 0.0744199620604115, "grad_norm": 1.3447765667807197, "learning_rate": 4.992680177774745e-05, "loss": 0.6965, "step": 2550 }, { "epoch": 0.0745658835546476, "grad_norm": 1.2803959342160747, "learning_rate": 4.9925924064765245e-05, "loss": 0.7027, "step": 2555 }, { "epoch": 0.0747118050488837, "grad_norm": 1.244134295034872, "learning_rate": 4.9925041129487776e-05, "loss": 0.7836, "step": 2560 }, { "epoch": 0.0748577265431198, "grad_norm": 1.2429016066981884, "learning_rate": 4.992415297212065e-05, "loss": 0.7296, "step": 2565 }, { "epoch": 0.0750036480373559, "grad_norm": 1.2265724380564247, "learning_rate": 4.9923259592870694e-05, "loss": 0.7165, "step": 2570 }, { "epoch": 0.075149569531592, "grad_norm": 1.117185974511804, "learning_rate": 4.992236099194595e-05, "loss": 0.6919, "step": 2575 }, { "epoch": 0.07529549102582811, "grad_norm": 1.3265955971131624, "learning_rate": 4.992145716955567e-05, "loss": 0.6736, "step": 2580 }, { "epoch": 0.0754414125200642, "grad_norm": 1.2238152641126043, "learning_rate": 4.992054812591032e-05, "loss": 0.6726, "step": 2585 }, { "epoch": 0.0755873340143003, "grad_norm": 1.1525077899777592, "learning_rate": 4.99196338612216e-05, "loss": 0.7443, "step": 2590 }, { "epoch": 0.0757332555085364, "grad_norm": 1.302404501600372, "learning_rate": 4.991871437570241e-05, "loss": 0.7528, "step": 2595 }, { "epoch": 0.07587917700277251, "grad_norm": 1.102349301530605, "learning_rate": 4.991778966956687e-05, "loss": 0.6982, "step": 2600 }, { "epoch": 0.07602509849700861, "grad_norm": 1.1613283935385472, "learning_rate": 4.9916859743030296e-05, "loss": 0.6683, "step": 2605 }, { "epoch": 0.0761710199912447, "grad_norm": 1.2000643942941749, "learning_rate": 4.991592459630927e-05, "loss": 0.7065, "step": 2610 }, { "epoch": 0.07631694148548081, "grad_norm": 1.2361901542750646, "learning_rate": 4.991498422962154e-05, "loss": 0.7013, "step": 2615 }, { "epoch": 0.07646286297971691, "grad_norm": 1.027968841207083, "learning_rate": 4.9914038643186094e-05, "loss": 0.6505, "step": 2620 }, { "epoch": 0.07660878447395301, "grad_norm": 1.214881352411773, "learning_rate": 4.9913087837223126e-05, "loss": 0.6923, "step": 2625 }, { "epoch": 0.07675470596818912, "grad_norm": 1.0925923759394451, "learning_rate": 4.991213181195405e-05, "loss": 0.6846, "step": 2630 }, { "epoch": 0.07690062746242521, "grad_norm": 1.502110141792278, "learning_rate": 4.991117056760149e-05, "loss": 0.7293, "step": 2635 }, { "epoch": 0.07704654895666131, "grad_norm": 1.3626183124314462, "learning_rate": 4.99102041043893e-05, "loss": 0.7359, "step": 2640 }, { "epoch": 0.07719247045089742, "grad_norm": 1.1885957240536167, "learning_rate": 4.9909232422542536e-05, "loss": 0.7286, "step": 2645 }, { "epoch": 0.07733839194513352, "grad_norm": 1.3070204347903123, "learning_rate": 4.9908255522287456e-05, "loss": 0.6748, "step": 2650 }, { "epoch": 0.07748431343936962, "grad_norm": 1.4889256476361912, "learning_rate": 4.9907273403851576e-05, "loss": 0.7004, "step": 2655 }, { "epoch": 0.07763023493360573, "grad_norm": 1.1707314013743833, "learning_rate": 4.990628606746358e-05, "loss": 0.68, "step": 2660 }, { "epoch": 0.07777615642784182, "grad_norm": 1.1793157559061662, "learning_rate": 4.9905293513353404e-05, "loss": 0.7043, "step": 2665 }, { "epoch": 0.07792207792207792, "grad_norm": 1.367234663921608, "learning_rate": 4.990429574175216e-05, "loss": 0.7256, "step": 2670 }, { "epoch": 0.07806799941631402, "grad_norm": 1.1716092733437509, "learning_rate": 4.990329275289222e-05, "loss": 0.6514, "step": 2675 }, { "epoch": 0.07821392091055013, "grad_norm": 1.2881490710967423, "learning_rate": 4.9902284547007136e-05, "loss": 0.6928, "step": 2680 }, { "epoch": 0.07835984240478623, "grad_norm": 1.4308049463344874, "learning_rate": 4.99012711243317e-05, "loss": 0.687, "step": 2685 }, { "epoch": 0.07850576389902232, "grad_norm": 1.1257191513918912, "learning_rate": 4.99002524851019e-05, "loss": 0.7151, "step": 2690 }, { "epoch": 0.07865168539325842, "grad_norm": 1.029860229789929, "learning_rate": 4.9899228629554936e-05, "loss": 0.7179, "step": 2695 }, { "epoch": 0.07879760688749453, "grad_norm": 1.050000619595012, "learning_rate": 4.989819955792924e-05, "loss": 0.7189, "step": 2700 }, { "epoch": 0.07894352838173063, "grad_norm": 1.2054061729668342, "learning_rate": 4.9897165270464455e-05, "loss": 0.6993, "step": 2705 }, { "epoch": 0.07908944987596674, "grad_norm": 1.3294811677975067, "learning_rate": 4.989612576740142e-05, "loss": 0.6857, "step": 2710 }, { "epoch": 0.07923537137020283, "grad_norm": 1.2837275469413965, "learning_rate": 4.989508104898222e-05, "loss": 0.8049, "step": 2715 }, { "epoch": 0.07938129286443893, "grad_norm": 1.0284261979165217, "learning_rate": 4.989403111545012e-05, "loss": 0.6989, "step": 2720 }, { "epoch": 0.07952721435867503, "grad_norm": 1.0804210127140008, "learning_rate": 4.989297596704963e-05, "loss": 0.7475, "step": 2725 }, { "epoch": 0.07967313585291114, "grad_norm": 1.2579467326388063, "learning_rate": 4.989191560402646e-05, "loss": 0.7253, "step": 2730 }, { "epoch": 0.07981905734714724, "grad_norm": 1.3763527016356465, "learning_rate": 4.9890850026627535e-05, "loss": 0.6831, "step": 2735 }, { "epoch": 0.07996497884138333, "grad_norm": 1.48212909796467, "learning_rate": 4.988977923510098e-05, "loss": 0.6892, "step": 2740 }, { "epoch": 0.08011090033561943, "grad_norm": 1.3190649102951475, "learning_rate": 4.988870322969617e-05, "loss": 0.6818, "step": 2745 }, { "epoch": 0.08025682182985554, "grad_norm": 1.1274317740056343, "learning_rate": 4.988762201066366e-05, "loss": 0.6693, "step": 2750 }, { "epoch": 0.08040274332409164, "grad_norm": 1.2275943658402562, "learning_rate": 4.988653557825523e-05, "loss": 0.7176, "step": 2755 }, { "epoch": 0.08054866481832774, "grad_norm": 1.1836597863183846, "learning_rate": 4.988544393272389e-05, "loss": 0.7263, "step": 2760 }, { "epoch": 0.08069458631256383, "grad_norm": 1.08063105033071, "learning_rate": 4.988434707432384e-05, "loss": 0.7262, "step": 2765 }, { "epoch": 0.08084050780679994, "grad_norm": 1.1881601832723485, "learning_rate": 4.98832450033105e-05, "loss": 0.6736, "step": 2770 }, { "epoch": 0.08098642930103604, "grad_norm": 1.3997745095761405, "learning_rate": 4.988213771994052e-05, "loss": 0.7748, "step": 2775 }, { "epoch": 0.08113235079527215, "grad_norm": 1.1908259079867707, "learning_rate": 4.988102522447174e-05, "loss": 0.7158, "step": 2780 }, { "epoch": 0.08127827228950825, "grad_norm": 1.3583654691551421, "learning_rate": 4.987990751716324e-05, "loss": 0.7132, "step": 2785 }, { "epoch": 0.08142419378374434, "grad_norm": 1.4059881030327255, "learning_rate": 4.987878459827528e-05, "loss": 0.6947, "step": 2790 }, { "epoch": 0.08157011527798044, "grad_norm": 1.2023733663689815, "learning_rate": 4.987765646806936e-05, "loss": 0.689, "step": 2795 }, { "epoch": 0.08171603677221655, "grad_norm": 1.317230584309162, "learning_rate": 4.987652312680819e-05, "loss": 0.7492, "step": 2800 }, { "epoch": 0.08186195826645265, "grad_norm": 1.3783632730546544, "learning_rate": 4.9875384574755694e-05, "loss": 0.7346, "step": 2805 }, { "epoch": 0.08200787976068875, "grad_norm": 1.196338333312184, "learning_rate": 4.9874240812177e-05, "loss": 0.7646, "step": 2810 }, { "epoch": 0.08215380125492484, "grad_norm": 1.246386257612844, "learning_rate": 4.987309183933844e-05, "loss": 0.7509, "step": 2815 }, { "epoch": 0.08229972274916095, "grad_norm": 1.2055115580600684, "learning_rate": 4.98719376565076e-05, "loss": 0.7155, "step": 2820 }, { "epoch": 0.08244564424339705, "grad_norm": 1.1966349790298623, "learning_rate": 4.987077826395323e-05, "loss": 0.7391, "step": 2825 }, { "epoch": 0.08259156573763315, "grad_norm": 1.0963618941025084, "learning_rate": 4.986961366194533e-05, "loss": 0.6591, "step": 2830 }, { "epoch": 0.08273748723186926, "grad_norm": 1.192009327828126, "learning_rate": 4.9868443850755104e-05, "loss": 0.7139, "step": 2835 }, { "epoch": 0.08288340872610536, "grad_norm": 1.1364803193025055, "learning_rate": 4.9867268830654944e-05, "loss": 0.6826, "step": 2840 }, { "epoch": 0.08302933022034145, "grad_norm": 1.1202359205091355, "learning_rate": 4.98660886019185e-05, "loss": 0.755, "step": 2845 }, { "epoch": 0.08317525171457756, "grad_norm": 1.1929110366360403, "learning_rate": 4.986490316482059e-05, "loss": 0.7325, "step": 2850 }, { "epoch": 0.08332117320881366, "grad_norm": 1.2271051062947187, "learning_rate": 4.9863712519637265e-05, "loss": 0.6565, "step": 2855 }, { "epoch": 0.08346709470304976, "grad_norm": 1.5273667328861529, "learning_rate": 4.986251666664581e-05, "loss": 0.7884, "step": 2860 }, { "epoch": 0.08361301619728587, "grad_norm": 1.267577656974418, "learning_rate": 4.9861315606124686e-05, "loss": 0.6872, "step": 2865 }, { "epoch": 0.08375893769152196, "grad_norm": 1.4567896589375045, "learning_rate": 4.9860109338353584e-05, "loss": 0.7867, "step": 2870 }, { "epoch": 0.08390485918575806, "grad_norm": 1.3045714144836331, "learning_rate": 4.9858897863613404e-05, "loss": 0.7437, "step": 2875 }, { "epoch": 0.08405078067999416, "grad_norm": 0.9988958220535561, "learning_rate": 4.985768118218627e-05, "loss": 0.6783, "step": 2880 }, { "epoch": 0.08419670217423027, "grad_norm": 1.1781778104181264, "learning_rate": 4.9856459294355497e-05, "loss": 0.6905, "step": 2885 }, { "epoch": 0.08434262366846637, "grad_norm": 1.231471787388155, "learning_rate": 4.985523220040563e-05, "loss": 0.7351, "step": 2890 }, { "epoch": 0.08448854516270246, "grad_norm": 1.268218950110271, "learning_rate": 4.9853999900622416e-05, "loss": 0.6934, "step": 2895 }, { "epoch": 0.08463446665693856, "grad_norm": 1.113655372665911, "learning_rate": 4.9852762395292835e-05, "loss": 0.7237, "step": 2900 }, { "epoch": 0.08478038815117467, "grad_norm": 1.2525174924193185, "learning_rate": 4.985151968470505e-05, "loss": 0.6752, "step": 2905 }, { "epoch": 0.08492630964541077, "grad_norm": 1.335474313666936, "learning_rate": 4.9850271769148444e-05, "loss": 0.7254, "step": 2910 }, { "epoch": 0.08507223113964688, "grad_norm": 1.3590788327898935, "learning_rate": 4.984901864891363e-05, "loss": 0.7297, "step": 2915 }, { "epoch": 0.08521815263388297, "grad_norm": 1.3306705278241404, "learning_rate": 4.984776032429241e-05, "loss": 0.7401, "step": 2920 }, { "epoch": 0.08536407412811907, "grad_norm": 1.2362927879506074, "learning_rate": 4.984649679557782e-05, "loss": 0.6944, "step": 2925 }, { "epoch": 0.08550999562235517, "grad_norm": 1.1225371760943397, "learning_rate": 4.984522806306408e-05, "loss": 0.6683, "step": 2930 }, { "epoch": 0.08565591711659128, "grad_norm": 1.0449421965989825, "learning_rate": 4.984395412704666e-05, "loss": 0.6312, "step": 2935 }, { "epoch": 0.08580183861082738, "grad_norm": 1.0897455771394955, "learning_rate": 4.98426749878222e-05, "loss": 0.7312, "step": 2940 }, { "epoch": 0.08594776010506347, "grad_norm": 0.9833949712313572, "learning_rate": 4.984139064568858e-05, "loss": 0.7011, "step": 2945 }, { "epoch": 0.08609368159929957, "grad_norm": 1.0874351286478818, "learning_rate": 4.984010110094489e-05, "loss": 0.7516, "step": 2950 }, { "epoch": 0.08623960309353568, "grad_norm": 1.2921691113693496, "learning_rate": 4.9838806353891407e-05, "loss": 0.7186, "step": 2955 }, { "epoch": 0.08638552458777178, "grad_norm": 1.0415040392541335, "learning_rate": 4.983750640482965e-05, "loss": 0.6917, "step": 2960 }, { "epoch": 0.08653144608200788, "grad_norm": 1.1673917672784808, "learning_rate": 4.9836201254062334e-05, "loss": 0.7273, "step": 2965 }, { "epoch": 0.08667736757624397, "grad_norm": 1.262873236997477, "learning_rate": 4.9834890901893386e-05, "loss": 0.6596, "step": 2970 }, { "epoch": 0.08682328907048008, "grad_norm": 1.1768615350196587, "learning_rate": 4.983357534862795e-05, "loss": 0.6493, "step": 2975 }, { "epoch": 0.08696921056471618, "grad_norm": 1.059654404353001, "learning_rate": 4.9832254594572366e-05, "loss": 0.6768, "step": 2980 }, { "epoch": 0.08711513205895229, "grad_norm": 1.1256162724457224, "learning_rate": 4.983092864003421e-05, "loss": 0.7422, "step": 2985 }, { "epoch": 0.08726105355318839, "grad_norm": 1.148474679319773, "learning_rate": 4.982959748532225e-05, "loss": 0.7004, "step": 2990 }, { "epoch": 0.08740697504742448, "grad_norm": 1.173954561594251, "learning_rate": 4.9828261130746466e-05, "loss": 0.7753, "step": 2995 }, { "epoch": 0.08755289654166058, "grad_norm": 1.3498005917217282, "learning_rate": 4.9826919576618066e-05, "loss": 0.6834, "step": 3000 }, { "epoch": 0.08769881803589669, "grad_norm": 1.1462259162480168, "learning_rate": 4.982557282324943e-05, "loss": 0.6973, "step": 3005 }, { "epoch": 0.08784473953013279, "grad_norm": 1.1072100689840563, "learning_rate": 4.9824220870954206e-05, "loss": 0.6913, "step": 3010 }, { "epoch": 0.0879906610243689, "grad_norm": 1.2807573783932273, "learning_rate": 4.9822863720047203e-05, "loss": 0.6536, "step": 3015 }, { "epoch": 0.088136582518605, "grad_norm": 1.0365849324960532, "learning_rate": 4.982150137084446e-05, "loss": 0.6788, "step": 3020 }, { "epoch": 0.08828250401284109, "grad_norm": 1.1952376905923747, "learning_rate": 4.982013382366323e-05, "loss": 0.6548, "step": 3025 }, { "epoch": 0.08842842550707719, "grad_norm": 1.3987627048957174, "learning_rate": 4.981876107882198e-05, "loss": 0.7139, "step": 3030 }, { "epoch": 0.0885743470013133, "grad_norm": 1.0769926083176462, "learning_rate": 4.9817383136640355e-05, "loss": 0.6507, "step": 3035 }, { "epoch": 0.0887202684955494, "grad_norm": 1.1732014292395665, "learning_rate": 4.9815999997439256e-05, "loss": 0.7292, "step": 3040 }, { "epoch": 0.0888661899897855, "grad_norm": 1.003817734610896, "learning_rate": 4.9814611661540774e-05, "loss": 0.6988, "step": 3045 }, { "epoch": 0.08901211148402159, "grad_norm": 2.040218808094716, "learning_rate": 4.981321812926819e-05, "loss": 0.685, "step": 3050 }, { "epoch": 0.0891580329782577, "grad_norm": 1.1880004978203618, "learning_rate": 4.981181940094602e-05, "loss": 0.6618, "step": 3055 }, { "epoch": 0.0893039544724938, "grad_norm": 1.1564266807523542, "learning_rate": 4.9810415476899994e-05, "loss": 0.7569, "step": 3060 }, { "epoch": 0.0894498759667299, "grad_norm": 1.2535946055928098, "learning_rate": 4.980900635745704e-05, "loss": 0.7508, "step": 3065 }, { "epoch": 0.089595797460966, "grad_norm": 1.1351444943082982, "learning_rate": 4.980759204294529e-05, "loss": 0.6699, "step": 3070 }, { "epoch": 0.0897417189552021, "grad_norm": 1.3462445407924764, "learning_rate": 4.98061725336941e-05, "loss": 0.715, "step": 3075 }, { "epoch": 0.0898876404494382, "grad_norm": 1.1012961748924166, "learning_rate": 4.980474783003403e-05, "loss": 0.642, "step": 3080 }, { "epoch": 0.0900335619436743, "grad_norm": 1.1211860552780468, "learning_rate": 4.980331793229684e-05, "loss": 0.6619, "step": 3085 }, { "epoch": 0.09017948343791041, "grad_norm": 1.184993006230416, "learning_rate": 4.980188284081552e-05, "loss": 0.6635, "step": 3090 }, { "epoch": 0.09032540493214651, "grad_norm": 1.1983927883515362, "learning_rate": 4.980044255592425e-05, "loss": 0.6764, "step": 3095 }, { "epoch": 0.0904713264263826, "grad_norm": 1.3113670074414379, "learning_rate": 4.979899707795844e-05, "loss": 0.727, "step": 3100 }, { "epoch": 0.0906172479206187, "grad_norm": 1.3952508835169883, "learning_rate": 4.979754640725467e-05, "loss": 0.711, "step": 3105 }, { "epoch": 0.09076316941485481, "grad_norm": 1.031330087988221, "learning_rate": 4.979609054415078e-05, "loss": 0.6518, "step": 3110 }, { "epoch": 0.09090909090909091, "grad_norm": 1.4750094137763674, "learning_rate": 4.979462948898578e-05, "loss": 0.7427, "step": 3115 }, { "epoch": 0.09105501240332702, "grad_norm": 1.3348483940492921, "learning_rate": 4.979316324209992e-05, "loss": 0.6531, "step": 3120 }, { "epoch": 0.0912009338975631, "grad_norm": 1.0286431946256631, "learning_rate": 4.979169180383463e-05, "loss": 0.6378, "step": 3125 }, { "epoch": 0.09134685539179921, "grad_norm": 1.0571151209612941, "learning_rate": 4.9790215174532566e-05, "loss": 0.7234, "step": 3130 }, { "epoch": 0.09149277688603531, "grad_norm": 1.1991209013086663, "learning_rate": 4.978873335453758e-05, "loss": 0.6777, "step": 3135 }, { "epoch": 0.09163869838027142, "grad_norm": 1.175362245670362, "learning_rate": 4.978724634419477e-05, "loss": 0.6914, "step": 3140 }, { "epoch": 0.09178461987450752, "grad_norm": 1.196997930322026, "learning_rate": 4.978575414385038e-05, "loss": 0.6474, "step": 3145 }, { "epoch": 0.09193054136874361, "grad_norm": 1.3384223340418013, "learning_rate": 4.978425675385191e-05, "loss": 0.7768, "step": 3150 }, { "epoch": 0.09207646286297971, "grad_norm": 1.0384767224639597, "learning_rate": 4.978275417454806e-05, "loss": 0.6996, "step": 3155 }, { "epoch": 0.09222238435721582, "grad_norm": 1.2381868343202516, "learning_rate": 4.978124640628873e-05, "loss": 0.7248, "step": 3160 }, { "epoch": 0.09236830585145192, "grad_norm": 1.2102390617323306, "learning_rate": 4.977973344942503e-05, "loss": 0.7065, "step": 3165 }, { "epoch": 0.09251422734568802, "grad_norm": 1.1861424369383182, "learning_rate": 4.977821530430928e-05, "loss": 0.7112, "step": 3170 }, { "epoch": 0.09266014883992411, "grad_norm": 1.1126931965592837, "learning_rate": 4.977669197129502e-05, "loss": 0.7256, "step": 3175 }, { "epoch": 0.09280607033416022, "grad_norm": 1.2141998501071505, "learning_rate": 4.9775163450736964e-05, "loss": 0.7438, "step": 3180 }, { "epoch": 0.09295199182839632, "grad_norm": 1.0884904273507405, "learning_rate": 4.977362974299108e-05, "loss": 0.6883, "step": 3185 }, { "epoch": 0.09309791332263243, "grad_norm": 1.16234164762006, "learning_rate": 4.97720908484145e-05, "loss": 0.6792, "step": 3190 }, { "epoch": 0.09324383481686853, "grad_norm": 1.1112809561411157, "learning_rate": 4.97705467673656e-05, "loss": 0.7112, "step": 3195 }, { "epoch": 0.09338975631110463, "grad_norm": 1.1256078172996438, "learning_rate": 4.976899750020394e-05, "loss": 0.7838, "step": 3200 }, { "epoch": 0.09353567780534072, "grad_norm": 0.9768412065363534, "learning_rate": 4.97674430472903e-05, "loss": 0.6704, "step": 3205 }, { "epoch": 0.09368159929957683, "grad_norm": 1.08551886453828, "learning_rate": 4.976588340898666e-05, "loss": 0.7083, "step": 3210 }, { "epoch": 0.09382752079381293, "grad_norm": 1.1884734839902384, "learning_rate": 4.976431858565621e-05, "loss": 0.6928, "step": 3215 }, { "epoch": 0.09397344228804903, "grad_norm": 1.105909237294257, "learning_rate": 4.976274857766336e-05, "loss": 0.6955, "step": 3220 }, { "epoch": 0.09411936378228514, "grad_norm": 1.0942350340815739, "learning_rate": 4.97611733853737e-05, "loss": 0.69, "step": 3225 }, { "epoch": 0.09426528527652123, "grad_norm": 1.0963794673649976, "learning_rate": 4.9759593009154046e-05, "loss": 0.7406, "step": 3230 }, { "epoch": 0.09441120677075733, "grad_norm": 1.150762137302678, "learning_rate": 4.975800744937243e-05, "loss": 0.6666, "step": 3235 }, { "epoch": 0.09455712826499343, "grad_norm": 1.2624789491552089, "learning_rate": 4.975641670639806e-05, "loss": 0.6959, "step": 3240 }, { "epoch": 0.09470304975922954, "grad_norm": 1.1590024853640135, "learning_rate": 4.9754820780601395e-05, "loss": 0.7952, "step": 3245 }, { "epoch": 0.09484897125346564, "grad_norm": 1.427640198590033, "learning_rate": 4.9753219672354054e-05, "loss": 0.7542, "step": 3250 }, { "epoch": 0.09499489274770173, "grad_norm": 1.1741027034407927, "learning_rate": 4.97516133820289e-05, "loss": 0.7209, "step": 3255 }, { "epoch": 0.09514081424193783, "grad_norm": 1.0880492775724266, "learning_rate": 4.9750001909999966e-05, "loss": 0.6759, "step": 3260 }, { "epoch": 0.09528673573617394, "grad_norm": 1.7605510387792094, "learning_rate": 4.974838525664254e-05, "loss": 0.7226, "step": 3265 }, { "epoch": 0.09543265723041004, "grad_norm": 1.05993250960408, "learning_rate": 4.974676342233307e-05, "loss": 0.6388, "step": 3270 }, { "epoch": 0.09557857872464615, "grad_norm": 1.1455793222520776, "learning_rate": 4.9745136407449246e-05, "loss": 0.7146, "step": 3275 }, { "epoch": 0.09572450021888224, "grad_norm": 1.0616651244419713, "learning_rate": 4.974350421236994e-05, "loss": 0.6945, "step": 3280 }, { "epoch": 0.09587042171311834, "grad_norm": 1.121995227351763, "learning_rate": 4.974186683747523e-05, "loss": 0.7067, "step": 3285 }, { "epoch": 0.09601634320735444, "grad_norm": 1.2071546013549588, "learning_rate": 4.974022428314643e-05, "loss": 0.7406, "step": 3290 }, { "epoch": 0.09616226470159055, "grad_norm": 1.1636972409998387, "learning_rate": 4.9738576549766036e-05, "loss": 0.6935, "step": 3295 }, { "epoch": 0.09630818619582665, "grad_norm": 1.2427381463150373, "learning_rate": 4.9736923637717734e-05, "loss": 0.7455, "step": 3300 }, { "epoch": 0.09645410769006274, "grad_norm": 1.016242405024273, "learning_rate": 4.973526554738646e-05, "loss": 0.6622, "step": 3305 }, { "epoch": 0.09660002918429884, "grad_norm": 1.2431076572728077, "learning_rate": 4.973360227915831e-05, "loss": 0.6717, "step": 3310 }, { "epoch": 0.09674595067853495, "grad_norm": 1.6131792652997232, "learning_rate": 4.973193383342061e-05, "loss": 0.7117, "step": 3315 }, { "epoch": 0.09689187217277105, "grad_norm": 1.186107378187242, "learning_rate": 4.9730260210561906e-05, "loss": 0.7224, "step": 3320 }, { "epoch": 0.09703779366700716, "grad_norm": 1.2147870521498203, "learning_rate": 4.9728581410971914e-05, "loss": 0.6879, "step": 3325 }, { "epoch": 0.09718371516124324, "grad_norm": 1.2312511243072912, "learning_rate": 4.972689743504159e-05, "loss": 0.7256, "step": 3330 }, { "epoch": 0.09732963665547935, "grad_norm": 1.2516252749369035, "learning_rate": 4.9725208283163065e-05, "loss": 0.6975, "step": 3335 }, { "epoch": 0.09747555814971545, "grad_norm": 1.1007904395171177, "learning_rate": 4.97235139557297e-05, "loss": 0.6565, "step": 3340 }, { "epoch": 0.09762147964395156, "grad_norm": 1.03754920253913, "learning_rate": 4.9721814453136044e-05, "loss": 0.6863, "step": 3345 }, { "epoch": 0.09776740113818766, "grad_norm": 1.0957651348877475, "learning_rate": 4.9720109775777856e-05, "loss": 0.6866, "step": 3350 }, { "epoch": 0.09791332263242375, "grad_norm": 1.1472195240860321, "learning_rate": 4.9718399924052114e-05, "loss": 0.708, "step": 3355 }, { "epoch": 0.09805924412665985, "grad_norm": 1.1121867630779652, "learning_rate": 4.971668489835697e-05, "loss": 0.6556, "step": 3360 }, { "epoch": 0.09820516562089596, "grad_norm": 1.1609523591723236, "learning_rate": 4.9714964699091815e-05, "loss": 0.6499, "step": 3365 }, { "epoch": 0.09835108711513206, "grad_norm": 1.1403769749316155, "learning_rate": 4.971323932665724e-05, "loss": 0.6795, "step": 3370 }, { "epoch": 0.09849700860936816, "grad_norm": 1.218840248722583, "learning_rate": 4.9711508781454994e-05, "loss": 0.6973, "step": 3375 }, { "epoch": 0.09864293010360427, "grad_norm": 0.9992988573750679, "learning_rate": 4.970977306388809e-05, "loss": 0.7881, "step": 3380 }, { "epoch": 0.09878885159784036, "grad_norm": 1.0798197309985502, "learning_rate": 4.9708032174360736e-05, "loss": 0.6932, "step": 3385 }, { "epoch": 0.09893477309207646, "grad_norm": 1.0567826946360994, "learning_rate": 4.97062861132783e-05, "loss": 0.6397, "step": 3390 }, { "epoch": 0.09908069458631256, "grad_norm": 1.1183540851594458, "learning_rate": 4.97045348810474e-05, "loss": 0.7244, "step": 3395 }, { "epoch": 0.09922661608054867, "grad_norm": 1.08644138005523, "learning_rate": 4.970277847807585e-05, "loss": 0.6688, "step": 3400 }, { "epoch": 0.09937253757478477, "grad_norm": 1.0183267086715422, "learning_rate": 4.970101690477265e-05, "loss": 0.7521, "step": 3405 }, { "epoch": 0.09951845906902086, "grad_norm": 0.9532633375357952, "learning_rate": 4.9699250161548025e-05, "loss": 0.6653, "step": 3410 }, { "epoch": 0.09966438056325697, "grad_norm": 1.1128782232322667, "learning_rate": 4.969747824881339e-05, "loss": 0.7332, "step": 3415 }, { "epoch": 0.09981030205749307, "grad_norm": 1.2277887767057005, "learning_rate": 4.969570116698136e-05, "loss": 0.7424, "step": 3420 }, { "epoch": 0.09995622355172917, "grad_norm": 1.3449492003729393, "learning_rate": 4.969391891646577e-05, "loss": 0.688, "step": 3425 }, { "epoch": 0.10010214504596528, "grad_norm": 1.0547876220999304, "learning_rate": 4.969213149768165e-05, "loss": 0.7012, "step": 3430 }, { "epoch": 0.10024806654020137, "grad_norm": 1.1135509924007378, "learning_rate": 4.9690338911045224e-05, "loss": 0.6858, "step": 3435 }, { "epoch": 0.10039398803443747, "grad_norm": 1.2930177751139922, "learning_rate": 4.968854115697395e-05, "loss": 0.7177, "step": 3440 }, { "epoch": 0.10053990952867357, "grad_norm": 0.9801091259944152, "learning_rate": 4.968673823588644e-05, "loss": 0.68, "step": 3445 }, { "epoch": 0.10068583102290968, "grad_norm": 1.3230915844116853, "learning_rate": 4.968493014820257e-05, "loss": 0.6961, "step": 3450 }, { "epoch": 0.10083175251714578, "grad_norm": 1.121684088453338, "learning_rate": 4.9683116894343365e-05, "loss": 0.6681, "step": 3455 }, { "epoch": 0.10097767401138187, "grad_norm": 1.3146857840377224, "learning_rate": 4.968129847473109e-05, "loss": 0.6483, "step": 3460 }, { "epoch": 0.10112359550561797, "grad_norm": 1.1950769138034345, "learning_rate": 4.967947488978918e-05, "loss": 0.6859, "step": 3465 }, { "epoch": 0.10126951699985408, "grad_norm": 2.6954405873542187, "learning_rate": 4.967764613994231e-05, "loss": 0.7587, "step": 3470 }, { "epoch": 0.10141543849409018, "grad_norm": 1.1494002331768778, "learning_rate": 4.9675812225616315e-05, "loss": 0.7168, "step": 3475 }, { "epoch": 0.10156135998832629, "grad_norm": 1.1387928894097017, "learning_rate": 4.967397314723827e-05, "loss": 0.7162, "step": 3480 }, { "epoch": 0.10170728148256238, "grad_norm": 1.0665249328499524, "learning_rate": 4.967212890523645e-05, "loss": 0.6751, "step": 3485 }, { "epoch": 0.10185320297679848, "grad_norm": 1.3485398869041203, "learning_rate": 4.9670279500040304e-05, "loss": 0.7123, "step": 3490 }, { "epoch": 0.10199912447103458, "grad_norm": 1.3247771365679535, "learning_rate": 4.9668424932080496e-05, "loss": 0.7611, "step": 3495 }, { "epoch": 0.10214504596527069, "grad_norm": 1.3875140355929463, "learning_rate": 4.9666565201788924e-05, "loss": 0.6743, "step": 3500 }, { "epoch": 0.10229096745950679, "grad_norm": 1.118033850514331, "learning_rate": 4.966470030959863e-05, "loss": 0.6804, "step": 3505 }, { "epoch": 0.10243688895374288, "grad_norm": 1.1170995094232437, "learning_rate": 4.966283025594391e-05, "loss": 0.6768, "step": 3510 }, { "epoch": 0.10258281044797898, "grad_norm": 1.1493628430712297, "learning_rate": 4.9660955041260237e-05, "loss": 0.6983, "step": 3515 }, { "epoch": 0.10272873194221509, "grad_norm": 1.4000711593961446, "learning_rate": 4.9659074665984286e-05, "loss": 0.6511, "step": 3520 }, { "epoch": 0.10287465343645119, "grad_norm": 1.3070196294106666, "learning_rate": 4.965718913055393e-05, "loss": 0.7775, "step": 3525 }, { "epoch": 0.1030205749306873, "grad_norm": 1.2717808332885354, "learning_rate": 4.965529843540827e-05, "loss": 0.7129, "step": 3530 }, { "epoch": 0.10316649642492338, "grad_norm": 1.1457671612092146, "learning_rate": 4.9653402580987576e-05, "loss": 0.72, "step": 3535 }, { "epoch": 0.10331241791915949, "grad_norm": 1.2061272507395866, "learning_rate": 4.965150156773333e-05, "loss": 0.6714, "step": 3540 }, { "epoch": 0.10345833941339559, "grad_norm": 1.309567326916106, "learning_rate": 4.9649595396088225e-05, "loss": 0.6464, "step": 3545 }, { "epoch": 0.1036042609076317, "grad_norm": 1.1702096394824586, "learning_rate": 4.964768406649616e-05, "loss": 0.7203, "step": 3550 }, { "epoch": 0.1037501824018678, "grad_norm": 1.2502357271888245, "learning_rate": 4.9645767579402196e-05, "loss": 0.6603, "step": 3555 }, { "epoch": 0.1038961038961039, "grad_norm": 1.151807755146837, "learning_rate": 4.9643845935252645e-05, "loss": 0.717, "step": 3560 }, { "epoch": 0.10404202539033999, "grad_norm": 1.1128499028564218, "learning_rate": 4.964191913449499e-05, "loss": 0.7244, "step": 3565 }, { "epoch": 0.1041879468845761, "grad_norm": 1.0151509912176624, "learning_rate": 4.963998717757793e-05, "loss": 0.7432, "step": 3570 }, { "epoch": 0.1043338683788122, "grad_norm": 1.1681965560359133, "learning_rate": 4.9638050064951345e-05, "loss": 0.6638, "step": 3575 }, { "epoch": 0.1044797898730483, "grad_norm": 1.0094258276922727, "learning_rate": 4.963610779706634e-05, "loss": 0.6754, "step": 3580 }, { "epoch": 0.10462571136728441, "grad_norm": 1.0151724027887894, "learning_rate": 4.9634160374375204e-05, "loss": 0.6727, "step": 3585 }, { "epoch": 0.1047716328615205, "grad_norm": 1.305935248131152, "learning_rate": 4.963220779733142e-05, "loss": 0.7117, "step": 3590 }, { "epoch": 0.1049175543557566, "grad_norm": 1.0699537996460562, "learning_rate": 4.963025006638969e-05, "loss": 0.6609, "step": 3595 }, { "epoch": 0.1050634758499927, "grad_norm": 1.1105590591702637, "learning_rate": 4.962828718200592e-05, "loss": 0.6672, "step": 3600 }, { "epoch": 0.10520939734422881, "grad_norm": 1.1904371044392517, "learning_rate": 4.962631914463719e-05, "loss": 0.7133, "step": 3605 }, { "epoch": 0.10535531883846491, "grad_norm": 1.152210070696163, "learning_rate": 4.96243459547418e-05, "loss": 0.715, "step": 3610 }, { "epoch": 0.105501240332701, "grad_norm": 1.137920585360193, "learning_rate": 4.962236761277924e-05, "loss": 0.7249, "step": 3615 }, { "epoch": 0.1056471618269371, "grad_norm": 1.0733114833174349, "learning_rate": 4.9620384119210204e-05, "loss": 0.6783, "step": 3620 }, { "epoch": 0.10579308332117321, "grad_norm": 1.2624089700840981, "learning_rate": 4.961839547449659e-05, "loss": 0.7054, "step": 3625 }, { "epoch": 0.10593900481540931, "grad_norm": 1.1024741720966582, "learning_rate": 4.961640167910149e-05, "loss": 0.6817, "step": 3630 }, { "epoch": 0.10608492630964542, "grad_norm": 1.055684212905771, "learning_rate": 4.961440273348919e-05, "loss": 0.7008, "step": 3635 }, { "epoch": 0.1062308478038815, "grad_norm": 1.1231448411310259, "learning_rate": 4.961239863812519e-05, "loss": 0.7423, "step": 3640 }, { "epoch": 0.10637676929811761, "grad_norm": 1.263122923150071, "learning_rate": 4.9610389393476184e-05, "loss": 0.653, "step": 3645 }, { "epoch": 0.10652269079235371, "grad_norm": 1.1323829386152715, "learning_rate": 4.960837500001005e-05, "loss": 0.6384, "step": 3650 }, { "epoch": 0.10666861228658982, "grad_norm": 1.1650779744183986, "learning_rate": 4.9606355458195876e-05, "loss": 0.6542, "step": 3655 }, { "epoch": 0.10681453378082592, "grad_norm": 1.234097765982052, "learning_rate": 4.9604330768503964e-05, "loss": 0.6946, "step": 3660 }, { "epoch": 0.10696045527506201, "grad_norm": 1.2408927795713185, "learning_rate": 4.9602300931405795e-05, "loss": 0.6617, "step": 3665 }, { "epoch": 0.10710637676929811, "grad_norm": 1.0662935084016967, "learning_rate": 4.9600265947374053e-05, "loss": 0.6373, "step": 3670 }, { "epoch": 0.10725229826353422, "grad_norm": 1.1469927538453333, "learning_rate": 4.9598225816882624e-05, "loss": 0.6683, "step": 3675 }, { "epoch": 0.10739821975777032, "grad_norm": 1.186915343133445, "learning_rate": 4.959618054040659e-05, "loss": 0.7256, "step": 3680 }, { "epoch": 0.10754414125200643, "grad_norm": 1.000177644978394, "learning_rate": 4.959413011842223e-05, "loss": 0.6498, "step": 3685 }, { "epoch": 0.10769006274624252, "grad_norm": 1.1539678091223273, "learning_rate": 4.9592074551407015e-05, "loss": 0.6186, "step": 3690 }, { "epoch": 0.10783598424047862, "grad_norm": 1.2031960672132378, "learning_rate": 4.959001383983964e-05, "loss": 0.6727, "step": 3695 }, { "epoch": 0.10798190573471472, "grad_norm": 1.1203093307457614, "learning_rate": 4.9587947984199974e-05, "loss": 0.7084, "step": 3700 }, { "epoch": 0.10812782722895083, "grad_norm": 1.0930047803753644, "learning_rate": 4.958587698496908e-05, "loss": 0.6911, "step": 3705 }, { "epoch": 0.10827374872318693, "grad_norm": 1.0804426531539648, "learning_rate": 4.958380084262924e-05, "loss": 0.7264, "step": 3710 }, { "epoch": 0.10841967021742302, "grad_norm": 1.0327209110706148, "learning_rate": 4.958171955766392e-05, "loss": 0.6717, "step": 3715 }, { "epoch": 0.10856559171165912, "grad_norm": 1.1456593956829084, "learning_rate": 4.957963313055778e-05, "loss": 0.6963, "step": 3720 }, { "epoch": 0.10871151320589523, "grad_norm": 1.0654436206228783, "learning_rate": 4.95775415617967e-05, "loss": 0.6323, "step": 3725 }, { "epoch": 0.10885743470013133, "grad_norm": 1.139456632225083, "learning_rate": 4.9575444851867717e-05, "loss": 0.6703, "step": 3730 }, { "epoch": 0.10900335619436743, "grad_norm": 1.0315056259890136, "learning_rate": 4.9573343001259104e-05, "loss": 0.6562, "step": 3735 }, { "epoch": 0.10914927768860354, "grad_norm": 0.9934468425215857, "learning_rate": 4.957123601046032e-05, "loss": 0.7148, "step": 3740 }, { "epoch": 0.10929519918283963, "grad_norm": 1.1066247601584256, "learning_rate": 4.9569123879962e-05, "loss": 0.7387, "step": 3745 }, { "epoch": 0.10944112067707573, "grad_norm": 0.9969143972124256, "learning_rate": 4.9567006610256005e-05, "loss": 0.6504, "step": 3750 }, { "epoch": 0.10958704217131184, "grad_norm": 1.1041909453708623, "learning_rate": 4.956488420183539e-05, "loss": 0.6679, "step": 3755 }, { "epoch": 0.10973296366554794, "grad_norm": 1.2074690639340238, "learning_rate": 4.956275665519438e-05, "loss": 0.6779, "step": 3760 }, { "epoch": 0.10987888515978404, "grad_norm": 0.999561955003958, "learning_rate": 4.9560623970828416e-05, "loss": 0.6711, "step": 3765 }, { "epoch": 0.11002480665402013, "grad_norm": 1.0014789293999864, "learning_rate": 4.9558486149234146e-05, "loss": 0.6856, "step": 3770 }, { "epoch": 0.11017072814825624, "grad_norm": 1.0498638907167868, "learning_rate": 4.955634319090939e-05, "loss": 0.6988, "step": 3775 }, { "epoch": 0.11031664964249234, "grad_norm": 1.1802100008229806, "learning_rate": 4.9554195096353176e-05, "loss": 0.6323, "step": 3780 }, { "epoch": 0.11046257113672844, "grad_norm": 1.3345812142761189, "learning_rate": 4.9552041866065745e-05, "loss": 0.6842, "step": 3785 }, { "epoch": 0.11060849263096455, "grad_norm": 1.2694364016288235, "learning_rate": 4.9549883500548494e-05, "loss": 0.7214, "step": 3790 }, { "epoch": 0.11075441412520064, "grad_norm": 1.1593843434128928, "learning_rate": 4.9547720000304046e-05, "loss": 0.7009, "step": 3795 }, { "epoch": 0.11090033561943674, "grad_norm": 1.0358096270572228, "learning_rate": 4.954555136583622e-05, "loss": 0.6624, "step": 3800 }, { "epoch": 0.11104625711367284, "grad_norm": 1.4308647177244223, "learning_rate": 4.954337759765002e-05, "loss": 0.6679, "step": 3805 }, { "epoch": 0.11119217860790895, "grad_norm": 1.0952065711390835, "learning_rate": 4.9541198696251644e-05, "loss": 0.7018, "step": 3810 }, { "epoch": 0.11133810010214505, "grad_norm": 1.3218450322554676, "learning_rate": 4.9539014662148484e-05, "loss": 0.7201, "step": 3815 }, { "epoch": 0.11148402159638114, "grad_norm": 1.2463715817740397, "learning_rate": 4.9536825495849155e-05, "loss": 0.718, "step": 3820 }, { "epoch": 0.11162994309061725, "grad_norm": 1.102259866702118, "learning_rate": 4.9534631197863423e-05, "loss": 0.6718, "step": 3825 }, { "epoch": 0.11177586458485335, "grad_norm": 1.2021265414902833, "learning_rate": 4.9532431768702286e-05, "loss": 0.7545, "step": 3830 }, { "epoch": 0.11192178607908945, "grad_norm": 1.347624790502832, "learning_rate": 4.953022720887791e-05, "loss": 0.7185, "step": 3835 }, { "epoch": 0.11206770757332556, "grad_norm": 1.165874375525849, "learning_rate": 4.952801751890368e-05, "loss": 0.6511, "step": 3840 }, { "epoch": 0.11221362906756165, "grad_norm": 1.205242395696266, "learning_rate": 4.952580269929414e-05, "loss": 0.6618, "step": 3845 }, { "epoch": 0.11235955056179775, "grad_norm": 0.9887761301249266, "learning_rate": 4.952358275056509e-05, "loss": 0.6744, "step": 3850 }, { "epoch": 0.11250547205603385, "grad_norm": 1.1099126410011242, "learning_rate": 4.952135767323347e-05, "loss": 0.677, "step": 3855 }, { "epoch": 0.11265139355026996, "grad_norm": 1.1898642626689542, "learning_rate": 4.951912746781741e-05, "loss": 0.679, "step": 3860 }, { "epoch": 0.11279731504450606, "grad_norm": 1.0778655871867437, "learning_rate": 4.951689213483627e-05, "loss": 0.6526, "step": 3865 }, { "epoch": 0.11294323653874215, "grad_norm": 1.0782087896187873, "learning_rate": 4.9514651674810605e-05, "loss": 0.6702, "step": 3870 }, { "epoch": 0.11308915803297825, "grad_norm": 1.1134393749112423, "learning_rate": 4.9512406088262134e-05, "loss": 0.6703, "step": 3875 }, { "epoch": 0.11323507952721436, "grad_norm": 1.3534667536585756, "learning_rate": 4.9510155375713765e-05, "loss": 0.6475, "step": 3880 }, { "epoch": 0.11338100102145046, "grad_norm": 1.3521566330146864, "learning_rate": 4.9507899537689655e-05, "loss": 0.7709, "step": 3885 }, { "epoch": 0.11352692251568657, "grad_norm": 0.960034124184033, "learning_rate": 4.9505638574715094e-05, "loss": 0.6727, "step": 3890 }, { "epoch": 0.11367284400992266, "grad_norm": 1.1115533964068696, "learning_rate": 4.95033724873166e-05, "loss": 0.6992, "step": 3895 }, { "epoch": 0.11381876550415876, "grad_norm": 1.2702673940872604, "learning_rate": 4.950110127602186e-05, "loss": 0.6363, "step": 3900 }, { "epoch": 0.11396468699839486, "grad_norm": 1.1744532141248218, "learning_rate": 4.949882494135979e-05, "loss": 0.6128, "step": 3905 }, { "epoch": 0.11411060849263097, "grad_norm": 1.1534039386334998, "learning_rate": 4.9496543483860457e-05, "loss": 0.6734, "step": 3910 }, { "epoch": 0.11425652998686707, "grad_norm": 1.2909904316329845, "learning_rate": 4.9494256904055156e-05, "loss": 0.7373, "step": 3915 }, { "epoch": 0.11440245148110317, "grad_norm": 1.082559793202668, "learning_rate": 4.949196520247634e-05, "loss": 0.663, "step": 3920 }, { "epoch": 0.11454837297533926, "grad_norm": 1.037947317039563, "learning_rate": 4.948966837965769e-05, "loss": 0.676, "step": 3925 }, { "epoch": 0.11469429446957537, "grad_norm": 1.0357868161932815, "learning_rate": 4.948736643613406e-05, "loss": 0.6627, "step": 3930 }, { "epoch": 0.11484021596381147, "grad_norm": 1.1225526894162952, "learning_rate": 4.948505937244151e-05, "loss": 0.6769, "step": 3935 }, { "epoch": 0.11498613745804757, "grad_norm": 1.0133660858244744, "learning_rate": 4.948274718911727e-05, "loss": 0.7131, "step": 3940 }, { "epoch": 0.11513205895228368, "grad_norm": 1.0866598577665991, "learning_rate": 4.948042988669978e-05, "loss": 0.7543, "step": 3945 }, { "epoch": 0.11527798044651977, "grad_norm": 0.9942202007589459, "learning_rate": 4.947810746572867e-05, "loss": 0.7076, "step": 3950 }, { "epoch": 0.11542390194075587, "grad_norm": 1.0969714442260865, "learning_rate": 4.9475779926744745e-05, "loss": 0.6907, "step": 3955 }, { "epoch": 0.11556982343499198, "grad_norm": 1.3370496962122116, "learning_rate": 4.947344727029003e-05, "loss": 0.6899, "step": 3960 }, { "epoch": 0.11571574492922808, "grad_norm": 1.0718127570063432, "learning_rate": 4.947110949690773e-05, "loss": 0.6848, "step": 3965 }, { "epoch": 0.11586166642346418, "grad_norm": 1.0313075927975328, "learning_rate": 4.946876660714223e-05, "loss": 0.7205, "step": 3970 }, { "epoch": 0.11600758791770027, "grad_norm": 1.0133343716034173, "learning_rate": 4.946641860153912e-05, "loss": 0.6747, "step": 3975 }, { "epoch": 0.11615350941193638, "grad_norm": 1.0503887675078798, "learning_rate": 4.946406548064517e-05, "loss": 0.6747, "step": 3980 }, { "epoch": 0.11629943090617248, "grad_norm": 1.203700752827074, "learning_rate": 4.9461707245008366e-05, "loss": 0.7548, "step": 3985 }, { "epoch": 0.11644535240040858, "grad_norm": 1.0132059358440235, "learning_rate": 4.9459343895177846e-05, "loss": 0.6902, "step": 3990 }, { "epoch": 0.11659127389464469, "grad_norm": 1.3602021524431593, "learning_rate": 4.945697543170397e-05, "loss": 0.7233, "step": 3995 }, { "epoch": 0.11673719538888078, "grad_norm": 1.4124815233747543, "learning_rate": 4.9454601855138275e-05, "loss": 0.7342, "step": 4000 }, { "epoch": 0.11688311688311688, "grad_norm": 1.0415423509236525, "learning_rate": 4.9452223166033505e-05, "loss": 0.7283, "step": 4005 }, { "epoch": 0.11702903837735298, "grad_norm": 1.0177430690987501, "learning_rate": 4.9449839364943565e-05, "loss": 0.6869, "step": 4010 }, { "epoch": 0.11717495987158909, "grad_norm": 1.1911709596838373, "learning_rate": 4.944745045242359e-05, "loss": 0.7244, "step": 4015 }, { "epoch": 0.11732088136582519, "grad_norm": 1.5140239272691038, "learning_rate": 4.944505642902985e-05, "loss": 0.708, "step": 4020 }, { "epoch": 0.11746680286006128, "grad_norm": 1.245772020034725, "learning_rate": 4.944265729531987e-05, "loss": 0.7375, "step": 4025 }, { "epoch": 0.11761272435429738, "grad_norm": 1.0706146586105905, "learning_rate": 4.9440253051852314e-05, "loss": 0.6, "step": 4030 }, { "epoch": 0.11775864584853349, "grad_norm": 1.1510707015059383, "learning_rate": 4.943784369918707e-05, "loss": 0.7206, "step": 4035 }, { "epoch": 0.11790456734276959, "grad_norm": 1.2125701370710924, "learning_rate": 4.9435429237885175e-05, "loss": 0.6427, "step": 4040 }, { "epoch": 0.1180504888370057, "grad_norm": 1.0945514690903801, "learning_rate": 4.9433009668508906e-05, "loss": 0.7198, "step": 4045 }, { "epoch": 0.11819641033124179, "grad_norm": 0.9882674443895904, "learning_rate": 4.9430584991621705e-05, "loss": 0.6413, "step": 4050 }, { "epoch": 0.11834233182547789, "grad_norm": 1.175455587370083, "learning_rate": 4.9428155207788186e-05, "loss": 0.7123, "step": 4055 }, { "epoch": 0.118488253319714, "grad_norm": 1.126090153112986, "learning_rate": 4.942572031757418e-05, "loss": 0.6487, "step": 4060 }, { "epoch": 0.1186341748139501, "grad_norm": 1.1899483259446826, "learning_rate": 4.9423280321546694e-05, "loss": 0.6808, "step": 4065 }, { "epoch": 0.1187800963081862, "grad_norm": 1.2708256085640324, "learning_rate": 4.942083522027393e-05, "loss": 0.7331, "step": 4070 }, { "epoch": 0.11892601780242229, "grad_norm": 1.1621919290063742, "learning_rate": 4.941838501432528e-05, "loss": 0.706, "step": 4075 }, { "epoch": 0.1190719392966584, "grad_norm": 1.1698227735759303, "learning_rate": 4.9415929704271305e-05, "loss": 0.6504, "step": 4080 }, { "epoch": 0.1192178607908945, "grad_norm": 1.087854250147867, "learning_rate": 4.941346929068379e-05, "loss": 0.6949, "step": 4085 }, { "epoch": 0.1193637822851306, "grad_norm": 1.1253673663926613, "learning_rate": 4.9411003774135665e-05, "loss": 0.6163, "step": 4090 }, { "epoch": 0.1195097037793667, "grad_norm": 0.9752195636234635, "learning_rate": 4.940853315520108e-05, "loss": 0.6488, "step": 4095 }, { "epoch": 0.1196556252736028, "grad_norm": 1.130358705459771, "learning_rate": 4.940605743445538e-05, "loss": 0.6918, "step": 4100 }, { "epoch": 0.1198015467678389, "grad_norm": 1.044950772445061, "learning_rate": 4.940357661247507e-05, "loss": 0.6876, "step": 4105 }, { "epoch": 0.119947468262075, "grad_norm": 1.1423681046813763, "learning_rate": 4.9401090689837845e-05, "loss": 0.6599, "step": 4110 }, { "epoch": 0.1200933897563111, "grad_norm": 1.1078600357014607, "learning_rate": 4.939859966712262e-05, "loss": 0.7092, "step": 4115 }, { "epoch": 0.12023931125054721, "grad_norm": 1.0821247812326489, "learning_rate": 4.939610354490947e-05, "loss": 0.6713, "step": 4120 }, { "epoch": 0.12038523274478331, "grad_norm": 1.189240848690589, "learning_rate": 4.939360232377965e-05, "loss": 0.7582, "step": 4125 }, { "epoch": 0.1205311542390194, "grad_norm": 1.102766516656728, "learning_rate": 4.939109600431564e-05, "loss": 0.7039, "step": 4130 }, { "epoch": 0.1206770757332555, "grad_norm": 1.2338185259530063, "learning_rate": 4.938858458710105e-05, "loss": 0.6249, "step": 4135 }, { "epoch": 0.12082299722749161, "grad_norm": 1.3950450279512345, "learning_rate": 4.938606807272075e-05, "loss": 0.7288, "step": 4140 }, { "epoch": 0.12096891872172771, "grad_norm": 1.5536625376003828, "learning_rate": 4.938354646176072e-05, "loss": 0.6954, "step": 4145 }, { "epoch": 0.12111484021596382, "grad_norm": 1.1243355087911477, "learning_rate": 4.9381019754808195e-05, "loss": 0.6975, "step": 4150 }, { "epoch": 0.12126076171019991, "grad_norm": 1.1423456295198111, "learning_rate": 4.9378487952451543e-05, "loss": 0.7113, "step": 4155 }, { "epoch": 0.12140668320443601, "grad_norm": 1.020352268854177, "learning_rate": 4.9375951055280354e-05, "loss": 0.7548, "step": 4160 }, { "epoch": 0.12155260469867211, "grad_norm": 1.0902738623073274, "learning_rate": 4.937340906388538e-05, "loss": 0.6624, "step": 4165 }, { "epoch": 0.12169852619290822, "grad_norm": 1.2323026010364944, "learning_rate": 4.93708619788586e-05, "loss": 0.705, "step": 4170 }, { "epoch": 0.12184444768714432, "grad_norm": 1.0858094879022224, "learning_rate": 4.936830980079312e-05, "loss": 0.6603, "step": 4175 }, { "epoch": 0.12199036918138041, "grad_norm": 1.4823841077723303, "learning_rate": 4.9365752530283265e-05, "loss": 0.718, "step": 4180 }, { "epoch": 0.12213629067561652, "grad_norm": 1.1247872681375826, "learning_rate": 4.936319016792456e-05, "loss": 0.702, "step": 4185 }, { "epoch": 0.12228221216985262, "grad_norm": 0.947658429483614, "learning_rate": 4.936062271431369e-05, "loss": 0.6895, "step": 4190 }, { "epoch": 0.12242813366408872, "grad_norm": 1.0737204863217322, "learning_rate": 4.935805017004853e-05, "loss": 0.6789, "step": 4195 }, { "epoch": 0.12257405515832483, "grad_norm": 0.9740477450508347, "learning_rate": 4.935547253572815e-05, "loss": 0.6973, "step": 4200 }, { "epoch": 0.12271997665256092, "grad_norm": 1.3669559040356531, "learning_rate": 4.93528898119528e-05, "loss": 0.7341, "step": 4205 }, { "epoch": 0.12286589814679702, "grad_norm": 1.1197413087704782, "learning_rate": 4.935030199932392e-05, "loss": 0.6459, "step": 4210 }, { "epoch": 0.12301181964103312, "grad_norm": 1.1820391059611846, "learning_rate": 4.9347709098444114e-05, "loss": 0.7717, "step": 4215 }, { "epoch": 0.12315774113526923, "grad_norm": 0.8971069426873689, "learning_rate": 4.934511110991721e-05, "loss": 0.6174, "step": 4220 }, { "epoch": 0.12330366262950533, "grad_norm": 1.0219446113715438, "learning_rate": 4.934250803434818e-05, "loss": 0.6937, "step": 4225 }, { "epoch": 0.12344958412374142, "grad_norm": 1.4596743787177244, "learning_rate": 4.933989987234321e-05, "loss": 0.7195, "step": 4230 }, { "epoch": 0.12359550561797752, "grad_norm": 1.008207225955857, "learning_rate": 4.9337286624509655e-05, "loss": 0.6482, "step": 4235 }, { "epoch": 0.12374142711221363, "grad_norm": 1.1801377144179224, "learning_rate": 4.9334668291456054e-05, "loss": 0.711, "step": 4240 }, { "epoch": 0.12388734860644973, "grad_norm": 1.257243421328094, "learning_rate": 4.933204487379214e-05, "loss": 0.7059, "step": 4245 }, { "epoch": 0.12403327010068584, "grad_norm": 1.1182641851236583, "learning_rate": 4.932941637212883e-05, "loss": 0.7219, "step": 4250 }, { "epoch": 0.12417919159492193, "grad_norm": 1.1027275067917797, "learning_rate": 4.9326782787078215e-05, "loss": 0.6704, "step": 4255 }, { "epoch": 0.12432511308915803, "grad_norm": 1.2503708162830354, "learning_rate": 4.932414411925356e-05, "loss": 0.715, "step": 4260 }, { "epoch": 0.12447103458339413, "grad_norm": 1.0708093798217997, "learning_rate": 4.9321500369269345e-05, "loss": 0.7156, "step": 4265 }, { "epoch": 0.12461695607763024, "grad_norm": 1.0249800173155617, "learning_rate": 4.9318851537741224e-05, "loss": 0.6608, "step": 4270 }, { "epoch": 0.12476287757186634, "grad_norm": 1.2562160401716829, "learning_rate": 4.931619762528601e-05, "loss": 0.6431, "step": 4275 }, { "epoch": 0.12490879906610243, "grad_norm": 1.0807148903111947, "learning_rate": 4.931353863252172e-05, "loss": 0.7285, "step": 4280 }, { "epoch": 0.12505472056033853, "grad_norm": 1.1314341038735056, "learning_rate": 4.9310874560067554e-05, "loss": 0.6278, "step": 4285 }, { "epoch": 0.12520064205457465, "grad_norm": 0.9660677941704441, "learning_rate": 4.930820540854389e-05, "loss": 0.6952, "step": 4290 }, { "epoch": 0.12534656354881074, "grad_norm": 1.325618375733907, "learning_rate": 4.930553117857228e-05, "loss": 0.7188, "step": 4295 }, { "epoch": 0.12549248504304683, "grad_norm": 1.0740669808255412, "learning_rate": 4.930285187077549e-05, "loss": 0.7184, "step": 4300 }, { "epoch": 0.12563840653728295, "grad_norm": 1.0665592702074642, "learning_rate": 4.930016748577743e-05, "loss": 0.7001, "step": 4305 }, { "epoch": 0.12578432803151904, "grad_norm": 0.9641585920422673, "learning_rate": 4.929747802420321e-05, "loss": 0.6355, "step": 4310 }, { "epoch": 0.12593024952575516, "grad_norm": 1.1397503515141967, "learning_rate": 4.929478348667913e-05, "loss": 0.665, "step": 4315 }, { "epoch": 0.12607617101999125, "grad_norm": 1.0569617435941585, "learning_rate": 4.929208387383265e-05, "loss": 0.6023, "step": 4320 }, { "epoch": 0.12622209251422734, "grad_norm": 1.1146521228633575, "learning_rate": 4.9289379186292434e-05, "loss": 0.7216, "step": 4325 }, { "epoch": 0.12636801400846345, "grad_norm": 1.1676304605724792, "learning_rate": 4.9286669424688326e-05, "loss": 0.6546, "step": 4330 }, { "epoch": 0.12651393550269954, "grad_norm": 1.0647112575028943, "learning_rate": 4.928395458965134e-05, "loss": 0.68, "step": 4335 }, { "epoch": 0.12665985699693566, "grad_norm": 1.1318939328332238, "learning_rate": 4.928123468181366e-05, "loss": 0.6718, "step": 4340 }, { "epoch": 0.12680577849117175, "grad_norm": 1.137918290181405, "learning_rate": 4.927850970180869e-05, "loss": 0.6484, "step": 4345 }, { "epoch": 0.12695169998540784, "grad_norm": 1.106880749643652, "learning_rate": 4.927577965027097e-05, "loss": 0.6762, "step": 4350 }, { "epoch": 0.12709762147964396, "grad_norm": 1.255135741197893, "learning_rate": 4.927304452783627e-05, "loss": 0.7049, "step": 4355 }, { "epoch": 0.12724354297388005, "grad_norm": 0.9412812453220591, "learning_rate": 4.9270304335141494e-05, "loss": 0.7122, "step": 4360 }, { "epoch": 0.12738946446811616, "grad_norm": 1.0080959791537925, "learning_rate": 4.926755907282475e-05, "loss": 0.6345, "step": 4365 }, { "epoch": 0.12753538596235225, "grad_norm": 1.1128014160594106, "learning_rate": 4.926480874152534e-05, "loss": 0.6976, "step": 4370 }, { "epoch": 0.12768130745658834, "grad_norm": 1.3479512069390571, "learning_rate": 4.9262053341883716e-05, "loss": 0.7235, "step": 4375 }, { "epoch": 0.12782722895082446, "grad_norm": 1.4031641810510902, "learning_rate": 4.925929287454151e-05, "loss": 0.7379, "step": 4380 }, { "epoch": 0.12797315044506055, "grad_norm": 1.1046431647470536, "learning_rate": 4.9256527340141576e-05, "loss": 0.7056, "step": 4385 }, { "epoch": 0.12811907193929667, "grad_norm": 0.9880921574790832, "learning_rate": 4.92537567393279e-05, "loss": 0.7059, "step": 4390 }, { "epoch": 0.12826499343353276, "grad_norm": 1.0059221521204251, "learning_rate": 4.925098107274569e-05, "loss": 0.6627, "step": 4395 }, { "epoch": 0.12841091492776885, "grad_norm": 1.230884368448577, "learning_rate": 4.924820034104129e-05, "loss": 0.746, "step": 4400 }, { "epoch": 0.12855683642200497, "grad_norm": 1.1311689317839877, "learning_rate": 4.924541454486226e-05, "loss": 0.6738, "step": 4405 }, { "epoch": 0.12870275791624106, "grad_norm": 1.081562653473379, "learning_rate": 4.92426236848573e-05, "loss": 0.7304, "step": 4410 }, { "epoch": 0.12884867941047717, "grad_norm": 1.108022164741601, "learning_rate": 4.923982776167634e-05, "loss": 0.6601, "step": 4415 }, { "epoch": 0.12899460090471326, "grad_norm": 1.1913322601354124, "learning_rate": 4.923702677597047e-05, "loss": 0.6678, "step": 4420 }, { "epoch": 0.12914052239894935, "grad_norm": 1.180137582174622, "learning_rate": 4.923422072839192e-05, "loss": 0.7028, "step": 4425 }, { "epoch": 0.12928644389318547, "grad_norm": 1.062312363516143, "learning_rate": 4.9231409619594154e-05, "loss": 0.7371, "step": 4430 }, { "epoch": 0.12943236538742156, "grad_norm": 1.094084924334837, "learning_rate": 4.9228593450231783e-05, "loss": 0.6962, "step": 4435 }, { "epoch": 0.12957828688165768, "grad_norm": 1.1019862792373496, "learning_rate": 4.9225772220960615e-05, "loss": 0.6869, "step": 4440 }, { "epoch": 0.12972420837589377, "grad_norm": 1.11205304567185, "learning_rate": 4.92229459324376e-05, "loss": 0.6253, "step": 4445 }, { "epoch": 0.12987012987012986, "grad_norm": 1.1615422757029696, "learning_rate": 4.922011458532093e-05, "loss": 0.7022, "step": 4450 }, { "epoch": 0.13001605136436598, "grad_norm": 1.2643020115074577, "learning_rate": 4.9217278180269896e-05, "loss": 0.7211, "step": 4455 }, { "epoch": 0.13016197285860207, "grad_norm": 1.0660846661940044, "learning_rate": 4.921443671794504e-05, "loss": 0.7106, "step": 4460 }, { "epoch": 0.13030789435283818, "grad_norm": 1.015264723691173, "learning_rate": 4.9211590199008034e-05, "loss": 0.6745, "step": 4465 }, { "epoch": 0.13045381584707427, "grad_norm": 1.2598946183709452, "learning_rate": 4.9208738624121746e-05, "loss": 0.714, "step": 4470 }, { "epoch": 0.13059973734131036, "grad_norm": 1.1393077380814258, "learning_rate": 4.9205881993950226e-05, "loss": 0.6199, "step": 4475 }, { "epoch": 0.13074565883554648, "grad_norm": 1.2068626428688078, "learning_rate": 4.920302030915868e-05, "loss": 0.7118, "step": 4480 }, { "epoch": 0.13089158032978257, "grad_norm": 1.1276464526411303, "learning_rate": 4.920015357041352e-05, "loss": 0.6904, "step": 4485 }, { "epoch": 0.1310375018240187, "grad_norm": 1.0378150446365662, "learning_rate": 4.9197281778382294e-05, "loss": 0.6299, "step": 4490 }, { "epoch": 0.13118342331825478, "grad_norm": 1.1522706788153476, "learning_rate": 4.9194404933733785e-05, "loss": 0.6915, "step": 4495 }, { "epoch": 0.13132934481249087, "grad_norm": 1.5649634900383422, "learning_rate": 4.9191523037137896e-05, "loss": 0.7416, "step": 4500 }, { "epoch": 0.13147526630672698, "grad_norm": 0.9639275009163263, "learning_rate": 4.918863608926575e-05, "loss": 0.658, "step": 4505 }, { "epoch": 0.13162118780096307, "grad_norm": 1.0803790708853265, "learning_rate": 4.918574409078961e-05, "loss": 0.7385, "step": 4510 }, { "epoch": 0.1317671092951992, "grad_norm": 1.0431205640469672, "learning_rate": 4.9182847042382935e-05, "loss": 0.6494, "step": 4515 }, { "epoch": 0.13191303078943528, "grad_norm": 1.2574540054935648, "learning_rate": 4.917994494472036e-05, "loss": 0.7031, "step": 4520 }, { "epoch": 0.13205895228367137, "grad_norm": 1.2716559360632516, "learning_rate": 4.91770377984777e-05, "loss": 0.7143, "step": 4525 }, { "epoch": 0.1322048737779075, "grad_norm": 1.2012888935894919, "learning_rate": 4.917412560433192e-05, "loss": 0.7083, "step": 4530 }, { "epoch": 0.13235079527214358, "grad_norm": 1.1803619899103541, "learning_rate": 4.9171208362961204e-05, "loss": 0.699, "step": 4535 }, { "epoch": 0.1324967167663797, "grad_norm": 1.0113819215671593, "learning_rate": 4.916828607504486e-05, "loss": 0.6507, "step": 4540 }, { "epoch": 0.13264263826061579, "grad_norm": 1.24736339433036, "learning_rate": 4.916535874126341e-05, "loss": 0.6628, "step": 4545 }, { "epoch": 0.13278855975485188, "grad_norm": 1.3041346693606257, "learning_rate": 4.9162426362298536e-05, "loss": 0.6616, "step": 4550 }, { "epoch": 0.132934481249088, "grad_norm": 0.9813318283956239, "learning_rate": 4.91594889388331e-05, "loss": 0.6752, "step": 4555 }, { "epoch": 0.13308040274332408, "grad_norm": 1.2641451451617216, "learning_rate": 4.915654647155114e-05, "loss": 0.6096, "step": 4560 }, { "epoch": 0.1332263242375602, "grad_norm": 3.9537948455186704, "learning_rate": 4.915359896113785e-05, "loss": 0.5926, "step": 4565 }, { "epoch": 0.1333722457317963, "grad_norm": 1.1490951671689527, "learning_rate": 4.9150646408279634e-05, "loss": 0.6715, "step": 4570 }, { "epoch": 0.1335181672260324, "grad_norm": 1.5253736779489158, "learning_rate": 4.914768881366403e-05, "loss": 0.7057, "step": 4575 }, { "epoch": 0.1336640887202685, "grad_norm": 0.9836469966393219, "learning_rate": 4.9144726177979764e-05, "loss": 0.6935, "step": 4580 }, { "epoch": 0.1338100102145046, "grad_norm": 1.2595981973617496, "learning_rate": 4.914175850191677e-05, "loss": 0.6646, "step": 4585 }, { "epoch": 0.1339559317087407, "grad_norm": 1.0990961231019638, "learning_rate": 4.9138785786166105e-05, "loss": 0.7039, "step": 4590 }, { "epoch": 0.1341018532029768, "grad_norm": 1.066233568866453, "learning_rate": 4.913580803142002e-05, "loss": 0.7323, "step": 4595 }, { "epoch": 0.1342477746972129, "grad_norm": 1.021845294965165, "learning_rate": 4.913282523837195e-05, "loss": 0.7186, "step": 4600 }, { "epoch": 0.134393696191449, "grad_norm": 1.0648379183111054, "learning_rate": 4.912983740771649e-05, "loss": 0.6623, "step": 4605 }, { "epoch": 0.1345396176856851, "grad_norm": 1.119953961319831, "learning_rate": 4.912684454014942e-05, "loss": 0.6414, "step": 4610 }, { "epoch": 0.1346855391799212, "grad_norm": 1.070167813054844, "learning_rate": 4.912384663636768e-05, "loss": 0.6872, "step": 4615 }, { "epoch": 0.1348314606741573, "grad_norm": 0.9937951025153453, "learning_rate": 4.9120843697069366e-05, "loss": 0.6578, "step": 4620 }, { "epoch": 0.13497738216839342, "grad_norm": 1.150098588548831, "learning_rate": 4.9117835722953817e-05, "loss": 0.6992, "step": 4625 }, { "epoch": 0.1351233036626295, "grad_norm": 0.9620579969205204, "learning_rate": 4.911482271472145e-05, "loss": 0.6356, "step": 4630 }, { "epoch": 0.1352692251568656, "grad_norm": 0.9772741486828223, "learning_rate": 4.911180467307392e-05, "loss": 0.6842, "step": 4635 }, { "epoch": 0.13541514665110171, "grad_norm": 1.0244177126704055, "learning_rate": 4.910878159871403e-05, "loss": 0.6888, "step": 4640 }, { "epoch": 0.1355610681453378, "grad_norm": 1.032799435304836, "learning_rate": 4.9105753492345766e-05, "loss": 0.7135, "step": 4645 }, { "epoch": 0.13570698963957392, "grad_norm": 1.070007781509982, "learning_rate": 4.910272035467427e-05, "loss": 0.7026, "step": 4650 }, { "epoch": 0.13585291113381, "grad_norm": 1.271958715442886, "learning_rate": 4.909968218640588e-05, "loss": 0.6879, "step": 4655 }, { "epoch": 0.1359988326280461, "grad_norm": 0.9339405685144769, "learning_rate": 4.909663898824807e-05, "loss": 0.6276, "step": 4660 }, { "epoch": 0.13614475412228222, "grad_norm": 1.0887017892321462, "learning_rate": 4.909359076090952e-05, "loss": 0.7068, "step": 4665 }, { "epoch": 0.1362906756165183, "grad_norm": 1.1379308712176395, "learning_rate": 4.909053750510005e-05, "loss": 0.6944, "step": 4670 }, { "epoch": 0.13643659711075443, "grad_norm": 1.1635600427634953, "learning_rate": 4.9087479221530696e-05, "loss": 0.7056, "step": 4675 }, { "epoch": 0.13658251860499052, "grad_norm": 1.1933753832724734, "learning_rate": 4.908441591091362e-05, "loss": 0.6832, "step": 4680 }, { "epoch": 0.1367284400992266, "grad_norm": 1.065601768601014, "learning_rate": 4.908134757396216e-05, "loss": 0.6863, "step": 4685 }, { "epoch": 0.13687436159346272, "grad_norm": 1.3104649809246165, "learning_rate": 4.907827421139085e-05, "loss": 0.7004, "step": 4690 }, { "epoch": 0.1370202830876988, "grad_norm": 1.2607542895960762, "learning_rate": 4.907519582391538e-05, "loss": 0.6578, "step": 4695 }, { "epoch": 0.13716620458193493, "grad_norm": 1.3972807316000297, "learning_rate": 4.90721124122526e-05, "loss": 0.7043, "step": 4700 }, { "epoch": 0.13731212607617102, "grad_norm": 1.345064497649985, "learning_rate": 4.9069023977120555e-05, "loss": 0.665, "step": 4705 }, { "epoch": 0.1374580475704071, "grad_norm": 1.168170781570877, "learning_rate": 4.906593051923843e-05, "loss": 0.6474, "step": 4710 }, { "epoch": 0.13760396906464323, "grad_norm": 1.2861586385641843, "learning_rate": 4.90628320393266e-05, "loss": 0.6904, "step": 4715 }, { "epoch": 0.13774989055887932, "grad_norm": 1.128370005126937, "learning_rate": 4.9059728538106605e-05, "loss": 0.65, "step": 4720 }, { "epoch": 0.13789581205311544, "grad_norm": 1.196672964789622, "learning_rate": 4.905662001630116e-05, "loss": 0.7126, "step": 4725 }, { "epoch": 0.13804173354735153, "grad_norm": 1.2021832701329431, "learning_rate": 4.905350647463412e-05, "loss": 0.6709, "step": 4730 }, { "epoch": 0.13818765504158761, "grad_norm": 0.9824054962661549, "learning_rate": 4.905038791383056e-05, "loss": 0.6711, "step": 4735 }, { "epoch": 0.13833357653582373, "grad_norm": 0.9710205271656333, "learning_rate": 4.904726433461668e-05, "loss": 0.6784, "step": 4740 }, { "epoch": 0.13847949803005982, "grad_norm": 1.3081974111966406, "learning_rate": 4.904413573771987e-05, "loss": 0.6411, "step": 4745 }, { "epoch": 0.13862541952429594, "grad_norm": 1.2014268435934012, "learning_rate": 4.904100212386867e-05, "loss": 0.6922, "step": 4750 }, { "epoch": 0.13877134101853203, "grad_norm": 1.149626759325989, "learning_rate": 4.9037863493792804e-05, "loss": 0.6724, "step": 4755 }, { "epoch": 0.13891726251276812, "grad_norm": 1.0969383172941878, "learning_rate": 4.9034719848223176e-05, "loss": 0.5979, "step": 4760 }, { "epoch": 0.13906318400700424, "grad_norm": 1.3775594643994746, "learning_rate": 4.9031571187891824e-05, "loss": 0.7088, "step": 4765 }, { "epoch": 0.13920910550124033, "grad_norm": 1.0982138184363692, "learning_rate": 4.902841751353198e-05, "loss": 0.586, "step": 4770 }, { "epoch": 0.13935502699547644, "grad_norm": 1.287348970584067, "learning_rate": 4.902525882587804e-05, "loss": 0.6989, "step": 4775 }, { "epoch": 0.13950094848971253, "grad_norm": 1.158895061664004, "learning_rate": 4.9022095125665556e-05, "loss": 0.76, "step": 4780 }, { "epoch": 0.13964686998394862, "grad_norm": 1.0175963904056657, "learning_rate": 4.9018926413631264e-05, "loss": 0.671, "step": 4785 }, { "epoch": 0.13979279147818474, "grad_norm": 1.0738061674913413, "learning_rate": 4.901575269051304e-05, "loss": 0.6443, "step": 4790 }, { "epoch": 0.13993871297242083, "grad_norm": 1.2007880769927555, "learning_rate": 4.901257395704996e-05, "loss": 0.6912, "step": 4795 }, { "epoch": 0.14008463446665695, "grad_norm": 1.1057567414366558, "learning_rate": 4.900939021398225e-05, "loss": 0.6735, "step": 4800 }, { "epoch": 0.14023055596089304, "grad_norm": 0.9618347954465882, "learning_rate": 4.9006201462051304e-05, "loss": 0.6853, "step": 4805 }, { "epoch": 0.14037647745512913, "grad_norm": 1.146186579488446, "learning_rate": 4.900300770199968e-05, "loss": 0.6854, "step": 4810 }, { "epoch": 0.14052239894936525, "grad_norm": 1.2796031039217017, "learning_rate": 4.8999808934571105e-05, "loss": 0.5789, "step": 4815 }, { "epoch": 0.14066832044360134, "grad_norm": 0.9250417270724687, "learning_rate": 4.899660516051047e-05, "loss": 0.7052, "step": 4820 }, { "epoch": 0.14081424193783745, "grad_norm": 1.1235962312884695, "learning_rate": 4.899339638056383e-05, "loss": 0.7218, "step": 4825 }, { "epoch": 0.14096016343207354, "grad_norm": 1.3728019715463347, "learning_rate": 4.899018259547842e-05, "loss": 0.6882, "step": 4830 }, { "epoch": 0.14110608492630963, "grad_norm": 1.2141369863368308, "learning_rate": 4.898696380600263e-05, "loss": 0.6143, "step": 4835 }, { "epoch": 0.14125200642054575, "grad_norm": 1.0633781600946668, "learning_rate": 4.8983740012886006e-05, "loss": 0.6758, "step": 4840 }, { "epoch": 0.14139792791478184, "grad_norm": 1.1798611618248072, "learning_rate": 4.898051121687927e-05, "loss": 0.6439, "step": 4845 }, { "epoch": 0.14154384940901796, "grad_norm": 1.182610569361954, "learning_rate": 4.897727741873431e-05, "loss": 0.5814, "step": 4850 }, { "epoch": 0.14168977090325405, "grad_norm": 1.1453313508660983, "learning_rate": 4.897403861920417e-05, "loss": 0.6654, "step": 4855 }, { "epoch": 0.14183569239749014, "grad_norm": 1.2003229203678043, "learning_rate": 4.897079481904308e-05, "loss": 0.7414, "step": 4860 }, { "epoch": 0.14198161389172625, "grad_norm": 0.9859090190617696, "learning_rate": 4.896754601900641e-05, "loss": 0.6386, "step": 4865 }, { "epoch": 0.14212753538596234, "grad_norm": 1.2551714302410668, "learning_rate": 4.89642922198507e-05, "loss": 0.682, "step": 4870 }, { "epoch": 0.14227345688019846, "grad_norm": 1.0549960492077843, "learning_rate": 4.896103342233366e-05, "loss": 0.6495, "step": 4875 }, { "epoch": 0.14241937837443455, "grad_norm": 1.163727737321146, "learning_rate": 4.8957769627214165e-05, "loss": 0.6879, "step": 4880 }, { "epoch": 0.14256529986867064, "grad_norm": 1.1715697763094626, "learning_rate": 4.895450083525225e-05, "loss": 0.6547, "step": 4885 }, { "epoch": 0.14271122136290676, "grad_norm": 1.2938395441259627, "learning_rate": 4.895122704720912e-05, "loss": 0.788, "step": 4890 }, { "epoch": 0.14285714285714285, "grad_norm": 0.9947646306332976, "learning_rate": 4.894794826384713e-05, "loss": 0.6386, "step": 4895 }, { "epoch": 0.14300306435137897, "grad_norm": 1.1393631171633438, "learning_rate": 4.89446644859298e-05, "loss": 0.7044, "step": 4900 }, { "epoch": 0.14314898584561506, "grad_norm": 1.0325043364535382, "learning_rate": 4.894137571422183e-05, "loss": 0.6716, "step": 4905 }, { "epoch": 0.14329490733985115, "grad_norm": 1.0884063442719651, "learning_rate": 4.8938081949489076e-05, "loss": 0.7032, "step": 4910 }, { "epoch": 0.14344082883408726, "grad_norm": 1.0482144588280986, "learning_rate": 4.893478319249854e-05, "loss": 0.6732, "step": 4915 }, { "epoch": 0.14358675032832335, "grad_norm": 1.1021930031427787, "learning_rate": 4.8931479444018405e-05, "loss": 0.6692, "step": 4920 }, { "epoch": 0.14373267182255947, "grad_norm": 0.9880131115060304, "learning_rate": 4.8928170704818e-05, "loss": 0.6224, "step": 4925 }, { "epoch": 0.14387859331679556, "grad_norm": 1.132437443518024, "learning_rate": 4.8924856975667846e-05, "loss": 0.6935, "step": 4930 }, { "epoch": 0.14402451481103168, "grad_norm": 1.1129437648839622, "learning_rate": 4.8921538257339593e-05, "loss": 0.6585, "step": 4935 }, { "epoch": 0.14417043630526777, "grad_norm": 0.9815954620433064, "learning_rate": 4.891821455060607e-05, "loss": 0.7098, "step": 4940 }, { "epoch": 0.14431635779950386, "grad_norm": 1.059684768469752, "learning_rate": 4.8914885856241267e-05, "loss": 0.6469, "step": 4945 }, { "epoch": 0.14446227929373998, "grad_norm": 1.0538157876388539, "learning_rate": 4.8911552175020334e-05, "loss": 0.691, "step": 4950 }, { "epoch": 0.14460820078797607, "grad_norm": 1.03665936643074, "learning_rate": 4.8908213507719574e-05, "loss": 0.6919, "step": 4955 }, { "epoch": 0.14475412228221218, "grad_norm": 1.1438242333865547, "learning_rate": 4.890486985511646e-05, "loss": 0.6259, "step": 4960 }, { "epoch": 0.14490004377644827, "grad_norm": 1.299769319417847, "learning_rate": 4.8901521217989624e-05, "loss": 0.705, "step": 4965 }, { "epoch": 0.14504596527068436, "grad_norm": 1.0054967292808565, "learning_rate": 4.889816759711887e-05, "loss": 0.7142, "step": 4970 }, { "epoch": 0.14519188676492048, "grad_norm": 1.1293320400197682, "learning_rate": 4.889480899328513e-05, "loss": 0.6492, "step": 4975 }, { "epoch": 0.14533780825915657, "grad_norm": 1.0636561751639835, "learning_rate": 4.889144540727054e-05, "loss": 0.6319, "step": 4980 }, { "epoch": 0.1454837297533927, "grad_norm": 1.0355254899477946, "learning_rate": 4.888807683985836e-05, "loss": 0.6272, "step": 4985 }, { "epoch": 0.14562965124762878, "grad_norm": 0.8114873826693616, "learning_rate": 4.8884703291833016e-05, "loss": 0.6872, "step": 4990 }, { "epoch": 0.14577557274186487, "grad_norm": 1.1570324957219593, "learning_rate": 4.888132476398012e-05, "loss": 0.7216, "step": 4995 }, { "epoch": 0.14592149423610098, "grad_norm": 1.0605098990256272, "learning_rate": 4.8877941257086426e-05, "loss": 0.6517, "step": 5000 }, { "epoch": 0.14606741573033707, "grad_norm": 1.0447378828772196, "learning_rate": 4.887455277193983e-05, "loss": 0.685, "step": 5005 }, { "epoch": 0.1462133372245732, "grad_norm": 1.1086047539944173, "learning_rate": 4.887115930932941e-05, "loss": 0.6356, "step": 5010 }, { "epoch": 0.14635925871880928, "grad_norm": 1.1572395217436067, "learning_rate": 4.8867760870045407e-05, "loss": 0.6728, "step": 5015 }, { "epoch": 0.14650518021304537, "grad_norm": 1.161251114302908, "learning_rate": 4.886435745487919e-05, "loss": 0.6273, "step": 5020 }, { "epoch": 0.1466511017072815, "grad_norm": 1.1784241428774225, "learning_rate": 4.8860949064623334e-05, "loss": 0.6599, "step": 5025 }, { "epoch": 0.14679702320151758, "grad_norm": 1.1036218345992077, "learning_rate": 4.885753570007153e-05, "loss": 0.6713, "step": 5030 }, { "epoch": 0.1469429446957537, "grad_norm": 1.2354273027950193, "learning_rate": 4.8854117362018646e-05, "loss": 0.6909, "step": 5035 }, { "epoch": 0.1470888661899898, "grad_norm": 0.9618966191704894, "learning_rate": 4.8850694051260706e-05, "loss": 0.5868, "step": 5040 }, { "epoch": 0.14723478768422588, "grad_norm": 1.1227826210418184, "learning_rate": 4.8847265768594904e-05, "loss": 0.7109, "step": 5045 }, { "epoch": 0.147380709178462, "grad_norm": 1.0305454471962345, "learning_rate": 4.884383251481956e-05, "loss": 0.7081, "step": 5050 }, { "epoch": 0.14752663067269808, "grad_norm": 0.9091481542770193, "learning_rate": 4.8840394290734184e-05, "loss": 0.6882, "step": 5055 }, { "epoch": 0.1476725521669342, "grad_norm": 1.2964403866327527, "learning_rate": 4.8836951097139425e-05, "loss": 0.6731, "step": 5060 }, { "epoch": 0.1478184736611703, "grad_norm": 1.294351175807002, "learning_rate": 4.88335029348371e-05, "loss": 0.7685, "step": 5065 }, { "epoch": 0.14796439515540638, "grad_norm": 1.0056899358987934, "learning_rate": 4.8830049804630165e-05, "loss": 0.6663, "step": 5070 }, { "epoch": 0.1481103166496425, "grad_norm": 1.2437689987091818, "learning_rate": 4.8826591707322763e-05, "loss": 0.6802, "step": 5075 }, { "epoch": 0.1482562381438786, "grad_norm": 1.5587137635670796, "learning_rate": 4.8823128643720164e-05, "loss": 0.6358, "step": 5080 }, { "epoch": 0.1484021596381147, "grad_norm": 1.2639662143413182, "learning_rate": 4.881966061462882e-05, "loss": 0.6509, "step": 5085 }, { "epoch": 0.1485480811323508, "grad_norm": 1.277369191959652, "learning_rate": 4.881618762085631e-05, "loss": 0.698, "step": 5090 }, { "epoch": 0.14869400262658689, "grad_norm": 1.0087963830058446, "learning_rate": 4.88127096632114e-05, "loss": 0.6757, "step": 5095 }, { "epoch": 0.148839924120823, "grad_norm": 1.0282290704085635, "learning_rate": 4.8809226742504e-05, "loss": 0.7144, "step": 5100 }, { "epoch": 0.1489858456150591, "grad_norm": 1.1488234619012705, "learning_rate": 4.8805738859545145e-05, "loss": 0.6593, "step": 5105 }, { "epoch": 0.1491317671092952, "grad_norm": 1.1336513346242925, "learning_rate": 4.880224601514709e-05, "loss": 0.6366, "step": 5110 }, { "epoch": 0.1492776886035313, "grad_norm": 1.2423066341851585, "learning_rate": 4.879874821012318e-05, "loss": 0.6618, "step": 5115 }, { "epoch": 0.1494236100977674, "grad_norm": 1.0125245719137783, "learning_rate": 4.879524544528797e-05, "loss": 0.6662, "step": 5120 }, { "epoch": 0.1495695315920035, "grad_norm": 1.0921171765322315, "learning_rate": 4.879173772145712e-05, "loss": 0.7018, "step": 5125 }, { "epoch": 0.1497154530862396, "grad_norm": 1.0515883225222553, "learning_rate": 4.878822503944748e-05, "loss": 0.6242, "step": 5130 }, { "epoch": 0.14986137458047571, "grad_norm": 1.077422894481393, "learning_rate": 4.8784707400077046e-05, "loss": 0.6864, "step": 5135 }, { "epoch": 0.1500072960747118, "grad_norm": 1.0463109059324265, "learning_rate": 4.878118480416496e-05, "loss": 0.7291, "step": 5140 }, { "epoch": 0.1501532175689479, "grad_norm": 1.1686528462841, "learning_rate": 4.877765725253153e-05, "loss": 0.6392, "step": 5145 }, { "epoch": 0.150299139063184, "grad_norm": 1.1830796683410716, "learning_rate": 4.877412474599822e-05, "loss": 0.7308, "step": 5150 }, { "epoch": 0.1504450605574201, "grad_norm": 1.0811186690939443, "learning_rate": 4.8770587285387614e-05, "loss": 0.6596, "step": 5155 }, { "epoch": 0.15059098205165622, "grad_norm": 1.1366921499180116, "learning_rate": 4.876704487152349e-05, "loss": 0.6531, "step": 5160 }, { "epoch": 0.1507369035458923, "grad_norm": 0.9852321412385228, "learning_rate": 4.876349750523078e-05, "loss": 0.6501, "step": 5165 }, { "epoch": 0.1508828250401284, "grad_norm": 1.0925438342031004, "learning_rate": 4.8759945187335536e-05, "loss": 0.7011, "step": 5170 }, { "epoch": 0.15102874653436452, "grad_norm": 1.1415468833364038, "learning_rate": 4.875638791866499e-05, "loss": 0.6853, "step": 5175 }, { "epoch": 0.1511746680286006, "grad_norm": 1.1206948253508857, "learning_rate": 4.875282570004752e-05, "loss": 0.6391, "step": 5180 }, { "epoch": 0.15132058952283672, "grad_norm": 1.179685788984038, "learning_rate": 4.8749258532312634e-05, "loss": 0.7173, "step": 5185 }, { "epoch": 0.1514665110170728, "grad_norm": 1.1748425401073708, "learning_rate": 4.874568641629105e-05, "loss": 0.6443, "step": 5190 }, { "epoch": 0.1516124325113089, "grad_norm": 0.9436392998085367, "learning_rate": 4.8742109352814566e-05, "loss": 0.6405, "step": 5195 }, { "epoch": 0.15175835400554502, "grad_norm": 1.1083561088494678, "learning_rate": 4.873852734271619e-05, "loss": 0.6832, "step": 5200 }, { "epoch": 0.1519042754997811, "grad_norm": 1.1630036048375099, "learning_rate": 4.8734940386830056e-05, "loss": 0.6366, "step": 5205 }, { "epoch": 0.15205019699401723, "grad_norm": 1.2124648252002201, "learning_rate": 4.873134848599146e-05, "loss": 0.6641, "step": 5210 }, { "epoch": 0.15219611848825332, "grad_norm": 1.0871915424942498, "learning_rate": 4.872775164103683e-05, "loss": 0.631, "step": 5215 }, { "epoch": 0.1523420399824894, "grad_norm": 1.209576301952038, "learning_rate": 4.872414985280375e-05, "loss": 0.71, "step": 5220 }, { "epoch": 0.15248796147672553, "grad_norm": 1.1416729248026865, "learning_rate": 4.872054312213099e-05, "loss": 0.6771, "step": 5225 }, { "epoch": 0.15263388297096162, "grad_norm": 1.0748770372325782, "learning_rate": 4.8716931449858425e-05, "loss": 0.6954, "step": 5230 }, { "epoch": 0.15277980446519773, "grad_norm": 1.0393095883770218, "learning_rate": 4.871331483682712e-05, "loss": 0.6499, "step": 5235 }, { "epoch": 0.15292572595943382, "grad_norm": 1.2908996352090831, "learning_rate": 4.870969328387925e-05, "loss": 0.6323, "step": 5240 }, { "epoch": 0.1530716474536699, "grad_norm": 1.2845316508047322, "learning_rate": 4.870606679185816e-05, "loss": 0.6603, "step": 5245 }, { "epoch": 0.15321756894790603, "grad_norm": 1.2062113371983205, "learning_rate": 4.870243536160837e-05, "loss": 0.7101, "step": 5250 }, { "epoch": 0.15336349044214212, "grad_norm": 1.1831901751159792, "learning_rate": 4.869879899397551e-05, "loss": 0.6826, "step": 5255 }, { "epoch": 0.15350941193637824, "grad_norm": 1.1160114001769645, "learning_rate": 4.8695157689806376e-05, "loss": 0.6613, "step": 5260 }, { "epoch": 0.15365533343061433, "grad_norm": 1.1258650726921502, "learning_rate": 4.869151144994891e-05, "loss": 0.6861, "step": 5265 }, { "epoch": 0.15380125492485042, "grad_norm": 0.9280687045307197, "learning_rate": 4.8687860275252215e-05, "loss": 0.6183, "step": 5270 }, { "epoch": 0.15394717641908653, "grad_norm": 0.9064983444339252, "learning_rate": 4.8684204166566544e-05, "loss": 0.5915, "step": 5275 }, { "epoch": 0.15409309791332262, "grad_norm": 1.0988152905414788, "learning_rate": 4.868054312474327e-05, "loss": 0.6409, "step": 5280 }, { "epoch": 0.15423901940755874, "grad_norm": 1.1223331190394121, "learning_rate": 4.867687715063495e-05, "loss": 0.6541, "step": 5285 }, { "epoch": 0.15438494090179483, "grad_norm": 1.008787848856111, "learning_rate": 4.867320624509526e-05, "loss": 0.6431, "step": 5290 }, { "epoch": 0.15453086239603095, "grad_norm": 1.2080754350677765, "learning_rate": 4.866953040897905e-05, "loss": 0.6512, "step": 5295 }, { "epoch": 0.15467678389026704, "grad_norm": 1.07179898974179, "learning_rate": 4.8665849643142317e-05, "loss": 0.6637, "step": 5300 }, { "epoch": 0.15482270538450313, "grad_norm": 1.1328167409438716, "learning_rate": 4.866216394844217e-05, "loss": 0.714, "step": 5305 }, { "epoch": 0.15496862687873925, "grad_norm": 0.9425023546525281, "learning_rate": 4.865847332573691e-05, "loss": 0.6911, "step": 5310 }, { "epoch": 0.15511454837297534, "grad_norm": 1.0125632535244773, "learning_rate": 4.8654777775885965e-05, "loss": 0.6659, "step": 5315 }, { "epoch": 0.15526046986721145, "grad_norm": 1.19689588460288, "learning_rate": 4.8651077299749906e-05, "loss": 0.6954, "step": 5320 }, { "epoch": 0.15540639136144754, "grad_norm": 1.0873149093220642, "learning_rate": 4.864737189819046e-05, "loss": 0.6731, "step": 5325 }, { "epoch": 0.15555231285568363, "grad_norm": 1.0397964420372123, "learning_rate": 4.864366157207052e-05, "loss": 0.7068, "step": 5330 }, { "epoch": 0.15569823434991975, "grad_norm": 1.148051255288061, "learning_rate": 4.863994632225407e-05, "loss": 0.629, "step": 5335 }, { "epoch": 0.15584415584415584, "grad_norm": 0.9748830282118187, "learning_rate": 4.86362261496063e-05, "loss": 0.7136, "step": 5340 }, { "epoch": 0.15599007733839196, "grad_norm": 1.04018221273993, "learning_rate": 4.863250105499351e-05, "loss": 0.7435, "step": 5345 }, { "epoch": 0.15613599883262805, "grad_norm": 1.0753301489715088, "learning_rate": 4.8628771039283164e-05, "loss": 0.691, "step": 5350 }, { "epoch": 0.15628192032686414, "grad_norm": 0.9987315263637163, "learning_rate": 4.862503610334386e-05, "loss": 0.6576, "step": 5355 }, { "epoch": 0.15642784182110026, "grad_norm": 1.110055366325937, "learning_rate": 4.8621296248045356e-05, "loss": 0.6804, "step": 5360 }, { "epoch": 0.15657376331533635, "grad_norm": 1.0318457910467402, "learning_rate": 4.861755147425854e-05, "loss": 0.6495, "step": 5365 }, { "epoch": 0.15671968480957246, "grad_norm": 1.3122231548784538, "learning_rate": 4.861380178285545e-05, "loss": 0.7177, "step": 5370 }, { "epoch": 0.15686560630380855, "grad_norm": 1.086020519330073, "learning_rate": 4.8610047174709286e-05, "loss": 0.6827, "step": 5375 }, { "epoch": 0.15701152779804464, "grad_norm": 1.1448770673103832, "learning_rate": 4.860628765069436e-05, "loss": 0.7152, "step": 5380 }, { "epoch": 0.15715744929228076, "grad_norm": 1.3072081786400245, "learning_rate": 4.860252321168616e-05, "loss": 0.7188, "step": 5385 }, { "epoch": 0.15730337078651685, "grad_norm": 1.199245824404477, "learning_rate": 4.859875385856131e-05, "loss": 0.6852, "step": 5390 }, { "epoch": 0.15744929228075297, "grad_norm": 0.9864940431883602, "learning_rate": 4.8594979592197556e-05, "loss": 0.627, "step": 5395 }, { "epoch": 0.15759521377498906, "grad_norm": 1.3693859584373105, "learning_rate": 4.859120041347381e-05, "loss": 0.6847, "step": 5400 }, { "epoch": 0.15774113526922515, "grad_norm": 1.1882215755116146, "learning_rate": 4.8587416323270143e-05, "loss": 0.6731, "step": 5405 }, { "epoch": 0.15788705676346126, "grad_norm": 1.1036861819893564, "learning_rate": 4.858362732246774e-05, "loss": 0.6396, "step": 5410 }, { "epoch": 0.15803297825769735, "grad_norm": 0.939339364635614, "learning_rate": 4.857983341194894e-05, "loss": 0.6229, "step": 5415 }, { "epoch": 0.15817889975193347, "grad_norm": 1.091683234533646, "learning_rate": 4.857603459259722e-05, "loss": 0.6843, "step": 5420 }, { "epoch": 0.15832482124616956, "grad_norm": 1.182683675277485, "learning_rate": 4.857223086529721e-05, "loss": 0.678, "step": 5425 }, { "epoch": 0.15847074274040565, "grad_norm": 1.0125636639076858, "learning_rate": 4.856842223093469e-05, "loss": 0.6581, "step": 5430 }, { "epoch": 0.15861666423464177, "grad_norm": 1.1544716654081766, "learning_rate": 4.856460869039656e-05, "loss": 0.6591, "step": 5435 }, { "epoch": 0.15876258572887786, "grad_norm": 1.0557542586450097, "learning_rate": 4.856079024457087e-05, "loss": 0.7035, "step": 5440 }, { "epoch": 0.15890850722311398, "grad_norm": 1.068501828389954, "learning_rate": 4.8556966894346835e-05, "loss": 0.6997, "step": 5445 }, { "epoch": 0.15905442871735007, "grad_norm": 1.2898699377901977, "learning_rate": 4.855313864061478e-05, "loss": 0.7242, "step": 5450 }, { "epoch": 0.15920035021158616, "grad_norm": 0.9952796665767981, "learning_rate": 4.854930548426617e-05, "loss": 0.6669, "step": 5455 }, { "epoch": 0.15934627170582227, "grad_norm": 1.0317824180744029, "learning_rate": 4.854546742619366e-05, "loss": 0.6626, "step": 5460 }, { "epoch": 0.15949219320005836, "grad_norm": 1.2959033115840224, "learning_rate": 4.854162446729099e-05, "loss": 0.6843, "step": 5465 }, { "epoch": 0.15963811469429448, "grad_norm": 0.9682518497042534, "learning_rate": 4.8537776608453076e-05, "loss": 0.6969, "step": 5470 }, { "epoch": 0.15978403618853057, "grad_norm": 1.152364469758684, "learning_rate": 4.853392385057596e-05, "loss": 0.7765, "step": 5475 }, { "epoch": 0.15992995768276666, "grad_norm": 1.2953412871187777, "learning_rate": 4.8530066194556826e-05, "loss": 0.7051, "step": 5480 }, { "epoch": 0.16007587917700278, "grad_norm": 1.13569127606471, "learning_rate": 4.852620364129401e-05, "loss": 0.614, "step": 5485 }, { "epoch": 0.16022180067123887, "grad_norm": 1.089300887087154, "learning_rate": 4.852233619168697e-05, "loss": 0.6372, "step": 5490 }, { "epoch": 0.16036772216547499, "grad_norm": 1.0636244357901254, "learning_rate": 4.851846384663631e-05, "loss": 0.6333, "step": 5495 }, { "epoch": 0.16051364365971107, "grad_norm": 1.1966686080424564, "learning_rate": 4.85145866070438e-05, "loss": 0.6535, "step": 5500 }, { "epoch": 0.16065956515394716, "grad_norm": 1.2361775717794952, "learning_rate": 4.851070447381231e-05, "loss": 0.6928, "step": 5505 }, { "epoch": 0.16080548664818328, "grad_norm": 1.0366677003504112, "learning_rate": 4.850681744784586e-05, "loss": 0.6936, "step": 5510 }, { "epoch": 0.16095140814241937, "grad_norm": 0.9784890372392392, "learning_rate": 4.8502925530049645e-05, "loss": 0.6742, "step": 5515 }, { "epoch": 0.1610973296366555, "grad_norm": 1.1623953035476973, "learning_rate": 4.849902872132994e-05, "loss": 0.6962, "step": 5520 }, { "epoch": 0.16124325113089158, "grad_norm": 0.9920771397838775, "learning_rate": 4.84951270225942e-05, "loss": 0.6755, "step": 5525 }, { "epoch": 0.16138917262512767, "grad_norm": 1.0360150273961153, "learning_rate": 4.849122043475101e-05, "loss": 0.646, "step": 5530 }, { "epoch": 0.1615350941193638, "grad_norm": 1.0842899779982063, "learning_rate": 4.8487308958710095e-05, "loss": 0.7225, "step": 5535 }, { "epoch": 0.16168101561359988, "grad_norm": 0.9934102350266147, "learning_rate": 4.848339259538232e-05, "loss": 0.6545, "step": 5540 }, { "epoch": 0.161826937107836, "grad_norm": 1.0007913908525419, "learning_rate": 4.8479471345679665e-05, "loss": 0.6836, "step": 5545 }, { "epoch": 0.16197285860207208, "grad_norm": 0.9987884105604277, "learning_rate": 4.847554521051529e-05, "loss": 0.6396, "step": 5550 }, { "epoch": 0.16211878009630817, "grad_norm": 1.1616065848634984, "learning_rate": 4.8471614190803445e-05, "loss": 0.6765, "step": 5555 }, { "epoch": 0.1622647015905443, "grad_norm": 1.0277702503069528, "learning_rate": 4.846767828745956e-05, "loss": 0.6814, "step": 5560 }, { "epoch": 0.16241062308478038, "grad_norm": 0.940727128258496, "learning_rate": 4.846373750140017e-05, "loss": 0.6683, "step": 5565 }, { "epoch": 0.1625565445790165, "grad_norm": 1.1316840728421842, "learning_rate": 4.8459791833542974e-05, "loss": 0.7343, "step": 5570 }, { "epoch": 0.1627024660732526, "grad_norm": 1.1362685968542987, "learning_rate": 4.845584128480678e-05, "loss": 0.7035, "step": 5575 }, { "epoch": 0.16284838756748868, "grad_norm": 1.1619185968850532, "learning_rate": 4.845188585611156e-05, "loss": 0.6272, "step": 5580 }, { "epoch": 0.1629943090617248, "grad_norm": 1.1493295018543637, "learning_rate": 4.84479255483784e-05, "loss": 0.6721, "step": 5585 }, { "epoch": 0.16314023055596089, "grad_norm": 1.2637264689619623, "learning_rate": 4.844396036252954e-05, "loss": 0.6259, "step": 5590 }, { "epoch": 0.163286152050197, "grad_norm": 1.0338309243815138, "learning_rate": 4.843999029948834e-05, "loss": 0.6724, "step": 5595 }, { "epoch": 0.1634320735444331, "grad_norm": 1.142890497919786, "learning_rate": 4.84360153601793e-05, "loss": 0.711, "step": 5600 }, { "epoch": 0.16357799503866918, "grad_norm": 1.199686987301231, "learning_rate": 4.843203554552808e-05, "loss": 0.7436, "step": 5605 }, { "epoch": 0.1637239165329053, "grad_norm": 1.1146011661956983, "learning_rate": 4.8428050856461435e-05, "loss": 0.6477, "step": 5610 }, { "epoch": 0.1638698380271414, "grad_norm": 1.012823412350095, "learning_rate": 4.842406129390728e-05, "loss": 0.6844, "step": 5615 }, { "epoch": 0.1640157595213775, "grad_norm": 1.0118054412475699, "learning_rate": 4.842006685879466e-05, "loss": 0.6253, "step": 5620 }, { "epoch": 0.1641616810156136, "grad_norm": 1.2776973921047927, "learning_rate": 4.841606755205374e-05, "loss": 0.6913, "step": 5625 }, { "epoch": 0.1643076025098497, "grad_norm": 1.2615868176843938, "learning_rate": 4.841206337461585e-05, "loss": 0.7349, "step": 5630 }, { "epoch": 0.1644535240040858, "grad_norm": 1.413966864551977, "learning_rate": 4.840805432741343e-05, "loss": 0.6433, "step": 5635 }, { "epoch": 0.1645994454983219, "grad_norm": 1.0752591122123394, "learning_rate": 4.840404041138007e-05, "loss": 0.6604, "step": 5640 }, { "epoch": 0.164745366992558, "grad_norm": 1.2220504694822736, "learning_rate": 4.840002162745048e-05, "loss": 0.7091, "step": 5645 }, { "epoch": 0.1648912884867941, "grad_norm": 1.00067850003655, "learning_rate": 4.8395997976560494e-05, "loss": 0.6487, "step": 5650 }, { "epoch": 0.16503720998103022, "grad_norm": 1.227101205694228, "learning_rate": 4.839196945964713e-05, "loss": 0.7506, "step": 5655 }, { "epoch": 0.1651831314752663, "grad_norm": 0.8271325522279511, "learning_rate": 4.838793607764847e-05, "loss": 0.6034, "step": 5660 }, { "epoch": 0.1653290529695024, "grad_norm": 0.9097144350949397, "learning_rate": 4.838389783150377e-05, "loss": 0.6275, "step": 5665 }, { "epoch": 0.16547497446373852, "grad_norm": 1.023868334317864, "learning_rate": 4.837985472215343e-05, "loss": 0.737, "step": 5670 }, { "epoch": 0.1656208959579746, "grad_norm": 1.0397327730501735, "learning_rate": 4.8375806750538935e-05, "loss": 0.6662, "step": 5675 }, { "epoch": 0.16576681745221072, "grad_norm": 1.1020131697539324, "learning_rate": 4.837175391760295e-05, "loss": 0.7612, "step": 5680 }, { "epoch": 0.1659127389464468, "grad_norm": 1.0589878823799486, "learning_rate": 4.8367696224289246e-05, "loss": 0.7373, "step": 5685 }, { "epoch": 0.1660586604406829, "grad_norm": 1.1197679951174682, "learning_rate": 4.836363367154273e-05, "loss": 0.7379, "step": 5690 }, { "epoch": 0.16620458193491902, "grad_norm": 1.114287211537706, "learning_rate": 4.835956626030946e-05, "loss": 0.6572, "step": 5695 }, { "epoch": 0.1663505034291551, "grad_norm": 0.936589720333887, "learning_rate": 4.835549399153659e-05, "loss": 0.6742, "step": 5700 }, { "epoch": 0.16649642492339123, "grad_norm": 1.044362062233453, "learning_rate": 4.835141686617242e-05, "loss": 0.6395, "step": 5705 }, { "epoch": 0.16664234641762732, "grad_norm": 1.309826716022715, "learning_rate": 4.8347334885166415e-05, "loss": 0.7038, "step": 5710 }, { "epoch": 0.1667882679118634, "grad_norm": 1.0851768951951963, "learning_rate": 4.83432480494691e-05, "loss": 0.6787, "step": 5715 }, { "epoch": 0.16693418940609953, "grad_norm": 1.3255765713766328, "learning_rate": 4.833915636003221e-05, "loss": 0.6378, "step": 5720 }, { "epoch": 0.16708011090033562, "grad_norm": 1.1504464039235212, "learning_rate": 4.8335059817808534e-05, "loss": 0.6257, "step": 5725 }, { "epoch": 0.16722603239457173, "grad_norm": 1.210658498690085, "learning_rate": 4.833095842375205e-05, "loss": 0.6785, "step": 5730 }, { "epoch": 0.16737195388880782, "grad_norm": 1.1507376808743373, "learning_rate": 4.832685217881785e-05, "loss": 0.6526, "step": 5735 }, { "epoch": 0.1675178753830439, "grad_norm": 0.935819823610171, "learning_rate": 4.832274108396214e-05, "loss": 0.6681, "step": 5740 }, { "epoch": 0.16766379687728003, "grad_norm": 0.9730334815352761, "learning_rate": 4.831862514014226e-05, "loss": 0.7321, "step": 5745 }, { "epoch": 0.16780971837151612, "grad_norm": 0.9993958934888955, "learning_rate": 4.8314504348316696e-05, "loss": 0.6565, "step": 5750 }, { "epoch": 0.16795563986575224, "grad_norm": 1.001239674928972, "learning_rate": 4.8310378709445046e-05, "loss": 0.6337, "step": 5755 }, { "epoch": 0.16810156135998833, "grad_norm": 1.0611798977997413, "learning_rate": 4.830624822448804e-05, "loss": 0.6625, "step": 5760 }, { "epoch": 0.16824748285422442, "grad_norm": 0.9888993272834491, "learning_rate": 4.830211289440753e-05, "loss": 0.6557, "step": 5765 }, { "epoch": 0.16839340434846053, "grad_norm": 2.152173819012769, "learning_rate": 4.829797272016653e-05, "loss": 0.766, "step": 5770 }, { "epoch": 0.16853932584269662, "grad_norm": 1.0701881200332837, "learning_rate": 4.829382770272914e-05, "loss": 0.6659, "step": 5775 }, { "epoch": 0.16868524733693274, "grad_norm": 0.9238367398869555, "learning_rate": 4.82896778430606e-05, "loss": 0.6124, "step": 5780 }, { "epoch": 0.16883116883116883, "grad_norm": 1.0253541688718055, "learning_rate": 4.82855231421273e-05, "loss": 0.6254, "step": 5785 }, { "epoch": 0.16897709032540492, "grad_norm": 0.9757344795619404, "learning_rate": 4.828136360089672e-05, "loss": 0.6405, "step": 5790 }, { "epoch": 0.16912301181964104, "grad_norm": 1.0157182161251295, "learning_rate": 4.8277199220337505e-05, "loss": 0.5949, "step": 5795 }, { "epoch": 0.16926893331387713, "grad_norm": 1.1023884601325222, "learning_rate": 4.827303000141939e-05, "loss": 0.7219, "step": 5800 }, { "epoch": 0.16941485480811325, "grad_norm": 1.1919971111854084, "learning_rate": 4.826885594511326e-05, "loss": 0.7427, "step": 5805 }, { "epoch": 0.16956077630234934, "grad_norm": 1.2008317899810013, "learning_rate": 4.8264677052391145e-05, "loss": 0.7178, "step": 5810 }, { "epoch": 0.16970669779658543, "grad_norm": 1.060616247228624, "learning_rate": 4.8260493324226144e-05, "loss": 0.6464, "step": 5815 }, { "epoch": 0.16985261929082154, "grad_norm": 1.170001997881897, "learning_rate": 4.825630476159255e-05, "loss": 0.7141, "step": 5820 }, { "epoch": 0.16999854078505763, "grad_norm": 1.263408537229351, "learning_rate": 4.8252111365465716e-05, "loss": 0.7057, "step": 5825 }, { "epoch": 0.17014446227929375, "grad_norm": 1.267697984164991, "learning_rate": 4.824791313682218e-05, "loss": 0.6737, "step": 5830 }, { "epoch": 0.17029038377352984, "grad_norm": 1.1313684764504524, "learning_rate": 4.824371007663955e-05, "loss": 0.6446, "step": 5835 }, { "epoch": 0.17043630526776593, "grad_norm": 0.8904872063757217, "learning_rate": 4.823950218589661e-05, "loss": 0.5936, "step": 5840 }, { "epoch": 0.17058222676200205, "grad_norm": 0.9177114011028235, "learning_rate": 4.823528946557324e-05, "loss": 0.6051, "step": 5845 }, { "epoch": 0.17072814825623814, "grad_norm": 1.0465279849310067, "learning_rate": 4.8231071916650437e-05, "loss": 0.6362, "step": 5850 }, { "epoch": 0.17087406975047426, "grad_norm": 1.0484570449255428, "learning_rate": 4.822684954011036e-05, "loss": 0.6922, "step": 5855 }, { "epoch": 0.17101999124471035, "grad_norm": 1.0630360493698148, "learning_rate": 4.822262233693625e-05, "loss": 0.6545, "step": 5860 }, { "epoch": 0.17116591273894644, "grad_norm": 1.064448379920905, "learning_rate": 4.82183903081125e-05, "loss": 0.6681, "step": 5865 }, { "epoch": 0.17131183423318255, "grad_norm": 1.0179475441838266, "learning_rate": 4.821415345462462e-05, "loss": 0.6805, "step": 5870 }, { "epoch": 0.17145775572741864, "grad_norm": 1.0529543354757331, "learning_rate": 4.8209911777459224e-05, "loss": 0.7163, "step": 5875 }, { "epoch": 0.17160367722165476, "grad_norm": 1.2444090941640662, "learning_rate": 4.820566527760408e-05, "loss": 0.6994, "step": 5880 }, { "epoch": 0.17174959871589085, "grad_norm": 0.9796656943074434, "learning_rate": 4.820141395604806e-05, "loss": 0.6353, "step": 5885 }, { "epoch": 0.17189552021012694, "grad_norm": 1.0947312013035475, "learning_rate": 4.819715781378115e-05, "loss": 0.753, "step": 5890 }, { "epoch": 0.17204144170436306, "grad_norm": 1.0291982170118557, "learning_rate": 4.8192896851794505e-05, "loss": 0.6487, "step": 5895 }, { "epoch": 0.17218736319859915, "grad_norm": 1.0234653008674273, "learning_rate": 4.818863107108034e-05, "loss": 0.7204, "step": 5900 }, { "epoch": 0.17233328469283526, "grad_norm": 1.1212524385769005, "learning_rate": 4.818436047263204e-05, "loss": 0.657, "step": 5905 }, { "epoch": 0.17247920618707135, "grad_norm": 1.0782562264447584, "learning_rate": 4.818008505744408e-05, "loss": 0.681, "step": 5910 }, { "epoch": 0.17262512768130744, "grad_norm": 1.0428112906770304, "learning_rate": 4.817580482651208e-05, "loss": 0.6951, "step": 5915 }, { "epoch": 0.17277104917554356, "grad_norm": 0.972462681192514, "learning_rate": 4.8171519780832765e-05, "loss": 0.6707, "step": 5920 }, { "epoch": 0.17291697066977965, "grad_norm": 1.0255109633322963, "learning_rate": 4.816722992140399e-05, "loss": 0.62, "step": 5925 }, { "epoch": 0.17306289216401577, "grad_norm": 1.0797119296181434, "learning_rate": 4.816293524922474e-05, "loss": 0.7043, "step": 5930 }, { "epoch": 0.17320881365825186, "grad_norm": 1.1114160469440066, "learning_rate": 4.815863576529509e-05, "loss": 0.6796, "step": 5935 }, { "epoch": 0.17335473515248795, "grad_norm": 1.0707451870361893, "learning_rate": 4.815433147061627e-05, "loss": 0.7113, "step": 5940 }, { "epoch": 0.17350065664672407, "grad_norm": 0.9441907324371049, "learning_rate": 4.8150022366190603e-05, "loss": 0.6214, "step": 5945 }, { "epoch": 0.17364657814096016, "grad_norm": 1.003492656610968, "learning_rate": 4.8145708453021556e-05, "loss": 0.7142, "step": 5950 }, { "epoch": 0.17379249963519627, "grad_norm": 0.995657731749954, "learning_rate": 4.814138973211369e-05, "loss": 0.6679, "step": 5955 }, { "epoch": 0.17393842112943236, "grad_norm": 1.1481344668759932, "learning_rate": 4.813706620447272e-05, "loss": 0.6584, "step": 5960 }, { "epoch": 0.17408434262366845, "grad_norm": 1.0689489093974827, "learning_rate": 4.813273787110545e-05, "loss": 0.6767, "step": 5965 }, { "epoch": 0.17423026411790457, "grad_norm": 0.9954588182114794, "learning_rate": 4.812840473301981e-05, "loss": 0.6818, "step": 5970 }, { "epoch": 0.17437618561214066, "grad_norm": 0.9873849933885248, "learning_rate": 4.8124066791224856e-05, "loss": 0.6692, "step": 5975 }, { "epoch": 0.17452210710637678, "grad_norm": 1.2391376806185903, "learning_rate": 4.811972404673076e-05, "loss": 0.7145, "step": 5980 }, { "epoch": 0.17466802860061287, "grad_norm": 1.1920169049206775, "learning_rate": 4.8115376500548805e-05, "loss": 0.6718, "step": 5985 }, { "epoch": 0.17481395009484896, "grad_norm": 1.041370507070442, "learning_rate": 4.8111024153691404e-05, "loss": 0.6688, "step": 5990 }, { "epoch": 0.17495987158908508, "grad_norm": 0.9506914333904248, "learning_rate": 4.810666700717208e-05, "loss": 0.6129, "step": 5995 }, { "epoch": 0.17510579308332117, "grad_norm": 1.1161032087763265, "learning_rate": 4.810230506200548e-05, "loss": 0.66, "step": 6000 }, { "epoch": 0.17525171457755728, "grad_norm": 1.1309355315614864, "learning_rate": 4.8097938319207344e-05, "loss": 0.6208, "step": 6005 }, { "epoch": 0.17539763607179337, "grad_norm": 1.123185824147339, "learning_rate": 4.809356677979459e-05, "loss": 0.6356, "step": 6010 }, { "epoch": 0.1755435575660295, "grad_norm": 1.132026453055045, "learning_rate": 4.808919044478518e-05, "loss": 0.664, "step": 6015 }, { "epoch": 0.17568947906026558, "grad_norm": 1.036896144774975, "learning_rate": 4.808480931519823e-05, "loss": 0.6217, "step": 6020 }, { "epoch": 0.17583540055450167, "grad_norm": 1.027792930328618, "learning_rate": 4.8080423392053974e-05, "loss": 0.6529, "step": 6025 }, { "epoch": 0.1759813220487378, "grad_norm": 1.1116890228310785, "learning_rate": 4.807603267637376e-05, "loss": 0.7438, "step": 6030 }, { "epoch": 0.17612724354297388, "grad_norm": 1.0100042795081638, "learning_rate": 4.807163716918004e-05, "loss": 0.6307, "step": 6035 }, { "epoch": 0.17627316503721, "grad_norm": 1.1178401914737577, "learning_rate": 4.806723687149639e-05, "loss": 0.5722, "step": 6040 }, { "epoch": 0.17641908653144608, "grad_norm": 1.0737063318935556, "learning_rate": 4.80628317843475e-05, "loss": 0.6561, "step": 6045 }, { "epoch": 0.17656500802568217, "grad_norm": 0.9639959347348943, "learning_rate": 4.805842190875919e-05, "loss": 0.6287, "step": 6050 }, { "epoch": 0.1767109295199183, "grad_norm": 1.1674550348764956, "learning_rate": 4.8054007245758366e-05, "loss": 0.7137, "step": 6055 }, { "epoch": 0.17685685101415438, "grad_norm": 1.1415061379870395, "learning_rate": 4.804958779637307e-05, "loss": 0.6984, "step": 6060 }, { "epoch": 0.1770027725083905, "grad_norm": 0.9811302103482721, "learning_rate": 4.8045163561632464e-05, "loss": 0.6271, "step": 6065 }, { "epoch": 0.1771486940026266, "grad_norm": 1.0717390539277747, "learning_rate": 4.80407345425668e-05, "loss": 0.6716, "step": 6070 }, { "epoch": 0.17729461549686268, "grad_norm": 1.0128661408453679, "learning_rate": 4.803630074020746e-05, "loss": 0.638, "step": 6075 }, { "epoch": 0.1774405369910988, "grad_norm": 1.0359011777591365, "learning_rate": 4.803186215558694e-05, "loss": 0.7067, "step": 6080 }, { "epoch": 0.17758645848533489, "grad_norm": 0.9434861313338309, "learning_rate": 4.8027418789738856e-05, "loss": 0.6349, "step": 6085 }, { "epoch": 0.177732379979571, "grad_norm": 0.9817959766461949, "learning_rate": 4.802297064369792e-05, "loss": 0.5733, "step": 6090 }, { "epoch": 0.1778783014738071, "grad_norm": 1.1010829244932019, "learning_rate": 4.8018517718499954e-05, "loss": 0.6447, "step": 6095 }, { "epoch": 0.17802422296804318, "grad_norm": 1.1612357818858425, "learning_rate": 4.801406001518193e-05, "loss": 0.7169, "step": 6100 }, { "epoch": 0.1781701444622793, "grad_norm": 1.0588180113576187, "learning_rate": 4.800959753478188e-05, "loss": 0.6918, "step": 6105 }, { "epoch": 0.1783160659565154, "grad_norm": 1.0819446219492115, "learning_rate": 4.800513027833899e-05, "loss": 0.7051, "step": 6110 }, { "epoch": 0.1784619874507515, "grad_norm": 1.26935986814738, "learning_rate": 4.800065824689355e-05, "loss": 0.6974, "step": 6115 }, { "epoch": 0.1786079089449876, "grad_norm": 0.9414455874726568, "learning_rate": 4.7996181441486956e-05, "loss": 0.6378, "step": 6120 }, { "epoch": 0.1787538304392237, "grad_norm": 1.0219113583134098, "learning_rate": 4.79916998631617e-05, "loss": 0.6913, "step": 6125 }, { "epoch": 0.1788997519334598, "grad_norm": 1.077174276236011, "learning_rate": 4.798721351296143e-05, "loss": 0.6507, "step": 6130 }, { "epoch": 0.1790456734276959, "grad_norm": 1.5031551601664102, "learning_rate": 4.7982722391930836e-05, "loss": 0.6695, "step": 6135 }, { "epoch": 0.179191594921932, "grad_norm": 1.2128193036952357, "learning_rate": 4.797822650111578e-05, "loss": 0.6958, "step": 6140 }, { "epoch": 0.1793375164161681, "grad_norm": 0.9556463067340181, "learning_rate": 4.7973725841563235e-05, "loss": 0.582, "step": 6145 }, { "epoch": 0.1794834379104042, "grad_norm": 0.8949170976311094, "learning_rate": 4.796922041432123e-05, "loss": 0.6042, "step": 6150 }, { "epoch": 0.1796293594046403, "grad_norm": 0.8307943038662808, "learning_rate": 4.7964710220438955e-05, "loss": 0.5589, "step": 6155 }, { "epoch": 0.1797752808988764, "grad_norm": 1.2085501426829277, "learning_rate": 4.796019526096669e-05, "loss": 0.6419, "step": 6160 }, { "epoch": 0.17992120239311252, "grad_norm": 1.203525025146659, "learning_rate": 4.795567553695583e-05, "loss": 0.6876, "step": 6165 }, { "epoch": 0.1800671238873486, "grad_norm": 1.137892597316037, "learning_rate": 4.795115104945887e-05, "loss": 0.6512, "step": 6170 }, { "epoch": 0.1802130453815847, "grad_norm": 0.9297481779726161, "learning_rate": 4.7946621799529425e-05, "loss": 0.5781, "step": 6175 }, { "epoch": 0.18035896687582081, "grad_norm": 0.9683969973650844, "learning_rate": 4.794208778822222e-05, "loss": 0.7322, "step": 6180 }, { "epoch": 0.1805048883700569, "grad_norm": 1.1950088806934311, "learning_rate": 4.793754901659308e-05, "loss": 0.6243, "step": 6185 }, { "epoch": 0.18065080986429302, "grad_norm": 1.069789473235599, "learning_rate": 4.7933005485698935e-05, "loss": 0.6426, "step": 6190 }, { "epoch": 0.1807967313585291, "grad_norm": 0.9221254606544639, "learning_rate": 4.792845719659785e-05, "loss": 0.6564, "step": 6195 }, { "epoch": 0.1809426528527652, "grad_norm": 1.0405067806342225, "learning_rate": 4.792390415034896e-05, "loss": 0.6926, "step": 6200 }, { "epoch": 0.18108857434700132, "grad_norm": 0.9865959892063116, "learning_rate": 4.791934634801254e-05, "loss": 0.6577, "step": 6205 }, { "epoch": 0.1812344958412374, "grad_norm": 1.1541761691466992, "learning_rate": 4.791478379064995e-05, "loss": 0.6403, "step": 6210 }, { "epoch": 0.18138041733547353, "grad_norm": 1.0592593559496497, "learning_rate": 4.791021647932368e-05, "loss": 0.6836, "step": 6215 }, { "epoch": 0.18152633882970962, "grad_norm": 1.054394693247683, "learning_rate": 4.79056444150973e-05, "loss": 0.6195, "step": 6220 }, { "epoch": 0.1816722603239457, "grad_norm": 1.092947735961128, "learning_rate": 4.79010675990355e-05, "loss": 0.6957, "step": 6225 }, { "epoch": 0.18181818181818182, "grad_norm": 0.9145313085964767, "learning_rate": 4.789648603220409e-05, "loss": 0.6493, "step": 6230 }, { "epoch": 0.1819641033124179, "grad_norm": 1.1543108191792981, "learning_rate": 4.789189971566996e-05, "loss": 0.707, "step": 6235 }, { "epoch": 0.18211002480665403, "grad_norm": 0.9973475345644737, "learning_rate": 4.788730865050113e-05, "loss": 0.6055, "step": 6240 }, { "epoch": 0.18225594630089012, "grad_norm": 0.9964083712353639, "learning_rate": 4.788271283776671e-05, "loss": 0.6313, "step": 6245 }, { "epoch": 0.1824018677951262, "grad_norm": 1.1053482173325482, "learning_rate": 4.787811227853693e-05, "loss": 0.6362, "step": 6250 }, { "epoch": 0.18254778928936233, "grad_norm": 1.0657924676749686, "learning_rate": 4.78735069738831e-05, "loss": 0.6592, "step": 6255 }, { "epoch": 0.18269371078359842, "grad_norm": 1.3867094774485877, "learning_rate": 4.786889692487766e-05, "loss": 0.7252, "step": 6260 }, { "epoch": 0.18283963227783454, "grad_norm": 0.8749883791255879, "learning_rate": 4.7864282132594144e-05, "loss": 0.6059, "step": 6265 }, { "epoch": 0.18298555377207062, "grad_norm": 1.1091481714620315, "learning_rate": 4.7859662598107184e-05, "loss": 0.6656, "step": 6270 }, { "epoch": 0.18313147526630671, "grad_norm": 1.2251151464167234, "learning_rate": 4.785503832249255e-05, "loss": 0.6715, "step": 6275 }, { "epoch": 0.18327739676054283, "grad_norm": 1.3348698757094466, "learning_rate": 4.785040930682706e-05, "loss": 0.7539, "step": 6280 }, { "epoch": 0.18342331825477892, "grad_norm": 1.1331883741282, "learning_rate": 4.784577555218871e-05, "loss": 0.7123, "step": 6285 }, { "epoch": 0.18356923974901504, "grad_norm": 1.0607702341484464, "learning_rate": 4.7841137059656496e-05, "loss": 0.6456, "step": 6290 }, { "epoch": 0.18371516124325113, "grad_norm": 0.9179007088115045, "learning_rate": 4.783649383031063e-05, "loss": 0.6533, "step": 6295 }, { "epoch": 0.18386108273748722, "grad_norm": 0.928421946967105, "learning_rate": 4.783184586523235e-05, "loss": 0.6611, "step": 6300 }, { "epoch": 0.18400700423172334, "grad_norm": 0.9657871704618354, "learning_rate": 4.782719316550402e-05, "loss": 0.6562, "step": 6305 }, { "epoch": 0.18415292572595943, "grad_norm": 1.0250506981466307, "learning_rate": 4.7822535732209114e-05, "loss": 0.6743, "step": 6310 }, { "epoch": 0.18429884722019554, "grad_norm": 1.2547229483808064, "learning_rate": 4.781787356643221e-05, "loss": 0.6738, "step": 6315 }, { "epoch": 0.18444476871443163, "grad_norm": 1.053602139242692, "learning_rate": 4.781320666925897e-05, "loss": 0.6908, "step": 6320 }, { "epoch": 0.18459069020866772, "grad_norm": 1.1389212011012986, "learning_rate": 4.780853504177618e-05, "loss": 0.7353, "step": 6325 }, { "epoch": 0.18473661170290384, "grad_norm": 0.9306343862163826, "learning_rate": 4.780385868507169e-05, "loss": 0.578, "step": 6330 }, { "epoch": 0.18488253319713993, "grad_norm": 1.3112971974964946, "learning_rate": 4.77991776002345e-05, "loss": 0.7199, "step": 6335 }, { "epoch": 0.18502845469137605, "grad_norm": 0.9185205421190875, "learning_rate": 4.779449178835469e-05, "loss": 0.643, "step": 6340 }, { "epoch": 0.18517437618561214, "grad_norm": 0.9408501696544516, "learning_rate": 4.778980125052342e-05, "loss": 0.6195, "step": 6345 }, { "epoch": 0.18532029767984823, "grad_norm": 1.1435914700391108, "learning_rate": 4.778510598783298e-05, "loss": 0.7102, "step": 6350 }, { "epoch": 0.18546621917408435, "grad_norm": 1.0323140847819996, "learning_rate": 4.778040600137675e-05, "loss": 0.7061, "step": 6355 }, { "epoch": 0.18561214066832044, "grad_norm": 0.8508506708388412, "learning_rate": 4.7775701292249216e-05, "loss": 0.6398, "step": 6360 }, { "epoch": 0.18575806216255655, "grad_norm": 1.0913335807064852, "learning_rate": 4.777099186154596e-05, "loss": 0.6632, "step": 6365 }, { "epoch": 0.18590398365679264, "grad_norm": 1.1264675963207216, "learning_rate": 4.776627771036363e-05, "loss": 0.6836, "step": 6370 }, { "epoch": 0.18604990515102873, "grad_norm": 1.0901891942435533, "learning_rate": 4.776155883980004e-05, "loss": 0.6542, "step": 6375 }, { "epoch": 0.18619582664526485, "grad_norm": 0.8510195056285907, "learning_rate": 4.775683525095405e-05, "loss": 0.6037, "step": 6380 }, { "epoch": 0.18634174813950094, "grad_norm": 1.0702993445210793, "learning_rate": 4.775210694492563e-05, "loss": 0.6518, "step": 6385 }, { "epoch": 0.18648766963373706, "grad_norm": 1.0103717806419281, "learning_rate": 4.774737392281587e-05, "loss": 0.6251, "step": 6390 }, { "epoch": 0.18663359112797315, "grad_norm": 1.0444080874384256, "learning_rate": 4.774263618572693e-05, "loss": 0.6471, "step": 6395 }, { "epoch": 0.18677951262220926, "grad_norm": 1.077373469858686, "learning_rate": 4.7737893734762094e-05, "loss": 0.642, "step": 6400 }, { "epoch": 0.18692543411644535, "grad_norm": 1.2061842981349742, "learning_rate": 4.773314657102573e-05, "loss": 0.6482, "step": 6405 }, { "epoch": 0.18707135561068144, "grad_norm": 1.3391927614225956, "learning_rate": 4.772839469562328e-05, "loss": 0.6393, "step": 6410 }, { "epoch": 0.18721727710491756, "grad_norm": 1.0302767115260794, "learning_rate": 4.7723638109661326e-05, "loss": 0.6479, "step": 6415 }, { "epoch": 0.18736319859915365, "grad_norm": 1.0821221185157603, "learning_rate": 4.771887681424753e-05, "loss": 0.6549, "step": 6420 }, { "epoch": 0.18750912009338977, "grad_norm": 1.1196781487509881, "learning_rate": 4.771411081049065e-05, "loss": 0.6575, "step": 6425 }, { "epoch": 0.18765504158762586, "grad_norm": 1.096012458357204, "learning_rate": 4.770934009950052e-05, "loss": 0.7954, "step": 6430 }, { "epoch": 0.18780096308186195, "grad_norm": 1.054387235681322, "learning_rate": 4.770456468238811e-05, "loss": 0.6618, "step": 6435 }, { "epoch": 0.18794688457609807, "grad_norm": 1.1934403162734706, "learning_rate": 4.769978456026545e-05, "loss": 0.6832, "step": 6440 }, { "epoch": 0.18809280607033416, "grad_norm": 1.108957855217466, "learning_rate": 4.769499973424569e-05, "loss": 0.7001, "step": 6445 }, { "epoch": 0.18823872756457027, "grad_norm": 1.121477539021858, "learning_rate": 4.7690210205443064e-05, "loss": 0.5854, "step": 6450 }, { "epoch": 0.18838464905880636, "grad_norm": 0.963012984332486, "learning_rate": 4.76854159749729e-05, "loss": 0.6395, "step": 6455 }, { "epoch": 0.18853057055304245, "grad_norm": 0.9717360416316129, "learning_rate": 4.768061704395164e-05, "loss": 0.6392, "step": 6460 }, { "epoch": 0.18867649204727857, "grad_norm": 1.0446803016216542, "learning_rate": 4.7675813413496776e-05, "loss": 0.6067, "step": 6465 }, { "epoch": 0.18882241354151466, "grad_norm": 1.1289546660224659, "learning_rate": 4.767100508472694e-05, "loss": 0.6597, "step": 6470 }, { "epoch": 0.18896833503575078, "grad_norm": 1.0813879804760762, "learning_rate": 4.7666192058761846e-05, "loss": 0.5891, "step": 6475 }, { "epoch": 0.18911425652998687, "grad_norm": 0.9874927657928418, "learning_rate": 4.7661374336722295e-05, "loss": 0.6059, "step": 6480 }, { "epoch": 0.18926017802422296, "grad_norm": 1.1574181698748829, "learning_rate": 4.765655191973017e-05, "loss": 0.6947, "step": 6485 }, { "epoch": 0.18940609951845908, "grad_norm": 0.9236695678307092, "learning_rate": 4.765172480890847e-05, "loss": 0.6225, "step": 6490 }, { "epoch": 0.18955202101269517, "grad_norm": 1.1691949116014009, "learning_rate": 4.764689300538129e-05, "loss": 0.6598, "step": 6495 }, { "epoch": 0.18969794250693128, "grad_norm": 1.0012166008101588, "learning_rate": 4.764205651027378e-05, "loss": 0.659, "step": 6500 }, { "epoch": 0.18984386400116737, "grad_norm": 1.096608112673433, "learning_rate": 4.763721532471223e-05, "loss": 0.693, "step": 6505 }, { "epoch": 0.18998978549540346, "grad_norm": 1.0921466671057476, "learning_rate": 4.7632369449823985e-05, "loss": 0.6536, "step": 6510 }, { "epoch": 0.19013570698963958, "grad_norm": 1.0037785754982855, "learning_rate": 4.762751888673752e-05, "loss": 0.6204, "step": 6515 }, { "epoch": 0.19028162848387567, "grad_norm": 1.2000223857746146, "learning_rate": 4.762266363658236e-05, "loss": 0.6262, "step": 6520 }, { "epoch": 0.1904275499781118, "grad_norm": 0.9308168238781539, "learning_rate": 4.761780370048914e-05, "loss": 0.6649, "step": 6525 }, { "epoch": 0.19057347147234788, "grad_norm": 0.9283035334728622, "learning_rate": 4.761293907958959e-05, "loss": 0.653, "step": 6530 }, { "epoch": 0.19071939296658397, "grad_norm": 1.1500026947608422, "learning_rate": 4.760806977501654e-05, "loss": 0.6856, "step": 6535 }, { "epoch": 0.19086531446082008, "grad_norm": 0.9758282295107473, "learning_rate": 4.760319578790387e-05, "loss": 0.6762, "step": 6540 }, { "epoch": 0.19101123595505617, "grad_norm": 1.0090983716089659, "learning_rate": 4.759831711938661e-05, "loss": 0.6334, "step": 6545 }, { "epoch": 0.1911571574492923, "grad_norm": 1.0738045462103987, "learning_rate": 4.759343377060083e-05, "loss": 0.6486, "step": 6550 }, { "epoch": 0.19130307894352838, "grad_norm": 1.116726838877331, "learning_rate": 4.758854574268372e-05, "loss": 0.7294, "step": 6555 }, { "epoch": 0.19144900043776447, "grad_norm": 1.1269600840472502, "learning_rate": 4.758365303677354e-05, "loss": 0.6475, "step": 6560 }, { "epoch": 0.1915949219320006, "grad_norm": 1.1690961820421073, "learning_rate": 4.757875565400966e-05, "loss": 0.7045, "step": 6565 }, { "epoch": 0.19174084342623668, "grad_norm": 0.9105339143251346, "learning_rate": 4.7573853595532514e-05, "loss": 0.6553, "step": 6570 }, { "epoch": 0.1918867649204728, "grad_norm": 1.0393979860287295, "learning_rate": 4.756894686248364e-05, "loss": 0.6044, "step": 6575 }, { "epoch": 0.1920326864147089, "grad_norm": 1.0910726865267946, "learning_rate": 4.7564035456005665e-05, "loss": 0.6641, "step": 6580 }, { "epoch": 0.19217860790894498, "grad_norm": 1.0476567579804363, "learning_rate": 4.7559119377242314e-05, "loss": 0.678, "step": 6585 }, { "epoch": 0.1923245294031811, "grad_norm": 1.3435523023442613, "learning_rate": 4.755419862733836e-05, "loss": 0.5655, "step": 6590 }, { "epoch": 0.19247045089741718, "grad_norm": 1.1618468904465096, "learning_rate": 4.754927320743971e-05, "loss": 0.7504, "step": 6595 }, { "epoch": 0.1926163723916533, "grad_norm": 1.069578954625427, "learning_rate": 4.7544343118693333e-05, "loss": 0.6604, "step": 6600 }, { "epoch": 0.1927622938858894, "grad_norm": 1.1058686229663288, "learning_rate": 4.753940836224731e-05, "loss": 0.6901, "step": 6605 }, { "epoch": 0.19290821538012548, "grad_norm": 1.0441784519624024, "learning_rate": 4.7534468939250776e-05, "loss": 0.6507, "step": 6610 }, { "epoch": 0.1930541368743616, "grad_norm": 1.3172366548273295, "learning_rate": 4.752952485085396e-05, "loss": 0.7757, "step": 6615 }, { "epoch": 0.1932000583685977, "grad_norm": 1.0919544046884762, "learning_rate": 4.7524576098208195e-05, "loss": 0.6227, "step": 6620 }, { "epoch": 0.1933459798628338, "grad_norm": 1.011298643045648, "learning_rate": 4.7519622682465896e-05, "loss": 0.6433, "step": 6625 }, { "epoch": 0.1934919013570699, "grad_norm": 1.103939144420848, "learning_rate": 4.751466460478055e-05, "loss": 0.6967, "step": 6630 }, { "epoch": 0.19363782285130599, "grad_norm": 1.0199218675373252, "learning_rate": 4.750970186630674e-05, "loss": 0.6224, "step": 6635 }, { "epoch": 0.1937837443455421, "grad_norm": 0.9267471049802014, "learning_rate": 4.750473446820014e-05, "loss": 0.6635, "step": 6640 }, { "epoch": 0.1939296658397782, "grad_norm": 1.046856200619719, "learning_rate": 4.749976241161749e-05, "loss": 0.6996, "step": 6645 }, { "epoch": 0.1940755873340143, "grad_norm": 1.1076283860982237, "learning_rate": 4.7494785697716635e-05, "loss": 0.6673, "step": 6650 }, { "epoch": 0.1942215088282504, "grad_norm": 1.0289516170087116, "learning_rate": 4.748980432765648e-05, "loss": 0.6571, "step": 6655 }, { "epoch": 0.1943674303224865, "grad_norm": 1.383148541260663, "learning_rate": 4.7484818302597044e-05, "loss": 0.7467, "step": 6660 }, { "epoch": 0.1945133518167226, "grad_norm": 0.9981041575891282, "learning_rate": 4.747982762369942e-05, "loss": 0.6326, "step": 6665 }, { "epoch": 0.1946592733109587, "grad_norm": 1.015899829286423, "learning_rate": 4.747483229212577e-05, "loss": 0.6858, "step": 6670 }, { "epoch": 0.19480519480519481, "grad_norm": 1.0888391003400781, "learning_rate": 4.746983230903936e-05, "loss": 0.6497, "step": 6675 }, { "epoch": 0.1949511162994309, "grad_norm": 1.0169088233194108, "learning_rate": 4.746482767560452e-05, "loss": 0.6399, "step": 6680 }, { "epoch": 0.195097037793667, "grad_norm": 1.2240384293424118, "learning_rate": 4.7459818392986674e-05, "loss": 0.6296, "step": 6685 }, { "epoch": 0.1952429592879031, "grad_norm": 0.9764077789927863, "learning_rate": 4.745480446235233e-05, "loss": 0.6501, "step": 6690 }, { "epoch": 0.1953888807821392, "grad_norm": 1.031592025792826, "learning_rate": 4.7449785884869075e-05, "loss": 0.6349, "step": 6695 }, { "epoch": 0.19553480227637532, "grad_norm": 0.9722213567725236, "learning_rate": 4.744476266170558e-05, "loss": 0.6906, "step": 6700 }, { "epoch": 0.1956807237706114, "grad_norm": 0.9781752872530398, "learning_rate": 4.7439734794031594e-05, "loss": 0.6642, "step": 6705 }, { "epoch": 0.1958266452648475, "grad_norm": 0.9368228142924034, "learning_rate": 4.743470228301795e-05, "loss": 0.6062, "step": 6710 }, { "epoch": 0.19597256675908362, "grad_norm": 1.120891321215899, "learning_rate": 4.7429665129836566e-05, "loss": 0.68, "step": 6715 }, { "epoch": 0.1961184882533197, "grad_norm": 0.9755363423344646, "learning_rate": 4.742462333566043e-05, "loss": 0.6728, "step": 6720 }, { "epoch": 0.19626440974755582, "grad_norm": 0.9037010963358724, "learning_rate": 4.741957690166362e-05, "loss": 0.6124, "step": 6725 }, { "epoch": 0.1964103312417919, "grad_norm": 0.935710690771161, "learning_rate": 4.74145258290213e-05, "loss": 0.6664, "step": 6730 }, { "epoch": 0.196556252736028, "grad_norm": 0.9508973949324396, "learning_rate": 4.7409470118909696e-05, "loss": 0.6252, "step": 6735 }, { "epoch": 0.19670217423026412, "grad_norm": 1.1397935191116033, "learning_rate": 4.740440977250613e-05, "loss": 0.5876, "step": 6740 }, { "epoch": 0.1968480957245002, "grad_norm": 1.0865488555557214, "learning_rate": 4.7399344790989e-05, "loss": 0.6663, "step": 6745 }, { "epoch": 0.19699401721873633, "grad_norm": 0.9425440292226442, "learning_rate": 4.7394275175537786e-05, "loss": 0.6417, "step": 6750 }, { "epoch": 0.19713993871297242, "grad_norm": 1.3444134711085947, "learning_rate": 4.738920092733302e-05, "loss": 0.6242, "step": 6755 }, { "epoch": 0.19728586020720854, "grad_norm": 1.1784758621807747, "learning_rate": 4.738412204755636e-05, "loss": 0.6504, "step": 6760 }, { "epoch": 0.19743178170144463, "grad_norm": 1.0366145966340217, "learning_rate": 4.7379038537390504e-05, "loss": 0.6234, "step": 6765 }, { "epoch": 0.19757770319568072, "grad_norm": 1.3248750088146903, "learning_rate": 4.737395039801926e-05, "loss": 0.7027, "step": 6770 }, { "epoch": 0.19772362468991683, "grad_norm": 1.3379530734272604, "learning_rate": 4.736885763062747e-05, "loss": 0.6045, "step": 6775 }, { "epoch": 0.19786954618415292, "grad_norm": 0.9822153261271002, "learning_rate": 4.736376023640111e-05, "loss": 0.5932, "step": 6780 }, { "epoch": 0.19801546767838904, "grad_norm": 1.041987249695403, "learning_rate": 4.735865821652717e-05, "loss": 0.6688, "step": 6785 }, { "epoch": 0.19816138917262513, "grad_norm": 1.033502153320255, "learning_rate": 4.7353551572193774e-05, "loss": 0.6069, "step": 6790 }, { "epoch": 0.19830731066686122, "grad_norm": 1.2771916490689248, "learning_rate": 4.734844030459009e-05, "loss": 0.6547, "step": 6795 }, { "epoch": 0.19845323216109734, "grad_norm": 1.1844232423303866, "learning_rate": 4.734332441490638e-05, "loss": 0.6676, "step": 6800 }, { "epoch": 0.19859915365533343, "grad_norm": 1.115957162946092, "learning_rate": 4.733820390433397e-05, "loss": 0.7077, "step": 6805 }, { "epoch": 0.19874507514956954, "grad_norm": 1.0337584772295996, "learning_rate": 4.733307877406526e-05, "loss": 0.6067, "step": 6810 }, { "epoch": 0.19889099664380563, "grad_norm": 1.2126984061692443, "learning_rate": 4.732794902529375e-05, "loss": 0.7241, "step": 6815 }, { "epoch": 0.19903691813804172, "grad_norm": 1.0196398264549338, "learning_rate": 4.732281465921398e-05, "loss": 0.6566, "step": 6820 }, { "epoch": 0.19918283963227784, "grad_norm": 1.027466485874258, "learning_rate": 4.731767567702159e-05, "loss": 0.7184, "step": 6825 }, { "epoch": 0.19932876112651393, "grad_norm": 1.0345727192662957, "learning_rate": 4.731253207991329e-05, "loss": 0.6255, "step": 6830 }, { "epoch": 0.19947468262075005, "grad_norm": 0.9710338198528217, "learning_rate": 4.730738386908685e-05, "loss": 0.7239, "step": 6835 }, { "epoch": 0.19962060411498614, "grad_norm": 1.0840738938520926, "learning_rate": 4.7302231045741155e-05, "loss": 0.6311, "step": 6840 }, { "epoch": 0.19976652560922223, "grad_norm": 1.159372544076218, "learning_rate": 4.72970736110761e-05, "loss": 0.7027, "step": 6845 }, { "epoch": 0.19991244710345835, "grad_norm": 1.0581160310886049, "learning_rate": 4.729191156629272e-05, "loss": 0.6527, "step": 6850 }, { "epoch": 0.20005836859769444, "grad_norm": 1.042686197366578, "learning_rate": 4.728674491259308e-05, "loss": 0.6426, "step": 6855 }, { "epoch": 0.20020429009193055, "grad_norm": 0.9538219784456546, "learning_rate": 4.728157365118033e-05, "loss": 0.5846, "step": 6860 }, { "epoch": 0.20035021158616664, "grad_norm": 1.1410763823129053, "learning_rate": 4.7276397783258704e-05, "loss": 0.6514, "step": 6865 }, { "epoch": 0.20049613308040273, "grad_norm": 0.8994330418932087, "learning_rate": 4.72712173100335e-05, "loss": 0.638, "step": 6870 }, { "epoch": 0.20064205457463885, "grad_norm": 0.991636648260618, "learning_rate": 4.7266032232711066e-05, "loss": 0.648, "step": 6875 }, { "epoch": 0.20078797606887494, "grad_norm": 1.0711945313425744, "learning_rate": 4.726084255249887e-05, "loss": 0.6388, "step": 6880 }, { "epoch": 0.20093389756311106, "grad_norm": 1.0736267645967408, "learning_rate": 4.725564827060541e-05, "loss": 0.624, "step": 6885 }, { "epoch": 0.20107981905734715, "grad_norm": 1.1152268552481455, "learning_rate": 4.725044938824028e-05, "loss": 0.6922, "step": 6890 }, { "epoch": 0.20122574055158324, "grad_norm": 1.070326805658138, "learning_rate": 4.7245245906614135e-05, "loss": 0.6443, "step": 6895 }, { "epoch": 0.20137166204581936, "grad_norm": 1.212511864940288, "learning_rate": 4.724003782693871e-05, "loss": 0.633, "step": 6900 }, { "epoch": 0.20151758354005544, "grad_norm": 1.0059937543670032, "learning_rate": 4.723482515042679e-05, "loss": 0.6335, "step": 6905 }, { "epoch": 0.20166350503429156, "grad_norm": 1.038829969386479, "learning_rate": 4.722960787829225e-05, "loss": 0.6223, "step": 6910 }, { "epoch": 0.20180942652852765, "grad_norm": 1.0160669391493726, "learning_rate": 4.7224386011750024e-05, "loss": 0.6163, "step": 6915 }, { "epoch": 0.20195534802276374, "grad_norm": 1.039408920648934, "learning_rate": 4.7219159552016133e-05, "loss": 0.7062, "step": 6920 }, { "epoch": 0.20210126951699986, "grad_norm": 0.9427194393697952, "learning_rate": 4.721392850030765e-05, "loss": 0.6896, "step": 6925 }, { "epoch": 0.20224719101123595, "grad_norm": 1.1679738392459498, "learning_rate": 4.720869285784272e-05, "loss": 0.6946, "step": 6930 }, { "epoch": 0.20239311250547207, "grad_norm": 0.9391318151626903, "learning_rate": 4.720345262584056e-05, "loss": 0.6924, "step": 6935 }, { "epoch": 0.20253903399970816, "grad_norm": 0.9588551078489763, "learning_rate": 4.719820780552146e-05, "loss": 0.6628, "step": 6940 }, { "epoch": 0.20268495549394425, "grad_norm": 0.8749889989565968, "learning_rate": 4.719295839810678e-05, "loss": 0.6018, "step": 6945 }, { "epoch": 0.20283087698818036, "grad_norm": 0.9398262452419113, "learning_rate": 4.718770440481892e-05, "loss": 0.669, "step": 6950 }, { "epoch": 0.20297679848241645, "grad_norm": 1.1493432072613248, "learning_rate": 4.71824458268814e-05, "loss": 0.6922, "step": 6955 }, { "epoch": 0.20312271997665257, "grad_norm": 0.9226450378933662, "learning_rate": 4.717718266551876e-05, "loss": 0.65, "step": 6960 }, { "epoch": 0.20326864147088866, "grad_norm": 0.9894497141437163, "learning_rate": 4.717191492195662e-05, "loss": 0.7219, "step": 6965 }, { "epoch": 0.20341456296512475, "grad_norm": 1.1702694357766992, "learning_rate": 4.7166642597421694e-05, "loss": 0.6847, "step": 6970 }, { "epoch": 0.20356048445936087, "grad_norm": 1.0133427838030136, "learning_rate": 4.716136569314173e-05, "loss": 0.6474, "step": 6975 }, { "epoch": 0.20370640595359696, "grad_norm": 1.1317470783411034, "learning_rate": 4.715608421034554e-05, "loss": 0.6391, "step": 6980 }, { "epoch": 0.20385232744783308, "grad_norm": 1.3146596701037883, "learning_rate": 4.715079815026304e-05, "loss": 0.711, "step": 6985 }, { "epoch": 0.20399824894206917, "grad_norm": 1.047542170006923, "learning_rate": 4.714550751412517e-05, "loss": 0.6728, "step": 6990 }, { "epoch": 0.20414417043630526, "grad_norm": 1.1227977061211136, "learning_rate": 4.714021230316396e-05, "loss": 0.6782, "step": 6995 }, { "epoch": 0.20429009193054137, "grad_norm": 0.8566714275650146, "learning_rate": 4.7134912518612505e-05, "loss": 0.6473, "step": 7000 }, { "epoch": 0.20443601342477746, "grad_norm": 1.1670620875905955, "learning_rate": 4.7129608161704944e-05, "loss": 0.6618, "step": 7005 }, { "epoch": 0.20458193491901358, "grad_norm": 1.169494280289647, "learning_rate": 4.7124299233676505e-05, "loss": 0.7274, "step": 7010 }, { "epoch": 0.20472785641324967, "grad_norm": 0.9880420599646522, "learning_rate": 4.7118985735763473e-05, "loss": 0.6801, "step": 7015 }, { "epoch": 0.20487377790748576, "grad_norm": 1.0748091866208749, "learning_rate": 4.71136676692032e-05, "loss": 0.637, "step": 7020 }, { "epoch": 0.20501969940172188, "grad_norm": 1.0359670441881943, "learning_rate": 4.710834503523408e-05, "loss": 0.6444, "step": 7025 }, { "epoch": 0.20516562089595797, "grad_norm": 1.0576428276181502, "learning_rate": 4.7103017835095596e-05, "loss": 0.6065, "step": 7030 }, { "epoch": 0.20531154239019409, "grad_norm": 1.050881756544358, "learning_rate": 4.709768607002828e-05, "loss": 0.6332, "step": 7035 }, { "epoch": 0.20545746388443017, "grad_norm": 1.0588481059191026, "learning_rate": 4.709234974127376e-05, "loss": 0.6563, "step": 7040 }, { "epoch": 0.20560338537866626, "grad_norm": 1.0876354510122306, "learning_rate": 4.708700885007466e-05, "loss": 0.6265, "step": 7045 }, { "epoch": 0.20574930687290238, "grad_norm": 0.9494208042655983, "learning_rate": 4.7081663397674744e-05, "loss": 0.6197, "step": 7050 }, { "epoch": 0.20589522836713847, "grad_norm": 1.0264984709933391, "learning_rate": 4.7076313385318775e-05, "loss": 0.6334, "step": 7055 }, { "epoch": 0.2060411498613746, "grad_norm": 1.087782420699243, "learning_rate": 4.707095881425261e-05, "loss": 0.6535, "step": 7060 }, { "epoch": 0.20618707135561068, "grad_norm": 1.1475040738201219, "learning_rate": 4.706559968572317e-05, "loss": 0.6668, "step": 7065 }, { "epoch": 0.20633299284984677, "grad_norm": 1.0117981738040611, "learning_rate": 4.706023600097842e-05, "loss": 0.666, "step": 7070 }, { "epoch": 0.2064789143440829, "grad_norm": 0.9885571687448587, "learning_rate": 4.705486776126738e-05, "loss": 0.6268, "step": 7075 }, { "epoch": 0.20662483583831898, "grad_norm": 1.323306997951047, "learning_rate": 4.704949496784017e-05, "loss": 0.6735, "step": 7080 }, { "epoch": 0.2067707573325551, "grad_norm": 1.074861145612746, "learning_rate": 4.7044117621947933e-05, "loss": 0.6723, "step": 7085 }, { "epoch": 0.20691667882679118, "grad_norm": 1.0981060487750376, "learning_rate": 4.703873572484288e-05, "loss": 0.6272, "step": 7090 }, { "epoch": 0.20706260032102727, "grad_norm": 0.9059518698618659, "learning_rate": 4.70333492777783e-05, "loss": 0.673, "step": 7095 }, { "epoch": 0.2072085218152634, "grad_norm": 1.1167295448443404, "learning_rate": 4.7027958282008516e-05, "loss": 0.643, "step": 7100 }, { "epoch": 0.20735444330949948, "grad_norm": 1.1798667190916945, "learning_rate": 4.702256273878892e-05, "loss": 0.6635, "step": 7105 }, { "epoch": 0.2075003648037356, "grad_norm": 1.1848115007393785, "learning_rate": 4.701716264937598e-05, "loss": 0.6926, "step": 7110 }, { "epoch": 0.2076462862979717, "grad_norm": 0.9503304612994011, "learning_rate": 4.70117580150272e-05, "loss": 0.6511, "step": 7115 }, { "epoch": 0.2077922077922078, "grad_norm": 1.0693049133378056, "learning_rate": 4.7006348837001145e-05, "loss": 0.6802, "step": 7120 }, { "epoch": 0.2079381292864439, "grad_norm": 0.9821430157861724, "learning_rate": 4.700093511655745e-05, "loss": 0.6213, "step": 7125 }, { "epoch": 0.20808405078067999, "grad_norm": 1.1652465809060788, "learning_rate": 4.6995516854956796e-05, "loss": 0.6685, "step": 7130 }, { "epoch": 0.2082299722749161, "grad_norm": 1.0341241783327564, "learning_rate": 4.6990094053460924e-05, "loss": 0.6053, "step": 7135 }, { "epoch": 0.2083758937691522, "grad_norm": 1.1164556157591872, "learning_rate": 4.698466671333264e-05, "loss": 0.707, "step": 7140 }, { "epoch": 0.2085218152633883, "grad_norm": 1.2647222430466085, "learning_rate": 4.69792348358358e-05, "loss": 0.6888, "step": 7145 }, { "epoch": 0.2086677367576244, "grad_norm": 1.0699456711868018, "learning_rate": 4.697379842223532e-05, "loss": 0.6718, "step": 7150 }, { "epoch": 0.2088136582518605, "grad_norm": 1.0503008123369013, "learning_rate": 4.6968357473797174e-05, "loss": 0.6629, "step": 7155 }, { "epoch": 0.2089595797460966, "grad_norm": 1.0033675388866816, "learning_rate": 4.696291199178838e-05, "loss": 0.6922, "step": 7160 }, { "epoch": 0.2091055012403327, "grad_norm": 1.0364023713431059, "learning_rate": 4.695746197747702e-05, "loss": 0.6557, "step": 7165 }, { "epoch": 0.20925142273456881, "grad_norm": 1.0745685251413466, "learning_rate": 4.695200743213224e-05, "loss": 0.6962, "step": 7170 }, { "epoch": 0.2093973442288049, "grad_norm": 1.0447779279640634, "learning_rate": 4.6946548357024224e-05, "loss": 0.65, "step": 7175 }, { "epoch": 0.209543265723041, "grad_norm": 1.0173300390857254, "learning_rate": 4.6941084753424226e-05, "loss": 0.658, "step": 7180 }, { "epoch": 0.2096891872172771, "grad_norm": 1.139286188217955, "learning_rate": 4.693561662260454e-05, "loss": 0.6968, "step": 7185 }, { "epoch": 0.2098351087115132, "grad_norm": 1.0910424147480646, "learning_rate": 4.693014396583854e-05, "loss": 0.6471, "step": 7190 }, { "epoch": 0.20998103020574932, "grad_norm": 1.0281749584360789, "learning_rate": 4.692466678440061e-05, "loss": 0.6242, "step": 7195 }, { "epoch": 0.2101269516999854, "grad_norm": 0.9279945676590206, "learning_rate": 4.691918507956624e-05, "loss": 0.6891, "step": 7200 }, { "epoch": 0.2102728731942215, "grad_norm": 1.0985422939099756, "learning_rate": 4.691369885261193e-05, "loss": 0.6215, "step": 7205 }, { "epoch": 0.21041879468845762, "grad_norm": 1.0573035147819643, "learning_rate": 4.690820810481525e-05, "loss": 0.6838, "step": 7210 }, { "epoch": 0.2105647161826937, "grad_norm": 0.9201826896755473, "learning_rate": 4.690271283745484e-05, "loss": 0.598, "step": 7215 }, { "epoch": 0.21071063767692982, "grad_norm": 1.029101847571502, "learning_rate": 4.689721305181037e-05, "loss": 0.6384, "step": 7220 }, { "epoch": 0.2108565591711659, "grad_norm": 0.9370642598255721, "learning_rate": 4.689170874916255e-05, "loss": 0.6642, "step": 7225 }, { "epoch": 0.211002480665402, "grad_norm": 0.9698462818170229, "learning_rate": 4.688619993079318e-05, "loss": 0.6149, "step": 7230 }, { "epoch": 0.21114840215963812, "grad_norm": 1.0293263506971218, "learning_rate": 4.688068659798508e-05, "loss": 0.6537, "step": 7235 }, { "epoch": 0.2112943236538742, "grad_norm": 0.9575458990738401, "learning_rate": 4.687516875202213e-05, "loss": 0.6693, "step": 7240 }, { "epoch": 0.21144024514811033, "grad_norm": 1.24600445598782, "learning_rate": 4.6869646394189274e-05, "loss": 0.6387, "step": 7245 }, { "epoch": 0.21158616664234642, "grad_norm": 0.9455277141074121, "learning_rate": 4.6864119525772496e-05, "loss": 0.6646, "step": 7250 }, { "epoch": 0.2117320881365825, "grad_norm": 0.9425085597144862, "learning_rate": 4.685858814805883e-05, "loss": 0.6479, "step": 7255 }, { "epoch": 0.21187800963081863, "grad_norm": 1.0950647941401213, "learning_rate": 4.685305226233635e-05, "loss": 0.7046, "step": 7260 }, { "epoch": 0.21202393112505472, "grad_norm": 1.069777006581796, "learning_rate": 4.68475118698942e-05, "loss": 0.676, "step": 7265 }, { "epoch": 0.21216985261929083, "grad_norm": 1.013498824900132, "learning_rate": 4.684196697202256e-05, "loss": 0.7036, "step": 7270 }, { "epoch": 0.21231577411352692, "grad_norm": 1.021137698202152, "learning_rate": 4.683641757001266e-05, "loss": 0.7009, "step": 7275 }, { "epoch": 0.212461695607763, "grad_norm": 1.0145919802847734, "learning_rate": 4.683086366515679e-05, "loss": 0.7158, "step": 7280 }, { "epoch": 0.21260761710199913, "grad_norm": 1.0189317326825382, "learning_rate": 4.682530525874828e-05, "loss": 0.6372, "step": 7285 }, { "epoch": 0.21275353859623522, "grad_norm": 0.9927991164962339, "learning_rate": 4.68197423520815e-05, "loss": 0.6601, "step": 7290 }, { "epoch": 0.21289946009047134, "grad_norm": 1.06097897855751, "learning_rate": 4.6814174946451886e-05, "loss": 0.6187, "step": 7295 }, { "epoch": 0.21304538158470743, "grad_norm": 1.1754294225562802, "learning_rate": 4.680860304315591e-05, "loss": 0.6299, "step": 7300 }, { "epoch": 0.21319130307894352, "grad_norm": 1.0015192944093279, "learning_rate": 4.680302664349109e-05, "loss": 0.6567, "step": 7305 }, { "epoch": 0.21333722457317963, "grad_norm": 1.1631806279649357, "learning_rate": 4.6797445748755994e-05, "loss": 0.6114, "step": 7310 }, { "epoch": 0.21348314606741572, "grad_norm": 0.9092795358625102, "learning_rate": 4.679186036025025e-05, "loss": 0.6191, "step": 7315 }, { "epoch": 0.21362906756165184, "grad_norm": 1.1668756863865415, "learning_rate": 4.67862704792745e-05, "loss": 0.6784, "step": 7320 }, { "epoch": 0.21377498905588793, "grad_norm": 0.8366312586188658, "learning_rate": 4.678067610713046e-05, "loss": 0.6352, "step": 7325 }, { "epoch": 0.21392091055012402, "grad_norm": 0.9769138828210789, "learning_rate": 4.6775077245120895e-05, "loss": 0.6489, "step": 7330 }, { "epoch": 0.21406683204436014, "grad_norm": 1.0710505378380875, "learning_rate": 4.67694738945496e-05, "loss": 0.6602, "step": 7335 }, { "epoch": 0.21421275353859623, "grad_norm": 0.9565802930587681, "learning_rate": 4.676386605672141e-05, "loss": 0.6517, "step": 7340 }, { "epoch": 0.21435867503283235, "grad_norm": 1.082336256883279, "learning_rate": 4.6758253732942225e-05, "loss": 0.7029, "step": 7345 }, { "epoch": 0.21450459652706844, "grad_norm": 1.1233606213372693, "learning_rate": 4.675263692451897e-05, "loss": 0.6742, "step": 7350 }, { "epoch": 0.21465051802130453, "grad_norm": 0.9118210230912315, "learning_rate": 4.674701563275963e-05, "loss": 0.6802, "step": 7355 }, { "epoch": 0.21479643951554064, "grad_norm": 0.9407238998155552, "learning_rate": 4.674138985897323e-05, "loss": 0.6252, "step": 7360 }, { "epoch": 0.21494236100977673, "grad_norm": 1.0670195978737167, "learning_rate": 4.673575960446983e-05, "loss": 0.6757, "step": 7365 }, { "epoch": 0.21508828250401285, "grad_norm": 0.9390137390057487, "learning_rate": 4.673012487056054e-05, "loss": 0.5942, "step": 7370 }, { "epoch": 0.21523420399824894, "grad_norm": 0.9688860667326249, "learning_rate": 4.672448565855753e-05, "loss": 0.6816, "step": 7375 }, { "epoch": 0.21538012549248503, "grad_norm": 1.1335265174271185, "learning_rate": 4.6718841969773974e-05, "loss": 0.6708, "step": 7380 }, { "epoch": 0.21552604698672115, "grad_norm": 0.8345896703669826, "learning_rate": 4.671319380552412e-05, "loss": 0.6382, "step": 7385 }, { "epoch": 0.21567196848095724, "grad_norm": 0.8908583081029197, "learning_rate": 4.6707541167123245e-05, "loss": 0.6445, "step": 7390 }, { "epoch": 0.21581788997519336, "grad_norm": 0.9177576348937473, "learning_rate": 4.670188405588768e-05, "loss": 0.683, "step": 7395 }, { "epoch": 0.21596381146942945, "grad_norm": 1.1292408023159637, "learning_rate": 4.669622247313478e-05, "loss": 0.6793, "step": 7400 }, { "epoch": 0.21610973296366554, "grad_norm": 1.093415973367532, "learning_rate": 4.669055642018294e-05, "loss": 0.6948, "step": 7405 }, { "epoch": 0.21625565445790165, "grad_norm": 1.0503185924329987, "learning_rate": 4.6684885898351634e-05, "loss": 0.6216, "step": 7410 }, { "epoch": 0.21640157595213774, "grad_norm": 0.9387566942021225, "learning_rate": 4.667921090896133e-05, "loss": 0.6928, "step": 7415 }, { "epoch": 0.21654749744637386, "grad_norm": 0.9598127539859136, "learning_rate": 4.6673531453333556e-05, "loss": 0.6502, "step": 7420 }, { "epoch": 0.21669341894060995, "grad_norm": 1.2961967017194649, "learning_rate": 4.666784753279089e-05, "loss": 0.6072, "step": 7425 }, { "epoch": 0.21683934043484604, "grad_norm": 1.038007380448776, "learning_rate": 4.666215914865692e-05, "loss": 0.6937, "step": 7430 }, { "epoch": 0.21698526192908216, "grad_norm": 0.8714575714025019, "learning_rate": 4.665646630225631e-05, "loss": 0.6275, "step": 7435 }, { "epoch": 0.21713118342331825, "grad_norm": 1.0941428131078197, "learning_rate": 4.6650768994914745e-05, "loss": 0.6476, "step": 7440 }, { "epoch": 0.21727710491755436, "grad_norm": 1.2795917803982988, "learning_rate": 4.6645067227958936e-05, "loss": 0.6831, "step": 7445 }, { "epoch": 0.21742302641179045, "grad_norm": 1.1606057579949522, "learning_rate": 4.6639361002716664e-05, "loss": 0.6281, "step": 7450 }, { "epoch": 0.21756894790602654, "grad_norm": 1.0364768107140152, "learning_rate": 4.6633650320516715e-05, "loss": 0.6391, "step": 7455 }, { "epoch": 0.21771486940026266, "grad_norm": 1.0330098649566157, "learning_rate": 4.662793518268893e-05, "loss": 0.5885, "step": 7460 }, { "epoch": 0.21786079089449875, "grad_norm": 1.1419313769732622, "learning_rate": 4.6622215590564195e-05, "loss": 0.6259, "step": 7465 }, { "epoch": 0.21800671238873487, "grad_norm": 1.0983366181806211, "learning_rate": 4.661649154547441e-05, "loss": 0.6039, "step": 7470 }, { "epoch": 0.21815263388297096, "grad_norm": 0.9894326338066628, "learning_rate": 4.6610763048752536e-05, "loss": 0.6497, "step": 7475 }, { "epoch": 0.21829855537720708, "grad_norm": 0.9945505031730846, "learning_rate": 4.660503010173257e-05, "loss": 0.6227, "step": 7480 }, { "epoch": 0.21844447687144317, "grad_norm": 1.0310615789054935, "learning_rate": 4.659929270574951e-05, "loss": 0.6831, "step": 7485 }, { "epoch": 0.21859039836567926, "grad_norm": 1.0852740994889694, "learning_rate": 4.659355086213943e-05, "loss": 0.6238, "step": 7490 }, { "epoch": 0.21873631985991537, "grad_norm": 1.0804409621228588, "learning_rate": 4.658780457223943e-05, "loss": 0.6445, "step": 7495 }, { "epoch": 0.21888224135415146, "grad_norm": 1.2114396661840598, "learning_rate": 4.658205383738763e-05, "loss": 0.7118, "step": 7500 }, { "epoch": 0.21902816284838758, "grad_norm": 1.136607885035878, "learning_rate": 4.6576298658923204e-05, "loss": 0.6585, "step": 7505 }, { "epoch": 0.21917408434262367, "grad_norm": 0.9593866783911533, "learning_rate": 4.6570539038186364e-05, "loss": 0.6273, "step": 7510 }, { "epoch": 0.21932000583685976, "grad_norm": 1.2315478632914256, "learning_rate": 4.656477497651831e-05, "loss": 0.6671, "step": 7515 }, { "epoch": 0.21946592733109588, "grad_norm": 1.0103810708300325, "learning_rate": 4.6559006475261346e-05, "loss": 0.6908, "step": 7520 }, { "epoch": 0.21961184882533197, "grad_norm": 1.0459683956658206, "learning_rate": 4.655323353575876e-05, "loss": 0.6262, "step": 7525 }, { "epoch": 0.21975777031956809, "grad_norm": 1.0178379259190358, "learning_rate": 4.65474561593549e-05, "loss": 0.685, "step": 7530 }, { "epoch": 0.21990369181380418, "grad_norm": 0.9983536592704769, "learning_rate": 4.654167434739512e-05, "loss": 0.6595, "step": 7535 }, { "epoch": 0.22004961330804026, "grad_norm": 1.0708320003311147, "learning_rate": 4.653588810122583e-05, "loss": 0.7093, "step": 7540 }, { "epoch": 0.22019553480227638, "grad_norm": 1.3927245501382268, "learning_rate": 4.653009742219446e-05, "loss": 0.6493, "step": 7545 }, { "epoch": 0.22034145629651247, "grad_norm": 1.0596785589390862, "learning_rate": 4.65243023116495e-05, "loss": 0.6508, "step": 7550 }, { "epoch": 0.2204873777907486, "grad_norm": 0.9104080685412203, "learning_rate": 4.651850277094042e-05, "loss": 0.5673, "step": 7555 }, { "epoch": 0.22063329928498468, "grad_norm": 1.124816621662688, "learning_rate": 4.6512698801417764e-05, "loss": 0.6861, "step": 7560 }, { "epoch": 0.22077922077922077, "grad_norm": 0.9569717256213983, "learning_rate": 4.65068904044331e-05, "loss": 0.6211, "step": 7565 }, { "epoch": 0.2209251422734569, "grad_norm": 0.9835127678564364, "learning_rate": 4.650107758133902e-05, "loss": 0.6003, "step": 7570 }, { "epoch": 0.22107106376769298, "grad_norm": 0.9500234760155296, "learning_rate": 4.649526033348914e-05, "loss": 0.6293, "step": 7575 }, { "epoch": 0.2212169852619291, "grad_norm": 1.1420086690552729, "learning_rate": 4.648943866223813e-05, "loss": 0.6669, "step": 7580 }, { "epoch": 0.22136290675616518, "grad_norm": 1.105151708378207, "learning_rate": 4.648361256894165e-05, "loss": 0.6936, "step": 7585 }, { "epoch": 0.22150882825040127, "grad_norm": 1.4111895099639133, "learning_rate": 4.6477782054956434e-05, "loss": 0.6792, "step": 7590 }, { "epoch": 0.2216547497446374, "grad_norm": 0.9960524177161135, "learning_rate": 4.647194712164023e-05, "loss": 0.6881, "step": 7595 }, { "epoch": 0.22180067123887348, "grad_norm": 1.0727881848830199, "learning_rate": 4.6466107770351805e-05, "loss": 0.6765, "step": 7600 }, { "epoch": 0.2219465927331096, "grad_norm": 0.9493967287522872, "learning_rate": 4.6460264002450955e-05, "loss": 0.6388, "step": 7605 }, { "epoch": 0.2220925142273457, "grad_norm": 1.0857764231493143, "learning_rate": 4.645441581929851e-05, "loss": 0.6019, "step": 7610 }, { "epoch": 0.22223843572158178, "grad_norm": 1.1562511495727308, "learning_rate": 4.6448563222256336e-05, "loss": 0.6299, "step": 7615 }, { "epoch": 0.2223843572158179, "grad_norm": 0.904999610580651, "learning_rate": 4.644270621268732e-05, "loss": 0.6241, "step": 7620 }, { "epoch": 0.22253027871005399, "grad_norm": 1.0347466023762502, "learning_rate": 4.6436844791955364e-05, "loss": 0.5937, "step": 7625 }, { "epoch": 0.2226762002042901, "grad_norm": 1.0193732613943782, "learning_rate": 4.6430978961425434e-05, "loss": 0.6303, "step": 7630 }, { "epoch": 0.2228221216985262, "grad_norm": 0.9735957266428596, "learning_rate": 4.642510872246347e-05, "loss": 0.597, "step": 7635 }, { "epoch": 0.22296804319276228, "grad_norm": 1.1488660874717775, "learning_rate": 4.641923407643648e-05, "loss": 0.5677, "step": 7640 }, { "epoch": 0.2231139646869984, "grad_norm": 0.9736238729284256, "learning_rate": 4.641335502471249e-05, "loss": 0.6511, "step": 7645 }, { "epoch": 0.2232598861812345, "grad_norm": 1.127096262411377, "learning_rate": 4.6407471568660536e-05, "loss": 0.7143, "step": 7650 }, { "epoch": 0.2234058076754706, "grad_norm": 1.2861637996060618, "learning_rate": 4.6401583709650695e-05, "loss": 0.6601, "step": 7655 }, { "epoch": 0.2235517291697067, "grad_norm": 1.1626155280170258, "learning_rate": 4.639569144905407e-05, "loss": 0.5874, "step": 7660 }, { "epoch": 0.2236976506639428, "grad_norm": 1.1450179666918712, "learning_rate": 4.638979478824279e-05, "loss": 0.6883, "step": 7665 }, { "epoch": 0.2238435721581789, "grad_norm": 1.0276036788467968, "learning_rate": 4.6383893728589975e-05, "loss": 0.6587, "step": 7670 }, { "epoch": 0.223989493652415, "grad_norm": 0.893358429157451, "learning_rate": 4.637798827146982e-05, "loss": 0.634, "step": 7675 }, { "epoch": 0.2241354151466511, "grad_norm": 1.2468015038346163, "learning_rate": 4.637207841825751e-05, "loss": 0.6122, "step": 7680 }, { "epoch": 0.2242813366408872, "grad_norm": 1.0105129161254078, "learning_rate": 4.6366164170329284e-05, "loss": 0.6172, "step": 7685 }, { "epoch": 0.2244272581351233, "grad_norm": 0.9546960535958109, "learning_rate": 4.6360245529062356e-05, "loss": 0.6712, "step": 7690 }, { "epoch": 0.2245731796293594, "grad_norm": 0.9246250406019902, "learning_rate": 4.635432249583502e-05, "loss": 0.6112, "step": 7695 }, { "epoch": 0.2247191011235955, "grad_norm": 0.961446412475409, "learning_rate": 4.634839507202654e-05, "loss": 0.6771, "step": 7700 }, { "epoch": 0.22486502261783162, "grad_norm": 1.1360914160925013, "learning_rate": 4.634246325901725e-05, "loss": 0.6564, "step": 7705 }, { "epoch": 0.2250109441120677, "grad_norm": 1.0333366042083656, "learning_rate": 4.633652705818847e-05, "loss": 0.6196, "step": 7710 }, { "epoch": 0.2251568656063038, "grad_norm": 1.1391768680085324, "learning_rate": 4.633058647092256e-05, "loss": 0.623, "step": 7715 }, { "epoch": 0.22530278710053991, "grad_norm": 1.0248908811314938, "learning_rate": 4.632464149860289e-05, "loss": 0.6221, "step": 7720 }, { "epoch": 0.225448708594776, "grad_norm": 0.9108781939269617, "learning_rate": 4.631869214261386e-05, "loss": 0.661, "step": 7725 }, { "epoch": 0.22559463008901212, "grad_norm": 1.0285466781606194, "learning_rate": 4.631273840434091e-05, "loss": 0.6735, "step": 7730 }, { "epoch": 0.2257405515832482, "grad_norm": 1.2330159732465003, "learning_rate": 4.6306780285170434e-05, "loss": 0.6234, "step": 7735 }, { "epoch": 0.2258864730774843, "grad_norm": 1.1110321228740665, "learning_rate": 4.630081778648994e-05, "loss": 0.6139, "step": 7740 }, { "epoch": 0.22603239457172042, "grad_norm": 1.0018857334264872, "learning_rate": 4.629485090968788e-05, "loss": 0.7078, "step": 7745 }, { "epoch": 0.2261783160659565, "grad_norm": 1.1962417503635852, "learning_rate": 4.6288879656153746e-05, "loss": 0.6401, "step": 7750 }, { "epoch": 0.22632423756019263, "grad_norm": 1.5167026681710511, "learning_rate": 4.6282904027278074e-05, "loss": 0.703, "step": 7755 }, { "epoch": 0.22647015905442872, "grad_norm": 1.158930895185318, "learning_rate": 4.62769240244524e-05, "loss": 0.6592, "step": 7760 }, { "epoch": 0.2266160805486648, "grad_norm": 0.9992968381378073, "learning_rate": 4.627093964906927e-05, "loss": 0.6375, "step": 7765 }, { "epoch": 0.22676200204290092, "grad_norm": 0.8744079986899453, "learning_rate": 4.6264950902522253e-05, "loss": 0.6297, "step": 7770 }, { "epoch": 0.226907923537137, "grad_norm": 1.0743209051150564, "learning_rate": 4.625895778620597e-05, "loss": 0.6679, "step": 7775 }, { "epoch": 0.22705384503137313, "grad_norm": 0.9659038976755095, "learning_rate": 4.6252960301515984e-05, "loss": 0.5621, "step": 7780 }, { "epoch": 0.22719976652560922, "grad_norm": 1.2384217917337363, "learning_rate": 4.624695844984895e-05, "loss": 0.6708, "step": 7785 }, { "epoch": 0.2273456880198453, "grad_norm": 1.0826045097940296, "learning_rate": 4.624095223260253e-05, "loss": 0.6473, "step": 7790 }, { "epoch": 0.22749160951408143, "grad_norm": 0.9188450931386862, "learning_rate": 4.623494165117535e-05, "loss": 0.6237, "step": 7795 }, { "epoch": 0.22763753100831752, "grad_norm": 0.9859928298860685, "learning_rate": 4.622892670696708e-05, "loss": 0.6464, "step": 7800 }, { "epoch": 0.22778345250255363, "grad_norm": 1.1669358062041992, "learning_rate": 4.622290740137845e-05, "loss": 0.6264, "step": 7805 }, { "epoch": 0.22792937399678972, "grad_norm": 1.011989914026944, "learning_rate": 4.6216883735811146e-05, "loss": 0.6276, "step": 7810 }, { "epoch": 0.22807529549102581, "grad_norm": 0.9732925583985184, "learning_rate": 4.6210855711667885e-05, "loss": 0.6405, "step": 7815 }, { "epoch": 0.22822121698526193, "grad_norm": 0.9784160267226092, "learning_rate": 4.620482333035241e-05, "loss": 0.6491, "step": 7820 }, { "epoch": 0.22836713847949802, "grad_norm": 0.950981513343793, "learning_rate": 4.619878659326948e-05, "loss": 0.6567, "step": 7825 }, { "epoch": 0.22851305997373414, "grad_norm": 0.9936534018963062, "learning_rate": 4.6192745501824865e-05, "loss": 0.6688, "step": 7830 }, { "epoch": 0.22865898146797023, "grad_norm": 0.9694163741131531, "learning_rate": 4.618670005742533e-05, "loss": 0.5973, "step": 7835 }, { "epoch": 0.22880490296220635, "grad_norm": 1.2899515244708895, "learning_rate": 4.618065026147868e-05, "loss": 0.6725, "step": 7840 }, { "epoch": 0.22895082445644244, "grad_norm": 1.005719952855351, "learning_rate": 4.617459611539372e-05, "loss": 0.6579, "step": 7845 }, { "epoch": 0.22909674595067853, "grad_norm": 0.8829964980674914, "learning_rate": 4.616853762058028e-05, "loss": 0.6145, "step": 7850 }, { "epoch": 0.22924266744491464, "grad_norm": 0.8873918242522172, "learning_rate": 4.6162474778449184e-05, "loss": 0.5489, "step": 7855 }, { "epoch": 0.22938858893915073, "grad_norm": 1.0776994625416034, "learning_rate": 4.6156407590412285e-05, "loss": 0.6575, "step": 7860 }, { "epoch": 0.22953451043338685, "grad_norm": 1.1758551600791651, "learning_rate": 4.615033605788242e-05, "loss": 0.6575, "step": 7865 }, { "epoch": 0.22968043192762294, "grad_norm": 0.8689326529679396, "learning_rate": 4.614426018227348e-05, "loss": 0.651, "step": 7870 }, { "epoch": 0.22982635342185903, "grad_norm": 1.0663020340156029, "learning_rate": 4.613817996500036e-05, "loss": 0.6443, "step": 7875 }, { "epoch": 0.22997227491609515, "grad_norm": 0.7975763582854271, "learning_rate": 4.613209540747891e-05, "loss": 0.6341, "step": 7880 }, { "epoch": 0.23011819641033124, "grad_norm": 1.0200147171388074, "learning_rate": 4.612600651112607e-05, "loss": 0.6805, "step": 7885 }, { "epoch": 0.23026411790456736, "grad_norm": 0.9997976272014104, "learning_rate": 4.611991327735973e-05, "loss": 0.6419, "step": 7890 }, { "epoch": 0.23041003939880345, "grad_norm": 1.0124966414078505, "learning_rate": 4.611381570759883e-05, "loss": 0.6435, "step": 7895 }, { "epoch": 0.23055596089303954, "grad_norm": 1.0148534133246132, "learning_rate": 4.61077138032633e-05, "loss": 0.6597, "step": 7900 }, { "epoch": 0.23070188238727565, "grad_norm": 1.0487671208762825, "learning_rate": 4.610160756577406e-05, "loss": 0.6318, "step": 7905 }, { "epoch": 0.23084780388151174, "grad_norm": 0.9922302979776743, "learning_rate": 4.6095496996553095e-05, "loss": 0.6464, "step": 7910 }, { "epoch": 0.23099372537574786, "grad_norm": 1.0756828244911314, "learning_rate": 4.6089382097023345e-05, "loss": 0.6208, "step": 7915 }, { "epoch": 0.23113964686998395, "grad_norm": 0.9858037716854757, "learning_rate": 4.608326286860878e-05, "loss": 0.6216, "step": 7920 }, { "epoch": 0.23128556836422004, "grad_norm": 1.0829510209891522, "learning_rate": 4.607713931273439e-05, "loss": 0.6361, "step": 7925 }, { "epoch": 0.23143148985845616, "grad_norm": 0.9828157474884484, "learning_rate": 4.607101143082614e-05, "loss": 0.6119, "step": 7930 }, { "epoch": 0.23157741135269225, "grad_norm": 0.9218204062158182, "learning_rate": 4.606487922431104e-05, "loss": 0.6352, "step": 7935 }, { "epoch": 0.23172333284692836, "grad_norm": 1.2627713029350476, "learning_rate": 4.6058742694617075e-05, "loss": 0.6829, "step": 7940 }, { "epoch": 0.23186925434116445, "grad_norm": 1.0605732958080172, "learning_rate": 4.605260184317325e-05, "loss": 0.6669, "step": 7945 }, { "epoch": 0.23201517583540054, "grad_norm": 1.2039780403227063, "learning_rate": 4.604645667140959e-05, "loss": 0.6668, "step": 7950 }, { "epoch": 0.23216109732963666, "grad_norm": 1.3794917993873899, "learning_rate": 4.6040307180757095e-05, "loss": 0.6919, "step": 7955 }, { "epoch": 0.23230701882387275, "grad_norm": 1.1482924914290773, "learning_rate": 4.603415337264781e-05, "loss": 0.6708, "step": 7960 }, { "epoch": 0.23245294031810887, "grad_norm": 0.9893061667380306, "learning_rate": 4.602799524851476e-05, "loss": 0.664, "step": 7965 }, { "epoch": 0.23259886181234496, "grad_norm": 1.285670934417748, "learning_rate": 4.602183280979195e-05, "loss": 0.6462, "step": 7970 }, { "epoch": 0.23274478330658105, "grad_norm": 1.1094735932361646, "learning_rate": 4.6015666057914456e-05, "loss": 0.6972, "step": 7975 }, { "epoch": 0.23289070480081717, "grad_norm": 1.03649916776004, "learning_rate": 4.60094949943183e-05, "loss": 0.6695, "step": 7980 }, { "epoch": 0.23303662629505326, "grad_norm": 0.9357764020518337, "learning_rate": 4.600331962044055e-05, "loss": 0.5931, "step": 7985 }, { "epoch": 0.23318254778928937, "grad_norm": 1.0140902828062521, "learning_rate": 4.599713993771923e-05, "loss": 0.5776, "step": 7990 }, { "epoch": 0.23332846928352546, "grad_norm": 1.0311708165876745, "learning_rate": 4.5990955947593415e-05, "loss": 0.6237, "step": 7995 }, { "epoch": 0.23347439077776155, "grad_norm": 0.8908995237987619, "learning_rate": 4.598476765150315e-05, "loss": 0.6277, "step": 8000 }, { "epoch": 0.23362031227199767, "grad_norm": 1.2632321959770048, "learning_rate": 4.59785750508895e-05, "loss": 0.6476, "step": 8005 }, { "epoch": 0.23376623376623376, "grad_norm": 1.0704734377102212, "learning_rate": 4.5972378147194526e-05, "loss": 0.679, "step": 8010 }, { "epoch": 0.23391215526046988, "grad_norm": 1.0413044935624627, "learning_rate": 4.59661769418613e-05, "loss": 0.6957, "step": 8015 }, { "epoch": 0.23405807675470597, "grad_norm": 1.001972236043797, "learning_rate": 4.595997143633389e-05, "loss": 0.6207, "step": 8020 }, { "epoch": 0.23420399824894206, "grad_norm": 1.0369600631181088, "learning_rate": 4.5953761632057354e-05, "loss": 0.6807, "step": 8025 }, { "epoch": 0.23434991974317818, "grad_norm": 0.9484125250865714, "learning_rate": 4.594754753047776e-05, "loss": 0.6598, "step": 8030 }, { "epoch": 0.23449584123741427, "grad_norm": 0.9575622002482312, "learning_rate": 4.594132913304219e-05, "loss": 0.6517, "step": 8035 }, { "epoch": 0.23464176273165038, "grad_norm": 1.1696590967036824, "learning_rate": 4.5935106441198696e-05, "loss": 0.6378, "step": 8040 }, { "epoch": 0.23478768422588647, "grad_norm": 1.0122955899702997, "learning_rate": 4.5928879456396364e-05, "loss": 0.6531, "step": 8045 }, { "epoch": 0.23493360572012256, "grad_norm": 1.0435555441240427, "learning_rate": 4.592264818008526e-05, "loss": 0.6529, "step": 8050 }, { "epoch": 0.23507952721435868, "grad_norm": 0.8714261880951645, "learning_rate": 4.591641261371644e-05, "loss": 0.6241, "step": 8055 }, { "epoch": 0.23522544870859477, "grad_norm": 0.9694657008764006, "learning_rate": 4.5910172758742e-05, "loss": 0.6399, "step": 8060 }, { "epoch": 0.2353713702028309, "grad_norm": 1.0901896256951364, "learning_rate": 4.590392861661497e-05, "loss": 0.5976, "step": 8065 }, { "epoch": 0.23551729169706698, "grad_norm": 1.1780420339147422, "learning_rate": 4.589768018878945e-05, "loss": 0.6908, "step": 8070 }, { "epoch": 0.23566321319130307, "grad_norm": 1.1293836913029043, "learning_rate": 4.589142747672048e-05, "loss": 0.6715, "step": 8075 }, { "epoch": 0.23580913468553918, "grad_norm": 0.9637251883730631, "learning_rate": 4.588517048186414e-05, "loss": 0.6194, "step": 8080 }, { "epoch": 0.23595505617977527, "grad_norm": 1.055958609880301, "learning_rate": 4.587890920567746e-05, "loss": 0.6513, "step": 8085 }, { "epoch": 0.2361009776740114, "grad_norm": 1.1356198807463125, "learning_rate": 4.5872643649618525e-05, "loss": 0.6765, "step": 8090 }, { "epoch": 0.23624689916824748, "grad_norm": 1.1309373299141614, "learning_rate": 4.5866373815146376e-05, "loss": 0.6277, "step": 8095 }, { "epoch": 0.23639282066248357, "grad_norm": 1.1037383743425087, "learning_rate": 4.5860099703721046e-05, "loss": 0.6472, "step": 8100 }, { "epoch": 0.2365387421567197, "grad_norm": 1.0501082677608518, "learning_rate": 4.58538213168036e-05, "loss": 0.6222, "step": 8105 }, { "epoch": 0.23668466365095578, "grad_norm": 1.0830137055276021, "learning_rate": 4.584753865585607e-05, "loss": 0.6814, "step": 8110 }, { "epoch": 0.2368305851451919, "grad_norm": 0.9762513270228858, "learning_rate": 4.584125172234148e-05, "loss": 0.5637, "step": 8115 }, { "epoch": 0.236976506639428, "grad_norm": 0.9170030123885389, "learning_rate": 4.5834960517723886e-05, "loss": 0.6327, "step": 8120 }, { "epoch": 0.23712242813366408, "grad_norm": 1.346858358083094, "learning_rate": 4.582866504346829e-05, "loss": 0.6949, "step": 8125 }, { "epoch": 0.2372683496279002, "grad_norm": 1.0356137902428821, "learning_rate": 4.5822365301040715e-05, "loss": 0.6582, "step": 8130 }, { "epoch": 0.23741427112213628, "grad_norm": 0.9622530458907691, "learning_rate": 4.581606129190818e-05, "loss": 0.6768, "step": 8135 }, { "epoch": 0.2375601926163724, "grad_norm": 1.0684809591415723, "learning_rate": 4.5809753017538686e-05, "loss": 0.6712, "step": 8140 }, { "epoch": 0.2377061141106085, "grad_norm": 1.0635318856228781, "learning_rate": 4.580344047940123e-05, "loss": 0.5895, "step": 8145 }, { "epoch": 0.23785203560484458, "grad_norm": 1.0955118622977071, "learning_rate": 4.5797123678965814e-05, "loss": 0.6034, "step": 8150 }, { "epoch": 0.2379979570990807, "grad_norm": 1.2686453968599383, "learning_rate": 4.579080261770342e-05, "loss": 0.6942, "step": 8155 }, { "epoch": 0.2381438785933168, "grad_norm": 1.2471436362989532, "learning_rate": 4.578447729708602e-05, "loss": 0.6804, "step": 8160 }, { "epoch": 0.2382898000875529, "grad_norm": 1.045783213201761, "learning_rate": 4.577814771858658e-05, "loss": 0.6716, "step": 8165 }, { "epoch": 0.238435721581789, "grad_norm": 1.1585868358342493, "learning_rate": 4.5771813883679065e-05, "loss": 0.7187, "step": 8170 }, { "epoch": 0.23858164307602509, "grad_norm": 1.130478461335575, "learning_rate": 4.576547579383844e-05, "loss": 0.619, "step": 8175 }, { "epoch": 0.2387275645702612, "grad_norm": 1.0630748840472055, "learning_rate": 4.575913345054062e-05, "loss": 0.6305, "step": 8180 }, { "epoch": 0.2388734860644973, "grad_norm": 1.0176127054434814, "learning_rate": 4.575278685526257e-05, "loss": 0.6032, "step": 8185 }, { "epoch": 0.2390194075587334, "grad_norm": 1.1287092283985667, "learning_rate": 4.574643600948219e-05, "loss": 0.6703, "step": 8190 }, { "epoch": 0.2391653290529695, "grad_norm": 0.9424134334247461, "learning_rate": 4.57400809146784e-05, "loss": 0.6561, "step": 8195 }, { "epoch": 0.2393112505472056, "grad_norm": 1.200322925664898, "learning_rate": 4.573372157233109e-05, "loss": 0.7545, "step": 8200 }, { "epoch": 0.2394571720414417, "grad_norm": 1.228633216218335, "learning_rate": 4.5727357983921184e-05, "loss": 0.6702, "step": 8205 }, { "epoch": 0.2396030935356778, "grad_norm": 0.9490043695159041, "learning_rate": 4.572099015093053e-05, "loss": 0.6463, "step": 8210 }, { "epoch": 0.23974901502991391, "grad_norm": 1.1769671690484456, "learning_rate": 4.571461807484202e-05, "loss": 0.6321, "step": 8215 }, { "epoch": 0.23989493652415, "grad_norm": 1.0427798594595943, "learning_rate": 4.570824175713949e-05, "loss": 0.6399, "step": 8220 }, { "epoch": 0.24004085801838612, "grad_norm": 1.0942267393901677, "learning_rate": 4.57018611993078e-05, "loss": 0.606, "step": 8225 }, { "epoch": 0.2401867795126222, "grad_norm": 1.1724021908143896, "learning_rate": 4.5695476402832774e-05, "loss": 0.6503, "step": 8230 }, { "epoch": 0.2403327010068583, "grad_norm": 1.0117924318821647, "learning_rate": 4.568908736920123e-05, "loss": 0.5978, "step": 8235 }, { "epoch": 0.24047862250109442, "grad_norm": 0.9965386070949268, "learning_rate": 4.568269409990098e-05, "loss": 0.6359, "step": 8240 }, { "epoch": 0.2406245439953305, "grad_norm": 0.9885244588560603, "learning_rate": 4.5676296596420825e-05, "loss": 0.6078, "step": 8245 }, { "epoch": 0.24077046548956663, "grad_norm": 1.2099127854577707, "learning_rate": 4.566989486025053e-05, "loss": 0.6809, "step": 8250 }, { "epoch": 0.24091638698380272, "grad_norm": 0.9826882431929175, "learning_rate": 4.566348889288085e-05, "loss": 0.588, "step": 8255 }, { "epoch": 0.2410623084780388, "grad_norm": 0.9460549204407736, "learning_rate": 4.565707869580355e-05, "loss": 0.6337, "step": 8260 }, { "epoch": 0.24120822997227492, "grad_norm": 1.0415505101044353, "learning_rate": 4.565066427051136e-05, "loss": 0.6895, "step": 8265 }, { "epoch": 0.241354151466511, "grad_norm": 1.0425238728016206, "learning_rate": 4.5644245618498e-05, "loss": 0.6206, "step": 8270 }, { "epoch": 0.24150007296074713, "grad_norm": 1.0464667302913129, "learning_rate": 4.563782274125817e-05, "loss": 0.7063, "step": 8275 }, { "epoch": 0.24164599445498322, "grad_norm": 1.1866509120432345, "learning_rate": 4.563139564028756e-05, "loss": 0.696, "step": 8280 }, { "epoch": 0.2417919159492193, "grad_norm": 1.0836941728350327, "learning_rate": 4.562496431708283e-05, "loss": 0.6541, "step": 8285 }, { "epoch": 0.24193783744345543, "grad_norm": 1.1270774332039217, "learning_rate": 4.561852877314165e-05, "loss": 0.6531, "step": 8290 }, { "epoch": 0.24208375893769152, "grad_norm": 1.012840678344838, "learning_rate": 4.5612089009962645e-05, "loss": 0.6219, "step": 8295 }, { "epoch": 0.24222968043192764, "grad_norm": 1.2387280499643039, "learning_rate": 4.560564502904544e-05, "loss": 0.6194, "step": 8300 }, { "epoch": 0.24237560192616373, "grad_norm": 1.1672074661401954, "learning_rate": 4.559919683189063e-05, "loss": 0.6951, "step": 8305 }, { "epoch": 0.24252152342039981, "grad_norm": 1.0688373696208793, "learning_rate": 4.559274441999981e-05, "loss": 0.6083, "step": 8310 }, { "epoch": 0.24266744491463593, "grad_norm": 1.0240525212158664, "learning_rate": 4.558628779487553e-05, "loss": 0.6694, "step": 8315 }, { "epoch": 0.24281336640887202, "grad_norm": 0.885445710509539, "learning_rate": 4.5579826958021346e-05, "loss": 0.583, "step": 8320 }, { "epoch": 0.24295928790310814, "grad_norm": 0.9226945774382187, "learning_rate": 4.557336191094178e-05, "loss": 0.658, "step": 8325 }, { "epoch": 0.24310520939734423, "grad_norm": 1.2038544331400904, "learning_rate": 4.5566892655142346e-05, "loss": 0.597, "step": 8330 }, { "epoch": 0.24325113089158032, "grad_norm": 1.0821248713422733, "learning_rate": 4.556041919212952e-05, "loss": 0.666, "step": 8335 }, { "epoch": 0.24339705238581644, "grad_norm": 1.056849069876137, "learning_rate": 4.555394152341078e-05, "loss": 0.6495, "step": 8340 }, { "epoch": 0.24354297388005253, "grad_norm": 0.8936922841811944, "learning_rate": 4.554745965049455e-05, "loss": 0.6318, "step": 8345 }, { "epoch": 0.24368889537428864, "grad_norm": 0.8634715950232043, "learning_rate": 4.554097357489029e-05, "loss": 0.657, "step": 8350 }, { "epoch": 0.24383481686852473, "grad_norm": 1.2203497179324476, "learning_rate": 4.5534483298108376e-05, "loss": 0.6704, "step": 8355 }, { "epoch": 0.24398073836276082, "grad_norm": 0.8483026686485925, "learning_rate": 4.55279888216602e-05, "loss": 0.5976, "step": 8360 }, { "epoch": 0.24412665985699694, "grad_norm": 1.1346864152443763, "learning_rate": 4.552149014705813e-05, "loss": 0.704, "step": 8365 }, { "epoch": 0.24427258135123303, "grad_norm": 1.0198316606837534, "learning_rate": 4.551498727581548e-05, "loss": 0.6574, "step": 8370 }, { "epoch": 0.24441850284546915, "grad_norm": 1.0515082285160497, "learning_rate": 4.550848020944659e-05, "loss": 0.6658, "step": 8375 }, { "epoch": 0.24456442433970524, "grad_norm": 1.133774436600005, "learning_rate": 4.550196894946674e-05, "loss": 0.653, "step": 8380 }, { "epoch": 0.24471034583394133, "grad_norm": 0.9605194852435424, "learning_rate": 4.5495453497392204e-05, "loss": 0.5541, "step": 8385 }, { "epoch": 0.24485626732817745, "grad_norm": 1.1635317660708095, "learning_rate": 4.5488933854740216e-05, "loss": 0.6443, "step": 8390 }, { "epoch": 0.24500218882241354, "grad_norm": 0.989708829256966, "learning_rate": 4.548241002302901e-05, "loss": 0.6283, "step": 8395 }, { "epoch": 0.24514811031664965, "grad_norm": 0.8773266072878004, "learning_rate": 4.547588200377777e-05, "loss": 0.6002, "step": 8400 }, { "epoch": 0.24529403181088574, "grad_norm": 0.8716366177549859, "learning_rate": 4.5469349798506676e-05, "loss": 0.5974, "step": 8405 }, { "epoch": 0.24543995330512183, "grad_norm": 1.152374746893429, "learning_rate": 4.546281340873688e-05, "loss": 0.6654, "step": 8410 }, { "epoch": 0.24558587479935795, "grad_norm": 1.1378550464077823, "learning_rate": 4.545627283599048e-05, "loss": 0.6309, "step": 8415 }, { "epoch": 0.24573179629359404, "grad_norm": 1.0919652209746074, "learning_rate": 4.5449728081790595e-05, "loss": 0.6122, "step": 8420 }, { "epoch": 0.24587771778783016, "grad_norm": 0.972255598277924, "learning_rate": 4.544317914766128e-05, "loss": 0.6211, "step": 8425 }, { "epoch": 0.24602363928206625, "grad_norm": 0.9853205829633739, "learning_rate": 4.543662603512758e-05, "loss": 0.6416, "step": 8430 }, { "epoch": 0.24616956077630234, "grad_norm": 1.056632555575449, "learning_rate": 4.54300687457155e-05, "loss": 0.6182, "step": 8435 }, { "epoch": 0.24631548227053846, "grad_norm": 0.904331251783767, "learning_rate": 4.5423507280952035e-05, "loss": 0.6268, "step": 8440 }, { "epoch": 0.24646140376477454, "grad_norm": 1.0785131583947387, "learning_rate": 4.5416941642365156e-05, "loss": 0.7279, "step": 8445 }, { "epoch": 0.24660732525901066, "grad_norm": 0.9408671732821459, "learning_rate": 4.541037183148378e-05, "loss": 0.5732, "step": 8450 }, { "epoch": 0.24675324675324675, "grad_norm": 0.9558067477128036, "learning_rate": 4.540379784983782e-05, "loss": 0.6305, "step": 8455 }, { "epoch": 0.24689916824748284, "grad_norm": 1.365267892482908, "learning_rate": 4.5397219698958135e-05, "loss": 0.634, "step": 8460 }, { "epoch": 0.24704508974171896, "grad_norm": 1.078231203840172, "learning_rate": 4.5390637380376584e-05, "loss": 0.6261, "step": 8465 }, { "epoch": 0.24719101123595505, "grad_norm": 0.9099464200016814, "learning_rate": 4.538405089562598e-05, "loss": 0.6064, "step": 8470 }, { "epoch": 0.24733693273019117, "grad_norm": 1.0196934506706474, "learning_rate": 4.537746024624011e-05, "loss": 0.6695, "step": 8475 }, { "epoch": 0.24748285422442726, "grad_norm": 1.0435196695261786, "learning_rate": 4.537086543375373e-05, "loss": 0.5878, "step": 8480 }, { "epoch": 0.24762877571866335, "grad_norm": 0.9955620064745526, "learning_rate": 4.536426645970255e-05, "loss": 0.5973, "step": 8485 }, { "epoch": 0.24777469721289946, "grad_norm": 1.017454961241555, "learning_rate": 4.535766332562329e-05, "loss": 0.5482, "step": 8490 }, { "epoch": 0.24792061870713555, "grad_norm": 0.9880543818092671, "learning_rate": 4.535105603305359e-05, "loss": 0.6253, "step": 8495 }, { "epoch": 0.24806654020137167, "grad_norm": 1.090615192104329, "learning_rate": 4.53444445835321e-05, "loss": 0.6516, "step": 8500 }, { "epoch": 0.24821246169560776, "grad_norm": 0.9290383012488898, "learning_rate": 4.533782897859841e-05, "loss": 0.6623, "step": 8505 }, { "epoch": 0.24835838318984385, "grad_norm": 0.9009167803126679, "learning_rate": 4.533120921979309e-05, "loss": 0.618, "step": 8510 }, { "epoch": 0.24850430468407997, "grad_norm": 0.9315590673598744, "learning_rate": 4.532458530865767e-05, "loss": 0.6065, "step": 8515 }, { "epoch": 0.24865022617831606, "grad_norm": 1.1045170609903816, "learning_rate": 4.531795724673466e-05, "loss": 0.6344, "step": 8520 }, { "epoch": 0.24879614767255218, "grad_norm": 1.1037701955497576, "learning_rate": 4.5311325035567506e-05, "loss": 0.6532, "step": 8525 }, { "epoch": 0.24894206916678827, "grad_norm": 1.3239404377087938, "learning_rate": 4.530468867670068e-05, "loss": 0.6647, "step": 8530 }, { "epoch": 0.24908799066102436, "grad_norm": 0.9195634618115939, "learning_rate": 4.529804817167955e-05, "loss": 0.6354, "step": 8535 }, { "epoch": 0.24923391215526047, "grad_norm": 1.0231926947010603, "learning_rate": 4.529140352205049e-05, "loss": 0.6499, "step": 8540 }, { "epoch": 0.24937983364949656, "grad_norm": 1.1107839505169632, "learning_rate": 4.528475472936083e-05, "loss": 0.662, "step": 8545 }, { "epoch": 0.24952575514373268, "grad_norm": 1.1748706160940787, "learning_rate": 4.5278101795158887e-05, "loss": 0.6913, "step": 8550 }, { "epoch": 0.24967167663796877, "grad_norm": 1.0095853667048336, "learning_rate": 4.527144472099389e-05, "loss": 0.6568, "step": 8555 }, { "epoch": 0.24981759813220486, "grad_norm": 0.9869538252845689, "learning_rate": 4.526478350841607e-05, "loss": 0.6437, "step": 8560 }, { "epoch": 0.24996351962644098, "grad_norm": 1.0706587362468276, "learning_rate": 4.5258118158976634e-05, "loss": 0.6694, "step": 8565 }, { "epoch": 0.25010944112067707, "grad_norm": 1.0556725259557305, "learning_rate": 4.5251448674227706e-05, "loss": 0.7093, "step": 8570 }, { "epoch": 0.25025536261491316, "grad_norm": 1.0651615949570774, "learning_rate": 4.5244775055722424e-05, "loss": 0.6834, "step": 8575 }, { "epoch": 0.2504012841091493, "grad_norm": 1.016408579623856, "learning_rate": 4.523809730501485e-05, "loss": 0.624, "step": 8580 }, { "epoch": 0.2505472056033854, "grad_norm": 1.108958059462406, "learning_rate": 4.523141542366003e-05, "loss": 0.666, "step": 8585 }, { "epoch": 0.2506931270976215, "grad_norm": 0.9893682032067783, "learning_rate": 4.522472941321396e-05, "loss": 0.65, "step": 8590 }, { "epoch": 0.25083904859185757, "grad_norm": 1.0517656203180392, "learning_rate": 4.5218039275233605e-05, "loss": 0.6924, "step": 8595 }, { "epoch": 0.25098497008609366, "grad_norm": 1.106810116163582, "learning_rate": 4.521134501127689e-05, "loss": 0.6378, "step": 8600 }, { "epoch": 0.2511308915803298, "grad_norm": 1.0653247405328319, "learning_rate": 4.52046466229027e-05, "loss": 0.6794, "step": 8605 }, { "epoch": 0.2512768130745659, "grad_norm": 0.935325001010799, "learning_rate": 4.519794411167087e-05, "loss": 0.602, "step": 8610 }, { "epoch": 0.251422734568802, "grad_norm": 0.948232298837928, "learning_rate": 4.5191237479142215e-05, "loss": 0.6187, "step": 8615 }, { "epoch": 0.2515686560630381, "grad_norm": 1.1143308928301126, "learning_rate": 4.51845267268785e-05, "loss": 0.6335, "step": 8620 }, { "epoch": 0.25171457755727417, "grad_norm": 1.1211944331520276, "learning_rate": 4.517781185644244e-05, "loss": 0.6318, "step": 8625 }, { "epoch": 0.2518604990515103, "grad_norm": 1.0660203848166905, "learning_rate": 4.517109286939773e-05, "loss": 0.607, "step": 8630 }, { "epoch": 0.2520064205457464, "grad_norm": 1.044750099957479, "learning_rate": 4.5164369767309e-05, "loss": 0.6132, "step": 8635 }, { "epoch": 0.2521523420399825, "grad_norm": 0.8598749404929849, "learning_rate": 4.5157642551741864e-05, "loss": 0.5784, "step": 8640 }, { "epoch": 0.2522982635342186, "grad_norm": 0.9280651180986257, "learning_rate": 4.515091122426287e-05, "loss": 0.633, "step": 8645 }, { "epoch": 0.25244418502845467, "grad_norm": 1.0437175231155282, "learning_rate": 4.514417578643953e-05, "loss": 0.6762, "step": 8650 }, { "epoch": 0.2525901065226908, "grad_norm": 1.0976874522613829, "learning_rate": 4.513743623984032e-05, "loss": 0.622, "step": 8655 }, { "epoch": 0.2527360280169269, "grad_norm": 1.1713320919190777, "learning_rate": 4.513069258603467e-05, "loss": 0.6391, "step": 8660 }, { "epoch": 0.252881949511163, "grad_norm": 1.1693868341335154, "learning_rate": 4.512394482659297e-05, "loss": 0.6066, "step": 8665 }, { "epoch": 0.2530278710053991, "grad_norm": 0.8934313973242964, "learning_rate": 4.511719296308656e-05, "loss": 0.5705, "step": 8670 }, { "epoch": 0.2531737924996352, "grad_norm": 1.005388539933998, "learning_rate": 4.511043699708773e-05, "loss": 0.6552, "step": 8675 }, { "epoch": 0.2533197139938713, "grad_norm": 1.0494716156555224, "learning_rate": 4.510367693016974e-05, "loss": 0.6032, "step": 8680 }, { "epoch": 0.2534656354881074, "grad_norm": 1.2078200903618985, "learning_rate": 4.50969127639068e-05, "loss": 0.6438, "step": 8685 }, { "epoch": 0.2536115569823435, "grad_norm": 0.9663618367115041, "learning_rate": 4.5090144499874065e-05, "loss": 0.6985, "step": 8690 }, { "epoch": 0.2537574784765796, "grad_norm": 1.0777409111031242, "learning_rate": 4.508337213964766e-05, "loss": 0.6609, "step": 8695 }, { "epoch": 0.2539033999708157, "grad_norm": 1.0048933064083798, "learning_rate": 4.507659568480464e-05, "loss": 0.6396, "step": 8700 }, { "epoch": 0.2540493214650518, "grad_norm": 0.9707644540521843, "learning_rate": 4.506981513692305e-05, "loss": 0.6317, "step": 8705 }, { "epoch": 0.2541952429592879, "grad_norm": 1.1263204027773712, "learning_rate": 4.5063030497581855e-05, "loss": 0.7023, "step": 8710 }, { "epoch": 0.254341164453524, "grad_norm": 1.2280235298093964, "learning_rate": 4.505624176836099e-05, "loss": 0.6149, "step": 8715 }, { "epoch": 0.2544870859477601, "grad_norm": 0.9889017375908826, "learning_rate": 4.504944895084132e-05, "loss": 0.6286, "step": 8720 }, { "epoch": 0.2546330074419962, "grad_norm": 1.1044853248915083, "learning_rate": 4.504265204660472e-05, "loss": 0.6741, "step": 8725 }, { "epoch": 0.25477892893623233, "grad_norm": 1.167529989993183, "learning_rate": 4.503585105723392e-05, "loss": 0.6503, "step": 8730 }, { "epoch": 0.2549248504304684, "grad_norm": 0.8834601678822187, "learning_rate": 4.502904598431271e-05, "loss": 0.5466, "step": 8735 }, { "epoch": 0.2550707719247045, "grad_norm": 1.1465013234736792, "learning_rate": 4.502223682942575e-05, "loss": 0.6927, "step": 8740 }, { "epoch": 0.2552166934189406, "grad_norm": 0.9242748984545789, "learning_rate": 4.5015423594158687e-05, "loss": 0.6378, "step": 8745 }, { "epoch": 0.2553626149131767, "grad_norm": 0.9314522882806904, "learning_rate": 4.50086062800981e-05, "loss": 0.6409, "step": 8750 }, { "epoch": 0.25550853640741283, "grad_norm": 1.1333642925365388, "learning_rate": 4.500178488883154e-05, "loss": 0.5847, "step": 8755 }, { "epoch": 0.2556544579016489, "grad_norm": 0.8985439645057681, "learning_rate": 4.499495942194749e-05, "loss": 0.6874, "step": 8760 }, { "epoch": 0.255800379395885, "grad_norm": 1.1175351223172236, "learning_rate": 4.498812988103539e-05, "loss": 0.701, "step": 8765 }, { "epoch": 0.2559463008901211, "grad_norm": 0.9886598131585127, "learning_rate": 4.4981296267685625e-05, "loss": 0.6798, "step": 8770 }, { "epoch": 0.2560922223843572, "grad_norm": 1.0816684480585688, "learning_rate": 4.4974458583489535e-05, "loss": 0.7554, "step": 8775 }, { "epoch": 0.25623814387859334, "grad_norm": 1.1262262489071266, "learning_rate": 4.496761683003938e-05, "loss": 0.6731, "step": 8780 }, { "epoch": 0.25638406537282943, "grad_norm": 1.0711936800059774, "learning_rate": 4.496077100892842e-05, "loss": 0.6485, "step": 8785 }, { "epoch": 0.2565299868670655, "grad_norm": 1.0638662267386898, "learning_rate": 4.4953921121750816e-05, "loss": 0.6413, "step": 8790 }, { "epoch": 0.2566759083613016, "grad_norm": 1.027071226153336, "learning_rate": 4.494706717010169e-05, "loss": 0.6422, "step": 8795 }, { "epoch": 0.2568218298555377, "grad_norm": 1.1281795437000612, "learning_rate": 4.494020915557713e-05, "loss": 0.7148, "step": 8800 }, { "epoch": 0.25696775134977384, "grad_norm": 1.091748509726268, "learning_rate": 4.493334707977413e-05, "loss": 0.6536, "step": 8805 }, { "epoch": 0.25711367284400993, "grad_norm": 1.0596611242723968, "learning_rate": 4.492648094429067e-05, "loss": 0.6559, "step": 8810 }, { "epoch": 0.257259594338246, "grad_norm": 1.0317673128159683, "learning_rate": 4.491961075072564e-05, "loss": 0.7173, "step": 8815 }, { "epoch": 0.2574055158324821, "grad_norm": 1.196954737433136, "learning_rate": 4.491273650067891e-05, "loss": 0.6859, "step": 8820 }, { "epoch": 0.2575514373267182, "grad_norm": 1.045284749576078, "learning_rate": 4.4905858195751284e-05, "loss": 0.6137, "step": 8825 }, { "epoch": 0.25769735882095435, "grad_norm": 0.9940565864241572, "learning_rate": 4.4898975837544474e-05, "loss": 0.6506, "step": 8830 }, { "epoch": 0.25784328031519044, "grad_norm": 1.0359292098971435, "learning_rate": 4.489208942766119e-05, "loss": 0.6134, "step": 8835 }, { "epoch": 0.2579892018094265, "grad_norm": 1.2482575873253083, "learning_rate": 4.488519896770505e-05, "loss": 0.6277, "step": 8840 }, { "epoch": 0.2581351233036626, "grad_norm": 0.9570472557612177, "learning_rate": 4.4878304459280635e-05, "loss": 0.5576, "step": 8845 }, { "epoch": 0.2582810447978987, "grad_norm": 0.9820151190363825, "learning_rate": 4.487140590399345e-05, "loss": 0.6412, "step": 8850 }, { "epoch": 0.25842696629213485, "grad_norm": 1.5260257101818073, "learning_rate": 4.4864503303449965e-05, "loss": 0.7492, "step": 8855 }, { "epoch": 0.25857288778637094, "grad_norm": 0.9709510842242115, "learning_rate": 4.485759665925756e-05, "loss": 0.5788, "step": 8860 }, { "epoch": 0.25871880928060703, "grad_norm": 1.030335770730899, "learning_rate": 4.48506859730246e-05, "loss": 0.6275, "step": 8865 }, { "epoch": 0.2588647307748431, "grad_norm": 1.148293871146626, "learning_rate": 4.484377124636035e-05, "loss": 0.6522, "step": 8870 }, { "epoch": 0.2590106522690792, "grad_norm": 1.0315312134836274, "learning_rate": 4.4836852480875044e-05, "loss": 0.665, "step": 8875 }, { "epoch": 0.25915657376331536, "grad_norm": 0.8996174653397238, "learning_rate": 4.482992967817984e-05, "loss": 0.5888, "step": 8880 }, { "epoch": 0.25930249525755145, "grad_norm": 1.0282357250944139, "learning_rate": 4.482300283988684e-05, "loss": 0.6569, "step": 8885 }, { "epoch": 0.25944841675178754, "grad_norm": 0.9655238959303517, "learning_rate": 4.481607196760909e-05, "loss": 0.6178, "step": 8890 }, { "epoch": 0.2595943382460236, "grad_norm": 0.863262081048551, "learning_rate": 4.4809137062960574e-05, "loss": 0.6088, "step": 8895 }, { "epoch": 0.2597402597402597, "grad_norm": 0.8698525165931709, "learning_rate": 4.4802198127556213e-05, "loss": 0.6184, "step": 8900 }, { "epoch": 0.25988618123449586, "grad_norm": 1.0250804577905368, "learning_rate": 4.479525516301187e-05, "loss": 0.6335, "step": 8905 }, { "epoch": 0.26003210272873195, "grad_norm": 1.3822614195453675, "learning_rate": 4.4788308170944335e-05, "loss": 0.6248, "step": 8910 }, { "epoch": 0.26017802422296804, "grad_norm": 0.9860823565374516, "learning_rate": 4.478135715297136e-05, "loss": 0.6489, "step": 8915 }, { "epoch": 0.26032394571720413, "grad_norm": 1.058019067413886, "learning_rate": 4.4774402110711606e-05, "loss": 0.6251, "step": 8920 }, { "epoch": 0.2604698672114402, "grad_norm": 1.00906502323677, "learning_rate": 4.47674430457847e-05, "loss": 0.6301, "step": 8925 }, { "epoch": 0.26061578870567637, "grad_norm": 1.2686657373904193, "learning_rate": 4.476047995981117e-05, "loss": 0.7124, "step": 8930 }, { "epoch": 0.26076171019991246, "grad_norm": 0.9058661015445746, "learning_rate": 4.475351285441251e-05, "loss": 0.6162, "step": 8935 }, { "epoch": 0.26090763169414855, "grad_norm": 1.0833744894301809, "learning_rate": 4.474654173121115e-05, "loss": 0.622, "step": 8940 }, { "epoch": 0.26105355318838463, "grad_norm": 0.9971828059390756, "learning_rate": 4.4739566591830426e-05, "loss": 0.6169, "step": 8945 }, { "epoch": 0.2611994746826207, "grad_norm": 1.258582025535961, "learning_rate": 4.4732587437894645e-05, "loss": 0.644, "step": 8950 }, { "epoch": 0.26134539617685687, "grad_norm": 1.0642359177210923, "learning_rate": 4.472560427102903e-05, "loss": 0.6496, "step": 8955 }, { "epoch": 0.26149131767109296, "grad_norm": 1.0471021165508552, "learning_rate": 4.471861709285974e-05, "loss": 0.6554, "step": 8960 }, { "epoch": 0.26163723916532905, "grad_norm": 0.9981258819363995, "learning_rate": 4.471162590501388e-05, "loss": 0.6282, "step": 8965 }, { "epoch": 0.26178316065956514, "grad_norm": 1.3139455062385663, "learning_rate": 4.4704630709119464e-05, "loss": 0.6421, "step": 8970 }, { "epoch": 0.26192908215380123, "grad_norm": 1.2672334590660292, "learning_rate": 4.4697631506805456e-05, "loss": 0.6542, "step": 8975 }, { "epoch": 0.2620750036480374, "grad_norm": 0.8780000848733512, "learning_rate": 4.4690628299701756e-05, "loss": 0.5674, "step": 8980 }, { "epoch": 0.26222092514227346, "grad_norm": 1.106894253749715, "learning_rate": 4.4683621089439196e-05, "loss": 0.7116, "step": 8985 }, { "epoch": 0.26236684663650955, "grad_norm": 0.94579533978562, "learning_rate": 4.467660987764952e-05, "loss": 0.638, "step": 8990 }, { "epoch": 0.26251276813074564, "grad_norm": 0.9336607293029315, "learning_rate": 4.466959466596543e-05, "loss": 0.5988, "step": 8995 }, { "epoch": 0.26265868962498173, "grad_norm": 1.106859738574095, "learning_rate": 4.466257545602056e-05, "loss": 0.6259, "step": 9000 }, { "epoch": 0.2628046111192179, "grad_norm": 1.1534969708972123, "learning_rate": 4.465555224944944e-05, "loss": 0.5885, "step": 9005 }, { "epoch": 0.26295053261345397, "grad_norm": 1.1529115198321225, "learning_rate": 4.4648525047887576e-05, "loss": 0.6453, "step": 9010 }, { "epoch": 0.26309645410769006, "grad_norm": 1.0293377078051025, "learning_rate": 4.464149385297137e-05, "loss": 0.5986, "step": 9015 }, { "epoch": 0.26324237560192615, "grad_norm": 0.8846142765905193, "learning_rate": 4.463445866633818e-05, "loss": 0.6949, "step": 9020 }, { "epoch": 0.26338829709616224, "grad_norm": 1.049061138368147, "learning_rate": 4.462741948962627e-05, "loss": 0.6353, "step": 9025 }, { "epoch": 0.2635342185903984, "grad_norm": 0.9257236183525315, "learning_rate": 4.462037632447484e-05, "loss": 0.6453, "step": 9030 }, { "epoch": 0.2636801400846345, "grad_norm": 0.9340035789822353, "learning_rate": 4.461332917252403e-05, "loss": 0.6532, "step": 9035 }, { "epoch": 0.26382606157887056, "grad_norm": 0.9498286322382832, "learning_rate": 4.46062780354149e-05, "loss": 0.6407, "step": 9040 }, { "epoch": 0.26397198307310665, "grad_norm": 1.1612496284411775, "learning_rate": 4.459922291478944e-05, "loss": 0.707, "step": 9045 }, { "epoch": 0.26411790456734274, "grad_norm": 1.0934823183156754, "learning_rate": 4.4592163812290565e-05, "loss": 0.7081, "step": 9050 }, { "epoch": 0.2642638260615789, "grad_norm": 1.1152636045238529, "learning_rate": 4.4585100729562116e-05, "loss": 0.6228, "step": 9055 }, { "epoch": 0.264409747555815, "grad_norm": 0.9944470213777442, "learning_rate": 4.457803366824888e-05, "loss": 0.5851, "step": 9060 }, { "epoch": 0.26455566905005107, "grad_norm": 0.9611301361871022, "learning_rate": 4.457096262999653e-05, "loss": 0.6424, "step": 9065 }, { "epoch": 0.26470159054428716, "grad_norm": 0.9282054393101163, "learning_rate": 4.456388761645171e-05, "loss": 0.6507, "step": 9070 }, { "epoch": 0.26484751203852325, "grad_norm": 0.9407764753115578, "learning_rate": 4.4556808629261944e-05, "loss": 0.6002, "step": 9075 }, { "epoch": 0.2649934335327594, "grad_norm": 1.1649189255342702, "learning_rate": 4.454972567007573e-05, "loss": 0.6486, "step": 9080 }, { "epoch": 0.2651393550269955, "grad_norm": 1.1724980079511826, "learning_rate": 4.4542638740542454e-05, "loss": 0.6327, "step": 9085 }, { "epoch": 0.26528527652123157, "grad_norm": 1.0462629703387767, "learning_rate": 4.4535547842312456e-05, "loss": 0.6257, "step": 9090 }, { "epoch": 0.26543119801546766, "grad_norm": 1.0117152089921373, "learning_rate": 4.452845297703697e-05, "loss": 0.6589, "step": 9095 }, { "epoch": 0.26557711950970375, "grad_norm": 1.1398775730241093, "learning_rate": 4.4521354146368165e-05, "loss": 0.6717, "step": 9100 }, { "epoch": 0.2657230410039399, "grad_norm": 1.0412595582253366, "learning_rate": 4.4514251351959145e-05, "loss": 0.6614, "step": 9105 }, { "epoch": 0.265868962498176, "grad_norm": 1.0767540727890639, "learning_rate": 4.4507144595463925e-05, "loss": 0.6545, "step": 9110 }, { "epoch": 0.2660148839924121, "grad_norm": 1.0196518532788041, "learning_rate": 4.4500033878537444e-05, "loss": 0.6301, "step": 9115 }, { "epoch": 0.26616080548664817, "grad_norm": 1.0851434005566267, "learning_rate": 4.449291920283558e-05, "loss": 0.6374, "step": 9120 }, { "epoch": 0.2663067269808843, "grad_norm": 1.0650008141540834, "learning_rate": 4.44858005700151e-05, "loss": 0.7091, "step": 9125 }, { "epoch": 0.2664526484751204, "grad_norm": 0.9157099178884863, "learning_rate": 4.447867798173371e-05, "loss": 0.6179, "step": 9130 }, { "epoch": 0.2665985699693565, "grad_norm": 1.2341099504920638, "learning_rate": 4.447155143965005e-05, "loss": 0.7344, "step": 9135 }, { "epoch": 0.2667444914635926, "grad_norm": 1.0026383364226366, "learning_rate": 4.446442094542366e-05, "loss": 0.61, "step": 9140 }, { "epoch": 0.26689041295782867, "grad_norm": 1.0532006414265724, "learning_rate": 4.445728650071501e-05, "loss": 0.6424, "step": 9145 }, { "epoch": 0.2670363344520648, "grad_norm": 0.9517323938666045, "learning_rate": 4.44501481071855e-05, "loss": 0.5986, "step": 9150 }, { "epoch": 0.2671822559463009, "grad_norm": 0.9046201282895397, "learning_rate": 4.444300576649742e-05, "loss": 0.6449, "step": 9155 }, { "epoch": 0.267328177440537, "grad_norm": 0.8882552940513714, "learning_rate": 4.4435859480314004e-05, "loss": 0.6053, "step": 9160 }, { "epoch": 0.2674740989347731, "grad_norm": 1.0321453914897791, "learning_rate": 4.442870925029941e-05, "loss": 0.7211, "step": 9165 }, { "epoch": 0.2676200204290092, "grad_norm": 0.9371634087659004, "learning_rate": 4.442155507811868e-05, "loss": 0.5801, "step": 9170 }, { "epoch": 0.2677659419232453, "grad_norm": 0.9158064321777787, "learning_rate": 4.441439696543782e-05, "loss": 0.6015, "step": 9175 }, { "epoch": 0.2679118634174814, "grad_norm": 1.026516340182458, "learning_rate": 4.440723491392372e-05, "loss": 0.6103, "step": 9180 }, { "epoch": 0.2680577849117175, "grad_norm": 0.9583772134122046, "learning_rate": 4.440006892524419e-05, "loss": 0.7085, "step": 9185 }, { "epoch": 0.2682037064059536, "grad_norm": 0.9447783990267581, "learning_rate": 4.439289900106797e-05, "loss": 0.6681, "step": 9190 }, { "epoch": 0.2683496279001897, "grad_norm": 1.0381487807905978, "learning_rate": 4.4385725143064725e-05, "loss": 0.5999, "step": 9195 }, { "epoch": 0.2684955493944258, "grad_norm": 0.9480348043040826, "learning_rate": 4.437854735290499e-05, "loss": 0.6019, "step": 9200 }, { "epoch": 0.2686414708886619, "grad_norm": 1.140769265493787, "learning_rate": 4.437136563226028e-05, "loss": 0.6559, "step": 9205 }, { "epoch": 0.268787392382898, "grad_norm": 1.115671240264836, "learning_rate": 4.436417998280297e-05, "loss": 0.6619, "step": 9210 }, { "epoch": 0.2689333138771341, "grad_norm": 1.075428668338084, "learning_rate": 4.4356990406206395e-05, "loss": 0.6501, "step": 9215 }, { "epoch": 0.2690792353713702, "grad_norm": 1.3603889989071605, "learning_rate": 4.4349796904144756e-05, "loss": 0.681, "step": 9220 }, { "epoch": 0.26922515686560633, "grad_norm": 1.2073389673461323, "learning_rate": 4.434259947829321e-05, "loss": 0.6722, "step": 9225 }, { "epoch": 0.2693710783598424, "grad_norm": 1.1162252568589999, "learning_rate": 4.433539813032781e-05, "loss": 0.6038, "step": 9230 }, { "epoch": 0.2695169998540785, "grad_norm": 1.039021375198927, "learning_rate": 4.432819286192553e-05, "loss": 0.6192, "step": 9235 }, { "epoch": 0.2696629213483146, "grad_norm": 1.2151238941194762, "learning_rate": 4.4320983674764245e-05, "loss": 0.6778, "step": 9240 }, { "epoch": 0.2698088428425507, "grad_norm": 1.1930561614733945, "learning_rate": 4.431377057052274e-05, "loss": 0.5987, "step": 9245 }, { "epoch": 0.26995476433678683, "grad_norm": 0.9057296505516568, "learning_rate": 4.4306553550880744e-05, "loss": 0.596, "step": 9250 }, { "epoch": 0.2701006858310229, "grad_norm": 1.0758695906453382, "learning_rate": 4.4299332617518845e-05, "loss": 0.6365, "step": 9255 }, { "epoch": 0.270246607325259, "grad_norm": 1.0142675707376867, "learning_rate": 4.42921077721186e-05, "loss": 0.6528, "step": 9260 }, { "epoch": 0.2703925288194951, "grad_norm": 1.1300311779497318, "learning_rate": 4.428487901636243e-05, "loss": 0.6846, "step": 9265 }, { "epoch": 0.2705384503137312, "grad_norm": 1.1637159505865442, "learning_rate": 4.4277646351933696e-05, "loss": 0.5712, "step": 9270 }, { "epoch": 0.27068437180796734, "grad_norm": 1.3591688453547368, "learning_rate": 4.4270409780516656e-05, "loss": 0.5792, "step": 9275 }, { "epoch": 0.27083029330220343, "grad_norm": 0.9664209164435206, "learning_rate": 4.426316930379648e-05, "loss": 0.6053, "step": 9280 }, { "epoch": 0.2709762147964395, "grad_norm": 1.0373865749406095, "learning_rate": 4.425592492345925e-05, "loss": 0.6667, "step": 9285 }, { "epoch": 0.2711221362906756, "grad_norm": 0.9648188488889569, "learning_rate": 4.4248676641191943e-05, "loss": 0.6204, "step": 9290 }, { "epoch": 0.2712680577849117, "grad_norm": 1.0927805250882077, "learning_rate": 4.424142445868248e-05, "loss": 0.6631, "step": 9295 }, { "epoch": 0.27141397927914784, "grad_norm": 0.969006039490248, "learning_rate": 4.4234168377619665e-05, "loss": 0.6294, "step": 9300 }, { "epoch": 0.27155990077338393, "grad_norm": 0.8923776068719823, "learning_rate": 4.422690839969318e-05, "loss": 0.6003, "step": 9305 }, { "epoch": 0.27170582226762, "grad_norm": 0.9805808319216262, "learning_rate": 4.421964452659368e-05, "loss": 0.6365, "step": 9310 }, { "epoch": 0.2718517437618561, "grad_norm": 1.116916243497408, "learning_rate": 4.4212376760012683e-05, "loss": 0.6713, "step": 9315 }, { "epoch": 0.2719976652560922, "grad_norm": 1.1596441871577454, "learning_rate": 4.420510510164263e-05, "loss": 0.6769, "step": 9320 }, { "epoch": 0.27214358675032835, "grad_norm": 0.9336167716258548, "learning_rate": 4.419782955317685e-05, "loss": 0.6157, "step": 9325 }, { "epoch": 0.27228950824456444, "grad_norm": 1.0730903395786142, "learning_rate": 4.4190550116309584e-05, "loss": 0.6552, "step": 9330 }, { "epoch": 0.2724354297388005, "grad_norm": 0.9622106854368614, "learning_rate": 4.418326679273601e-05, "loss": 0.6408, "step": 9335 }, { "epoch": 0.2725813512330366, "grad_norm": 0.9128000562317355, "learning_rate": 4.417597958415218e-05, "loss": 0.6605, "step": 9340 }, { "epoch": 0.2727272727272727, "grad_norm": 20.84925563166816, "learning_rate": 4.416868849225504e-05, "loss": 0.6775, "step": 9345 }, { "epoch": 0.27287319422150885, "grad_norm": 0.8810440240742944, "learning_rate": 4.416139351874246e-05, "loss": 0.6168, "step": 9350 }, { "epoch": 0.27301911571574494, "grad_norm": 0.9387956436694727, "learning_rate": 4.415409466531323e-05, "loss": 0.6026, "step": 9355 }, { "epoch": 0.27316503720998103, "grad_norm": 1.0288075506259988, "learning_rate": 4.4146791933667e-05, "loss": 0.7043, "step": 9360 }, { "epoch": 0.2733109587042171, "grad_norm": 1.0673780004829119, "learning_rate": 4.413948532550437e-05, "loss": 0.6474, "step": 9365 }, { "epoch": 0.2734568801984532, "grad_norm": 1.0532994663723354, "learning_rate": 4.4132174842526806e-05, "loss": 0.6418, "step": 9370 }, { "epoch": 0.27360280169268936, "grad_norm": 1.1156220457324426, "learning_rate": 4.4124860486436695e-05, "loss": 0.6407, "step": 9375 }, { "epoch": 0.27374872318692545, "grad_norm": 1.0251531039399187, "learning_rate": 4.411754225893731e-05, "loss": 0.6034, "step": 9380 }, { "epoch": 0.27389464468116154, "grad_norm": 0.9608927108302535, "learning_rate": 4.4110220161732855e-05, "loss": 0.5993, "step": 9385 }, { "epoch": 0.2740405661753976, "grad_norm": 1.1877678378869183, "learning_rate": 4.410289419652841e-05, "loss": 0.6675, "step": 9390 }, { "epoch": 0.2741864876696337, "grad_norm": 1.2854077719513781, "learning_rate": 4.409556436502997e-05, "loss": 0.7148, "step": 9395 }, { "epoch": 0.27433240916386986, "grad_norm": 1.25513412571786, "learning_rate": 4.408823066894441e-05, "loss": 0.6142, "step": 9400 }, { "epoch": 0.27447833065810595, "grad_norm": 1.0300988635514428, "learning_rate": 4.408089310997952e-05, "loss": 0.6067, "step": 9405 }, { "epoch": 0.27462425215234204, "grad_norm": 0.9999223548561794, "learning_rate": 4.407355168984399e-05, "loss": 0.6807, "step": 9410 }, { "epoch": 0.27477017364657813, "grad_norm": 1.1459692121118263, "learning_rate": 4.4066206410247416e-05, "loss": 0.6552, "step": 9415 }, { "epoch": 0.2749160951408142, "grad_norm": 0.991276255895814, "learning_rate": 4.405885727290026e-05, "loss": 0.6314, "step": 9420 }, { "epoch": 0.27506201663505037, "grad_norm": 0.9834130128941496, "learning_rate": 4.4051504279513935e-05, "loss": 0.6115, "step": 9425 }, { "epoch": 0.27520793812928646, "grad_norm": 1.0521378607421596, "learning_rate": 4.4044147431800696e-05, "loss": 0.6918, "step": 9430 }, { "epoch": 0.27535385962352255, "grad_norm": 1.247796341541082, "learning_rate": 4.4036786731473747e-05, "loss": 0.6333, "step": 9435 }, { "epoch": 0.27549978111775864, "grad_norm": 1.1345550226866046, "learning_rate": 4.402942218024716e-05, "loss": 0.6332, "step": 9440 }, { "epoch": 0.2756457026119947, "grad_norm": 1.0609247401101654, "learning_rate": 4.4022053779835883e-05, "loss": 0.5988, "step": 9445 }, { "epoch": 0.27579162410623087, "grad_norm": 1.0309338328314486, "learning_rate": 4.401468153195581e-05, "loss": 0.6929, "step": 9450 }, { "epoch": 0.27593754560046696, "grad_norm": 1.34035568672471, "learning_rate": 4.4007305438323716e-05, "loss": 0.7262, "step": 9455 }, { "epoch": 0.27608346709470305, "grad_norm": 0.9962609990804928, "learning_rate": 4.399992550065723e-05, "loss": 0.6697, "step": 9460 }, { "epoch": 0.27622938858893914, "grad_norm": 1.0155749732647312, "learning_rate": 4.399254172067493e-05, "loss": 0.6543, "step": 9465 }, { "epoch": 0.27637531008317523, "grad_norm": 0.8872903575437754, "learning_rate": 4.398515410009626e-05, "loss": 0.6388, "step": 9470 }, { "epoch": 0.2765212315774114, "grad_norm": 0.9356294193846711, "learning_rate": 4.397776264064157e-05, "loss": 0.6202, "step": 9475 }, { "epoch": 0.27666715307164746, "grad_norm": 0.966011345876543, "learning_rate": 4.397036734403209e-05, "loss": 0.5627, "step": 9480 }, { "epoch": 0.27681307456588355, "grad_norm": 0.794485955161537, "learning_rate": 4.396296821198997e-05, "loss": 0.5567, "step": 9485 }, { "epoch": 0.27695899606011964, "grad_norm": 1.2233095189996241, "learning_rate": 4.3955565246238225e-05, "loss": 0.5855, "step": 9490 }, { "epoch": 0.27710491755435573, "grad_norm": 0.9510694777237836, "learning_rate": 4.394815844850077e-05, "loss": 0.6041, "step": 9495 }, { "epoch": 0.2772508390485919, "grad_norm": 1.0953573357454538, "learning_rate": 4.394074782050242e-05, "loss": 0.6975, "step": 9500 }, { "epoch": 0.27739676054282797, "grad_norm": 0.9674381844392873, "learning_rate": 4.3933333363968884e-05, "loss": 0.6114, "step": 9505 }, { "epoch": 0.27754268203706406, "grad_norm": 2.1746813973067844, "learning_rate": 4.3925915080626756e-05, "loss": 0.6002, "step": 9510 }, { "epoch": 0.27768860353130015, "grad_norm": 0.8947633376924069, "learning_rate": 4.3918492972203507e-05, "loss": 0.568, "step": 9515 }, { "epoch": 0.27783452502553624, "grad_norm": 0.9385015533082346, "learning_rate": 4.391106704042753e-05, "loss": 0.5574, "step": 9520 }, { "epoch": 0.2779804465197724, "grad_norm": 0.940384338699598, "learning_rate": 4.390363728702809e-05, "loss": 0.5786, "step": 9525 }, { "epoch": 0.2781263680140085, "grad_norm": 1.105896961869733, "learning_rate": 4.3896203713735335e-05, "loss": 0.6334, "step": 9530 }, { "epoch": 0.27827228950824456, "grad_norm": 1.1600073440010827, "learning_rate": 4.3888766322280315e-05, "loss": 0.6404, "step": 9535 }, { "epoch": 0.27841821100248065, "grad_norm": 0.9794154358006812, "learning_rate": 4.388132511439497e-05, "loss": 0.6914, "step": 9540 }, { "epoch": 0.27856413249671674, "grad_norm": 1.4066206240283308, "learning_rate": 4.387388009181212e-05, "loss": 0.6718, "step": 9545 }, { "epoch": 0.2787100539909529, "grad_norm": 0.9899398666962982, "learning_rate": 4.386643125626548e-05, "loss": 0.6164, "step": 9550 }, { "epoch": 0.278855975485189, "grad_norm": 1.0108367918445247, "learning_rate": 4.3858978609489646e-05, "loss": 0.7144, "step": 9555 }, { "epoch": 0.27900189697942507, "grad_norm": 1.1668019237590037, "learning_rate": 4.3851522153220114e-05, "loss": 0.6831, "step": 9560 }, { "epoch": 0.27914781847366116, "grad_norm": 1.1421775344752243, "learning_rate": 4.384406188919325e-05, "loss": 0.6287, "step": 9565 }, { "epoch": 0.27929373996789725, "grad_norm": 0.8948808720896755, "learning_rate": 4.3836597819146324e-05, "loss": 0.6615, "step": 9570 }, { "epoch": 0.2794396614621334, "grad_norm": 1.1626018511343774, "learning_rate": 4.3829129944817476e-05, "loss": 0.6796, "step": 9575 }, { "epoch": 0.2795855829563695, "grad_norm": 1.0008460850257275, "learning_rate": 4.3821658267945747e-05, "loss": 0.6513, "step": 9580 }, { "epoch": 0.2797315044506056, "grad_norm": 1.1390733826075226, "learning_rate": 4.381418279027105e-05, "loss": 0.6401, "step": 9585 }, { "epoch": 0.27987742594484166, "grad_norm": 1.1303844071107665, "learning_rate": 4.38067035135342e-05, "loss": 0.6096, "step": 9590 }, { "epoch": 0.28002334743907775, "grad_norm": 1.0165469434512835, "learning_rate": 4.379922043947688e-05, "loss": 0.5765, "step": 9595 }, { "epoch": 0.2801692689333139, "grad_norm": 1.0844107475307103, "learning_rate": 4.379173356984165e-05, "loss": 0.5534, "step": 9600 }, { "epoch": 0.28031519042755, "grad_norm": 0.911743408262357, "learning_rate": 4.378424290637199e-05, "loss": 0.6006, "step": 9605 }, { "epoch": 0.2804611119217861, "grad_norm": 1.016591910148524, "learning_rate": 4.377674845081224e-05, "loss": 0.6795, "step": 9610 }, { "epoch": 0.28060703341602217, "grad_norm": 0.9716943752328762, "learning_rate": 4.37692502049076e-05, "loss": 0.6223, "step": 9615 }, { "epoch": 0.28075295491025826, "grad_norm": 1.0479342340199176, "learning_rate": 4.37617481704042e-05, "loss": 0.6601, "step": 9620 }, { "epoch": 0.2808988764044944, "grad_norm": 0.9210887513347649, "learning_rate": 4.375424234904902e-05, "loss": 0.5854, "step": 9625 }, { "epoch": 0.2810447978987305, "grad_norm": 0.9585073815288241, "learning_rate": 4.374673274258993e-05, "loss": 0.6723, "step": 9630 }, { "epoch": 0.2811907193929666, "grad_norm": 1.1027813582192172, "learning_rate": 4.3739219352775685e-05, "loss": 0.6795, "step": 9635 }, { "epoch": 0.28133664088720267, "grad_norm": 0.8025597456106637, "learning_rate": 4.373170218135592e-05, "loss": 0.619, "step": 9640 }, { "epoch": 0.28148256238143876, "grad_norm": 0.9707168305896594, "learning_rate": 4.3724181230081144e-05, "loss": 0.6139, "step": 9645 }, { "epoch": 0.2816284838756749, "grad_norm": 1.4606884832562046, "learning_rate": 4.371665650070275e-05, "loss": 0.7026, "step": 9650 }, { "epoch": 0.281774405369911, "grad_norm": 0.9409385044568542, "learning_rate": 4.3709127994973017e-05, "loss": 0.6136, "step": 9655 }, { "epoch": 0.2819203268641471, "grad_norm": 1.1872235379032545, "learning_rate": 4.370159571464509e-05, "loss": 0.6534, "step": 9660 }, { "epoch": 0.2820662483583832, "grad_norm": 1.0462182943011342, "learning_rate": 4.3694059661473006e-05, "loss": 0.6413, "step": 9665 }, { "epoch": 0.28221216985261927, "grad_norm": 1.1015599076451104, "learning_rate": 4.368651983721169e-05, "loss": 0.6475, "step": 9670 }, { "epoch": 0.2823580913468554, "grad_norm": 1.0058822146180264, "learning_rate": 4.36789762436169e-05, "loss": 0.6229, "step": 9675 }, { "epoch": 0.2825040128410915, "grad_norm": 1.1300018511742729, "learning_rate": 4.3671428882445335e-05, "loss": 0.6585, "step": 9680 }, { "epoch": 0.2826499343353276, "grad_norm": 1.247901797910968, "learning_rate": 4.3663877755454514e-05, "loss": 0.651, "step": 9685 }, { "epoch": 0.2827958558295637, "grad_norm": 0.9637139935804208, "learning_rate": 4.365632286440287e-05, "loss": 0.5837, "step": 9690 }, { "epoch": 0.28294177732379977, "grad_norm": 1.204244325118993, "learning_rate": 4.36487642110497e-05, "loss": 0.67, "step": 9695 }, { "epoch": 0.2830876988180359, "grad_norm": 1.0817756537220737, "learning_rate": 4.3641201797155176e-05, "loss": 0.6488, "step": 9700 }, { "epoch": 0.283233620312272, "grad_norm": 0.9795316655113857, "learning_rate": 4.3633635624480344e-05, "loss": 0.5998, "step": 9705 }, { "epoch": 0.2833795418065081, "grad_norm": 1.0444383856671975, "learning_rate": 4.362606569478715e-05, "loss": 0.5875, "step": 9710 }, { "epoch": 0.2835254633007442, "grad_norm": 1.0873743232596855, "learning_rate": 4.361849200983835e-05, "loss": 0.6857, "step": 9715 }, { "epoch": 0.2836713847949803, "grad_norm": 0.950539944845474, "learning_rate": 4.361091457139765e-05, "loss": 0.6709, "step": 9720 }, { "epoch": 0.2838173062892164, "grad_norm": 1.0180070499760425, "learning_rate": 4.3603333381229594e-05, "loss": 0.6299, "step": 9725 }, { "epoch": 0.2839632277834525, "grad_norm": 1.0707600314051786, "learning_rate": 4.35957484410996e-05, "loss": 0.5681, "step": 9730 }, { "epoch": 0.2841091492776886, "grad_norm": 0.9714989321799462, "learning_rate": 4.3588159752773974e-05, "loss": 0.6275, "step": 9735 }, { "epoch": 0.2842550707719247, "grad_norm": 1.0654212860029892, "learning_rate": 4.358056731801986e-05, "loss": 0.661, "step": 9740 }, { "epoch": 0.2844009922661608, "grad_norm": 1.002122570213685, "learning_rate": 4.357297113860532e-05, "loss": 0.5713, "step": 9745 }, { "epoch": 0.2845469137603969, "grad_norm": 0.9270802455919358, "learning_rate": 4.3565371216299264e-05, "loss": 0.6234, "step": 9750 }, { "epoch": 0.284692835254633, "grad_norm": 1.093674120690185, "learning_rate": 4.355776755287146e-05, "loss": 0.589, "step": 9755 }, { "epoch": 0.2848387567488691, "grad_norm": 1.044829503458416, "learning_rate": 4.355016015009257e-05, "loss": 0.7154, "step": 9760 }, { "epoch": 0.2849846782431052, "grad_norm": 1.1066046950139847, "learning_rate": 4.3542549009734136e-05, "loss": 0.7046, "step": 9765 }, { "epoch": 0.2851305997373413, "grad_norm": 1.062783413036839, "learning_rate": 4.353493413356853e-05, "loss": 0.6056, "step": 9770 }, { "epoch": 0.28527652123157743, "grad_norm": 1.3500126679976774, "learning_rate": 4.352731552336905e-05, "loss": 0.6969, "step": 9775 }, { "epoch": 0.2854224427258135, "grad_norm": 1.040460287107519, "learning_rate": 4.351969318090979e-05, "loss": 0.6014, "step": 9780 }, { "epoch": 0.2855683642200496, "grad_norm": 0.9308769001662154, "learning_rate": 4.351206710796578e-05, "loss": 0.5696, "step": 9785 }, { "epoch": 0.2857142857142857, "grad_norm": 1.0272684037108237, "learning_rate": 4.3504437306312895e-05, "loss": 0.5701, "step": 9790 }, { "epoch": 0.2858602072085218, "grad_norm": 1.1346822901872113, "learning_rate": 4.349680377772786e-05, "loss": 0.6213, "step": 9795 }, { "epoch": 0.28600612870275793, "grad_norm": 1.035502910710529, "learning_rate": 4.3489166523988314e-05, "loss": 0.6203, "step": 9800 }, { "epoch": 0.286152050196994, "grad_norm": 1.1199922438013243, "learning_rate": 4.34815255468727e-05, "loss": 0.6182, "step": 9805 }, { "epoch": 0.2862979716912301, "grad_norm": 0.8558041401700277, "learning_rate": 4.347388084816039e-05, "loss": 0.6022, "step": 9810 }, { "epoch": 0.2864438931854662, "grad_norm": 0.925740740465212, "learning_rate": 4.346623242963158e-05, "loss": 0.631, "step": 9815 }, { "epoch": 0.2865898146797023, "grad_norm": 1.1047848031013747, "learning_rate": 4.3458580293067345e-05, "loss": 0.6051, "step": 9820 }, { "epoch": 0.28673573617393844, "grad_norm": 1.1444653627692365, "learning_rate": 4.3450924440249637e-05, "loss": 0.6535, "step": 9825 }, { "epoch": 0.28688165766817453, "grad_norm": 1.0742557687030576, "learning_rate": 4.3443264872961255e-05, "loss": 0.6239, "step": 9830 }, { "epoch": 0.2870275791624106, "grad_norm": 0.9713479646957056, "learning_rate": 4.343560159298588e-05, "loss": 0.6541, "step": 9835 }, { "epoch": 0.2871735006566467, "grad_norm": 1.0861694514831983, "learning_rate": 4.342793460210806e-05, "loss": 0.6214, "step": 9840 }, { "epoch": 0.28731942215088285, "grad_norm": 1.0432355044644355, "learning_rate": 4.3420263902113176e-05, "loss": 0.6898, "step": 9845 }, { "epoch": 0.28746534364511894, "grad_norm": 1.166679167665881, "learning_rate": 4.34125894947875e-05, "loss": 0.6332, "step": 9850 }, { "epoch": 0.28761126513935503, "grad_norm": 1.68116103506683, "learning_rate": 4.340491138191817e-05, "loss": 0.7144, "step": 9855 }, { "epoch": 0.2877571866335911, "grad_norm": 0.980564752548983, "learning_rate": 4.3397229565293165e-05, "loss": 0.6922, "step": 9860 }, { "epoch": 0.2879031081278272, "grad_norm": 0.930910514750492, "learning_rate": 4.3389544046701354e-05, "loss": 0.6594, "step": 9865 }, { "epoch": 0.28804902962206336, "grad_norm": 0.9475673383234228, "learning_rate": 4.3381854827932435e-05, "loss": 0.5478, "step": 9870 }, { "epoch": 0.28819495111629945, "grad_norm": 1.0363261615363515, "learning_rate": 4.3374161910777003e-05, "loss": 0.5618, "step": 9875 }, { "epoch": 0.28834087261053554, "grad_norm": 1.0473784030084592, "learning_rate": 4.336646529702649e-05, "loss": 0.6984, "step": 9880 }, { "epoch": 0.2884867941047716, "grad_norm": 1.240370729375362, "learning_rate": 4.3358764988473196e-05, "loss": 0.6158, "step": 9885 }, { "epoch": 0.2886327155990077, "grad_norm": 1.1672795191268774, "learning_rate": 4.335106098691029e-05, "loss": 0.7262, "step": 9890 }, { "epoch": 0.28877863709324386, "grad_norm": 1.029083997695849, "learning_rate": 4.3343353294131785e-05, "loss": 0.6104, "step": 9895 }, { "epoch": 0.28892455858747995, "grad_norm": 1.0161911817919154, "learning_rate": 4.333564191193256e-05, "loss": 0.6007, "step": 9900 }, { "epoch": 0.28907048008171604, "grad_norm": 0.9981588214667476, "learning_rate": 4.332792684210835e-05, "loss": 0.6406, "step": 9905 }, { "epoch": 0.28921640157595213, "grad_norm": 0.9717252770177476, "learning_rate": 4.332020808645577e-05, "loss": 0.608, "step": 9910 }, { "epoch": 0.2893623230701882, "grad_norm": 1.1660459974683668, "learning_rate": 4.331248564677226e-05, "loss": 0.6685, "step": 9915 }, { "epoch": 0.28950824456442437, "grad_norm": 0.8983558702272, "learning_rate": 4.330475952485614e-05, "loss": 0.6366, "step": 9920 }, { "epoch": 0.28965416605866046, "grad_norm": 0.9507257548877919, "learning_rate": 4.329702972250658e-05, "loss": 0.6451, "step": 9925 }, { "epoch": 0.28980008755289655, "grad_norm": 1.083196117585931, "learning_rate": 4.328929624152362e-05, "loss": 0.5941, "step": 9930 }, { "epoch": 0.28994600904713264, "grad_norm": 0.9248926369171733, "learning_rate": 4.328155908370813e-05, "loss": 0.6035, "step": 9935 }, { "epoch": 0.2900919305413687, "grad_norm": 1.0511133690881107, "learning_rate": 4.327381825086186e-05, "loss": 0.6358, "step": 9940 }, { "epoch": 0.29023785203560487, "grad_norm": 0.927003854340498, "learning_rate": 4.3266073744787406e-05, "loss": 0.6097, "step": 9945 }, { "epoch": 0.29038377352984096, "grad_norm": 0.9717675203678348, "learning_rate": 4.3258325567288215e-05, "loss": 0.5977, "step": 9950 }, { "epoch": 0.29052969502407705, "grad_norm": 0.9250312722624365, "learning_rate": 4.325057372016861e-05, "loss": 0.6068, "step": 9955 }, { "epoch": 0.29067561651831314, "grad_norm": 1.1098752507120653, "learning_rate": 4.324281820523373e-05, "loss": 0.6617, "step": 9960 }, { "epoch": 0.29082153801254923, "grad_norm": 1.0808518096613506, "learning_rate": 4.3235059024289615e-05, "loss": 0.6192, "step": 9965 }, { "epoch": 0.2909674595067854, "grad_norm": 1.0580047157919588, "learning_rate": 4.3227296179143126e-05, "loss": 0.6085, "step": 9970 }, { "epoch": 0.29111338100102147, "grad_norm": 0.9393415328644182, "learning_rate": 4.321952967160198e-05, "loss": 0.6067, "step": 9975 }, { "epoch": 0.29125930249525755, "grad_norm": 1.0528534113984533, "learning_rate": 4.321175950347477e-05, "loss": 0.6098, "step": 9980 }, { "epoch": 0.29140522398949364, "grad_norm": 1.1579308027948558, "learning_rate": 4.32039856765709e-05, "loss": 0.6611, "step": 9985 }, { "epoch": 0.29155114548372973, "grad_norm": 0.957011185388438, "learning_rate": 4.319620819270067e-05, "loss": 0.6418, "step": 9990 }, { "epoch": 0.2916970669779659, "grad_norm": 1.0536886335974478, "learning_rate": 4.318842705367521e-05, "loss": 0.5636, "step": 9995 }, { "epoch": 0.29184298847220197, "grad_norm": 1.0145603355228385, "learning_rate": 4.3180642261306506e-05, "loss": 0.6145, "step": 10000 }, { "epoch": 0.29198890996643806, "grad_norm": 0.9321637361889581, "learning_rate": 4.317285381740738e-05, "loss": 0.6233, "step": 10005 }, { "epoch": 0.29213483146067415, "grad_norm": 0.9838231596547119, "learning_rate": 4.316506172379153e-05, "loss": 0.6283, "step": 10010 }, { "epoch": 0.29228075295491024, "grad_norm": 1.0149236769482028, "learning_rate": 4.315726598227347e-05, "loss": 0.6284, "step": 10015 }, { "epoch": 0.2924266744491464, "grad_norm": 0.9228175888109796, "learning_rate": 4.3149466594668606e-05, "loss": 0.5833, "step": 10020 }, { "epoch": 0.2925725959433825, "grad_norm": 1.2540863532382087, "learning_rate": 4.3141663562793165e-05, "loss": 0.6084, "step": 10025 }, { "epoch": 0.29271851743761856, "grad_norm": 1.1280488310772208, "learning_rate": 4.313385688846423e-05, "loss": 0.6259, "step": 10030 }, { "epoch": 0.29286443893185465, "grad_norm": 0.9539739015453806, "learning_rate": 4.3126046573499716e-05, "loss": 0.6301, "step": 10035 }, { "epoch": 0.29301036042609074, "grad_norm": 1.2548090990064173, "learning_rate": 4.311823261971843e-05, "loss": 0.6483, "step": 10040 }, { "epoch": 0.2931562819203269, "grad_norm": 1.1174417456436043, "learning_rate": 4.3110415028939966e-05, "loss": 0.6417, "step": 10045 }, { "epoch": 0.293302203414563, "grad_norm": 0.8869065438456617, "learning_rate": 4.310259380298482e-05, "loss": 0.662, "step": 10050 }, { "epoch": 0.29344812490879907, "grad_norm": 1.1824128584437887, "learning_rate": 4.309476894367428e-05, "loss": 0.6157, "step": 10055 }, { "epoch": 0.29359404640303516, "grad_norm": 1.1472900066551646, "learning_rate": 4.3086940452830554e-05, "loss": 0.6242, "step": 10060 }, { "epoch": 0.29373996789727125, "grad_norm": 1.0760054068103626, "learning_rate": 4.3079108332276626e-05, "loss": 0.6066, "step": 10065 }, { "epoch": 0.2938858893915074, "grad_norm": 0.9376591745026733, "learning_rate": 4.307127258383635e-05, "loss": 0.6123, "step": 10070 }, { "epoch": 0.2940318108857435, "grad_norm": 1.0417039857327592, "learning_rate": 4.3063433209334424e-05, "loss": 0.6729, "step": 10075 }, { "epoch": 0.2941777323799796, "grad_norm": 1.08621513057713, "learning_rate": 4.30555902105964e-05, "loss": 0.6065, "step": 10080 }, { "epoch": 0.29432365387421566, "grad_norm": 1.0581195725664356, "learning_rate": 4.304774358944868e-05, "loss": 0.7031, "step": 10085 }, { "epoch": 0.29446957536845175, "grad_norm": 0.9561695129752983, "learning_rate": 4.303989334771847e-05, "loss": 0.6385, "step": 10090 }, { "epoch": 0.2946154968626879, "grad_norm": 1.1846734382622395, "learning_rate": 4.3032039487233874e-05, "loss": 0.7111, "step": 10095 }, { "epoch": 0.294761418356924, "grad_norm": 1.2588346196190965, "learning_rate": 4.302418200982378e-05, "loss": 0.675, "step": 10100 }, { "epoch": 0.2949073398511601, "grad_norm": 1.1225204658783883, "learning_rate": 4.301632091731796e-05, "loss": 0.6334, "step": 10105 }, { "epoch": 0.29505326134539617, "grad_norm": 1.0495946245311563, "learning_rate": 4.300845621154703e-05, "loss": 0.6137, "step": 10110 }, { "epoch": 0.29519918283963226, "grad_norm": 1.0144801099071106, "learning_rate": 4.300058789434242e-05, "loss": 0.611, "step": 10115 }, { "epoch": 0.2953451043338684, "grad_norm": 0.9492646281438598, "learning_rate": 4.2992715967536415e-05, "loss": 0.6006, "step": 10120 }, { "epoch": 0.2954910258281045, "grad_norm": 1.039909505418342, "learning_rate": 4.298484043296214e-05, "loss": 0.6506, "step": 10125 }, { "epoch": 0.2956369473223406, "grad_norm": 0.9382999988781197, "learning_rate": 4.297696129245357e-05, "loss": 0.6067, "step": 10130 }, { "epoch": 0.29578286881657667, "grad_norm": 0.8427903373144261, "learning_rate": 4.2969078547845496e-05, "loss": 0.614, "step": 10135 }, { "epoch": 0.29592879031081276, "grad_norm": 1.1997101368112018, "learning_rate": 4.2961192200973565e-05, "loss": 0.6175, "step": 10140 }, { "epoch": 0.2960747118050489, "grad_norm": 1.0675060821333409, "learning_rate": 4.295330225367428e-05, "loss": 0.6323, "step": 10145 }, { "epoch": 0.296220633299285, "grad_norm": 1.2598320735122508, "learning_rate": 4.294540870778493e-05, "loss": 0.6441, "step": 10150 }, { "epoch": 0.2963665547935211, "grad_norm": 1.1203985738708426, "learning_rate": 4.29375115651437e-05, "loss": 0.7003, "step": 10155 }, { "epoch": 0.2965124762877572, "grad_norm": 1.0751261921213477, "learning_rate": 4.292961082758958e-05, "loss": 0.6283, "step": 10160 }, { "epoch": 0.29665839778199327, "grad_norm": 1.0305019782884048, "learning_rate": 4.29217064969624e-05, "loss": 0.6098, "step": 10165 }, { "epoch": 0.2968043192762294, "grad_norm": 1.0394666710884315, "learning_rate": 4.2913798575102835e-05, "loss": 0.6087, "step": 10170 }, { "epoch": 0.2969502407704655, "grad_norm": 1.1419209069357503, "learning_rate": 4.290588706385238e-05, "loss": 0.6373, "step": 10175 }, { "epoch": 0.2970961622647016, "grad_norm": 0.9017028778278497, "learning_rate": 4.289797196505341e-05, "loss": 0.5877, "step": 10180 }, { "epoch": 0.2972420837589377, "grad_norm": 0.9460849873541182, "learning_rate": 4.289005328054908e-05, "loss": 0.5985, "step": 10185 }, { "epoch": 0.29738800525317377, "grad_norm": 1.2996981812683828, "learning_rate": 4.288213101218339e-05, "loss": 0.6697, "step": 10190 }, { "epoch": 0.2975339267474099, "grad_norm": 1.3591789291783238, "learning_rate": 4.287420516180122e-05, "loss": 0.6539, "step": 10195 }, { "epoch": 0.297679848241646, "grad_norm": 0.9869883294475226, "learning_rate": 4.286627573124823e-05, "loss": 0.6674, "step": 10200 }, { "epoch": 0.2978257697358821, "grad_norm": 1.0633924641034176, "learning_rate": 4.285834272237094e-05, "loss": 0.6973, "step": 10205 }, { "epoch": 0.2979716912301182, "grad_norm": 1.0806751091156153, "learning_rate": 4.285040613701671e-05, "loss": 0.6813, "step": 10210 }, { "epoch": 0.2981176127243543, "grad_norm": 1.2665292301136417, "learning_rate": 4.28424659770337e-05, "loss": 0.638, "step": 10215 }, { "epoch": 0.2982635342185904, "grad_norm": 0.9375090116684476, "learning_rate": 4.283452224427096e-05, "loss": 0.5913, "step": 10220 }, { "epoch": 0.2984094557128265, "grad_norm": 0.9225897619759121, "learning_rate": 4.282657494057829e-05, "loss": 0.5837, "step": 10225 }, { "epoch": 0.2985553772070626, "grad_norm": 1.113410029140933, "learning_rate": 4.281862406780642e-05, "loss": 0.6331, "step": 10230 }, { "epoch": 0.2987012987012987, "grad_norm": 0.9690772171491423, "learning_rate": 4.2810669627806816e-05, "loss": 0.6192, "step": 10235 }, { "epoch": 0.2988472201955348, "grad_norm": 0.9564890214667199, "learning_rate": 4.280271162243184e-05, "loss": 0.612, "step": 10240 }, { "epoch": 0.2989931416897709, "grad_norm": 0.9493370878109928, "learning_rate": 4.279475005353466e-05, "loss": 0.6438, "step": 10245 }, { "epoch": 0.299139063184007, "grad_norm": 1.1689399438433885, "learning_rate": 4.2786784922969266e-05, "loss": 0.6227, "step": 10250 }, { "epoch": 0.2992849846782431, "grad_norm": 1.074599957155284, "learning_rate": 4.27788162325905e-05, "loss": 0.6673, "step": 10255 }, { "epoch": 0.2994309061724792, "grad_norm": 1.131822976058886, "learning_rate": 4.2770843984254004e-05, "loss": 0.69, "step": 10260 }, { "epoch": 0.2995768276667153, "grad_norm": 0.9818669758049734, "learning_rate": 4.276286817981628e-05, "loss": 0.613, "step": 10265 }, { "epoch": 0.29972274916095143, "grad_norm": 1.049951587286779, "learning_rate": 4.275488882113465e-05, "loss": 0.5821, "step": 10270 }, { "epoch": 0.2998686706551875, "grad_norm": 0.8586504788102471, "learning_rate": 4.274690591006723e-05, "loss": 0.5918, "step": 10275 }, { "epoch": 0.3000145921494236, "grad_norm": 0.9193938930162942, "learning_rate": 4.273891944847302e-05, "loss": 0.5885, "step": 10280 }, { "epoch": 0.3001605136436597, "grad_norm": 1.1161642067206445, "learning_rate": 4.273092943821179e-05, "loss": 0.6991, "step": 10285 }, { "epoch": 0.3003064351378958, "grad_norm": 1.1258610428926281, "learning_rate": 4.272293588114418e-05, "loss": 0.6382, "step": 10290 }, { "epoch": 0.30045235663213193, "grad_norm": 0.9402908905474704, "learning_rate": 4.271493877913163e-05, "loss": 0.6128, "step": 10295 }, { "epoch": 0.300598278126368, "grad_norm": 1.0321535989101016, "learning_rate": 4.270693813403643e-05, "loss": 0.6417, "step": 10300 }, { "epoch": 0.3007441996206041, "grad_norm": 1.0208349594054953, "learning_rate": 4.269893394772166e-05, "loss": 0.6325, "step": 10305 }, { "epoch": 0.3008901211148402, "grad_norm": 1.1903529750006026, "learning_rate": 4.269092622205125e-05, "loss": 0.6456, "step": 10310 }, { "epoch": 0.3010360426090763, "grad_norm": 0.8049629739529954, "learning_rate": 4.268291495888995e-05, "loss": 0.584, "step": 10315 }, { "epoch": 0.30118196410331244, "grad_norm": 1.3438261508082656, "learning_rate": 4.267490016010334e-05, "loss": 0.6373, "step": 10320 }, { "epoch": 0.30132788559754853, "grad_norm": 1.020809353844001, "learning_rate": 4.2666881827557814e-05, "loss": 0.6617, "step": 10325 }, { "epoch": 0.3014738070917846, "grad_norm": 1.0982308795495834, "learning_rate": 4.2658859963120575e-05, "loss": 0.6369, "step": 10330 }, { "epoch": 0.3016197285860207, "grad_norm": 0.941527729279539, "learning_rate": 4.265083456865968e-05, "loss": 0.6944, "step": 10335 }, { "epoch": 0.3017656500802568, "grad_norm": 0.9426140706935435, "learning_rate": 4.2642805646043973e-05, "loss": 0.6319, "step": 10340 }, { "epoch": 0.30191157157449294, "grad_norm": 1.0254375989601374, "learning_rate": 4.263477319714317e-05, "loss": 0.6737, "step": 10345 }, { "epoch": 0.30205749306872903, "grad_norm": 0.9279709604109861, "learning_rate": 4.262673722382775e-05, "loss": 0.6231, "step": 10350 }, { "epoch": 0.3022034145629651, "grad_norm": 1.1858764494611205, "learning_rate": 4.2618697727969054e-05, "loss": 0.6182, "step": 10355 }, { "epoch": 0.3023493360572012, "grad_norm": 0.9713788051436115, "learning_rate": 4.261065471143922e-05, "loss": 0.6282, "step": 10360 }, { "epoch": 0.3024952575514373, "grad_norm": 1.0979719956230793, "learning_rate": 4.260260817611121e-05, "loss": 0.5908, "step": 10365 }, { "epoch": 0.30264117904567345, "grad_norm": 1.3096098724003238, "learning_rate": 4.259455812385883e-05, "loss": 0.6846, "step": 10370 }, { "epoch": 0.30278710053990954, "grad_norm": 1.0476064464434878, "learning_rate": 4.258650455655666e-05, "loss": 0.6528, "step": 10375 }, { "epoch": 0.3029330220341456, "grad_norm": 0.9798439797794413, "learning_rate": 4.257844747608015e-05, "loss": 0.5728, "step": 10380 }, { "epoch": 0.3030789435283817, "grad_norm": 1.1895595078410353, "learning_rate": 4.257038688430552e-05, "loss": 0.6049, "step": 10385 }, { "epoch": 0.3032248650226178, "grad_norm": 1.1103587792395997, "learning_rate": 4.2562322783109835e-05, "loss": 0.6094, "step": 10390 }, { "epoch": 0.30337078651685395, "grad_norm": 0.9348793693097873, "learning_rate": 4.255425517437098e-05, "loss": 0.5397, "step": 10395 }, { "epoch": 0.30351670801109004, "grad_norm": 1.0060834487029522, "learning_rate": 4.2546184059967634e-05, "loss": 0.596, "step": 10400 }, { "epoch": 0.30366262950532613, "grad_norm": 0.8997301478964824, "learning_rate": 4.253810944177932e-05, "loss": 0.6551, "step": 10405 }, { "epoch": 0.3038085509995622, "grad_norm": 1.0814856771207757, "learning_rate": 4.253003132168636e-05, "loss": 0.6845, "step": 10410 }, { "epoch": 0.3039544724937983, "grad_norm": 1.0549408476391093, "learning_rate": 4.252194970156989e-05, "loss": 0.6582, "step": 10415 }, { "epoch": 0.30410039398803446, "grad_norm": 1.1040764999147177, "learning_rate": 4.2513864583311876e-05, "loss": 0.6354, "step": 10420 }, { "epoch": 0.30424631548227055, "grad_norm": 0.9637786458674548, "learning_rate": 4.250577596879507e-05, "loss": 0.6583, "step": 10425 }, { "epoch": 0.30439223697650664, "grad_norm": 1.0484402766038825, "learning_rate": 4.249768385990309e-05, "loss": 0.5982, "step": 10430 }, { "epoch": 0.3045381584707427, "grad_norm": 0.9464641488333255, "learning_rate": 4.248958825852029e-05, "loss": 0.6251, "step": 10435 }, { "epoch": 0.3046840799649788, "grad_norm": 1.1148626531290056, "learning_rate": 4.2481489166531915e-05, "loss": 0.6138, "step": 10440 }, { "epoch": 0.30483000145921496, "grad_norm": 0.9975829799111149, "learning_rate": 4.2473386585823986e-05, "loss": 0.6749, "step": 10445 }, { "epoch": 0.30497592295345105, "grad_norm": 1.3235769403332804, "learning_rate": 4.246528051828333e-05, "loss": 0.721, "step": 10450 }, { "epoch": 0.30512184444768714, "grad_norm": 1.1303143472315673, "learning_rate": 4.2457170965797614e-05, "loss": 0.6744, "step": 10455 }, { "epoch": 0.30526776594192323, "grad_norm": 1.1619974982806487, "learning_rate": 4.244905793025528e-05, "loss": 0.6553, "step": 10460 }, { "epoch": 0.3054136874361593, "grad_norm": 1.0191806419220486, "learning_rate": 4.24409414135456e-05, "loss": 0.6569, "step": 10465 }, { "epoch": 0.30555960893039547, "grad_norm": 0.9681256557453185, "learning_rate": 4.243282141755866e-05, "loss": 0.6268, "step": 10470 }, { "epoch": 0.30570553042463156, "grad_norm": 1.2333501019281996, "learning_rate": 4.242469794418536e-05, "loss": 0.6088, "step": 10475 }, { "epoch": 0.30585145191886765, "grad_norm": 1.061462196546728, "learning_rate": 4.241657099531741e-05, "loss": 0.6111, "step": 10480 }, { "epoch": 0.30599737341310373, "grad_norm": 1.0573594054231026, "learning_rate": 4.24084405728473e-05, "loss": 0.6086, "step": 10485 }, { "epoch": 0.3061432949073398, "grad_norm": 1.0199023301421437, "learning_rate": 4.2400306678668355e-05, "loss": 0.5654, "step": 10490 }, { "epoch": 0.30628921640157597, "grad_norm": 1.0326110428813924, "learning_rate": 4.239216931467472e-05, "loss": 0.6163, "step": 10495 }, { "epoch": 0.30643513789581206, "grad_norm": 1.1178488965029092, "learning_rate": 4.2384028482761315e-05, "loss": 0.6546, "step": 10500 }, { "epoch": 0.30658105939004815, "grad_norm": 0.8714919913236092, "learning_rate": 4.237588418482389e-05, "loss": 0.6007, "step": 10505 }, { "epoch": 0.30672698088428424, "grad_norm": 1.1928375713498323, "learning_rate": 4.2367736422759005e-05, "loss": 0.681, "step": 10510 }, { "epoch": 0.30687290237852033, "grad_norm": 1.0159384696385847, "learning_rate": 4.2359585198464006e-05, "loss": 0.5611, "step": 10515 }, { "epoch": 0.3070188238727565, "grad_norm": 1.0785841915099939, "learning_rate": 4.235143051383706e-05, "loss": 0.5966, "step": 10520 }, { "epoch": 0.30716474536699256, "grad_norm": 1.0469491083340396, "learning_rate": 4.2343272370777155e-05, "loss": 0.61, "step": 10525 }, { "epoch": 0.30731066686122865, "grad_norm": 0.9353888289185007, "learning_rate": 4.233511077118404e-05, "loss": 0.6322, "step": 10530 }, { "epoch": 0.30745658835546474, "grad_norm": 1.0130973165669086, "learning_rate": 4.232694571695832e-05, "loss": 0.6719, "step": 10535 }, { "epoch": 0.30760250984970083, "grad_norm": 1.3664932792745657, "learning_rate": 4.2318777210001364e-05, "loss": 0.6945, "step": 10540 }, { "epoch": 0.307748431343937, "grad_norm": 0.9783476864455728, "learning_rate": 4.231060525221537e-05, "loss": 0.5983, "step": 10545 }, { "epoch": 0.30789435283817307, "grad_norm": 1.1209529751495166, "learning_rate": 4.230242984550333e-05, "loss": 0.6078, "step": 10550 }, { "epoch": 0.30804027433240916, "grad_norm": 1.0096799691066385, "learning_rate": 4.229425099176903e-05, "loss": 0.5979, "step": 10555 }, { "epoch": 0.30818619582664525, "grad_norm": 1.069972240463686, "learning_rate": 4.228606869291708e-05, "loss": 0.6059, "step": 10560 }, { "epoch": 0.30833211732088134, "grad_norm": 0.9923507863451684, "learning_rate": 4.227788295085288e-05, "loss": 0.605, "step": 10565 }, { "epoch": 0.3084780388151175, "grad_norm": 1.0936931170147761, "learning_rate": 4.2269693767482635e-05, "loss": 0.6062, "step": 10570 }, { "epoch": 0.3086239603093536, "grad_norm": 1.0228773314444557, "learning_rate": 4.226150114471334e-05, "loss": 0.6837, "step": 10575 }, { "epoch": 0.30876988180358966, "grad_norm": 0.8764492522245093, "learning_rate": 4.225330508445281e-05, "loss": 0.599, "step": 10580 }, { "epoch": 0.30891580329782575, "grad_norm": 1.0172661540894417, "learning_rate": 4.224510558860964e-05, "loss": 0.6441, "step": 10585 }, { "epoch": 0.3090617247920619, "grad_norm": 1.078365886822906, "learning_rate": 4.2236902659093244e-05, "loss": 0.5695, "step": 10590 }, { "epoch": 0.309207646286298, "grad_norm": 1.0555723066920377, "learning_rate": 4.222869629781383e-05, "loss": 0.585, "step": 10595 }, { "epoch": 0.3093535677805341, "grad_norm": 1.1878394546788085, "learning_rate": 4.222048650668239e-05, "loss": 0.6247, "step": 10600 }, { "epoch": 0.30949948927477017, "grad_norm": 0.9529532759971112, "learning_rate": 4.2212273287610744e-05, "loss": 0.6345, "step": 10605 }, { "epoch": 0.30964541076900626, "grad_norm": 1.2587038968001414, "learning_rate": 4.220405664251148e-05, "loss": 0.6873, "step": 10610 }, { "epoch": 0.3097913322632424, "grad_norm": 1.0210864913772684, "learning_rate": 4.2195836573298006e-05, "loss": 0.7019, "step": 10615 }, { "epoch": 0.3099372537574785, "grad_norm": 1.0136017567723619, "learning_rate": 4.218761308188451e-05, "loss": 0.5834, "step": 10620 }, { "epoch": 0.3100831752517146, "grad_norm": 1.1353931397937516, "learning_rate": 4.217938617018599e-05, "loss": 0.6398, "step": 10625 }, { "epoch": 0.31022909674595067, "grad_norm": 1.0139825340526678, "learning_rate": 4.217115584011824e-05, "loss": 0.6074, "step": 10630 }, { "epoch": 0.31037501824018676, "grad_norm": 1.1376670674778286, "learning_rate": 4.216292209359784e-05, "loss": 0.6844, "step": 10635 }, { "epoch": 0.3105209397344229, "grad_norm": 0.9654074776744501, "learning_rate": 4.215468493254217e-05, "loss": 0.5866, "step": 10640 }, { "epoch": 0.310666861228659, "grad_norm": 1.0227474480457441, "learning_rate": 4.214644435886942e-05, "loss": 0.5637, "step": 10645 }, { "epoch": 0.3108127827228951, "grad_norm": 1.0341845935844518, "learning_rate": 4.213820037449854e-05, "loss": 0.6125, "step": 10650 }, { "epoch": 0.3109587042171312, "grad_norm": 1.0871959171133545, "learning_rate": 4.212995298134932e-05, "loss": 0.6707, "step": 10655 }, { "epoch": 0.31110462571136727, "grad_norm": 1.0314432776746578, "learning_rate": 4.212170218134229e-05, "loss": 0.607, "step": 10660 }, { "epoch": 0.3112505472056034, "grad_norm": 0.8306397051529498, "learning_rate": 4.211344797639884e-05, "loss": 0.6039, "step": 10665 }, { "epoch": 0.3113964686998395, "grad_norm": 1.110747509191792, "learning_rate": 4.210519036844109e-05, "loss": 0.5711, "step": 10670 }, { "epoch": 0.3115423901940756, "grad_norm": 1.0494420892508958, "learning_rate": 4.209692935939198e-05, "loss": 0.6827, "step": 10675 }, { "epoch": 0.3116883116883117, "grad_norm": 1.0138262915884315, "learning_rate": 4.2088664951175246e-05, "loss": 0.6282, "step": 10680 }, { "epoch": 0.31183423318254777, "grad_norm": 0.8593114601658431, "learning_rate": 4.208039714571541e-05, "loss": 0.5704, "step": 10685 }, { "epoch": 0.3119801546767839, "grad_norm": 1.2149061859123544, "learning_rate": 4.207212594493778e-05, "loss": 0.6452, "step": 10690 }, { "epoch": 0.31212607617102, "grad_norm": 0.9980836237159034, "learning_rate": 4.206385135076847e-05, "loss": 0.5795, "step": 10695 }, { "epoch": 0.3122719976652561, "grad_norm": 0.9631069460264696, "learning_rate": 4.205557336513435e-05, "loss": 0.5393, "step": 10700 }, { "epoch": 0.3124179191594922, "grad_norm": 1.1007460261290956, "learning_rate": 4.204729198996314e-05, "loss": 0.6376, "step": 10705 }, { "epoch": 0.3125638406537283, "grad_norm": 1.2187241010617924, "learning_rate": 4.203900722718328e-05, "loss": 0.6176, "step": 10710 }, { "epoch": 0.3127097621479644, "grad_norm": 1.126356224322464, "learning_rate": 4.203071907872405e-05, "loss": 0.6695, "step": 10715 }, { "epoch": 0.3128556836422005, "grad_norm": 1.0258710602760182, "learning_rate": 4.2022427546515497e-05, "loss": 0.5803, "step": 10720 }, { "epoch": 0.3130016051364366, "grad_norm": 1.0666554581542338, "learning_rate": 4.2014132632488456e-05, "loss": 0.6525, "step": 10725 }, { "epoch": 0.3131475266306727, "grad_norm": 0.922861217567363, "learning_rate": 4.200583433857455e-05, "loss": 0.5665, "step": 10730 }, { "epoch": 0.3132934481249088, "grad_norm": 1.1050478683539562, "learning_rate": 4.19975326667062e-05, "loss": 0.6062, "step": 10735 }, { "epoch": 0.3134393696191449, "grad_norm": 1.036686179087357, "learning_rate": 4.1989227618816604e-05, "loss": 0.5828, "step": 10740 }, { "epoch": 0.313585291113381, "grad_norm": 1.2464291074809783, "learning_rate": 4.198091919683973e-05, "loss": 0.6058, "step": 10745 }, { "epoch": 0.3137312126076171, "grad_norm": 1.0411816435634602, "learning_rate": 4.197260740271038e-05, "loss": 0.6415, "step": 10750 }, { "epoch": 0.3138771341018532, "grad_norm": 1.17981679213735, "learning_rate": 4.196429223836408e-05, "loss": 0.6391, "step": 10755 }, { "epoch": 0.3140230555960893, "grad_norm": 1.1889506574903397, "learning_rate": 4.19559737057372e-05, "loss": 0.6569, "step": 10760 }, { "epoch": 0.31416897709032543, "grad_norm": 1.183577120267219, "learning_rate": 4.194765180676684e-05, "loss": 0.6648, "step": 10765 }, { "epoch": 0.3143148985845615, "grad_norm": 1.1719465246178082, "learning_rate": 4.1939326543390926e-05, "loss": 0.624, "step": 10770 }, { "epoch": 0.3144608200787976, "grad_norm": 0.9473059984670127, "learning_rate": 4.193099791754815e-05, "loss": 0.6816, "step": 10775 }, { "epoch": 0.3146067415730337, "grad_norm": 1.0582699688118096, "learning_rate": 4.192266593117797e-05, "loss": 0.6011, "step": 10780 }, { "epoch": 0.3147526630672698, "grad_norm": 0.8657390430632141, "learning_rate": 4.191433058622067e-05, "loss": 0.6248, "step": 10785 }, { "epoch": 0.31489858456150593, "grad_norm": 1.1959672626993205, "learning_rate": 4.190599188461727e-05, "loss": 0.7468, "step": 10790 }, { "epoch": 0.315044506055742, "grad_norm": 1.1954092627551398, "learning_rate": 4.189764982830961e-05, "loss": 0.6675, "step": 10795 }, { "epoch": 0.3151904275499781, "grad_norm": 1.1013725880470744, "learning_rate": 4.188930441924029e-05, "loss": 0.6017, "step": 10800 }, { "epoch": 0.3153363490442142, "grad_norm": 1.2449000053096426, "learning_rate": 4.188095565935268e-05, "loss": 0.6477, "step": 10805 }, { "epoch": 0.3154822705384503, "grad_norm": 0.9925361941041598, "learning_rate": 4.1872603550590956e-05, "loss": 0.5667, "step": 10810 }, { "epoch": 0.31562819203268644, "grad_norm": 8.94427111548512, "learning_rate": 4.1864248094900056e-05, "loss": 0.6022, "step": 10815 }, { "epoch": 0.31577411352692253, "grad_norm": 20.613875296370193, "learning_rate": 4.185588929422572e-05, "loss": 0.6696, "step": 10820 }, { "epoch": 0.3159200350211586, "grad_norm": 1.0461280342824664, "learning_rate": 4.1847527150514423e-05, "loss": 0.5919, "step": 10825 }, { "epoch": 0.3160659565153947, "grad_norm": 0.9761229171053537, "learning_rate": 4.183916166571348e-05, "loss": 0.637, "step": 10830 }, { "epoch": 0.3162118780096308, "grad_norm": 1.0637595291027524, "learning_rate": 4.183079284177093e-05, "loss": 0.6499, "step": 10835 }, { "epoch": 0.31635779950386694, "grad_norm": 1.034824977281423, "learning_rate": 4.182242068063561e-05, "loss": 0.604, "step": 10840 }, { "epoch": 0.31650372099810303, "grad_norm": 1.0281952931191731, "learning_rate": 4.1814045184257137e-05, "loss": 0.6107, "step": 10845 }, { "epoch": 0.3166496424923391, "grad_norm": 1.2192013642131252, "learning_rate": 4.180566635458591e-05, "loss": 0.667, "step": 10850 }, { "epoch": 0.3167955639865752, "grad_norm": 1.4924884820422448, "learning_rate": 4.179728419357307e-05, "loss": 0.695, "step": 10855 }, { "epoch": 0.3169414854808113, "grad_norm": 1.046807972073833, "learning_rate": 4.178889870317059e-05, "loss": 0.6295, "step": 10860 }, { "epoch": 0.31708740697504745, "grad_norm": 1.0922531445192847, "learning_rate": 4.178050988533118e-05, "loss": 0.6483, "step": 10865 }, { "epoch": 0.31723332846928354, "grad_norm": 1.0942237791905836, "learning_rate": 4.177211774200832e-05, "loss": 0.5993, "step": 10870 }, { "epoch": 0.3173792499635196, "grad_norm": 0.966554865748181, "learning_rate": 4.176372227515629e-05, "loss": 0.579, "step": 10875 }, { "epoch": 0.3175251714577557, "grad_norm": 1.1416827676907393, "learning_rate": 4.175532348673012e-05, "loss": 0.6455, "step": 10880 }, { "epoch": 0.3176710929519918, "grad_norm": 1.0338401559576693, "learning_rate": 4.1746921378685645e-05, "loss": 0.6303, "step": 10885 }, { "epoch": 0.31781701444622795, "grad_norm": 0.9358905890110428, "learning_rate": 4.1738515952979437e-05, "loss": 0.5801, "step": 10890 }, { "epoch": 0.31796293594046404, "grad_norm": 0.9884018544105023, "learning_rate": 4.173010721156885e-05, "loss": 0.5735, "step": 10895 }, { "epoch": 0.31810885743470013, "grad_norm": 1.115863129104097, "learning_rate": 4.172169515641203e-05, "loss": 0.6714, "step": 10900 }, { "epoch": 0.3182547789289362, "grad_norm": 0.9213878315711291, "learning_rate": 4.1713279789467885e-05, "loss": 0.6328, "step": 10905 }, { "epoch": 0.3184007004231723, "grad_norm": 1.142383580418868, "learning_rate": 4.170486111269607e-05, "loss": 0.6116, "step": 10910 }, { "epoch": 0.31854662191740846, "grad_norm": 0.9637397484995928, "learning_rate": 4.1696439128057046e-05, "loss": 0.5982, "step": 10915 }, { "epoch": 0.31869254341164455, "grad_norm": 1.0529787774530486, "learning_rate": 4.168801383751203e-05, "loss": 0.6153, "step": 10920 }, { "epoch": 0.31883846490588064, "grad_norm": 1.026921087027283, "learning_rate": 4.167958524302301e-05, "loss": 0.6557, "step": 10925 }, { "epoch": 0.3189843864001167, "grad_norm": 2.8855164738541, "learning_rate": 4.167115334655273e-05, "loss": 0.6603, "step": 10930 }, { "epoch": 0.3191303078943528, "grad_norm": 1.127725152009719, "learning_rate": 4.166271815006472e-05, "loss": 0.6743, "step": 10935 }, { "epoch": 0.31927622938858896, "grad_norm": 1.1322587046957713, "learning_rate": 4.1654279655523285e-05, "loss": 0.6542, "step": 10940 }, { "epoch": 0.31942215088282505, "grad_norm": 1.0132380480317145, "learning_rate": 4.164583786489346e-05, "loss": 0.5809, "step": 10945 }, { "epoch": 0.31956807237706114, "grad_norm": 1.0402728433038912, "learning_rate": 4.16373927801411e-05, "loss": 0.6701, "step": 10950 }, { "epoch": 0.31971399387129723, "grad_norm": 1.2710436361080821, "learning_rate": 4.162894440323278e-05, "loss": 0.6675, "step": 10955 }, { "epoch": 0.3198599153655333, "grad_norm": 0.9650906233915678, "learning_rate": 4.162049273613588e-05, "loss": 0.6025, "step": 10960 }, { "epoch": 0.32000583685976947, "grad_norm": 0.9641418941329717, "learning_rate": 4.161203778081852e-05, "loss": 0.6092, "step": 10965 }, { "epoch": 0.32015175835400556, "grad_norm": 0.8953927753027289, "learning_rate": 4.160357953924959e-05, "loss": 0.6123, "step": 10970 }, { "epoch": 0.32029767984824165, "grad_norm": 0.9199657888722718, "learning_rate": 4.159511801339876e-05, "loss": 0.5749, "step": 10975 }, { "epoch": 0.32044360134247774, "grad_norm": 1.0249280125653601, "learning_rate": 4.1586653205236447e-05, "loss": 0.6084, "step": 10980 }, { "epoch": 0.3205895228367138, "grad_norm": 1.1417663925975452, "learning_rate": 4.1578185116733846e-05, "loss": 0.6533, "step": 10985 }, { "epoch": 0.32073544433094997, "grad_norm": 1.0871716549112553, "learning_rate": 4.15697137498629e-05, "loss": 0.5614, "step": 10990 }, { "epoch": 0.32088136582518606, "grad_norm": 1.1016528591290073, "learning_rate": 4.156123910659632e-05, "loss": 0.6273, "step": 10995 }, { "epoch": 0.32102728731942215, "grad_norm": 1.0290611996249612, "learning_rate": 4.1552761188907605e-05, "loss": 0.6139, "step": 11000 }, { "epoch": 0.32117320881365824, "grad_norm": 1.0931694809931243, "learning_rate": 4.154427999877099e-05, "loss": 0.6528, "step": 11005 }, { "epoch": 0.32131913030789433, "grad_norm": 0.9548319298644551, "learning_rate": 4.153579553816147e-05, "loss": 0.641, "step": 11010 }, { "epoch": 0.3214650518021305, "grad_norm": 0.9841313854224992, "learning_rate": 4.152730780905482e-05, "loss": 0.7052, "step": 11015 }, { "epoch": 0.32161097329636656, "grad_norm": 0.988289395598077, "learning_rate": 4.1518816813427556e-05, "loss": 0.6301, "step": 11020 }, { "epoch": 0.32175689479060265, "grad_norm": 0.9396116658389815, "learning_rate": 4.151032255325697e-05, "loss": 0.5954, "step": 11025 }, { "epoch": 0.32190281628483874, "grad_norm": 1.0568483133635853, "learning_rate": 4.150182503052111e-05, "loss": 0.6344, "step": 11030 }, { "epoch": 0.32204873777907483, "grad_norm": 1.0601604768196267, "learning_rate": 4.149332424719879e-05, "loss": 0.6798, "step": 11035 }, { "epoch": 0.322194659273311, "grad_norm": 1.1254572699623864, "learning_rate": 4.148482020526956e-05, "loss": 0.6162, "step": 11040 }, { "epoch": 0.32234058076754707, "grad_norm": 1.2528959522564855, "learning_rate": 4.147631290671376e-05, "loss": 0.5619, "step": 11045 }, { "epoch": 0.32248650226178316, "grad_norm": 1.260123007927408, "learning_rate": 4.1467802353512455e-05, "loss": 0.6595, "step": 11050 }, { "epoch": 0.32263242375601925, "grad_norm": 0.9933220625274068, "learning_rate": 4.1459288547647504e-05, "loss": 0.6269, "step": 11055 }, { "epoch": 0.32277834525025534, "grad_norm": 0.9715078817219612, "learning_rate": 4.14507714911015e-05, "loss": 0.6045, "step": 11060 }, { "epoch": 0.3229242667444915, "grad_norm": 0.9812043210205588, "learning_rate": 4.144225118585779e-05, "loss": 0.5581, "step": 11065 }, { "epoch": 0.3230701882387276, "grad_norm": 1.0286662370463173, "learning_rate": 4.143372763390051e-05, "loss": 0.6451, "step": 11070 }, { "epoch": 0.32321610973296366, "grad_norm": 1.1639407130847483, "learning_rate": 4.1425200837214505e-05, "loss": 0.6655, "step": 11075 }, { "epoch": 0.32336203122719975, "grad_norm": 1.0132817647094212, "learning_rate": 4.141667079778541e-05, "loss": 0.6587, "step": 11080 }, { "epoch": 0.32350795272143584, "grad_norm": 1.2678494324028673, "learning_rate": 4.1408137517599596e-05, "loss": 0.5957, "step": 11085 }, { "epoch": 0.323653874215672, "grad_norm": 1.112567411200346, "learning_rate": 4.13996009986442e-05, "loss": 0.6728, "step": 11090 }, { "epoch": 0.3237997957099081, "grad_norm": 1.069826485767921, "learning_rate": 4.139106124290712e-05, "loss": 0.6838, "step": 11095 }, { "epoch": 0.32394571720414417, "grad_norm": 1.0727866184639705, "learning_rate": 4.1382518252376985e-05, "loss": 0.5858, "step": 11100 }, { "epoch": 0.32409163869838026, "grad_norm": 0.8936483939993378, "learning_rate": 4.137397202904319e-05, "loss": 0.6067, "step": 11105 }, { "epoch": 0.32423756019261635, "grad_norm": 1.1346353130779891, "learning_rate": 4.136542257489589e-05, "loss": 0.6161, "step": 11110 }, { "epoch": 0.3243834816868525, "grad_norm": 1.0960577024248674, "learning_rate": 4.1356869891925984e-05, "loss": 0.6804, "step": 11115 }, { "epoch": 0.3245294031810886, "grad_norm": 0.9792482318117541, "learning_rate": 4.134831398212511e-05, "loss": 0.6196, "step": 11120 }, { "epoch": 0.3246753246753247, "grad_norm": 1.036312950440058, "learning_rate": 4.133975484748569e-05, "loss": 0.6321, "step": 11125 }, { "epoch": 0.32482124616956076, "grad_norm": 0.9612634765544875, "learning_rate": 4.1331192490000875e-05, "loss": 0.5852, "step": 11130 }, { "epoch": 0.32496716766379685, "grad_norm": 1.01009473889226, "learning_rate": 4.132262691166456e-05, "loss": 0.6316, "step": 11135 }, { "epoch": 0.325113089158033, "grad_norm": 1.0315024708222542, "learning_rate": 4.131405811447141e-05, "loss": 0.6103, "step": 11140 }, { "epoch": 0.3252590106522691, "grad_norm": 0.9133158035057377, "learning_rate": 4.130548610041682e-05, "loss": 0.677, "step": 11145 }, { "epoch": 0.3254049321465052, "grad_norm": 0.8593012497677728, "learning_rate": 4.1296910871496955e-05, "loss": 0.5773, "step": 11150 }, { "epoch": 0.32555085364074127, "grad_norm": 1.040992806162476, "learning_rate": 4.1288332429708715e-05, "loss": 0.6235, "step": 11155 }, { "epoch": 0.32569677513497736, "grad_norm": 1.0317014632904054, "learning_rate": 4.127975077704973e-05, "loss": 0.6291, "step": 11160 }, { "epoch": 0.3258426966292135, "grad_norm": 1.6706118109451882, "learning_rate": 4.127116591551843e-05, "loss": 0.7069, "step": 11165 }, { "epoch": 0.3259886181234496, "grad_norm": 1.020123354206265, "learning_rate": 4.126257784711394e-05, "loss": 0.6371, "step": 11170 }, { "epoch": 0.3261345396176857, "grad_norm": 0.9066459696650431, "learning_rate": 4.125398657383616e-05, "loss": 0.6216, "step": 11175 }, { "epoch": 0.32628046111192177, "grad_norm": 1.0130280003722434, "learning_rate": 4.124539209768573e-05, "loss": 0.6074, "step": 11180 }, { "epoch": 0.32642638260615786, "grad_norm": 0.9384077668714056, "learning_rate": 4.1236794420664014e-05, "loss": 0.6406, "step": 11185 }, { "epoch": 0.326572304100394, "grad_norm": 0.9555525609756401, "learning_rate": 4.122819354477317e-05, "loss": 0.6486, "step": 11190 }, { "epoch": 0.3267182255946301, "grad_norm": 1.1022165736744427, "learning_rate": 4.121958947201606e-05, "loss": 0.607, "step": 11195 }, { "epoch": 0.3268641470888662, "grad_norm": 0.9738858125506661, "learning_rate": 4.12109822043963e-05, "loss": 0.5715, "step": 11200 }, { "epoch": 0.3270100685831023, "grad_norm": 1.1979891364281465, "learning_rate": 4.120237174391826e-05, "loss": 0.6247, "step": 11205 }, { "epoch": 0.32715599007733837, "grad_norm": 1.2370376908493559, "learning_rate": 4.1193758092587034e-05, "loss": 0.6777, "step": 11210 }, { "epoch": 0.3273019115715745, "grad_norm": 0.9018058068412678, "learning_rate": 4.118514125240849e-05, "loss": 0.5635, "step": 11215 }, { "epoch": 0.3274478330658106, "grad_norm": 1.0579673015690656, "learning_rate": 4.117652122538921e-05, "loss": 0.6732, "step": 11220 }, { "epoch": 0.3275937545600467, "grad_norm": 1.1723372942662171, "learning_rate": 4.116789801353652e-05, "loss": 0.6172, "step": 11225 }, { "epoch": 0.3277396760542828, "grad_norm": 1.0673296369873437, "learning_rate": 4.115927161885851e-05, "loss": 0.6362, "step": 11230 }, { "epoch": 0.32788559754851887, "grad_norm": 0.9483166837365734, "learning_rate": 4.1150642043363994e-05, "loss": 0.6025, "step": 11235 }, { "epoch": 0.328031519042755, "grad_norm": 0.8741627602015403, "learning_rate": 4.114200928906252e-05, "loss": 0.6055, "step": 11240 }, { "epoch": 0.3281774405369911, "grad_norm": 1.1334322761907387, "learning_rate": 4.1133373357964405e-05, "loss": 0.679, "step": 11245 }, { "epoch": 0.3283233620312272, "grad_norm": 0.984608212113143, "learning_rate": 4.1124734252080674e-05, "loss": 0.5891, "step": 11250 }, { "epoch": 0.3284692835254633, "grad_norm": 1.0136537559423136, "learning_rate": 4.1116091973423104e-05, "loss": 0.624, "step": 11255 }, { "epoch": 0.3286152050196994, "grad_norm": 0.8956438853083151, "learning_rate": 4.1107446524004205e-05, "loss": 0.598, "step": 11260 }, { "epoch": 0.3287611265139355, "grad_norm": 1.0021347414901831, "learning_rate": 4.109879790583725e-05, "loss": 0.5737, "step": 11265 }, { "epoch": 0.3289070480081716, "grad_norm": 1.139457046426254, "learning_rate": 4.1090146120936214e-05, "loss": 0.6488, "step": 11270 }, { "epoch": 0.3290529695024077, "grad_norm": 1.025917268212767, "learning_rate": 4.1081491171315834e-05, "loss": 0.6672, "step": 11275 }, { "epoch": 0.3291988909966438, "grad_norm": 1.12371622829611, "learning_rate": 4.107283305899158e-05, "loss": 0.696, "step": 11280 }, { "epoch": 0.3293448124908799, "grad_norm": 1.0537778864026242, "learning_rate": 4.1064171785979646e-05, "loss": 0.5684, "step": 11285 }, { "epoch": 0.329490733985116, "grad_norm": 1.0199855695199556, "learning_rate": 4.1055507354296974e-05, "loss": 0.6387, "step": 11290 }, { "epoch": 0.3296366554793521, "grad_norm": 0.9583958003578545, "learning_rate": 4.104683976596124e-05, "loss": 0.6216, "step": 11295 }, { "epoch": 0.3297825769735882, "grad_norm": 0.9497042714701236, "learning_rate": 4.103816902299087e-05, "loss": 0.5393, "step": 11300 }, { "epoch": 0.3299284984678243, "grad_norm": 1.1500956032313587, "learning_rate": 4.102949512740498e-05, "loss": 0.6515, "step": 11305 }, { "epoch": 0.33007441996206044, "grad_norm": 1.0790323256220478, "learning_rate": 4.102081808122346e-05, "loss": 0.6245, "step": 11310 }, { "epoch": 0.33022034145629653, "grad_norm": 0.9668724223886171, "learning_rate": 4.1012137886466926e-05, "loss": 0.5914, "step": 11315 }, { "epoch": 0.3303662629505326, "grad_norm": 1.206666398509121, "learning_rate": 4.100345454515673e-05, "loss": 0.687, "step": 11320 }, { "epoch": 0.3305121844447687, "grad_norm": 1.119715151656299, "learning_rate": 4.0994768059314934e-05, "loss": 0.6153, "step": 11325 }, { "epoch": 0.3306581059390048, "grad_norm": 0.9107223002161154, "learning_rate": 4.098607843096435e-05, "loss": 0.5969, "step": 11330 }, { "epoch": 0.33080402743324094, "grad_norm": 1.1094921257211765, "learning_rate": 4.097738566212854e-05, "loss": 0.6428, "step": 11335 }, { "epoch": 0.33094994892747703, "grad_norm": 1.1329701633719147, "learning_rate": 4.096868975483176e-05, "loss": 0.6269, "step": 11340 }, { "epoch": 0.3310958704217131, "grad_norm": 0.9741960681615446, "learning_rate": 4.095999071109901e-05, "loss": 0.5984, "step": 11345 }, { "epoch": 0.3312417919159492, "grad_norm": 1.2207424965425768, "learning_rate": 4.095128853295604e-05, "loss": 0.6507, "step": 11350 }, { "epoch": 0.3313877134101853, "grad_norm": 1.0327260774376408, "learning_rate": 4.094258322242931e-05, "loss": 0.5989, "step": 11355 }, { "epoch": 0.33153363490442145, "grad_norm": 1.2866155800385293, "learning_rate": 4.0933874781546004e-05, "loss": 0.646, "step": 11360 }, { "epoch": 0.33167955639865754, "grad_norm": 0.9436024293875237, "learning_rate": 4.092516321233406e-05, "loss": 0.5883, "step": 11365 }, { "epoch": 0.3318254778928936, "grad_norm": 0.9986688957699347, "learning_rate": 4.0916448516822125e-05, "loss": 0.6994, "step": 11370 }, { "epoch": 0.3319713993871297, "grad_norm": 1.1694862800543997, "learning_rate": 4.090773069703957e-05, "loss": 0.5735, "step": 11375 }, { "epoch": 0.3321173208813658, "grad_norm": 0.9386583051525885, "learning_rate": 4.089900975501651e-05, "loss": 0.6672, "step": 11380 }, { "epoch": 0.33226324237560195, "grad_norm": 1.076219490657191, "learning_rate": 4.089028569278378e-05, "loss": 0.5972, "step": 11385 }, { "epoch": 0.33240916386983804, "grad_norm": 0.9554744577351556, "learning_rate": 4.088155851237294e-05, "loss": 0.6338, "step": 11390 }, { "epoch": 0.33255508536407413, "grad_norm": 1.0174314800802398, "learning_rate": 4.0872828215816266e-05, "loss": 0.6115, "step": 11395 }, { "epoch": 0.3327010068583102, "grad_norm": 0.9218432861621283, "learning_rate": 4.086409480514679e-05, "loss": 0.5522, "step": 11400 }, { "epoch": 0.3328469283525463, "grad_norm": 1.0458586652940354, "learning_rate": 4.085535828239823e-05, "loss": 0.6031, "step": 11405 }, { "epoch": 0.33299284984678246, "grad_norm": 1.0612795754860267, "learning_rate": 4.084661864960507e-05, "loss": 0.6412, "step": 11410 }, { "epoch": 0.33313877134101855, "grad_norm": 0.9647009468365729, "learning_rate": 4.083787590880249e-05, "loss": 0.591, "step": 11415 }, { "epoch": 0.33328469283525464, "grad_norm": 1.0154476708360052, "learning_rate": 4.082913006202637e-05, "loss": 0.6261, "step": 11420 }, { "epoch": 0.3334306143294907, "grad_norm": 1.1342561582274007, "learning_rate": 4.082038111131339e-05, "loss": 0.594, "step": 11425 }, { "epoch": 0.3335765358237268, "grad_norm": 1.0843847008166378, "learning_rate": 4.081162905870089e-05, "loss": 0.594, "step": 11430 }, { "epoch": 0.33372245731796296, "grad_norm": 0.8950674521866501, "learning_rate": 4.080287390622693e-05, "loss": 0.585, "step": 11435 }, { "epoch": 0.33386837881219905, "grad_norm": 1.0938790435332206, "learning_rate": 4.079411565593033e-05, "loss": 0.6792, "step": 11440 }, { "epoch": 0.33401430030643514, "grad_norm": 1.1204408051939787, "learning_rate": 4.0785354309850614e-05, "loss": 0.6735, "step": 11445 }, { "epoch": 0.33416022180067123, "grad_norm": 1.117585344081694, "learning_rate": 4.0776589870028017e-05, "loss": 0.6867, "step": 11450 }, { "epoch": 0.3343061432949073, "grad_norm": 1.1241996933499665, "learning_rate": 4.076782233850349e-05, "loss": 0.6537, "step": 11455 }, { "epoch": 0.33445206478914347, "grad_norm": 1.2439817880282968, "learning_rate": 4.0759051717318755e-05, "loss": 0.5821, "step": 11460 }, { "epoch": 0.33459798628337956, "grad_norm": 1.089365830702333, "learning_rate": 4.075027800851617e-05, "loss": 0.6324, "step": 11465 }, { "epoch": 0.33474390777761565, "grad_norm": 1.0072398694470346, "learning_rate": 4.0741501214138886e-05, "loss": 0.6171, "step": 11470 }, { "epoch": 0.33488982927185174, "grad_norm": 1.0880742122794438, "learning_rate": 4.073272133623073e-05, "loss": 0.6128, "step": 11475 }, { "epoch": 0.3350357507660878, "grad_norm": 1.144030467809617, "learning_rate": 4.072393837683628e-05, "loss": 0.6133, "step": 11480 }, { "epoch": 0.33518167226032397, "grad_norm": 1.1489113111514178, "learning_rate": 4.071515233800079e-05, "loss": 0.6896, "step": 11485 }, { "epoch": 0.33532759375456006, "grad_norm": 1.0171107054003874, "learning_rate": 4.0706363221770254e-05, "loss": 0.6311, "step": 11490 }, { "epoch": 0.33547351524879615, "grad_norm": 0.9438188284955568, "learning_rate": 4.06975710301914e-05, "loss": 0.5863, "step": 11495 }, { "epoch": 0.33561943674303224, "grad_norm": 0.9609928362104976, "learning_rate": 4.0688775765311636e-05, "loss": 0.5922, "step": 11500 }, { "epoch": 0.33576535823726833, "grad_norm": 0.8851583716387176, "learning_rate": 4.067997742917911e-05, "loss": 0.6575, "step": 11505 }, { "epoch": 0.3359112797315045, "grad_norm": 1.2135837909130345, "learning_rate": 4.067117602384269e-05, "loss": 0.6412, "step": 11510 }, { "epoch": 0.33605720122574056, "grad_norm": 0.9932059901168055, "learning_rate": 4.0662371551351926e-05, "loss": 0.5701, "step": 11515 }, { "epoch": 0.33620312271997665, "grad_norm": 1.2438525231146114, "learning_rate": 4.0653564013757124e-05, "loss": 0.6326, "step": 11520 }, { "epoch": 0.33634904421421274, "grad_norm": 0.9721708801783453, "learning_rate": 4.064475341310926e-05, "loss": 0.6224, "step": 11525 }, { "epoch": 0.33649496570844883, "grad_norm": 0.9635945941202059, "learning_rate": 4.063593975146008e-05, "loss": 0.6199, "step": 11530 }, { "epoch": 0.336640887202685, "grad_norm": 0.9477525384098505, "learning_rate": 4.062712303086198e-05, "loss": 0.6349, "step": 11535 }, { "epoch": 0.33678680869692107, "grad_norm": 0.9500872000313519, "learning_rate": 4.061830325336811e-05, "loss": 0.6004, "step": 11540 }, { "epoch": 0.33693273019115716, "grad_norm": 1.079264998691053, "learning_rate": 4.0609480421032326e-05, "loss": 0.6225, "step": 11545 }, { "epoch": 0.33707865168539325, "grad_norm": 1.1255850604057762, "learning_rate": 4.0600654535909174e-05, "loss": 0.6407, "step": 11550 }, { "epoch": 0.33722457317962934, "grad_norm": 1.0196165755708377, "learning_rate": 4.0591825600053944e-05, "loss": 0.5924, "step": 11555 }, { "epoch": 0.3373704946738655, "grad_norm": 0.978065943675194, "learning_rate": 4.05829936155226e-05, "loss": 0.6415, "step": 11560 }, { "epoch": 0.3375164161681016, "grad_norm": 1.0630273684517053, "learning_rate": 4.057415858437186e-05, "loss": 0.6218, "step": 11565 }, { "epoch": 0.33766233766233766, "grad_norm": 0.9754306458450281, "learning_rate": 4.056532050865909e-05, "loss": 0.6261, "step": 11570 }, { "epoch": 0.33780825915657375, "grad_norm": 1.0120772896129, "learning_rate": 4.0556479390442425e-05, "loss": 0.5415, "step": 11575 }, { "epoch": 0.33795418065080984, "grad_norm": 1.030702413983348, "learning_rate": 4.0547635231780686e-05, "loss": 0.6605, "step": 11580 }, { "epoch": 0.338100102145046, "grad_norm": 1.0705045927934609, "learning_rate": 4.05387880347334e-05, "loss": 0.6228, "step": 11585 }, { "epoch": 0.3382460236392821, "grad_norm": 0.8869581727263077, "learning_rate": 4.0529937801360796e-05, "loss": 0.5799, "step": 11590 }, { "epoch": 0.33839194513351817, "grad_norm": 0.9585061856170709, "learning_rate": 4.0521084533723805e-05, "loss": 0.6266, "step": 11595 }, { "epoch": 0.33853786662775426, "grad_norm": 13.614486155253502, "learning_rate": 4.051222823388409e-05, "loss": 0.6292, "step": 11600 }, { "epoch": 0.33868378812199035, "grad_norm": 1.0186451921533686, "learning_rate": 4.0503368903904015e-05, "loss": 0.6519, "step": 11605 }, { "epoch": 0.3388297096162265, "grad_norm": 0.9520120308383339, "learning_rate": 4.0494506545846625e-05, "loss": 0.5591, "step": 11610 }, { "epoch": 0.3389756311104626, "grad_norm": 1.094136677831175, "learning_rate": 4.0485641161775686e-05, "loss": 0.5687, "step": 11615 }, { "epoch": 0.3391215526046987, "grad_norm": 0.9167299249673083, "learning_rate": 4.047677275375567e-05, "loss": 0.6232, "step": 11620 }, { "epoch": 0.33926747409893476, "grad_norm": 1.0155349389072916, "learning_rate": 4.046790132385177e-05, "loss": 0.6473, "step": 11625 }, { "epoch": 0.33941339559317085, "grad_norm": 1.204353672645426, "learning_rate": 4.045902687412983e-05, "loss": 0.6325, "step": 11630 }, { "epoch": 0.339559317087407, "grad_norm": 1.0352811936427893, "learning_rate": 4.0450149406656454e-05, "loss": 0.5752, "step": 11635 }, { "epoch": 0.3397052385816431, "grad_norm": 1.2036224500518966, "learning_rate": 4.044126892349892e-05, "loss": 0.5869, "step": 11640 }, { "epoch": 0.3398511600758792, "grad_norm": 0.9049889018835772, "learning_rate": 4.043238542672522e-05, "loss": 0.7039, "step": 11645 }, { "epoch": 0.33999708157011527, "grad_norm": 1.0746934526866798, "learning_rate": 4.042349891840403e-05, "loss": 0.6011, "step": 11650 }, { "epoch": 0.34014300306435136, "grad_norm": 1.2791431374070314, "learning_rate": 4.041460940060475e-05, "loss": 0.575, "step": 11655 }, { "epoch": 0.3402889245585875, "grad_norm": 0.956046019274133, "learning_rate": 4.0405716875397474e-05, "loss": 0.5684, "step": 11660 }, { "epoch": 0.3404348460528236, "grad_norm": 0.8957865227576939, "learning_rate": 4.039682134485299e-05, "loss": 0.6254, "step": 11665 }, { "epoch": 0.3405807675470597, "grad_norm": 1.3255957146955113, "learning_rate": 4.038792281104277e-05, "loss": 0.6362, "step": 11670 }, { "epoch": 0.34072668904129577, "grad_norm": 0.9825886998592437, "learning_rate": 4.037902127603902e-05, "loss": 0.6321, "step": 11675 }, { "epoch": 0.34087261053553186, "grad_norm": 1.022042790376303, "learning_rate": 4.0370116741914626e-05, "loss": 0.6232, "step": 11680 }, { "epoch": 0.341018532029768, "grad_norm": 1.0367874143276963, "learning_rate": 4.036120921074319e-05, "loss": 0.5685, "step": 11685 }, { "epoch": 0.3411644535240041, "grad_norm": 0.9625893045756854, "learning_rate": 4.035229868459897e-05, "loss": 0.5705, "step": 11690 }, { "epoch": 0.3413103750182402, "grad_norm": 1.1416292137182558, "learning_rate": 4.034338516555696e-05, "loss": 0.631, "step": 11695 }, { "epoch": 0.3414562965124763, "grad_norm": 0.9392387893446615, "learning_rate": 4.033446865569285e-05, "loss": 0.5502, "step": 11700 }, { "epoch": 0.34160221800671237, "grad_norm": 1.1494205510912594, "learning_rate": 4.0325549157083e-05, "loss": 0.6027, "step": 11705 }, { "epoch": 0.3417481395009485, "grad_norm": 0.9954427588894809, "learning_rate": 4.031662667180449e-05, "loss": 0.6416, "step": 11710 }, { "epoch": 0.3418940609951846, "grad_norm": 0.9478626331065628, "learning_rate": 4.030770120193509e-05, "loss": 0.5836, "step": 11715 }, { "epoch": 0.3420399824894207, "grad_norm": 1.0318209514457515, "learning_rate": 4.029877274955325e-05, "loss": 0.6742, "step": 11720 }, { "epoch": 0.3421859039836568, "grad_norm": 0.9361076460990716, "learning_rate": 4.028984131673814e-05, "loss": 0.5534, "step": 11725 }, { "epoch": 0.34233182547789287, "grad_norm": 1.182780341461957, "learning_rate": 4.02809069055696e-05, "loss": 0.6184, "step": 11730 }, { "epoch": 0.342477746972129, "grad_norm": 1.1217329252882884, "learning_rate": 4.027196951812819e-05, "loss": 0.6618, "step": 11735 }, { "epoch": 0.3426236684663651, "grad_norm": 1.041416129745986, "learning_rate": 4.026302915649513e-05, "loss": 0.6615, "step": 11740 }, { "epoch": 0.3427695899606012, "grad_norm": 0.9929462762558451, "learning_rate": 4.0254085822752365e-05, "loss": 0.5981, "step": 11745 }, { "epoch": 0.3429155114548373, "grad_norm": 1.1719165717497415, "learning_rate": 4.02451395189825e-05, "loss": 0.6287, "step": 11750 }, { "epoch": 0.3430614329490734, "grad_norm": 1.0985426542323578, "learning_rate": 4.023619024726887e-05, "loss": 0.6175, "step": 11755 }, { "epoch": 0.3432073544433095, "grad_norm": 1.0344749049824091, "learning_rate": 4.022723800969547e-05, "loss": 0.6391, "step": 11760 }, { "epoch": 0.3433532759375456, "grad_norm": 1.2512350611789893, "learning_rate": 4.0218282808346986e-05, "loss": 0.6675, "step": 11765 }, { "epoch": 0.3434991974317817, "grad_norm": 1.016037781223652, "learning_rate": 4.020932464530882e-05, "loss": 0.5805, "step": 11770 }, { "epoch": 0.3436451189260178, "grad_norm": 1.142195942102537, "learning_rate": 4.020036352266704e-05, "loss": 0.644, "step": 11775 }, { "epoch": 0.3437910404202539, "grad_norm": 1.412425677520266, "learning_rate": 4.019139944250841e-05, "loss": 0.6912, "step": 11780 }, { "epoch": 0.34393696191449, "grad_norm": 1.2608008988458768, "learning_rate": 4.018243240692038e-05, "loss": 0.6782, "step": 11785 }, { "epoch": 0.3440828834087261, "grad_norm": 1.066099336377157, "learning_rate": 4.017346241799111e-05, "loss": 0.6353, "step": 11790 }, { "epoch": 0.3442288049029622, "grad_norm": 0.9886717340988644, "learning_rate": 4.01644894778094e-05, "loss": 0.622, "step": 11795 }, { "epoch": 0.3443747263971983, "grad_norm": 1.0569965976704123, "learning_rate": 4.015551358846479e-05, "loss": 0.6027, "step": 11800 }, { "epoch": 0.3445206478914344, "grad_norm": 1.071537093921575, "learning_rate": 4.0146534752047485e-05, "loss": 0.617, "step": 11805 }, { "epoch": 0.34466656938567053, "grad_norm": 1.3423382903000434, "learning_rate": 4.013755297064835e-05, "loss": 0.624, "step": 11810 }, { "epoch": 0.3448124908799066, "grad_norm": 1.0202858938297927, "learning_rate": 4.012856824635899e-05, "loss": 0.6005, "step": 11815 }, { "epoch": 0.3449584123741427, "grad_norm": 1.0306726778606794, "learning_rate": 4.011958058127165e-05, "loss": 0.6073, "step": 11820 }, { "epoch": 0.3451043338683788, "grad_norm": 1.1076206505861514, "learning_rate": 4.0110589977479265e-05, "loss": 0.6005, "step": 11825 }, { "epoch": 0.3452502553626149, "grad_norm": 0.992354674622993, "learning_rate": 4.01015964370755e-05, "loss": 0.6576, "step": 11830 }, { "epoch": 0.34539617685685103, "grad_norm": 0.9359407467285924, "learning_rate": 4.0092599962154625e-05, "loss": 0.5697, "step": 11835 }, { "epoch": 0.3455420983510871, "grad_norm": 1.0197674730096449, "learning_rate": 4.008360055481167e-05, "loss": 0.6101, "step": 11840 }, { "epoch": 0.3456880198453232, "grad_norm": 1.202661499911798, "learning_rate": 4.007459821714231e-05, "loss": 0.6433, "step": 11845 }, { "epoch": 0.3458339413395593, "grad_norm": 0.9701731955478315, "learning_rate": 4.006559295124289e-05, "loss": 0.6774, "step": 11850 }, { "epoch": 0.3459798628337954, "grad_norm": 1.0998235098898552, "learning_rate": 4.005658475921047e-05, "loss": 0.601, "step": 11855 }, { "epoch": 0.34612578432803154, "grad_norm": 0.8687070239156239, "learning_rate": 4.004757364314277e-05, "loss": 0.5828, "step": 11860 }, { "epoch": 0.34627170582226763, "grad_norm": 1.0973373423020554, "learning_rate": 4.00385596051382e-05, "loss": 0.6614, "step": 11865 }, { "epoch": 0.3464176273165037, "grad_norm": 1.0186541566260618, "learning_rate": 4.002954264729585e-05, "loss": 0.5837, "step": 11870 }, { "epoch": 0.3465635488107398, "grad_norm": 1.047456859805427, "learning_rate": 4.002052277171548e-05, "loss": 0.6187, "step": 11875 }, { "epoch": 0.3467094703049759, "grad_norm": 1.0559355868145486, "learning_rate": 4.001149998049754e-05, "loss": 0.611, "step": 11880 }, { "epoch": 0.34685539179921204, "grad_norm": 1.012127312246685, "learning_rate": 4.000247427574316e-05, "loss": 0.5921, "step": 11885 }, { "epoch": 0.34700131329344813, "grad_norm": 1.212715477570161, "learning_rate": 3.999344565955412e-05, "loss": 0.6416, "step": 11890 }, { "epoch": 0.3471472347876842, "grad_norm": 1.219259746588776, "learning_rate": 3.9984414134032936e-05, "loss": 0.617, "step": 11895 }, { "epoch": 0.3472931562819203, "grad_norm": 1.0408859393241783, "learning_rate": 3.9975379701282754e-05, "loss": 0.5939, "step": 11900 }, { "epoch": 0.3474390777761564, "grad_norm": 1.1164331437651034, "learning_rate": 3.99663423634074e-05, "loss": 0.6066, "step": 11905 }, { "epoch": 0.34758499927039255, "grad_norm": 1.0687503314288125, "learning_rate": 3.99573021225114e-05, "loss": 0.584, "step": 11910 }, { "epoch": 0.34773092076462864, "grad_norm": 1.0727786430714359, "learning_rate": 3.9948258980699934e-05, "loss": 0.6311, "step": 11915 }, { "epoch": 0.3478768422588647, "grad_norm": 1.2509984583005835, "learning_rate": 3.993921294007888e-05, "loss": 0.6784, "step": 11920 }, { "epoch": 0.3480227637531008, "grad_norm": 1.1234430072469523, "learning_rate": 3.9930164002754755e-05, "loss": 0.6479, "step": 11925 }, { "epoch": 0.3481686852473369, "grad_norm": 0.9946724937840159, "learning_rate": 3.992111217083479e-05, "loss": 0.6574, "step": 11930 }, { "epoch": 0.34831460674157305, "grad_norm": 1.0423653695316986, "learning_rate": 3.991205744642687e-05, "loss": 0.7139, "step": 11935 }, { "epoch": 0.34846052823580914, "grad_norm": 1.044499716132989, "learning_rate": 3.990299983163955e-05, "loss": 0.6085, "step": 11940 }, { "epoch": 0.34860644973004523, "grad_norm": 1.0596993552345453, "learning_rate": 3.989393932858208e-05, "loss": 0.5797, "step": 11945 }, { "epoch": 0.3487523712242813, "grad_norm": 0.9288701138259828, "learning_rate": 3.9884875939364356e-05, "loss": 0.5617, "step": 11950 }, { "epoch": 0.3488982927185174, "grad_norm": 0.9501583776531269, "learning_rate": 3.987580966609696e-05, "loss": 0.5739, "step": 11955 }, { "epoch": 0.34904421421275356, "grad_norm": 0.9489743282834809, "learning_rate": 3.9866740510891146e-05, "loss": 0.6315, "step": 11960 }, { "epoch": 0.34919013570698965, "grad_norm": 1.11705639204195, "learning_rate": 3.985766847585883e-05, "loss": 0.6338, "step": 11965 }, { "epoch": 0.34933605720122574, "grad_norm": 0.9908553784863717, "learning_rate": 3.98485935631126e-05, "loss": 0.5678, "step": 11970 }, { "epoch": 0.3494819786954618, "grad_norm": 1.2087208819872626, "learning_rate": 3.9839515774765734e-05, "loss": 0.7002, "step": 11975 }, { "epoch": 0.3496279001896979, "grad_norm": 1.0111445846843672, "learning_rate": 3.983043511293216e-05, "loss": 0.6007, "step": 11980 }, { "epoch": 0.34977382168393406, "grad_norm": 0.8873050546606204, "learning_rate": 3.982135157972647e-05, "loss": 0.634, "step": 11985 }, { "epoch": 0.34991974317817015, "grad_norm": 1.088884100690103, "learning_rate": 3.981226517726394e-05, "loss": 0.6144, "step": 11990 }, { "epoch": 0.35006566467240624, "grad_norm": 1.0440919603189667, "learning_rate": 3.980317590766051e-05, "loss": 0.6564, "step": 11995 }, { "epoch": 0.35021158616664233, "grad_norm": 0.9068609447339323, "learning_rate": 3.979408377303279e-05, "loss": 0.5537, "step": 12000 }, { "epoch": 0.3503575076608784, "grad_norm": 1.1693639016047885, "learning_rate": 3.978498877549805e-05, "loss": 0.5993, "step": 12005 }, { "epoch": 0.35050342915511457, "grad_norm": 1.0621269135223315, "learning_rate": 3.977589091717423e-05, "loss": 0.5957, "step": 12010 }, { "epoch": 0.35064935064935066, "grad_norm": 0.9255826927281953, "learning_rate": 3.9766790200179926e-05, "loss": 0.5885, "step": 12015 }, { "epoch": 0.35079527214358674, "grad_norm": 1.172959074159462, "learning_rate": 3.975768662663442e-05, "loss": 0.607, "step": 12020 }, { "epoch": 0.35094119363782283, "grad_norm": 1.1405879773489567, "learning_rate": 3.974858019865766e-05, "loss": 0.6829, "step": 12025 }, { "epoch": 0.351087115132059, "grad_norm": 0.9571356477421576, "learning_rate": 3.973947091837023e-05, "loss": 0.6318, "step": 12030 }, { "epoch": 0.35123303662629507, "grad_norm": 1.0538091204882005, "learning_rate": 3.97303587878934e-05, "loss": 0.6895, "step": 12035 }, { "epoch": 0.35137895812053116, "grad_norm": 0.9383875321038203, "learning_rate": 3.972124380934911e-05, "loss": 0.5984, "step": 12040 }, { "epoch": 0.35152487961476725, "grad_norm": 1.0600920043661486, "learning_rate": 3.971212598485993e-05, "loss": 0.6829, "step": 12045 }, { "epoch": 0.35167080110900334, "grad_norm": 4.835728397734678, "learning_rate": 3.9703005316549144e-05, "loss": 0.6366, "step": 12050 }, { "epoch": 0.3518167226032395, "grad_norm": 1.0425580132273389, "learning_rate": 3.9693881806540663e-05, "loss": 0.6193, "step": 12055 }, { "epoch": 0.3519626440974756, "grad_norm": 1.1221501876056423, "learning_rate": 3.968475545695904e-05, "loss": 0.663, "step": 12060 }, { "epoch": 0.35210856559171166, "grad_norm": 1.3164184007608646, "learning_rate": 3.967562626992955e-05, "loss": 0.5855, "step": 12065 }, { "epoch": 0.35225448708594775, "grad_norm": 1.0839853136591844, "learning_rate": 3.966649424757808e-05, "loss": 0.6053, "step": 12070 }, { "epoch": 0.35240040858018384, "grad_norm": 1.1444259043679468, "learning_rate": 3.9657359392031194e-05, "loss": 0.6678, "step": 12075 }, { "epoch": 0.35254633007442, "grad_norm": 1.0408842539417962, "learning_rate": 3.964822170541611e-05, "loss": 0.5983, "step": 12080 }, { "epoch": 0.3526922515686561, "grad_norm": 1.2503432594686106, "learning_rate": 3.963908118986071e-05, "loss": 0.6024, "step": 12085 }, { "epoch": 0.35283817306289217, "grad_norm": 0.8876723098489058, "learning_rate": 3.962993784749354e-05, "loss": 0.6067, "step": 12090 }, { "epoch": 0.35298409455712826, "grad_norm": 0.8594298888912166, "learning_rate": 3.9620791680443786e-05, "loss": 0.6063, "step": 12095 }, { "epoch": 0.35313001605136435, "grad_norm": 1.1372547791451348, "learning_rate": 3.961164269084132e-05, "loss": 0.6218, "step": 12100 }, { "epoch": 0.3532759375456005, "grad_norm": 1.1459439724212972, "learning_rate": 3.9602490880816645e-05, "loss": 0.691, "step": 12105 }, { "epoch": 0.3534218590398366, "grad_norm": 1.1228696081698626, "learning_rate": 3.959333625250093e-05, "loss": 0.6424, "step": 12110 }, { "epoch": 0.3535677805340727, "grad_norm": 1.1043214863169466, "learning_rate": 3.958417880802601e-05, "loss": 0.6485, "step": 12115 }, { "epoch": 0.35371370202830876, "grad_norm": 1.1234498375565625, "learning_rate": 3.957501854952434e-05, "loss": 0.6113, "step": 12120 }, { "epoch": 0.35385962352254485, "grad_norm": 1.219363438942327, "learning_rate": 3.9565855479129096e-05, "loss": 0.5836, "step": 12125 }, { "epoch": 0.354005545016781, "grad_norm": 1.005811850355647, "learning_rate": 3.955668959897405e-05, "loss": 0.6167, "step": 12130 }, { "epoch": 0.3541514665110171, "grad_norm": 0.932909719896271, "learning_rate": 3.9547520911193655e-05, "loss": 0.6075, "step": 12135 }, { "epoch": 0.3542973880052532, "grad_norm": 1.1001747951757632, "learning_rate": 3.9538349417923e-05, "loss": 0.6051, "step": 12140 }, { "epoch": 0.35444330949948927, "grad_norm": 0.948816669053959, "learning_rate": 3.952917512129785e-05, "loss": 0.6289, "step": 12145 }, { "epoch": 0.35458923099372536, "grad_norm": 0.9499283574304829, "learning_rate": 3.9519998023454595e-05, "loss": 0.6412, "step": 12150 }, { "epoch": 0.3547351524879615, "grad_norm": 1.251550171986757, "learning_rate": 3.951081812653032e-05, "loss": 0.6189, "step": 12155 }, { "epoch": 0.3548810739821976, "grad_norm": 1.1282563332718842, "learning_rate": 3.950163543266271e-05, "loss": 0.6395, "step": 12160 }, { "epoch": 0.3550269954764337, "grad_norm": 1.0819370419567056, "learning_rate": 3.949244994399014e-05, "loss": 0.635, "step": 12165 }, { "epoch": 0.35517291697066977, "grad_norm": 1.0778565547247143, "learning_rate": 3.948326166265162e-05, "loss": 0.656, "step": 12170 }, { "epoch": 0.35531883846490586, "grad_norm": 1.0059555884553493, "learning_rate": 3.947407059078681e-05, "loss": 0.6421, "step": 12175 }, { "epoch": 0.355464759959142, "grad_norm": 0.955652087152547, "learning_rate": 3.9464876730536023e-05, "loss": 0.6316, "step": 12180 }, { "epoch": 0.3556106814533781, "grad_norm": 1.0743625569302586, "learning_rate": 3.945568008404023e-05, "loss": 0.596, "step": 12185 }, { "epoch": 0.3557566029476142, "grad_norm": 1.356186403434128, "learning_rate": 3.9446480653441034e-05, "loss": 0.6279, "step": 12190 }, { "epoch": 0.3559025244418503, "grad_norm": 0.9841138898777383, "learning_rate": 3.94372784408807e-05, "loss": 0.5922, "step": 12195 }, { "epoch": 0.35604844593608637, "grad_norm": 0.9898787268814568, "learning_rate": 3.9428073448502125e-05, "loss": 0.5846, "step": 12200 }, { "epoch": 0.3561943674303225, "grad_norm": 1.2283082897526931, "learning_rate": 3.941886567844887e-05, "loss": 0.6322, "step": 12205 }, { "epoch": 0.3563402889245586, "grad_norm": 0.8672762403587563, "learning_rate": 3.9409655132865134e-05, "loss": 0.6118, "step": 12210 }, { "epoch": 0.3564862104187947, "grad_norm": 1.0170053194543032, "learning_rate": 3.940044181389576e-05, "loss": 0.6611, "step": 12215 }, { "epoch": 0.3566321319130308, "grad_norm": 1.3992662603842216, "learning_rate": 3.939122572368626e-05, "loss": 0.6259, "step": 12220 }, { "epoch": 0.35677805340726687, "grad_norm": 0.931400118462024, "learning_rate": 3.938200686438276e-05, "loss": 0.5905, "step": 12225 }, { "epoch": 0.356923974901503, "grad_norm": 1.0077482460544411, "learning_rate": 3.937278523813204e-05, "loss": 0.6177, "step": 12230 }, { "epoch": 0.3570698963957391, "grad_norm": 0.9732156132169247, "learning_rate": 3.936356084708153e-05, "loss": 0.6197, "step": 12235 }, { "epoch": 0.3572158178899752, "grad_norm": 1.1821515070926205, "learning_rate": 3.93543336933793e-05, "loss": 0.5949, "step": 12240 }, { "epoch": 0.3573617393842113, "grad_norm": 0.8798611610294117, "learning_rate": 3.934510377917407e-05, "loss": 0.6146, "step": 12245 }, { "epoch": 0.3575076608784474, "grad_norm": 1.1159368694368452, "learning_rate": 3.9335871106615195e-05, "loss": 0.6392, "step": 12250 }, { "epoch": 0.3576535823726835, "grad_norm": 1.1735250087724647, "learning_rate": 3.9326635677852685e-05, "loss": 0.5661, "step": 12255 }, { "epoch": 0.3577995038669196, "grad_norm": 1.0239488702992352, "learning_rate": 3.931739749503717e-05, "loss": 0.6409, "step": 12260 }, { "epoch": 0.3579454253611557, "grad_norm": 1.0047671993049745, "learning_rate": 3.930815656031993e-05, "loss": 0.6143, "step": 12265 }, { "epoch": 0.3580913468553918, "grad_norm": 1.0219145610569702, "learning_rate": 3.92989128758529e-05, "loss": 0.5726, "step": 12270 }, { "epoch": 0.3582372683496279, "grad_norm": 1.026163154691809, "learning_rate": 3.928966644378864e-05, "loss": 0.6568, "step": 12275 }, { "epoch": 0.358383189843864, "grad_norm": 1.0479287348082684, "learning_rate": 3.9280417266280356e-05, "loss": 0.5927, "step": 12280 }, { "epoch": 0.3585291113381001, "grad_norm": 0.9236183679134596, "learning_rate": 3.927116534548189e-05, "loss": 0.6215, "step": 12285 }, { "epoch": 0.3586750328323362, "grad_norm": 0.9600579308213374, "learning_rate": 3.926191068354773e-05, "loss": 0.5791, "step": 12290 }, { "epoch": 0.3588209543265723, "grad_norm": 0.9653476417998901, "learning_rate": 3.925265328263299e-05, "loss": 0.6087, "step": 12295 }, { "epoch": 0.3589668758208084, "grad_norm": 0.9437025841774455, "learning_rate": 3.9243393144893434e-05, "loss": 0.6025, "step": 12300 }, { "epoch": 0.35911279731504453, "grad_norm": 0.941321908566054, "learning_rate": 3.923413027248545e-05, "loss": 0.5963, "step": 12305 }, { "epoch": 0.3592587188092806, "grad_norm": 1.076612889604512, "learning_rate": 3.922486466756607e-05, "loss": 0.6977, "step": 12310 }, { "epoch": 0.3594046403035167, "grad_norm": 1.0821463482985947, "learning_rate": 3.9215596332292984e-05, "loss": 0.599, "step": 12315 }, { "epoch": 0.3595505617977528, "grad_norm": 1.0601812976146276, "learning_rate": 3.920632526882447e-05, "loss": 0.6622, "step": 12320 }, { "epoch": 0.3596964832919889, "grad_norm": 1.0766512517970406, "learning_rate": 3.919705147931949e-05, "loss": 0.5967, "step": 12325 }, { "epoch": 0.35984240478622503, "grad_norm": 1.0052417408289198, "learning_rate": 3.91877749659376e-05, "loss": 0.6004, "step": 12330 }, { "epoch": 0.3599883262804611, "grad_norm": 1.2221946812596904, "learning_rate": 3.9178495730839025e-05, "loss": 0.6456, "step": 12335 }, { "epoch": 0.3601342477746972, "grad_norm": 1.079130279648457, "learning_rate": 3.9169213776184604e-05, "loss": 0.6441, "step": 12340 }, { "epoch": 0.3602801692689333, "grad_norm": 0.9419757338439992, "learning_rate": 3.91599291041358e-05, "loss": 0.6623, "step": 12345 }, { "epoch": 0.3604260907631694, "grad_norm": 1.0769944355614132, "learning_rate": 3.915064171685474e-05, "loss": 0.609, "step": 12350 }, { "epoch": 0.36057201225740554, "grad_norm": 1.1742438348013977, "learning_rate": 3.9141351616504166e-05, "loss": 0.6361, "step": 12355 }, { "epoch": 0.36071793375164163, "grad_norm": 1.1344928126963667, "learning_rate": 3.913205880524744e-05, "loss": 0.6066, "step": 12360 }, { "epoch": 0.3608638552458777, "grad_norm": 0.8598928058434672, "learning_rate": 3.912276328524857e-05, "loss": 0.6158, "step": 12365 }, { "epoch": 0.3610097767401138, "grad_norm": 0.878949872083786, "learning_rate": 3.911346505867219e-05, "loss": 0.5126, "step": 12370 }, { "epoch": 0.3611556982343499, "grad_norm": 1.0196358038958668, "learning_rate": 3.910416412768358e-05, "loss": 0.5805, "step": 12375 }, { "epoch": 0.36130161972858604, "grad_norm": 0.9966378808576365, "learning_rate": 3.909486049444861e-05, "loss": 0.6235, "step": 12380 }, { "epoch": 0.36144754122282213, "grad_norm": 0.9980804698093425, "learning_rate": 3.9085554161133834e-05, "loss": 0.641, "step": 12385 }, { "epoch": 0.3615934627170582, "grad_norm": 1.0416197783131367, "learning_rate": 3.9076245129906374e-05, "loss": 0.6057, "step": 12390 }, { "epoch": 0.3617393842112943, "grad_norm": 1.0936552446637446, "learning_rate": 3.9066933402934034e-05, "loss": 0.6714, "step": 12395 }, { "epoch": 0.3618853057055304, "grad_norm": 1.0771502397942445, "learning_rate": 3.905761898238522e-05, "loss": 0.6039, "step": 12400 }, { "epoch": 0.36203122719976655, "grad_norm": 1.1268914571276445, "learning_rate": 3.904830187042897e-05, "loss": 0.5972, "step": 12405 }, { "epoch": 0.36217714869400264, "grad_norm": 1.1882576064225108, "learning_rate": 3.9038982069234936e-05, "loss": 0.607, "step": 12410 }, { "epoch": 0.3623230701882387, "grad_norm": 1.0251124486738417, "learning_rate": 3.902965958097342e-05, "loss": 0.5594, "step": 12415 }, { "epoch": 0.3624689916824748, "grad_norm": 1.389894960278433, "learning_rate": 3.902033440781533e-05, "loss": 0.6343, "step": 12420 }, { "epoch": 0.3626149131767109, "grad_norm": 1.0507809040936216, "learning_rate": 3.901100655193221e-05, "loss": 0.631, "step": 12425 }, { "epoch": 0.36276083467094705, "grad_norm": 1.0251105360431334, "learning_rate": 3.9001676015496235e-05, "loss": 0.6455, "step": 12430 }, { "epoch": 0.36290675616518314, "grad_norm": 1.0333728094239572, "learning_rate": 3.899234280068018e-05, "loss": 0.662, "step": 12435 }, { "epoch": 0.36305267765941923, "grad_norm": 1.0839391075800229, "learning_rate": 3.898300690965745e-05, "loss": 0.5593, "step": 12440 }, { "epoch": 0.3631985991536553, "grad_norm": 1.2468282912684354, "learning_rate": 3.8973668344602116e-05, "loss": 0.6087, "step": 12445 }, { "epoch": 0.3633445206478914, "grad_norm": 0.9949617451404282, "learning_rate": 3.8964327107688804e-05, "loss": 0.6435, "step": 12450 }, { "epoch": 0.36349044214212756, "grad_norm": 1.1795341630793073, "learning_rate": 3.8954983201092814e-05, "loss": 0.6108, "step": 12455 }, { "epoch": 0.36363636363636365, "grad_norm": 1.041792239753175, "learning_rate": 3.894563662699004e-05, "loss": 0.6231, "step": 12460 }, { "epoch": 0.36378228513059974, "grad_norm": 1.1061810632519775, "learning_rate": 3.893628738755701e-05, "loss": 0.6529, "step": 12465 }, { "epoch": 0.3639282066248358, "grad_norm": 0.8645596604878601, "learning_rate": 3.892693548497086e-05, "loss": 0.6119, "step": 12470 }, { "epoch": 0.3640741281190719, "grad_norm": 1.1858786637033374, "learning_rate": 3.891758092140938e-05, "loss": 0.5881, "step": 12475 }, { "epoch": 0.36422004961330806, "grad_norm": 1.163342889683199, "learning_rate": 3.8908223699050936e-05, "loss": 0.6064, "step": 12480 }, { "epoch": 0.36436597110754415, "grad_norm": 0.9931086008485451, "learning_rate": 3.8898863820074525e-05, "loss": 0.6562, "step": 12485 }, { "epoch": 0.36451189260178024, "grad_norm": 1.047634168827138, "learning_rate": 3.888950128665978e-05, "loss": 0.6066, "step": 12490 }, { "epoch": 0.36465781409601633, "grad_norm": 1.0349906291528372, "learning_rate": 3.8880136100986947e-05, "loss": 0.6756, "step": 12495 }, { "epoch": 0.3648037355902524, "grad_norm": 0.9990501090781592, "learning_rate": 3.887076826523687e-05, "loss": 0.6031, "step": 12500 }, { "epoch": 0.36494965708448857, "grad_norm": 0.9856391914854163, "learning_rate": 3.886139778159104e-05, "loss": 0.5714, "step": 12505 }, { "epoch": 0.36509557857872466, "grad_norm": 1.0282245342282552, "learning_rate": 3.8852024652231537e-05, "loss": 0.6121, "step": 12510 }, { "epoch": 0.36524150007296075, "grad_norm": 1.0137111171321143, "learning_rate": 3.884264887934108e-05, "loss": 0.6234, "step": 12515 }, { "epoch": 0.36538742156719684, "grad_norm": 1.1053439189415284, "learning_rate": 3.883327046510298e-05, "loss": 0.6796, "step": 12520 }, { "epoch": 0.3655333430614329, "grad_norm": 1.090195817061205, "learning_rate": 3.882388941170119e-05, "loss": 0.659, "step": 12525 }, { "epoch": 0.36567926455566907, "grad_norm": 0.961115465503495, "learning_rate": 3.881450572132025e-05, "loss": 0.6249, "step": 12530 }, { "epoch": 0.36582518604990516, "grad_norm": 1.2178822857034812, "learning_rate": 3.880511939614533e-05, "loss": 0.661, "step": 12535 }, { "epoch": 0.36597110754414125, "grad_norm": 1.3523029561268745, "learning_rate": 3.8795730438362224e-05, "loss": 0.5938, "step": 12540 }, { "epoch": 0.36611702903837734, "grad_norm": 0.9179444846417907, "learning_rate": 3.878633885015731e-05, "loss": 0.5721, "step": 12545 }, { "epoch": 0.36626295053261343, "grad_norm": 1.0304136184299528, "learning_rate": 3.877694463371761e-05, "loss": 0.6008, "step": 12550 }, { "epoch": 0.3664088720268496, "grad_norm": 1.1308417072170858, "learning_rate": 3.876754779123073e-05, "loss": 0.6391, "step": 12555 }, { "epoch": 0.36655479352108566, "grad_norm": 1.0366168139855234, "learning_rate": 3.875814832488491e-05, "loss": 0.5866, "step": 12560 }, { "epoch": 0.36670071501532175, "grad_norm": 0.9091783498818459, "learning_rate": 3.874874623686898e-05, "loss": 0.6333, "step": 12565 }, { "epoch": 0.36684663650955784, "grad_norm": 1.0585857295806147, "learning_rate": 3.87393415293724e-05, "loss": 0.6515, "step": 12570 }, { "epoch": 0.36699255800379393, "grad_norm": 1.0582641329813207, "learning_rate": 3.8729934204585226e-05, "loss": 0.5822, "step": 12575 }, { "epoch": 0.3671384794980301, "grad_norm": 1.045085837100344, "learning_rate": 3.872052426469814e-05, "loss": 0.643, "step": 12580 }, { "epoch": 0.36728440099226617, "grad_norm": 1.019812038817716, "learning_rate": 3.8711111711902406e-05, "loss": 0.6066, "step": 12585 }, { "epoch": 0.36743032248650226, "grad_norm": 1.011380568256649, "learning_rate": 3.870169654838994e-05, "loss": 0.5816, "step": 12590 }, { "epoch": 0.36757624398073835, "grad_norm": 1.0616611536982334, "learning_rate": 3.86922787763532e-05, "loss": 0.6318, "step": 12595 }, { "epoch": 0.36772216547497444, "grad_norm": 1.1184250893439995, "learning_rate": 3.868285839798532e-05, "loss": 0.6662, "step": 12600 }, { "epoch": 0.3678680869692106, "grad_norm": 1.885339962299369, "learning_rate": 3.8673435415480005e-05, "loss": 0.5327, "step": 12605 }, { "epoch": 0.3680140084634467, "grad_norm": 0.8908082129284852, "learning_rate": 3.8664009831031566e-05, "loss": 0.5671, "step": 12610 }, { "epoch": 0.36815992995768276, "grad_norm": 1.045861962298631, "learning_rate": 3.865458164683493e-05, "loss": 0.6534, "step": 12615 }, { "epoch": 0.36830585145191885, "grad_norm": 0.9957578815874936, "learning_rate": 3.864515086508562e-05, "loss": 0.6275, "step": 12620 }, { "epoch": 0.36845177294615494, "grad_norm": 1.0942732330491125, "learning_rate": 3.863571748797979e-05, "loss": 0.6436, "step": 12625 }, { "epoch": 0.3685976944403911, "grad_norm": 0.950652580489795, "learning_rate": 3.862628151771415e-05, "loss": 0.6246, "step": 12630 }, { "epoch": 0.3687436159346272, "grad_norm": 0.9616192886459279, "learning_rate": 3.861684295648606e-05, "loss": 0.6019, "step": 12635 }, { "epoch": 0.36888953742886327, "grad_norm": 0.9931400635283733, "learning_rate": 3.860740180649345e-05, "loss": 0.6009, "step": 12640 }, { "epoch": 0.36903545892309936, "grad_norm": 0.9639892742723815, "learning_rate": 3.85979580699349e-05, "loss": 0.5688, "step": 12645 }, { "epoch": 0.36918138041733545, "grad_norm": 0.9729417778348433, "learning_rate": 3.858851174900952e-05, "loss": 0.6117, "step": 12650 }, { "epoch": 0.3693273019115716, "grad_norm": 0.9107797937901947, "learning_rate": 3.857906284591709e-05, "loss": 0.6185, "step": 12655 }, { "epoch": 0.3694732234058077, "grad_norm": 0.9722002897827574, "learning_rate": 3.856961136285795e-05, "loss": 0.6317, "step": 12660 }, { "epoch": 0.3696191449000438, "grad_norm": 1.0326197333647278, "learning_rate": 3.856015730203306e-05, "loss": 0.6638, "step": 12665 }, { "epoch": 0.36976506639427986, "grad_norm": 1.1396947701012703, "learning_rate": 3.855070066564398e-05, "loss": 0.604, "step": 12670 }, { "epoch": 0.36991098788851595, "grad_norm": 1.045316932712372, "learning_rate": 3.854124145589285e-05, "loss": 0.5985, "step": 12675 }, { "epoch": 0.3700569093827521, "grad_norm": 0.9618470098764271, "learning_rate": 3.8531779674982434e-05, "loss": 0.6567, "step": 12680 }, { "epoch": 0.3702028308769882, "grad_norm": 1.00610176739214, "learning_rate": 3.852231532511609e-05, "loss": 0.5776, "step": 12685 }, { "epoch": 0.3703487523712243, "grad_norm": 0.9658205113334071, "learning_rate": 3.8512848408497746e-05, "loss": 0.616, "step": 12690 }, { "epoch": 0.37049467386546037, "grad_norm": 1.3336789365602073, "learning_rate": 3.850337892733198e-05, "loss": 0.66, "step": 12695 }, { "epoch": 0.37064059535969646, "grad_norm": 1.101886589801939, "learning_rate": 3.8493906883823915e-05, "loss": 0.5811, "step": 12700 }, { "epoch": 0.3707865168539326, "grad_norm": 0.9750669598075934, "learning_rate": 3.84844322801793e-05, "loss": 0.5374, "step": 12705 }, { "epoch": 0.3709324383481687, "grad_norm": 0.9709111582543624, "learning_rate": 3.847495511860447e-05, "loss": 0.567, "step": 12710 }, { "epoch": 0.3710783598424048, "grad_norm": 1.3180067061823268, "learning_rate": 3.846547540130637e-05, "loss": 0.632, "step": 12715 }, { "epoch": 0.37122428133664087, "grad_norm": 0.9667394666544465, "learning_rate": 3.845599313049253e-05, "loss": 0.6046, "step": 12720 }, { "epoch": 0.37137020283087696, "grad_norm": 0.9751186960856844, "learning_rate": 3.844650830837106e-05, "loss": 0.6077, "step": 12725 }, { "epoch": 0.3715161243251131, "grad_norm": 1.1397828739197962, "learning_rate": 3.843702093715068e-05, "loss": 0.6012, "step": 12730 }, { "epoch": 0.3716620458193492, "grad_norm": 1.8619660240201024, "learning_rate": 3.8427531019040714e-05, "loss": 0.7072, "step": 12735 }, { "epoch": 0.3718079673135853, "grad_norm": 0.9742030449831929, "learning_rate": 3.841803855625105e-05, "loss": 0.6346, "step": 12740 }, { "epoch": 0.3719538888078214, "grad_norm": 3.271679807241188, "learning_rate": 3.8408543550992196e-05, "loss": 0.6249, "step": 12745 }, { "epoch": 0.37209981030205747, "grad_norm": 0.9656604098482288, "learning_rate": 3.839904600547524e-05, "loss": 0.6225, "step": 12750 }, { "epoch": 0.3722457317962936, "grad_norm": 0.9504054394895505, "learning_rate": 3.8389545921911865e-05, "loss": 0.6205, "step": 12755 }, { "epoch": 0.3723916532905297, "grad_norm": 0.9905034245827565, "learning_rate": 3.838004330251434e-05, "loss": 0.67, "step": 12760 }, { "epoch": 0.3725375747847658, "grad_norm": 1.3376309404289846, "learning_rate": 3.837053814949553e-05, "loss": 0.6285, "step": 12765 }, { "epoch": 0.3726834962790019, "grad_norm": 1.0004165256576445, "learning_rate": 3.8361030465068884e-05, "loss": 0.6298, "step": 12770 }, { "epoch": 0.372829417773238, "grad_norm": 1.04397337729708, "learning_rate": 3.835152025144844e-05, "loss": 0.5732, "step": 12775 }, { "epoch": 0.3729753392674741, "grad_norm": 1.0128129125340108, "learning_rate": 3.834200751084883e-05, "loss": 0.5578, "step": 12780 }, { "epoch": 0.3731212607617102, "grad_norm": 1.1896644231464701, "learning_rate": 3.833249224548529e-05, "loss": 0.659, "step": 12785 }, { "epoch": 0.3732671822559463, "grad_norm": 1.215454849919349, "learning_rate": 3.83229744575736e-05, "loss": 0.5604, "step": 12790 }, { "epoch": 0.3734131037501824, "grad_norm": 1.0591844084790443, "learning_rate": 3.831345414933018e-05, "loss": 0.6322, "step": 12795 }, { "epoch": 0.37355902524441853, "grad_norm": 1.0887968637760528, "learning_rate": 3.830393132297199e-05, "loss": 0.5991, "step": 12800 }, { "epoch": 0.3737049467386546, "grad_norm": 1.0022259438497505, "learning_rate": 3.829440598071662e-05, "loss": 0.6065, "step": 12805 }, { "epoch": 0.3738508682328907, "grad_norm": 1.0217075380911824, "learning_rate": 3.828487812478221e-05, "loss": 0.5708, "step": 12810 }, { "epoch": 0.3739967897271268, "grad_norm": 1.115545274492303, "learning_rate": 3.827534775738749e-05, "loss": 0.58, "step": 12815 }, { "epoch": 0.3741427112213629, "grad_norm": 0.9595017086464345, "learning_rate": 3.826581488075181e-05, "loss": 0.6343, "step": 12820 }, { "epoch": 0.37428863271559903, "grad_norm": 0.9549765546841054, "learning_rate": 3.825627949709505e-05, "loss": 0.5749, "step": 12825 }, { "epoch": 0.3744345542098351, "grad_norm": 1.0330177422368476, "learning_rate": 3.8246741608637726e-05, "loss": 0.6064, "step": 12830 }, { "epoch": 0.3745804757040712, "grad_norm": 0.9829673160402712, "learning_rate": 3.823720121760091e-05, "loss": 0.5857, "step": 12835 }, { "epoch": 0.3747263971983073, "grad_norm": 1.0281768797957225, "learning_rate": 3.822765832620624e-05, "loss": 0.5732, "step": 12840 }, { "epoch": 0.3748723186925434, "grad_norm": 0.9382774485290768, "learning_rate": 3.821811293667598e-05, "loss": 0.5803, "step": 12845 }, { "epoch": 0.37501824018677954, "grad_norm": 0.9635125686831632, "learning_rate": 3.820856505123293e-05, "loss": 0.5368, "step": 12850 }, { "epoch": 0.37516416168101563, "grad_norm": 1.1575095067689325, "learning_rate": 3.819901467210052e-05, "loss": 0.6313, "step": 12855 }, { "epoch": 0.3753100831752517, "grad_norm": 1.117952276145879, "learning_rate": 3.8189461801502715e-05, "loss": 0.626, "step": 12860 }, { "epoch": 0.3754560046694878, "grad_norm": 0.9977116637210961, "learning_rate": 3.817990644166408e-05, "loss": 0.5977, "step": 12865 }, { "epoch": 0.3756019261637239, "grad_norm": 1.0321914683689113, "learning_rate": 3.8170348594809765e-05, "loss": 0.6216, "step": 12870 }, { "epoch": 0.37574784765796004, "grad_norm": 0.9462924306626755, "learning_rate": 3.8160788263165496e-05, "loss": 0.6183, "step": 12875 }, { "epoch": 0.37589376915219613, "grad_norm": 1.1799633139763062, "learning_rate": 3.815122544895757e-05, "loss": 0.66, "step": 12880 }, { "epoch": 0.3760396906464322, "grad_norm": 1.001258722806958, "learning_rate": 3.8141660154412865e-05, "loss": 0.5556, "step": 12885 }, { "epoch": 0.3761856121406683, "grad_norm": 0.8791707175069545, "learning_rate": 3.8132092381758846e-05, "loss": 0.5547, "step": 12890 }, { "epoch": 0.3763315336349044, "grad_norm": 1.1322911472454005, "learning_rate": 3.812252213322353e-05, "loss": 0.6101, "step": 12895 }, { "epoch": 0.37647745512914055, "grad_norm": 0.9407462937266452, "learning_rate": 3.811294941103555e-05, "loss": 0.5881, "step": 12900 }, { "epoch": 0.37662337662337664, "grad_norm": 0.9709251372175559, "learning_rate": 3.810337421742408e-05, "loss": 0.6158, "step": 12905 }, { "epoch": 0.3767692981176127, "grad_norm": 0.9977510911272339, "learning_rate": 3.8093796554618885e-05, "loss": 0.573, "step": 12910 }, { "epoch": 0.3769152196118488, "grad_norm": 1.099417182577981, "learning_rate": 3.8084216424850314e-05, "loss": 0.5798, "step": 12915 }, { "epoch": 0.3770611411060849, "grad_norm": 1.3389044345815033, "learning_rate": 3.807463383034926e-05, "loss": 0.5727, "step": 12920 }, { "epoch": 0.37720706260032105, "grad_norm": 1.0067748495421789, "learning_rate": 3.806504877334722e-05, "loss": 0.6679, "step": 12925 }, { "epoch": 0.37735298409455714, "grad_norm": 0.9949862510734755, "learning_rate": 3.805546125607625e-05, "loss": 0.6035, "step": 12930 }, { "epoch": 0.37749890558879323, "grad_norm": 1.146551034871961, "learning_rate": 3.804587128076898e-05, "loss": 0.6039, "step": 12935 }, { "epoch": 0.3776448270830293, "grad_norm": 1.1597662231488113, "learning_rate": 3.803627884965863e-05, "loss": 0.6059, "step": 12940 }, { "epoch": 0.3777907485772654, "grad_norm": 0.968271936861751, "learning_rate": 3.802668396497896e-05, "loss": 0.6395, "step": 12945 }, { "epoch": 0.37793667007150156, "grad_norm": 1.072996036898714, "learning_rate": 3.801708662896433e-05, "loss": 0.6738, "step": 12950 }, { "epoch": 0.37808259156573765, "grad_norm": 0.9541908255595443, "learning_rate": 3.8007486843849635e-05, "loss": 0.5561, "step": 12955 }, { "epoch": 0.37822851305997374, "grad_norm": 1.075452140074578, "learning_rate": 3.79978846118704e-05, "loss": 0.6096, "step": 12960 }, { "epoch": 0.3783744345542098, "grad_norm": 1.0158755784704179, "learning_rate": 3.7988279935262656e-05, "loss": 0.5778, "step": 12965 }, { "epoch": 0.3785203560484459, "grad_norm": 0.9122407264209598, "learning_rate": 3.7978672816263045e-05, "loss": 0.6296, "step": 12970 }, { "epoch": 0.37866627754268206, "grad_norm": 0.9971090820513184, "learning_rate": 3.796906325710876e-05, "loss": 0.606, "step": 12975 }, { "epoch": 0.37881219903691815, "grad_norm": 0.9734110774785772, "learning_rate": 3.7959451260037564e-05, "loss": 0.5813, "step": 12980 }, { "epoch": 0.37895812053115424, "grad_norm": 0.9459167277518017, "learning_rate": 3.79498368272878e-05, "loss": 0.6637, "step": 12985 }, { "epoch": 0.37910404202539033, "grad_norm": 0.842421739030566, "learning_rate": 3.794021996109836e-05, "loss": 0.6037, "step": 12990 }, { "epoch": 0.3792499635196264, "grad_norm": 0.9501889940935216, "learning_rate": 3.793060066370871e-05, "loss": 0.533, "step": 12995 }, { "epoch": 0.37939588501386257, "grad_norm": 1.1191525290837185, "learning_rate": 3.7920978937358885e-05, "loss": 0.6292, "step": 13000 }, { "epoch": 0.37954180650809866, "grad_norm": 1.045038239294286, "learning_rate": 3.791135478428948e-05, "loss": 0.6813, "step": 13005 }, { "epoch": 0.37968772800233475, "grad_norm": 1.0565577569987286, "learning_rate": 3.7901728206741674e-05, "loss": 0.5849, "step": 13010 }, { "epoch": 0.37983364949657084, "grad_norm": 1.1456704421709585, "learning_rate": 3.789209920695717e-05, "loss": 0.6029, "step": 13015 }, { "epoch": 0.3799795709908069, "grad_norm": 1.0875918783375462, "learning_rate": 3.7882467787178284e-05, "loss": 0.6736, "step": 13020 }, { "epoch": 0.38012549248504307, "grad_norm": 0.9808879171610269, "learning_rate": 3.7872833949647856e-05, "loss": 0.6292, "step": 13025 }, { "epoch": 0.38027141397927916, "grad_norm": 0.9348055817099865, "learning_rate": 3.7863197696609314e-05, "loss": 0.5647, "step": 13030 }, { "epoch": 0.38041733547351525, "grad_norm": 0.9701751132538474, "learning_rate": 3.785355903030664e-05, "loss": 0.6907, "step": 13035 }, { "epoch": 0.38056325696775134, "grad_norm": 1.1600988361420812, "learning_rate": 3.784391795298437e-05, "loss": 0.619, "step": 13040 }, { "epoch": 0.38070917846198743, "grad_norm": 1.0131561001501868, "learning_rate": 3.783427446688762e-05, "loss": 0.6703, "step": 13045 }, { "epoch": 0.3808550999562236, "grad_norm": 1.1421043897462002, "learning_rate": 3.782462857426203e-05, "loss": 0.645, "step": 13050 }, { "epoch": 0.38100102145045966, "grad_norm": 1.221099821669162, "learning_rate": 3.7814980277353866e-05, "loss": 0.6311, "step": 13055 }, { "epoch": 0.38114694294469575, "grad_norm": 0.9774869555552376, "learning_rate": 3.780532957840989e-05, "loss": 0.5176, "step": 13060 }, { "epoch": 0.38129286443893184, "grad_norm": 1.209502800218115, "learning_rate": 3.7795676479677444e-05, "loss": 0.6076, "step": 13065 }, { "epoch": 0.38143878593316793, "grad_norm": 1.1527283419804677, "learning_rate": 3.778602098340445e-05, "loss": 0.6368, "step": 13070 }, { "epoch": 0.3815847074274041, "grad_norm": 1.0349343271925626, "learning_rate": 3.777636309183934e-05, "loss": 0.5743, "step": 13075 }, { "epoch": 0.38173062892164017, "grad_norm": 0.9815000757187583, "learning_rate": 3.776670280723117e-05, "loss": 0.5751, "step": 13080 }, { "epoch": 0.38187655041587626, "grad_norm": 1.1831942925259558, "learning_rate": 3.7757040131829505e-05, "loss": 0.6293, "step": 13085 }, { "epoch": 0.38202247191011235, "grad_norm": 0.8095494336732907, "learning_rate": 3.774737506788447e-05, "loss": 0.5888, "step": 13090 }, { "epoch": 0.38216839340434844, "grad_norm": 0.919947988340151, "learning_rate": 3.773770761764676e-05, "loss": 0.5536, "step": 13095 }, { "epoch": 0.3823143148985846, "grad_norm": 1.1331767343574564, "learning_rate": 3.772803778336762e-05, "loss": 0.5538, "step": 13100 }, { "epoch": 0.3824602363928207, "grad_norm": 1.051309810678946, "learning_rate": 3.771836556729887e-05, "loss": 0.6258, "step": 13105 }, { "epoch": 0.38260615788705676, "grad_norm": 1.0200278746090872, "learning_rate": 3.770869097169284e-05, "loss": 0.6071, "step": 13110 }, { "epoch": 0.38275207938129285, "grad_norm": 0.8729450912026568, "learning_rate": 3.769901399880245e-05, "loss": 0.6236, "step": 13115 }, { "epoch": 0.38289800087552894, "grad_norm": 1.2662868661938753, "learning_rate": 3.768933465088116e-05, "loss": 0.6407, "step": 13120 }, { "epoch": 0.3830439223697651, "grad_norm": 1.059206088634198, "learning_rate": 3.7679652930183014e-05, "loss": 0.5934, "step": 13125 }, { "epoch": 0.3831898438640012, "grad_norm": 0.9353408834517517, "learning_rate": 3.766996883896254e-05, "loss": 0.5889, "step": 13130 }, { "epoch": 0.38333576535823727, "grad_norm": 1.1972273175383707, "learning_rate": 3.766028237947489e-05, "loss": 0.6274, "step": 13135 }, { "epoch": 0.38348168685247336, "grad_norm": 1.0268512914602315, "learning_rate": 3.765059355397572e-05, "loss": 0.6316, "step": 13140 }, { "epoch": 0.38362760834670945, "grad_norm": 1.0691451064709403, "learning_rate": 3.764090236472127e-05, "loss": 0.5622, "step": 13145 }, { "epoch": 0.3837735298409456, "grad_norm": 0.9874043835814377, "learning_rate": 3.7631208813968286e-05, "loss": 0.568, "step": 13150 }, { "epoch": 0.3839194513351817, "grad_norm": 1.0102520648377, "learning_rate": 3.7621512903974126e-05, "loss": 0.6261, "step": 13155 }, { "epoch": 0.3840653728294178, "grad_norm": 1.0651339557817885, "learning_rate": 3.761181463699664e-05, "loss": 0.6368, "step": 13160 }, { "epoch": 0.38421129432365386, "grad_norm": 1.1024669067981412, "learning_rate": 3.760211401529427e-05, "loss": 0.6402, "step": 13165 }, { "epoch": 0.38435721581788995, "grad_norm": 0.9111714021220461, "learning_rate": 3.759241104112597e-05, "loss": 0.5538, "step": 13170 }, { "epoch": 0.3845031373121261, "grad_norm": 0.8618220241855702, "learning_rate": 3.758270571675127e-05, "loss": 0.6663, "step": 13175 }, { "epoch": 0.3846490588063622, "grad_norm": 1.0005318912144738, "learning_rate": 3.757299804443022e-05, "loss": 0.6432, "step": 13180 }, { "epoch": 0.3847949803005983, "grad_norm": 1.023621737111874, "learning_rate": 3.756328802642345e-05, "loss": 0.5995, "step": 13185 }, { "epoch": 0.38494090179483437, "grad_norm": 1.0275594622875548, "learning_rate": 3.755357566499211e-05, "loss": 0.5664, "step": 13190 }, { "epoch": 0.38508682328907046, "grad_norm": 1.074611049274864, "learning_rate": 3.754386096239792e-05, "loss": 0.5693, "step": 13195 }, { "epoch": 0.3852327447833066, "grad_norm": 1.046615633320191, "learning_rate": 3.753414392090311e-05, "loss": 0.6139, "step": 13200 }, { "epoch": 0.3853786662775427, "grad_norm": 1.003972254791306, "learning_rate": 3.7524424542770474e-05, "loss": 0.6088, "step": 13205 }, { "epoch": 0.3855245877717788, "grad_norm": 0.9510938384382496, "learning_rate": 3.751470283026336e-05, "loss": 0.609, "step": 13210 }, { "epoch": 0.38567050926601487, "grad_norm": 1.1241891382002371, "learning_rate": 3.750497878564566e-05, "loss": 0.6409, "step": 13215 }, { "epoch": 0.38581643076025096, "grad_norm": 1.213772879456788, "learning_rate": 3.749525241118178e-05, "loss": 0.6054, "step": 13220 }, { "epoch": 0.3859623522544871, "grad_norm": 1.0136068535027045, "learning_rate": 3.7485523709136703e-05, "loss": 0.617, "step": 13225 }, { "epoch": 0.3861082737487232, "grad_norm": 1.1194260148829809, "learning_rate": 3.747579268177592e-05, "loss": 0.6222, "step": 13230 }, { "epoch": 0.3862541952429593, "grad_norm": 1.226612637197211, "learning_rate": 3.7466059331365514e-05, "loss": 0.6664, "step": 13235 }, { "epoch": 0.3864001167371954, "grad_norm": 1.0707023686878625, "learning_rate": 3.745632366017205e-05, "loss": 0.5931, "step": 13240 }, { "epoch": 0.38654603823143147, "grad_norm": 1.0337206648806554, "learning_rate": 3.744658567046267e-05, "loss": 0.5973, "step": 13245 }, { "epoch": 0.3866919597256676, "grad_norm": 1.0135370598491595, "learning_rate": 3.743684536450505e-05, "loss": 0.6459, "step": 13250 }, { "epoch": 0.3868378812199037, "grad_norm": 0.9559325574631224, "learning_rate": 3.7427102744567383e-05, "loss": 0.5423, "step": 13255 }, { "epoch": 0.3869838027141398, "grad_norm": 1.0825917244953278, "learning_rate": 3.7417357812918446e-05, "loss": 0.6121, "step": 13260 }, { "epoch": 0.3871297242083759, "grad_norm": 1.2495067830388449, "learning_rate": 3.740761057182751e-05, "loss": 0.6613, "step": 13265 }, { "epoch": 0.38727564570261197, "grad_norm": 0.953704618028202, "learning_rate": 3.7397861023564426e-05, "loss": 0.6447, "step": 13270 }, { "epoch": 0.3874215671968481, "grad_norm": 1.1364379587081346, "learning_rate": 3.7388109170399536e-05, "loss": 0.6375, "step": 13275 }, { "epoch": 0.3875674886910842, "grad_norm": 1.022040496706585, "learning_rate": 3.7378355014603734e-05, "loss": 0.634, "step": 13280 }, { "epoch": 0.3877134101853203, "grad_norm": 1.0323467662871728, "learning_rate": 3.7368598558448484e-05, "loss": 0.5829, "step": 13285 }, { "epoch": 0.3878593316795564, "grad_norm": 1.1454618317949399, "learning_rate": 3.735883980420574e-05, "loss": 0.5941, "step": 13290 }, { "epoch": 0.3880052531737925, "grad_norm": 0.9639361559463864, "learning_rate": 3.7349078754148023e-05, "loss": 0.5669, "step": 13295 }, { "epoch": 0.3881511746680286, "grad_norm": 1.0453915296987675, "learning_rate": 3.7339315410548356e-05, "loss": 0.6212, "step": 13300 }, { "epoch": 0.3882970961622647, "grad_norm": 1.2666039441616714, "learning_rate": 3.732954977568034e-05, "loss": 0.5824, "step": 13305 }, { "epoch": 0.3884430176565008, "grad_norm": 0.9494146600552643, "learning_rate": 3.731978185181807e-05, "loss": 0.5716, "step": 13310 }, { "epoch": 0.3885889391507369, "grad_norm": 1.0670522568509824, "learning_rate": 3.73100116412362e-05, "loss": 0.5944, "step": 13315 }, { "epoch": 0.388734860644973, "grad_norm": 1.1133037954436609, "learning_rate": 3.730023914620989e-05, "loss": 0.6541, "step": 13320 }, { "epoch": 0.3888807821392091, "grad_norm": 1.3443072366828142, "learning_rate": 3.729046436901486e-05, "loss": 0.6463, "step": 13325 }, { "epoch": 0.3890267036334452, "grad_norm": 1.0503781254569338, "learning_rate": 3.7280687311927345e-05, "loss": 0.5526, "step": 13330 }, { "epoch": 0.3891726251276813, "grad_norm": 1.0326084446996835, "learning_rate": 3.727090797722412e-05, "loss": 0.6362, "step": 13335 }, { "epoch": 0.3893185466219174, "grad_norm": 1.1288020408681696, "learning_rate": 3.726112636718249e-05, "loss": 0.6272, "step": 13340 }, { "epoch": 0.3894644681161535, "grad_norm": 1.0641342870016819, "learning_rate": 3.725134248408027e-05, "loss": 0.6002, "step": 13345 }, { "epoch": 0.38961038961038963, "grad_norm": 1.1762173072742437, "learning_rate": 3.724155633019583e-05, "loss": 0.6268, "step": 13350 }, { "epoch": 0.3897563111046257, "grad_norm": 1.023280310855562, "learning_rate": 3.723176790780806e-05, "loss": 0.5585, "step": 13355 }, { "epoch": 0.3899022325988618, "grad_norm": 0.8527399562964203, "learning_rate": 3.7221977219196366e-05, "loss": 0.5913, "step": 13360 }, { "epoch": 0.3900481540930979, "grad_norm": 0.9705276841256729, "learning_rate": 3.721218426664071e-05, "loss": 0.5933, "step": 13365 }, { "epoch": 0.390194075587334, "grad_norm": 0.9427855587160271, "learning_rate": 3.7202389052421546e-05, "loss": 0.5633, "step": 13370 }, { "epoch": 0.39033999708157013, "grad_norm": 1.0948552019642581, "learning_rate": 3.71925915788199e-05, "loss": 0.6108, "step": 13375 }, { "epoch": 0.3904859185758062, "grad_norm": 0.964746014766271, "learning_rate": 3.7182791848117266e-05, "loss": 0.5766, "step": 13380 }, { "epoch": 0.3906318400700423, "grad_norm": 1.2018675643885772, "learning_rate": 3.71729898625957e-05, "loss": 0.6777, "step": 13385 }, { "epoch": 0.3907777615642784, "grad_norm": 1.1017440196022457, "learning_rate": 3.71631856245378e-05, "loss": 0.6375, "step": 13390 }, { "epoch": 0.3909236830585145, "grad_norm": 1.1664211869880705, "learning_rate": 3.7153379136226633e-05, "loss": 0.6443, "step": 13395 }, { "epoch": 0.39106960455275064, "grad_norm": 1.1285230780410467, "learning_rate": 3.714357039994585e-05, "loss": 0.6808, "step": 13400 }, { "epoch": 0.39121552604698673, "grad_norm": 0.9703363072407105, "learning_rate": 3.713375941797958e-05, "loss": 0.5831, "step": 13405 }, { "epoch": 0.3913614475412228, "grad_norm": 0.8893430594707917, "learning_rate": 3.7123946192612505e-05, "loss": 0.5768, "step": 13410 }, { "epoch": 0.3915073690354589, "grad_norm": 0.8641656756119463, "learning_rate": 3.711413072612982e-05, "loss": 0.5865, "step": 13415 }, { "epoch": 0.391653290529695, "grad_norm": 0.9433621627765796, "learning_rate": 3.710431302081723e-05, "loss": 0.674, "step": 13420 }, { "epoch": 0.39179921202393114, "grad_norm": 1.066283511052582, "learning_rate": 3.709449307896098e-05, "loss": 0.5443, "step": 13425 }, { "epoch": 0.39194513351816723, "grad_norm": 0.9551737816832169, "learning_rate": 3.708467090284781e-05, "loss": 0.5382, "step": 13430 }, { "epoch": 0.3920910550124033, "grad_norm": 1.0502458196011253, "learning_rate": 3.7074846494765015e-05, "loss": 0.676, "step": 13435 }, { "epoch": 0.3922369765066394, "grad_norm": 1.0125396367129404, "learning_rate": 3.706501985700039e-05, "loss": 0.6583, "step": 13440 }, { "epoch": 0.3923828980008755, "grad_norm": 0.9747712792982066, "learning_rate": 3.7055190991842245e-05, "loss": 0.5672, "step": 13445 }, { "epoch": 0.39252881949511165, "grad_norm": 1.1151222065630897, "learning_rate": 3.704535990157941e-05, "loss": 0.5651, "step": 13450 }, { "epoch": 0.39267474098934774, "grad_norm": 1.0300698781354214, "learning_rate": 3.703552658850126e-05, "loss": 0.5683, "step": 13455 }, { "epoch": 0.3928206624835838, "grad_norm": 1.1914322222012972, "learning_rate": 3.7025691054897644e-05, "loss": 0.6332, "step": 13460 }, { "epoch": 0.3929665839778199, "grad_norm": 0.9662409733925251, "learning_rate": 3.701585330305895e-05, "loss": 0.576, "step": 13465 }, { "epoch": 0.393112505472056, "grad_norm": 1.1915595893841633, "learning_rate": 3.700601333527609e-05, "loss": 0.6191, "step": 13470 }, { "epoch": 0.39325842696629215, "grad_norm": 1.1273482770823606, "learning_rate": 3.6996171153840486e-05, "loss": 0.5853, "step": 13475 }, { "epoch": 0.39340434846052824, "grad_norm": 1.0481133573851806, "learning_rate": 3.698632676104407e-05, "loss": 0.5901, "step": 13480 }, { "epoch": 0.39355026995476433, "grad_norm": 1.025377981232465, "learning_rate": 3.69764801591793e-05, "loss": 0.5787, "step": 13485 }, { "epoch": 0.3936961914490004, "grad_norm": 1.089141628678364, "learning_rate": 3.696663135053912e-05, "loss": 0.5804, "step": 13490 }, { "epoch": 0.39384211294323657, "grad_norm": 0.9607370785168048, "learning_rate": 3.695678033741704e-05, "loss": 0.6499, "step": 13495 }, { "epoch": 0.39398803443747266, "grad_norm": 1.0180811514253796, "learning_rate": 3.694692712210702e-05, "loss": 0.578, "step": 13500 }, { "epoch": 0.39413395593170875, "grad_norm": 0.973042457466423, "learning_rate": 3.6937071706903594e-05, "loss": 0.6045, "step": 13505 }, { "epoch": 0.39427987742594484, "grad_norm": 0.9688845182368967, "learning_rate": 3.692721409410176e-05, "loss": 0.6066, "step": 13510 }, { "epoch": 0.3944257989201809, "grad_norm": 0.9883454293206793, "learning_rate": 3.691735428599705e-05, "loss": 0.6406, "step": 13515 }, { "epoch": 0.39457172041441707, "grad_norm": 0.829441584996368, "learning_rate": 3.690749228488552e-05, "loss": 0.5494, "step": 13520 }, { "epoch": 0.39471764190865316, "grad_norm": 1.0275314433702603, "learning_rate": 3.689762809306371e-05, "loss": 0.6194, "step": 13525 }, { "epoch": 0.39486356340288925, "grad_norm": 1.031530453016531, "learning_rate": 3.6887761712828664e-05, "loss": 0.5895, "step": 13530 }, { "epoch": 0.39500948489712534, "grad_norm": 0.881401894301783, "learning_rate": 3.687789314647798e-05, "loss": 0.5452, "step": 13535 }, { "epoch": 0.39515540639136143, "grad_norm": 1.115580208343848, "learning_rate": 3.686802239630973e-05, "loss": 0.5874, "step": 13540 }, { "epoch": 0.3953013278855976, "grad_norm": 0.9796559102548387, "learning_rate": 3.68581494646225e-05, "loss": 0.5827, "step": 13545 }, { "epoch": 0.39544724937983367, "grad_norm": 1.0259949206107621, "learning_rate": 3.684827435371538e-05, "loss": 0.6274, "step": 13550 }, { "epoch": 0.39559317087406975, "grad_norm": 1.0634990670213518, "learning_rate": 3.6838397065887984e-05, "loss": 0.5929, "step": 13555 }, { "epoch": 0.39573909236830584, "grad_norm": 1.0055012700306751, "learning_rate": 3.682851760344043e-05, "loss": 0.6009, "step": 13560 }, { "epoch": 0.39588501386254193, "grad_norm": 0.894091094982307, "learning_rate": 3.681863596867331e-05, "loss": 0.5433, "step": 13565 }, { "epoch": 0.3960309353567781, "grad_norm": 0.9535542740351096, "learning_rate": 3.680875216388777e-05, "loss": 0.5813, "step": 13570 }, { "epoch": 0.39617685685101417, "grad_norm": 0.9829963185440888, "learning_rate": 3.6798866191385425e-05, "loss": 0.6063, "step": 13575 }, { "epoch": 0.39632277834525026, "grad_norm": 1.104239828591839, "learning_rate": 3.678897805346842e-05, "loss": 0.6735, "step": 13580 }, { "epoch": 0.39646869983948635, "grad_norm": 1.2819688282238808, "learning_rate": 3.6779087752439383e-05, "loss": 0.6639, "step": 13585 }, { "epoch": 0.39661462133372244, "grad_norm": 0.9217824514390351, "learning_rate": 3.6769195290601466e-05, "loss": 0.5527, "step": 13590 }, { "epoch": 0.3967605428279586, "grad_norm": 1.0587990042490922, "learning_rate": 3.67593006702583e-05, "loss": 0.602, "step": 13595 }, { "epoch": 0.3969064643221947, "grad_norm": 1.0898791746846404, "learning_rate": 3.6749403893714037e-05, "loss": 0.6227, "step": 13600 }, { "epoch": 0.39705238581643076, "grad_norm": 1.0320325658931797, "learning_rate": 3.673950496327333e-05, "loss": 0.6285, "step": 13605 }, { "epoch": 0.39719830731066685, "grad_norm": 0.984491998091008, "learning_rate": 3.672960388124133e-05, "loss": 0.6877, "step": 13610 }, { "epoch": 0.39734422880490294, "grad_norm": 0.8440649500665628, "learning_rate": 3.671970064992369e-05, "loss": 0.59, "step": 13615 }, { "epoch": 0.3974901502991391, "grad_norm": 0.9771649580000522, "learning_rate": 3.670979527162655e-05, "loss": 0.6197, "step": 13620 }, { "epoch": 0.3976360717933752, "grad_norm": 0.9541157363617031, "learning_rate": 3.669988774865658e-05, "loss": 0.5667, "step": 13625 }, { "epoch": 0.39778199328761127, "grad_norm": 0.9310572266318097, "learning_rate": 3.668997808332092e-05, "loss": 0.5754, "step": 13630 }, { "epoch": 0.39792791478184736, "grad_norm": 1.111370438445708, "learning_rate": 3.668006627792722e-05, "loss": 0.6323, "step": 13635 }, { "epoch": 0.39807383627608345, "grad_norm": 1.3663612148113984, "learning_rate": 3.667015233478364e-05, "loss": 0.6658, "step": 13640 }, { "epoch": 0.3982197577703196, "grad_norm": 1.0311973801675742, "learning_rate": 3.666023625619881e-05, "loss": 0.6008, "step": 13645 }, { "epoch": 0.3983656792645557, "grad_norm": 1.3123705487764714, "learning_rate": 3.6650318044481894e-05, "loss": 0.6126, "step": 13650 }, { "epoch": 0.3985116007587918, "grad_norm": 1.2384923137253803, "learning_rate": 3.664039770194251e-05, "loss": 0.6247, "step": 13655 }, { "epoch": 0.39865752225302786, "grad_norm": 0.8722670468896063, "learning_rate": 3.663047523089081e-05, "loss": 0.6215, "step": 13660 }, { "epoch": 0.39880344374726395, "grad_norm": 0.9853884733120933, "learning_rate": 3.662055063363744e-05, "loss": 0.6233, "step": 13665 }, { "epoch": 0.3989493652415001, "grad_norm": 1.154867516607522, "learning_rate": 3.6610623912493504e-05, "loss": 0.6154, "step": 13670 }, { "epoch": 0.3990952867357362, "grad_norm": 1.204579376537676, "learning_rate": 3.660069506977063e-05, "loss": 0.6879, "step": 13675 }, { "epoch": 0.3992412082299723, "grad_norm": 1.145256755755369, "learning_rate": 3.6590764107780935e-05, "loss": 0.5878, "step": 13680 }, { "epoch": 0.39938712972420837, "grad_norm": 0.988170369728385, "learning_rate": 3.658083102883703e-05, "loss": 0.5949, "step": 13685 }, { "epoch": 0.39953305121844446, "grad_norm": 1.1473880230341127, "learning_rate": 3.6570895835252026e-05, "loss": 0.6305, "step": 13690 }, { "epoch": 0.3996789727126806, "grad_norm": 1.1801438201474737, "learning_rate": 3.656095852933951e-05, "loss": 0.5602, "step": 13695 }, { "epoch": 0.3998248942069167, "grad_norm": 1.0924793581340302, "learning_rate": 3.6551019113413566e-05, "loss": 0.6096, "step": 13700 }, { "epoch": 0.3999708157011528, "grad_norm": 1.0027412796223487, "learning_rate": 3.654107758978878e-05, "loss": 0.6331, "step": 13705 }, { "epoch": 0.40011673719538887, "grad_norm": 0.994242794278382, "learning_rate": 3.6531133960780226e-05, "loss": 0.6325, "step": 13710 }, { "epoch": 0.40026265868962496, "grad_norm": 1.1191020572553931, "learning_rate": 3.652118822870344e-05, "loss": 0.5738, "step": 13715 }, { "epoch": 0.4004085801838611, "grad_norm": 0.9296404937289181, "learning_rate": 3.6511240395874495e-05, "loss": 0.6328, "step": 13720 }, { "epoch": 0.4005545016780972, "grad_norm": 1.0750205733649723, "learning_rate": 3.6501290464609926e-05, "loss": 0.5833, "step": 13725 }, { "epoch": 0.4007004231723333, "grad_norm": 0.9192040161905642, "learning_rate": 3.6491338437226743e-05, "loss": 0.6081, "step": 13730 }, { "epoch": 0.4008463446665694, "grad_norm": 1.1610054487960284, "learning_rate": 3.6481384316042496e-05, "loss": 0.6424, "step": 13735 }, { "epoch": 0.40099226616080547, "grad_norm": 0.9795576044721311, "learning_rate": 3.647142810337516e-05, "loss": 0.5698, "step": 13740 }, { "epoch": 0.4011381876550416, "grad_norm": 0.934323468674151, "learning_rate": 3.646146980154323e-05, "loss": 0.5997, "step": 13745 }, { "epoch": 0.4012841091492777, "grad_norm": 0.9576380261424753, "learning_rate": 3.6451509412865684e-05, "loss": 0.5465, "step": 13750 }, { "epoch": 0.4014300306435138, "grad_norm": 1.0898459220001486, "learning_rate": 3.644154693966199e-05, "loss": 0.5873, "step": 13755 }, { "epoch": 0.4015759521377499, "grad_norm": 0.9479643965150694, "learning_rate": 3.643158238425209e-05, "loss": 0.5547, "step": 13760 }, { "epoch": 0.40172187363198597, "grad_norm": 0.9619721874819919, "learning_rate": 3.6421615748956417e-05, "loss": 0.5495, "step": 13765 }, { "epoch": 0.4018677951262221, "grad_norm": 1.125480959699391, "learning_rate": 3.6411647036095886e-05, "loss": 0.5903, "step": 13770 }, { "epoch": 0.4020137166204582, "grad_norm": 0.8793812411167892, "learning_rate": 3.6401676247991916e-05, "loss": 0.5591, "step": 13775 }, { "epoch": 0.4021596381146943, "grad_norm": 1.126724265902178, "learning_rate": 3.639170338696637e-05, "loss": 0.5857, "step": 13780 }, { "epoch": 0.4023055596089304, "grad_norm": 1.0181733665073054, "learning_rate": 3.638172845534161e-05, "loss": 0.5652, "step": 13785 }, { "epoch": 0.4024514811031665, "grad_norm": 0.895455295310825, "learning_rate": 3.63717514554405e-05, "loss": 0.5866, "step": 13790 }, { "epoch": 0.4025974025974026, "grad_norm": 0.9914312052974177, "learning_rate": 3.636177238958638e-05, "loss": 0.5689, "step": 13795 }, { "epoch": 0.4027433240916387, "grad_norm": 1.0567095521583312, "learning_rate": 3.635179126010303e-05, "loss": 0.5529, "step": 13800 }, { "epoch": 0.4028892455858748, "grad_norm": 0.9378974422761764, "learning_rate": 3.634180806931478e-05, "loss": 0.6413, "step": 13805 }, { "epoch": 0.4030351670801109, "grad_norm": 0.9975682218349748, "learning_rate": 3.633182281954638e-05, "loss": 0.6203, "step": 13810 }, { "epoch": 0.403181088574347, "grad_norm": 0.9271969131038335, "learning_rate": 3.6321835513123084e-05, "loss": 0.6169, "step": 13815 }, { "epoch": 0.4033270100685831, "grad_norm": 1.0514958083973167, "learning_rate": 3.631184615237062e-05, "loss": 0.5748, "step": 13820 }, { "epoch": 0.4034729315628192, "grad_norm": 0.9259155342573695, "learning_rate": 3.630185473961521e-05, "loss": 0.6272, "step": 13825 }, { "epoch": 0.4036188530570553, "grad_norm": 1.3429468789369587, "learning_rate": 3.629186127718353e-05, "loss": 0.5878, "step": 13830 }, { "epoch": 0.4037647745512914, "grad_norm": 0.9380880442445885, "learning_rate": 3.628186576740274e-05, "loss": 0.5697, "step": 13835 }, { "epoch": 0.4039106960455275, "grad_norm": 0.8733286759127851, "learning_rate": 3.6271868212600506e-05, "loss": 0.5756, "step": 13840 }, { "epoch": 0.40405661753976363, "grad_norm": 0.8677100655425329, "learning_rate": 3.626186861510492e-05, "loss": 0.5867, "step": 13845 }, { "epoch": 0.4042025390339997, "grad_norm": 0.9315842074886852, "learning_rate": 3.625186697724458e-05, "loss": 0.5962, "step": 13850 }, { "epoch": 0.4043484605282358, "grad_norm": 0.92928993964647, "learning_rate": 3.624186330134856e-05, "loss": 0.6267, "step": 13855 }, { "epoch": 0.4044943820224719, "grad_norm": 1.0136256621626847, "learning_rate": 3.62318575897464e-05, "loss": 0.5846, "step": 13860 }, { "epoch": 0.404640303516708, "grad_norm": 1.0088796088920677, "learning_rate": 3.622184984476812e-05, "loss": 0.6267, "step": 13865 }, { "epoch": 0.40478622501094413, "grad_norm": 0.902065045847803, "learning_rate": 3.62118400687442e-05, "loss": 0.5337, "step": 13870 }, { "epoch": 0.4049321465051802, "grad_norm": 0.9705934211972295, "learning_rate": 3.6201828264005614e-05, "loss": 0.6258, "step": 13875 }, { "epoch": 0.4050780679994163, "grad_norm": 1.0283730795958412, "learning_rate": 3.61918144328838e-05, "loss": 0.6136, "step": 13880 }, { "epoch": 0.4052239894936524, "grad_norm": 0.9397230067689413, "learning_rate": 3.6181798577710644e-05, "loss": 0.5573, "step": 13885 }, { "epoch": 0.4053699109878885, "grad_norm": 1.6922130444161771, "learning_rate": 3.617178070081856e-05, "loss": 0.6131, "step": 13890 }, { "epoch": 0.40551583248212464, "grad_norm": 1.0096660556690698, "learning_rate": 3.616176080454036e-05, "loss": 0.6192, "step": 13895 }, { "epoch": 0.40566175397636073, "grad_norm": 1.0451144359005133, "learning_rate": 3.6151738891209394e-05, "loss": 0.6493, "step": 13900 }, { "epoch": 0.4058076754705968, "grad_norm": 1.0319053984768454, "learning_rate": 3.614171496315944e-05, "loss": 0.5912, "step": 13905 }, { "epoch": 0.4059535969648329, "grad_norm": 0.8879732590190528, "learning_rate": 3.6131689022724764e-05, "loss": 0.6598, "step": 13910 }, { "epoch": 0.406099518459069, "grad_norm": 0.9775436158322994, "learning_rate": 3.6121661072240076e-05, "loss": 0.573, "step": 13915 }, { "epoch": 0.40624543995330514, "grad_norm": 1.083073518810078, "learning_rate": 3.611163111404059e-05, "loss": 0.5962, "step": 13920 }, { "epoch": 0.40639136144754123, "grad_norm": 1.0182637229742864, "learning_rate": 3.610159915046195e-05, "loss": 0.5993, "step": 13925 }, { "epoch": 0.4065372829417773, "grad_norm": 1.1678979149989746, "learning_rate": 3.6091565183840305e-05, "loss": 0.5442, "step": 13930 }, { "epoch": 0.4066832044360134, "grad_norm": 1.1390569990514423, "learning_rate": 3.608152921651224e-05, "loss": 0.6341, "step": 13935 }, { "epoch": 0.4068291259302495, "grad_norm": 1.0747654773090607, "learning_rate": 3.607149125081483e-05, "loss": 0.6095, "step": 13940 }, { "epoch": 0.40697504742448565, "grad_norm": 1.0630998548921433, "learning_rate": 3.606145128908558e-05, "loss": 0.5796, "step": 13945 }, { "epoch": 0.40712096891872174, "grad_norm": 1.023127529672732, "learning_rate": 3.6051409333662503e-05, "loss": 0.5564, "step": 13950 }, { "epoch": 0.4072668904129578, "grad_norm": 0.9636646909808338, "learning_rate": 3.6041365386884055e-05, "loss": 0.6323, "step": 13955 }, { "epoch": 0.4074128119071939, "grad_norm": 1.0851052170394584, "learning_rate": 3.603131945108915e-05, "loss": 0.6588, "step": 13960 }, { "epoch": 0.40755873340143, "grad_norm": 1.0394377436243798, "learning_rate": 3.602127152861717e-05, "loss": 0.6954, "step": 13965 }, { "epoch": 0.40770465489566615, "grad_norm": 1.195528184638518, "learning_rate": 3.601122162180796e-05, "loss": 0.6763, "step": 13970 }, { "epoch": 0.40785057638990224, "grad_norm": 0.9889296116803785, "learning_rate": 3.600116973300185e-05, "loss": 0.6158, "step": 13975 }, { "epoch": 0.40799649788413833, "grad_norm": 1.2071272670170186, "learning_rate": 3.599111586453957e-05, "loss": 0.6344, "step": 13980 }, { "epoch": 0.4081424193783744, "grad_norm": 1.0953133997781315, "learning_rate": 3.598106001876238e-05, "loss": 0.5506, "step": 13985 }, { "epoch": 0.4082883408726105, "grad_norm": 1.1085463464816405, "learning_rate": 3.597100219801197e-05, "loss": 0.6358, "step": 13990 }, { "epoch": 0.40843426236684666, "grad_norm": 0.8556361731608682, "learning_rate": 3.5960942404630497e-05, "loss": 0.5673, "step": 13995 }, { "epoch": 0.40858018386108275, "grad_norm": 1.1799374011645758, "learning_rate": 3.595088064096055e-05, "loss": 0.6093, "step": 14000 }, { "epoch": 0.40872610535531884, "grad_norm": 1.0177815942960753, "learning_rate": 3.594081690934521e-05, "loss": 0.5833, "step": 14005 }, { "epoch": 0.4088720268495549, "grad_norm": 1.1309553338843712, "learning_rate": 3.593075121212801e-05, "loss": 0.6023, "step": 14010 }, { "epoch": 0.409017948343791, "grad_norm": 0.9500253907375841, "learning_rate": 3.592068355165292e-05, "loss": 0.5598, "step": 14015 }, { "epoch": 0.40916386983802716, "grad_norm": 0.9973913241208755, "learning_rate": 3.59106139302644e-05, "loss": 0.5489, "step": 14020 }, { "epoch": 0.40930979133226325, "grad_norm": 1.0308698393981637, "learning_rate": 3.590054235030734e-05, "loss": 0.5778, "step": 14025 }, { "epoch": 0.40945571282649934, "grad_norm": 1.0117583811466544, "learning_rate": 3.589046881412711e-05, "loss": 0.5796, "step": 14030 }, { "epoch": 0.40960163432073543, "grad_norm": 1.00842219251295, "learning_rate": 3.58803933240695e-05, "loss": 0.6385, "step": 14035 }, { "epoch": 0.4097475558149715, "grad_norm": 1.1053934821537055, "learning_rate": 3.587031588248079e-05, "loss": 0.6243, "step": 14040 }, { "epoch": 0.40989347730920767, "grad_norm": 1.188126522612844, "learning_rate": 3.5860236491707686e-05, "loss": 0.6117, "step": 14045 }, { "epoch": 0.41003939880344376, "grad_norm": 1.0794860725213764, "learning_rate": 3.585015515409738e-05, "loss": 0.6048, "step": 14050 }, { "epoch": 0.41018532029767985, "grad_norm": 0.8496648965780355, "learning_rate": 3.5840071871997485e-05, "loss": 0.5895, "step": 14055 }, { "epoch": 0.41033124179191593, "grad_norm": 0.9699657408285617, "learning_rate": 3.58299866477561e-05, "loss": 0.6194, "step": 14060 }, { "epoch": 0.410477163286152, "grad_norm": 1.4890033837865122, "learning_rate": 3.581989948372175e-05, "loss": 0.6413, "step": 14065 }, { "epoch": 0.41062308478038817, "grad_norm": 0.9635370470138371, "learning_rate": 3.5809810382243395e-05, "loss": 0.6343, "step": 14070 }, { "epoch": 0.41076900627462426, "grad_norm": 0.9846049198950082, "learning_rate": 3.5799719345670514e-05, "loss": 0.5887, "step": 14075 }, { "epoch": 0.41091492776886035, "grad_norm": 1.1979670651968295, "learning_rate": 3.578962637635295e-05, "loss": 0.6117, "step": 14080 }, { "epoch": 0.41106084926309644, "grad_norm": 1.0409560722513993, "learning_rate": 3.5779531476641076e-05, "loss": 0.5827, "step": 14085 }, { "epoch": 0.41120677075733253, "grad_norm": 1.1542669307522804, "learning_rate": 3.576943464888566e-05, "loss": 0.5933, "step": 14090 }, { "epoch": 0.4113526922515687, "grad_norm": 1.1638165516107504, "learning_rate": 3.575933589543794e-05, "loss": 0.6201, "step": 14095 }, { "epoch": 0.41149861374580476, "grad_norm": 1.0427308156340584, "learning_rate": 3.57492352186496e-05, "loss": 0.5648, "step": 14100 }, { "epoch": 0.41164453524004085, "grad_norm": 1.0174082723047517, "learning_rate": 3.573913262087276e-05, "loss": 0.5764, "step": 14105 }, { "epoch": 0.41179045673427694, "grad_norm": 1.063599536068323, "learning_rate": 3.572902810446002e-05, "loss": 0.5934, "step": 14110 }, { "epoch": 0.41193637822851303, "grad_norm": 1.103158390089056, "learning_rate": 3.571892167176438e-05, "loss": 0.6199, "step": 14115 }, { "epoch": 0.4120822997227492, "grad_norm": 1.0748310281525493, "learning_rate": 3.570881332513933e-05, "loss": 0.5754, "step": 14120 }, { "epoch": 0.41222822121698527, "grad_norm": 1.1293010474353045, "learning_rate": 3.569870306693879e-05, "loss": 0.6522, "step": 14125 }, { "epoch": 0.41237414271122136, "grad_norm": 0.9065958344337489, "learning_rate": 3.568859089951711e-05, "loss": 0.5756, "step": 14130 }, { "epoch": 0.41252006420545745, "grad_norm": 1.078713814030312, "learning_rate": 3.567847682522911e-05, "loss": 0.6294, "step": 14135 }, { "epoch": 0.41266598569969354, "grad_norm": 1.0503517984580948, "learning_rate": 3.5668360846430025e-05, "loss": 0.5431, "step": 14140 }, { "epoch": 0.4128119071939297, "grad_norm": 1.05780503030239, "learning_rate": 3.5658242965475555e-05, "loss": 0.5895, "step": 14145 }, { "epoch": 0.4129578286881658, "grad_norm": 1.0798976387133552, "learning_rate": 3.564812318472185e-05, "loss": 0.6087, "step": 14150 }, { "epoch": 0.41310375018240186, "grad_norm": 1.0301023869953623, "learning_rate": 3.563800150652547e-05, "loss": 0.5844, "step": 14155 }, { "epoch": 0.41324967167663795, "grad_norm": 0.9661610362243159, "learning_rate": 3.562787793324346e-05, "loss": 0.5759, "step": 14160 }, { "epoch": 0.41339559317087404, "grad_norm": 1.2766466026450818, "learning_rate": 3.561775246723326e-05, "loss": 0.6638, "step": 14165 }, { "epoch": 0.4135415146651102, "grad_norm": 0.9713674514443876, "learning_rate": 3.560762511085279e-05, "loss": 0.6106, "step": 14170 }, { "epoch": 0.4136874361593463, "grad_norm": 1.1063337114226663, "learning_rate": 3.559749586646039e-05, "loss": 0.5737, "step": 14175 }, { "epoch": 0.41383335765358237, "grad_norm": 1.015296495620961, "learning_rate": 3.5587364736414834e-05, "loss": 0.6151, "step": 14180 }, { "epoch": 0.41397927914781846, "grad_norm": 1.097725490542968, "learning_rate": 3.557723172307536e-05, "loss": 0.6344, "step": 14185 }, { "epoch": 0.41412520064205455, "grad_norm": 1.060477601245144, "learning_rate": 3.556709682880162e-05, "loss": 0.5914, "step": 14190 }, { "epoch": 0.4142711221362907, "grad_norm": 0.9852390758343812, "learning_rate": 3.555696005595371e-05, "loss": 0.6329, "step": 14195 }, { "epoch": 0.4144170436305268, "grad_norm": 0.907624557008725, "learning_rate": 3.5546821406892186e-05, "loss": 0.527, "step": 14200 }, { "epoch": 0.41456296512476287, "grad_norm": 1.0041630200590868, "learning_rate": 3.5536680883978e-05, "loss": 0.586, "step": 14205 }, { "epoch": 0.41470888661899896, "grad_norm": 1.0940688591390892, "learning_rate": 3.552653848957257e-05, "loss": 0.6573, "step": 14210 }, { "epoch": 0.41485480811323505, "grad_norm": 1.1690224777847, "learning_rate": 3.551639422603774e-05, "loss": 0.5826, "step": 14215 }, { "epoch": 0.4150007296074712, "grad_norm": 0.9807015531500162, "learning_rate": 3.55062480957358e-05, "loss": 0.6108, "step": 14220 }, { "epoch": 0.4151466511017073, "grad_norm": 1.0355150883902318, "learning_rate": 3.5496100101029456e-05, "loss": 0.53, "step": 14225 }, { "epoch": 0.4152925725959434, "grad_norm": 1.1220035134010093, "learning_rate": 3.548595024428187e-05, "loss": 0.6176, "step": 14230 }, { "epoch": 0.41543849409017947, "grad_norm": 1.0633359663432378, "learning_rate": 3.54757985278566e-05, "loss": 0.616, "step": 14235 }, { "epoch": 0.4155844155844156, "grad_norm": 1.0282625079134138, "learning_rate": 3.5465644954117693e-05, "loss": 0.6616, "step": 14240 }, { "epoch": 0.4157303370786517, "grad_norm": 0.9918514711690857, "learning_rate": 3.545548952542959e-05, "loss": 0.5928, "step": 14245 }, { "epoch": 0.4158762585728878, "grad_norm": 1.1002923665765318, "learning_rate": 3.544533224415716e-05, "loss": 0.6624, "step": 14250 }, { "epoch": 0.4160221800671239, "grad_norm": 1.06023785845152, "learning_rate": 3.5435173112665725e-05, "loss": 0.5557, "step": 14255 }, { "epoch": 0.41616810156135997, "grad_norm": 1.0960143983061892, "learning_rate": 3.542501213332102e-05, "loss": 0.6125, "step": 14260 }, { "epoch": 0.4163140230555961, "grad_norm": 1.0249244061679479, "learning_rate": 3.5414849308489236e-05, "loss": 0.6244, "step": 14265 }, { "epoch": 0.4164599445498322, "grad_norm": 1.0323971363105802, "learning_rate": 3.540468464053696e-05, "loss": 0.6001, "step": 14270 }, { "epoch": 0.4166058660440683, "grad_norm": 0.9687706572251554, "learning_rate": 3.5394518131831236e-05, "loss": 0.5599, "step": 14275 }, { "epoch": 0.4167517875383044, "grad_norm": 0.9175495914932794, "learning_rate": 3.538434978473952e-05, "loss": 0.5871, "step": 14280 }, { "epoch": 0.4168977090325405, "grad_norm": 0.9022083755802284, "learning_rate": 3.5374179601629695e-05, "loss": 0.6408, "step": 14285 }, { "epoch": 0.4170436305267766, "grad_norm": 1.0055822122706368, "learning_rate": 3.53640075848701e-05, "loss": 0.6534, "step": 14290 }, { "epoch": 0.4171895520210127, "grad_norm": 1.0522571062849835, "learning_rate": 3.535383373682945e-05, "loss": 0.5886, "step": 14295 }, { "epoch": 0.4173354735152488, "grad_norm": 1.107140653670357, "learning_rate": 3.534365805987694e-05, "loss": 0.6334, "step": 14300 }, { "epoch": 0.4174813950094849, "grad_norm": 0.921656305829327, "learning_rate": 3.5333480556382145e-05, "loss": 0.5223, "step": 14305 }, { "epoch": 0.417627316503721, "grad_norm": 1.056612875777694, "learning_rate": 3.532330122871511e-05, "loss": 0.6025, "step": 14310 }, { "epoch": 0.4177732379979571, "grad_norm": 1.0299752674288745, "learning_rate": 3.531312007924626e-05, "loss": 0.6141, "step": 14315 }, { "epoch": 0.4179191594921932, "grad_norm": 1.104480192557672, "learning_rate": 3.530293711034648e-05, "loss": 0.6381, "step": 14320 }, { "epoch": 0.4180650809864293, "grad_norm": 0.9994627633885773, "learning_rate": 3.529275232438706e-05, "loss": 0.5619, "step": 14325 }, { "epoch": 0.4182110024806654, "grad_norm": 0.8397527690825002, "learning_rate": 3.528256572373972e-05, "loss": 0.5883, "step": 14330 }, { "epoch": 0.4183569239749015, "grad_norm": 1.3507100766803446, "learning_rate": 3.5272377310776587e-05, "loss": 0.6187, "step": 14335 }, { "epoch": 0.41850284546913763, "grad_norm": 1.1339780521382532, "learning_rate": 3.526218708787024e-05, "loss": 0.6476, "step": 14340 }, { "epoch": 0.4186487669633737, "grad_norm": 1.075417424478344, "learning_rate": 3.525199505739366e-05, "loss": 0.6113, "step": 14345 }, { "epoch": 0.4187946884576098, "grad_norm": 0.9655707341614594, "learning_rate": 3.524180122172025e-05, "loss": 0.6142, "step": 14350 }, { "epoch": 0.4189406099518459, "grad_norm": 1.0030324258943075, "learning_rate": 3.523160558322383e-05, "loss": 0.6348, "step": 14355 }, { "epoch": 0.419086531446082, "grad_norm": 1.124070642831555, "learning_rate": 3.522140814427864e-05, "loss": 0.5046, "step": 14360 }, { "epoch": 0.41923245294031813, "grad_norm": 1.22871033614499, "learning_rate": 3.5211208907259356e-05, "loss": 0.6182, "step": 14365 }, { "epoch": 0.4193783744345542, "grad_norm": 0.9811542020866325, "learning_rate": 3.5201007874541064e-05, "loss": 0.5788, "step": 14370 }, { "epoch": 0.4195242959287903, "grad_norm": 1.1851259092855806, "learning_rate": 3.519080504849925e-05, "loss": 0.5824, "step": 14375 }, { "epoch": 0.4196702174230264, "grad_norm": 1.0740830122313665, "learning_rate": 3.5180600431509856e-05, "loss": 0.5719, "step": 14380 }, { "epoch": 0.4198161389172625, "grad_norm": 0.8662558134425952, "learning_rate": 3.517039402594919e-05, "loss": 0.5678, "step": 14385 }, { "epoch": 0.41996206041149864, "grad_norm": 1.1409192121898903, "learning_rate": 3.5160185834194016e-05, "loss": 0.6169, "step": 14390 }, { "epoch": 0.42010798190573473, "grad_norm": 0.960539445868417, "learning_rate": 3.5149975858621516e-05, "loss": 0.5259, "step": 14395 }, { "epoch": 0.4202539033999708, "grad_norm": 0.9213290129950416, "learning_rate": 3.513976410160924e-05, "loss": 0.586, "step": 14400 }, { "epoch": 0.4203998248942069, "grad_norm": 1.282168223289508, "learning_rate": 3.5129550565535215e-05, "loss": 0.6768, "step": 14405 }, { "epoch": 0.420545746388443, "grad_norm": 0.9233702665083272, "learning_rate": 3.5119335252777844e-05, "loss": 0.619, "step": 14410 }, { "epoch": 0.42069166788267914, "grad_norm": 1.095082350014174, "learning_rate": 3.510911816571595e-05, "loss": 0.6691, "step": 14415 }, { "epoch": 0.42083758937691523, "grad_norm": 0.9325288171921042, "learning_rate": 3.509889930672879e-05, "loss": 0.5886, "step": 14420 }, { "epoch": 0.4209835108711513, "grad_norm": 0.8672598065256684, "learning_rate": 3.5088678678196e-05, "loss": 0.5769, "step": 14425 }, { "epoch": 0.4211294323653874, "grad_norm": 1.1388268128079726, "learning_rate": 3.507845628249765e-05, "loss": 0.6277, "step": 14430 }, { "epoch": 0.4212753538596235, "grad_norm": 0.8462210710162821, "learning_rate": 3.50682321220142e-05, "loss": 0.572, "step": 14435 }, { "epoch": 0.42142127535385965, "grad_norm": 1.0520908565130593, "learning_rate": 3.505800619912656e-05, "loss": 0.6226, "step": 14440 }, { "epoch": 0.42156719684809574, "grad_norm": 0.847584599913274, "learning_rate": 3.504777851621602e-05, "loss": 0.5856, "step": 14445 }, { "epoch": 0.4217131183423318, "grad_norm": 1.059548211609444, "learning_rate": 3.503754907566427e-05, "loss": 0.6076, "step": 14450 }, { "epoch": 0.4218590398365679, "grad_norm": 1.2293196163149804, "learning_rate": 3.502731787985345e-05, "loss": 0.69, "step": 14455 }, { "epoch": 0.422004961330804, "grad_norm": 1.072054277110249, "learning_rate": 3.501708493116608e-05, "loss": 0.5986, "step": 14460 }, { "epoch": 0.42215088282504015, "grad_norm": 0.9702137632458881, "learning_rate": 3.500685023198508e-05, "loss": 0.5334, "step": 14465 }, { "epoch": 0.42229680431927624, "grad_norm": 0.8771242097362763, "learning_rate": 3.49966137846938e-05, "loss": 0.5643, "step": 14470 }, { "epoch": 0.42244272581351233, "grad_norm": 0.946588377018901, "learning_rate": 3.498637559167599e-05, "loss": 0.5689, "step": 14475 }, { "epoch": 0.4225886473077484, "grad_norm": 1.1403335673419035, "learning_rate": 3.49761356553158e-05, "loss": 0.5718, "step": 14480 }, { "epoch": 0.4227345688019845, "grad_norm": 1.1402872598041047, "learning_rate": 3.496589397799779e-05, "loss": 0.5776, "step": 14485 }, { "epoch": 0.42288049029622066, "grad_norm": 1.0738909364394391, "learning_rate": 3.495565056210693e-05, "loss": 0.6311, "step": 14490 }, { "epoch": 0.42302641179045675, "grad_norm": 0.9592686697343905, "learning_rate": 3.4945405410028605e-05, "loss": 0.6275, "step": 14495 }, { "epoch": 0.42317233328469284, "grad_norm": 0.8961623619200217, "learning_rate": 3.493515852414855e-05, "loss": 0.604, "step": 14500 }, { "epoch": 0.4233182547789289, "grad_norm": 0.9660838086441447, "learning_rate": 3.492490990685298e-05, "loss": 0.5785, "step": 14505 }, { "epoch": 0.423464176273165, "grad_norm": 1.177025120962537, "learning_rate": 3.491465956052846e-05, "loss": 0.6541, "step": 14510 }, { "epoch": 0.42361009776740116, "grad_norm": 0.9137641572032487, "learning_rate": 3.4904407487561987e-05, "loss": 0.5901, "step": 14515 }, { "epoch": 0.42375601926163725, "grad_norm": 0.965328698268098, "learning_rate": 3.4894153690340926e-05, "loss": 0.6251, "step": 14520 }, { "epoch": 0.42390194075587334, "grad_norm": 1.0448285479062196, "learning_rate": 3.488389817125309e-05, "loss": 0.6183, "step": 14525 }, { "epoch": 0.42404786225010943, "grad_norm": 1.0833674364203354, "learning_rate": 3.487364093268666e-05, "loss": 0.553, "step": 14530 }, { "epoch": 0.4241937837443455, "grad_norm": 1.1287078322651714, "learning_rate": 3.486338197703021e-05, "loss": 0.6124, "step": 14535 }, { "epoch": 0.42433970523858167, "grad_norm": 1.0564906310867284, "learning_rate": 3.485312130667275e-05, "loss": 0.5833, "step": 14540 }, { "epoch": 0.42448562673281776, "grad_norm": 1.0656181074987345, "learning_rate": 3.4842858924003654e-05, "loss": 0.6534, "step": 14545 }, { "epoch": 0.42463154822705385, "grad_norm": 1.048484757568297, "learning_rate": 3.4832594831412724e-05, "loss": 0.5974, "step": 14550 }, { "epoch": 0.42477746972128994, "grad_norm": 0.9794724082955425, "learning_rate": 3.482232903129013e-05, "loss": 0.6469, "step": 14555 }, { "epoch": 0.424923391215526, "grad_norm": 1.0589184614388112, "learning_rate": 3.481206152602647e-05, "loss": 0.6166, "step": 14560 }, { "epoch": 0.42506931270976217, "grad_norm": 0.964708633393379, "learning_rate": 3.480179231801272e-05, "loss": 0.5767, "step": 14565 }, { "epoch": 0.42521523420399826, "grad_norm": 0.9952948771382572, "learning_rate": 3.4791521409640256e-05, "loss": 0.5994, "step": 14570 }, { "epoch": 0.42536115569823435, "grad_norm": 0.8714708249182996, "learning_rate": 3.478124880330084e-05, "loss": 0.5989, "step": 14575 }, { "epoch": 0.42550707719247044, "grad_norm": 1.2000549971076717, "learning_rate": 3.477097450138666e-05, "loss": 0.6266, "step": 14580 }, { "epoch": 0.42565299868670653, "grad_norm": 1.054526289381861, "learning_rate": 3.476069850629026e-05, "loss": 0.6354, "step": 14585 }, { "epoch": 0.4257989201809427, "grad_norm": 1.1340311171921678, "learning_rate": 3.475042082040461e-05, "loss": 0.6224, "step": 14590 }, { "epoch": 0.42594484167517876, "grad_norm": 1.2247781164857576, "learning_rate": 3.4740141446123046e-05, "loss": 0.6242, "step": 14595 }, { "epoch": 0.42609076316941485, "grad_norm": 1.2902071767432588, "learning_rate": 3.472986038583934e-05, "loss": 0.5977, "step": 14600 }, { "epoch": 0.42623668466365094, "grad_norm": 0.9240076849811838, "learning_rate": 3.471957764194761e-05, "loss": 0.5447, "step": 14605 }, { "epoch": 0.42638260615788703, "grad_norm": 0.9232150807016487, "learning_rate": 3.4709293216842383e-05, "loss": 0.5634, "step": 14610 }, { "epoch": 0.4265285276521232, "grad_norm": 1.44191113136554, "learning_rate": 3.469900711291858e-05, "loss": 0.5829, "step": 14615 }, { "epoch": 0.42667444914635927, "grad_norm": 1.0753292628866873, "learning_rate": 3.4688719332571526e-05, "loss": 0.6083, "step": 14620 }, { "epoch": 0.42682037064059536, "grad_norm": 1.0144987080420538, "learning_rate": 3.4678429878196905e-05, "loss": 0.698, "step": 14625 }, { "epoch": 0.42696629213483145, "grad_norm": 1.0989524795214847, "learning_rate": 3.4668138752190815e-05, "loss": 0.6716, "step": 14630 }, { "epoch": 0.42711221362906754, "grad_norm": 1.1709035506008318, "learning_rate": 3.4657845956949745e-05, "loss": 0.6411, "step": 14635 }, { "epoch": 0.4272581351233037, "grad_norm": 1.1138648921056695, "learning_rate": 3.464755149487056e-05, "loss": 0.5775, "step": 14640 }, { "epoch": 0.4274040566175398, "grad_norm": 1.1812056106658904, "learning_rate": 3.463725536835051e-05, "loss": 0.5672, "step": 14645 }, { "epoch": 0.42754997811177586, "grad_norm": 1.2241758595708385, "learning_rate": 3.4626957579787244e-05, "loss": 0.6744, "step": 14650 }, { "epoch": 0.42769589960601195, "grad_norm": 0.9480877642606028, "learning_rate": 3.4616658131578805e-05, "loss": 0.6246, "step": 14655 }, { "epoch": 0.42784182110024804, "grad_norm": 1.0003426543032912, "learning_rate": 3.46063570261236e-05, "loss": 0.6176, "step": 14660 }, { "epoch": 0.4279877425944842, "grad_norm": 0.9356697760347451, "learning_rate": 3.459605426582043e-05, "loss": 0.5599, "step": 14665 }, { "epoch": 0.4281336640887203, "grad_norm": 1.1163592928833672, "learning_rate": 3.45857498530685e-05, "loss": 0.5844, "step": 14670 }, { "epoch": 0.42827958558295637, "grad_norm": 1.0829333685346898, "learning_rate": 3.457544379026738e-05, "loss": 0.6072, "step": 14675 }, { "epoch": 0.42842550707719246, "grad_norm": 1.082003619700801, "learning_rate": 3.456513607981703e-05, "loss": 0.607, "step": 14680 }, { "epoch": 0.42857142857142855, "grad_norm": 0.9186345387077305, "learning_rate": 3.455482672411778e-05, "loss": 0.6095, "step": 14685 }, { "epoch": 0.4287173500656647, "grad_norm": 0.961724746355123, "learning_rate": 3.454451572557036e-05, "loss": 0.528, "step": 14690 }, { "epoch": 0.4288632715599008, "grad_norm": 0.9231988902429026, "learning_rate": 3.45342030865759e-05, "loss": 0.58, "step": 14695 }, { "epoch": 0.4290091930541369, "grad_norm": 1.067185859881822, "learning_rate": 3.452388880953587e-05, "loss": 0.6824, "step": 14700 }, { "epoch": 0.42915511454837296, "grad_norm": 0.9610803245485077, "learning_rate": 3.451357289685214e-05, "loss": 0.5487, "step": 14705 }, { "epoch": 0.42930103604260905, "grad_norm": 1.1642097834681657, "learning_rate": 3.450325535092698e-05, "loss": 0.6231, "step": 14710 }, { "epoch": 0.4294469575368452, "grad_norm": 1.031502523325777, "learning_rate": 3.449293617416301e-05, "loss": 0.6245, "step": 14715 }, { "epoch": 0.4295928790310813, "grad_norm": 1.2699125631894317, "learning_rate": 3.448261536896324e-05, "loss": 0.6418, "step": 14720 }, { "epoch": 0.4297388005253174, "grad_norm": 0.9215147245324534, "learning_rate": 3.4472292937731064e-05, "loss": 0.6082, "step": 14725 }, { "epoch": 0.42988472201955347, "grad_norm": 1.134100042377912, "learning_rate": 3.4461968882870277e-05, "loss": 0.582, "step": 14730 }, { "epoch": 0.43003064351378956, "grad_norm": 1.0550186252394929, "learning_rate": 3.4451643206784986e-05, "loss": 0.5555, "step": 14735 }, { "epoch": 0.4301765650080257, "grad_norm": 0.9470831240385473, "learning_rate": 3.4441315911879756e-05, "loss": 0.5782, "step": 14740 }, { "epoch": 0.4303224865022618, "grad_norm": 0.8935949165620442, "learning_rate": 3.443098700055947e-05, "loss": 0.5637, "step": 14745 }, { "epoch": 0.4304684079964979, "grad_norm": 0.9960770842223797, "learning_rate": 3.442065647522941e-05, "loss": 0.6152, "step": 14750 }, { "epoch": 0.43061432949073397, "grad_norm": 0.8686976779723603, "learning_rate": 3.441032433829523e-05, "loss": 0.6471, "step": 14755 }, { "epoch": 0.43076025098497006, "grad_norm": 1.2142185733703368, "learning_rate": 3.439999059216297e-05, "loss": 0.6019, "step": 14760 }, { "epoch": 0.4309061724792062, "grad_norm": 0.9941572809716894, "learning_rate": 3.438965523923903e-05, "loss": 0.5698, "step": 14765 }, { "epoch": 0.4310520939734423, "grad_norm": 1.1751590566908852, "learning_rate": 3.437931828193019e-05, "loss": 0.6197, "step": 14770 }, { "epoch": 0.4311980154676784, "grad_norm": 1.1017336752785598, "learning_rate": 3.436897972264361e-05, "loss": 0.5696, "step": 14775 }, { "epoch": 0.4313439369619145, "grad_norm": 0.9761712375978427, "learning_rate": 3.4358639563786803e-05, "loss": 0.5918, "step": 14780 }, { "epoch": 0.43148985845615057, "grad_norm": 0.9627952198204527, "learning_rate": 3.434829780776769e-05, "loss": 0.5748, "step": 14785 }, { "epoch": 0.4316357799503867, "grad_norm": 1.0358676092014028, "learning_rate": 3.433795445699451e-05, "loss": 0.5875, "step": 14790 }, { "epoch": 0.4317817014446228, "grad_norm": 1.2272333712935142, "learning_rate": 3.432760951387593e-05, "loss": 0.6019, "step": 14795 }, { "epoch": 0.4319276229388589, "grad_norm": 1.1675025999424626, "learning_rate": 3.4317262980820955e-05, "loss": 0.6488, "step": 14800 }, { "epoch": 0.432073544433095, "grad_norm": 1.2682792520499573, "learning_rate": 3.430691486023897e-05, "loss": 0.5887, "step": 14805 }, { "epoch": 0.43221946592733107, "grad_norm": 1.050471902364545, "learning_rate": 3.4296565154539735e-05, "loss": 0.5445, "step": 14810 }, { "epoch": 0.4323653874215672, "grad_norm": 1.0074986947221647, "learning_rate": 3.428621386613336e-05, "loss": 0.6325, "step": 14815 }, { "epoch": 0.4325113089158033, "grad_norm": 1.0463143726485051, "learning_rate": 3.427586099743034e-05, "loss": 0.6451, "step": 14820 }, { "epoch": 0.4326572304100394, "grad_norm": 1.171815048880382, "learning_rate": 3.426550655084154e-05, "loss": 0.6042, "step": 14825 }, { "epoch": 0.4328031519042755, "grad_norm": 0.8735331935608498, "learning_rate": 3.4255150528778186e-05, "loss": 0.6063, "step": 14830 }, { "epoch": 0.4329490733985116, "grad_norm": 1.1073257796844498, "learning_rate": 3.424479293365186e-05, "loss": 0.6162, "step": 14835 }, { "epoch": 0.4330949948927477, "grad_norm": 1.0463564294646084, "learning_rate": 3.423443376787452e-05, "loss": 0.664, "step": 14840 }, { "epoch": 0.4332409163869838, "grad_norm": 0.9710156166509877, "learning_rate": 3.422407303385851e-05, "loss": 0.6058, "step": 14845 }, { "epoch": 0.4333868378812199, "grad_norm": 1.1115312949177574, "learning_rate": 3.421371073401651e-05, "loss": 0.6342, "step": 14850 }, { "epoch": 0.433532759375456, "grad_norm": 0.9612200796972009, "learning_rate": 3.420334687076157e-05, "loss": 0.5573, "step": 14855 }, { "epoch": 0.4336786808696921, "grad_norm": 0.8982438896090172, "learning_rate": 3.419298144650712e-05, "loss": 0.6045, "step": 14860 }, { "epoch": 0.4338246023639282, "grad_norm": 1.27416370256648, "learning_rate": 3.418261446366693e-05, "loss": 0.6287, "step": 14865 }, { "epoch": 0.4339705238581643, "grad_norm": 1.1327345581771062, "learning_rate": 3.417224592465516e-05, "loss": 0.6665, "step": 14870 }, { "epoch": 0.4341164453524004, "grad_norm": 1.2935850454568887, "learning_rate": 3.4161875831886305e-05, "loss": 0.6055, "step": 14875 }, { "epoch": 0.4342623668466365, "grad_norm": 1.0063549724489365, "learning_rate": 3.415150418777524e-05, "loss": 0.5448, "step": 14880 }, { "epoch": 0.4344082883408726, "grad_norm": 0.9395070160447475, "learning_rate": 3.41411309947372e-05, "loss": 0.5964, "step": 14885 }, { "epoch": 0.43455420983510873, "grad_norm": 1.0128619887098098, "learning_rate": 3.4130756255187766e-05, "loss": 0.5398, "step": 14890 }, { "epoch": 0.4347001313293448, "grad_norm": 1.1392782819000082, "learning_rate": 3.41203799715429e-05, "loss": 0.598, "step": 14895 }, { "epoch": 0.4348460528235809, "grad_norm": 1.0167659566556178, "learning_rate": 3.411000214621891e-05, "loss": 0.6286, "step": 14900 }, { "epoch": 0.434991974317817, "grad_norm": 0.9279072705353846, "learning_rate": 3.4099622781632464e-05, "loss": 0.5561, "step": 14905 }, { "epoch": 0.4351378958120531, "grad_norm": 0.9722983712918133, "learning_rate": 3.4089241880200593e-05, "loss": 0.5854, "step": 14910 }, { "epoch": 0.43528381730628923, "grad_norm": 1.047881147011593, "learning_rate": 3.407885944434068e-05, "loss": 0.6314, "step": 14915 }, { "epoch": 0.4354297388005253, "grad_norm": 0.9253653074681496, "learning_rate": 3.406847547647049e-05, "loss": 0.6362, "step": 14920 }, { "epoch": 0.4355756602947614, "grad_norm": 1.0614789017596018, "learning_rate": 3.4058089979008095e-05, "loss": 0.585, "step": 14925 }, { "epoch": 0.4357215817889975, "grad_norm": 1.0619885524342625, "learning_rate": 3.404770295437197e-05, "loss": 0.6204, "step": 14930 }, { "epoch": 0.4358675032832336, "grad_norm": 0.9489630508453383, "learning_rate": 3.403731440498092e-05, "loss": 0.513, "step": 14935 }, { "epoch": 0.43601342477746974, "grad_norm": 0.9737341109412176, "learning_rate": 3.402692433325412e-05, "loss": 0.6417, "step": 14940 }, { "epoch": 0.43615934627170583, "grad_norm": 1.0447307011395082, "learning_rate": 3.401653274161107e-05, "loss": 0.5915, "step": 14945 }, { "epoch": 0.4363052677659419, "grad_norm": 0.9676710840373857, "learning_rate": 3.4006139632471675e-05, "loss": 0.5659, "step": 14950 }, { "epoch": 0.436451189260178, "grad_norm": 0.9253596332734276, "learning_rate": 3.3995745008256146e-05, "loss": 0.5598, "step": 14955 }, { "epoch": 0.43659711075441415, "grad_norm": 1.0179512650412093, "learning_rate": 3.398534887138508e-05, "loss": 0.5717, "step": 14960 }, { "epoch": 0.43674303224865024, "grad_norm": 1.041646236743334, "learning_rate": 3.39749512242794e-05, "loss": 0.5489, "step": 14965 }, { "epoch": 0.43688895374288633, "grad_norm": 0.9942978016835857, "learning_rate": 3.39645520693604e-05, "loss": 0.5674, "step": 14970 }, { "epoch": 0.4370348752371224, "grad_norm": 1.0022983503636906, "learning_rate": 3.395415140904971e-05, "loss": 0.567, "step": 14975 }, { "epoch": 0.4371807967313585, "grad_norm": 1.0642492384656053, "learning_rate": 3.394374924576932e-05, "loss": 0.6156, "step": 14980 }, { "epoch": 0.43732671822559466, "grad_norm": 0.9379956570349489, "learning_rate": 3.3933345581941564e-05, "loss": 0.6172, "step": 14985 }, { "epoch": 0.43747263971983075, "grad_norm": 0.966830581931971, "learning_rate": 3.392294041998914e-05, "loss": 0.6503, "step": 14990 }, { "epoch": 0.43761856121406684, "grad_norm": 0.9487043663364297, "learning_rate": 3.391253376233508e-05, "loss": 0.6219, "step": 14995 }, { "epoch": 0.4377644827083029, "grad_norm": 1.3401538829977264, "learning_rate": 3.390212561140277e-05, "loss": 0.6602, "step": 15000 }, { "epoch": 0.437910404202539, "grad_norm": 1.082827187229185, "learning_rate": 3.389171596961594e-05, "loss": 0.6024, "step": 15005 }, { "epoch": 0.43805632569677516, "grad_norm": 1.2588159317785443, "learning_rate": 3.388130483939867e-05, "loss": 0.6019, "step": 15010 }, { "epoch": 0.43820224719101125, "grad_norm": 0.8167574099841444, "learning_rate": 3.387089222317538e-05, "loss": 0.5474, "step": 15015 }, { "epoch": 0.43834816868524734, "grad_norm": 1.0582371762697391, "learning_rate": 3.386047812337085e-05, "loss": 0.5479, "step": 15020 }, { "epoch": 0.43849409017948343, "grad_norm": 1.2014907263197525, "learning_rate": 3.385006254241019e-05, "loss": 0.6313, "step": 15025 }, { "epoch": 0.4386400116737195, "grad_norm": 0.922362904638179, "learning_rate": 3.3839645482718876e-05, "loss": 0.5868, "step": 15030 }, { "epoch": 0.43878593316795567, "grad_norm": 0.9909309871937789, "learning_rate": 3.382922694672272e-05, "loss": 0.5811, "step": 15035 }, { "epoch": 0.43893185466219176, "grad_norm": 0.8915043010921688, "learning_rate": 3.381880693684784e-05, "loss": 0.5946, "step": 15040 }, { "epoch": 0.43907777615642785, "grad_norm": 1.014321636298506, "learning_rate": 3.380838545552075e-05, "loss": 0.5328, "step": 15045 }, { "epoch": 0.43922369765066394, "grad_norm": 0.9783062774969534, "learning_rate": 3.3797962505168295e-05, "loss": 0.6209, "step": 15050 }, { "epoch": 0.4393696191449, "grad_norm": 1.0502065097108184, "learning_rate": 3.378753808821764e-05, "loss": 0.5312, "step": 15055 }, { "epoch": 0.43951554063913617, "grad_norm": 0.9095734806730934, "learning_rate": 3.3777112207096315e-05, "loss": 0.5806, "step": 15060 }, { "epoch": 0.43966146213337226, "grad_norm": 1.1093471595482236, "learning_rate": 3.376668486423216e-05, "loss": 0.6112, "step": 15065 }, { "epoch": 0.43980738362760835, "grad_norm": 1.1927504053347295, "learning_rate": 3.375625606205342e-05, "loss": 0.6168, "step": 15070 }, { "epoch": 0.43995330512184444, "grad_norm": 1.007807674891563, "learning_rate": 3.374582580298859e-05, "loss": 0.6941, "step": 15075 }, { "epoch": 0.44009922661608053, "grad_norm": 1.1135702503202363, "learning_rate": 3.3735394089466574e-05, "loss": 0.596, "step": 15080 }, { "epoch": 0.4402451481103167, "grad_norm": 0.8416660886898341, "learning_rate": 3.372496092391659e-05, "loss": 0.6069, "step": 15085 }, { "epoch": 0.44039106960455277, "grad_norm": 0.9626430180071739, "learning_rate": 3.3714526308768177e-05, "loss": 0.6245, "step": 15090 }, { "epoch": 0.44053699109878885, "grad_norm": 0.9345578557755848, "learning_rate": 3.3704090246451264e-05, "loss": 0.596, "step": 15095 }, { "epoch": 0.44068291259302494, "grad_norm": 1.1071101407204926, "learning_rate": 3.3693652739396054e-05, "loss": 0.6044, "step": 15100 }, { "epoch": 0.44082883408726103, "grad_norm": 1.0470767070671643, "learning_rate": 3.368321379003313e-05, "loss": 0.5579, "step": 15105 }, { "epoch": 0.4409747555814972, "grad_norm": 1.096572528531247, "learning_rate": 3.367277340079339e-05, "loss": 0.5821, "step": 15110 }, { "epoch": 0.44112067707573327, "grad_norm": 0.9463021632800804, "learning_rate": 3.366233157410808e-05, "loss": 0.5732, "step": 15115 }, { "epoch": 0.44126659856996936, "grad_norm": 0.9886869477006601, "learning_rate": 3.3651888312408766e-05, "loss": 0.6171, "step": 15120 }, { "epoch": 0.44141252006420545, "grad_norm": 0.9817643653603855, "learning_rate": 3.3641443618127366e-05, "loss": 0.6247, "step": 15125 }, { "epoch": 0.44155844155844154, "grad_norm": 1.042498855092766, "learning_rate": 3.363099749369612e-05, "loss": 0.586, "step": 15130 }, { "epoch": 0.4417043630526777, "grad_norm": 0.9696650778195526, "learning_rate": 3.362054994154758e-05, "loss": 0.6047, "step": 15135 }, { "epoch": 0.4418502845469138, "grad_norm": 1.0350340472103092, "learning_rate": 3.361010096411469e-05, "loss": 0.574, "step": 15140 }, { "epoch": 0.44199620604114986, "grad_norm": 1.0501103392445714, "learning_rate": 3.359965056383068e-05, "loss": 0.5968, "step": 15145 }, { "epoch": 0.44214212753538595, "grad_norm": 0.9489985685780461, "learning_rate": 3.3589198743129104e-05, "loss": 0.5949, "step": 15150 }, { "epoch": 0.44228804902962204, "grad_norm": 0.9935727131012589, "learning_rate": 3.357874550444388e-05, "loss": 0.6468, "step": 15155 }, { "epoch": 0.4424339705238582, "grad_norm": 1.1224827913390254, "learning_rate": 3.356829085020922e-05, "loss": 0.6021, "step": 15160 }, { "epoch": 0.4425798920180943, "grad_norm": 1.112257596305318, "learning_rate": 3.355783478285971e-05, "loss": 0.6193, "step": 15165 }, { "epoch": 0.44272581351233037, "grad_norm": 1.0067536116452678, "learning_rate": 3.354737730483023e-05, "loss": 0.6309, "step": 15170 }, { "epoch": 0.44287173500656646, "grad_norm": 1.036330841820235, "learning_rate": 3.353691841855599e-05, "loss": 0.5378, "step": 15175 }, { "epoch": 0.44301765650080255, "grad_norm": 0.9705157807120757, "learning_rate": 3.352645812647256e-05, "loss": 0.6129, "step": 15180 }, { "epoch": 0.4431635779950387, "grad_norm": 0.9550643259820844, "learning_rate": 3.351599643101579e-05, "loss": 0.5603, "step": 15185 }, { "epoch": 0.4433094994892748, "grad_norm": 0.938996838157557, "learning_rate": 3.3505533334621894e-05, "loss": 0.5646, "step": 15190 }, { "epoch": 0.4434554209835109, "grad_norm": 0.9569242456275914, "learning_rate": 3.349506883972739e-05, "loss": 0.565, "step": 15195 }, { "epoch": 0.44360134247774696, "grad_norm": 1.024650959083687, "learning_rate": 3.348460294876915e-05, "loss": 0.5885, "step": 15200 }, { "epoch": 0.44374726397198305, "grad_norm": 0.982234310306115, "learning_rate": 3.347413566418434e-05, "loss": 0.5601, "step": 15205 }, { "epoch": 0.4438931854662192, "grad_norm": 0.9407601324461912, "learning_rate": 3.3463666988410454e-05, "loss": 0.5952, "step": 15210 }, { "epoch": 0.4440391069604553, "grad_norm": 1.061224155639477, "learning_rate": 3.345319692388533e-05, "loss": 0.6029, "step": 15215 }, { "epoch": 0.4441850284546914, "grad_norm": 1.0250934349925327, "learning_rate": 3.344272547304712e-05, "loss": 0.6459, "step": 15220 }, { "epoch": 0.44433094994892747, "grad_norm": 0.9277423655168385, "learning_rate": 3.3432252638334285e-05, "loss": 0.5841, "step": 15225 }, { "epoch": 0.44447687144316356, "grad_norm": 1.0519419008985296, "learning_rate": 3.3421778422185635e-05, "loss": 0.5901, "step": 15230 }, { "epoch": 0.4446227929373997, "grad_norm": 1.091299003832944, "learning_rate": 3.3411302827040275e-05, "loss": 0.6318, "step": 15235 }, { "epoch": 0.4447687144316358, "grad_norm": 1.1553049277039817, "learning_rate": 3.340082585533765e-05, "loss": 0.6027, "step": 15240 }, { "epoch": 0.4449146359258719, "grad_norm": 0.9515248979775799, "learning_rate": 3.339034750951751e-05, "loss": 0.5822, "step": 15245 }, { "epoch": 0.44506055742010797, "grad_norm": 0.8411647701571835, "learning_rate": 3.3379867792019945e-05, "loss": 0.6079, "step": 15250 }, { "epoch": 0.44520647891434406, "grad_norm": 1.0222166659217409, "learning_rate": 3.3369386705285346e-05, "loss": 0.6314, "step": 15255 }, { "epoch": 0.4453524004085802, "grad_norm": 1.0955096604615973, "learning_rate": 3.3358904251754436e-05, "loss": 0.5889, "step": 15260 }, { "epoch": 0.4454983219028163, "grad_norm": 0.9660014397522572, "learning_rate": 3.3348420433868235e-05, "loss": 0.5217, "step": 15265 }, { "epoch": 0.4456442433970524, "grad_norm": 1.0649112114660237, "learning_rate": 3.3337935254068116e-05, "loss": 0.642, "step": 15270 }, { "epoch": 0.4457901648912885, "grad_norm": 1.0646489450179117, "learning_rate": 3.332744871479573e-05, "loss": 0.64, "step": 15275 }, { "epoch": 0.44593608638552457, "grad_norm": 1.0210810091645277, "learning_rate": 3.331696081849308e-05, "loss": 0.5859, "step": 15280 }, { "epoch": 0.4460820078797607, "grad_norm": 1.1013563955223056, "learning_rate": 3.3306471567602454e-05, "loss": 0.5398, "step": 15285 }, { "epoch": 0.4462279293739968, "grad_norm": 0.9582345179102, "learning_rate": 3.329598096456648e-05, "loss": 0.5985, "step": 15290 }, { "epoch": 0.4463738508682329, "grad_norm": 1.4801209189499522, "learning_rate": 3.32854890118281e-05, "loss": 0.6338, "step": 15295 }, { "epoch": 0.446519772362469, "grad_norm": 1.0595813889613248, "learning_rate": 3.327499571183054e-05, "loss": 0.6071, "step": 15300 }, { "epoch": 0.44666569385670507, "grad_norm": 0.9701932680779322, "learning_rate": 3.3264501067017365e-05, "loss": 0.5698, "step": 15305 }, { "epoch": 0.4468116153509412, "grad_norm": 0.9691293912955153, "learning_rate": 3.325400507983245e-05, "loss": 0.6545, "step": 15310 }, { "epoch": 0.4469575368451773, "grad_norm": 1.3766902005369608, "learning_rate": 3.3243507752719996e-05, "loss": 0.5278, "step": 15315 }, { "epoch": 0.4471034583394134, "grad_norm": 1.0940274184668297, "learning_rate": 3.32330090881245e-05, "loss": 0.5736, "step": 15320 }, { "epoch": 0.4472493798336495, "grad_norm": 0.9003880547172795, "learning_rate": 3.3222509088490745e-05, "loss": 0.6181, "step": 15325 }, { "epoch": 0.4473953013278856, "grad_norm": 1.221717558449494, "learning_rate": 3.3212007756263886e-05, "loss": 0.5919, "step": 15330 }, { "epoch": 0.4475412228221217, "grad_norm": 0.978352758371311, "learning_rate": 3.3201505093889324e-05, "loss": 0.6969, "step": 15335 }, { "epoch": 0.4476871443163578, "grad_norm": 1.0808389494346902, "learning_rate": 3.3191001103812816e-05, "loss": 0.594, "step": 15340 }, { "epoch": 0.4478330658105939, "grad_norm": 0.968828869070613, "learning_rate": 3.318049578848042e-05, "loss": 0.5706, "step": 15345 }, { "epoch": 0.44797898730483, "grad_norm": 1.4936361212984097, "learning_rate": 3.3169989150338474e-05, "loss": 0.6418, "step": 15350 }, { "epoch": 0.4481249087990661, "grad_norm": 1.1277461384053353, "learning_rate": 3.315948119183367e-05, "loss": 0.5557, "step": 15355 }, { "epoch": 0.4482708302933022, "grad_norm": 1.0174018532199776, "learning_rate": 3.314897191541297e-05, "loss": 0.5855, "step": 15360 }, { "epoch": 0.4484167517875383, "grad_norm": 1.0746846227499856, "learning_rate": 3.313846132352365e-05, "loss": 0.6264, "step": 15365 }, { "epoch": 0.4485626732817744, "grad_norm": 1.0494340328477192, "learning_rate": 3.3127949418613304e-05, "loss": 0.5639, "step": 15370 }, { "epoch": 0.4487085947760105, "grad_norm": 0.9646150372309438, "learning_rate": 3.311743620312982e-05, "loss": 0.6229, "step": 15375 }, { "epoch": 0.4488545162702466, "grad_norm": 1.0103380082583522, "learning_rate": 3.3106921679521405e-05, "loss": 0.5708, "step": 15380 }, { "epoch": 0.44900043776448273, "grad_norm": 0.9699920865509774, "learning_rate": 3.309640585023655e-05, "loss": 0.6199, "step": 15385 }, { "epoch": 0.4491463592587188, "grad_norm": 0.9405598917719894, "learning_rate": 3.308588871772408e-05, "loss": 0.5522, "step": 15390 }, { "epoch": 0.4492922807529549, "grad_norm": 1.1340629498309085, "learning_rate": 3.307537028443309e-05, "loss": 0.5464, "step": 15395 }, { "epoch": 0.449438202247191, "grad_norm": 1.263708817457306, "learning_rate": 3.3064850552812996e-05, "loss": 0.6057, "step": 15400 }, { "epoch": 0.4495841237414271, "grad_norm": 0.9614440944512271, "learning_rate": 3.305432952531352e-05, "loss": 0.5802, "step": 15405 }, { "epoch": 0.44973004523566323, "grad_norm": 1.0618567661042626, "learning_rate": 3.304380720438468e-05, "loss": 0.6304, "step": 15410 }, { "epoch": 0.4498759667298993, "grad_norm": 1.4074500590171208, "learning_rate": 3.3033283592476786e-05, "loss": 0.646, "step": 15415 }, { "epoch": 0.4500218882241354, "grad_norm": 1.069658930890198, "learning_rate": 3.302275869204047e-05, "loss": 0.577, "step": 15420 }, { "epoch": 0.4501678097183715, "grad_norm": 1.2068179667633927, "learning_rate": 3.301223250552664e-05, "loss": 0.637, "step": 15425 }, { "epoch": 0.4503137312126076, "grad_norm": 1.1090883088641412, "learning_rate": 3.300170503538653e-05, "loss": 0.6254, "step": 15430 }, { "epoch": 0.45045965270684374, "grad_norm": 0.9145535769414993, "learning_rate": 3.299117628407163e-05, "loss": 0.6333, "step": 15435 }, { "epoch": 0.45060557420107983, "grad_norm": 1.0470490795968856, "learning_rate": 3.2980646254033787e-05, "loss": 0.6173, "step": 15440 }, { "epoch": 0.4507514956953159, "grad_norm": 0.9414471742592865, "learning_rate": 3.29701149477251e-05, "loss": 0.576, "step": 15445 }, { "epoch": 0.450897417189552, "grad_norm": 1.027438451466081, "learning_rate": 3.295958236759799e-05, "loss": 0.6058, "step": 15450 }, { "epoch": 0.4510433386837881, "grad_norm": 1.3833992470231662, "learning_rate": 3.294904851610514e-05, "loss": 0.6049, "step": 15455 }, { "epoch": 0.45118926017802424, "grad_norm": 1.2449868103003836, "learning_rate": 3.2938513395699585e-05, "loss": 0.5943, "step": 15460 }, { "epoch": 0.45133518167226033, "grad_norm": 0.9851991930409953, "learning_rate": 3.292797700883461e-05, "loss": 0.5943, "step": 15465 }, { "epoch": 0.4514811031664964, "grad_norm": 1.0407686194741652, "learning_rate": 3.291743935796381e-05, "loss": 0.5492, "step": 15470 }, { "epoch": 0.4516270246607325, "grad_norm": 0.9530060947491329, "learning_rate": 3.290690044554108e-05, "loss": 0.6019, "step": 15475 }, { "epoch": 0.4517729461549686, "grad_norm": 0.8823918455499635, "learning_rate": 3.2896360274020596e-05, "loss": 0.5407, "step": 15480 }, { "epoch": 0.45191886764920475, "grad_norm": 0.9354243929459383, "learning_rate": 3.288581884585683e-05, "loss": 0.5088, "step": 15485 }, { "epoch": 0.45206478914344084, "grad_norm": 0.8766881555579561, "learning_rate": 3.287527616350456e-05, "loss": 0.5206, "step": 15490 }, { "epoch": 0.4522107106376769, "grad_norm": 1.0957480159060131, "learning_rate": 3.286473222941884e-05, "loss": 0.5418, "step": 15495 }, { "epoch": 0.452356632131913, "grad_norm": 1.07198053955402, "learning_rate": 3.2854187046055024e-05, "loss": 0.6047, "step": 15500 }, { "epoch": 0.4525025536261491, "grad_norm": 0.9212762341843745, "learning_rate": 3.284364061586876e-05, "loss": 0.5956, "step": 15505 }, { "epoch": 0.45264847512038525, "grad_norm": 0.9743527744295987, "learning_rate": 3.2833092941315975e-05, "loss": 0.6023, "step": 15510 }, { "epoch": 0.45279439661462134, "grad_norm": 1.0438093504371246, "learning_rate": 3.282254402485289e-05, "loss": 0.6041, "step": 15515 }, { "epoch": 0.45294031810885743, "grad_norm": 1.2693671668816704, "learning_rate": 3.2811993868936024e-05, "loss": 0.5524, "step": 15520 }, { "epoch": 0.4530862396030935, "grad_norm": 0.989367182498016, "learning_rate": 3.280144247602217e-05, "loss": 0.5957, "step": 15525 }, { "epoch": 0.4532321610973296, "grad_norm": 0.8592103927843617, "learning_rate": 3.2790889848568416e-05, "loss": 0.5775, "step": 15530 }, { "epoch": 0.45337808259156576, "grad_norm": 1.0883419221677124, "learning_rate": 3.2780335989032164e-05, "loss": 0.6629, "step": 15535 }, { "epoch": 0.45352400408580185, "grad_norm": 1.0326728842703878, "learning_rate": 3.276978089987104e-05, "loss": 0.5678, "step": 15540 }, { "epoch": 0.45366992558003794, "grad_norm": 0.9331684749223301, "learning_rate": 3.275922458354302e-05, "loss": 0.6306, "step": 15545 }, { "epoch": 0.453815847074274, "grad_norm": 1.0652826440564773, "learning_rate": 3.2748667042506326e-05, "loss": 0.5582, "step": 15550 }, { "epoch": 0.4539617685685101, "grad_norm": 1.2697183729862582, "learning_rate": 3.2738108279219484e-05, "loss": 0.6246, "step": 15555 }, { "epoch": 0.45410769006274626, "grad_norm": 0.9561355340400615, "learning_rate": 3.27275482961413e-05, "loss": 0.5685, "step": 15560 }, { "epoch": 0.45425361155698235, "grad_norm": 1.1375440726329225, "learning_rate": 3.2716987095730854e-05, "loss": 0.624, "step": 15565 }, { "epoch": 0.45439953305121844, "grad_norm": 1.251949342269883, "learning_rate": 3.270642468044753e-05, "loss": 0.6375, "step": 15570 }, { "epoch": 0.45454545454545453, "grad_norm": 0.9445923297152842, "learning_rate": 3.269586105275098e-05, "loss": 0.5817, "step": 15575 }, { "epoch": 0.4546913760396906, "grad_norm": 1.1316410560885413, "learning_rate": 3.268529621510115e-05, "loss": 0.6748, "step": 15580 }, { "epoch": 0.45483729753392677, "grad_norm": 0.9441436662422065, "learning_rate": 3.2674730169958234e-05, "loss": 0.5859, "step": 15585 }, { "epoch": 0.45498321902816286, "grad_norm": 0.9069016560298085, "learning_rate": 3.2664162919782765e-05, "loss": 0.5871, "step": 15590 }, { "epoch": 0.45512914052239895, "grad_norm": 1.00714936401814, "learning_rate": 3.26535944670355e-05, "loss": 0.6045, "step": 15595 }, { "epoch": 0.45527506201663503, "grad_norm": 0.9963012408117092, "learning_rate": 3.264302481417751e-05, "loss": 0.5278, "step": 15600 }, { "epoch": 0.4554209835108711, "grad_norm": 0.9763836435771481, "learning_rate": 3.263245396367015e-05, "loss": 0.5498, "step": 15605 }, { "epoch": 0.45556690500510727, "grad_norm": 1.283773050445861, "learning_rate": 3.2621881917975014e-05, "loss": 0.6138, "step": 15610 }, { "epoch": 0.45571282649934336, "grad_norm": 1.1166999859464244, "learning_rate": 3.261130867955403e-05, "loss": 0.6111, "step": 15615 }, { "epoch": 0.45585874799357945, "grad_norm": 0.9286310253959584, "learning_rate": 3.2600734250869333e-05, "loss": 0.5983, "step": 15620 }, { "epoch": 0.45600466948781554, "grad_norm": 1.2083062434129777, "learning_rate": 3.259015863438341e-05, "loss": 0.596, "step": 15625 }, { "epoch": 0.45615059098205163, "grad_norm": 1.6283205043882343, "learning_rate": 3.257958183255899e-05, "loss": 0.5995, "step": 15630 }, { "epoch": 0.4562965124762878, "grad_norm": 0.9109724092675541, "learning_rate": 3.2569003847859056e-05, "loss": 0.5983, "step": 15635 }, { "epoch": 0.45644243397052386, "grad_norm": 1.1087699477704822, "learning_rate": 3.255842468274691e-05, "loss": 0.5714, "step": 15640 }, { "epoch": 0.45658835546475995, "grad_norm": 1.0506735351840697, "learning_rate": 3.254784433968611e-05, "loss": 0.574, "step": 15645 }, { "epoch": 0.45673427695899604, "grad_norm": 1.2098919805362127, "learning_rate": 3.2537262821140465e-05, "loss": 0.5639, "step": 15650 }, { "epoch": 0.45688019845323213, "grad_norm": 1.0338719695370537, "learning_rate": 3.25266801295741e-05, "loss": 0.6475, "step": 15655 }, { "epoch": 0.4570261199474683, "grad_norm": 1.1459744175517204, "learning_rate": 3.251609626745137e-05, "loss": 0.5843, "step": 15660 }, { "epoch": 0.45717204144170437, "grad_norm": 1.2885086908276957, "learning_rate": 3.250551123723696e-05, "loss": 0.6055, "step": 15665 }, { "epoch": 0.45731796293594046, "grad_norm": 1.0478937924211262, "learning_rate": 3.2494925041395755e-05, "loss": 0.6184, "step": 15670 }, { "epoch": 0.45746388443017655, "grad_norm": 1.1573852810061225, "learning_rate": 3.2484337682392976e-05, "loss": 0.6093, "step": 15675 }, { "epoch": 0.4576098059244127, "grad_norm": 0.8407046206846887, "learning_rate": 3.247374916269407e-05, "loss": 0.5653, "step": 15680 }, { "epoch": 0.4577557274186488, "grad_norm": 1.2137561234384373, "learning_rate": 3.246315948476479e-05, "loss": 0.5505, "step": 15685 }, { "epoch": 0.4579016489128849, "grad_norm": 0.9239642804148287, "learning_rate": 3.245256865107111e-05, "loss": 0.5703, "step": 15690 }, { "epoch": 0.45804757040712096, "grad_norm": 1.1111061635795683, "learning_rate": 3.2441976664079326e-05, "loss": 0.5917, "step": 15695 }, { "epoch": 0.45819349190135705, "grad_norm": 0.9292923152123604, "learning_rate": 3.243138352625598e-05, "loss": 0.5305, "step": 15700 }, { "epoch": 0.4583394133955932, "grad_norm": 1.1360188916673053, "learning_rate": 3.242078924006787e-05, "loss": 0.5992, "step": 15705 }, { "epoch": 0.4584853348898293, "grad_norm": 1.1695538741765779, "learning_rate": 3.241019380798209e-05, "loss": 0.6202, "step": 15710 }, { "epoch": 0.4586312563840654, "grad_norm": 0.9597372734198713, "learning_rate": 3.2399597232465976e-05, "loss": 0.5477, "step": 15715 }, { "epoch": 0.45877717787830147, "grad_norm": 0.9525963551637229, "learning_rate": 3.238899951598713e-05, "loss": 0.6136, "step": 15720 }, { "epoch": 0.45892309937253756, "grad_norm": 1.093767354710571, "learning_rate": 3.237840066101344e-05, "loss": 0.6348, "step": 15725 }, { "epoch": 0.4590690208667737, "grad_norm": 1.014376923467261, "learning_rate": 3.2367800670013035e-05, "loss": 0.6575, "step": 15730 }, { "epoch": 0.4592149423610098, "grad_norm": 0.963490668162711, "learning_rate": 3.235719954545434e-05, "loss": 0.6087, "step": 15735 }, { "epoch": 0.4593608638552459, "grad_norm": 0.9777063123112228, "learning_rate": 3.2346597289806005e-05, "loss": 0.5823, "step": 15740 }, { "epoch": 0.45950678534948197, "grad_norm": 0.9306426212604956, "learning_rate": 3.233599390553698e-05, "loss": 0.5817, "step": 15745 }, { "epoch": 0.45965270684371806, "grad_norm": 0.9934307294628575, "learning_rate": 3.2325389395116445e-05, "loss": 0.6817, "step": 15750 }, { "epoch": 0.4597986283379542, "grad_norm": 1.0248496278926615, "learning_rate": 3.2314783761013876e-05, "loss": 0.543, "step": 15755 }, { "epoch": 0.4599445498321903, "grad_norm": 0.9492459367095261, "learning_rate": 3.2304177005698985e-05, "loss": 0.6236, "step": 15760 }, { "epoch": 0.4600904713264264, "grad_norm": 1.023610604436513, "learning_rate": 3.229356913164174e-05, "loss": 0.5156, "step": 15765 }, { "epoch": 0.4602363928206625, "grad_norm": 1.047680341406216, "learning_rate": 3.2282960141312404e-05, "loss": 0.6315, "step": 15770 }, { "epoch": 0.46038231431489857, "grad_norm": 0.997127441253729, "learning_rate": 3.2272350037181456e-05, "loss": 0.5839, "step": 15775 }, { "epoch": 0.4605282358091347, "grad_norm": 1.0417916231889532, "learning_rate": 3.226173882171968e-05, "loss": 0.6547, "step": 15780 }, { "epoch": 0.4606741573033708, "grad_norm": 1.0631821333318783, "learning_rate": 3.225112649739808e-05, "loss": 0.5847, "step": 15785 }, { "epoch": 0.4608200787976069, "grad_norm": 0.9716857898144506, "learning_rate": 3.224051306668795e-05, "loss": 0.5793, "step": 15790 }, { "epoch": 0.460966000291843, "grad_norm": 1.0047720592026088, "learning_rate": 3.22298985320608e-05, "loss": 0.5995, "step": 15795 }, { "epoch": 0.46111192178607907, "grad_norm": 1.3832403520722822, "learning_rate": 3.2219282895988445e-05, "loss": 0.588, "step": 15800 }, { "epoch": 0.4612578432803152, "grad_norm": 1.019242116775801, "learning_rate": 3.220866616094293e-05, "loss": 0.5614, "step": 15805 }, { "epoch": 0.4614037647745513, "grad_norm": 0.9282579570792205, "learning_rate": 3.219804832939655e-05, "loss": 0.526, "step": 15810 }, { "epoch": 0.4615496862687874, "grad_norm": 1.0687882475730643, "learning_rate": 3.2187429403821876e-05, "loss": 0.6267, "step": 15815 }, { "epoch": 0.4616956077630235, "grad_norm": 0.9478941843039194, "learning_rate": 3.2176809386691714e-05, "loss": 0.5995, "step": 15820 }, { "epoch": 0.4618415292572596, "grad_norm": 0.9662875553531348, "learning_rate": 3.216618828047914e-05, "loss": 0.5921, "step": 15825 }, { "epoch": 0.4619874507514957, "grad_norm": 1.0516865340645782, "learning_rate": 3.215556608765748e-05, "loss": 0.5966, "step": 15830 }, { "epoch": 0.4621333722457318, "grad_norm": 1.1336114712008754, "learning_rate": 3.21449428107003e-05, "loss": 0.5943, "step": 15835 }, { "epoch": 0.4622792937399679, "grad_norm": 1.1637276547351914, "learning_rate": 3.2134318452081444e-05, "loss": 0.6183, "step": 15840 }, { "epoch": 0.462425215234204, "grad_norm": 1.0266984522162912, "learning_rate": 3.2123693014274965e-05, "loss": 0.5574, "step": 15845 }, { "epoch": 0.4625711367284401, "grad_norm": 0.8845639906006659, "learning_rate": 3.2113066499755215e-05, "loss": 0.5606, "step": 15850 }, { "epoch": 0.4627170582226762, "grad_norm": 0.9073966151433892, "learning_rate": 3.210243891099678e-05, "loss": 0.5569, "step": 15855 }, { "epoch": 0.4628629797169123, "grad_norm": 1.0662052997774707, "learning_rate": 3.209181025047449e-05, "loss": 0.5629, "step": 15860 }, { "epoch": 0.4630089012111484, "grad_norm": 1.01163438062521, "learning_rate": 3.208118052066342e-05, "loss": 0.5393, "step": 15865 }, { "epoch": 0.4631548227053845, "grad_norm": 1.1041645687777277, "learning_rate": 3.20705497240389e-05, "loss": 0.6139, "step": 15870 }, { "epoch": 0.4633007441996206, "grad_norm": 0.9121251771958931, "learning_rate": 3.205991786307652e-05, "loss": 0.6044, "step": 15875 }, { "epoch": 0.46344666569385673, "grad_norm": 0.9355383760583984, "learning_rate": 3.204928494025209e-05, "loss": 0.5854, "step": 15880 }, { "epoch": 0.4635925871880928, "grad_norm": 1.10289375735494, "learning_rate": 3.20386509580417e-05, "loss": 0.6014, "step": 15885 }, { "epoch": 0.4637385086823289, "grad_norm": 1.046126257150109, "learning_rate": 3.202801591892167e-05, "loss": 0.6139, "step": 15890 }, { "epoch": 0.463884430176565, "grad_norm": 0.9749659423553443, "learning_rate": 3.201737982536857e-05, "loss": 0.5912, "step": 15895 }, { "epoch": 0.4640303516708011, "grad_norm": 1.0591973016379579, "learning_rate": 3.20067426798592e-05, "loss": 0.5544, "step": 15900 }, { "epoch": 0.46417627316503723, "grad_norm": 1.1682535035086854, "learning_rate": 3.1996104484870615e-05, "loss": 0.4743, "step": 15905 }, { "epoch": 0.4643221946592733, "grad_norm": 1.1598578416372252, "learning_rate": 3.198546524288014e-05, "loss": 0.5987, "step": 15910 }, { "epoch": 0.4644681161535094, "grad_norm": 0.9575683339289085, "learning_rate": 3.197482495636531e-05, "loss": 0.5513, "step": 15915 }, { "epoch": 0.4646140376477455, "grad_norm": 1.1147247113284615, "learning_rate": 3.196418362780391e-05, "loss": 0.5959, "step": 15920 }, { "epoch": 0.4647599591419816, "grad_norm": 1.02720540148014, "learning_rate": 3.195354125967397e-05, "loss": 0.6222, "step": 15925 }, { "epoch": 0.46490588063621774, "grad_norm": 0.9927925502777519, "learning_rate": 3.1942897854453776e-05, "loss": 0.5505, "step": 15930 }, { "epoch": 0.46505180213045383, "grad_norm": 1.232419275419981, "learning_rate": 3.1932253414621846e-05, "loss": 0.5836, "step": 15935 }, { "epoch": 0.4651977236246899, "grad_norm": 1.198392516016938, "learning_rate": 3.192160794265691e-05, "loss": 0.5601, "step": 15940 }, { "epoch": 0.465343645118926, "grad_norm": 0.9290787294382147, "learning_rate": 3.1910961441038e-05, "loss": 0.5783, "step": 15945 }, { "epoch": 0.4654895666131621, "grad_norm": 0.9673793305861905, "learning_rate": 3.1900313912244326e-05, "loss": 0.5959, "step": 15950 }, { "epoch": 0.46563548810739824, "grad_norm": 1.1998292550313885, "learning_rate": 3.188966535875537e-05, "loss": 0.5464, "step": 15955 }, { "epoch": 0.46578140960163433, "grad_norm": 1.171475298071113, "learning_rate": 3.1879015783050855e-05, "loss": 0.6397, "step": 15960 }, { "epoch": 0.4659273310958704, "grad_norm": 1.1307752464317156, "learning_rate": 3.1868365187610736e-05, "loss": 0.63, "step": 15965 }, { "epoch": 0.4660732525901065, "grad_norm": 1.1731864130109129, "learning_rate": 3.185771357491519e-05, "loss": 0.6413, "step": 15970 }, { "epoch": 0.4662191740843426, "grad_norm": 1.0920781863690463, "learning_rate": 3.1847060947444643e-05, "loss": 0.6109, "step": 15975 }, { "epoch": 0.46636509557857875, "grad_norm": 0.9719439390500204, "learning_rate": 3.183640730767977e-05, "loss": 0.6134, "step": 15980 }, { "epoch": 0.46651101707281484, "grad_norm": 0.9675295952738631, "learning_rate": 3.1825752658101474e-05, "loss": 0.6123, "step": 15985 }, { "epoch": 0.4666569385670509, "grad_norm": 1.0373621345418962, "learning_rate": 3.181509700119087e-05, "loss": 0.5901, "step": 15990 }, { "epoch": 0.466802860061287, "grad_norm": 1.0249244672043027, "learning_rate": 3.1804440339429344e-05, "loss": 0.6608, "step": 15995 }, { "epoch": 0.4669487815555231, "grad_norm": 1.0724877856702746, "learning_rate": 3.1793782675298485e-05, "loss": 0.6656, "step": 16000 }, { "epoch": 0.46709470304975925, "grad_norm": 1.1952657635486619, "learning_rate": 3.178312401128014e-05, "loss": 0.6083, "step": 16005 }, { "epoch": 0.46724062454399534, "grad_norm": 1.0690913191527671, "learning_rate": 3.177246434985638e-05, "loss": 0.5899, "step": 16010 }, { "epoch": 0.46738654603823143, "grad_norm": 1.1834225817099195, "learning_rate": 3.1761803693509486e-05, "loss": 0.7149, "step": 16015 }, { "epoch": 0.4675324675324675, "grad_norm": 1.2295242212235264, "learning_rate": 3.175114204472201e-05, "loss": 0.6701, "step": 16020 }, { "epoch": 0.4676783890267036, "grad_norm": 1.3023199828516847, "learning_rate": 3.174047940597671e-05, "loss": 0.6676, "step": 16025 }, { "epoch": 0.46782431052093976, "grad_norm": 0.9432176597114115, "learning_rate": 3.1729815779756586e-05, "loss": 0.5697, "step": 16030 }, { "epoch": 0.46797023201517585, "grad_norm": 1.067917631624269, "learning_rate": 3.171915116854486e-05, "loss": 0.5941, "step": 16035 }, { "epoch": 0.46811615350941194, "grad_norm": 0.8236801347322392, "learning_rate": 3.170848557482498e-05, "loss": 0.4979, "step": 16040 }, { "epoch": 0.468262075003648, "grad_norm": 0.9382195157037129, "learning_rate": 3.1697819001080636e-05, "loss": 0.629, "step": 16045 }, { "epoch": 0.4684079964978841, "grad_norm": 0.9991670203979078, "learning_rate": 3.1687151449795735e-05, "loss": 0.5543, "step": 16050 }, { "epoch": 0.46855391799212026, "grad_norm": 1.138257288287831, "learning_rate": 3.1676482923454406e-05, "loss": 0.5942, "step": 16055 }, { "epoch": 0.46869983948635635, "grad_norm": 0.9383685087244243, "learning_rate": 3.166581342454104e-05, "loss": 0.5821, "step": 16060 }, { "epoch": 0.46884576098059244, "grad_norm": 1.0208261833720251, "learning_rate": 3.165514295554021e-05, "loss": 0.5804, "step": 16065 }, { "epoch": 0.46899168247482853, "grad_norm": 1.1095146268454237, "learning_rate": 3.164447151893673e-05, "loss": 0.5952, "step": 16070 }, { "epoch": 0.4691376039690646, "grad_norm": 1.012549555463579, "learning_rate": 3.163379911721566e-05, "loss": 0.6351, "step": 16075 }, { "epoch": 0.46928352546330077, "grad_norm": 1.1788680249148444, "learning_rate": 3.162312575286226e-05, "loss": 0.5721, "step": 16080 }, { "epoch": 0.46942944695753686, "grad_norm": 1.0263215462979607, "learning_rate": 3.161245142836201e-05, "loss": 0.5891, "step": 16085 }, { "epoch": 0.46957536845177295, "grad_norm": 1.096561843456749, "learning_rate": 3.160177614620065e-05, "loss": 0.6271, "step": 16090 }, { "epoch": 0.46972128994600904, "grad_norm": 0.9582351993218208, "learning_rate": 3.15910999088641e-05, "loss": 0.5922, "step": 16095 }, { "epoch": 0.4698672114402451, "grad_norm": 1.1435621609478108, "learning_rate": 3.158042271883853e-05, "loss": 0.6033, "step": 16100 }, { "epoch": 0.47001313293448127, "grad_norm": 0.9624437619131209, "learning_rate": 3.156974457861032e-05, "loss": 0.6375, "step": 16105 }, { "epoch": 0.47015905442871736, "grad_norm": 1.0190270413678166, "learning_rate": 3.155906549066607e-05, "loss": 0.5898, "step": 16110 }, { "epoch": 0.47030497592295345, "grad_norm": 1.1826229508212447, "learning_rate": 3.1548385457492615e-05, "loss": 0.5615, "step": 16115 }, { "epoch": 0.47045089741718954, "grad_norm": 1.1285316377328358, "learning_rate": 3.1537704481576996e-05, "loss": 0.5909, "step": 16120 }, { "epoch": 0.47059681891142563, "grad_norm": 1.6147334791451788, "learning_rate": 3.1527022565406485e-05, "loss": 0.6389, "step": 16125 }, { "epoch": 0.4707427404056618, "grad_norm": 1.117986456933243, "learning_rate": 3.151633971146854e-05, "loss": 0.5609, "step": 16130 }, { "epoch": 0.47088866189989786, "grad_norm": 0.9438890624816828, "learning_rate": 3.150565592225089e-05, "loss": 0.6322, "step": 16135 }, { "epoch": 0.47103458339413395, "grad_norm": 0.9417274734364771, "learning_rate": 3.1494971200241445e-05, "loss": 0.5182, "step": 16140 }, { "epoch": 0.47118050488837004, "grad_norm": 1.047334740433062, "learning_rate": 3.1484285547928346e-05, "loss": 0.5999, "step": 16145 }, { "epoch": 0.47132642638260613, "grad_norm": 1.0935660709235446, "learning_rate": 3.147359896779994e-05, "loss": 0.6251, "step": 16150 }, { "epoch": 0.4714723478768423, "grad_norm": 1.0378166892325567, "learning_rate": 3.14629114623448e-05, "loss": 0.5395, "step": 16155 }, { "epoch": 0.47161826937107837, "grad_norm": 1.126316973640822, "learning_rate": 3.1452223034051714e-05, "loss": 0.6217, "step": 16160 }, { "epoch": 0.47176419086531446, "grad_norm": 1.036274101388914, "learning_rate": 3.144153368540967e-05, "loss": 0.5966, "step": 16165 }, { "epoch": 0.47191011235955055, "grad_norm": 1.0825445332083152, "learning_rate": 3.1430843418907905e-05, "loss": 0.6309, "step": 16170 }, { "epoch": 0.47205603385378664, "grad_norm": 1.1010543405445865, "learning_rate": 3.1420152237035824e-05, "loss": 0.5654, "step": 16175 }, { "epoch": 0.4722019553480228, "grad_norm": 1.291319672829736, "learning_rate": 3.140946014228308e-05, "loss": 0.6257, "step": 16180 }, { "epoch": 0.4723478768422589, "grad_norm": 1.1042257214611904, "learning_rate": 3.139876713713954e-05, "loss": 0.6235, "step": 16185 }, { "epoch": 0.47249379833649496, "grad_norm": 0.9629843497570609, "learning_rate": 3.1388073224095236e-05, "loss": 0.5866, "step": 16190 }, { "epoch": 0.47263971983073105, "grad_norm": 1.0866402931963635, "learning_rate": 3.137737840564048e-05, "loss": 0.5875, "step": 16195 }, { "epoch": 0.47278564132496714, "grad_norm": 1.059982232823143, "learning_rate": 3.136668268426574e-05, "loss": 0.5419, "step": 16200 }, { "epoch": 0.4729315628192033, "grad_norm": 1.2767011738484482, "learning_rate": 3.135598606246171e-05, "loss": 0.6259, "step": 16205 }, { "epoch": 0.4730774843134394, "grad_norm": 1.0727429676467213, "learning_rate": 3.134528854271932e-05, "loss": 0.5979, "step": 16210 }, { "epoch": 0.47322340580767547, "grad_norm": 0.935253270093171, "learning_rate": 3.1334590127529676e-05, "loss": 0.6169, "step": 16215 }, { "epoch": 0.47336932730191156, "grad_norm": 0.9088583229338272, "learning_rate": 3.13238908193841e-05, "loss": 0.5648, "step": 16220 }, { "epoch": 0.47351524879614765, "grad_norm": 1.0388776791127656, "learning_rate": 3.131319062077412e-05, "loss": 0.6178, "step": 16225 }, { "epoch": 0.4736611702903838, "grad_norm": 1.058695593297206, "learning_rate": 3.1302489534191496e-05, "loss": 0.549, "step": 16230 }, { "epoch": 0.4738070917846199, "grad_norm": 0.9573896971764649, "learning_rate": 3.129178756212816e-05, "loss": 0.6673, "step": 16235 }, { "epoch": 0.473953013278856, "grad_norm": 1.1798643992256792, "learning_rate": 3.128108470707627e-05, "loss": 0.5958, "step": 16240 }, { "epoch": 0.47409893477309206, "grad_norm": 0.9749965823953239, "learning_rate": 3.127038097152819e-05, "loss": 0.6324, "step": 16245 }, { "epoch": 0.47424485626732815, "grad_norm": 1.0032067699445637, "learning_rate": 3.125967635797648e-05, "loss": 0.5879, "step": 16250 }, { "epoch": 0.4743907777615643, "grad_norm": 1.0337100139887638, "learning_rate": 3.1248970868913926e-05, "loss": 0.5537, "step": 16255 }, { "epoch": 0.4745366992558004, "grad_norm": 1.068809433506519, "learning_rate": 3.123826450683347e-05, "loss": 0.5618, "step": 16260 }, { "epoch": 0.4746826207500365, "grad_norm": 1.0405455587530177, "learning_rate": 3.1227557274228314e-05, "loss": 0.5527, "step": 16265 }, { "epoch": 0.47482854224427257, "grad_norm": 1.0661025990390622, "learning_rate": 3.1216849173591816e-05, "loss": 0.5644, "step": 16270 }, { "epoch": 0.47497446373850866, "grad_norm": 1.124048760964528, "learning_rate": 3.120614020741758e-05, "loss": 0.6207, "step": 16275 }, { "epoch": 0.4751203852327448, "grad_norm": 0.9775749805402151, "learning_rate": 3.1195430378199376e-05, "loss": 0.5665, "step": 16280 }, { "epoch": 0.4752663067269809, "grad_norm": 1.1100688950990143, "learning_rate": 3.118471968843118e-05, "loss": 0.5943, "step": 16285 }, { "epoch": 0.475412228221217, "grad_norm": 1.0291055831161693, "learning_rate": 3.11740081406072e-05, "loss": 0.6502, "step": 16290 }, { "epoch": 0.47555814971545307, "grad_norm": 1.0315300126889353, "learning_rate": 3.11632957372218e-05, "loss": 0.5642, "step": 16295 }, { "epoch": 0.47570407120968916, "grad_norm": 0.9797770643424677, "learning_rate": 3.1152582480769566e-05, "loss": 0.5703, "step": 16300 }, { "epoch": 0.4758499927039253, "grad_norm": 0.9462656804437485, "learning_rate": 3.1141868373745285e-05, "loss": 0.5997, "step": 16305 }, { "epoch": 0.4759959141981614, "grad_norm": 0.9769441795159095, "learning_rate": 3.113115341864393e-05, "loss": 0.5707, "step": 16310 }, { "epoch": 0.4761418356923975, "grad_norm": 1.2045468012988314, "learning_rate": 3.1120437617960695e-05, "loss": 0.6186, "step": 16315 }, { "epoch": 0.4762877571866336, "grad_norm": 0.810664063243908, "learning_rate": 3.110972097419093e-05, "loss": 0.5105, "step": 16320 }, { "epoch": 0.47643367868086967, "grad_norm": 1.0101058675561303, "learning_rate": 3.1099003489830225e-05, "loss": 0.619, "step": 16325 }, { "epoch": 0.4765796001751058, "grad_norm": 1.3988922571449385, "learning_rate": 3.1088285167374345e-05, "loss": 0.5865, "step": 16330 }, { "epoch": 0.4767255216693419, "grad_norm": 1.1101767982009851, "learning_rate": 3.107756600931923e-05, "loss": 0.5892, "step": 16335 }, { "epoch": 0.476871443163578, "grad_norm": 1.0213661041585358, "learning_rate": 3.106684601816106e-05, "loss": 0.5502, "step": 16340 }, { "epoch": 0.4770173646578141, "grad_norm": 0.983270686263814, "learning_rate": 3.1056125196396174e-05, "loss": 0.5851, "step": 16345 }, { "epoch": 0.47716328615205017, "grad_norm": 1.0366162681146514, "learning_rate": 3.104540354652113e-05, "loss": 0.5291, "step": 16350 }, { "epoch": 0.4773092076462863, "grad_norm": 1.216918503959385, "learning_rate": 3.103468107103265e-05, "loss": 0.6292, "step": 16355 }, { "epoch": 0.4774551291405224, "grad_norm": 1.0767064027176807, "learning_rate": 3.1023957772427656e-05, "loss": 0.6254, "step": 16360 }, { "epoch": 0.4776010506347585, "grad_norm": 1.1485535990927525, "learning_rate": 3.101323365320329e-05, "loss": 0.6227, "step": 16365 }, { "epoch": 0.4777469721289946, "grad_norm": 1.0113956100330284, "learning_rate": 3.100250871585684e-05, "loss": 0.59, "step": 16370 }, { "epoch": 0.4778928936232307, "grad_norm": 1.284691180996894, "learning_rate": 3.099178296288583e-05, "loss": 0.6798, "step": 16375 }, { "epoch": 0.4780388151174668, "grad_norm": 0.9987102666277092, "learning_rate": 3.0981056396787936e-05, "loss": 0.5675, "step": 16380 }, { "epoch": 0.4781847366117029, "grad_norm": 1.017702166853108, "learning_rate": 3.097032902006105e-05, "loss": 0.5337, "step": 16385 }, { "epoch": 0.478330658105939, "grad_norm": 1.0309861509135925, "learning_rate": 3.095960083520323e-05, "loss": 0.5986, "step": 16390 }, { "epoch": 0.4784765796001751, "grad_norm": 1.0609202762615813, "learning_rate": 3.0948871844712746e-05, "loss": 0.5649, "step": 16395 }, { "epoch": 0.4786225010944112, "grad_norm": 1.05094668256943, "learning_rate": 3.093814205108804e-05, "loss": 0.6136, "step": 16400 }, { "epoch": 0.4787684225886473, "grad_norm": 1.097103947953059, "learning_rate": 3.092741145682774e-05, "loss": 0.6099, "step": 16405 }, { "epoch": 0.4789143440828834, "grad_norm": 1.0286617622228802, "learning_rate": 3.091668006443067e-05, "loss": 0.5911, "step": 16410 }, { "epoch": 0.4790602655771195, "grad_norm": 0.9191288172393072, "learning_rate": 3.090594787639584e-05, "loss": 0.6147, "step": 16415 }, { "epoch": 0.4792061870713556, "grad_norm": 1.1372882945790748, "learning_rate": 3.089521489522243e-05, "loss": 0.5636, "step": 16420 }, { "epoch": 0.47935210856559174, "grad_norm": 1.0693982931276715, "learning_rate": 3.088448112340982e-05, "loss": 0.5732, "step": 16425 }, { "epoch": 0.47949803005982783, "grad_norm": 1.07027696047337, "learning_rate": 3.0873746563457567e-05, "loss": 0.5641, "step": 16430 }, { "epoch": 0.4796439515540639, "grad_norm": 0.9514757147042575, "learning_rate": 3.0863011217865415e-05, "loss": 0.6107, "step": 16435 }, { "epoch": 0.4797898730483, "grad_norm": 0.8974841732761474, "learning_rate": 3.0852275089133305e-05, "loss": 0.5669, "step": 16440 }, { "epoch": 0.4799357945425361, "grad_norm": 0.9491273704131361, "learning_rate": 3.084153817976131e-05, "loss": 0.5835, "step": 16445 }, { "epoch": 0.48008171603677224, "grad_norm": 0.9386445190771993, "learning_rate": 3.083080049224976e-05, "loss": 0.5788, "step": 16450 }, { "epoch": 0.48022763753100833, "grad_norm": 1.1587704595737656, "learning_rate": 3.082006202909909e-05, "loss": 0.6252, "step": 16455 }, { "epoch": 0.4803735590252444, "grad_norm": 0.8934272110359924, "learning_rate": 3.0809322792809974e-05, "loss": 0.5625, "step": 16460 }, { "epoch": 0.4805194805194805, "grad_norm": 1.0051712165565045, "learning_rate": 3.079858278588324e-05, "loss": 0.5512, "step": 16465 }, { "epoch": 0.4806654020137166, "grad_norm": 0.924265478352745, "learning_rate": 3.0787842010819896e-05, "loss": 0.5612, "step": 16470 }, { "epoch": 0.48081132350795275, "grad_norm": 1.1785058206193082, "learning_rate": 3.0777100470121135e-05, "loss": 0.5665, "step": 16475 }, { "epoch": 0.48095724500218884, "grad_norm": 1.1115991749510084, "learning_rate": 3.0766358166288325e-05, "loss": 0.6064, "step": 16480 }, { "epoch": 0.4811031664964249, "grad_norm": 1.057264070379488, "learning_rate": 3.075561510182301e-05, "loss": 0.5368, "step": 16485 }, { "epoch": 0.481249087990661, "grad_norm": 1.02301090036176, "learning_rate": 3.074487127922691e-05, "loss": 0.5116, "step": 16490 }, { "epoch": 0.4813950094848971, "grad_norm": 0.9098775770077692, "learning_rate": 3.073412670100193e-05, "loss": 0.55, "step": 16495 }, { "epoch": 0.48154093097913325, "grad_norm": 1.0395773719043275, "learning_rate": 3.0723381369650144e-05, "loss": 0.6279, "step": 16500 }, { "epoch": 0.48168685247336934, "grad_norm": 1.0969355584335883, "learning_rate": 3.0712635287673806e-05, "loss": 0.6313, "step": 16505 }, { "epoch": 0.48183277396760543, "grad_norm": 1.0646441403544518, "learning_rate": 3.070188845757534e-05, "loss": 0.5381, "step": 16510 }, { "epoch": 0.4819786954618415, "grad_norm": 1.0026345696667833, "learning_rate": 3.069114088185735e-05, "loss": 0.6036, "step": 16515 }, { "epoch": 0.4821246169560776, "grad_norm": 1.1186335478111695, "learning_rate": 3.068039256302259e-05, "loss": 0.5318, "step": 16520 }, { "epoch": 0.48227053845031376, "grad_norm": 1.0032186205758673, "learning_rate": 3.066964350357403e-05, "loss": 0.6589, "step": 16525 }, { "epoch": 0.48241645994454985, "grad_norm": 1.1064033076938589, "learning_rate": 3.065889370601479e-05, "loss": 0.5713, "step": 16530 }, { "epoch": 0.48256238143878594, "grad_norm": 0.8758987582809168, "learning_rate": 3.064814317284814e-05, "loss": 0.5113, "step": 16535 }, { "epoch": 0.482708302933022, "grad_norm": 1.0407519143116468, "learning_rate": 3.063739190657756e-05, "loss": 0.599, "step": 16540 }, { "epoch": 0.4828542244272581, "grad_norm": 1.1284490311184938, "learning_rate": 3.0626639909706686e-05, "loss": 0.5651, "step": 16545 }, { "epoch": 0.48300014592149426, "grad_norm": 0.9132063911299511, "learning_rate": 3.0615887184739306e-05, "loss": 0.5937, "step": 16550 }, { "epoch": 0.48314606741573035, "grad_norm": 0.9788954521872006, "learning_rate": 3.06051337341794e-05, "loss": 0.5512, "step": 16555 }, { "epoch": 0.48329198890996644, "grad_norm": 1.095220995826207, "learning_rate": 3.05943795605311e-05, "loss": 0.588, "step": 16560 }, { "epoch": 0.48343791040420253, "grad_norm": 1.101814668046757, "learning_rate": 3.0583624666298744e-05, "loss": 0.5677, "step": 16565 }, { "epoch": 0.4835838318984386, "grad_norm": 1.0767350502821125, "learning_rate": 3.057286905398678e-05, "loss": 0.5672, "step": 16570 }, { "epoch": 0.48372975339267477, "grad_norm": 1.0145287764316528, "learning_rate": 3.0562112726099874e-05, "loss": 0.5659, "step": 16575 }, { "epoch": 0.48387567488691086, "grad_norm": 0.8951101173973824, "learning_rate": 3.0551355685142824e-05, "loss": 0.5689, "step": 16580 }, { "epoch": 0.48402159638114695, "grad_norm": 1.0388098563140675, "learning_rate": 3.054059793362062e-05, "loss": 0.6006, "step": 16585 }, { "epoch": 0.48416751787538304, "grad_norm": 1.094177681656709, "learning_rate": 3.052983947403839e-05, "loss": 0.5793, "step": 16590 }, { "epoch": 0.4843134393696191, "grad_norm": 0.9406811228138509, "learning_rate": 3.0519080308901446e-05, "loss": 0.5824, "step": 16595 }, { "epoch": 0.48445936086385527, "grad_norm": 1.235780288180848, "learning_rate": 3.0508320440715276e-05, "loss": 0.6251, "step": 16600 }, { "epoch": 0.48460528235809136, "grad_norm": 1.101615762660566, "learning_rate": 3.04975598719855e-05, "loss": 0.6339, "step": 16605 }, { "epoch": 0.48475120385232745, "grad_norm": 0.8748141487967571, "learning_rate": 3.048679860521793e-05, "loss": 0.5477, "step": 16610 }, { "epoch": 0.48489712534656354, "grad_norm": 1.1252109265861094, "learning_rate": 3.0476036642918515e-05, "loss": 0.635, "step": 16615 }, { "epoch": 0.48504304684079963, "grad_norm": 1.1513319214737583, "learning_rate": 3.046527398759339e-05, "loss": 0.6009, "step": 16620 }, { "epoch": 0.4851889683350358, "grad_norm": 1.2551123248600837, "learning_rate": 3.0454510641748822e-05, "loss": 0.6141, "step": 16625 }, { "epoch": 0.48533488982927186, "grad_norm": 1.031188379187192, "learning_rate": 3.044374660789128e-05, "loss": 0.6948, "step": 16630 }, { "epoch": 0.48548081132350795, "grad_norm": 0.9956876355652408, "learning_rate": 3.0432981888527363e-05, "loss": 0.5561, "step": 16635 }, { "epoch": 0.48562673281774404, "grad_norm": 1.1056522815640566, "learning_rate": 3.042221648616383e-05, "loss": 0.598, "step": 16640 }, { "epoch": 0.48577265431198013, "grad_norm": 1.0722816944801006, "learning_rate": 3.041145040330761e-05, "loss": 0.6027, "step": 16645 }, { "epoch": 0.4859185758062163, "grad_norm": 1.1311406265845234, "learning_rate": 3.040068364246579e-05, "loss": 0.5923, "step": 16650 }, { "epoch": 0.48606449730045237, "grad_norm": 0.9701960581456185, "learning_rate": 3.0389916206145602e-05, "loss": 0.5999, "step": 16655 }, { "epoch": 0.48621041879468846, "grad_norm": 1.1194453829949578, "learning_rate": 3.037914809685446e-05, "loss": 0.5755, "step": 16660 }, { "epoch": 0.48635634028892455, "grad_norm": 1.1052857023913059, "learning_rate": 3.0368379317099904e-05, "loss": 0.5864, "step": 16665 }, { "epoch": 0.48650226178316064, "grad_norm": 1.1250900258444685, "learning_rate": 3.0357609869389657e-05, "loss": 0.606, "step": 16670 }, { "epoch": 0.4866481832773968, "grad_norm": 1.1594551943657159, "learning_rate": 3.0346839756231577e-05, "loss": 0.5219, "step": 16675 }, { "epoch": 0.4867941047716329, "grad_norm": 1.0504391740277301, "learning_rate": 3.033606898013369e-05, "loss": 0.5188, "step": 16680 }, { "epoch": 0.48694002626586896, "grad_norm": 0.9968746885784527, "learning_rate": 3.0325297543604174e-05, "loss": 0.5715, "step": 16685 }, { "epoch": 0.48708594776010505, "grad_norm": 0.9227738120060264, "learning_rate": 3.031452544915136e-05, "loss": 0.5631, "step": 16690 }, { "epoch": 0.48723186925434114, "grad_norm": 1.079810228938708, "learning_rate": 3.0303752699283728e-05, "loss": 0.6024, "step": 16695 }, { "epoch": 0.4873777907485773, "grad_norm": 1.006269550053113, "learning_rate": 3.0292979296509906e-05, "loss": 0.576, "step": 16700 }, { "epoch": 0.4875237122428134, "grad_norm": 0.9359264658024572, "learning_rate": 3.02822052433387e-05, "loss": 0.5991, "step": 16705 }, { "epoch": 0.48766963373704947, "grad_norm": 1.0477115565446178, "learning_rate": 3.0271430542279033e-05, "loss": 0.5938, "step": 16710 }, { "epoch": 0.48781555523128556, "grad_norm": 1.1035232161775983, "learning_rate": 3.026065519584001e-05, "loss": 0.6556, "step": 16715 }, { "epoch": 0.48796147672552165, "grad_norm": 0.9596841525527193, "learning_rate": 3.0249879206530857e-05, "loss": 0.5701, "step": 16720 }, { "epoch": 0.4881073982197578, "grad_norm": 1.0794334018029093, "learning_rate": 3.023910257686098e-05, "loss": 0.6171, "step": 16725 }, { "epoch": 0.4882533197139939, "grad_norm": 0.9116137211414866, "learning_rate": 3.0228325309339907e-05, "loss": 0.6255, "step": 16730 }, { "epoch": 0.48839924120823, "grad_norm": 0.895479894629434, "learning_rate": 3.0217547406477327e-05, "loss": 0.5234, "step": 16735 }, { "epoch": 0.48854516270246606, "grad_norm": 1.2151650945823496, "learning_rate": 3.0206768870783086e-05, "loss": 0.5981, "step": 16740 }, { "epoch": 0.48869108419670215, "grad_norm": 1.0855610104739521, "learning_rate": 3.0195989704767154e-05, "loss": 0.5799, "step": 16745 }, { "epoch": 0.4888370056909383, "grad_norm": 1.3638768224077351, "learning_rate": 3.018520991093966e-05, "loss": 0.5795, "step": 16750 }, { "epoch": 0.4889829271851744, "grad_norm": 1.039542191491571, "learning_rate": 3.0174429491810897e-05, "loss": 0.5466, "step": 16755 }, { "epoch": 0.4891288486794105, "grad_norm": 1.2436322898659926, "learning_rate": 3.0163648449891275e-05, "loss": 0.6047, "step": 16760 }, { "epoch": 0.48927477017364657, "grad_norm": 1.0463341469666652, "learning_rate": 3.0152866787691365e-05, "loss": 0.5878, "step": 16765 }, { "epoch": 0.48942069166788266, "grad_norm": 0.9986395190908836, "learning_rate": 3.014208450772187e-05, "loss": 0.6148, "step": 16770 }, { "epoch": 0.4895666131621188, "grad_norm": 0.9508145375780831, "learning_rate": 3.013130161249365e-05, "loss": 0.5526, "step": 16775 }, { "epoch": 0.4897125346563549, "grad_norm": 1.1855955482255367, "learning_rate": 3.0120518104517713e-05, "loss": 0.6268, "step": 16780 }, { "epoch": 0.489858456150591, "grad_norm": 1.0269621056162834, "learning_rate": 3.0109733986305182e-05, "loss": 0.5716, "step": 16785 }, { "epoch": 0.49000437764482707, "grad_norm": 1.139210936347588, "learning_rate": 3.009894926036736e-05, "loss": 0.5592, "step": 16790 }, { "epoch": 0.49015029913906316, "grad_norm": 1.173464924838714, "learning_rate": 3.0088163929215668e-05, "loss": 0.6348, "step": 16795 }, { "epoch": 0.4902962206332993, "grad_norm": 0.8586206797685373, "learning_rate": 3.0077377995361662e-05, "loss": 0.5568, "step": 16800 }, { "epoch": 0.4904421421275354, "grad_norm": 1.1914081996807593, "learning_rate": 3.0066591461317047e-05, "loss": 0.6222, "step": 16805 }, { "epoch": 0.4905880636217715, "grad_norm": 0.8844532684601611, "learning_rate": 3.0055804329593678e-05, "loss": 0.6, "step": 16810 }, { "epoch": 0.4907339851160076, "grad_norm": 1.1051806903627077, "learning_rate": 3.004501660270353e-05, "loss": 0.6008, "step": 16815 }, { "epoch": 0.49087990661024367, "grad_norm": 1.1046818058469494, "learning_rate": 3.0034228283158738e-05, "loss": 0.5839, "step": 16820 }, { "epoch": 0.4910258281044798, "grad_norm": 0.9772652604582976, "learning_rate": 3.0023439373471556e-05, "loss": 0.6183, "step": 16825 }, { "epoch": 0.4911717495987159, "grad_norm": 0.99527660067659, "learning_rate": 3.0012649876154396e-05, "loss": 0.6558, "step": 16830 }, { "epoch": 0.491317671092952, "grad_norm": 1.1049813052801287, "learning_rate": 3.0001859793719778e-05, "loss": 0.5754, "step": 16835 }, { "epoch": 0.4914635925871881, "grad_norm": 1.0804230143070144, "learning_rate": 2.999106912868038e-05, "loss": 0.565, "step": 16840 }, { "epoch": 0.49160951408142417, "grad_norm": 0.9667300843938154, "learning_rate": 2.9980277883549014e-05, "loss": 0.5602, "step": 16845 }, { "epoch": 0.4917554355756603, "grad_norm": 1.112504019395495, "learning_rate": 2.9969486060838616e-05, "loss": 0.6162, "step": 16850 }, { "epoch": 0.4919013570698964, "grad_norm": 1.0123761453453675, "learning_rate": 2.9958693663062266e-05, "loss": 0.6031, "step": 16855 }, { "epoch": 0.4920472785641325, "grad_norm": 0.9328559186875812, "learning_rate": 2.9947900692733172e-05, "loss": 0.5659, "step": 16860 }, { "epoch": 0.4921932000583686, "grad_norm": 1.135837648885987, "learning_rate": 2.9937107152364684e-05, "loss": 0.6413, "step": 16865 }, { "epoch": 0.4923391215526047, "grad_norm": 1.0315662764197113, "learning_rate": 2.9926313044470287e-05, "loss": 0.5843, "step": 16870 }, { "epoch": 0.4924850430468408, "grad_norm": 1.056989609274433, "learning_rate": 2.9915518371563573e-05, "loss": 0.5851, "step": 16875 }, { "epoch": 0.4926309645410769, "grad_norm": 1.117294212353933, "learning_rate": 2.990472313615829e-05, "loss": 0.5715, "step": 16880 }, { "epoch": 0.492776886035313, "grad_norm": 1.0668832151570473, "learning_rate": 2.9893927340768323e-05, "loss": 0.5732, "step": 16885 }, { "epoch": 0.4929228075295491, "grad_norm": 1.0437664606850778, "learning_rate": 2.988313098790766e-05, "loss": 0.5577, "step": 16890 }, { "epoch": 0.4930687290237852, "grad_norm": 1.1182632433749098, "learning_rate": 2.987233408009043e-05, "loss": 0.5727, "step": 16895 }, { "epoch": 0.4932146505180213, "grad_norm": 0.9806326308210983, "learning_rate": 2.9861536619830908e-05, "loss": 0.5108, "step": 16900 }, { "epoch": 0.4933605720122574, "grad_norm": 1.0632672939693888, "learning_rate": 2.9850738609643476e-05, "loss": 0.6147, "step": 16905 }, { "epoch": 0.4935064935064935, "grad_norm": 1.0202524332496135, "learning_rate": 2.9839940052042658e-05, "loss": 0.6579, "step": 16910 }, { "epoch": 0.4936524150007296, "grad_norm": 0.9673321870150993, "learning_rate": 2.9829140949543095e-05, "loss": 0.6454, "step": 16915 }, { "epoch": 0.4937983364949657, "grad_norm": 0.9250203025433781, "learning_rate": 2.9818341304659558e-05, "loss": 0.5605, "step": 16920 }, { "epoch": 0.49394425798920183, "grad_norm": 1.17805820620995, "learning_rate": 2.980754111990695e-05, "loss": 0.6092, "step": 16925 }, { "epoch": 0.4940901794834379, "grad_norm": 0.9049157028388981, "learning_rate": 2.9796740397800294e-05, "loss": 0.5629, "step": 16930 }, { "epoch": 0.494236100977674, "grad_norm": 0.9992000241130848, "learning_rate": 2.978593914085474e-05, "loss": 0.5351, "step": 16935 }, { "epoch": 0.4943820224719101, "grad_norm": 1.0880880154066537, "learning_rate": 2.977513735158557e-05, "loss": 0.562, "step": 16940 }, { "epoch": 0.4945279439661462, "grad_norm": 1.025528763606253, "learning_rate": 2.9764335032508174e-05, "loss": 0.5494, "step": 16945 }, { "epoch": 0.49467386546038233, "grad_norm": 0.9083104501889877, "learning_rate": 2.975353218613807e-05, "loss": 0.57, "step": 16950 }, { "epoch": 0.4948197869546184, "grad_norm": 1.0445661233546493, "learning_rate": 2.9742728814990906e-05, "loss": 0.5815, "step": 16955 }, { "epoch": 0.4949657084488545, "grad_norm": 1.1064254230623647, "learning_rate": 2.9731924921582456e-05, "loss": 0.6064, "step": 16960 }, { "epoch": 0.4951116299430906, "grad_norm": 0.9843510816262551, "learning_rate": 2.9721120508428596e-05, "loss": 0.5466, "step": 16965 }, { "epoch": 0.4952575514373267, "grad_norm": 1.1181502424225254, "learning_rate": 2.9710315578045346e-05, "loss": 0.633, "step": 16970 }, { "epoch": 0.49540347293156284, "grad_norm": 1.084398136372078, "learning_rate": 2.969951013294883e-05, "loss": 0.559, "step": 16975 }, { "epoch": 0.49554939442579893, "grad_norm": 1.1208990016419313, "learning_rate": 2.9688704175655307e-05, "loss": 0.6358, "step": 16980 }, { "epoch": 0.495695315920035, "grad_norm": 0.869429962777699, "learning_rate": 2.9677897708681128e-05, "loss": 0.5541, "step": 16985 }, { "epoch": 0.4958412374142711, "grad_norm": 1.0566064012035865, "learning_rate": 2.9667090734542803e-05, "loss": 0.5793, "step": 16990 }, { "epoch": 0.4959871589085072, "grad_norm": 0.8986133297879705, "learning_rate": 2.9656283255756915e-05, "loss": 0.5479, "step": 16995 }, { "epoch": 0.49613308040274334, "grad_norm": 1.1670480632866398, "learning_rate": 2.9645475274840214e-05, "loss": 0.5958, "step": 17000 }, { "epoch": 0.49627900189697943, "grad_norm": 1.0595418026447003, "learning_rate": 2.9634666794309507e-05, "loss": 0.576, "step": 17005 }, { "epoch": 0.4964249233912155, "grad_norm": 1.0343596360481417, "learning_rate": 2.9623857816681773e-05, "loss": 0.6534, "step": 17010 }, { "epoch": 0.4965708448854516, "grad_norm": 1.2118023238726043, "learning_rate": 2.9613048344474087e-05, "loss": 0.5977, "step": 17015 }, { "epoch": 0.4967167663796877, "grad_norm": 0.8946716938813952, "learning_rate": 2.9602238380203623e-05, "loss": 0.5996, "step": 17020 }, { "epoch": 0.49686268787392385, "grad_norm": 1.0965484099482996, "learning_rate": 2.9591427926387693e-05, "loss": 0.5717, "step": 17025 }, { "epoch": 0.49700860936815994, "grad_norm": 1.203404582937764, "learning_rate": 2.95806169855437e-05, "loss": 0.585, "step": 17030 }, { "epoch": 0.497154530862396, "grad_norm": 1.0241335862567653, "learning_rate": 2.956980556018919e-05, "loss": 0.5898, "step": 17035 }, { "epoch": 0.4973004523566321, "grad_norm": 1.212790525642013, "learning_rate": 2.9558993652841795e-05, "loss": 0.6325, "step": 17040 }, { "epoch": 0.4974463738508682, "grad_norm": 1.0672694059007521, "learning_rate": 2.9548181266019277e-05, "loss": 0.586, "step": 17045 }, { "epoch": 0.49759229534510435, "grad_norm": 1.067985076293012, "learning_rate": 2.9537368402239496e-05, "loss": 0.6287, "step": 17050 }, { "epoch": 0.49773821683934044, "grad_norm": 1.1210875052539528, "learning_rate": 2.9526555064020427e-05, "loss": 0.573, "step": 17055 }, { "epoch": 0.49788413833357653, "grad_norm": 1.089874287773011, "learning_rate": 2.9515741253880164e-05, "loss": 0.591, "step": 17060 }, { "epoch": 0.4980300598278126, "grad_norm": 1.0910041107342514, "learning_rate": 2.95049269743369e-05, "loss": 0.5831, "step": 17065 }, { "epoch": 0.4981759813220487, "grad_norm": 0.9203871016363342, "learning_rate": 2.9494112227908955e-05, "loss": 0.5516, "step": 17070 }, { "epoch": 0.49832190281628486, "grad_norm": 0.9816651505927119, "learning_rate": 2.948329701711472e-05, "loss": 0.5643, "step": 17075 }, { "epoch": 0.49846782431052095, "grad_norm": 1.1430532292228752, "learning_rate": 2.9472481344472742e-05, "loss": 0.5994, "step": 17080 }, { "epoch": 0.49861374580475704, "grad_norm": 1.0681291883308852, "learning_rate": 2.9461665212501644e-05, "loss": 0.5766, "step": 17085 }, { "epoch": 0.4987596672989931, "grad_norm": 1.2322660112356114, "learning_rate": 2.9450848623720155e-05, "loss": 0.5961, "step": 17090 }, { "epoch": 0.4989055887932292, "grad_norm": 1.1629068826495832, "learning_rate": 2.944003158064713e-05, "loss": 0.5381, "step": 17095 }, { "epoch": 0.49905151028746536, "grad_norm": 1.1404379842908996, "learning_rate": 2.9429214085801525e-05, "loss": 0.6537, "step": 17100 }, { "epoch": 0.49919743178170145, "grad_norm": 1.0940607875008523, "learning_rate": 2.941839614170238e-05, "loss": 0.5714, "step": 17105 }, { "epoch": 0.49934335327593754, "grad_norm": 1.0299423097868212, "learning_rate": 2.940757775086887e-05, "loss": 0.6254, "step": 17110 }, { "epoch": 0.49948927477017363, "grad_norm": 0.9625733565818848, "learning_rate": 2.939675891582025e-05, "loss": 0.5756, "step": 17115 }, { "epoch": 0.4996351962644097, "grad_norm": 1.1749270933065001, "learning_rate": 2.93859396390759e-05, "loss": 0.544, "step": 17120 }, { "epoch": 0.49978111775864587, "grad_norm": 1.1419196198724284, "learning_rate": 2.937511992315527e-05, "loss": 0.5359, "step": 17125 }, { "epoch": 0.49992703925288196, "grad_norm": 1.0460114285291608, "learning_rate": 2.936429977057795e-05, "loss": 0.5768, "step": 17130 }, { "epoch": 0.500072960747118, "grad_norm": 0.9495569851449527, "learning_rate": 2.9353479183863608e-05, "loss": 0.5854, "step": 17135 }, { "epoch": 0.5002188822413541, "grad_norm": 1.0947634323998576, "learning_rate": 2.934265816553202e-05, "loss": 0.5748, "step": 17140 }, { "epoch": 0.5003648037355902, "grad_norm": 1.0812227117142743, "learning_rate": 2.9331836718103072e-05, "loss": 0.5982, "step": 17145 }, { "epoch": 0.5005107252298263, "grad_norm": 1.1583502319131789, "learning_rate": 2.9321014844096715e-05, "loss": 0.6301, "step": 17150 }, { "epoch": 0.5006566467240624, "grad_norm": 1.0244266547667469, "learning_rate": 2.9310192546033055e-05, "loss": 0.5376, "step": 17155 }, { "epoch": 0.5008025682182986, "grad_norm": 1.0900180064398863, "learning_rate": 2.929936982643225e-05, "loss": 0.5834, "step": 17160 }, { "epoch": 0.5009484897125347, "grad_norm": 1.100390758039939, "learning_rate": 2.9288546687814573e-05, "loss": 0.5617, "step": 17165 }, { "epoch": 0.5010944112067708, "grad_norm": 1.1087549270427306, "learning_rate": 2.9277723132700392e-05, "loss": 0.6625, "step": 17170 }, { "epoch": 0.5012403327010069, "grad_norm": 1.1275671835770074, "learning_rate": 2.9266899163610173e-05, "loss": 0.5694, "step": 17175 }, { "epoch": 0.501386254195243, "grad_norm": 1.301754425842359, "learning_rate": 2.9256074783064492e-05, "loss": 0.616, "step": 17180 }, { "epoch": 0.501532175689479, "grad_norm": 0.9801953455936939, "learning_rate": 2.9245249993583996e-05, "loss": 0.6054, "step": 17185 }, { "epoch": 0.5016780971837151, "grad_norm": 1.3169724917244068, "learning_rate": 2.9234424797689448e-05, "loss": 0.5957, "step": 17190 }, { "epoch": 0.5018240186779512, "grad_norm": 1.0964113878658048, "learning_rate": 2.9223599197901692e-05, "loss": 0.5315, "step": 17195 }, { "epoch": 0.5019699401721873, "grad_norm": 1.116645100721211, "learning_rate": 2.9212773196741673e-05, "loss": 0.6488, "step": 17200 }, { "epoch": 0.5021158616664234, "grad_norm": 1.1340473647607214, "learning_rate": 2.9201946796730417e-05, "loss": 0.5862, "step": 17205 }, { "epoch": 0.5022617831606596, "grad_norm": 0.9973146248286466, "learning_rate": 2.9191120000389066e-05, "loss": 0.543, "step": 17210 }, { "epoch": 0.5024077046548957, "grad_norm": 0.8232630283002779, "learning_rate": 2.9180292810238836e-05, "loss": 0.5329, "step": 17215 }, { "epoch": 0.5025536261491318, "grad_norm": 1.0003801033715258, "learning_rate": 2.916946522880104e-05, "loss": 0.5445, "step": 17220 }, { "epoch": 0.5026995476433679, "grad_norm": 0.8959837413992002, "learning_rate": 2.91586372585971e-05, "loss": 0.5692, "step": 17225 }, { "epoch": 0.502845469137604, "grad_norm": 1.0163054443683688, "learning_rate": 2.9147808902148488e-05, "loss": 0.6168, "step": 17230 }, { "epoch": 0.5029913906318401, "grad_norm": 1.1127307451846904, "learning_rate": 2.9136980161976797e-05, "loss": 0.6375, "step": 17235 }, { "epoch": 0.5031373121260762, "grad_norm": 1.076284732878839, "learning_rate": 2.91261510406037e-05, "loss": 0.5912, "step": 17240 }, { "epoch": 0.5032832336203122, "grad_norm": 1.0139824967738633, "learning_rate": 2.911532154055096e-05, "loss": 0.5759, "step": 17245 }, { "epoch": 0.5034291551145483, "grad_norm": 1.19276931955239, "learning_rate": 2.9104491664340434e-05, "loss": 0.6087, "step": 17250 }, { "epoch": 0.5035750766087844, "grad_norm": 1.0511522848016808, "learning_rate": 2.9093661414494057e-05, "loss": 0.5587, "step": 17255 }, { "epoch": 0.5037209981030206, "grad_norm": 0.9211614362139381, "learning_rate": 2.9082830793533856e-05, "loss": 0.5971, "step": 17260 }, { "epoch": 0.5038669195972567, "grad_norm": 1.0145505004904072, "learning_rate": 2.907199980398194e-05, "loss": 0.5494, "step": 17265 }, { "epoch": 0.5040128410914928, "grad_norm": 0.9686619057838534, "learning_rate": 2.906116844836051e-05, "loss": 0.5692, "step": 17270 }, { "epoch": 0.5041587625857289, "grad_norm": 1.0442973920811878, "learning_rate": 2.9050336729191857e-05, "loss": 0.5579, "step": 17275 }, { "epoch": 0.504304684079965, "grad_norm": 1.1239191601778034, "learning_rate": 2.9039504648998328e-05, "loss": 0.569, "step": 17280 }, { "epoch": 0.5044506055742011, "grad_norm": 1.2276443448948418, "learning_rate": 2.90286722103024e-05, "loss": 0.5255, "step": 17285 }, { "epoch": 0.5045965270684372, "grad_norm": 1.2029144942229784, "learning_rate": 2.9017839415626584e-05, "loss": 0.5834, "step": 17290 }, { "epoch": 0.5047424485626733, "grad_norm": 0.9857402848906581, "learning_rate": 2.9007006267493525e-05, "loss": 0.5827, "step": 17295 }, { "epoch": 0.5048883700569093, "grad_norm": 1.328432638628235, "learning_rate": 2.8996172768425907e-05, "loss": 0.5532, "step": 17300 }, { "epoch": 0.5050342915511454, "grad_norm": 1.0045094529702179, "learning_rate": 2.898533892094651e-05, "loss": 0.5265, "step": 17305 }, { "epoch": 0.5051802130453816, "grad_norm": 0.9651319526000371, "learning_rate": 2.897450472757821e-05, "loss": 0.6008, "step": 17310 }, { "epoch": 0.5053261345396177, "grad_norm": 1.0001540404332667, "learning_rate": 2.896367019084394e-05, "loss": 0.6061, "step": 17315 }, { "epoch": 0.5054720560338538, "grad_norm": 1.0646427289389395, "learning_rate": 2.895283531326674e-05, "loss": 0.5672, "step": 17320 }, { "epoch": 0.5056179775280899, "grad_norm": 0.9596517328676479, "learning_rate": 2.8942000097369688e-05, "loss": 0.6075, "step": 17325 }, { "epoch": 0.505763899022326, "grad_norm": 1.060332198099877, "learning_rate": 2.893116454567599e-05, "loss": 0.5943, "step": 17330 }, { "epoch": 0.5059098205165621, "grad_norm": 1.0262001565236243, "learning_rate": 2.8920328660708896e-05, "loss": 0.5723, "step": 17335 }, { "epoch": 0.5060557420107982, "grad_norm": 1.29144940433142, "learning_rate": 2.8909492444991747e-05, "loss": 0.5843, "step": 17340 }, { "epoch": 0.5062016635050343, "grad_norm": 1.0781561513046394, "learning_rate": 2.8898655901047962e-05, "loss": 0.5963, "step": 17345 }, { "epoch": 0.5063475849992704, "grad_norm": 1.117393026989122, "learning_rate": 2.888781903140102e-05, "loss": 0.5466, "step": 17350 }, { "epoch": 0.5064935064935064, "grad_norm": 1.0736764317057645, "learning_rate": 2.8876981838574503e-05, "loss": 0.6062, "step": 17355 }, { "epoch": 0.5066394279877426, "grad_norm": 1.17194833290902, "learning_rate": 2.8866144325092043e-05, "loss": 0.6087, "step": 17360 }, { "epoch": 0.5067853494819787, "grad_norm": 0.9703464444738195, "learning_rate": 2.8855306493477358e-05, "loss": 0.6191, "step": 17365 }, { "epoch": 0.5069312709762148, "grad_norm": 0.9410884438624202, "learning_rate": 2.8844468346254255e-05, "loss": 0.6091, "step": 17370 }, { "epoch": 0.5070771924704509, "grad_norm": 1.2207950256953726, "learning_rate": 2.8833629885946577e-05, "loss": 0.6219, "step": 17375 }, { "epoch": 0.507223113964687, "grad_norm": 0.8974910594482379, "learning_rate": 2.882279111507828e-05, "loss": 0.5796, "step": 17380 }, { "epoch": 0.5073690354589231, "grad_norm": 1.1175635016786833, "learning_rate": 2.8811952036173355e-05, "loss": 0.6583, "step": 17385 }, { "epoch": 0.5075149569531592, "grad_norm": 0.9872565826231327, "learning_rate": 2.8801112651755905e-05, "loss": 0.5826, "step": 17390 }, { "epoch": 0.5076608784473953, "grad_norm": 1.1222368721191303, "learning_rate": 2.8790272964350068e-05, "loss": 0.599, "step": 17395 }, { "epoch": 0.5078067999416314, "grad_norm": 0.9814023266013103, "learning_rate": 2.8779432976480068e-05, "loss": 0.5664, "step": 17400 }, { "epoch": 0.5079527214358674, "grad_norm": 1.0228487372019415, "learning_rate": 2.876859269067022e-05, "loss": 0.546, "step": 17405 }, { "epoch": 0.5080986429301036, "grad_norm": 1.092224997464901, "learning_rate": 2.8757752109444863e-05, "loss": 0.6167, "step": 17410 }, { "epoch": 0.5082445644243397, "grad_norm": 1.1356116876873803, "learning_rate": 2.8746911235328434e-05, "loss": 0.5678, "step": 17415 }, { "epoch": 0.5083904859185758, "grad_norm": 1.037995364495429, "learning_rate": 2.8736070070845438e-05, "loss": 0.6021, "step": 17420 }, { "epoch": 0.5085364074128119, "grad_norm": 1.1460497432059433, "learning_rate": 2.872522861852044e-05, "loss": 0.6667, "step": 17425 }, { "epoch": 0.508682328907048, "grad_norm": 1.0397089702505213, "learning_rate": 2.871438688087807e-05, "loss": 0.5932, "step": 17430 }, { "epoch": 0.5088282504012841, "grad_norm": 0.945489975867667, "learning_rate": 2.8703544860443038e-05, "loss": 0.616, "step": 17435 }, { "epoch": 0.5089741718955202, "grad_norm": 0.9287097347610275, "learning_rate": 2.869270255974011e-05, "loss": 0.5479, "step": 17440 }, { "epoch": 0.5091200933897563, "grad_norm": 1.0789326227101652, "learning_rate": 2.8681859981294112e-05, "loss": 0.5944, "step": 17445 }, { "epoch": 0.5092660148839924, "grad_norm": 0.9248640116604638, "learning_rate": 2.8671017127629946e-05, "loss": 0.5989, "step": 17450 }, { "epoch": 0.5094119363782285, "grad_norm": 0.9435524458654166, "learning_rate": 2.866017400127256e-05, "loss": 0.5973, "step": 17455 }, { "epoch": 0.5095578578724647, "grad_norm": 0.8594193716207245, "learning_rate": 2.8649330604746998e-05, "loss": 0.5361, "step": 17460 }, { "epoch": 0.5097037793667007, "grad_norm": 1.1668699217409337, "learning_rate": 2.863848694057834e-05, "loss": 0.5746, "step": 17465 }, { "epoch": 0.5098497008609368, "grad_norm": 1.0228725378398382, "learning_rate": 2.8627643011291733e-05, "loss": 0.5623, "step": 17470 }, { "epoch": 0.5099956223551729, "grad_norm": 1.230049338564838, "learning_rate": 2.8616798819412393e-05, "loss": 0.6605, "step": 17475 }, { "epoch": 0.510141543849409, "grad_norm": 0.9505145364168149, "learning_rate": 2.8605954367465587e-05, "loss": 0.5223, "step": 17480 }, { "epoch": 0.5102874653436451, "grad_norm": 1.0710982882944622, "learning_rate": 2.8595109657976655e-05, "loss": 0.5854, "step": 17485 }, { "epoch": 0.5104333868378812, "grad_norm": 1.2372012507606343, "learning_rate": 2.858426469347098e-05, "loss": 0.6904, "step": 17490 }, { "epoch": 0.5105793083321173, "grad_norm": 0.9376638153286584, "learning_rate": 2.8573419476474016e-05, "loss": 0.5832, "step": 17495 }, { "epoch": 0.5107252298263534, "grad_norm": 1.0187019226979341, "learning_rate": 2.8562574009511294e-05, "loss": 0.5794, "step": 17500 }, { "epoch": 0.5108711513205896, "grad_norm": 1.1340581306982422, "learning_rate": 2.8551728295108355e-05, "loss": 0.581, "step": 17505 }, { "epoch": 0.5110170728148257, "grad_norm": 1.086329309984347, "learning_rate": 2.8540882335790847e-05, "loss": 0.5763, "step": 17510 }, { "epoch": 0.5111629943090618, "grad_norm": 1.0538057847745117, "learning_rate": 2.8530036134084443e-05, "loss": 0.5662, "step": 17515 }, { "epoch": 0.5113089158032978, "grad_norm": 1.1087439246944333, "learning_rate": 2.8519189692514893e-05, "loss": 0.5533, "step": 17520 }, { "epoch": 0.5114548372975339, "grad_norm": 1.034907845803554, "learning_rate": 2.850834301360798e-05, "loss": 0.5684, "step": 17525 }, { "epoch": 0.51160075879177, "grad_norm": 1.1065832531853204, "learning_rate": 2.8497496099889564e-05, "loss": 0.5744, "step": 17530 }, { "epoch": 0.5117466802860061, "grad_norm": 1.1935555148762191, "learning_rate": 2.8486648953885558e-05, "loss": 0.6193, "step": 17535 }, { "epoch": 0.5118926017802422, "grad_norm": 1.2160283155081548, "learning_rate": 2.8475801578121906e-05, "loss": 0.5721, "step": 17540 }, { "epoch": 0.5120385232744783, "grad_norm": 1.0087517141440323, "learning_rate": 2.8464953975124635e-05, "loss": 0.5693, "step": 17545 }, { "epoch": 0.5121844447687144, "grad_norm": 1.046991254894614, "learning_rate": 2.845410614741982e-05, "loss": 0.5483, "step": 17550 }, { "epoch": 0.5123303662629506, "grad_norm": 1.2566785090116803, "learning_rate": 2.8443258097533555e-05, "loss": 0.565, "step": 17555 }, { "epoch": 0.5124762877571867, "grad_norm": 1.030136651257968, "learning_rate": 2.843240982799203e-05, "loss": 0.5895, "step": 17560 }, { "epoch": 0.5126222092514228, "grad_norm": 1.0161408067941047, "learning_rate": 2.8421561341321463e-05, "loss": 0.6342, "step": 17565 }, { "epoch": 0.5127681307456589, "grad_norm": 1.0011792332447376, "learning_rate": 2.8410712640048126e-05, "loss": 0.557, "step": 17570 }, { "epoch": 0.512914052239895, "grad_norm": 1.2349378635849713, "learning_rate": 2.8399863726698335e-05, "loss": 0.5977, "step": 17575 }, { "epoch": 0.513059973734131, "grad_norm": 1.0698154914571276, "learning_rate": 2.8389014603798475e-05, "loss": 0.5276, "step": 17580 }, { "epoch": 0.5132058952283671, "grad_norm": 1.0035111125310359, "learning_rate": 2.8378165273874957e-05, "loss": 0.5122, "step": 17585 }, { "epoch": 0.5133518167226032, "grad_norm": 1.0462222071126355, "learning_rate": 2.836731573945426e-05, "loss": 0.5474, "step": 17590 }, { "epoch": 0.5134977382168393, "grad_norm": 0.9428497527551408, "learning_rate": 2.8356466003062894e-05, "loss": 0.5983, "step": 17595 }, { "epoch": 0.5136436597110754, "grad_norm": 0.9175667941487953, "learning_rate": 2.8345616067227415e-05, "loss": 0.5236, "step": 17600 }, { "epoch": 0.5137895812053116, "grad_norm": 1.0710161454231686, "learning_rate": 2.8334765934474448e-05, "loss": 0.6276, "step": 17605 }, { "epoch": 0.5139355026995477, "grad_norm": 1.1499209811244444, "learning_rate": 2.8323915607330635e-05, "loss": 0.6508, "step": 17610 }, { "epoch": 0.5140814241937838, "grad_norm": 1.3424452013333985, "learning_rate": 2.8313065088322693e-05, "loss": 0.6246, "step": 17615 }, { "epoch": 0.5142273456880199, "grad_norm": 1.1100830168771811, "learning_rate": 2.8302214379977354e-05, "loss": 0.5859, "step": 17620 }, { "epoch": 0.514373267182256, "grad_norm": 0.9934520833845969, "learning_rate": 2.8291363484821422e-05, "loss": 0.5781, "step": 17625 }, { "epoch": 0.514519188676492, "grad_norm": 0.9340852176261393, "learning_rate": 2.828051240538172e-05, "loss": 0.57, "step": 17630 }, { "epoch": 0.5146651101707281, "grad_norm": 1.13110737250981, "learning_rate": 2.826966114418512e-05, "loss": 0.5069, "step": 17635 }, { "epoch": 0.5148110316649642, "grad_norm": 1.1937072726531144, "learning_rate": 2.8258809703758553e-05, "loss": 0.5909, "step": 17640 }, { "epoch": 0.5149569531592003, "grad_norm": 1.1477588448061466, "learning_rate": 2.8247958086628973e-05, "loss": 0.6076, "step": 17645 }, { "epoch": 0.5151028746534364, "grad_norm": 0.9959273415041177, "learning_rate": 2.8237106295323384e-05, "loss": 0.5674, "step": 17650 }, { "epoch": 0.5152487961476726, "grad_norm": 1.0391448175149716, "learning_rate": 2.8226254332368822e-05, "loss": 0.5276, "step": 17655 }, { "epoch": 0.5153947176419087, "grad_norm": 1.1358969477768783, "learning_rate": 2.821540220029238e-05, "loss": 0.6381, "step": 17660 }, { "epoch": 0.5155406391361448, "grad_norm": 1.0831653988255865, "learning_rate": 2.8204549901621173e-05, "loss": 0.6323, "step": 17665 }, { "epoch": 0.5156865606303809, "grad_norm": 0.9107220459306797, "learning_rate": 2.8193697438882365e-05, "loss": 0.6691, "step": 17670 }, { "epoch": 0.515832482124617, "grad_norm": 1.0772234751138587, "learning_rate": 2.8182844814603153e-05, "loss": 0.6064, "step": 17675 }, { "epoch": 0.515978403618853, "grad_norm": 1.1700468855762125, "learning_rate": 2.817199203131076e-05, "loss": 0.5742, "step": 17680 }, { "epoch": 0.5161243251130891, "grad_norm": 1.0484436757591806, "learning_rate": 2.816113909153249e-05, "loss": 0.5272, "step": 17685 }, { "epoch": 0.5162702466073252, "grad_norm": 1.2667027710767873, "learning_rate": 2.8150285997795623e-05, "loss": 0.6175, "step": 17690 }, { "epoch": 0.5164161681015613, "grad_norm": 1.0548303782894322, "learning_rate": 2.8139432752627525e-05, "loss": 0.556, "step": 17695 }, { "epoch": 0.5165620895957974, "grad_norm": 1.1023287926030914, "learning_rate": 2.812857935855556e-05, "loss": 0.6358, "step": 17700 }, { "epoch": 0.5167080110900336, "grad_norm": 1.0250801973355461, "learning_rate": 2.8117725818107153e-05, "loss": 0.5528, "step": 17705 }, { "epoch": 0.5168539325842697, "grad_norm": 1.043629832466394, "learning_rate": 2.810687213380976e-05, "loss": 0.5889, "step": 17710 }, { "epoch": 0.5169998540785058, "grad_norm": 1.1944446589197606, "learning_rate": 2.809601830819084e-05, "loss": 0.6054, "step": 17715 }, { "epoch": 0.5171457755727419, "grad_norm": 1.2055329412248967, "learning_rate": 2.8085164343777935e-05, "loss": 0.6046, "step": 17720 }, { "epoch": 0.517291697066978, "grad_norm": 1.3661658444564035, "learning_rate": 2.8074310243098585e-05, "loss": 0.6454, "step": 17725 }, { "epoch": 0.5174376185612141, "grad_norm": 0.9163626163591251, "learning_rate": 2.8063456008680366e-05, "loss": 0.5323, "step": 17730 }, { "epoch": 0.5175835400554502, "grad_norm": 1.1353270728921936, "learning_rate": 2.8052601643050896e-05, "loss": 0.6316, "step": 17735 }, { "epoch": 0.5177294615496862, "grad_norm": 1.0294023526885498, "learning_rate": 2.8041747148737808e-05, "loss": 0.564, "step": 17740 }, { "epoch": 0.5178753830439223, "grad_norm": 1.1887120183300548, "learning_rate": 2.8030892528268782e-05, "loss": 0.6252, "step": 17745 }, { "epoch": 0.5180213045381584, "grad_norm": 1.00884804778949, "learning_rate": 2.802003778417151e-05, "loss": 0.5356, "step": 17750 }, { "epoch": 0.5181672260323946, "grad_norm": 0.9203056843011136, "learning_rate": 2.800918291897372e-05, "loss": 0.5749, "step": 17755 }, { "epoch": 0.5183131475266307, "grad_norm": 1.0578741002197878, "learning_rate": 2.7998327935203195e-05, "loss": 0.5998, "step": 17760 }, { "epoch": 0.5184590690208668, "grad_norm": 0.9952829087159073, "learning_rate": 2.7987472835387697e-05, "loss": 0.6376, "step": 17765 }, { "epoch": 0.5186049905151029, "grad_norm": 1.057399174960197, "learning_rate": 2.797661762205504e-05, "loss": 0.5895, "step": 17770 }, { "epoch": 0.518750912009339, "grad_norm": 1.1917805036341311, "learning_rate": 2.7965762297733072e-05, "loss": 0.6051, "step": 17775 }, { "epoch": 0.5188968335035751, "grad_norm": 1.0186513632770766, "learning_rate": 2.7954906864949658e-05, "loss": 0.5908, "step": 17780 }, { "epoch": 0.5190427549978112, "grad_norm": 1.234953846598363, "learning_rate": 2.7944051326232678e-05, "loss": 0.6182, "step": 17785 }, { "epoch": 0.5191886764920473, "grad_norm": 0.9683449140657164, "learning_rate": 2.7933195684110048e-05, "loss": 0.5714, "step": 17790 }, { "epoch": 0.5193345979862833, "grad_norm": 0.9991210469099925, "learning_rate": 2.792233994110972e-05, "loss": 0.5679, "step": 17795 }, { "epoch": 0.5194805194805194, "grad_norm": 1.0102242166801076, "learning_rate": 2.7911484099759642e-05, "loss": 0.6158, "step": 17800 }, { "epoch": 0.5196264409747556, "grad_norm": 1.1314710282394644, "learning_rate": 2.7900628162587824e-05, "loss": 0.6181, "step": 17805 }, { "epoch": 0.5197723624689917, "grad_norm": 0.9824366087518185, "learning_rate": 2.7889772132122244e-05, "loss": 0.6119, "step": 17810 }, { "epoch": 0.5199182839632278, "grad_norm": 1.2106457589656532, "learning_rate": 2.787891601089094e-05, "loss": 0.5836, "step": 17815 }, { "epoch": 0.5200642054574639, "grad_norm": 0.8705621690175652, "learning_rate": 2.786805980142197e-05, "loss": 0.5748, "step": 17820 }, { "epoch": 0.5202101269517, "grad_norm": 0.8682835390098648, "learning_rate": 2.7857203506243396e-05, "loss": 0.5666, "step": 17825 }, { "epoch": 0.5203560484459361, "grad_norm": 0.9142846059466999, "learning_rate": 2.784634712788332e-05, "loss": 0.548, "step": 17830 }, { "epoch": 0.5205019699401722, "grad_norm": 0.8955753944313541, "learning_rate": 2.7835490668869847e-05, "loss": 0.5036, "step": 17835 }, { "epoch": 0.5206478914344083, "grad_norm": 0.9681870383507004, "learning_rate": 2.7824634131731104e-05, "loss": 0.5781, "step": 17840 }, { "epoch": 0.5207938129286444, "grad_norm": 1.0304916224598168, "learning_rate": 2.7813777518995242e-05, "loss": 0.5827, "step": 17845 }, { "epoch": 0.5209397344228804, "grad_norm": 0.9067291131415092, "learning_rate": 2.7802920833190415e-05, "loss": 0.5749, "step": 17850 }, { "epoch": 0.5210856559171166, "grad_norm": 1.246931610098259, "learning_rate": 2.7792064076844827e-05, "loss": 0.556, "step": 17855 }, { "epoch": 0.5212315774113527, "grad_norm": 1.1713973596950293, "learning_rate": 2.7781207252486657e-05, "loss": 0.5852, "step": 17860 }, { "epoch": 0.5213774989055888, "grad_norm": 1.1311884209914094, "learning_rate": 2.7770350362644133e-05, "loss": 0.5603, "step": 17865 }, { "epoch": 0.5215234203998249, "grad_norm": 1.1811047801665386, "learning_rate": 2.7759493409845465e-05, "loss": 0.5867, "step": 17870 }, { "epoch": 0.521669341894061, "grad_norm": 0.9313827278867771, "learning_rate": 2.774863639661892e-05, "loss": 0.5281, "step": 17875 }, { "epoch": 0.5218152633882971, "grad_norm": 1.0879431842096456, "learning_rate": 2.7737779325492745e-05, "loss": 0.6127, "step": 17880 }, { "epoch": 0.5219611848825332, "grad_norm": 0.9248057638527426, "learning_rate": 2.7726922198995213e-05, "loss": 0.5508, "step": 17885 }, { "epoch": 0.5221071063767693, "grad_norm": 1.181503588169849, "learning_rate": 2.7716065019654608e-05, "loss": 0.5795, "step": 17890 }, { "epoch": 0.5222530278710054, "grad_norm": 1.1629537044355318, "learning_rate": 2.770520778999922e-05, "loss": 0.5591, "step": 17895 }, { "epoch": 0.5223989493652414, "grad_norm": 1.006313492054334, "learning_rate": 2.7694350512557377e-05, "loss": 0.5426, "step": 17900 }, { "epoch": 0.5225448708594777, "grad_norm": 1.0427865429987986, "learning_rate": 2.7683493189857384e-05, "loss": 0.6002, "step": 17905 }, { "epoch": 0.5226907923537137, "grad_norm": 0.9388058197858173, "learning_rate": 2.767263582442757e-05, "loss": 0.5468, "step": 17910 }, { "epoch": 0.5228367138479498, "grad_norm": 0.9567632746630504, "learning_rate": 2.766177841879629e-05, "loss": 0.5749, "step": 17915 }, { "epoch": 0.5229826353421859, "grad_norm": 0.9371542371492836, "learning_rate": 2.765092097549187e-05, "loss": 0.5761, "step": 17920 }, { "epoch": 0.523128556836422, "grad_norm": 0.9180796385506896, "learning_rate": 2.76400634970427e-05, "loss": 0.5573, "step": 17925 }, { "epoch": 0.5232744783306581, "grad_norm": 0.99396388799466, "learning_rate": 2.7629205985977115e-05, "loss": 0.5328, "step": 17930 }, { "epoch": 0.5234203998248942, "grad_norm": 1.0635640291886472, "learning_rate": 2.761834844482351e-05, "loss": 0.5835, "step": 17935 }, { "epoch": 0.5235663213191303, "grad_norm": 1.010000245062592, "learning_rate": 2.7607490876110252e-05, "loss": 0.567, "step": 17940 }, { "epoch": 0.5237122428133664, "grad_norm": 0.9038317069670091, "learning_rate": 2.759663328236574e-05, "loss": 0.514, "step": 17945 }, { "epoch": 0.5238581643076025, "grad_norm": 1.0290717981877628, "learning_rate": 2.7585775666118363e-05, "loss": 0.5362, "step": 17950 }, { "epoch": 0.5240040858018387, "grad_norm": 1.0183765847627329, "learning_rate": 2.7574918029896518e-05, "loss": 0.6522, "step": 17955 }, { "epoch": 0.5241500072960747, "grad_norm": 0.870909947389803, "learning_rate": 2.7564060376228616e-05, "loss": 0.5514, "step": 17960 }, { "epoch": 0.5242959287903108, "grad_norm": 1.0534903074153465, "learning_rate": 2.7553202707643055e-05, "loss": 0.6032, "step": 17965 }, { "epoch": 0.5244418502845469, "grad_norm": 1.349471714667913, "learning_rate": 2.7542345026668255e-05, "loss": 0.5419, "step": 17970 }, { "epoch": 0.524587771778783, "grad_norm": 1.1253828129285237, "learning_rate": 2.7531487335832612e-05, "loss": 0.5845, "step": 17975 }, { "epoch": 0.5247336932730191, "grad_norm": 0.9818731466670688, "learning_rate": 2.7520629637664568e-05, "loss": 0.5717, "step": 17980 }, { "epoch": 0.5248796147672552, "grad_norm": 1.1678802119041052, "learning_rate": 2.750977193469253e-05, "loss": 0.5859, "step": 17985 }, { "epoch": 0.5250255362614913, "grad_norm": 0.8589280890552412, "learning_rate": 2.7498914229444905e-05, "loss": 0.5325, "step": 17990 }, { "epoch": 0.5251714577557274, "grad_norm": 1.1247062103077785, "learning_rate": 2.7488056524450124e-05, "loss": 0.5986, "step": 17995 }, { "epoch": 0.5253173792499635, "grad_norm": 1.1271651355640557, "learning_rate": 2.74771988222366e-05, "loss": 0.5751, "step": 18000 }, { "epoch": 0.5254633007441997, "grad_norm": 1.0204284137484032, "learning_rate": 2.7466341125332755e-05, "loss": 0.542, "step": 18005 }, { "epoch": 0.5256092222384358, "grad_norm": 1.053005753691077, "learning_rate": 2.745548343626701e-05, "loss": 0.5795, "step": 18010 }, { "epoch": 0.5257551437326718, "grad_norm": 1.105690091608904, "learning_rate": 2.744462575756779e-05, "loss": 0.6801, "step": 18015 }, { "epoch": 0.5259010652269079, "grad_norm": 1.178388275086113, "learning_rate": 2.743376809176349e-05, "loss": 0.6182, "step": 18020 }, { "epoch": 0.526046986721144, "grad_norm": 0.9457427140225922, "learning_rate": 2.742291044138252e-05, "loss": 0.558, "step": 18025 }, { "epoch": 0.5261929082153801, "grad_norm": 0.9708408525287296, "learning_rate": 2.7412052808953297e-05, "loss": 0.5676, "step": 18030 }, { "epoch": 0.5263388297096162, "grad_norm": 1.0559828569230834, "learning_rate": 2.740119519700423e-05, "loss": 0.5693, "step": 18035 }, { "epoch": 0.5264847512038523, "grad_norm": 0.9354915664613574, "learning_rate": 2.7390337608063698e-05, "loss": 0.5494, "step": 18040 }, { "epoch": 0.5266306726980884, "grad_norm": 1.061313471885252, "learning_rate": 2.7379480044660093e-05, "loss": 0.6198, "step": 18045 }, { "epoch": 0.5267765941923245, "grad_norm": 1.1000362431286346, "learning_rate": 2.7368622509321824e-05, "loss": 0.5672, "step": 18050 }, { "epoch": 0.5269225156865607, "grad_norm": 0.9563107888921746, "learning_rate": 2.7357765004577253e-05, "loss": 0.5479, "step": 18055 }, { "epoch": 0.5270684371807968, "grad_norm": 1.009484929738099, "learning_rate": 2.7346907532954757e-05, "loss": 0.4874, "step": 18060 }, { "epoch": 0.5272143586750329, "grad_norm": 1.1151126734438044, "learning_rate": 2.7336050096982708e-05, "loss": 0.5965, "step": 18065 }, { "epoch": 0.527360280169269, "grad_norm": 1.118748882626728, "learning_rate": 2.7325192699189444e-05, "loss": 0.5801, "step": 18070 }, { "epoch": 0.527506201663505, "grad_norm": 1.0036223727339522, "learning_rate": 2.7314335342103332e-05, "loss": 0.5876, "step": 18075 }, { "epoch": 0.5276521231577411, "grad_norm": 1.3129092811257328, "learning_rate": 2.7303478028252693e-05, "loss": 0.6509, "step": 18080 }, { "epoch": 0.5277980446519772, "grad_norm": 1.14082140899177, "learning_rate": 2.729262076016588e-05, "loss": 0.6252, "step": 18085 }, { "epoch": 0.5279439661462133, "grad_norm": 0.9213499227605806, "learning_rate": 2.728176354037119e-05, "loss": 0.5726, "step": 18090 }, { "epoch": 0.5280898876404494, "grad_norm": 0.8878894157992827, "learning_rate": 2.7270906371396944e-05, "loss": 0.5358, "step": 18095 }, { "epoch": 0.5282358091346855, "grad_norm": 1.1537371144424056, "learning_rate": 2.7260049255771414e-05, "loss": 0.5642, "step": 18100 }, { "epoch": 0.5283817306289217, "grad_norm": 1.1289761175928628, "learning_rate": 2.72491921960229e-05, "loss": 0.6012, "step": 18105 }, { "epoch": 0.5285276521231578, "grad_norm": 0.9920248849199294, "learning_rate": 2.7238335194679676e-05, "loss": 0.4891, "step": 18110 }, { "epoch": 0.5286735736173939, "grad_norm": 0.9307938024749491, "learning_rate": 2.7227478254269976e-05, "loss": 0.553, "step": 18115 }, { "epoch": 0.52881949511163, "grad_norm": 1.0576176657405045, "learning_rate": 2.7216621377322062e-05, "loss": 0.6572, "step": 18120 }, { "epoch": 0.528965416605866, "grad_norm": 1.0177275315593213, "learning_rate": 2.720576456636415e-05, "loss": 0.5535, "step": 18125 }, { "epoch": 0.5291113381001021, "grad_norm": 0.8761674993888076, "learning_rate": 2.7194907823924464e-05, "loss": 0.5118, "step": 18130 }, { "epoch": 0.5292572595943382, "grad_norm": 1.2133053221102292, "learning_rate": 2.718405115253118e-05, "loss": 0.5895, "step": 18135 }, { "epoch": 0.5294031810885743, "grad_norm": 1.055325401261371, "learning_rate": 2.7173194554712485e-05, "loss": 0.5534, "step": 18140 }, { "epoch": 0.5295491025828104, "grad_norm": 1.0648408707662704, "learning_rate": 2.7162338032996544e-05, "loss": 0.5392, "step": 18145 }, { "epoch": 0.5296950240770465, "grad_norm": 1.1425983438174918, "learning_rate": 2.7151481589911492e-05, "loss": 0.5726, "step": 18150 }, { "epoch": 0.5298409455712827, "grad_norm": 0.9676007574513101, "learning_rate": 2.7140625227985478e-05, "loss": 0.6483, "step": 18155 }, { "epoch": 0.5299868670655188, "grad_norm": 1.073930229541028, "learning_rate": 2.712976894974658e-05, "loss": 0.5833, "step": 18160 }, { "epoch": 0.5301327885597549, "grad_norm": 1.043407332235955, "learning_rate": 2.7118912757722896e-05, "loss": 0.6067, "step": 18165 }, { "epoch": 0.530278710053991, "grad_norm": 1.163245273461599, "learning_rate": 2.7108056654442492e-05, "loss": 0.6643, "step": 18170 }, { "epoch": 0.530424631548227, "grad_norm": 1.0601916215571352, "learning_rate": 2.7097200642433418e-05, "loss": 0.5444, "step": 18175 }, { "epoch": 0.5305705530424631, "grad_norm": 0.9801836412830268, "learning_rate": 2.7086344724223713e-05, "loss": 0.5907, "step": 18180 }, { "epoch": 0.5307164745366992, "grad_norm": 0.9459059497626972, "learning_rate": 2.7075488902341345e-05, "loss": 0.5692, "step": 18185 }, { "epoch": 0.5308623960309353, "grad_norm": 1.0340348833628825, "learning_rate": 2.706463317931432e-05, "loss": 0.6456, "step": 18190 }, { "epoch": 0.5310083175251714, "grad_norm": 0.9488384241803544, "learning_rate": 2.705377755767059e-05, "loss": 0.5355, "step": 18195 }, { "epoch": 0.5311542390194075, "grad_norm": 1.2440057463960081, "learning_rate": 2.704292203993809e-05, "loss": 0.5744, "step": 18200 }, { "epoch": 0.5313001605136437, "grad_norm": 1.1103911197502438, "learning_rate": 2.7032066628644738e-05, "loss": 0.5997, "step": 18205 }, { "epoch": 0.5314460820078798, "grad_norm": 0.9576074578486755, "learning_rate": 2.70212113263184e-05, "loss": 0.5129, "step": 18210 }, { "epoch": 0.5315920035021159, "grad_norm": 0.9557281373829349, "learning_rate": 2.7010356135486953e-05, "loss": 0.5518, "step": 18215 }, { "epoch": 0.531737924996352, "grad_norm": 1.0173171172414277, "learning_rate": 2.6999501058678218e-05, "loss": 0.5827, "step": 18220 }, { "epoch": 0.5318838464905881, "grad_norm": 1.0428664299761135, "learning_rate": 2.6988646098420018e-05, "loss": 0.5446, "step": 18225 }, { "epoch": 0.5320297679848242, "grad_norm": 0.9773760471360108, "learning_rate": 2.6977791257240115e-05, "loss": 0.5333, "step": 18230 }, { "epoch": 0.5321756894790602, "grad_norm": 0.9527201148385231, "learning_rate": 2.6966936537666282e-05, "loss": 0.5868, "step": 18235 }, { "epoch": 0.5323216109732963, "grad_norm": 0.921331536218337, "learning_rate": 2.6956081942226226e-05, "loss": 0.5974, "step": 18240 }, { "epoch": 0.5324675324675324, "grad_norm": 1.2147578476322956, "learning_rate": 2.6945227473447644e-05, "loss": 0.5934, "step": 18245 }, { "epoch": 0.5326134539617686, "grad_norm": 1.0689657581666354, "learning_rate": 2.6934373133858216e-05, "loss": 0.5346, "step": 18250 }, { "epoch": 0.5327593754560047, "grad_norm": 1.0122966731967253, "learning_rate": 2.6923518925985562e-05, "loss": 0.6253, "step": 18255 }, { "epoch": 0.5329052969502408, "grad_norm": 1.3474157785010537, "learning_rate": 2.691266485235729e-05, "loss": 0.5916, "step": 18260 }, { "epoch": 0.5330512184444769, "grad_norm": 0.8534697236050557, "learning_rate": 2.690181091550098e-05, "loss": 0.5058, "step": 18265 }, { "epoch": 0.533197139938713, "grad_norm": 0.9023092235402138, "learning_rate": 2.689095711794417e-05, "loss": 0.552, "step": 18270 }, { "epoch": 0.5333430614329491, "grad_norm": 1.104183687348476, "learning_rate": 2.6880103462214372e-05, "loss": 0.6473, "step": 18275 }, { "epoch": 0.5334889829271852, "grad_norm": 1.095553481408904, "learning_rate": 2.6869249950839065e-05, "loss": 0.5768, "step": 18280 }, { "epoch": 0.5336349044214213, "grad_norm": 1.1393413773932635, "learning_rate": 2.685839658634568e-05, "loss": 0.55, "step": 18285 }, { "epoch": 0.5337808259156573, "grad_norm": 0.9494067181565052, "learning_rate": 2.684754337126163e-05, "loss": 0.6175, "step": 18290 }, { "epoch": 0.5339267474098934, "grad_norm": 1.1490515663327459, "learning_rate": 2.6836690308114305e-05, "loss": 0.5981, "step": 18295 }, { "epoch": 0.5340726689041296, "grad_norm": 1.1841713961538063, "learning_rate": 2.682583739943102e-05, "loss": 0.6036, "step": 18300 }, { "epoch": 0.5342185903983657, "grad_norm": 1.0900590191484214, "learning_rate": 2.6814984647739084e-05, "loss": 0.5771, "step": 18305 }, { "epoch": 0.5343645118926018, "grad_norm": 0.9662420682836969, "learning_rate": 2.680413205556578e-05, "loss": 0.5793, "step": 18310 }, { "epoch": 0.5345104333868379, "grad_norm": 0.972625122035825, "learning_rate": 2.6793279625438312e-05, "loss": 0.6047, "step": 18315 }, { "epoch": 0.534656354881074, "grad_norm": 1.0018566165416312, "learning_rate": 2.678242735988389e-05, "loss": 0.6372, "step": 18320 }, { "epoch": 0.5348022763753101, "grad_norm": 1.0622554693048338, "learning_rate": 2.6771575261429653e-05, "loss": 0.6144, "step": 18325 }, { "epoch": 0.5349481978695462, "grad_norm": 1.012464937244321, "learning_rate": 2.6760723332602722e-05, "loss": 0.6442, "step": 18330 }, { "epoch": 0.5350941193637823, "grad_norm": 0.9954906944075244, "learning_rate": 2.674987157593017e-05, "loss": 0.5788, "step": 18335 }, { "epoch": 0.5352400408580184, "grad_norm": 0.8556905022391582, "learning_rate": 2.6739019993939036e-05, "loss": 0.5805, "step": 18340 }, { "epoch": 0.5353859623522544, "grad_norm": 1.0922630885641271, "learning_rate": 2.672816858915631e-05, "loss": 0.5266, "step": 18345 }, { "epoch": 0.5355318838464906, "grad_norm": 1.0387405695029193, "learning_rate": 2.671731736410894e-05, "loss": 0.5533, "step": 18350 }, { "epoch": 0.5356778053407267, "grad_norm": 1.1310656840115603, "learning_rate": 2.6706466321323835e-05, "loss": 0.6014, "step": 18355 }, { "epoch": 0.5358237268349628, "grad_norm": 0.9737755114287527, "learning_rate": 2.6695615463327873e-05, "loss": 0.5795, "step": 18360 }, { "epoch": 0.5359696483291989, "grad_norm": 1.0501839104982529, "learning_rate": 2.668476479264787e-05, "loss": 0.5396, "step": 18365 }, { "epoch": 0.536115569823435, "grad_norm": 1.027747273145144, "learning_rate": 2.6673914311810623e-05, "loss": 0.5666, "step": 18370 }, { "epoch": 0.5362614913176711, "grad_norm": 1.1081186098697693, "learning_rate": 2.6663064023342843e-05, "loss": 0.5575, "step": 18375 }, { "epoch": 0.5364074128119072, "grad_norm": 0.9431075609984424, "learning_rate": 2.665221392977125e-05, "loss": 0.5511, "step": 18380 }, { "epoch": 0.5365533343061433, "grad_norm": 1.117275856317832, "learning_rate": 2.664136403362246e-05, "loss": 0.587, "step": 18385 }, { "epoch": 0.5366992558003794, "grad_norm": 1.0224434254753776, "learning_rate": 2.6630514337423113e-05, "loss": 0.5647, "step": 18390 }, { "epoch": 0.5368451772946154, "grad_norm": 1.1926220516333217, "learning_rate": 2.661966484369972e-05, "loss": 0.5802, "step": 18395 }, { "epoch": 0.5369910987888517, "grad_norm": 1.1821218962088669, "learning_rate": 2.6608815554978816e-05, "loss": 0.5684, "step": 18400 }, { "epoch": 0.5371370202830877, "grad_norm": 0.9535477946098685, "learning_rate": 2.6597966473786855e-05, "loss": 0.5426, "step": 18405 }, { "epoch": 0.5372829417773238, "grad_norm": 1.0052778968371063, "learning_rate": 2.658711760265026e-05, "loss": 0.5204, "step": 18410 }, { "epoch": 0.5374288632715599, "grad_norm": 0.9255092815843405, "learning_rate": 2.657626894409537e-05, "loss": 0.5866, "step": 18415 }, { "epoch": 0.537574784765796, "grad_norm": 1.0578974978663298, "learning_rate": 2.6565420500648513e-05, "loss": 0.5784, "step": 18420 }, { "epoch": 0.5377207062600321, "grad_norm": 1.0520200182587254, "learning_rate": 2.6554572274835942e-05, "loss": 0.6163, "step": 18425 }, { "epoch": 0.5378666277542682, "grad_norm": 1.041134492589403, "learning_rate": 2.6543724269183873e-05, "loss": 0.6265, "step": 18430 }, { "epoch": 0.5380125492485043, "grad_norm": 0.988964346619895, "learning_rate": 2.6532876486218473e-05, "loss": 0.5645, "step": 18435 }, { "epoch": 0.5381584707427404, "grad_norm": 0.9453036825333526, "learning_rate": 2.652202892846585e-05, "loss": 0.5687, "step": 18440 }, { "epoch": 0.5383043922369765, "grad_norm": 1.0533407036876061, "learning_rate": 2.651118159845205e-05, "loss": 0.6116, "step": 18445 }, { "epoch": 0.5384503137312127, "grad_norm": 1.1041314700767115, "learning_rate": 2.6500334498703083e-05, "loss": 0.6, "step": 18450 }, { "epoch": 0.5385962352254487, "grad_norm": 0.9228067326310603, "learning_rate": 2.6489487631744896e-05, "loss": 0.5538, "step": 18455 }, { "epoch": 0.5387421567196848, "grad_norm": 1.175014935684281, "learning_rate": 2.647864100010339e-05, "loss": 0.5761, "step": 18460 }, { "epoch": 0.5388880782139209, "grad_norm": 1.1950751675611238, "learning_rate": 2.6467794606304413e-05, "loss": 0.5528, "step": 18465 }, { "epoch": 0.539033999708157, "grad_norm": 0.9191728133190892, "learning_rate": 2.6456948452873726e-05, "loss": 0.6361, "step": 18470 }, { "epoch": 0.5391799212023931, "grad_norm": 1.130807323169007, "learning_rate": 2.6446102542337076e-05, "loss": 0.5488, "step": 18475 }, { "epoch": 0.5393258426966292, "grad_norm": 1.025083859050189, "learning_rate": 2.6435256877220128e-05, "loss": 0.5727, "step": 18480 }, { "epoch": 0.5394717641908653, "grad_norm": 1.0230226662784287, "learning_rate": 2.6424411460048514e-05, "loss": 0.5378, "step": 18485 }, { "epoch": 0.5396176856851014, "grad_norm": 0.8624736322988603, "learning_rate": 2.6413566293347775e-05, "loss": 0.5018, "step": 18490 }, { "epoch": 0.5397636071793375, "grad_norm": 1.1635472449443434, "learning_rate": 2.640272137964342e-05, "loss": 0.594, "step": 18495 }, { "epoch": 0.5399095286735737, "grad_norm": 0.9694807524777028, "learning_rate": 2.6391876721460873e-05, "loss": 0.5659, "step": 18500 }, { "epoch": 0.5400554501678098, "grad_norm": 1.0043352412930615, "learning_rate": 2.6381032321325534e-05, "loss": 0.5522, "step": 18505 }, { "epoch": 0.5402013716620458, "grad_norm": 0.898720669135111, "learning_rate": 2.6370188181762728e-05, "loss": 0.5852, "step": 18510 }, { "epoch": 0.5403472931562819, "grad_norm": 1.0805056715968975, "learning_rate": 2.635934430529769e-05, "loss": 0.5887, "step": 18515 }, { "epoch": 0.540493214650518, "grad_norm": 0.9409565694363404, "learning_rate": 2.6348500694455642e-05, "loss": 0.5883, "step": 18520 }, { "epoch": 0.5406391361447541, "grad_norm": 0.9626454078603776, "learning_rate": 2.6337657351761713e-05, "loss": 0.5996, "step": 18525 }, { "epoch": 0.5407850576389902, "grad_norm": 1.156311791376103, "learning_rate": 2.6326814279740974e-05, "loss": 0.5654, "step": 18530 }, { "epoch": 0.5409309791332263, "grad_norm": 1.0126293661836134, "learning_rate": 2.631597148091845e-05, "loss": 0.6135, "step": 18535 }, { "epoch": 0.5410769006274624, "grad_norm": 0.9243149763631263, "learning_rate": 2.6305128957819076e-05, "loss": 0.5486, "step": 18540 }, { "epoch": 0.5412228221216985, "grad_norm": 1.1708097882204893, "learning_rate": 2.6294286712967735e-05, "loss": 0.5783, "step": 18545 }, { "epoch": 0.5413687436159347, "grad_norm": 1.1144951980913023, "learning_rate": 2.6283444748889252e-05, "loss": 0.5933, "step": 18550 }, { "epoch": 0.5415146651101708, "grad_norm": 1.1215278280294547, "learning_rate": 2.6272603068108386e-05, "loss": 0.6662, "step": 18555 }, { "epoch": 0.5416605866044069, "grad_norm": 0.9577801754610242, "learning_rate": 2.626176167314982e-05, "loss": 0.578, "step": 18560 }, { "epoch": 0.541806508098643, "grad_norm": 1.0799355954306333, "learning_rate": 2.6250920566538178e-05, "loss": 0.6074, "step": 18565 }, { "epoch": 0.541952429592879, "grad_norm": 1.1405245382424891, "learning_rate": 2.624007975079801e-05, "loss": 0.6529, "step": 18570 }, { "epoch": 0.5420983510871151, "grad_norm": 1.0377902980799405, "learning_rate": 2.6229239228453794e-05, "loss": 0.5545, "step": 18575 }, { "epoch": 0.5422442725813512, "grad_norm": 0.9353960676734543, "learning_rate": 2.621839900202997e-05, "loss": 0.5676, "step": 18580 }, { "epoch": 0.5423901940755873, "grad_norm": 1.1176803279490075, "learning_rate": 2.6207559074050863e-05, "loss": 0.4961, "step": 18585 }, { "epoch": 0.5425361155698234, "grad_norm": 0.9722832623021269, "learning_rate": 2.619671944704077e-05, "loss": 0.5487, "step": 18590 }, { "epoch": 0.5426820370640595, "grad_norm": 0.9587465116780288, "learning_rate": 2.6185880123523894e-05, "loss": 0.5546, "step": 18595 }, { "epoch": 0.5428279585582957, "grad_norm": 1.1696431270844794, "learning_rate": 2.6175041106024366e-05, "loss": 0.5058, "step": 18600 }, { "epoch": 0.5429738800525318, "grad_norm": 1.0826394701815976, "learning_rate": 2.6164202397066286e-05, "loss": 0.5232, "step": 18605 }, { "epoch": 0.5431198015467679, "grad_norm": 0.8796562439996058, "learning_rate": 2.615336399917361e-05, "loss": 0.5655, "step": 18610 }, { "epoch": 0.543265723041004, "grad_norm": 0.9823904936670873, "learning_rate": 2.614252591487027e-05, "loss": 0.519, "step": 18615 }, { "epoch": 0.54341164453524, "grad_norm": 1.028710295385897, "learning_rate": 2.6131688146680133e-05, "loss": 0.5346, "step": 18620 }, { "epoch": 0.5435575660294761, "grad_norm": 1.0895924683222729, "learning_rate": 2.612085069712697e-05, "loss": 0.6019, "step": 18625 }, { "epoch": 0.5437034875237122, "grad_norm": 1.0438265636717063, "learning_rate": 2.6110013568734477e-05, "loss": 0.5462, "step": 18630 }, { "epoch": 0.5438494090179483, "grad_norm": 0.983221332578274, "learning_rate": 2.609917676402629e-05, "loss": 0.5941, "step": 18635 }, { "epoch": 0.5439953305121844, "grad_norm": 1.1222904325094563, "learning_rate": 2.6088340285525948e-05, "loss": 0.6253, "step": 18640 }, { "epoch": 0.5441412520064205, "grad_norm": 0.9621962174057327, "learning_rate": 2.6077504135756935e-05, "loss": 0.5769, "step": 18645 }, { "epoch": 0.5442871735006567, "grad_norm": 1.1136769866907792, "learning_rate": 2.6066668317242653e-05, "loss": 0.5494, "step": 18650 }, { "epoch": 0.5444330949948928, "grad_norm": 1.0281861679538178, "learning_rate": 2.6055832832506434e-05, "loss": 0.5728, "step": 18655 }, { "epoch": 0.5445790164891289, "grad_norm": 1.2908076190791984, "learning_rate": 2.6044997684071497e-05, "loss": 0.618, "step": 18660 }, { "epoch": 0.544724937983365, "grad_norm": 1.0261999511290776, "learning_rate": 2.6034162874461038e-05, "loss": 0.556, "step": 18665 }, { "epoch": 0.544870859477601, "grad_norm": 1.0894673677022477, "learning_rate": 2.602332840619812e-05, "loss": 0.5053, "step": 18670 }, { "epoch": 0.5450167809718371, "grad_norm": 1.0719473798968804, "learning_rate": 2.6012494281805777e-05, "loss": 0.6432, "step": 18675 }, { "epoch": 0.5451627024660732, "grad_norm": 1.0206624581390917, "learning_rate": 2.6001660503806906e-05, "loss": 0.6032, "step": 18680 }, { "epoch": 0.5453086239603093, "grad_norm": 1.5241969536797513, "learning_rate": 2.599082707472438e-05, "loss": 0.5755, "step": 18685 }, { "epoch": 0.5454545454545454, "grad_norm": 1.0474371407233378, "learning_rate": 2.5979993997080947e-05, "loss": 0.6024, "step": 18690 }, { "epoch": 0.5456004669487815, "grad_norm": 1.1193353735004417, "learning_rate": 2.5969161273399306e-05, "loss": 0.6066, "step": 18695 }, { "epoch": 0.5457463884430177, "grad_norm": 1.2281871702574194, "learning_rate": 2.5958328906202063e-05, "loss": 0.6051, "step": 18700 }, { "epoch": 0.5458923099372538, "grad_norm": 1.248497607609345, "learning_rate": 2.594749689801172e-05, "loss": 0.5628, "step": 18705 }, { "epoch": 0.5460382314314899, "grad_norm": 1.0833910196660719, "learning_rate": 2.5936665251350718e-05, "loss": 0.6102, "step": 18710 }, { "epoch": 0.546184152925726, "grad_norm": 1.094220701660935, "learning_rate": 2.592583396874141e-05, "loss": 0.5604, "step": 18715 }, { "epoch": 0.5463300744199621, "grad_norm": 0.969939531675538, "learning_rate": 2.591500305270606e-05, "loss": 0.5243, "step": 18720 }, { "epoch": 0.5464759959141982, "grad_norm": 1.1182319389173998, "learning_rate": 2.5904172505766864e-05, "loss": 0.5713, "step": 18725 }, { "epoch": 0.5466219174084342, "grad_norm": 1.5037264282608949, "learning_rate": 2.5893342330445892e-05, "loss": 0.5946, "step": 18730 }, { "epoch": 0.5467678389026703, "grad_norm": 1.1648680136260472, "learning_rate": 2.5882512529265167e-05, "loss": 0.5923, "step": 18735 }, { "epoch": 0.5469137603969064, "grad_norm": 1.1128611647869884, "learning_rate": 2.587168310474662e-05, "loss": 0.5591, "step": 18740 }, { "epoch": 0.5470596818911425, "grad_norm": 0.8953168106044155, "learning_rate": 2.586085405941207e-05, "loss": 0.5439, "step": 18745 }, { "epoch": 0.5472056033853787, "grad_norm": 1.142966131321429, "learning_rate": 2.5850025395783263e-05, "loss": 0.5786, "step": 18750 }, { "epoch": 0.5473515248796148, "grad_norm": 0.9578302790419457, "learning_rate": 2.583919711638186e-05, "loss": 0.564, "step": 18755 }, { "epoch": 0.5474974463738509, "grad_norm": 1.1409318308312035, "learning_rate": 2.582836922372942e-05, "loss": 0.6189, "step": 18760 }, { "epoch": 0.547643367868087, "grad_norm": 1.0133993165330566, "learning_rate": 2.5817541720347434e-05, "loss": 0.5323, "step": 18765 }, { "epoch": 0.5477892893623231, "grad_norm": 0.9694775426494109, "learning_rate": 2.5806714608757283e-05, "loss": 0.5373, "step": 18770 }, { "epoch": 0.5479352108565592, "grad_norm": 1.0412753794502596, "learning_rate": 2.579588789148026e-05, "loss": 0.5765, "step": 18775 }, { "epoch": 0.5480811323507953, "grad_norm": 1.0722631643779192, "learning_rate": 2.5785061571037566e-05, "loss": 0.5325, "step": 18780 }, { "epoch": 0.5482270538450313, "grad_norm": 0.9694552921580994, "learning_rate": 2.5774235649950312e-05, "loss": 0.6036, "step": 18785 }, { "epoch": 0.5483729753392674, "grad_norm": 0.9047047292913784, "learning_rate": 2.5763410130739517e-05, "loss": 0.5399, "step": 18790 }, { "epoch": 0.5485188968335035, "grad_norm": 1.0527515663682632, "learning_rate": 2.5752585015926124e-05, "loss": 0.5638, "step": 18795 }, { "epoch": 0.5486648183277397, "grad_norm": 0.8847067385970466, "learning_rate": 2.5741760308030933e-05, "loss": 0.5668, "step": 18800 }, { "epoch": 0.5488107398219758, "grad_norm": 1.0433680099661289, "learning_rate": 2.5730936009574684e-05, "loss": 0.6146, "step": 18805 }, { "epoch": 0.5489566613162119, "grad_norm": 1.0960315526480513, "learning_rate": 2.572011212307804e-05, "loss": 0.6032, "step": 18810 }, { "epoch": 0.549102582810448, "grad_norm": 1.008049271422144, "learning_rate": 2.5709288651061526e-05, "loss": 0.5668, "step": 18815 }, { "epoch": 0.5492485043046841, "grad_norm": 1.0303661244700986, "learning_rate": 2.56984655960456e-05, "loss": 0.6205, "step": 18820 }, { "epoch": 0.5493944257989202, "grad_norm": 1.0216156943528545, "learning_rate": 2.56876429605506e-05, "loss": 0.5744, "step": 18825 }, { "epoch": 0.5495403472931563, "grad_norm": 0.9533687720611044, "learning_rate": 2.5676820747096787e-05, "loss": 0.5475, "step": 18830 }, { "epoch": 0.5496862687873924, "grad_norm": 1.1001135006103824, "learning_rate": 2.5665998958204313e-05, "loss": 0.5929, "step": 18835 }, { "epoch": 0.5498321902816284, "grad_norm": 1.0152341366158735, "learning_rate": 2.5655177596393244e-05, "loss": 0.5469, "step": 18840 }, { "epoch": 0.5499781117758645, "grad_norm": 1.1219268949891719, "learning_rate": 2.5644356664183512e-05, "loss": 0.5614, "step": 18845 }, { "epoch": 0.5501240332701007, "grad_norm": 0.8820918915949129, "learning_rate": 2.5633536164095e-05, "loss": 0.6017, "step": 18850 }, { "epoch": 0.5502699547643368, "grad_norm": 1.0735395332406277, "learning_rate": 2.562271609864744e-05, "loss": 0.6183, "step": 18855 }, { "epoch": 0.5504158762585729, "grad_norm": 1.195082020869037, "learning_rate": 2.561189647036051e-05, "loss": 0.569, "step": 18860 }, { "epoch": 0.550561797752809, "grad_norm": 0.961023415528801, "learning_rate": 2.5601077281753755e-05, "loss": 0.5552, "step": 18865 }, { "epoch": 0.5507077192470451, "grad_norm": 1.2236131441096125, "learning_rate": 2.5590258535346605e-05, "loss": 0.618, "step": 18870 }, { "epoch": 0.5508536407412812, "grad_norm": 1.1281272706105105, "learning_rate": 2.557944023365843e-05, "loss": 0.6186, "step": 18875 }, { "epoch": 0.5509995622355173, "grad_norm": 0.9494685945411812, "learning_rate": 2.556862237920847e-05, "loss": 0.541, "step": 18880 }, { "epoch": 0.5511454837297534, "grad_norm": 1.1039442359528024, "learning_rate": 2.5557804974515853e-05, "loss": 0.6234, "step": 18885 }, { "epoch": 0.5512914052239895, "grad_norm": 1.2397737831522957, "learning_rate": 2.5546988022099633e-05, "loss": 0.5842, "step": 18890 }, { "epoch": 0.5514373267182255, "grad_norm": 0.9222605895253784, "learning_rate": 2.553617152447872e-05, "loss": 0.532, "step": 18895 }, { "epoch": 0.5515832482124617, "grad_norm": 1.1054125419781198, "learning_rate": 2.5525355484171942e-05, "loss": 0.5844, "step": 18900 }, { "epoch": 0.5517291697066978, "grad_norm": 1.0920177162832378, "learning_rate": 2.551453990369802e-05, "loss": 0.5977, "step": 18905 }, { "epoch": 0.5518750912009339, "grad_norm": 1.0443406539143116, "learning_rate": 2.5503724785575567e-05, "loss": 0.5844, "step": 18910 }, { "epoch": 0.55202101269517, "grad_norm": 1.1242148004092776, "learning_rate": 2.549291013232309e-05, "loss": 0.5725, "step": 18915 }, { "epoch": 0.5521669341894061, "grad_norm": 0.978401193546021, "learning_rate": 2.5482095946458966e-05, "loss": 0.5488, "step": 18920 }, { "epoch": 0.5523128556836422, "grad_norm": 1.0847012634123125, "learning_rate": 2.5471282230501492e-05, "loss": 0.5354, "step": 18925 }, { "epoch": 0.5524587771778783, "grad_norm": 1.0358762658460097, "learning_rate": 2.546046898696884e-05, "loss": 0.5698, "step": 18930 }, { "epoch": 0.5526046986721144, "grad_norm": 1.0891401532471086, "learning_rate": 2.5449656218379074e-05, "loss": 0.5633, "step": 18935 }, { "epoch": 0.5527506201663505, "grad_norm": 1.0510193442296694, "learning_rate": 2.5438843927250154e-05, "loss": 0.6099, "step": 18940 }, { "epoch": 0.5528965416605865, "grad_norm": 1.2183005943802543, "learning_rate": 2.5428032116099915e-05, "loss": 0.6016, "step": 18945 }, { "epoch": 0.5530424631548227, "grad_norm": 1.013470118830834, "learning_rate": 2.54172207874461e-05, "loss": 0.5752, "step": 18950 }, { "epoch": 0.5531883846490588, "grad_norm": 1.0779410603820585, "learning_rate": 2.5406409943806332e-05, "loss": 0.5764, "step": 18955 }, { "epoch": 0.5533343061432949, "grad_norm": 1.0547807165614647, "learning_rate": 2.5395599587698105e-05, "loss": 0.5817, "step": 18960 }, { "epoch": 0.553480227637531, "grad_norm": 1.008305177677653, "learning_rate": 2.5384789721638814e-05, "loss": 0.6211, "step": 18965 }, { "epoch": 0.5536261491317671, "grad_norm": 1.099845859383687, "learning_rate": 2.5373980348145744e-05, "loss": 0.5909, "step": 18970 }, { "epoch": 0.5537720706260032, "grad_norm": 1.006296163758954, "learning_rate": 2.536317146973605e-05, "loss": 0.5261, "step": 18975 }, { "epoch": 0.5539179921202393, "grad_norm": 1.0087618764886408, "learning_rate": 2.5352363088926794e-05, "loss": 0.5847, "step": 18980 }, { "epoch": 0.5540639136144754, "grad_norm": 1.0175269245985794, "learning_rate": 2.5341555208234913e-05, "loss": 0.6103, "step": 18985 }, { "epoch": 0.5542098351087115, "grad_norm": 1.0676517263307896, "learning_rate": 2.5330747830177198e-05, "loss": 0.6017, "step": 18990 }, { "epoch": 0.5543557566029477, "grad_norm": 0.927595789718737, "learning_rate": 2.5319940957270376e-05, "loss": 0.5658, "step": 18995 }, { "epoch": 0.5545016780971838, "grad_norm": 1.0517677562821353, "learning_rate": 2.530913459203102e-05, "loss": 0.5708, "step": 19000 }, { "epoch": 0.5546475995914198, "grad_norm": 0.9924221815992588, "learning_rate": 2.5298328736975586e-05, "loss": 0.5162, "step": 19005 }, { "epoch": 0.5547935210856559, "grad_norm": 1.133808558330379, "learning_rate": 2.5287523394620434e-05, "loss": 0.6535, "step": 19010 }, { "epoch": 0.554939442579892, "grad_norm": 1.2295521240001699, "learning_rate": 2.5276718567481772e-05, "loss": 0.5985, "step": 19015 }, { "epoch": 0.5550853640741281, "grad_norm": 1.0829990655439399, "learning_rate": 2.526591425807572e-05, "loss": 0.5858, "step": 19020 }, { "epoch": 0.5552312855683642, "grad_norm": 0.975581808513619, "learning_rate": 2.525511046891826e-05, "loss": 0.5663, "step": 19025 }, { "epoch": 0.5553772070626003, "grad_norm": 0.9977770894440878, "learning_rate": 2.5244307202525253e-05, "loss": 0.5307, "step": 19030 }, { "epoch": 0.5555231285568364, "grad_norm": 1.0209717023105354, "learning_rate": 2.5233504461412442e-05, "loss": 0.5669, "step": 19035 }, { "epoch": 0.5556690500510725, "grad_norm": 1.1791880003927673, "learning_rate": 2.5222702248095453e-05, "loss": 0.588, "step": 19040 }, { "epoch": 0.5558149715453087, "grad_norm": 1.1051184583095714, "learning_rate": 2.521190056508977e-05, "loss": 0.6078, "step": 19045 }, { "epoch": 0.5559608930395448, "grad_norm": 0.9539411945786938, "learning_rate": 2.5201099414910777e-05, "loss": 0.5132, "step": 19050 }, { "epoch": 0.5561068145337809, "grad_norm": 0.9693700415932675, "learning_rate": 2.519029880007373e-05, "loss": 0.5634, "step": 19055 }, { "epoch": 0.556252736028017, "grad_norm": 1.1086048631443517, "learning_rate": 2.5179498723093738e-05, "loss": 0.5585, "step": 19060 }, { "epoch": 0.556398657522253, "grad_norm": 1.0455769386020437, "learning_rate": 2.5168699186485812e-05, "loss": 0.5514, "step": 19065 }, { "epoch": 0.5565445790164891, "grad_norm": 0.9256208382486878, "learning_rate": 2.515790019276481e-05, "loss": 0.5389, "step": 19070 }, { "epoch": 0.5566905005107252, "grad_norm": 1.1502391874146716, "learning_rate": 2.5147101744445502e-05, "loss": 0.5288, "step": 19075 }, { "epoch": 0.5568364220049613, "grad_norm": 1.0385892613077392, "learning_rate": 2.51363038440425e-05, "loss": 0.5402, "step": 19080 }, { "epoch": 0.5569823434991974, "grad_norm": 1.0652073224815324, "learning_rate": 2.5125506494070288e-05, "loss": 0.5658, "step": 19085 }, { "epoch": 0.5571282649934335, "grad_norm": 1.115274048936048, "learning_rate": 2.5114709697043233e-05, "loss": 0.5928, "step": 19090 }, { "epoch": 0.5572741864876697, "grad_norm": 1.0188397659573443, "learning_rate": 2.510391345547558e-05, "loss": 0.5633, "step": 19095 }, { "epoch": 0.5574201079819058, "grad_norm": 1.0801249548445246, "learning_rate": 2.5093117771881435e-05, "loss": 0.61, "step": 19100 }, { "epoch": 0.5575660294761419, "grad_norm": 1.0460639149770012, "learning_rate": 2.508232264877477e-05, "loss": 0.5917, "step": 19105 }, { "epoch": 0.557711950970378, "grad_norm": 1.076834998465646, "learning_rate": 2.507152808866942e-05, "loss": 0.5454, "step": 19110 }, { "epoch": 0.557857872464614, "grad_norm": 0.9164329626296981, "learning_rate": 2.5060734094079114e-05, "loss": 0.6192, "step": 19115 }, { "epoch": 0.5580037939588501, "grad_norm": 1.205042786583854, "learning_rate": 2.504994066751743e-05, "loss": 0.6095, "step": 19120 }, { "epoch": 0.5581497154530862, "grad_norm": 1.010719854884888, "learning_rate": 2.5039147811497832e-05, "loss": 0.5475, "step": 19125 }, { "epoch": 0.5582956369473223, "grad_norm": 1.1540967229947297, "learning_rate": 2.502835552853362e-05, "loss": 0.5794, "step": 19130 }, { "epoch": 0.5584415584415584, "grad_norm": 1.0461724261051417, "learning_rate": 2.5017563821137986e-05, "loss": 0.507, "step": 19135 }, { "epoch": 0.5585874799357945, "grad_norm": 1.0544502833352922, "learning_rate": 2.5006772691823978e-05, "loss": 0.5392, "step": 19140 }, { "epoch": 0.5587334014300307, "grad_norm": 0.9651564140635615, "learning_rate": 2.4995982143104512e-05, "loss": 0.5443, "step": 19145 }, { "epoch": 0.5588793229242668, "grad_norm": 1.092416606591082, "learning_rate": 2.4985192177492384e-05, "loss": 0.6354, "step": 19150 }, { "epoch": 0.5590252444185029, "grad_norm": 1.1443818809993265, "learning_rate": 2.4974402797500214e-05, "loss": 0.6377, "step": 19155 }, { "epoch": 0.559171165912739, "grad_norm": 1.1295841116010163, "learning_rate": 2.4963614005640522e-05, "loss": 0.5545, "step": 19160 }, { "epoch": 0.559317087406975, "grad_norm": 1.0868486360816592, "learning_rate": 2.4952825804425677e-05, "loss": 0.6242, "step": 19165 }, { "epoch": 0.5594630089012111, "grad_norm": 1.0443842412131272, "learning_rate": 2.494203819636793e-05, "loss": 0.5395, "step": 19170 }, { "epoch": 0.5596089303954472, "grad_norm": 1.2821998649607063, "learning_rate": 2.4931251183979363e-05, "loss": 0.593, "step": 19175 }, { "epoch": 0.5597548518896833, "grad_norm": 0.9135653226581955, "learning_rate": 2.4920464769771922e-05, "loss": 0.5753, "step": 19180 }, { "epoch": 0.5599007733839194, "grad_norm": 1.0307893919342195, "learning_rate": 2.4909678956257442e-05, "loss": 0.6159, "step": 19185 }, { "epoch": 0.5600466948781555, "grad_norm": 0.8833656854411767, "learning_rate": 2.48988937459476e-05, "loss": 0.5398, "step": 19190 }, { "epoch": 0.5601926163723917, "grad_norm": 1.0503850689797165, "learning_rate": 2.4888109141353933e-05, "loss": 0.6072, "step": 19195 }, { "epoch": 0.5603385378666278, "grad_norm": 1.1281983794219044, "learning_rate": 2.4877325144987824e-05, "loss": 0.5664, "step": 19200 }, { "epoch": 0.5604844593608639, "grad_norm": 1.0848995924893812, "learning_rate": 2.486654175936054e-05, "loss": 0.5723, "step": 19205 }, { "epoch": 0.5606303808551, "grad_norm": 1.0095072610759486, "learning_rate": 2.4855758986983192e-05, "loss": 0.5991, "step": 19210 }, { "epoch": 0.5607763023493361, "grad_norm": 1.0925017368118464, "learning_rate": 2.4844976830366752e-05, "loss": 0.554, "step": 19215 }, { "epoch": 0.5609222238435722, "grad_norm": 1.0161537070177942, "learning_rate": 2.483419529202205e-05, "loss": 0.4725, "step": 19220 }, { "epoch": 0.5610681453378082, "grad_norm": 1.1843883117584293, "learning_rate": 2.482341437445976e-05, "loss": 0.5695, "step": 19225 }, { "epoch": 0.5612140668320443, "grad_norm": 0.9304976985737102, "learning_rate": 2.481263408019041e-05, "loss": 0.5934, "step": 19230 }, { "epoch": 0.5613599883262804, "grad_norm": 1.0408515652964907, "learning_rate": 2.480185441172441e-05, "loss": 0.601, "step": 19235 }, { "epoch": 0.5615059098205165, "grad_norm": 1.1711645382896347, "learning_rate": 2.4791075371572014e-05, "loss": 0.6006, "step": 19240 }, { "epoch": 0.5616518313147527, "grad_norm": 1.0104416067115156, "learning_rate": 2.478029696224331e-05, "loss": 0.5449, "step": 19245 }, { "epoch": 0.5617977528089888, "grad_norm": 0.9905701187655629, "learning_rate": 2.4769519186248253e-05, "loss": 0.5688, "step": 19250 }, { "epoch": 0.5619436743032249, "grad_norm": 0.9506585186110902, "learning_rate": 2.4758742046096643e-05, "loss": 0.5745, "step": 19255 }, { "epoch": 0.562089595797461, "grad_norm": 1.0627845624652243, "learning_rate": 2.4747965544298153e-05, "loss": 0.5762, "step": 19260 }, { "epoch": 0.5622355172916971, "grad_norm": 1.056693222541942, "learning_rate": 2.4737189683362276e-05, "loss": 0.581, "step": 19265 }, { "epoch": 0.5623814387859332, "grad_norm": 1.2266561420640012, "learning_rate": 2.47264144657984e-05, "loss": 0.5532, "step": 19270 }, { "epoch": 0.5625273602801693, "grad_norm": 1.1216603509846654, "learning_rate": 2.4715639894115698e-05, "loss": 0.6624, "step": 19275 }, { "epoch": 0.5626732817744053, "grad_norm": 1.1223071449803248, "learning_rate": 2.470486597082326e-05, "loss": 0.5501, "step": 19280 }, { "epoch": 0.5628192032686414, "grad_norm": 1.0848189018979422, "learning_rate": 2.469409269842998e-05, "loss": 0.5863, "step": 19285 }, { "epoch": 0.5629651247628775, "grad_norm": 1.0779140525538955, "learning_rate": 2.4683320079444632e-05, "loss": 0.535, "step": 19290 }, { "epoch": 0.5631110462571137, "grad_norm": 1.1538109088750106, "learning_rate": 2.4672548116375797e-05, "loss": 0.6798, "step": 19295 }, { "epoch": 0.5632569677513498, "grad_norm": 1.1577009613692197, "learning_rate": 2.466177681173194e-05, "loss": 0.5687, "step": 19300 }, { "epoch": 0.5634028892455859, "grad_norm": 1.0637343804154196, "learning_rate": 2.4651006168021367e-05, "loss": 0.541, "step": 19305 }, { "epoch": 0.563548810739822, "grad_norm": 0.9878108564946632, "learning_rate": 2.464023618775222e-05, "loss": 0.5327, "step": 19310 }, { "epoch": 0.5636947322340581, "grad_norm": 0.9287462175330049, "learning_rate": 2.4629466873432483e-05, "loss": 0.5314, "step": 19315 }, { "epoch": 0.5638406537282942, "grad_norm": 0.8803254546998466, "learning_rate": 2.4618698227570004e-05, "loss": 0.5309, "step": 19320 }, { "epoch": 0.5639865752225303, "grad_norm": 1.2028701200319487, "learning_rate": 2.4607930252672446e-05, "loss": 0.6191, "step": 19325 }, { "epoch": 0.5641324967167664, "grad_norm": 0.8740160504273201, "learning_rate": 2.459716295124735e-05, "loss": 0.5593, "step": 19330 }, { "epoch": 0.5642784182110024, "grad_norm": 1.0320604321621292, "learning_rate": 2.458639632580207e-05, "loss": 0.6246, "step": 19335 }, { "epoch": 0.5644243397052385, "grad_norm": 1.0819877387168169, "learning_rate": 2.4575630378843835e-05, "loss": 0.5668, "step": 19340 }, { "epoch": 0.5645702611994747, "grad_norm": 1.0981914701607622, "learning_rate": 2.456486511287967e-05, "loss": 0.5457, "step": 19345 }, { "epoch": 0.5647161826937108, "grad_norm": 1.2711526163268574, "learning_rate": 2.455410053041649e-05, "loss": 0.6249, "step": 19350 }, { "epoch": 0.5648621041879469, "grad_norm": 1.1266035804487176, "learning_rate": 2.454333663396102e-05, "loss": 0.5747, "step": 19355 }, { "epoch": 0.565008025682183, "grad_norm": 1.0122364388672083, "learning_rate": 2.4532573426019834e-05, "loss": 0.5753, "step": 19360 }, { "epoch": 0.5651539471764191, "grad_norm": 1.0185787207627244, "learning_rate": 2.4521810909099358e-05, "loss": 0.5999, "step": 19365 }, { "epoch": 0.5652998686706552, "grad_norm": 1.1676105532008045, "learning_rate": 2.4511049085705823e-05, "loss": 0.541, "step": 19370 }, { "epoch": 0.5654457901648913, "grad_norm": 0.9087974882154571, "learning_rate": 2.4500287958345336e-05, "loss": 0.5494, "step": 19375 }, { "epoch": 0.5655917116591274, "grad_norm": 1.0401637410944768, "learning_rate": 2.448952752952382e-05, "loss": 0.6122, "step": 19380 }, { "epoch": 0.5657376331533635, "grad_norm": 1.0380105477386217, "learning_rate": 2.4478767801747056e-05, "loss": 0.5644, "step": 19385 }, { "epoch": 0.5658835546475995, "grad_norm": 1.0055441254996957, "learning_rate": 2.4468008777520628e-05, "loss": 0.5419, "step": 19390 }, { "epoch": 0.5660294761418357, "grad_norm": 1.0857132155666829, "learning_rate": 2.4457250459349983e-05, "loss": 0.6004, "step": 19395 }, { "epoch": 0.5661753976360718, "grad_norm": 1.087468370845473, "learning_rate": 2.44464928497404e-05, "loss": 0.5543, "step": 19400 }, { "epoch": 0.5663213191303079, "grad_norm": 0.962954653391166, "learning_rate": 2.443573595119698e-05, "loss": 0.5419, "step": 19405 }, { "epoch": 0.566467240624544, "grad_norm": 1.0725977718251574, "learning_rate": 2.4424979766224686e-05, "loss": 0.5627, "step": 19410 }, { "epoch": 0.5666131621187801, "grad_norm": 1.0711329682147823, "learning_rate": 2.4414224297328276e-05, "loss": 0.6043, "step": 19415 }, { "epoch": 0.5667590836130162, "grad_norm": 0.9184229927337886, "learning_rate": 2.4403469547012376e-05, "loss": 0.5424, "step": 19420 }, { "epoch": 0.5669050051072523, "grad_norm": 0.9900840238780486, "learning_rate": 2.4392715517781424e-05, "loss": 0.5713, "step": 19425 }, { "epoch": 0.5670509266014884, "grad_norm": 1.0326232022299162, "learning_rate": 2.438196221213969e-05, "loss": 0.5671, "step": 19430 }, { "epoch": 0.5671968480957245, "grad_norm": 1.1297156208535026, "learning_rate": 2.4371209632591306e-05, "loss": 0.5583, "step": 19435 }, { "epoch": 0.5673427695899605, "grad_norm": 1.2666884043574758, "learning_rate": 2.4360457781640185e-05, "loss": 0.6108, "step": 19440 }, { "epoch": 0.5674886910841968, "grad_norm": 1.0482144735176806, "learning_rate": 2.4349706661790106e-05, "loss": 0.5555, "step": 19445 }, { "epoch": 0.5676346125784328, "grad_norm": 1.0076283466884899, "learning_rate": 2.4338956275544668e-05, "loss": 0.5839, "step": 19450 }, { "epoch": 0.5677805340726689, "grad_norm": 1.1059734727385848, "learning_rate": 2.43282066254073e-05, "loss": 0.5409, "step": 19455 }, { "epoch": 0.567926455566905, "grad_norm": 1.03239931612895, "learning_rate": 2.4317457713881263e-05, "loss": 0.56, "step": 19460 }, { "epoch": 0.5680723770611411, "grad_norm": 0.9518194947040801, "learning_rate": 2.4306709543469634e-05, "loss": 0.5484, "step": 19465 }, { "epoch": 0.5682182985553772, "grad_norm": 1.0487805789207552, "learning_rate": 2.4295962116675324e-05, "loss": 0.6289, "step": 19470 }, { "epoch": 0.5683642200496133, "grad_norm": 0.9889208969704194, "learning_rate": 2.4285215436001075e-05, "loss": 0.6138, "step": 19475 }, { "epoch": 0.5685101415438494, "grad_norm": 1.103561269508087, "learning_rate": 2.427446950394947e-05, "loss": 0.5709, "step": 19480 }, { "epoch": 0.5686560630380855, "grad_norm": 1.0537865038572112, "learning_rate": 2.4263724323022867e-05, "loss": 0.5095, "step": 19485 }, { "epoch": 0.5688019845323216, "grad_norm": 1.0454335734105995, "learning_rate": 2.4252979895723504e-05, "loss": 0.5655, "step": 19490 }, { "epoch": 0.5689479060265578, "grad_norm": 1.2187331331251503, "learning_rate": 2.4242236224553416e-05, "loss": 0.607, "step": 19495 }, { "epoch": 0.5690938275207938, "grad_norm": 0.967919693370299, "learning_rate": 2.423149331201447e-05, "loss": 0.5801, "step": 19500 }, { "epoch": 0.5692397490150299, "grad_norm": 0.9603491495727466, "learning_rate": 2.4220751160608353e-05, "loss": 0.562, "step": 19505 }, { "epoch": 0.569385670509266, "grad_norm": 1.1393856886520102, "learning_rate": 2.4210009772836566e-05, "loss": 0.5568, "step": 19510 }, { "epoch": 0.5695315920035021, "grad_norm": 0.9905720047753638, "learning_rate": 2.4199269151200456e-05, "loss": 0.5626, "step": 19515 }, { "epoch": 0.5696775134977382, "grad_norm": 1.0127381621549063, "learning_rate": 2.4188529298201168e-05, "loss": 0.6223, "step": 19520 }, { "epoch": 0.5698234349919743, "grad_norm": 1.043503433621324, "learning_rate": 2.417779021633968e-05, "loss": 0.5544, "step": 19525 }, { "epoch": 0.5699693564862104, "grad_norm": 1.022900977933336, "learning_rate": 2.4167051908116793e-05, "loss": 0.5418, "step": 19530 }, { "epoch": 0.5701152779804465, "grad_norm": 1.0064623542984361, "learning_rate": 2.4156314376033117e-05, "loss": 0.5038, "step": 19535 }, { "epoch": 0.5702611994746826, "grad_norm": 1.1630485095625613, "learning_rate": 2.414557762258909e-05, "loss": 0.4865, "step": 19540 }, { "epoch": 0.5704071209689188, "grad_norm": 1.0112976148485264, "learning_rate": 2.4134841650284958e-05, "loss": 0.5525, "step": 19545 }, { "epoch": 0.5705530424631549, "grad_norm": 0.9943071438940063, "learning_rate": 2.4124106461620804e-05, "loss": 0.5762, "step": 19550 }, { "epoch": 0.570698963957391, "grad_norm": 1.0972661288424221, "learning_rate": 2.4113372059096505e-05, "loss": 0.6109, "step": 19555 }, { "epoch": 0.570844885451627, "grad_norm": 1.021607176701133, "learning_rate": 2.410263844521177e-05, "loss": 0.6179, "step": 19560 }, { "epoch": 0.5709908069458631, "grad_norm": 0.9976981375922676, "learning_rate": 2.4091905622466138e-05, "loss": 0.5794, "step": 19565 }, { "epoch": 0.5711367284400992, "grad_norm": 1.017355359592056, "learning_rate": 2.4081173593358924e-05, "loss": 0.6123, "step": 19570 }, { "epoch": 0.5712826499343353, "grad_norm": 1.0947729252844716, "learning_rate": 2.40704423603893e-05, "loss": 0.598, "step": 19575 }, { "epoch": 0.5714285714285714, "grad_norm": 1.1122979500011798, "learning_rate": 2.405971192605622e-05, "loss": 0.6051, "step": 19580 }, { "epoch": 0.5715744929228075, "grad_norm": 1.0626110276751881, "learning_rate": 2.4048982292858467e-05, "loss": 0.549, "step": 19585 }, { "epoch": 0.5717204144170436, "grad_norm": 0.9888621336869378, "learning_rate": 2.4038253463294647e-05, "loss": 0.5634, "step": 19590 }, { "epoch": 0.5718663359112798, "grad_norm": 0.9603879643285596, "learning_rate": 2.4027525439863157e-05, "loss": 0.5787, "step": 19595 }, { "epoch": 0.5720122574055159, "grad_norm": 1.0439401637810652, "learning_rate": 2.4016798225062236e-05, "loss": 0.5695, "step": 19600 }, { "epoch": 0.572158178899752, "grad_norm": 1.0563232939961402, "learning_rate": 2.4006071821389897e-05, "loss": 0.6025, "step": 19605 }, { "epoch": 0.572304100393988, "grad_norm": 1.1897741745411052, "learning_rate": 2.3995346231343986e-05, "loss": 0.5898, "step": 19610 }, { "epoch": 0.5724500218882241, "grad_norm": 0.9907830940476184, "learning_rate": 2.3984621457422164e-05, "loss": 0.5818, "step": 19615 }, { "epoch": 0.5725959433824602, "grad_norm": 0.8979939849343949, "learning_rate": 2.3973897502121883e-05, "loss": 0.5415, "step": 19620 }, { "epoch": 0.5727418648766963, "grad_norm": 0.9506663375049602, "learning_rate": 2.3963174367940443e-05, "loss": 0.5594, "step": 19625 }, { "epoch": 0.5728877863709324, "grad_norm": 1.053628239029449, "learning_rate": 2.395245205737489e-05, "loss": 0.5412, "step": 19630 }, { "epoch": 0.5730337078651685, "grad_norm": 1.0324232359859886, "learning_rate": 2.394173057292214e-05, "loss": 0.6263, "step": 19635 }, { "epoch": 0.5731796293594046, "grad_norm": 1.060672260120206, "learning_rate": 2.3931009917078885e-05, "loss": 0.5843, "step": 19640 }, { "epoch": 0.5733255508536408, "grad_norm": 1.051380927302944, "learning_rate": 2.3920290092341636e-05, "loss": 0.5944, "step": 19645 }, { "epoch": 0.5734714723478769, "grad_norm": 1.0332582720349894, "learning_rate": 2.3909571101206684e-05, "loss": 0.5514, "step": 19650 }, { "epoch": 0.573617393842113, "grad_norm": 1.1165065892940602, "learning_rate": 2.389885294617016e-05, "loss": 0.5557, "step": 19655 }, { "epoch": 0.5737633153363491, "grad_norm": 1.1243443192739717, "learning_rate": 2.388813562972798e-05, "loss": 0.5874, "step": 19660 }, { "epoch": 0.5739092368305851, "grad_norm": 0.9690077446995694, "learning_rate": 2.3877419154375886e-05, "loss": 0.5524, "step": 19665 }, { "epoch": 0.5740551583248212, "grad_norm": 1.101170878516414, "learning_rate": 2.3866703522609406e-05, "loss": 0.5503, "step": 19670 }, { "epoch": 0.5742010798190573, "grad_norm": 1.0808283099924705, "learning_rate": 2.3855988736923855e-05, "loss": 0.5282, "step": 19675 }, { "epoch": 0.5743470013132934, "grad_norm": 1.2252590800713856, "learning_rate": 2.3845274799814393e-05, "loss": 0.6092, "step": 19680 }, { "epoch": 0.5744929228075295, "grad_norm": 1.033283015047248, "learning_rate": 2.383456171377595e-05, "loss": 0.5685, "step": 19685 }, { "epoch": 0.5746388443017657, "grad_norm": 1.0292211122904824, "learning_rate": 2.3823849481303268e-05, "loss": 0.5483, "step": 19690 }, { "epoch": 0.5747847657960018, "grad_norm": 0.9795220588868664, "learning_rate": 2.381313810489091e-05, "loss": 0.5186, "step": 19695 }, { "epoch": 0.5749306872902379, "grad_norm": 1.009543404724265, "learning_rate": 2.3802427587033184e-05, "loss": 0.5586, "step": 19700 }, { "epoch": 0.575076608784474, "grad_norm": 0.9526254455321203, "learning_rate": 2.3791717930224255e-05, "loss": 0.5565, "step": 19705 }, { "epoch": 0.5752225302787101, "grad_norm": 1.05814446754326, "learning_rate": 2.378100913695807e-05, "loss": 0.5786, "step": 19710 }, { "epoch": 0.5753684517729462, "grad_norm": 1.1072549291998413, "learning_rate": 2.3770301209728364e-05, "loss": 0.6009, "step": 19715 }, { "epoch": 0.5755143732671822, "grad_norm": 1.1069411821699975, "learning_rate": 2.3759594151028687e-05, "loss": 0.5993, "step": 19720 }, { "epoch": 0.5756602947614183, "grad_norm": 0.9687950745002407, "learning_rate": 2.3748887963352363e-05, "loss": 0.6346, "step": 19725 }, { "epoch": 0.5758062162556544, "grad_norm": 0.9565434370262882, "learning_rate": 2.3738182649192536e-05, "loss": 0.5384, "step": 19730 }, { "epoch": 0.5759521377498905, "grad_norm": 1.2963520126701809, "learning_rate": 2.3727478211042137e-05, "loss": 0.631, "step": 19735 }, { "epoch": 0.5760980592441267, "grad_norm": 1.0277698762556078, "learning_rate": 2.3716774651393902e-05, "loss": 0.518, "step": 19740 }, { "epoch": 0.5762439807383628, "grad_norm": 1.037264089457356, "learning_rate": 2.3706071972740345e-05, "loss": 0.5777, "step": 19745 }, { "epoch": 0.5763899022325989, "grad_norm": 1.0686817206165269, "learning_rate": 2.3695370177573784e-05, "loss": 0.5847, "step": 19750 }, { "epoch": 0.576535823726835, "grad_norm": 1.0219112677969322, "learning_rate": 2.368466926838634e-05, "loss": 0.5744, "step": 19755 }, { "epoch": 0.5766817452210711, "grad_norm": 0.9198189234949002, "learning_rate": 2.367396924766991e-05, "loss": 0.5977, "step": 19760 }, { "epoch": 0.5768276667153072, "grad_norm": 1.1688812470931758, "learning_rate": 2.3663270117916208e-05, "loss": 0.551, "step": 19765 }, { "epoch": 0.5769735882095433, "grad_norm": 1.223149741639775, "learning_rate": 2.3652571881616708e-05, "loss": 0.6064, "step": 19770 }, { "epoch": 0.5771195097037793, "grad_norm": 1.093432559624975, "learning_rate": 2.3641874541262703e-05, "loss": 0.5427, "step": 19775 }, { "epoch": 0.5772654311980154, "grad_norm": 0.9943137345686238, "learning_rate": 2.3631178099345274e-05, "loss": 0.5146, "step": 19780 }, { "epoch": 0.5774113526922515, "grad_norm": 0.9860914534655764, "learning_rate": 2.362048255835528e-05, "loss": 0.5971, "step": 19785 }, { "epoch": 0.5775572741864877, "grad_norm": 1.137046556542198, "learning_rate": 2.3609787920783384e-05, "loss": 0.5623, "step": 19790 }, { "epoch": 0.5777031956807238, "grad_norm": 1.1018774285870212, "learning_rate": 2.359909418912002e-05, "loss": 0.6113, "step": 19795 }, { "epoch": 0.5778491171749599, "grad_norm": 1.3686580234016736, "learning_rate": 2.358840136585543e-05, "loss": 0.6183, "step": 19800 }, { "epoch": 0.577995038669196, "grad_norm": 0.9878869735558028, "learning_rate": 2.3577709453479635e-05, "loss": 0.5758, "step": 19805 }, { "epoch": 0.5781409601634321, "grad_norm": 1.0057911338574395, "learning_rate": 2.356701845448246e-05, "loss": 0.6007, "step": 19810 }, { "epoch": 0.5782868816576682, "grad_norm": 1.034418996895888, "learning_rate": 2.3556328371353488e-05, "loss": 0.5579, "step": 19815 }, { "epoch": 0.5784328031519043, "grad_norm": 1.0825349240768498, "learning_rate": 2.3545639206582114e-05, "loss": 0.5986, "step": 19820 }, { "epoch": 0.5785787246461404, "grad_norm": 1.0761705246061184, "learning_rate": 2.35349509626575e-05, "loss": 0.5843, "step": 19825 }, { "epoch": 0.5787246461403764, "grad_norm": 1.0338489498115588, "learning_rate": 2.3524263642068606e-05, "loss": 0.6431, "step": 19830 }, { "epoch": 0.5788705676346125, "grad_norm": 1.1026292915996687, "learning_rate": 2.3513577247304193e-05, "loss": 0.5962, "step": 19835 }, { "epoch": 0.5790164891288487, "grad_norm": 1.0591814340101953, "learning_rate": 2.350289178085276e-05, "loss": 0.5521, "step": 19840 }, { "epoch": 0.5791624106230848, "grad_norm": 0.9524738320325364, "learning_rate": 2.3492207245202626e-05, "loss": 0.6025, "step": 19845 }, { "epoch": 0.5793083321173209, "grad_norm": 0.9982975450306473, "learning_rate": 2.3481523642841884e-05, "loss": 0.5502, "step": 19850 }, { "epoch": 0.579454253611557, "grad_norm": 1.2876912246017667, "learning_rate": 2.3470840976258423e-05, "loss": 0.6401, "step": 19855 }, { "epoch": 0.5796001751057931, "grad_norm": 1.0360027701575842, "learning_rate": 2.34601592479399e-05, "loss": 0.5767, "step": 19860 }, { "epoch": 0.5797460966000292, "grad_norm": 1.1903877611486364, "learning_rate": 2.3449478460373736e-05, "loss": 0.5755, "step": 19865 }, { "epoch": 0.5798920180942653, "grad_norm": 1.05899201418877, "learning_rate": 2.3438798616047165e-05, "loss": 0.5655, "step": 19870 }, { "epoch": 0.5800379395885014, "grad_norm": 0.9707993762008369, "learning_rate": 2.3428119717447185e-05, "loss": 0.5791, "step": 19875 }, { "epoch": 0.5801838610827375, "grad_norm": 0.9943940114821047, "learning_rate": 2.3417441767060578e-05, "loss": 0.5499, "step": 19880 }, { "epoch": 0.5803297825769735, "grad_norm": 1.0998746453248927, "learning_rate": 2.3406764767373912e-05, "loss": 0.5326, "step": 19885 }, { "epoch": 0.5804757040712097, "grad_norm": 1.0192555530747083, "learning_rate": 2.3396088720873517e-05, "loss": 0.5286, "step": 19890 }, { "epoch": 0.5806216255654458, "grad_norm": 1.0135247138597054, "learning_rate": 2.3385413630045512e-05, "loss": 0.609, "step": 19895 }, { "epoch": 0.5807675470596819, "grad_norm": 0.9375723531144762, "learning_rate": 2.337473949737578e-05, "loss": 0.5623, "step": 19900 }, { "epoch": 0.580913468553918, "grad_norm": 1.013587725649466, "learning_rate": 2.336406632535002e-05, "loss": 0.5897, "step": 19905 }, { "epoch": 0.5810593900481541, "grad_norm": 1.0259578466540173, "learning_rate": 2.3353394116453646e-05, "loss": 0.5979, "step": 19910 }, { "epoch": 0.5812053115423902, "grad_norm": 0.9098455463702352, "learning_rate": 2.3342722873171902e-05, "loss": 0.5405, "step": 19915 }, { "epoch": 0.5813512330366263, "grad_norm": 0.9438563565428367, "learning_rate": 2.333205259798978e-05, "loss": 0.6148, "step": 19920 }, { "epoch": 0.5814971545308624, "grad_norm": 0.9894844982728428, "learning_rate": 2.3321383293392064e-05, "loss": 0.5473, "step": 19925 }, { "epoch": 0.5816430760250985, "grad_norm": 0.8859886805773258, "learning_rate": 2.3310714961863285e-05, "loss": 0.5764, "step": 19930 }, { "epoch": 0.5817889975193345, "grad_norm": 1.4192123280097704, "learning_rate": 2.330004760588777e-05, "loss": 0.6193, "step": 19935 }, { "epoch": 0.5819349190135708, "grad_norm": 1.113111075087676, "learning_rate": 2.3289381227949613e-05, "loss": 0.53, "step": 19940 }, { "epoch": 0.5820808405078068, "grad_norm": 1.0784595956427026, "learning_rate": 2.327871583053267e-05, "loss": 0.5413, "step": 19945 }, { "epoch": 0.5822267620020429, "grad_norm": 0.9414684261308156, "learning_rate": 2.326805141612059e-05, "loss": 0.568, "step": 19950 }, { "epoch": 0.582372683496279, "grad_norm": 1.030533834064575, "learning_rate": 2.3257387987196788e-05, "loss": 0.5074, "step": 19955 }, { "epoch": 0.5825186049905151, "grad_norm": 1.2681820979247564, "learning_rate": 2.324672554624442e-05, "loss": 0.615, "step": 19960 }, { "epoch": 0.5826645264847512, "grad_norm": 1.3022550494463503, "learning_rate": 2.323606409574645e-05, "loss": 0.6014, "step": 19965 }, { "epoch": 0.5828104479789873, "grad_norm": 1.1224614254956247, "learning_rate": 2.3225403638185588e-05, "loss": 0.617, "step": 19970 }, { "epoch": 0.5829563694732234, "grad_norm": 1.0395465766918444, "learning_rate": 2.321474417604433e-05, "loss": 0.5199, "step": 19975 }, { "epoch": 0.5831022909674595, "grad_norm": 0.9210448832036293, "learning_rate": 2.320408571180493e-05, "loss": 0.5587, "step": 19980 }, { "epoch": 0.5832482124616956, "grad_norm": 1.147796740447498, "learning_rate": 2.3193428247949394e-05, "loss": 0.6102, "step": 19985 }, { "epoch": 0.5833941339559318, "grad_norm": 0.9760878615299288, "learning_rate": 2.318277178695953e-05, "loss": 0.5722, "step": 19990 }, { "epoch": 0.5835400554501678, "grad_norm": 1.0158807382115598, "learning_rate": 2.3172116331316888e-05, "loss": 0.5549, "step": 19995 }, { "epoch": 0.5836859769444039, "grad_norm": 1.3370643707030505, "learning_rate": 2.316146188350279e-05, "loss": 0.5545, "step": 20000 }, { "epoch": 0.58383189843864, "grad_norm": 1.0601650883439209, "learning_rate": 2.3150808445998324e-05, "loss": 0.6062, "step": 20005 }, { "epoch": 0.5839778199328761, "grad_norm": 1.0832085047352267, "learning_rate": 2.314015602128433e-05, "loss": 0.55, "step": 20010 }, { "epoch": 0.5841237414271122, "grad_norm": 1.0067164385115555, "learning_rate": 2.312950461184144e-05, "loss": 0.6001, "step": 20015 }, { "epoch": 0.5842696629213483, "grad_norm": 0.9762789716483766, "learning_rate": 2.3118854220150027e-05, "loss": 0.5337, "step": 20020 }, { "epoch": 0.5844155844155844, "grad_norm": 1.0868463009744185, "learning_rate": 2.3108204848690246e-05, "loss": 0.5766, "step": 20025 }, { "epoch": 0.5845615059098205, "grad_norm": 1.0240216753493245, "learning_rate": 2.3097556499941973e-05, "loss": 0.6115, "step": 20030 }, { "epoch": 0.5847074274040566, "grad_norm": 1.0366519873291238, "learning_rate": 2.3086909176384906e-05, "loss": 0.5603, "step": 20035 }, { "epoch": 0.5848533488982928, "grad_norm": 1.1458312058321865, "learning_rate": 2.3076262880498457e-05, "loss": 0.5592, "step": 20040 }, { "epoch": 0.5849992703925289, "grad_norm": 1.0041386035133433, "learning_rate": 2.3065617614761813e-05, "loss": 0.5861, "step": 20045 }, { "epoch": 0.585145191886765, "grad_norm": 1.0279531474405932, "learning_rate": 2.305497338165394e-05, "loss": 0.5467, "step": 20050 }, { "epoch": 0.585291113381001, "grad_norm": 1.0219938409677654, "learning_rate": 2.304433018365352e-05, "loss": 0.5204, "step": 20055 }, { "epoch": 0.5854370348752371, "grad_norm": 1.0255116832824502, "learning_rate": 2.303368802323903e-05, "loss": 0.56, "step": 20060 }, { "epoch": 0.5855829563694732, "grad_norm": 1.1657682065927872, "learning_rate": 2.3023046902888708e-05, "loss": 0.5401, "step": 20065 }, { "epoch": 0.5857288778637093, "grad_norm": 0.9714736081473958, "learning_rate": 2.3012406825080533e-05, "loss": 0.572, "step": 20070 }, { "epoch": 0.5858747993579454, "grad_norm": 1.0460751900486902, "learning_rate": 2.300176779229225e-05, "loss": 0.5198, "step": 20075 }, { "epoch": 0.5860207208521815, "grad_norm": 1.0196186155194558, "learning_rate": 2.2991129807001337e-05, "loss": 0.5398, "step": 20080 }, { "epoch": 0.5861666423464176, "grad_norm": 0.9835985548288224, "learning_rate": 2.298049287168506e-05, "loss": 0.5454, "step": 20085 }, { "epoch": 0.5863125638406538, "grad_norm": 1.1305678948194302, "learning_rate": 2.2969856988820426e-05, "loss": 0.5504, "step": 20090 }, { "epoch": 0.5864584853348899, "grad_norm": 1.031482607950574, "learning_rate": 2.2959222160884214e-05, "loss": 0.5782, "step": 20095 }, { "epoch": 0.586604406829126, "grad_norm": 1.0639462238326853, "learning_rate": 2.2948588390352914e-05, "loss": 0.5499, "step": 20100 }, { "epoch": 0.586750328323362, "grad_norm": 0.9649379735991427, "learning_rate": 2.293795567970282e-05, "loss": 0.5314, "step": 20105 }, { "epoch": 0.5868962498175981, "grad_norm": 1.0786050965693676, "learning_rate": 2.2927324031409952e-05, "loss": 0.6091, "step": 20110 }, { "epoch": 0.5870421713118342, "grad_norm": 1.2126269526539113, "learning_rate": 2.2916693447950082e-05, "loss": 0.6135, "step": 20115 }, { "epoch": 0.5871880928060703, "grad_norm": 0.991849799542179, "learning_rate": 2.290606393179875e-05, "loss": 0.489, "step": 20120 }, { "epoch": 0.5873340143003064, "grad_norm": 1.1246426269741396, "learning_rate": 2.2895435485431228e-05, "loss": 0.5282, "step": 20125 }, { "epoch": 0.5874799357945425, "grad_norm": 1.1984968582242352, "learning_rate": 2.288480811132255e-05, "loss": 0.59, "step": 20130 }, { "epoch": 0.5876258572887786, "grad_norm": 1.0430466042705444, "learning_rate": 2.2874181811947497e-05, "loss": 0.5648, "step": 20135 }, { "epoch": 0.5877717787830148, "grad_norm": 1.0762150889898576, "learning_rate": 2.2863556589780617e-05, "loss": 0.617, "step": 20140 }, { "epoch": 0.5879177002772509, "grad_norm": 1.175687932031314, "learning_rate": 2.285293244729617e-05, "loss": 0.6026, "step": 20145 }, { "epoch": 0.588063621771487, "grad_norm": 1.2318501650849663, "learning_rate": 2.2842309386968196e-05, "loss": 0.5844, "step": 20150 }, { "epoch": 0.5882095432657231, "grad_norm": 1.0552322794995843, "learning_rate": 2.2831687411270476e-05, "loss": 0.5873, "step": 20155 }, { "epoch": 0.5883554647599591, "grad_norm": 0.865114369258492, "learning_rate": 2.2821066522676528e-05, "loss": 0.5376, "step": 20160 }, { "epoch": 0.5885013862541952, "grad_norm": 1.2060802660238765, "learning_rate": 2.2810446723659624e-05, "loss": 0.534, "step": 20165 }, { "epoch": 0.5886473077484313, "grad_norm": 0.9868617120119897, "learning_rate": 2.279982801669279e-05, "loss": 0.6097, "step": 20170 }, { "epoch": 0.5887932292426674, "grad_norm": 0.9743787891241852, "learning_rate": 2.278921040424879e-05, "loss": 0.5535, "step": 20175 }, { "epoch": 0.5889391507369035, "grad_norm": 0.939793853923095, "learning_rate": 2.2778593888800125e-05, "loss": 0.5277, "step": 20180 }, { "epoch": 0.5890850722311396, "grad_norm": 0.927018543227344, "learning_rate": 2.2767978472819056e-05, "loss": 0.5168, "step": 20185 }, { "epoch": 0.5892309937253758, "grad_norm": 0.9539244643890282, "learning_rate": 2.2757364158777585e-05, "loss": 0.5383, "step": 20190 }, { "epoch": 0.5893769152196119, "grad_norm": 1.1828529197982565, "learning_rate": 2.2746750949147438e-05, "loss": 0.5599, "step": 20195 }, { "epoch": 0.589522836713848, "grad_norm": 1.1124491374510874, "learning_rate": 2.2736138846400107e-05, "loss": 0.597, "step": 20200 }, { "epoch": 0.5896687582080841, "grad_norm": 1.1347113234226767, "learning_rate": 2.272552785300682e-05, "loss": 0.5404, "step": 20205 }, { "epoch": 0.5898146797023202, "grad_norm": 1.119478247378532, "learning_rate": 2.2714917971438548e-05, "loss": 0.5699, "step": 20210 }, { "epoch": 0.5899606011965562, "grad_norm": 1.1012338535080322, "learning_rate": 2.2704309204165996e-05, "loss": 0.5579, "step": 20215 }, { "epoch": 0.5901065226907923, "grad_norm": 0.9085055893323857, "learning_rate": 2.269370155365962e-05, "loss": 0.6082, "step": 20220 }, { "epoch": 0.5902524441850284, "grad_norm": 1.312426097371305, "learning_rate": 2.2683095022389594e-05, "loss": 0.571, "step": 20225 }, { "epoch": 0.5903983656792645, "grad_norm": 0.9496304002480994, "learning_rate": 2.267248961282586e-05, "loss": 0.4821, "step": 20230 }, { "epoch": 0.5905442871735006, "grad_norm": 1.1349963182095353, "learning_rate": 2.2661885327438075e-05, "loss": 0.5381, "step": 20235 }, { "epoch": 0.5906902086677368, "grad_norm": 0.8917739088768509, "learning_rate": 2.265128216869567e-05, "loss": 0.5422, "step": 20240 }, { "epoch": 0.5908361301619729, "grad_norm": 1.2144918697567813, "learning_rate": 2.2640680139067753e-05, "loss": 0.553, "step": 20245 }, { "epoch": 0.590982051656209, "grad_norm": 0.9986413615814256, "learning_rate": 2.2630079241023233e-05, "loss": 0.6059, "step": 20250 }, { "epoch": 0.5911279731504451, "grad_norm": 0.9865061021953596, "learning_rate": 2.261947947703071e-05, "loss": 0.6178, "step": 20255 }, { "epoch": 0.5912738946446812, "grad_norm": 0.8939777495483102, "learning_rate": 2.260888084955854e-05, "loss": 0.5693, "step": 20260 }, { "epoch": 0.5914198161389173, "grad_norm": 1.005944084878975, "learning_rate": 2.2598283361074828e-05, "loss": 0.5119, "step": 20265 }, { "epoch": 0.5915657376331533, "grad_norm": 0.9195304580861503, "learning_rate": 2.2587687014047376e-05, "loss": 0.5683, "step": 20270 }, { "epoch": 0.5917116591273894, "grad_norm": 1.3277022005795922, "learning_rate": 2.2577091810943746e-05, "loss": 0.6543, "step": 20275 }, { "epoch": 0.5918575806216255, "grad_norm": 1.0663342464068277, "learning_rate": 2.256649775423123e-05, "loss": 0.609, "step": 20280 }, { "epoch": 0.5920035021158616, "grad_norm": 1.3978342850656502, "learning_rate": 2.255590484637687e-05, "loss": 0.5292, "step": 20285 }, { "epoch": 0.5921494236100978, "grad_norm": 1.0049496463223158, "learning_rate": 2.254531308984739e-05, "loss": 0.5449, "step": 20290 }, { "epoch": 0.5922953451043339, "grad_norm": 0.9847580686873297, "learning_rate": 2.25347224871093e-05, "loss": 0.5393, "step": 20295 }, { "epoch": 0.59244126659857, "grad_norm": 1.0169967790279575, "learning_rate": 2.2524133040628813e-05, "loss": 0.544, "step": 20300 }, { "epoch": 0.5925871880928061, "grad_norm": 1.1658272993065637, "learning_rate": 2.251354475287188e-05, "loss": 0.5779, "step": 20305 }, { "epoch": 0.5927331095870422, "grad_norm": 1.056454703688431, "learning_rate": 2.2502957626304195e-05, "loss": 0.541, "step": 20310 }, { "epoch": 0.5928790310812783, "grad_norm": 1.0823722829493652, "learning_rate": 2.249237166339114e-05, "loss": 0.5743, "step": 20315 }, { "epoch": 0.5930249525755144, "grad_norm": 1.0635155505116682, "learning_rate": 2.2481786866597877e-05, "loss": 0.5646, "step": 20320 }, { "epoch": 0.5931708740697504, "grad_norm": 1.0562418940884053, "learning_rate": 2.247120323838926e-05, "loss": 0.5693, "step": 20325 }, { "epoch": 0.5933167955639865, "grad_norm": 0.938750548293275, "learning_rate": 2.2460620781229896e-05, "loss": 0.5882, "step": 20330 }, { "epoch": 0.5934627170582226, "grad_norm": 0.9674781560476177, "learning_rate": 2.2450039497584108e-05, "loss": 0.5786, "step": 20335 }, { "epoch": 0.5936086385524588, "grad_norm": 1.0382562079800113, "learning_rate": 2.2439459389915936e-05, "loss": 0.5517, "step": 20340 }, { "epoch": 0.5937545600466949, "grad_norm": 1.0405564976104924, "learning_rate": 2.242888046068915e-05, "loss": 0.5889, "step": 20345 }, { "epoch": 0.593900481540931, "grad_norm": 1.0170114741046987, "learning_rate": 2.2418302712367266e-05, "loss": 0.5458, "step": 20350 }, { "epoch": 0.5940464030351671, "grad_norm": 1.162253256162768, "learning_rate": 2.240772614741352e-05, "loss": 0.5464, "step": 20355 }, { "epoch": 0.5941923245294032, "grad_norm": 1.1075240930150376, "learning_rate": 2.239715076829083e-05, "loss": 0.5729, "step": 20360 }, { "epoch": 0.5943382460236393, "grad_norm": 1.1112280697434358, "learning_rate": 2.2386576577461897e-05, "loss": 0.523, "step": 20365 }, { "epoch": 0.5944841675178754, "grad_norm": 1.0792448966301387, "learning_rate": 2.2376003577389104e-05, "loss": 0.6105, "step": 20370 }, { "epoch": 0.5946300890121115, "grad_norm": 0.9822900784735648, "learning_rate": 2.2365431770534585e-05, "loss": 0.558, "step": 20375 }, { "epoch": 0.5947760105063475, "grad_norm": 1.4666583289560435, "learning_rate": 2.235486115936018e-05, "loss": 0.6297, "step": 20380 }, { "epoch": 0.5949219320005836, "grad_norm": 1.0954487303190743, "learning_rate": 2.234429174632744e-05, "loss": 0.5386, "step": 20385 }, { "epoch": 0.5950678534948198, "grad_norm": 1.0998707050841217, "learning_rate": 2.2333723533897656e-05, "loss": 0.5946, "step": 20390 }, { "epoch": 0.5952137749890559, "grad_norm": 1.0577715665455427, "learning_rate": 2.2323156524531847e-05, "loss": 0.4987, "step": 20395 }, { "epoch": 0.595359696483292, "grad_norm": 1.1000263172707412, "learning_rate": 2.231259072069072e-05, "loss": 0.5672, "step": 20400 }, { "epoch": 0.5955056179775281, "grad_norm": 1.0931702251825106, "learning_rate": 2.2302026124834735e-05, "loss": 0.5784, "step": 20405 }, { "epoch": 0.5956515394717642, "grad_norm": 1.1593187168785455, "learning_rate": 2.229146273942404e-05, "loss": 0.5309, "step": 20410 }, { "epoch": 0.5957974609660003, "grad_norm": 1.024642083032205, "learning_rate": 2.2280900566918527e-05, "loss": 0.5243, "step": 20415 }, { "epoch": 0.5959433824602364, "grad_norm": 0.9364630036692924, "learning_rate": 2.2270339609777795e-05, "loss": 0.597, "step": 20420 }, { "epoch": 0.5960893039544725, "grad_norm": 1.0631097835374657, "learning_rate": 2.2259779870461163e-05, "loss": 0.5595, "step": 20425 }, { "epoch": 0.5962352254487086, "grad_norm": 1.0818605687517882, "learning_rate": 2.2249221351427652e-05, "loss": 0.5166, "step": 20430 }, { "epoch": 0.5963811469429448, "grad_norm": 1.1059413427306886, "learning_rate": 2.223866405513602e-05, "loss": 0.5328, "step": 20435 }, { "epoch": 0.5965270684371808, "grad_norm": 1.0505419561483995, "learning_rate": 2.222810798404473e-05, "loss": 0.5071, "step": 20440 }, { "epoch": 0.5966729899314169, "grad_norm": 0.9612051757730211, "learning_rate": 2.2217553140611952e-05, "loss": 0.5846, "step": 20445 }, { "epoch": 0.596818911425653, "grad_norm": 1.0945574456110765, "learning_rate": 2.220699952729559e-05, "loss": 0.5803, "step": 20450 }, { "epoch": 0.5969648329198891, "grad_norm": 1.0513870560234595, "learning_rate": 2.2196447146553246e-05, "loss": 0.6112, "step": 20455 }, { "epoch": 0.5971107544141252, "grad_norm": 0.9943958297025546, "learning_rate": 2.2185896000842228e-05, "loss": 0.5442, "step": 20460 }, { "epoch": 0.5972566759083613, "grad_norm": 1.1133996031294575, "learning_rate": 2.2175346092619587e-05, "loss": 0.4808, "step": 20465 }, { "epoch": 0.5974025974025974, "grad_norm": 1.0980346172546491, "learning_rate": 2.2164797424342043e-05, "loss": 0.5182, "step": 20470 }, { "epoch": 0.5975485188968335, "grad_norm": 1.1067198836407617, "learning_rate": 2.215424999846608e-05, "loss": 0.556, "step": 20475 }, { "epoch": 0.5976944403910696, "grad_norm": 1.0759340430497362, "learning_rate": 2.2143703817447836e-05, "loss": 0.538, "step": 20480 }, { "epoch": 0.5978403618853058, "grad_norm": 1.0707005495055217, "learning_rate": 2.2133158883743194e-05, "loss": 0.5509, "step": 20485 }, { "epoch": 0.5979862833795418, "grad_norm": 0.9814784070439988, "learning_rate": 2.212261519980775e-05, "loss": 0.5682, "step": 20490 }, { "epoch": 0.5981322048737779, "grad_norm": 1.0201940864950712, "learning_rate": 2.2112072768096786e-05, "loss": 0.5815, "step": 20495 }, { "epoch": 0.598278126368014, "grad_norm": 1.0251839583017146, "learning_rate": 2.2101531591065306e-05, "loss": 0.5744, "step": 20500 }, { "epoch": 0.5984240478622501, "grad_norm": 1.0458626970003326, "learning_rate": 2.2090991671168027e-05, "loss": 0.5177, "step": 20505 }, { "epoch": 0.5985699693564862, "grad_norm": 0.9348861798189539, "learning_rate": 2.2080453010859353e-05, "loss": 0.5896, "step": 20510 }, { "epoch": 0.5987158908507223, "grad_norm": 1.050269455195219, "learning_rate": 2.2069915612593412e-05, "loss": 0.5885, "step": 20515 }, { "epoch": 0.5988618123449584, "grad_norm": 1.0804755063263796, "learning_rate": 2.205937947882404e-05, "loss": 0.6179, "step": 20520 }, { "epoch": 0.5990077338391945, "grad_norm": 1.223588097639347, "learning_rate": 2.2048844612004776e-05, "loss": 0.5401, "step": 20525 }, { "epoch": 0.5991536553334306, "grad_norm": 0.9293614599273766, "learning_rate": 2.2038311014588854e-05, "loss": 0.551, "step": 20530 }, { "epoch": 0.5992995768276668, "grad_norm": 1.0805094735804892, "learning_rate": 2.202777868902921e-05, "loss": 0.5545, "step": 20535 }, { "epoch": 0.5994454983219029, "grad_norm": 0.9488425597564626, "learning_rate": 2.201724763777851e-05, "loss": 0.5391, "step": 20540 }, { "epoch": 0.599591419816139, "grad_norm": 0.9865826681656805, "learning_rate": 2.2006717863289106e-05, "loss": 0.592, "step": 20545 }, { "epoch": 0.599737341310375, "grad_norm": 1.023242505353522, "learning_rate": 2.1996189368013025e-05, "loss": 0.5733, "step": 20550 }, { "epoch": 0.5998832628046111, "grad_norm": 0.9959313552116021, "learning_rate": 2.1985662154402053e-05, "loss": 0.5934, "step": 20555 }, { "epoch": 0.6000291842988472, "grad_norm": 1.1775251651305807, "learning_rate": 2.1975136224907638e-05, "loss": 0.5558, "step": 20560 }, { "epoch": 0.6001751057930833, "grad_norm": 1.0374697182886492, "learning_rate": 2.196461158198094e-05, "loss": 0.6711, "step": 20565 }, { "epoch": 0.6003210272873194, "grad_norm": 1.1039702053972185, "learning_rate": 2.195408822807282e-05, "loss": 0.5485, "step": 20570 }, { "epoch": 0.6004669487815555, "grad_norm": 1.04655963743811, "learning_rate": 2.194356616563383e-05, "loss": 0.5363, "step": 20575 }, { "epoch": 0.6006128702757916, "grad_norm": 1.0508311881698102, "learning_rate": 2.1933045397114245e-05, "loss": 0.5844, "step": 20580 }, { "epoch": 0.6007587917700278, "grad_norm": 0.9366321134470322, "learning_rate": 2.1922525924964004e-05, "loss": 0.534, "step": 20585 }, { "epoch": 0.6009047132642639, "grad_norm": 1.0380862691353556, "learning_rate": 2.1912007751632767e-05, "loss": 0.5768, "step": 20590 }, { "epoch": 0.6010506347585, "grad_norm": 1.0366172556067461, "learning_rate": 2.1901490879569908e-05, "loss": 0.5643, "step": 20595 }, { "epoch": 0.601196556252736, "grad_norm": 1.047360476975221, "learning_rate": 2.1890975311224448e-05, "loss": 0.5441, "step": 20600 }, { "epoch": 0.6013424777469721, "grad_norm": 1.0172326180650557, "learning_rate": 2.1880461049045144e-05, "loss": 0.5771, "step": 20605 }, { "epoch": 0.6014883992412082, "grad_norm": 1.2187568986612647, "learning_rate": 2.1869948095480447e-05, "loss": 0.5665, "step": 20610 }, { "epoch": 0.6016343207354443, "grad_norm": 0.9930688453397093, "learning_rate": 2.1859436452978483e-05, "loss": 0.6182, "step": 20615 }, { "epoch": 0.6017802422296804, "grad_norm": 1.0418819910204091, "learning_rate": 2.184892612398709e-05, "loss": 0.5332, "step": 20620 }, { "epoch": 0.6019261637239165, "grad_norm": 0.9857143201984704, "learning_rate": 2.1838417110953792e-05, "loss": 0.5625, "step": 20625 }, { "epoch": 0.6020720852181526, "grad_norm": 1.1584848470391331, "learning_rate": 2.182790941632581e-05, "loss": 0.6107, "step": 20630 }, { "epoch": 0.6022180067123888, "grad_norm": 1.1059416565062257, "learning_rate": 2.181740304255005e-05, "loss": 0.5432, "step": 20635 }, { "epoch": 0.6023639282066249, "grad_norm": 1.123736321120489, "learning_rate": 2.1806897992073138e-05, "loss": 0.5449, "step": 20640 }, { "epoch": 0.602509849700861, "grad_norm": 0.956918412791628, "learning_rate": 2.1796394267341346e-05, "loss": 0.4948, "step": 20645 }, { "epoch": 0.6026557711950971, "grad_norm": 0.9718674535469826, "learning_rate": 2.1785891870800685e-05, "loss": 0.5327, "step": 20650 }, { "epoch": 0.6028016926893331, "grad_norm": 0.9388932894873323, "learning_rate": 2.1775390804896816e-05, "loss": 0.5519, "step": 20655 }, { "epoch": 0.6029476141835692, "grad_norm": 1.0055246979123322, "learning_rate": 2.1764891072075112e-05, "loss": 0.5718, "step": 20660 }, { "epoch": 0.6030935356778053, "grad_norm": 1.0017008070083586, "learning_rate": 2.1754392674780643e-05, "loss": 0.5489, "step": 20665 }, { "epoch": 0.6032394571720414, "grad_norm": 0.8518831082289102, "learning_rate": 2.1743895615458148e-05, "loss": 0.5959, "step": 20670 }, { "epoch": 0.6033853786662775, "grad_norm": 0.994183401431802, "learning_rate": 2.173339989655206e-05, "loss": 0.5938, "step": 20675 }, { "epoch": 0.6035313001605136, "grad_norm": 0.8728277634365912, "learning_rate": 2.172290552050651e-05, "loss": 0.5572, "step": 20680 }, { "epoch": 0.6036772216547498, "grad_norm": 1.0331559815136646, "learning_rate": 2.171241248976531e-05, "loss": 0.5819, "step": 20685 }, { "epoch": 0.6038231431489859, "grad_norm": 0.9931145179801677, "learning_rate": 2.1701920806771958e-05, "loss": 0.5002, "step": 20690 }, { "epoch": 0.603969064643222, "grad_norm": 1.294920213556819, "learning_rate": 2.169143047396963e-05, "loss": 0.5608, "step": 20695 }, { "epoch": 0.6041149861374581, "grad_norm": 0.9987796593517074, "learning_rate": 2.1680941493801197e-05, "loss": 0.5195, "step": 20700 }, { "epoch": 0.6042609076316942, "grad_norm": 1.082973899450036, "learning_rate": 2.1670453868709222e-05, "loss": 0.6269, "step": 20705 }, { "epoch": 0.6044068291259302, "grad_norm": 1.186291341820937, "learning_rate": 2.1659967601135955e-05, "loss": 0.5906, "step": 20710 }, { "epoch": 0.6045527506201663, "grad_norm": 1.0604562835596387, "learning_rate": 2.1649482693523293e-05, "loss": 0.557, "step": 20715 }, { "epoch": 0.6046986721144024, "grad_norm": 1.314099975332506, "learning_rate": 2.1638999148312855e-05, "loss": 0.6083, "step": 20720 }, { "epoch": 0.6048445936086385, "grad_norm": 1.0462001366275309, "learning_rate": 2.1628516967945927e-05, "loss": 0.5343, "step": 20725 }, { "epoch": 0.6049905151028746, "grad_norm": 1.1311857720781664, "learning_rate": 2.1618036154863485e-05, "loss": 0.5856, "step": 20730 }, { "epoch": 0.6051364365971108, "grad_norm": 1.143649893366773, "learning_rate": 2.1607556711506194e-05, "loss": 0.5912, "step": 20735 }, { "epoch": 0.6052823580913469, "grad_norm": 0.8457230125523526, "learning_rate": 2.159707864031436e-05, "loss": 0.5514, "step": 20740 }, { "epoch": 0.605428279585583, "grad_norm": 1.0579730671928256, "learning_rate": 2.1586601943728013e-05, "loss": 0.5711, "step": 20745 }, { "epoch": 0.6055742010798191, "grad_norm": 1.004001186772471, "learning_rate": 2.157612662418685e-05, "loss": 0.521, "step": 20750 }, { "epoch": 0.6057201225740552, "grad_norm": 1.0654443347250322, "learning_rate": 2.1565652684130244e-05, "loss": 0.5424, "step": 20755 }, { "epoch": 0.6058660440682913, "grad_norm": 1.1286265359893384, "learning_rate": 2.1555180125997247e-05, "loss": 0.5678, "step": 20760 }, { "epoch": 0.6060119655625273, "grad_norm": 1.1479693581431403, "learning_rate": 2.1544708952226585e-05, "loss": 0.562, "step": 20765 }, { "epoch": 0.6061578870567634, "grad_norm": 1.06875988335612, "learning_rate": 2.1534239165256658e-05, "loss": 0.5419, "step": 20770 }, { "epoch": 0.6063038085509995, "grad_norm": 1.1545276979146863, "learning_rate": 2.1523770767525576e-05, "loss": 0.5368, "step": 20775 }, { "epoch": 0.6064497300452356, "grad_norm": 1.0947678661430025, "learning_rate": 2.151330376147108e-05, "loss": 0.6497, "step": 20780 }, { "epoch": 0.6065956515394718, "grad_norm": 0.9092165975933016, "learning_rate": 2.150283814953063e-05, "loss": 0.5881, "step": 20785 }, { "epoch": 0.6067415730337079, "grad_norm": 1.0249615315489535, "learning_rate": 2.1492373934141315e-05, "loss": 0.5502, "step": 20790 }, { "epoch": 0.606887494527944, "grad_norm": 0.8678888065011715, "learning_rate": 2.148191111773993e-05, "loss": 0.5354, "step": 20795 }, { "epoch": 0.6070334160221801, "grad_norm": 1.0305914310366362, "learning_rate": 2.147144970276294e-05, "loss": 0.5299, "step": 20800 }, { "epoch": 0.6071793375164162, "grad_norm": 0.9095794650262555, "learning_rate": 2.1460989691646495e-05, "loss": 0.5835, "step": 20805 }, { "epoch": 0.6073252590106523, "grad_norm": 1.2402571152238935, "learning_rate": 2.145053108682637e-05, "loss": 0.6104, "step": 20810 }, { "epoch": 0.6074711805048884, "grad_norm": 1.2254193813354115, "learning_rate": 2.1440073890738073e-05, "loss": 0.5973, "step": 20815 }, { "epoch": 0.6076171019991244, "grad_norm": 0.9822422301400647, "learning_rate": 2.142961810581675e-05, "loss": 0.538, "step": 20820 }, { "epoch": 0.6077630234933605, "grad_norm": 0.9772408431179281, "learning_rate": 2.1419163734497233e-05, "loss": 0.615, "step": 20825 }, { "epoch": 0.6079089449875966, "grad_norm": 1.005338618273472, "learning_rate": 2.140871077921401e-05, "loss": 0.5758, "step": 20830 }, { "epoch": 0.6080548664818328, "grad_norm": 1.0106155205475722, "learning_rate": 2.139825924240125e-05, "loss": 0.5656, "step": 20835 }, { "epoch": 0.6082007879760689, "grad_norm": 0.9939845411067095, "learning_rate": 2.1387809126492777e-05, "loss": 0.5504, "step": 20840 }, { "epoch": 0.608346709470305, "grad_norm": 1.1478236779280577, "learning_rate": 2.13773604339221e-05, "loss": 0.5632, "step": 20845 }, { "epoch": 0.6084926309645411, "grad_norm": 1.1606344762948329, "learning_rate": 2.136691316712241e-05, "loss": 0.5607, "step": 20850 }, { "epoch": 0.6086385524587772, "grad_norm": 1.0153698575612413, "learning_rate": 2.1356467328526537e-05, "loss": 0.6162, "step": 20855 }, { "epoch": 0.6087844739530133, "grad_norm": 1.0426421631438765, "learning_rate": 2.1346022920566976e-05, "loss": 0.5326, "step": 20860 }, { "epoch": 0.6089303954472494, "grad_norm": 1.0776483199406754, "learning_rate": 2.1335579945675926e-05, "loss": 0.5695, "step": 20865 }, { "epoch": 0.6090763169414855, "grad_norm": 1.0780409586606694, "learning_rate": 2.1325138406285212e-05, "loss": 0.5485, "step": 20870 }, { "epoch": 0.6092222384357215, "grad_norm": 1.112676160883401, "learning_rate": 2.1314698304826347e-05, "loss": 0.5152, "step": 20875 }, { "epoch": 0.6093681599299576, "grad_norm": 1.2861610403341261, "learning_rate": 2.1304259643730508e-05, "loss": 0.6097, "step": 20880 }, { "epoch": 0.6095140814241938, "grad_norm": 1.0235654980519893, "learning_rate": 2.1293822425428528e-05, "loss": 0.5154, "step": 20885 }, { "epoch": 0.6096600029184299, "grad_norm": 1.0248336427277436, "learning_rate": 2.1283386652350897e-05, "loss": 0.6362, "step": 20890 }, { "epoch": 0.609805924412666, "grad_norm": 0.9238274484533109, "learning_rate": 2.12729523269278e-05, "loss": 0.538, "step": 20895 }, { "epoch": 0.6099518459069021, "grad_norm": 1.1268433274417007, "learning_rate": 2.1262519451589057e-05, "loss": 0.6846, "step": 20900 }, { "epoch": 0.6100977674011382, "grad_norm": 0.8409508299516611, "learning_rate": 2.125208802876415e-05, "loss": 0.555, "step": 20905 }, { "epoch": 0.6102436888953743, "grad_norm": 1.060115998635965, "learning_rate": 2.1241658060882237e-05, "loss": 0.5642, "step": 20910 }, { "epoch": 0.6103896103896104, "grad_norm": 1.0056216063178673, "learning_rate": 2.123122955037213e-05, "loss": 0.5804, "step": 20915 }, { "epoch": 0.6105355318838465, "grad_norm": 1.1343809433512069, "learning_rate": 2.12208024996623e-05, "loss": 0.5894, "step": 20920 }, { "epoch": 0.6106814533780826, "grad_norm": 1.2345845805770375, "learning_rate": 2.1210376911180897e-05, "loss": 0.5921, "step": 20925 }, { "epoch": 0.6108273748723186, "grad_norm": 1.1387616682537929, "learning_rate": 2.1199952787355687e-05, "loss": 0.5643, "step": 20930 }, { "epoch": 0.6109732963665548, "grad_norm": 1.1019699759279846, "learning_rate": 2.1189530130614144e-05, "loss": 0.5827, "step": 20935 }, { "epoch": 0.6111192178607909, "grad_norm": 0.9621888290522305, "learning_rate": 2.1179108943383362e-05, "loss": 0.5376, "step": 20940 }, { "epoch": 0.611265139355027, "grad_norm": 1.1429063699441584, "learning_rate": 2.1168689228090116e-05, "loss": 0.6299, "step": 20945 }, { "epoch": 0.6114110608492631, "grad_norm": 0.9793385378375368, "learning_rate": 2.1158270987160845e-05, "loss": 0.582, "step": 20950 }, { "epoch": 0.6115569823434992, "grad_norm": 0.9625935878807543, "learning_rate": 2.1147854223021602e-05, "loss": 0.5464, "step": 20955 }, { "epoch": 0.6117029038377353, "grad_norm": 1.1470381136472476, "learning_rate": 2.1137438938098146e-05, "loss": 0.5253, "step": 20960 }, { "epoch": 0.6118488253319714, "grad_norm": 1.028113568982448, "learning_rate": 2.112702513481587e-05, "loss": 0.5293, "step": 20965 }, { "epoch": 0.6119947468262075, "grad_norm": 1.1336046067258037, "learning_rate": 2.111661281559982e-05, "loss": 0.5734, "step": 20970 }, { "epoch": 0.6121406683204436, "grad_norm": 1.0997543051865089, "learning_rate": 2.110620198287469e-05, "loss": 0.6211, "step": 20975 }, { "epoch": 0.6122865898146796, "grad_norm": 0.9912151064564366, "learning_rate": 2.1095792639064848e-05, "loss": 0.548, "step": 20980 }, { "epoch": 0.6124325113089159, "grad_norm": 1.0613383488483452, "learning_rate": 2.1085384786594293e-05, "loss": 0.5345, "step": 20985 }, { "epoch": 0.6125784328031519, "grad_norm": 1.3034219194078562, "learning_rate": 2.10749784278867e-05, "loss": 0.6727, "step": 20990 }, { "epoch": 0.612724354297388, "grad_norm": 0.9465008086291266, "learning_rate": 2.1064573565365386e-05, "loss": 0.5688, "step": 20995 }, { "epoch": 0.6128702757916241, "grad_norm": 1.0180498381045107, "learning_rate": 2.1054170201453306e-05, "loss": 0.5633, "step": 21000 }, { "epoch": 0.6130161972858602, "grad_norm": 1.0657436655782013, "learning_rate": 2.1043768338573078e-05, "loss": 0.5402, "step": 21005 }, { "epoch": 0.6131621187800963, "grad_norm": 1.004813186994838, "learning_rate": 2.1033367979146977e-05, "loss": 0.5228, "step": 21010 }, { "epoch": 0.6133080402743324, "grad_norm": 0.9249121557971645, "learning_rate": 2.1022969125596914e-05, "loss": 0.5876, "step": 21015 }, { "epoch": 0.6134539617685685, "grad_norm": 1.1860403940140252, "learning_rate": 2.101257178034447e-05, "loss": 0.5574, "step": 21020 }, { "epoch": 0.6135998832628046, "grad_norm": 1.0965048000541653, "learning_rate": 2.1002175945810848e-05, "loss": 0.5706, "step": 21025 }, { "epoch": 0.6137458047570407, "grad_norm": 1.3360917650644655, "learning_rate": 2.0991781624416906e-05, "loss": 0.6071, "step": 21030 }, { "epoch": 0.6138917262512769, "grad_norm": 1.383901407067633, "learning_rate": 2.0981388818583177e-05, "loss": 0.6533, "step": 21035 }, { "epoch": 0.614037647745513, "grad_norm": 1.1014294578665347, "learning_rate": 2.0970997530729803e-05, "loss": 0.5424, "step": 21040 }, { "epoch": 0.614183569239749, "grad_norm": 1.1272101205528826, "learning_rate": 2.0960607763276598e-05, "loss": 0.5894, "step": 21045 }, { "epoch": 0.6143294907339851, "grad_norm": 1.0062562554036623, "learning_rate": 2.095021951864301e-05, "loss": 0.595, "step": 21050 }, { "epoch": 0.6144754122282212, "grad_norm": 1.124343377791846, "learning_rate": 2.0939832799248132e-05, "loss": 0.5899, "step": 21055 }, { "epoch": 0.6146213337224573, "grad_norm": 1.0910739427240361, "learning_rate": 2.092944760751071e-05, "loss": 0.6024, "step": 21060 }, { "epoch": 0.6147672552166934, "grad_norm": 0.9850902669143926, "learning_rate": 2.0919063945849132e-05, "loss": 0.6254, "step": 21065 }, { "epoch": 0.6149131767109295, "grad_norm": 1.0522794167654212, "learning_rate": 2.090868181668143e-05, "loss": 0.5739, "step": 21070 }, { "epoch": 0.6150590982051656, "grad_norm": 1.3670239792786028, "learning_rate": 2.0898301222425264e-05, "loss": 0.5878, "step": 21075 }, { "epoch": 0.6152050196994017, "grad_norm": 1.174906405545156, "learning_rate": 2.0887922165497963e-05, "loss": 0.6097, "step": 21080 }, { "epoch": 0.6153509411936379, "grad_norm": 1.1029151659068621, "learning_rate": 2.087754464831648e-05, "loss": 0.5913, "step": 21085 }, { "epoch": 0.615496862687874, "grad_norm": 1.0722149946417212, "learning_rate": 2.0867168673297404e-05, "loss": 0.5466, "step": 21090 }, { "epoch": 0.61564278418211, "grad_norm": 1.0671606458624274, "learning_rate": 2.0856794242856987e-05, "loss": 0.5784, "step": 21095 }, { "epoch": 0.6157887056763461, "grad_norm": 0.9656602125896625, "learning_rate": 2.08464213594111e-05, "loss": 0.5053, "step": 21100 }, { "epoch": 0.6159346271705822, "grad_norm": 0.8487333982574742, "learning_rate": 2.083605002537527e-05, "loss": 0.4968, "step": 21105 }, { "epoch": 0.6160805486648183, "grad_norm": 1.0654263172881016, "learning_rate": 2.082568024316465e-05, "loss": 0.5615, "step": 21110 }, { "epoch": 0.6162264701590544, "grad_norm": 0.9245827144939938, "learning_rate": 2.081531201519405e-05, "loss": 0.5623, "step": 21115 }, { "epoch": 0.6163723916532905, "grad_norm": 1.0630141062163003, "learning_rate": 2.080494534387789e-05, "loss": 0.5863, "step": 21120 }, { "epoch": 0.6165183131475266, "grad_norm": 1.2693387154464157, "learning_rate": 2.079458023163025e-05, "loss": 0.5639, "step": 21125 }, { "epoch": 0.6166642346417627, "grad_norm": 0.9376850976772002, "learning_rate": 2.0784216680864833e-05, "loss": 0.5426, "step": 21130 }, { "epoch": 0.6168101561359989, "grad_norm": 1.0769892123138154, "learning_rate": 2.0773854693994994e-05, "loss": 0.5566, "step": 21135 }, { "epoch": 0.616956077630235, "grad_norm": 1.193443691146768, "learning_rate": 2.0763494273433727e-05, "loss": 0.5786, "step": 21140 }, { "epoch": 0.6171019991244711, "grad_norm": 1.0189284087574257, "learning_rate": 2.0753135421593616e-05, "loss": 0.5368, "step": 21145 }, { "epoch": 0.6172479206187071, "grad_norm": 7.954496033399136, "learning_rate": 2.0742778140886936e-05, "loss": 0.5891, "step": 21150 }, { "epoch": 0.6173938421129432, "grad_norm": 1.0887223560450103, "learning_rate": 2.0732422433725574e-05, "loss": 0.5695, "step": 21155 }, { "epoch": 0.6175397636071793, "grad_norm": 1.1922631437013145, "learning_rate": 2.0722068302521048e-05, "loss": 0.5527, "step": 21160 }, { "epoch": 0.6176856851014154, "grad_norm": 0.9858764650298035, "learning_rate": 2.0711715749684502e-05, "loss": 0.6035, "step": 21165 }, { "epoch": 0.6178316065956515, "grad_norm": 1.0346440495787257, "learning_rate": 2.0701364777626725e-05, "loss": 0.5369, "step": 21170 }, { "epoch": 0.6179775280898876, "grad_norm": 0.9005077258704912, "learning_rate": 2.0691015388758138e-05, "loss": 0.5485, "step": 21175 }, { "epoch": 0.6181234495841238, "grad_norm": 1.126403900209555, "learning_rate": 2.068066758548879e-05, "loss": 0.5753, "step": 21180 }, { "epoch": 0.6182693710783599, "grad_norm": 1.190056036680942, "learning_rate": 2.0670321370228362e-05, "loss": 0.575, "step": 21185 }, { "epoch": 0.618415292572596, "grad_norm": 1.0279237016730343, "learning_rate": 2.0659976745386157e-05, "loss": 0.5619, "step": 21190 }, { "epoch": 0.6185612140668321, "grad_norm": 0.9287121372900066, "learning_rate": 2.0649633713371107e-05, "loss": 0.5276, "step": 21195 }, { "epoch": 0.6187071355610682, "grad_norm": 0.965304169246816, "learning_rate": 2.0639292276591798e-05, "loss": 0.5693, "step": 21200 }, { "epoch": 0.6188530570553042, "grad_norm": 1.0316334511895353, "learning_rate": 2.062895243745641e-05, "loss": 0.5203, "step": 21205 }, { "epoch": 0.6189989785495403, "grad_norm": 0.9407941404535358, "learning_rate": 2.0618614198372793e-05, "loss": 0.5648, "step": 21210 }, { "epoch": 0.6191449000437764, "grad_norm": 1.0383205896852723, "learning_rate": 2.0608277561748372e-05, "loss": 0.5419, "step": 21215 }, { "epoch": 0.6192908215380125, "grad_norm": 0.8846479166681062, "learning_rate": 2.0597942529990233e-05, "loss": 0.5305, "step": 21220 }, { "epoch": 0.6194367430322486, "grad_norm": 1.0870178223318203, "learning_rate": 2.0587609105505096e-05, "loss": 0.5516, "step": 21225 }, { "epoch": 0.6195826645264848, "grad_norm": 0.9493673859021511, "learning_rate": 2.0577277290699272e-05, "loss": 0.5524, "step": 21230 }, { "epoch": 0.6197285860207209, "grad_norm": 1.075991458806228, "learning_rate": 2.0566947087978727e-05, "loss": 0.5922, "step": 21235 }, { "epoch": 0.619874507514957, "grad_norm": 1.0315632090199243, "learning_rate": 2.0556618499749037e-05, "loss": 0.5828, "step": 21240 }, { "epoch": 0.6200204290091931, "grad_norm": 1.114033719898931, "learning_rate": 2.054629152841541e-05, "loss": 0.5625, "step": 21245 }, { "epoch": 0.6201663505034292, "grad_norm": 0.8769256769172714, "learning_rate": 2.053596617638267e-05, "loss": 0.5527, "step": 21250 }, { "epoch": 0.6203122719976653, "grad_norm": 0.9331616257283377, "learning_rate": 2.052564244605529e-05, "loss": 0.5256, "step": 21255 }, { "epoch": 0.6204581934919013, "grad_norm": 0.9385707265453662, "learning_rate": 2.0515320339837307e-05, "loss": 0.5222, "step": 21260 }, { "epoch": 0.6206041149861374, "grad_norm": 0.9865989544284782, "learning_rate": 2.050499986013244e-05, "loss": 0.6163, "step": 21265 }, { "epoch": 0.6207500364803735, "grad_norm": 0.8994829113519723, "learning_rate": 2.0494681009343997e-05, "loss": 0.5327, "step": 21270 }, { "epoch": 0.6208959579746096, "grad_norm": 1.0514526287984596, "learning_rate": 2.0484363789874922e-05, "loss": 0.5518, "step": 21275 }, { "epoch": 0.6210418794688458, "grad_norm": 1.069301747313711, "learning_rate": 2.0474048204127773e-05, "loss": 0.5982, "step": 21280 }, { "epoch": 0.6211878009630819, "grad_norm": 1.3294654380040898, "learning_rate": 2.046373425450471e-05, "loss": 0.5861, "step": 21285 }, { "epoch": 0.621333722457318, "grad_norm": 0.9223621789935365, "learning_rate": 2.0453421943407547e-05, "loss": 0.5555, "step": 21290 }, { "epoch": 0.6214796439515541, "grad_norm": 0.9501808462250673, "learning_rate": 2.04431112732377e-05, "loss": 0.5902, "step": 21295 }, { "epoch": 0.6216255654457902, "grad_norm": 0.8997578243825174, "learning_rate": 2.0432802246396176e-05, "loss": 0.5294, "step": 21300 }, { "epoch": 0.6217714869400263, "grad_norm": 1.1095027987155772, "learning_rate": 2.0422494865283658e-05, "loss": 0.621, "step": 21305 }, { "epoch": 0.6219174084342624, "grad_norm": 1.0195718472616002, "learning_rate": 2.0412189132300384e-05, "loss": 0.5538, "step": 21310 }, { "epoch": 0.6220633299284984, "grad_norm": 1.1603937337271175, "learning_rate": 2.0401885049846253e-05, "loss": 0.5653, "step": 21315 }, { "epoch": 0.6222092514227345, "grad_norm": 0.857776049578617, "learning_rate": 2.039158262032076e-05, "loss": 0.5229, "step": 21320 }, { "epoch": 0.6223551729169706, "grad_norm": 1.0633128806757628, "learning_rate": 2.0381281846123012e-05, "loss": 0.5651, "step": 21325 }, { "epoch": 0.6225010944112068, "grad_norm": 0.9907112614236621, "learning_rate": 2.037098272965175e-05, "loss": 0.6081, "step": 21330 }, { "epoch": 0.6226470159054429, "grad_norm": 1.2478561719135073, "learning_rate": 2.0360685273305303e-05, "loss": 0.5885, "step": 21335 }, { "epoch": 0.622792937399679, "grad_norm": 1.0533024011191094, "learning_rate": 2.0350389479481625e-05, "loss": 0.6176, "step": 21340 }, { "epoch": 0.6229388588939151, "grad_norm": 0.9586637821408782, "learning_rate": 2.0340095350578285e-05, "loss": 0.565, "step": 21345 }, { "epoch": 0.6230847803881512, "grad_norm": 1.0426406732603255, "learning_rate": 2.0329802888992485e-05, "loss": 0.5626, "step": 21350 }, { "epoch": 0.6232307018823873, "grad_norm": 1.0347510408200014, "learning_rate": 2.031951209712098e-05, "loss": 0.6089, "step": 21355 }, { "epoch": 0.6233766233766234, "grad_norm": 0.9996708460516198, "learning_rate": 2.0309222977360193e-05, "loss": 0.5789, "step": 21360 }, { "epoch": 0.6235225448708595, "grad_norm": 1.4525903403202267, "learning_rate": 2.029893553210615e-05, "loss": 0.5948, "step": 21365 }, { "epoch": 0.6236684663650955, "grad_norm": 0.8834234802936349, "learning_rate": 2.0288649763754448e-05, "loss": 0.5683, "step": 21370 }, { "epoch": 0.6238143878593316, "grad_norm": 1.073468640838132, "learning_rate": 2.0278365674700343e-05, "loss": 0.5599, "step": 21375 }, { "epoch": 0.6239603093535678, "grad_norm": 0.9808194882641018, "learning_rate": 2.0268083267338665e-05, "loss": 0.564, "step": 21380 }, { "epoch": 0.6241062308478039, "grad_norm": 1.0608993769212025, "learning_rate": 2.0257802544063865e-05, "loss": 0.6158, "step": 21385 }, { "epoch": 0.62425215234204, "grad_norm": 1.1540170659309947, "learning_rate": 2.024752350727e-05, "loss": 0.6033, "step": 21390 }, { "epoch": 0.6243980738362761, "grad_norm": 0.98006331599683, "learning_rate": 2.023724615935075e-05, "loss": 0.5733, "step": 21395 }, { "epoch": 0.6245439953305122, "grad_norm": 0.967465655258965, "learning_rate": 2.0226970502699373e-05, "loss": 0.5419, "step": 21400 }, { "epoch": 0.6246899168247483, "grad_norm": 1.0185109680764977, "learning_rate": 2.0216696539708745e-05, "loss": 0.5123, "step": 21405 }, { "epoch": 0.6248358383189844, "grad_norm": 1.0499718379617553, "learning_rate": 2.0206424272771364e-05, "loss": 0.5261, "step": 21410 }, { "epoch": 0.6249817598132205, "grad_norm": 1.144159539526579, "learning_rate": 2.0196153704279303e-05, "loss": 0.534, "step": 21415 }, { "epoch": 0.6251276813074566, "grad_norm": 1.064938894036352, "learning_rate": 2.018588483662427e-05, "loss": 0.5612, "step": 21420 }, { "epoch": 0.6252736028016926, "grad_norm": 1.0264622459731616, "learning_rate": 2.0175617672197568e-05, "loss": 0.5694, "step": 21425 }, { "epoch": 0.6254195242959288, "grad_norm": 1.1168875367414954, "learning_rate": 2.016535221339007e-05, "loss": 0.5821, "step": 21430 }, { "epoch": 0.6255654457901649, "grad_norm": 0.9744687105585814, "learning_rate": 2.015508846259231e-05, "loss": 0.5947, "step": 21435 }, { "epoch": 0.625711367284401, "grad_norm": 0.9351018711588722, "learning_rate": 2.014482642219437e-05, "loss": 0.5697, "step": 21440 }, { "epoch": 0.6258572887786371, "grad_norm": 1.074847075465745, "learning_rate": 2.0134566094585976e-05, "loss": 0.571, "step": 21445 }, { "epoch": 0.6260032102728732, "grad_norm": 0.9232741818204088, "learning_rate": 2.0124307482156428e-05, "loss": 0.5622, "step": 21450 }, { "epoch": 0.6261491317671093, "grad_norm": 1.1075312982604204, "learning_rate": 2.0114050587294635e-05, "loss": 0.5161, "step": 21455 }, { "epoch": 0.6262950532613454, "grad_norm": 1.0498199037689169, "learning_rate": 2.010379541238911e-05, "loss": 0.5611, "step": 21460 }, { "epoch": 0.6264409747555815, "grad_norm": 0.8194196037784405, "learning_rate": 2.0093541959827956e-05, "loss": 0.4801, "step": 21465 }, { "epoch": 0.6265868962498176, "grad_norm": 0.9935096321243365, "learning_rate": 2.0083290231998896e-05, "loss": 0.5724, "step": 21470 }, { "epoch": 0.6267328177440536, "grad_norm": 1.0599974385061437, "learning_rate": 2.007304023128922e-05, "loss": 0.5287, "step": 21475 }, { "epoch": 0.6268787392382899, "grad_norm": 0.9435461840115053, "learning_rate": 2.006279196008584e-05, "loss": 0.56, "step": 21480 }, { "epoch": 0.6270246607325259, "grad_norm": 0.9392940918791206, "learning_rate": 2.005254542077525e-05, "loss": 0.5103, "step": 21485 }, { "epoch": 0.627170582226762, "grad_norm": 1.1049056641867625, "learning_rate": 2.004230061574356e-05, "loss": 0.5795, "step": 21490 }, { "epoch": 0.6273165037209981, "grad_norm": 1.051987756658652, "learning_rate": 2.0032057547376458e-05, "loss": 0.5502, "step": 21495 }, { "epoch": 0.6274624252152342, "grad_norm": 1.1779116305533983, "learning_rate": 2.002181621805923e-05, "loss": 0.6, "step": 21500 }, { "epoch": 0.6276083467094703, "grad_norm": 0.9992861488429373, "learning_rate": 2.0011576630176766e-05, "loss": 0.5858, "step": 21505 }, { "epoch": 0.6277542682037064, "grad_norm": 1.1511294131684007, "learning_rate": 2.000133878611354e-05, "loss": 0.5821, "step": 21510 }, { "epoch": 0.6279001896979425, "grad_norm": 1.0451854485275764, "learning_rate": 1.999110268825363e-05, "loss": 0.5955, "step": 21515 }, { "epoch": 0.6280461111921786, "grad_norm": 0.8563721435460955, "learning_rate": 1.9980868338980708e-05, "loss": 0.5137, "step": 21520 }, { "epoch": 0.6281920326864147, "grad_norm": 1.0629345700263528, "learning_rate": 1.9970635740678017e-05, "loss": 0.5727, "step": 21525 }, { "epoch": 0.6283379541806509, "grad_norm": 1.1178738371594295, "learning_rate": 1.9960404895728425e-05, "loss": 0.5467, "step": 21530 }, { "epoch": 0.628483875674887, "grad_norm": 1.239028514520143, "learning_rate": 1.995017580651436e-05, "loss": 0.5712, "step": 21535 }, { "epoch": 0.628629797169123, "grad_norm": 1.0309535186259977, "learning_rate": 1.993994847541788e-05, "loss": 0.532, "step": 21540 }, { "epoch": 0.6287757186633591, "grad_norm": 1.0949356423924668, "learning_rate": 1.992972290482058e-05, "loss": 0.628, "step": 21545 }, { "epoch": 0.6289216401575952, "grad_norm": 1.1365741643920848, "learning_rate": 1.9919499097103707e-05, "loss": 0.5629, "step": 21550 }, { "epoch": 0.6290675616518313, "grad_norm": 0.9266869761695989, "learning_rate": 1.9909277054648035e-05, "loss": 0.5483, "step": 21555 }, { "epoch": 0.6292134831460674, "grad_norm": 1.0233942261886486, "learning_rate": 1.9899056779833968e-05, "loss": 0.5333, "step": 21560 }, { "epoch": 0.6293594046403035, "grad_norm": 1.0904015542221328, "learning_rate": 1.9888838275041504e-05, "loss": 0.5333, "step": 21565 }, { "epoch": 0.6295053261345396, "grad_norm": 1.0064669175235683, "learning_rate": 1.9878621542650194e-05, "loss": 0.5703, "step": 21570 }, { "epoch": 0.6296512476287757, "grad_norm": 0.8647918065236093, "learning_rate": 1.98684065850392e-05, "loss": 0.5671, "step": 21575 }, { "epoch": 0.6297971691230119, "grad_norm": 1.0763160247628032, "learning_rate": 1.9858193404587266e-05, "loss": 0.5683, "step": 21580 }, { "epoch": 0.629943090617248, "grad_norm": 0.9557088483317157, "learning_rate": 1.9847982003672722e-05, "loss": 0.5601, "step": 21585 }, { "epoch": 0.630089012111484, "grad_norm": 1.0662843512762523, "learning_rate": 1.9837772384673493e-05, "loss": 0.5717, "step": 21590 }, { "epoch": 0.6302349336057201, "grad_norm": 1.204606190876787, "learning_rate": 1.9827564549967066e-05, "loss": 0.5785, "step": 21595 }, { "epoch": 0.6303808550999562, "grad_norm": 1.213489042518213, "learning_rate": 1.981735850193052e-05, "loss": 0.614, "step": 21600 }, { "epoch": 0.6305267765941923, "grad_norm": 1.1613852732207153, "learning_rate": 1.9807154242940538e-05, "loss": 0.591, "step": 21605 }, { "epoch": 0.6306726980884284, "grad_norm": 1.1104349952177215, "learning_rate": 1.979695177537338e-05, "loss": 0.4961, "step": 21610 }, { "epoch": 0.6308186195826645, "grad_norm": 1.027669194751987, "learning_rate": 1.9786751101604856e-05, "loss": 0.5297, "step": 21615 }, { "epoch": 0.6309645410769006, "grad_norm": 1.0842382631888179, "learning_rate": 1.9776552224010408e-05, "loss": 0.5428, "step": 21620 }, { "epoch": 0.6311104625711367, "grad_norm": 1.1392991137613815, "learning_rate": 1.9766355144965015e-05, "loss": 0.5222, "step": 21625 }, { "epoch": 0.6312563840653729, "grad_norm": 1.1786876753215634, "learning_rate": 1.9756159866843266e-05, "loss": 0.6008, "step": 21630 }, { "epoch": 0.631402305559609, "grad_norm": 0.9846830610408701, "learning_rate": 1.9745966392019334e-05, "loss": 0.5126, "step": 21635 }, { "epoch": 0.6315482270538451, "grad_norm": 1.1603368204216153, "learning_rate": 1.9735774722866934e-05, "loss": 0.5422, "step": 21640 }, { "epoch": 0.6316941485480811, "grad_norm": 1.1548009339559104, "learning_rate": 1.9725584861759404e-05, "loss": 0.5643, "step": 21645 }, { "epoch": 0.6318400700423172, "grad_norm": 0.9860952435174237, "learning_rate": 1.9715396811069646e-05, "loss": 0.5029, "step": 21650 }, { "epoch": 0.6319859915365533, "grad_norm": 1.1576391095470537, "learning_rate": 1.970521057317013e-05, "loss": 0.5766, "step": 21655 }, { "epoch": 0.6321319130307894, "grad_norm": 1.1608427762529696, "learning_rate": 1.9695026150432917e-05, "loss": 0.5499, "step": 21660 }, { "epoch": 0.6322778345250255, "grad_norm": 1.0831664722466747, "learning_rate": 1.9684843545229632e-05, "loss": 0.5557, "step": 21665 }, { "epoch": 0.6324237560192616, "grad_norm": 0.9752829167183303, "learning_rate": 1.9674662759931488e-05, "loss": 0.6056, "step": 21670 }, { "epoch": 0.6325696775134977, "grad_norm": 1.0721075951551997, "learning_rate": 1.9664483796909273e-05, "loss": 0.478, "step": 21675 }, { "epoch": 0.6327155990077339, "grad_norm": 1.1633242654425933, "learning_rate": 1.9654306658533343e-05, "loss": 0.6239, "step": 21680 }, { "epoch": 0.63286152050197, "grad_norm": 0.951271485650146, "learning_rate": 1.9644131347173657e-05, "loss": 0.5801, "step": 21685 }, { "epoch": 0.6330074419962061, "grad_norm": 1.1507745632632937, "learning_rate": 1.9633957865199698e-05, "loss": 0.5971, "step": 21690 }, { "epoch": 0.6331533634904422, "grad_norm": 0.9349511367178429, "learning_rate": 1.9623786214980565e-05, "loss": 0.5231, "step": 21695 }, { "epoch": 0.6332992849846782, "grad_norm": 1.0686143834784587, "learning_rate": 1.9613616398884917e-05, "loss": 0.5491, "step": 21700 }, { "epoch": 0.6334452064789143, "grad_norm": 1.0066415742846488, "learning_rate": 1.9603448419280984e-05, "loss": 0.5576, "step": 21705 }, { "epoch": 0.6335911279731504, "grad_norm": 1.0813473886407243, "learning_rate": 1.959328227853656e-05, "loss": 0.6038, "step": 21710 }, { "epoch": 0.6337370494673865, "grad_norm": 1.0700488508256019, "learning_rate": 1.9583117979019033e-05, "loss": 0.5768, "step": 21715 }, { "epoch": 0.6338829709616226, "grad_norm": 1.1626816701622698, "learning_rate": 1.9572955523095348e-05, "loss": 0.5268, "step": 21720 }, { "epoch": 0.6340288924558587, "grad_norm": 1.0848277872998586, "learning_rate": 1.9562794913132026e-05, "loss": 0.5562, "step": 21725 }, { "epoch": 0.6341748139500949, "grad_norm": 0.9970875121518702, "learning_rate": 1.9552636151495148e-05, "loss": 0.5048, "step": 21730 }, { "epoch": 0.634320735444331, "grad_norm": 1.0701842205964907, "learning_rate": 1.954247924055037e-05, "loss": 0.596, "step": 21735 }, { "epoch": 0.6344666569385671, "grad_norm": 0.9218209455053972, "learning_rate": 1.9532324182662924e-05, "loss": 0.5692, "step": 21740 }, { "epoch": 0.6346125784328032, "grad_norm": 1.0352803640309893, "learning_rate": 1.95221709801976e-05, "loss": 0.5592, "step": 21745 }, { "epoch": 0.6347584999270393, "grad_norm": 1.099854377686255, "learning_rate": 1.9512019635518762e-05, "loss": 0.5943, "step": 21750 }, { "epoch": 0.6349044214212753, "grad_norm": 1.3136065269390034, "learning_rate": 1.9501870150990353e-05, "loss": 0.5205, "step": 21755 }, { "epoch": 0.6350503429155114, "grad_norm": 1.1008757241372282, "learning_rate": 1.9491722528975852e-05, "loss": 0.5139, "step": 21760 }, { "epoch": 0.6351962644097475, "grad_norm": 0.9960248558880715, "learning_rate": 1.9481576771838327e-05, "loss": 0.5867, "step": 21765 }, { "epoch": 0.6353421859039836, "grad_norm": 0.9461912387161494, "learning_rate": 1.9471432881940417e-05, "loss": 0.521, "step": 21770 }, { "epoch": 0.6354881073982197, "grad_norm": 0.9295808817064493, "learning_rate": 1.9461290861644295e-05, "loss": 0.5494, "step": 21775 }, { "epoch": 0.6356340288924559, "grad_norm": 1.0333998877701482, "learning_rate": 1.945115071331175e-05, "loss": 0.5251, "step": 21780 }, { "epoch": 0.635779950386692, "grad_norm": 1.0724527217686928, "learning_rate": 1.9441012439304075e-05, "loss": 0.5382, "step": 21785 }, { "epoch": 0.6359258718809281, "grad_norm": 0.994721729197665, "learning_rate": 1.943087604198217e-05, "loss": 0.5614, "step": 21790 }, { "epoch": 0.6360717933751642, "grad_norm": 0.895917359408065, "learning_rate": 1.9420741523706493e-05, "loss": 0.5689, "step": 21795 }, { "epoch": 0.6362177148694003, "grad_norm": 1.1898866616462012, "learning_rate": 1.9410608886837043e-05, "loss": 0.5901, "step": 21800 }, { "epoch": 0.6363636363636364, "grad_norm": 0.989276378289207, "learning_rate": 1.9400478133733402e-05, "loss": 0.557, "step": 21805 }, { "epoch": 0.6365095578578724, "grad_norm": 0.9978327495797231, "learning_rate": 1.9390349266754694e-05, "loss": 0.5859, "step": 21810 }, { "epoch": 0.6366554793521085, "grad_norm": 0.9133972603259738, "learning_rate": 1.9380222288259618e-05, "loss": 0.5451, "step": 21815 }, { "epoch": 0.6368014008463446, "grad_norm": 1.0705297075965714, "learning_rate": 1.9370097200606436e-05, "loss": 0.5974, "step": 21820 }, { "epoch": 0.6369473223405807, "grad_norm": 1.099350454401547, "learning_rate": 1.935997400615297e-05, "loss": 0.5848, "step": 21825 }, { "epoch": 0.6370932438348169, "grad_norm": 1.033662516080168, "learning_rate": 1.934985270725658e-05, "loss": 0.5386, "step": 21830 }, { "epoch": 0.637239165329053, "grad_norm": 1.0822618943713955, "learning_rate": 1.933973330627421e-05, "loss": 0.5235, "step": 21835 }, { "epoch": 0.6373850868232891, "grad_norm": 1.0058398601548837, "learning_rate": 1.9329615805562346e-05, "loss": 0.5855, "step": 21840 }, { "epoch": 0.6375310083175252, "grad_norm": 1.2577499370813194, "learning_rate": 1.931950020747703e-05, "loss": 0.5288, "step": 21845 }, { "epoch": 0.6376769298117613, "grad_norm": 0.973305858285909, "learning_rate": 1.9309386514373897e-05, "loss": 0.4946, "step": 21850 }, { "epoch": 0.6378228513059974, "grad_norm": 1.1003721817427605, "learning_rate": 1.929927472860807e-05, "loss": 0.5847, "step": 21855 }, { "epoch": 0.6379687728002335, "grad_norm": 0.9773540960942197, "learning_rate": 1.9289164852534287e-05, "loss": 0.5433, "step": 21860 }, { "epoch": 0.6381146942944695, "grad_norm": 1.0137804527527943, "learning_rate": 1.9279056888506824e-05, "loss": 0.5763, "step": 21865 }, { "epoch": 0.6382606157887056, "grad_norm": 1.0864801326439824, "learning_rate": 1.9268950838879508e-05, "loss": 0.5342, "step": 21870 }, { "epoch": 0.6384065372829418, "grad_norm": 1.103286580790859, "learning_rate": 1.9258846706005724e-05, "loss": 0.5906, "step": 21875 }, { "epoch": 0.6385524587771779, "grad_norm": 1.0176982880976133, "learning_rate": 1.9248744492238392e-05, "loss": 0.5932, "step": 21880 }, { "epoch": 0.638698380271414, "grad_norm": 1.1005023290539862, "learning_rate": 1.923864419993001e-05, "loss": 0.5779, "step": 21885 }, { "epoch": 0.6388443017656501, "grad_norm": 1.088670336471166, "learning_rate": 1.922854583143263e-05, "loss": 0.6035, "step": 21890 }, { "epoch": 0.6389902232598862, "grad_norm": 1.1254615369540781, "learning_rate": 1.921844938909784e-05, "loss": 0.5117, "step": 21895 }, { "epoch": 0.6391361447541223, "grad_norm": 1.1787096577025074, "learning_rate": 1.9208354875276774e-05, "loss": 0.651, "step": 21900 }, { "epoch": 0.6392820662483584, "grad_norm": 1.138862720019086, "learning_rate": 1.9198262292320145e-05, "loss": 0.5354, "step": 21905 }, { "epoch": 0.6394279877425945, "grad_norm": 1.027769582525892, "learning_rate": 1.9188171642578185e-05, "loss": 0.5259, "step": 21910 }, { "epoch": 0.6395739092368306, "grad_norm": 1.1632852126316648, "learning_rate": 1.9178082928400697e-05, "loss": 0.534, "step": 21915 }, { "epoch": 0.6397198307310666, "grad_norm": 0.9554524439085132, "learning_rate": 1.916799615213703e-05, "loss": 0.5652, "step": 21920 }, { "epoch": 0.6398657522253028, "grad_norm": 1.2185073638271997, "learning_rate": 1.915791131613607e-05, "loss": 0.5486, "step": 21925 }, { "epoch": 0.6400116737195389, "grad_norm": 1.1126732615548693, "learning_rate": 1.9147828422746266e-05, "loss": 0.5596, "step": 21930 }, { "epoch": 0.640157595213775, "grad_norm": 1.1670515873939935, "learning_rate": 1.9137747474315604e-05, "loss": 0.5681, "step": 21935 }, { "epoch": 0.6403035167080111, "grad_norm": 1.0457325044935364, "learning_rate": 1.9127668473191624e-05, "loss": 0.541, "step": 21940 }, { "epoch": 0.6404494382022472, "grad_norm": 0.9561656215353793, "learning_rate": 1.9117591421721408e-05, "loss": 0.5639, "step": 21945 }, { "epoch": 0.6405953596964833, "grad_norm": 1.0593182257517024, "learning_rate": 1.910751632225159e-05, "loss": 0.5706, "step": 21950 }, { "epoch": 0.6407412811907194, "grad_norm": 1.161237438501792, "learning_rate": 1.9097443177128337e-05, "loss": 0.5773, "step": 21955 }, { "epoch": 0.6408872026849555, "grad_norm": 1.1797792195206869, "learning_rate": 1.908737198869737e-05, "loss": 0.4968, "step": 21960 }, { "epoch": 0.6410331241791916, "grad_norm": 1.1104436113051526, "learning_rate": 1.9077302759303966e-05, "loss": 0.5648, "step": 21965 }, { "epoch": 0.6411790456734276, "grad_norm": 1.1238236393226455, "learning_rate": 1.9067235491292918e-05, "loss": 0.5436, "step": 21970 }, { "epoch": 0.6413249671676639, "grad_norm": 0.8945389670274669, "learning_rate": 1.9057170187008584e-05, "loss": 0.5398, "step": 21975 }, { "epoch": 0.6414708886618999, "grad_norm": 1.0592094716344638, "learning_rate": 1.9047106848794856e-05, "loss": 0.5402, "step": 21980 }, { "epoch": 0.641616810156136, "grad_norm": 0.8896220415718814, "learning_rate": 1.903704547899517e-05, "loss": 0.5036, "step": 21985 }, { "epoch": 0.6417627316503721, "grad_norm": 1.0836446221395244, "learning_rate": 1.9026986079952515e-05, "loss": 0.5795, "step": 21990 }, { "epoch": 0.6419086531446082, "grad_norm": 1.3836117609355192, "learning_rate": 1.9016928654009386e-05, "loss": 0.5628, "step": 21995 }, { "epoch": 0.6420545746388443, "grad_norm": 0.9945319721452842, "learning_rate": 1.9006873203507864e-05, "loss": 0.5601, "step": 22000 }, { "epoch": 0.6422004961330804, "grad_norm": 0.9716881496250388, "learning_rate": 1.8996819730789533e-05, "loss": 0.4592, "step": 22005 }, { "epoch": 0.6423464176273165, "grad_norm": 0.9026869985544795, "learning_rate": 1.8986768238195553e-05, "loss": 0.5901, "step": 22010 }, { "epoch": 0.6424923391215526, "grad_norm": 0.9544651782469936, "learning_rate": 1.8976718728066584e-05, "loss": 0.5758, "step": 22015 }, { "epoch": 0.6426382606157887, "grad_norm": 1.0866303092292, "learning_rate": 1.8966671202742848e-05, "loss": 0.5692, "step": 22020 }, { "epoch": 0.6427841821100249, "grad_norm": 1.0479128744449573, "learning_rate": 1.8956625664564092e-05, "loss": 0.4996, "step": 22025 }, { "epoch": 0.642930103604261, "grad_norm": 1.1841361900011267, "learning_rate": 1.8946582115869614e-05, "loss": 0.5926, "step": 22030 }, { "epoch": 0.643076025098497, "grad_norm": 0.9502305747451631, "learning_rate": 1.893654055899824e-05, "loss": 0.551, "step": 22035 }, { "epoch": 0.6432219465927331, "grad_norm": 0.9737048360817947, "learning_rate": 1.8926500996288348e-05, "loss": 0.54, "step": 22040 }, { "epoch": 0.6433678680869692, "grad_norm": 1.01431126887875, "learning_rate": 1.8916463430077807e-05, "loss": 0.5613, "step": 22045 }, { "epoch": 0.6435137895812053, "grad_norm": 1.16528919494979, "learning_rate": 1.8906427862704086e-05, "loss": 0.5574, "step": 22050 }, { "epoch": 0.6436597110754414, "grad_norm": 1.089080879137437, "learning_rate": 1.889639429650412e-05, "loss": 0.6184, "step": 22055 }, { "epoch": 0.6438056325696775, "grad_norm": 0.9235669711542518, "learning_rate": 1.888636273381445e-05, "loss": 0.5565, "step": 22060 }, { "epoch": 0.6439515540639136, "grad_norm": 0.8797908484836513, "learning_rate": 1.887633317697108e-05, "loss": 0.541, "step": 22065 }, { "epoch": 0.6440974755581497, "grad_norm": 1.0525346594055296, "learning_rate": 1.886630562830959e-05, "loss": 0.5656, "step": 22070 }, { "epoch": 0.6442433970523859, "grad_norm": 1.0602087884688443, "learning_rate": 1.8856280090165085e-05, "loss": 0.5885, "step": 22075 }, { "epoch": 0.644389318546622, "grad_norm": 0.9797394718919068, "learning_rate": 1.8846256564872204e-05, "loss": 0.5385, "step": 22080 }, { "epoch": 0.644535240040858, "grad_norm": 1.1045910284359404, "learning_rate": 1.8836235054765112e-05, "loss": 0.5981, "step": 22085 }, { "epoch": 0.6446811615350941, "grad_norm": 1.100104331713952, "learning_rate": 1.8826215562177492e-05, "loss": 0.5967, "step": 22090 }, { "epoch": 0.6448270830293302, "grad_norm": 0.9079469064204102, "learning_rate": 1.8816198089442572e-05, "loss": 0.5517, "step": 22095 }, { "epoch": 0.6449730045235663, "grad_norm": 1.235865516274603, "learning_rate": 1.880618263889311e-05, "loss": 0.5699, "step": 22100 }, { "epoch": 0.6451189260178024, "grad_norm": 1.0011448289760487, "learning_rate": 1.87961692128614e-05, "loss": 0.5333, "step": 22105 }, { "epoch": 0.6452648475120385, "grad_norm": 1.0401675071128709, "learning_rate": 1.8786157813679255e-05, "loss": 0.5592, "step": 22110 }, { "epoch": 0.6454107690062746, "grad_norm": 1.0748297092456756, "learning_rate": 1.8776148443678e-05, "loss": 0.5356, "step": 22115 }, { "epoch": 0.6455566905005107, "grad_norm": 1.1052177985188683, "learning_rate": 1.8766141105188517e-05, "loss": 0.517, "step": 22120 }, { "epoch": 0.6457026119947469, "grad_norm": 0.984848924715191, "learning_rate": 1.8756135800541197e-05, "loss": 0.5518, "step": 22125 }, { "epoch": 0.645848533488983, "grad_norm": 1.0549945290660518, "learning_rate": 1.8746132532065963e-05, "loss": 0.5555, "step": 22130 }, { "epoch": 0.6459944549832191, "grad_norm": 1.181573969277069, "learning_rate": 1.8736131302092274e-05, "loss": 0.5721, "step": 22135 }, { "epoch": 0.6461403764774551, "grad_norm": 1.332459352082685, "learning_rate": 1.872613211294908e-05, "loss": 0.6013, "step": 22140 }, { "epoch": 0.6462862979716912, "grad_norm": 1.0573805920249875, "learning_rate": 1.8716134966964893e-05, "loss": 0.5749, "step": 22145 }, { "epoch": 0.6464322194659273, "grad_norm": 1.333648925851769, "learning_rate": 1.8706139866467732e-05, "loss": 0.5813, "step": 22150 }, { "epoch": 0.6465781409601634, "grad_norm": 1.0744983311308165, "learning_rate": 1.8696146813785154e-05, "loss": 0.5566, "step": 22155 }, { "epoch": 0.6467240624543995, "grad_norm": 1.0368161519532362, "learning_rate": 1.868615581124421e-05, "loss": 0.5731, "step": 22160 }, { "epoch": 0.6468699839486356, "grad_norm": 1.0496330182202802, "learning_rate": 1.8676166861171502e-05, "loss": 0.5427, "step": 22165 }, { "epoch": 0.6470159054428717, "grad_norm": 1.0630568673861143, "learning_rate": 1.8666179965893137e-05, "loss": 0.5808, "step": 22170 }, { "epoch": 0.6471618269371079, "grad_norm": 0.9499046406576437, "learning_rate": 1.865619512773476e-05, "loss": 0.5244, "step": 22175 }, { "epoch": 0.647307748431344, "grad_norm": 0.9201785651704663, "learning_rate": 1.864621234902153e-05, "loss": 0.5442, "step": 22180 }, { "epoch": 0.6474536699255801, "grad_norm": 1.0565774919364457, "learning_rate": 1.86362316320781e-05, "loss": 0.4962, "step": 22185 }, { "epoch": 0.6475995914198162, "grad_norm": 0.9500343992277334, "learning_rate": 1.8626252979228685e-05, "loss": 0.4972, "step": 22190 }, { "epoch": 0.6477455129140522, "grad_norm": 1.0043234280239284, "learning_rate": 1.8616276392797e-05, "loss": 0.5513, "step": 22195 }, { "epoch": 0.6478914344082883, "grad_norm": 0.9390246362665631, "learning_rate": 1.8606301875106273e-05, "loss": 0.5352, "step": 22200 }, { "epoch": 0.6480373559025244, "grad_norm": 1.1867331778099164, "learning_rate": 1.859632942847927e-05, "loss": 0.5514, "step": 22205 }, { "epoch": 0.6481832773967605, "grad_norm": 0.9779153018628504, "learning_rate": 1.8586359055238244e-05, "loss": 0.516, "step": 22210 }, { "epoch": 0.6483291988909966, "grad_norm": 1.0016874791669665, "learning_rate": 1.8576390757704988e-05, "loss": 0.5597, "step": 22215 }, { "epoch": 0.6484751203852327, "grad_norm": 1.0304749995347424, "learning_rate": 1.856642453820081e-05, "loss": 0.5492, "step": 22220 }, { "epoch": 0.6486210418794689, "grad_norm": 0.9601316515364549, "learning_rate": 1.8556460399046534e-05, "loss": 0.583, "step": 22225 }, { "epoch": 0.648766963373705, "grad_norm": 1.0460836305042074, "learning_rate": 1.8546498342562486e-05, "loss": 0.5289, "step": 22230 }, { "epoch": 0.6489128848679411, "grad_norm": 1.0163466737266107, "learning_rate": 1.8536538371068524e-05, "loss": 0.5771, "step": 22235 }, { "epoch": 0.6490588063621772, "grad_norm": 1.1007378726951997, "learning_rate": 1.852658048688401e-05, "loss": 0.5526, "step": 22240 }, { "epoch": 0.6492047278564133, "grad_norm": 1.0903823821791667, "learning_rate": 1.8516624692327828e-05, "loss": 0.5858, "step": 22245 }, { "epoch": 0.6493506493506493, "grad_norm": 0.974744903629621, "learning_rate": 1.8506670989718366e-05, "loss": 0.5162, "step": 22250 }, { "epoch": 0.6494965708448854, "grad_norm": 0.9603235287937402, "learning_rate": 1.849671938137353e-05, "loss": 0.5416, "step": 22255 }, { "epoch": 0.6496424923391215, "grad_norm": 1.1622975381596095, "learning_rate": 1.848676986961073e-05, "loss": 0.5405, "step": 22260 }, { "epoch": 0.6497884138333576, "grad_norm": 1.0097143305689305, "learning_rate": 1.8476822456746917e-05, "loss": 0.5491, "step": 22265 }, { "epoch": 0.6499343353275937, "grad_norm": 1.0565047478290637, "learning_rate": 1.846687714509851e-05, "loss": 0.5983, "step": 22270 }, { "epoch": 0.6500802568218299, "grad_norm": 0.9464132043355717, "learning_rate": 1.845693393698148e-05, "loss": 0.6055, "step": 22275 }, { "epoch": 0.650226178316066, "grad_norm": 1.1125014594981864, "learning_rate": 1.8446992834711274e-05, "loss": 0.5823, "step": 22280 }, { "epoch": 0.6503720998103021, "grad_norm": 0.9498019695510368, "learning_rate": 1.8437053840602864e-05, "loss": 0.4941, "step": 22285 }, { "epoch": 0.6505180213045382, "grad_norm": 1.0469344983050612, "learning_rate": 1.8427116956970735e-05, "loss": 0.5312, "step": 22290 }, { "epoch": 0.6506639427987743, "grad_norm": 1.4131594940772032, "learning_rate": 1.8417182186128878e-05, "loss": 0.6171, "step": 22295 }, { "epoch": 0.6508098642930104, "grad_norm": 1.060472795424812, "learning_rate": 1.8407249530390784e-05, "loss": 0.5409, "step": 22300 }, { "epoch": 0.6509557857872464, "grad_norm": 0.9018584279056739, "learning_rate": 1.8397318992069467e-05, "loss": 0.5876, "step": 22305 }, { "epoch": 0.6511017072814825, "grad_norm": 1.019629845212201, "learning_rate": 1.8387390573477425e-05, "loss": 0.5645, "step": 22310 }, { "epoch": 0.6512476287757186, "grad_norm": 1.0307095221493323, "learning_rate": 1.837746427692668e-05, "loss": 0.5888, "step": 22315 }, { "epoch": 0.6513935502699547, "grad_norm": 0.992104092831787, "learning_rate": 1.836754010472876e-05, "loss": 0.493, "step": 22320 }, { "epoch": 0.6515394717641909, "grad_norm": 1.175775803478721, "learning_rate": 1.83576180591947e-05, "loss": 0.5539, "step": 22325 }, { "epoch": 0.651685393258427, "grad_norm": 1.0238728909826367, "learning_rate": 1.8347698142635017e-05, "loss": 0.5662, "step": 22330 }, { "epoch": 0.6518313147526631, "grad_norm": 1.0789561027299324, "learning_rate": 1.8337780357359763e-05, "loss": 0.5505, "step": 22335 }, { "epoch": 0.6519772362468992, "grad_norm": 1.0342055484346018, "learning_rate": 1.8327864705678465e-05, "loss": 0.5413, "step": 22340 }, { "epoch": 0.6521231577411353, "grad_norm": 1.0363116458595134, "learning_rate": 1.8317951189900195e-05, "loss": 0.5433, "step": 22345 }, { "epoch": 0.6522690792353714, "grad_norm": 0.9839074521628217, "learning_rate": 1.8308039812333465e-05, "loss": 0.5476, "step": 22350 }, { "epoch": 0.6524150007296075, "grad_norm": 1.0992590866491638, "learning_rate": 1.8298130575286342e-05, "loss": 0.5128, "step": 22355 }, { "epoch": 0.6525609222238435, "grad_norm": 1.0029813189319117, "learning_rate": 1.8288223481066374e-05, "loss": 0.533, "step": 22360 }, { "epoch": 0.6527068437180796, "grad_norm": 1.1285802077821032, "learning_rate": 1.827831853198062e-05, "loss": 0.5521, "step": 22365 }, { "epoch": 0.6528527652123157, "grad_norm": 1.1021568507153408, "learning_rate": 1.8268415730335636e-05, "loss": 0.5666, "step": 22370 }, { "epoch": 0.6529986867065519, "grad_norm": 1.04945154257413, "learning_rate": 1.8258515078437456e-05, "loss": 0.5898, "step": 22375 }, { "epoch": 0.653144608200788, "grad_norm": 1.0780308853941516, "learning_rate": 1.8248616578591642e-05, "loss": 0.5103, "step": 22380 }, { "epoch": 0.6532905296950241, "grad_norm": 1.051293775865153, "learning_rate": 1.8238720233103247e-05, "loss": 0.5512, "step": 22385 }, { "epoch": 0.6534364511892602, "grad_norm": 1.0461492369882694, "learning_rate": 1.822882604427681e-05, "loss": 0.5729, "step": 22390 }, { "epoch": 0.6535823726834963, "grad_norm": 1.1234318945254003, "learning_rate": 1.8218934014416395e-05, "loss": 0.565, "step": 22395 }, { "epoch": 0.6537282941777324, "grad_norm": 0.9773601659570147, "learning_rate": 1.8209044145825533e-05, "loss": 0.5526, "step": 22400 }, { "epoch": 0.6538742156719685, "grad_norm": 1.1022468630495483, "learning_rate": 1.819915644080726e-05, "loss": 0.6008, "step": 22405 }, { "epoch": 0.6540201371662046, "grad_norm": 1.198216144642613, "learning_rate": 1.8189270901664123e-05, "loss": 0.5701, "step": 22410 }, { "epoch": 0.6541660586604406, "grad_norm": 1.1542191270615358, "learning_rate": 1.8179387530698154e-05, "loss": 0.52, "step": 22415 }, { "epoch": 0.6543119801546767, "grad_norm": 0.946965423472249, "learning_rate": 1.8169506330210884e-05, "loss": 0.561, "step": 22420 }, { "epoch": 0.6544579016489129, "grad_norm": 1.1388805323564575, "learning_rate": 1.815962730250331e-05, "loss": 0.5207, "step": 22425 }, { "epoch": 0.654603823143149, "grad_norm": 1.0627650927352519, "learning_rate": 1.814975044987598e-05, "loss": 0.5871, "step": 22430 }, { "epoch": 0.6547497446373851, "grad_norm": 1.2193239971956373, "learning_rate": 1.813987577462889e-05, "loss": 0.6161, "step": 22435 }, { "epoch": 0.6548956661316212, "grad_norm": 1.0275910890135207, "learning_rate": 1.8130003279061546e-05, "loss": 0.5446, "step": 22440 }, { "epoch": 0.6550415876258573, "grad_norm": 0.9816511661689146, "learning_rate": 1.812013296547294e-05, "loss": 0.5511, "step": 22445 }, { "epoch": 0.6551875091200934, "grad_norm": 1.1119925782065414, "learning_rate": 1.811026483616155e-05, "loss": 0.5222, "step": 22450 }, { "epoch": 0.6553334306143295, "grad_norm": 0.971374082081296, "learning_rate": 1.810039889342537e-05, "loss": 0.5262, "step": 22455 }, { "epoch": 0.6554793521085656, "grad_norm": 1.002649847142402, "learning_rate": 1.809053513956186e-05, "loss": 0.5393, "step": 22460 }, { "epoch": 0.6556252736028017, "grad_norm": 0.9174788924590409, "learning_rate": 1.8080673576867985e-05, "loss": 0.5325, "step": 22465 }, { "epoch": 0.6557711950970377, "grad_norm": 1.114159890731216, "learning_rate": 1.807081420764019e-05, "loss": 0.5961, "step": 22470 }, { "epoch": 0.6559171165912739, "grad_norm": 1.1245713955494845, "learning_rate": 1.8060957034174403e-05, "loss": 0.5735, "step": 22475 }, { "epoch": 0.65606303808551, "grad_norm": 1.216104686437999, "learning_rate": 1.805110205876607e-05, "loss": 0.5177, "step": 22480 }, { "epoch": 0.6562089595797461, "grad_norm": 1.0833817666980703, "learning_rate": 1.8041249283710094e-05, "loss": 0.543, "step": 22485 }, { "epoch": 0.6563548810739822, "grad_norm": 1.106600433974133, "learning_rate": 1.80313987113009e-05, "loss": 0.5953, "step": 22490 }, { "epoch": 0.6565008025682183, "grad_norm": 1.0135322080600178, "learning_rate": 1.8021550343832337e-05, "loss": 0.4854, "step": 22495 }, { "epoch": 0.6566467240624544, "grad_norm": 1.1687294715051906, "learning_rate": 1.8011704183597817e-05, "loss": 0.5513, "step": 22500 }, { "epoch": 0.6567926455566905, "grad_norm": 1.0034398783796985, "learning_rate": 1.8001860232890183e-05, "loss": 0.4984, "step": 22505 }, { "epoch": 0.6569385670509266, "grad_norm": 1.02623442681083, "learning_rate": 1.7992018494001794e-05, "loss": 0.5394, "step": 22510 }, { "epoch": 0.6570844885451627, "grad_norm": 1.120452635378034, "learning_rate": 1.7982178969224472e-05, "loss": 0.5995, "step": 22515 }, { "epoch": 0.6572304100393987, "grad_norm": 1.2458899168498645, "learning_rate": 1.7972341660849535e-05, "loss": 0.582, "step": 22520 }, { "epoch": 0.657376331533635, "grad_norm": 1.187255381181032, "learning_rate": 1.7962506571167797e-05, "loss": 0.6208, "step": 22525 }, { "epoch": 0.657522253027871, "grad_norm": 0.9448733478550574, "learning_rate": 1.795267370246953e-05, "loss": 0.57, "step": 22530 }, { "epoch": 0.6576681745221071, "grad_norm": 1.1492476448366193, "learning_rate": 1.7942843057044513e-05, "loss": 0.5188, "step": 22535 }, { "epoch": 0.6578140960163432, "grad_norm": 1.0409284854644185, "learning_rate": 1.7933014637181982e-05, "loss": 0.5496, "step": 22540 }, { "epoch": 0.6579600175105793, "grad_norm": 0.9340754713217472, "learning_rate": 1.7923188445170663e-05, "loss": 0.513, "step": 22545 }, { "epoch": 0.6581059390048154, "grad_norm": 1.0541531749339594, "learning_rate": 1.7913364483298794e-05, "loss": 0.5322, "step": 22550 }, { "epoch": 0.6582518604990515, "grad_norm": 1.2384180599349084, "learning_rate": 1.790354275385404e-05, "loss": 0.5843, "step": 22555 }, { "epoch": 0.6583977819932876, "grad_norm": 1.1377817639613113, "learning_rate": 1.789372325912361e-05, "loss": 0.5538, "step": 22560 }, { "epoch": 0.6585437034875237, "grad_norm": 1.1194731863886143, "learning_rate": 1.7883906001394108e-05, "loss": 0.532, "step": 22565 }, { "epoch": 0.6586896249817598, "grad_norm": 0.9954368267707016, "learning_rate": 1.78740909829517e-05, "loss": 0.5475, "step": 22570 }, { "epoch": 0.658835546475996, "grad_norm": 0.8811513865950462, "learning_rate": 1.7864278206081992e-05, "loss": 0.5331, "step": 22575 }, { "epoch": 0.658981467970232, "grad_norm": 0.9968644311605007, "learning_rate": 1.785446767307006e-05, "loss": 0.499, "step": 22580 }, { "epoch": 0.6591273894644681, "grad_norm": 0.9675670382732324, "learning_rate": 1.7844659386200495e-05, "loss": 0.4832, "step": 22585 }, { "epoch": 0.6592733109587042, "grad_norm": 0.9996172606231879, "learning_rate": 1.7834853347757304e-05, "loss": 0.5418, "step": 22590 }, { "epoch": 0.6594192324529403, "grad_norm": 1.0081872140256811, "learning_rate": 1.7825049560024033e-05, "loss": 0.5555, "step": 22595 }, { "epoch": 0.6595651539471764, "grad_norm": 0.9386331549370254, "learning_rate": 1.7815248025283666e-05, "loss": 0.5756, "step": 22600 }, { "epoch": 0.6597110754414125, "grad_norm": 1.0637658638546388, "learning_rate": 1.780544874581869e-05, "loss": 0.5928, "step": 22605 }, { "epoch": 0.6598569969356486, "grad_norm": 0.9725144027446253, "learning_rate": 1.7795651723911015e-05, "loss": 0.5687, "step": 22610 }, { "epoch": 0.6600029184298847, "grad_norm": 1.0342286664043392, "learning_rate": 1.7785856961842102e-05, "loss": 0.5728, "step": 22615 }, { "epoch": 0.6601488399241209, "grad_norm": 1.200101105127198, "learning_rate": 1.7776064461892815e-05, "loss": 0.591, "step": 22620 }, { "epoch": 0.660294761418357, "grad_norm": 1.0592290529834663, "learning_rate": 1.7766274226343526e-05, "loss": 0.574, "step": 22625 }, { "epoch": 0.6604406829125931, "grad_norm": 1.077231180645231, "learning_rate": 1.7756486257474093e-05, "loss": 0.5384, "step": 22630 }, { "epoch": 0.6605866044068291, "grad_norm": 1.0051843217699519, "learning_rate": 1.7746700557563795e-05, "loss": 0.5756, "step": 22635 }, { "epoch": 0.6607325259010652, "grad_norm": 0.9219904517506695, "learning_rate": 1.7736917128891444e-05, "loss": 0.5769, "step": 22640 }, { "epoch": 0.6608784473953013, "grad_norm": 1.0555397452322748, "learning_rate": 1.772713597373528e-05, "loss": 0.6118, "step": 22645 }, { "epoch": 0.6610243688895374, "grad_norm": 1.1494499274952148, "learning_rate": 1.7717357094373026e-05, "loss": 0.6019, "step": 22650 }, { "epoch": 0.6611702903837735, "grad_norm": 1.2381588336204996, "learning_rate": 1.77075804930819e-05, "loss": 0.6031, "step": 22655 }, { "epoch": 0.6613162118780096, "grad_norm": 0.9703289644232134, "learning_rate": 1.7697806172138526e-05, "loss": 0.6191, "step": 22660 }, { "epoch": 0.6614621333722457, "grad_norm": 0.9002486966267685, "learning_rate": 1.7688034133819066e-05, "loss": 0.5301, "step": 22665 }, { "epoch": 0.6616080548664819, "grad_norm": 1.3569416046244565, "learning_rate": 1.7678264380399106e-05, "loss": 0.6167, "step": 22670 }, { "epoch": 0.661753976360718, "grad_norm": 1.114878161847666, "learning_rate": 1.7668496914153736e-05, "loss": 0.517, "step": 22675 }, { "epoch": 0.6618998978549541, "grad_norm": 1.1529869418569423, "learning_rate": 1.765873173735748e-05, "loss": 0.5194, "step": 22680 }, { "epoch": 0.6620458193491902, "grad_norm": 1.0543841509746303, "learning_rate": 1.764896885228434e-05, "loss": 0.5484, "step": 22685 }, { "epoch": 0.6621917408434262, "grad_norm": 0.9161104234675111, "learning_rate": 1.763920826120779e-05, "loss": 0.5605, "step": 22690 }, { "epoch": 0.6623376623376623, "grad_norm": 0.9798387389815199, "learning_rate": 1.762944996640076e-05, "loss": 0.5128, "step": 22695 }, { "epoch": 0.6624835838318984, "grad_norm": 1.2902785652504538, "learning_rate": 1.7619693970135665e-05, "loss": 0.5258, "step": 22700 }, { "epoch": 0.6626295053261345, "grad_norm": 1.0780989655203863, "learning_rate": 1.7609940274684343e-05, "loss": 0.5618, "step": 22705 }, { "epoch": 0.6627754268203706, "grad_norm": 1.0592094884280832, "learning_rate": 1.760018888231816e-05, "loss": 0.5692, "step": 22710 }, { "epoch": 0.6629213483146067, "grad_norm": 1.200159793838391, "learning_rate": 1.7590439795307888e-05, "loss": 0.5183, "step": 22715 }, { "epoch": 0.6630672698088429, "grad_norm": 0.9049647984769915, "learning_rate": 1.7580693015923782e-05, "loss": 0.5048, "step": 22720 }, { "epoch": 0.663213191303079, "grad_norm": 0.9736219233245142, "learning_rate": 1.7570948546435584e-05, "loss": 0.5807, "step": 22725 }, { "epoch": 0.6633591127973151, "grad_norm": 1.0726031575698647, "learning_rate": 1.7561206389112445e-05, "loss": 0.6033, "step": 22730 }, { "epoch": 0.6635050342915512, "grad_norm": 1.1279534663342017, "learning_rate": 1.7551466546223033e-05, "loss": 0.6007, "step": 22735 }, { "epoch": 0.6636509557857873, "grad_norm": 0.9675569167676644, "learning_rate": 1.754172902003543e-05, "loss": 0.5263, "step": 22740 }, { "epoch": 0.6637968772800233, "grad_norm": 1.0963318658072772, "learning_rate": 1.7531993812817222e-05, "loss": 0.591, "step": 22745 }, { "epoch": 0.6639427987742594, "grad_norm": 1.0096277645767417, "learning_rate": 1.7522260926835425e-05, "loss": 0.5514, "step": 22750 }, { "epoch": 0.6640887202684955, "grad_norm": 1.1492415048861329, "learning_rate": 1.751253036435653e-05, "loss": 0.5434, "step": 22755 }, { "epoch": 0.6642346417627316, "grad_norm": 0.9946079184844736, "learning_rate": 1.7502802127646466e-05, "loss": 0.516, "step": 22760 }, { "epoch": 0.6643805632569677, "grad_norm": 1.0035661071226694, "learning_rate": 1.7493076218970636e-05, "loss": 0.5326, "step": 22765 }, { "epoch": 0.6645264847512039, "grad_norm": 1.095034181536273, "learning_rate": 1.7483352640593926e-05, "loss": 0.5193, "step": 22770 }, { "epoch": 0.66467240624544, "grad_norm": 1.1257855144454236, "learning_rate": 1.747363139478061e-05, "loss": 0.5374, "step": 22775 }, { "epoch": 0.6648183277396761, "grad_norm": 0.9187265742371556, "learning_rate": 1.74639124837945e-05, "loss": 0.5713, "step": 22780 }, { "epoch": 0.6649642492339122, "grad_norm": 1.1038815515098346, "learning_rate": 1.7454195909898804e-05, "loss": 0.5757, "step": 22785 }, { "epoch": 0.6651101707281483, "grad_norm": 1.08632253691434, "learning_rate": 1.7444481675356208e-05, "loss": 0.5896, "step": 22790 }, { "epoch": 0.6652560922223844, "grad_norm": 0.8971477303461654, "learning_rate": 1.7434769782428877e-05, "loss": 0.5027, "step": 22795 }, { "epoch": 0.6654020137166204, "grad_norm": 0.938212889591609, "learning_rate": 1.742506023337837e-05, "loss": 0.5505, "step": 22800 }, { "epoch": 0.6655479352108565, "grad_norm": 0.9654345804146206, "learning_rate": 1.7415353030465763e-05, "loss": 0.5603, "step": 22805 }, { "epoch": 0.6656938567050926, "grad_norm": 1.0788458471096207, "learning_rate": 1.7405648175951545e-05, "loss": 0.495, "step": 22810 }, { "epoch": 0.6658397781993287, "grad_norm": 1.0970413151210807, "learning_rate": 1.739594567209569e-05, "loss": 0.5458, "step": 22815 }, { "epoch": 0.6659856996935649, "grad_norm": 1.1318940324050657, "learning_rate": 1.7386245521157596e-05, "loss": 0.5287, "step": 22820 }, { "epoch": 0.666131621187801, "grad_norm": 1.0576462003167644, "learning_rate": 1.7376547725396123e-05, "loss": 0.6149, "step": 22825 }, { "epoch": 0.6662775426820371, "grad_norm": 1.0507391910214832, "learning_rate": 1.7366852287069586e-05, "loss": 0.5744, "step": 22830 }, { "epoch": 0.6664234641762732, "grad_norm": 1.05866825635751, "learning_rate": 1.7357159208435738e-05, "loss": 0.5639, "step": 22835 }, { "epoch": 0.6665693856705093, "grad_norm": 1.0220161270049948, "learning_rate": 1.7347468491751817e-05, "loss": 0.5405, "step": 22840 }, { "epoch": 0.6667153071647454, "grad_norm": 0.9813985032978343, "learning_rate": 1.7337780139274474e-05, "loss": 0.5619, "step": 22845 }, { "epoch": 0.6668612286589815, "grad_norm": 0.9746940027034712, "learning_rate": 1.7328094153259822e-05, "loss": 0.5152, "step": 22850 }, { "epoch": 0.6670071501532175, "grad_norm": 1.1253890253561056, "learning_rate": 1.731841053596342e-05, "loss": 0.6213, "step": 22855 }, { "epoch": 0.6671530716474536, "grad_norm": 1.0821344765087502, "learning_rate": 1.7308729289640292e-05, "loss": 0.5453, "step": 22860 }, { "epoch": 0.6672989931416897, "grad_norm": 0.8571015965834469, "learning_rate": 1.7299050416544886e-05, "loss": 0.4823, "step": 22865 }, { "epoch": 0.6674449146359259, "grad_norm": 1.0023458534924126, "learning_rate": 1.7289373918931113e-05, "loss": 0.5291, "step": 22870 }, { "epoch": 0.667590836130162, "grad_norm": 1.0509939215782997, "learning_rate": 1.7279699799052326e-05, "loss": 0.5207, "step": 22875 }, { "epoch": 0.6677367576243981, "grad_norm": 1.0096239479163684, "learning_rate": 1.7270028059161313e-05, "loss": 0.4976, "step": 22880 }, { "epoch": 0.6678826791186342, "grad_norm": 1.0696030330092865, "learning_rate": 1.7260358701510342e-05, "loss": 0.5825, "step": 22885 }, { "epoch": 0.6680286006128703, "grad_norm": 1.0517355077461708, "learning_rate": 1.7250691728351088e-05, "loss": 0.5244, "step": 22890 }, { "epoch": 0.6681745221071064, "grad_norm": 1.060763802022097, "learning_rate": 1.724102714193469e-05, "loss": 0.5679, "step": 22895 }, { "epoch": 0.6683204436013425, "grad_norm": 0.8891093644378575, "learning_rate": 1.723136494451173e-05, "loss": 0.5148, "step": 22900 }, { "epoch": 0.6684663650955786, "grad_norm": 1.3978784078283886, "learning_rate": 1.722170513833222e-05, "loss": 0.6194, "step": 22905 }, { "epoch": 0.6686122865898146, "grad_norm": 1.289247955978387, "learning_rate": 1.7212047725645637e-05, "loss": 0.5432, "step": 22910 }, { "epoch": 0.6687582080840507, "grad_norm": 0.9265779295872825, "learning_rate": 1.7202392708700888e-05, "loss": 0.5345, "step": 22915 }, { "epoch": 0.6689041295782869, "grad_norm": 0.9564012791854823, "learning_rate": 1.7192740089746324e-05, "loss": 0.4985, "step": 22920 }, { "epoch": 0.669050051072523, "grad_norm": 1.3245925113172805, "learning_rate": 1.718308987102973e-05, "loss": 0.6034, "step": 22925 }, { "epoch": 0.6691959725667591, "grad_norm": 0.9169733989032314, "learning_rate": 1.7173442054798355e-05, "loss": 0.5555, "step": 22930 }, { "epoch": 0.6693418940609952, "grad_norm": 1.1582435262667794, "learning_rate": 1.7163796643298866e-05, "loss": 0.5721, "step": 22935 }, { "epoch": 0.6694878155552313, "grad_norm": 1.0015232662577707, "learning_rate": 1.7154153638777372e-05, "loss": 0.5823, "step": 22940 }, { "epoch": 0.6696337370494674, "grad_norm": 0.9721584186757738, "learning_rate": 1.714451304347943e-05, "loss": 0.545, "step": 22945 }, { "epoch": 0.6697796585437035, "grad_norm": 1.0547979815866453, "learning_rate": 1.713487485965003e-05, "loss": 0.5437, "step": 22950 }, { "epoch": 0.6699255800379396, "grad_norm": 0.9668802912572163, "learning_rate": 1.7125239089533615e-05, "loss": 0.5282, "step": 22955 }, { "epoch": 0.6700715015321757, "grad_norm": 1.0523811420671794, "learning_rate": 1.711560573537404e-05, "loss": 0.5334, "step": 22960 }, { "epoch": 0.6702174230264117, "grad_norm": 1.111174111101546, "learning_rate": 1.7105974799414623e-05, "loss": 0.5437, "step": 22965 }, { "epoch": 0.6703633445206479, "grad_norm": 1.1306558488983185, "learning_rate": 1.70963462838981e-05, "loss": 0.5816, "step": 22970 }, { "epoch": 0.670509266014884, "grad_norm": 1.0348954330549631, "learning_rate": 1.708672019106664e-05, "loss": 0.5201, "step": 22975 }, { "epoch": 0.6706551875091201, "grad_norm": 1.0338216617873321, "learning_rate": 1.7077096523161882e-05, "loss": 0.5471, "step": 22980 }, { "epoch": 0.6708011090033562, "grad_norm": 1.1166430746274223, "learning_rate": 1.706747528242486e-05, "loss": 0.5633, "step": 22985 }, { "epoch": 0.6709470304975923, "grad_norm": 1.0854731011854513, "learning_rate": 1.7057856471096074e-05, "loss": 0.5102, "step": 22990 }, { "epoch": 0.6710929519918284, "grad_norm": 1.0561931045642492, "learning_rate": 1.7048240091415418e-05, "loss": 0.6019, "step": 22995 }, { "epoch": 0.6712388734860645, "grad_norm": 0.8548325967758942, "learning_rate": 1.7038626145622275e-05, "loss": 0.565, "step": 23000 }, { "epoch": 0.6713847949803006, "grad_norm": 1.1306562088692707, "learning_rate": 1.7029014635955414e-05, "loss": 0.5857, "step": 23005 }, { "epoch": 0.6715307164745367, "grad_norm": 0.8871696913778064, "learning_rate": 1.7019405564653063e-05, "loss": 0.4927, "step": 23010 }, { "epoch": 0.6716766379687727, "grad_norm": 1.1596165485318664, "learning_rate": 1.7009798933952872e-05, "loss": 0.6298, "step": 23015 }, { "epoch": 0.671822559463009, "grad_norm": 1.203428662080954, "learning_rate": 1.7000194746091913e-05, "loss": 0.5477, "step": 23020 }, { "epoch": 0.671968480957245, "grad_norm": 1.1431052281582041, "learning_rate": 1.6990593003306722e-05, "loss": 0.5874, "step": 23025 }, { "epoch": 0.6721144024514811, "grad_norm": 1.084437885125764, "learning_rate": 1.6980993707833216e-05, "loss": 0.5735, "step": 23030 }, { "epoch": 0.6722603239457172, "grad_norm": 1.0482996860710294, "learning_rate": 1.697139686190681e-05, "loss": 0.5112, "step": 23035 }, { "epoch": 0.6724062454399533, "grad_norm": 1.1694277303245686, "learning_rate": 1.696180246776228e-05, "loss": 0.5464, "step": 23040 }, { "epoch": 0.6725521669341894, "grad_norm": 1.1584063032522418, "learning_rate": 1.695221052763385e-05, "loss": 0.6097, "step": 23045 }, { "epoch": 0.6726980884284255, "grad_norm": 1.1321955920722366, "learning_rate": 1.6942621043755205e-05, "loss": 0.5793, "step": 23050 }, { "epoch": 0.6728440099226616, "grad_norm": 1.0710594381512657, "learning_rate": 1.6933034018359434e-05, "loss": 0.539, "step": 23055 }, { "epoch": 0.6729899314168977, "grad_norm": 1.0096259612558842, "learning_rate": 1.6923449453679048e-05, "loss": 0.5984, "step": 23060 }, { "epoch": 0.6731358529111338, "grad_norm": 1.0617707963259246, "learning_rate": 1.6913867351945978e-05, "loss": 0.5238, "step": 23065 }, { "epoch": 0.67328177440537, "grad_norm": 0.9839102714961664, "learning_rate": 1.6904287715391625e-05, "loss": 0.5367, "step": 23070 }, { "epoch": 0.673427695899606, "grad_norm": 1.1334308891524825, "learning_rate": 1.6894710546246774e-05, "loss": 0.5851, "step": 23075 }, { "epoch": 0.6735736173938421, "grad_norm": 1.11701429464701, "learning_rate": 1.688513584674164e-05, "loss": 0.5647, "step": 23080 }, { "epoch": 0.6737195388880782, "grad_norm": 1.160265069265792, "learning_rate": 1.687556361910588e-05, "loss": 0.5709, "step": 23085 }, { "epoch": 0.6738654603823143, "grad_norm": 0.9902998126548146, "learning_rate": 1.6865993865568547e-05, "loss": 0.5578, "step": 23090 }, { "epoch": 0.6740113818765504, "grad_norm": 1.0502794718331292, "learning_rate": 1.6856426588358167e-05, "loss": 0.5684, "step": 23095 }, { "epoch": 0.6741573033707865, "grad_norm": 1.0149458800754292, "learning_rate": 1.684686178970263e-05, "loss": 0.5036, "step": 23100 }, { "epoch": 0.6743032248650226, "grad_norm": 0.9477917100921661, "learning_rate": 1.6837299471829315e-05, "loss": 0.5981, "step": 23105 }, { "epoch": 0.6744491463592587, "grad_norm": 0.9576828679520093, "learning_rate": 1.682773963696494e-05, "loss": 0.571, "step": 23110 }, { "epoch": 0.6745950678534948, "grad_norm": 1.0285771222944262, "learning_rate": 1.6818182287335726e-05, "loss": 0.6035, "step": 23115 }, { "epoch": 0.674740989347731, "grad_norm": 1.0471372169567676, "learning_rate": 1.6808627425167274e-05, "loss": 0.5311, "step": 23120 }, { "epoch": 0.6748869108419671, "grad_norm": 0.8947847519013676, "learning_rate": 1.6799075052684587e-05, "loss": 0.4482, "step": 23125 }, { "epoch": 0.6750328323362031, "grad_norm": 0.9906370768566115, "learning_rate": 1.678952517211216e-05, "loss": 0.5068, "step": 23130 }, { "epoch": 0.6751787538304392, "grad_norm": 1.0269642948236566, "learning_rate": 1.6779977785673812e-05, "loss": 0.5417, "step": 23135 }, { "epoch": 0.6753246753246753, "grad_norm": 0.9840424733816381, "learning_rate": 1.677043289559286e-05, "loss": 0.527, "step": 23140 }, { "epoch": 0.6754705968189114, "grad_norm": 0.995730947756101, "learning_rate": 1.6760890504092004e-05, "loss": 0.5638, "step": 23145 }, { "epoch": 0.6756165183131475, "grad_norm": 1.1739752859882615, "learning_rate": 1.675135061339337e-05, "loss": 0.6313, "step": 23150 }, { "epoch": 0.6757624398073836, "grad_norm": 1.0070692333879487, "learning_rate": 1.674181322571849e-05, "loss": 0.5724, "step": 23155 }, { "epoch": 0.6759083613016197, "grad_norm": 0.9805496124952455, "learning_rate": 1.6732278343288324e-05, "loss": 0.5595, "step": 23160 }, { "epoch": 0.6760542827958558, "grad_norm": 1.1891503261107619, "learning_rate": 1.6722745968323262e-05, "loss": 0.5501, "step": 23165 }, { "epoch": 0.676200204290092, "grad_norm": 0.9459787552230361, "learning_rate": 1.6713216103043078e-05, "loss": 0.5178, "step": 23170 }, { "epoch": 0.6763461257843281, "grad_norm": 1.0194841987911363, "learning_rate": 1.6703688749667002e-05, "loss": 0.5153, "step": 23175 }, { "epoch": 0.6764920472785642, "grad_norm": 0.9394424189934754, "learning_rate": 1.6694163910413623e-05, "loss": 0.5457, "step": 23180 }, { "epoch": 0.6766379687728002, "grad_norm": 1.0305380954741048, "learning_rate": 1.668464158750101e-05, "loss": 0.5356, "step": 23185 }, { "epoch": 0.6767838902670363, "grad_norm": 1.0628366997866339, "learning_rate": 1.6675121783146598e-05, "loss": 0.5755, "step": 23190 }, { "epoch": 0.6769298117612724, "grad_norm": 1.0204511676726125, "learning_rate": 1.6665604499567238e-05, "loss": 0.5893, "step": 23195 }, { "epoch": 0.6770757332555085, "grad_norm": 1.2554300500924958, "learning_rate": 1.6656089738979246e-05, "loss": 0.6102, "step": 23200 }, { "epoch": 0.6772216547497446, "grad_norm": 1.0584128865168627, "learning_rate": 1.6646577503598266e-05, "loss": 0.4976, "step": 23205 }, { "epoch": 0.6773675762439807, "grad_norm": 0.9802831555039291, "learning_rate": 1.6637067795639433e-05, "loss": 0.565, "step": 23210 }, { "epoch": 0.6775134977382168, "grad_norm": 0.9297394984569726, "learning_rate": 1.662756061731725e-05, "loss": 0.5141, "step": 23215 }, { "epoch": 0.677659419232453, "grad_norm": 1.2610847416564823, "learning_rate": 1.6618055970845637e-05, "loss": 0.5798, "step": 23220 }, { "epoch": 0.6778053407266891, "grad_norm": 1.03021309074525, "learning_rate": 1.6608553858437934e-05, "loss": 0.548, "step": 23225 }, { "epoch": 0.6779512622209252, "grad_norm": 1.1822521636541803, "learning_rate": 1.6599054282306875e-05, "loss": 0.4995, "step": 23230 }, { "epoch": 0.6780971837151613, "grad_norm": 1.0142994370813658, "learning_rate": 1.6589557244664627e-05, "loss": 0.5529, "step": 23235 }, { "epoch": 0.6782431052093973, "grad_norm": 1.0297452818205204, "learning_rate": 1.658006274772274e-05, "loss": 0.5646, "step": 23240 }, { "epoch": 0.6783890267036334, "grad_norm": 1.1473667017619555, "learning_rate": 1.6570570793692204e-05, "loss": 0.6576, "step": 23245 }, { "epoch": 0.6785349481978695, "grad_norm": 1.0208846805799963, "learning_rate": 1.6561081384783372e-05, "loss": 0.5145, "step": 23250 }, { "epoch": 0.6786808696921056, "grad_norm": 1.110999011653466, "learning_rate": 1.6551594523206054e-05, "loss": 0.5795, "step": 23255 }, { "epoch": 0.6788267911863417, "grad_norm": 0.9602465407403457, "learning_rate": 1.6542110211169433e-05, "loss": 0.5574, "step": 23260 }, { "epoch": 0.6789727126805778, "grad_norm": 0.9208055144682332, "learning_rate": 1.6532628450882094e-05, "loss": 0.5545, "step": 23265 }, { "epoch": 0.679118634174814, "grad_norm": 0.9242384567633852, "learning_rate": 1.6523149244552078e-05, "loss": 0.5141, "step": 23270 }, { "epoch": 0.6792645556690501, "grad_norm": 1.0816904339632034, "learning_rate": 1.6513672594386754e-05, "loss": 0.5539, "step": 23275 }, { "epoch": 0.6794104771632862, "grad_norm": 1.080541560198899, "learning_rate": 1.6504198502592967e-05, "loss": 0.5101, "step": 23280 }, { "epoch": 0.6795563986575223, "grad_norm": 1.0742492238456445, "learning_rate": 1.649472697137691e-05, "loss": 0.5258, "step": 23285 }, { "epoch": 0.6797023201517584, "grad_norm": 0.9347587717116602, "learning_rate": 1.6485258002944237e-05, "loss": 0.5638, "step": 23290 }, { "epoch": 0.6798482416459944, "grad_norm": 1.021894703208546, "learning_rate": 1.6475791599499963e-05, "loss": 0.558, "step": 23295 }, { "epoch": 0.6799941631402305, "grad_norm": 0.9373868979781866, "learning_rate": 1.6466327763248496e-05, "loss": 0.5886, "step": 23300 }, { "epoch": 0.6801400846344666, "grad_norm": 1.1164845953712361, "learning_rate": 1.6456866496393693e-05, "loss": 0.5123, "step": 23305 }, { "epoch": 0.6802860061287027, "grad_norm": 1.1053382619595968, "learning_rate": 1.6447407801138764e-05, "loss": 0.5751, "step": 23310 }, { "epoch": 0.6804319276229388, "grad_norm": 1.0749554731577158, "learning_rate": 1.643795167968638e-05, "loss": 0.535, "step": 23315 }, { "epoch": 0.680577849117175, "grad_norm": 1.185607479490067, "learning_rate": 1.6428498134238525e-05, "loss": 0.5442, "step": 23320 }, { "epoch": 0.6807237706114111, "grad_norm": 1.0152353292368814, "learning_rate": 1.6419047166996677e-05, "loss": 0.5343, "step": 23325 }, { "epoch": 0.6808696921056472, "grad_norm": 1.0350232364893797, "learning_rate": 1.640959878016165e-05, "loss": 0.5414, "step": 23330 }, { "epoch": 0.6810156135998833, "grad_norm": 1.1825879912847295, "learning_rate": 1.640015297593367e-05, "loss": 0.5637, "step": 23335 }, { "epoch": 0.6811615350941194, "grad_norm": 1.0675058118255656, "learning_rate": 1.6390709756512396e-05, "loss": 0.5819, "step": 23340 }, { "epoch": 0.6813074565883555, "grad_norm": 1.0598146914594901, "learning_rate": 1.6381269124096826e-05, "loss": 0.5448, "step": 23345 }, { "epoch": 0.6814533780825915, "grad_norm": 1.0035441650626076, "learning_rate": 1.6371831080885412e-05, "loss": 0.5284, "step": 23350 }, { "epoch": 0.6815992995768276, "grad_norm": 0.9034775875526377, "learning_rate": 1.636239562907596e-05, "loss": 0.5339, "step": 23355 }, { "epoch": 0.6817452210710637, "grad_norm": 1.0782894915309393, "learning_rate": 1.635296277086571e-05, "loss": 0.567, "step": 23360 }, { "epoch": 0.6818911425652999, "grad_norm": 0.9842612881379557, "learning_rate": 1.634353250845127e-05, "loss": 0.5745, "step": 23365 }, { "epoch": 0.682037064059536, "grad_norm": 1.178868287168965, "learning_rate": 1.633410484402865e-05, "loss": 0.5723, "step": 23370 }, { "epoch": 0.6821829855537721, "grad_norm": 0.9865464731188474, "learning_rate": 1.6324679779793262e-05, "loss": 0.5712, "step": 23375 }, { "epoch": 0.6823289070480082, "grad_norm": 0.9605047187629729, "learning_rate": 1.6315257317939897e-05, "loss": 0.536, "step": 23380 }, { "epoch": 0.6824748285422443, "grad_norm": 1.2088904999579724, "learning_rate": 1.6305837460662775e-05, "loss": 0.6091, "step": 23385 }, { "epoch": 0.6826207500364804, "grad_norm": 1.1824381558147814, "learning_rate": 1.6296420210155468e-05, "loss": 0.5819, "step": 23390 }, { "epoch": 0.6827666715307165, "grad_norm": 1.0609920495438732, "learning_rate": 1.628700556861096e-05, "loss": 0.4789, "step": 23395 }, { "epoch": 0.6829125930249526, "grad_norm": 1.0475499021537709, "learning_rate": 1.627759353822163e-05, "loss": 0.5758, "step": 23400 }, { "epoch": 0.6830585145191886, "grad_norm": 0.9269605777426536, "learning_rate": 1.6268184121179238e-05, "loss": 0.5593, "step": 23405 }, { "epoch": 0.6832044360134247, "grad_norm": 1.4658124627300477, "learning_rate": 1.6258777319674962e-05, "loss": 0.5617, "step": 23410 }, { "epoch": 0.6833503575076609, "grad_norm": 0.8773186284019495, "learning_rate": 1.6249373135899318e-05, "loss": 0.5225, "step": 23415 }, { "epoch": 0.683496279001897, "grad_norm": 0.9814686065894171, "learning_rate": 1.6239971572042276e-05, "loss": 0.5202, "step": 23420 }, { "epoch": 0.6836422004961331, "grad_norm": 1.1953645833053774, "learning_rate": 1.6230572630293144e-05, "loss": 0.6085, "step": 23425 }, { "epoch": 0.6837881219903692, "grad_norm": 1.0469237309760777, "learning_rate": 1.6221176312840666e-05, "loss": 0.5334, "step": 23430 }, { "epoch": 0.6839340434846053, "grad_norm": 0.9478701868735897, "learning_rate": 1.6211782621872932e-05, "loss": 0.579, "step": 23435 }, { "epoch": 0.6840799649788414, "grad_norm": 1.1447166374528552, "learning_rate": 1.6202391559577444e-05, "loss": 0.4644, "step": 23440 }, { "epoch": 0.6842258864730775, "grad_norm": 1.067339598444809, "learning_rate": 1.6193003128141083e-05, "loss": 0.6239, "step": 23445 }, { "epoch": 0.6843718079673136, "grad_norm": 1.024253990127003, "learning_rate": 1.6183617329750118e-05, "loss": 0.5885, "step": 23450 }, { "epoch": 0.6845177294615497, "grad_norm": 0.9265669984984773, "learning_rate": 1.6174234166590213e-05, "loss": 0.523, "step": 23455 }, { "epoch": 0.6846636509557857, "grad_norm": 1.0874373479316508, "learning_rate": 1.616485364084642e-05, "loss": 0.5816, "step": 23460 }, { "epoch": 0.6848095724500219, "grad_norm": 1.0376467727882255, "learning_rate": 1.6155475754703154e-05, "loss": 0.5503, "step": 23465 }, { "epoch": 0.684955493944258, "grad_norm": 1.0895133678626407, "learning_rate": 1.6146100510344243e-05, "loss": 0.5281, "step": 23470 }, { "epoch": 0.6851014154384941, "grad_norm": 1.268252728418005, "learning_rate": 1.613672790995287e-05, "loss": 0.684, "step": 23475 }, { "epoch": 0.6852473369327302, "grad_norm": 0.9438313716507676, "learning_rate": 1.6127357955711645e-05, "loss": 0.5629, "step": 23480 }, { "epoch": 0.6853932584269663, "grad_norm": 1.1380420485873997, "learning_rate": 1.611799064980252e-05, "loss": 0.5421, "step": 23485 }, { "epoch": 0.6855391799212024, "grad_norm": 1.198483456071783, "learning_rate": 1.6108625994406856e-05, "loss": 0.5806, "step": 23490 }, { "epoch": 0.6856851014154385, "grad_norm": 1.064611203381326, "learning_rate": 1.6099263991705377e-05, "loss": 0.5777, "step": 23495 }, { "epoch": 0.6858310229096746, "grad_norm": 1.1879101637228973, "learning_rate": 1.608990464387821e-05, "loss": 0.5111, "step": 23500 }, { "epoch": 0.6859769444039107, "grad_norm": 0.9454223676325956, "learning_rate": 1.6080547953104855e-05, "loss": 0.4858, "step": 23505 }, { "epoch": 0.6861228658981467, "grad_norm": 0.9772241526624805, "learning_rate": 1.6071193921564186e-05, "loss": 0.5658, "step": 23510 }, { "epoch": 0.686268787392383, "grad_norm": 1.255510697205806, "learning_rate": 1.606184255143447e-05, "loss": 0.6002, "step": 23515 }, { "epoch": 0.686414708886619, "grad_norm": 0.9904281988027352, "learning_rate": 1.605249384489333e-05, "loss": 0.6192, "step": 23520 }, { "epoch": 0.6865606303808551, "grad_norm": 1.1358177593119672, "learning_rate": 1.604314780411781e-05, "loss": 0.5435, "step": 23525 }, { "epoch": 0.6867065518750912, "grad_norm": 1.0466279870437631, "learning_rate": 1.6033804431284304e-05, "loss": 0.5616, "step": 23530 }, { "epoch": 0.6868524733693273, "grad_norm": 1.1618160603749006, "learning_rate": 1.602446372856859e-05, "loss": 0.5652, "step": 23535 }, { "epoch": 0.6869983948635634, "grad_norm": 0.9998343717013705, "learning_rate": 1.6015125698145813e-05, "loss": 0.5309, "step": 23540 }, { "epoch": 0.6871443163577995, "grad_norm": 1.2399625557201817, "learning_rate": 1.6005790342190524e-05, "loss": 0.5655, "step": 23545 }, { "epoch": 0.6872902378520356, "grad_norm": 0.9459448030947272, "learning_rate": 1.599645766287663e-05, "loss": 0.5692, "step": 23550 }, { "epoch": 0.6874361593462717, "grad_norm": 1.0752870262988208, "learning_rate": 1.5987127662377422e-05, "loss": 0.535, "step": 23555 }, { "epoch": 0.6875820808405078, "grad_norm": 1.0667389282485402, "learning_rate": 1.5977800342865562e-05, "loss": 0.6001, "step": 23560 }, { "epoch": 0.687728002334744, "grad_norm": 1.0609337750350845, "learning_rate": 1.5968475706513074e-05, "loss": 0.5241, "step": 23565 }, { "epoch": 0.68787392382898, "grad_norm": 1.1431712469539306, "learning_rate": 1.5959153755491406e-05, "loss": 0.5194, "step": 23570 }, { "epoch": 0.6880198453232161, "grad_norm": 1.0336957854878634, "learning_rate": 1.594983449197133e-05, "loss": 0.5721, "step": 23575 }, { "epoch": 0.6881657668174522, "grad_norm": 1.0372929794021808, "learning_rate": 1.5940517918123014e-05, "loss": 0.5477, "step": 23580 }, { "epoch": 0.6883116883116883, "grad_norm": 1.1715872333389117, "learning_rate": 1.593120403611599e-05, "loss": 0.564, "step": 23585 }, { "epoch": 0.6884576098059244, "grad_norm": 0.9390364392736658, "learning_rate": 1.5921892848119168e-05, "loss": 0.489, "step": 23590 }, { "epoch": 0.6886035313001605, "grad_norm": 0.9694152961788406, "learning_rate": 1.5912584356300852e-05, "loss": 0.5783, "step": 23595 }, { "epoch": 0.6887494527943966, "grad_norm": 1.0175749742331377, "learning_rate": 1.590327856282868e-05, "loss": 0.6127, "step": 23600 }, { "epoch": 0.6888953742886327, "grad_norm": 0.9958267435307991, "learning_rate": 1.5893975469869683e-05, "loss": 0.5605, "step": 23605 }, { "epoch": 0.6890412957828688, "grad_norm": 1.0258320463814403, "learning_rate": 1.5884675079590257e-05, "loss": 0.5621, "step": 23610 }, { "epoch": 0.689187217277105, "grad_norm": 1.0199016104137717, "learning_rate": 1.587537739415618e-05, "loss": 0.5188, "step": 23615 }, { "epoch": 0.6893331387713411, "grad_norm": 0.9650422372422685, "learning_rate": 1.5866082415732594e-05, "loss": 0.599, "step": 23620 }, { "epoch": 0.6894790602655771, "grad_norm": 1.0831343293958984, "learning_rate": 1.5856790146483997e-05, "loss": 0.5557, "step": 23625 }, { "epoch": 0.6896249817598132, "grad_norm": 1.0055842576754728, "learning_rate": 1.5847500588574274e-05, "loss": 0.5785, "step": 23630 }, { "epoch": 0.6897709032540493, "grad_norm": 1.3611713938555352, "learning_rate": 1.5838213744166663e-05, "loss": 0.5718, "step": 23635 }, { "epoch": 0.6899168247482854, "grad_norm": 1.0885449456691279, "learning_rate": 1.582892961542379e-05, "loss": 0.573, "step": 23640 }, { "epoch": 0.6900627462425215, "grad_norm": 0.989074675000031, "learning_rate": 1.5819648204507643e-05, "loss": 0.5723, "step": 23645 }, { "epoch": 0.6902086677367576, "grad_norm": 0.95446603669653, "learning_rate": 1.5810369513579558e-05, "loss": 0.5519, "step": 23650 }, { "epoch": 0.6903545892309937, "grad_norm": 1.184215700854847, "learning_rate": 1.580109354480026e-05, "loss": 0.5478, "step": 23655 }, { "epoch": 0.6905005107252298, "grad_norm": 1.0701789108762954, "learning_rate": 1.5791820300329817e-05, "loss": 0.5363, "step": 23660 }, { "epoch": 0.690646432219466, "grad_norm": 1.1438174326842303, "learning_rate": 1.5782549782327695e-05, "loss": 0.585, "step": 23665 }, { "epoch": 0.6907923537137021, "grad_norm": 0.9157628825332, "learning_rate": 1.5773281992952704e-05, "loss": 0.5984, "step": 23670 }, { "epoch": 0.6909382752079382, "grad_norm": 0.9762524213905028, "learning_rate": 1.5764016934363017e-05, "loss": 0.5547, "step": 23675 }, { "epoch": 0.6910841967021742, "grad_norm": 1.0943813251343266, "learning_rate": 1.575475460871617e-05, "loss": 0.6572, "step": 23680 }, { "epoch": 0.6912301181964103, "grad_norm": 1.4299729069645397, "learning_rate": 1.5745495018169087e-05, "loss": 0.5581, "step": 23685 }, { "epoch": 0.6913760396906464, "grad_norm": 1.1122682820087713, "learning_rate": 1.5736238164878024e-05, "loss": 0.5561, "step": 23690 }, { "epoch": 0.6915219611848825, "grad_norm": 1.042108564749287, "learning_rate": 1.572698405099861e-05, "loss": 0.5207, "step": 23695 }, { "epoch": 0.6916678826791186, "grad_norm": 0.9900335337263171, "learning_rate": 1.5717732678685848e-05, "loss": 0.5052, "step": 23700 }, { "epoch": 0.6918138041733547, "grad_norm": 1.3067463810264681, "learning_rate": 1.5708484050094077e-05, "loss": 0.6179, "step": 23705 }, { "epoch": 0.6919597256675908, "grad_norm": 0.9617549799250384, "learning_rate": 1.569923816737703e-05, "loss": 0.4895, "step": 23710 }, { "epoch": 0.692105647161827, "grad_norm": 1.1011768100805583, "learning_rate": 1.5689995032687767e-05, "loss": 0.5222, "step": 23715 }, { "epoch": 0.6922515686560631, "grad_norm": 1.1232514821334867, "learning_rate": 1.5680754648178754e-05, "loss": 0.5949, "step": 23720 }, { "epoch": 0.6923974901502992, "grad_norm": 1.3577272138291319, "learning_rate": 1.5671517016001757e-05, "loss": 0.5989, "step": 23725 }, { "epoch": 0.6925434116445353, "grad_norm": 1.1006137366082411, "learning_rate": 1.5662282138307932e-05, "loss": 0.6073, "step": 23730 }, { "epoch": 0.6926893331387713, "grad_norm": 1.1773173017553349, "learning_rate": 1.565305001724781e-05, "loss": 0.588, "step": 23735 }, { "epoch": 0.6928352546330074, "grad_norm": 1.1620335338616863, "learning_rate": 1.5643820654971248e-05, "loss": 0.5832, "step": 23740 }, { "epoch": 0.6929811761272435, "grad_norm": 1.031085682964398, "learning_rate": 1.5634594053627497e-05, "loss": 0.5757, "step": 23745 }, { "epoch": 0.6931270976214796, "grad_norm": 1.004524683013652, "learning_rate": 1.5625370215365112e-05, "loss": 0.5818, "step": 23750 }, { "epoch": 0.6932730191157157, "grad_norm": 0.9339729446136052, "learning_rate": 1.5616149142332064e-05, "loss": 0.5754, "step": 23755 }, { "epoch": 0.6934189406099518, "grad_norm": 1.0004535089218087, "learning_rate": 1.560693083667564e-05, "loss": 0.4795, "step": 23760 }, { "epoch": 0.693564862104188, "grad_norm": 0.9877533608974142, "learning_rate": 1.559771530054249e-05, "loss": 0.5071, "step": 23765 }, { "epoch": 0.6937107835984241, "grad_norm": 1.0923836364874329, "learning_rate": 1.5588502536078635e-05, "loss": 0.5225, "step": 23770 }, { "epoch": 0.6938567050926602, "grad_norm": 0.9781339788954408, "learning_rate": 1.5579292545429424e-05, "loss": 0.472, "step": 23775 }, { "epoch": 0.6940026265868963, "grad_norm": 1.0269149935255157, "learning_rate": 1.557008533073959e-05, "loss": 0.4979, "step": 23780 }, { "epoch": 0.6941485480811324, "grad_norm": 0.9612519605924633, "learning_rate": 1.5560880894153194e-05, "loss": 0.6085, "step": 23785 }, { "epoch": 0.6942944695753684, "grad_norm": 1.093956388883857, "learning_rate": 1.5551679237813683e-05, "loss": 0.5393, "step": 23790 }, { "epoch": 0.6944403910696045, "grad_norm": 0.9298277891789557, "learning_rate": 1.55424803638638e-05, "loss": 0.5564, "step": 23795 }, { "epoch": 0.6945863125638406, "grad_norm": 1.1287123821467764, "learning_rate": 1.5533284274445704e-05, "loss": 0.5833, "step": 23800 }, { "epoch": 0.6947322340580767, "grad_norm": 0.9747456728924071, "learning_rate": 1.5524090971700858e-05, "loss": 0.514, "step": 23805 }, { "epoch": 0.6948781555523128, "grad_norm": 1.006958214950494, "learning_rate": 1.5514900457770096e-05, "loss": 0.5698, "step": 23810 }, { "epoch": 0.695024077046549, "grad_norm": 1.0017558592261047, "learning_rate": 1.5505712734793624e-05, "loss": 0.5484, "step": 23815 }, { "epoch": 0.6951699985407851, "grad_norm": 1.144835571577397, "learning_rate": 1.549652780491094e-05, "loss": 0.5765, "step": 23820 }, { "epoch": 0.6953159200350212, "grad_norm": 0.9971907597869134, "learning_rate": 1.548734567026095e-05, "loss": 0.6106, "step": 23825 }, { "epoch": 0.6954618415292573, "grad_norm": 0.9843461176192522, "learning_rate": 1.5478166332981883e-05, "loss": 0.5654, "step": 23830 }, { "epoch": 0.6956077630234934, "grad_norm": 1.11990042432235, "learning_rate": 1.5468989795211303e-05, "loss": 0.5446, "step": 23835 }, { "epoch": 0.6957536845177295, "grad_norm": 0.9887499455259151, "learning_rate": 1.5459816059086167e-05, "loss": 0.5462, "step": 23840 }, { "epoch": 0.6958996060119655, "grad_norm": 0.9902404577868734, "learning_rate": 1.545064512674272e-05, "loss": 0.5598, "step": 23845 }, { "epoch": 0.6960455275062016, "grad_norm": 0.9753796815314271, "learning_rate": 1.5441477000316602e-05, "loss": 0.5653, "step": 23850 }, { "epoch": 0.6961914490004377, "grad_norm": 0.9872521467117548, "learning_rate": 1.5432311681942775e-05, "loss": 0.5295, "step": 23855 }, { "epoch": 0.6963373704946738, "grad_norm": 1.2628690881733258, "learning_rate": 1.542314917375557e-05, "loss": 0.6121, "step": 23860 }, { "epoch": 0.69648329198891, "grad_norm": 1.0891966269910203, "learning_rate": 1.5413989477888628e-05, "loss": 0.5384, "step": 23865 }, { "epoch": 0.6966292134831461, "grad_norm": 0.9987778189723784, "learning_rate": 1.5404832596474966e-05, "loss": 0.5314, "step": 23870 }, { "epoch": 0.6967751349773822, "grad_norm": 1.1407358691363665, "learning_rate": 1.5395678531646937e-05, "loss": 0.595, "step": 23875 }, { "epoch": 0.6969210564716183, "grad_norm": 1.0973126262333854, "learning_rate": 1.5386527285536222e-05, "loss": 0.5673, "step": 23880 }, { "epoch": 0.6970669779658544, "grad_norm": 1.062910583233567, "learning_rate": 1.5377378860273888e-05, "loss": 0.5299, "step": 23885 }, { "epoch": 0.6972128994600905, "grad_norm": 1.0558694226895347, "learning_rate": 1.5368233257990284e-05, "loss": 0.6188, "step": 23890 }, { "epoch": 0.6973588209543266, "grad_norm": 1.0133638996405185, "learning_rate": 1.5359090480815152e-05, "loss": 0.5552, "step": 23895 }, { "epoch": 0.6975047424485626, "grad_norm": 1.1337502189115591, "learning_rate": 1.5349950530877558e-05, "loss": 0.5552, "step": 23900 }, { "epoch": 0.6976506639427987, "grad_norm": 0.9759160535765449, "learning_rate": 1.5340813410305897e-05, "loss": 0.5235, "step": 23905 }, { "epoch": 0.6977965854370348, "grad_norm": 0.9863873237566788, "learning_rate": 1.5331679121227942e-05, "loss": 0.6026, "step": 23910 }, { "epoch": 0.697942506931271, "grad_norm": 0.8840693382169786, "learning_rate": 1.532254766577076e-05, "loss": 0.5632, "step": 23915 }, { "epoch": 0.6980884284255071, "grad_norm": 1.079312551924166, "learning_rate": 1.531341904606079e-05, "loss": 0.5608, "step": 23920 }, { "epoch": 0.6982343499197432, "grad_norm": 1.1708898818397133, "learning_rate": 1.53042932642238e-05, "loss": 0.5526, "step": 23925 }, { "epoch": 0.6983802714139793, "grad_norm": 0.959678370872696, "learning_rate": 1.5295170322384904e-05, "loss": 0.5437, "step": 23930 }, { "epoch": 0.6985261929082154, "grad_norm": 0.8939635222772591, "learning_rate": 1.528605022266855e-05, "loss": 0.5445, "step": 23935 }, { "epoch": 0.6986721144024515, "grad_norm": 1.1218714853305787, "learning_rate": 1.5276932967198517e-05, "loss": 0.5974, "step": 23940 }, { "epoch": 0.6988180358966876, "grad_norm": 1.0828812139718766, "learning_rate": 1.526781855809793e-05, "loss": 0.5247, "step": 23945 }, { "epoch": 0.6989639573909237, "grad_norm": 0.9231800826504527, "learning_rate": 1.5258706997489241e-05, "loss": 0.5466, "step": 23950 }, { "epoch": 0.6991098788851597, "grad_norm": 1.1109178502670414, "learning_rate": 1.5249598287494277e-05, "loss": 0.5898, "step": 23955 }, { "epoch": 0.6992558003793958, "grad_norm": 1.0142001242438954, "learning_rate": 1.5240492430234127e-05, "loss": 0.5583, "step": 23960 }, { "epoch": 0.699401721873632, "grad_norm": 1.0743897999192367, "learning_rate": 1.5231389427829295e-05, "loss": 0.5254, "step": 23965 }, { "epoch": 0.6995476433678681, "grad_norm": 1.060414814772233, "learning_rate": 1.5222289282399568e-05, "loss": 0.5538, "step": 23970 }, { "epoch": 0.6996935648621042, "grad_norm": 1.2878218447865446, "learning_rate": 1.5213191996064094e-05, "loss": 0.5409, "step": 23975 }, { "epoch": 0.6998394863563403, "grad_norm": 1.063504307431234, "learning_rate": 1.5204097570941351e-05, "loss": 0.6097, "step": 23980 }, { "epoch": 0.6999854078505764, "grad_norm": 1.1331507414321427, "learning_rate": 1.519500600914912e-05, "loss": 0.5285, "step": 23985 }, { "epoch": 0.7001313293448125, "grad_norm": 0.942782986060499, "learning_rate": 1.5185917312804571e-05, "loss": 0.552, "step": 23990 }, { "epoch": 0.7002772508390486, "grad_norm": 0.8879691954509794, "learning_rate": 1.5176831484024152e-05, "loss": 0.53, "step": 23995 }, { "epoch": 0.7004231723332847, "grad_norm": 1.0041566890995757, "learning_rate": 1.5167748524923687e-05, "loss": 0.5795, "step": 24000 }, { "epoch": 0.7005690938275208, "grad_norm": 0.9421668551810137, "learning_rate": 1.5158668437618307e-05, "loss": 0.5373, "step": 24005 }, { "epoch": 0.7007150153217568, "grad_norm": 0.9887603240078826, "learning_rate": 1.5149591224222476e-05, "loss": 0.4987, "step": 24010 }, { "epoch": 0.700860936815993, "grad_norm": 0.9910310966047815, "learning_rate": 1.5140516886849993e-05, "loss": 0.5325, "step": 24015 }, { "epoch": 0.7010068583102291, "grad_norm": 1.1489471142732917, "learning_rate": 1.5131445427613982e-05, "loss": 0.5529, "step": 24020 }, { "epoch": 0.7011527798044652, "grad_norm": 0.9926160338689359, "learning_rate": 1.512237684862693e-05, "loss": 0.5427, "step": 24025 }, { "epoch": 0.7012987012987013, "grad_norm": 0.994655527164068, "learning_rate": 1.511331115200058e-05, "loss": 0.5698, "step": 24030 }, { "epoch": 0.7014446227929374, "grad_norm": 1.021934161619113, "learning_rate": 1.5104248339846083e-05, "loss": 0.5669, "step": 24035 }, { "epoch": 0.7015905442871735, "grad_norm": 1.086159428464733, "learning_rate": 1.5095188414273865e-05, "loss": 0.54, "step": 24040 }, { "epoch": 0.7017364657814096, "grad_norm": 1.0082074909642036, "learning_rate": 1.5086131377393715e-05, "loss": 0.5725, "step": 24045 }, { "epoch": 0.7018823872756457, "grad_norm": 1.0623725640192214, "learning_rate": 1.5077077231314724e-05, "loss": 0.5333, "step": 24050 }, { "epoch": 0.7020283087698818, "grad_norm": 1.0580229008137838, "learning_rate": 1.5068025978145323e-05, "loss": 0.6016, "step": 24055 }, { "epoch": 0.702174230264118, "grad_norm": 1.004582938060519, "learning_rate": 1.5058977619993258e-05, "loss": 0.5315, "step": 24060 }, { "epoch": 0.702320151758354, "grad_norm": 0.9474439861790411, "learning_rate": 1.5049932158965608e-05, "loss": 0.5362, "step": 24065 }, { "epoch": 0.7024660732525901, "grad_norm": 1.173901170174014, "learning_rate": 1.5040889597168789e-05, "loss": 0.5656, "step": 24070 }, { "epoch": 0.7026119947468262, "grad_norm": 1.0026167024711883, "learning_rate": 1.5031849936708528e-05, "loss": 0.542, "step": 24075 }, { "epoch": 0.7027579162410623, "grad_norm": 1.0418145379810961, "learning_rate": 1.5022813179689871e-05, "loss": 0.5634, "step": 24080 }, { "epoch": 0.7029038377352984, "grad_norm": 1.0347587841121264, "learning_rate": 1.5013779328217201e-05, "loss": 0.5469, "step": 24085 }, { "epoch": 0.7030497592295345, "grad_norm": 1.070501516634199, "learning_rate": 1.5004748384394207e-05, "loss": 0.574, "step": 24090 }, { "epoch": 0.7031956807237706, "grad_norm": 1.155202963908396, "learning_rate": 1.4995720350323932e-05, "loss": 0.6062, "step": 24095 }, { "epoch": 0.7033416022180067, "grad_norm": 1.0153245700415607, "learning_rate": 1.4986695228108715e-05, "loss": 0.584, "step": 24100 }, { "epoch": 0.7034875237122428, "grad_norm": 1.0004052658001845, "learning_rate": 1.4977673019850219e-05, "loss": 0.4873, "step": 24105 }, { "epoch": 0.703633445206479, "grad_norm": 1.0832171318691564, "learning_rate": 1.4968653727649434e-05, "loss": 0.547, "step": 24110 }, { "epoch": 0.7037793667007151, "grad_norm": 0.9336063406392175, "learning_rate": 1.4959637353606678e-05, "loss": 0.4998, "step": 24115 }, { "epoch": 0.7039252881949511, "grad_norm": 0.8745079174032, "learning_rate": 1.495062389982158e-05, "loss": 0.4909, "step": 24120 }, { "epoch": 0.7040712096891872, "grad_norm": 1.0416569546274859, "learning_rate": 1.4941613368393092e-05, "loss": 0.5029, "step": 24125 }, { "epoch": 0.7042171311834233, "grad_norm": 0.8952009517958974, "learning_rate": 1.493260576141948e-05, "loss": 0.5581, "step": 24130 }, { "epoch": 0.7043630526776594, "grad_norm": 1.1097504765125694, "learning_rate": 1.4923601080998325e-05, "loss": 0.6083, "step": 24135 }, { "epoch": 0.7045089741718955, "grad_norm": 1.1128181359323472, "learning_rate": 1.4914599329226558e-05, "loss": 0.5523, "step": 24140 }, { "epoch": 0.7046548956661316, "grad_norm": 1.0606921931329718, "learning_rate": 1.490560050820039e-05, "loss": 0.5162, "step": 24145 }, { "epoch": 0.7048008171603677, "grad_norm": 0.9151637047067894, "learning_rate": 1.4896604620015364e-05, "loss": 0.5032, "step": 24150 }, { "epoch": 0.7049467386546038, "grad_norm": 0.8606924694357436, "learning_rate": 1.4887611666766346e-05, "loss": 0.6016, "step": 24155 }, { "epoch": 0.70509266014884, "grad_norm": 1.0209408199054315, "learning_rate": 1.4878621650547503e-05, "loss": 0.5696, "step": 24160 }, { "epoch": 0.7052385816430761, "grad_norm": 1.0772898736344854, "learning_rate": 1.4869634573452346e-05, "loss": 0.587, "step": 24165 }, { "epoch": 0.7053845031373122, "grad_norm": 1.0632431085850207, "learning_rate": 1.486065043757367e-05, "loss": 0.5096, "step": 24170 }, { "epoch": 0.7055304246315482, "grad_norm": 0.9315477859257142, "learning_rate": 1.4851669245003601e-05, "loss": 0.598, "step": 24175 }, { "epoch": 0.7056763461257843, "grad_norm": 1.1768450356135882, "learning_rate": 1.4842690997833574e-05, "loss": 0.5837, "step": 24180 }, { "epoch": 0.7058222676200204, "grad_norm": 1.0158217869466701, "learning_rate": 1.4833715698154355e-05, "loss": 0.5406, "step": 24185 }, { "epoch": 0.7059681891142565, "grad_norm": 0.9858779197760544, "learning_rate": 1.4824743348055997e-05, "loss": 0.5284, "step": 24190 }, { "epoch": 0.7061141106084926, "grad_norm": 1.0396290614849886, "learning_rate": 1.4815773949627888e-05, "loss": 0.5618, "step": 24195 }, { "epoch": 0.7062600321027287, "grad_norm": 0.9548591255245135, "learning_rate": 1.4806807504958715e-05, "loss": 0.573, "step": 24200 }, { "epoch": 0.7064059535969648, "grad_norm": 1.0617743027350097, "learning_rate": 1.4797844016136475e-05, "loss": 0.561, "step": 24205 }, { "epoch": 0.706551875091201, "grad_norm": 1.1657765155102613, "learning_rate": 1.4788883485248498e-05, "loss": 0.5785, "step": 24210 }, { "epoch": 0.7066977965854371, "grad_norm": 1.0929638255282905, "learning_rate": 1.4779925914381403e-05, "loss": 0.589, "step": 24215 }, { "epoch": 0.7068437180796732, "grad_norm": 1.0610996139228235, "learning_rate": 1.4770971305621134e-05, "loss": 0.577, "step": 24220 }, { "epoch": 0.7069896395739093, "grad_norm": 1.0448727877535875, "learning_rate": 1.4762019661052926e-05, "loss": 0.6013, "step": 24225 }, { "epoch": 0.7071355610681453, "grad_norm": 1.049271368571552, "learning_rate": 1.475307098276135e-05, "loss": 0.5032, "step": 24230 }, { "epoch": 0.7072814825623814, "grad_norm": 1.198834488235397, "learning_rate": 1.4744125272830268e-05, "loss": 0.5805, "step": 24235 }, { "epoch": 0.7074274040566175, "grad_norm": 0.9728507946921953, "learning_rate": 1.4735182533342857e-05, "loss": 0.5473, "step": 24240 }, { "epoch": 0.7075733255508536, "grad_norm": 1.0125589572724134, "learning_rate": 1.4726242766381599e-05, "loss": 0.491, "step": 24245 }, { "epoch": 0.7077192470450897, "grad_norm": 0.9403536157881934, "learning_rate": 1.4717305974028278e-05, "loss": 0.5401, "step": 24250 }, { "epoch": 0.7078651685393258, "grad_norm": 1.2439438694113967, "learning_rate": 1.4708372158364004e-05, "loss": 0.5912, "step": 24255 }, { "epoch": 0.708011090033562, "grad_norm": 1.0570476213780557, "learning_rate": 1.4699441321469187e-05, "loss": 0.563, "step": 24260 }, { "epoch": 0.7081570115277981, "grad_norm": 1.1344776024957806, "learning_rate": 1.4690513465423527e-05, "loss": 0.5351, "step": 24265 }, { "epoch": 0.7083029330220342, "grad_norm": 0.9517116966818174, "learning_rate": 1.4681588592306051e-05, "loss": 0.5816, "step": 24270 }, { "epoch": 0.7084488545162703, "grad_norm": 0.9254938061243251, "learning_rate": 1.4672666704195065e-05, "loss": 0.5402, "step": 24275 }, { "epoch": 0.7085947760105064, "grad_norm": 1.0949906738144726, "learning_rate": 1.4663747803168221e-05, "loss": 0.5228, "step": 24280 }, { "epoch": 0.7087406975047424, "grad_norm": 0.9209693860775959, "learning_rate": 1.4654831891302437e-05, "loss": 0.5459, "step": 24285 }, { "epoch": 0.7088866189989785, "grad_norm": 1.0828236813098084, "learning_rate": 1.4645918970673966e-05, "loss": 0.5604, "step": 24290 }, { "epoch": 0.7090325404932146, "grad_norm": 0.8573555380896565, "learning_rate": 1.463700904335832e-05, "loss": 0.4945, "step": 24295 }, { "epoch": 0.7091784619874507, "grad_norm": 1.138820866835025, "learning_rate": 1.462810211143037e-05, "loss": 0.6028, "step": 24300 }, { "epoch": 0.7093243834816868, "grad_norm": 1.1059502592991022, "learning_rate": 1.4619198176964244e-05, "loss": 0.5824, "step": 24305 }, { "epoch": 0.709470304975923, "grad_norm": 1.1868373732211293, "learning_rate": 1.4610297242033393e-05, "loss": 0.5831, "step": 24310 }, { "epoch": 0.7096162264701591, "grad_norm": 1.0295679619658424, "learning_rate": 1.4601399308710573e-05, "loss": 0.5342, "step": 24315 }, { "epoch": 0.7097621479643952, "grad_norm": 1.2139491234112905, "learning_rate": 1.4592504379067822e-05, "loss": 0.554, "step": 24320 }, { "epoch": 0.7099080694586313, "grad_norm": 0.9184199497569038, "learning_rate": 1.4583612455176503e-05, "loss": 0.4928, "step": 24325 }, { "epoch": 0.7100539909528674, "grad_norm": 0.9289577816708264, "learning_rate": 1.457472353910726e-05, "loss": 0.531, "step": 24330 }, { "epoch": 0.7101999124471035, "grad_norm": 1.0325969369599877, "learning_rate": 1.4565837632930044e-05, "loss": 0.4927, "step": 24335 }, { "epoch": 0.7103458339413395, "grad_norm": 0.993337810512389, "learning_rate": 1.4556954738714103e-05, "loss": 0.543, "step": 24340 }, { "epoch": 0.7104917554355756, "grad_norm": 1.0119432687559544, "learning_rate": 1.4548074858527982e-05, "loss": 0.5243, "step": 24345 }, { "epoch": 0.7106376769298117, "grad_norm": 1.2655935035631212, "learning_rate": 1.4539197994439534e-05, "loss": 0.5375, "step": 24350 }, { "epoch": 0.7107835984240478, "grad_norm": 1.152821115981948, "learning_rate": 1.4530324148515895e-05, "loss": 0.6121, "step": 24355 }, { "epoch": 0.710929519918284, "grad_norm": 0.9490338918918519, "learning_rate": 1.4521453322823526e-05, "loss": 0.5566, "step": 24360 }, { "epoch": 0.7110754414125201, "grad_norm": 0.9488672991645826, "learning_rate": 1.451258551942813e-05, "loss": 0.5579, "step": 24365 }, { "epoch": 0.7112213629067562, "grad_norm": 1.0984510830498266, "learning_rate": 1.4503720740394768e-05, "loss": 0.5773, "step": 24370 }, { "epoch": 0.7113672844009923, "grad_norm": 1.0620351005521815, "learning_rate": 1.4494858987787762e-05, "loss": 0.5476, "step": 24375 }, { "epoch": 0.7115132058952284, "grad_norm": 1.1361207229615862, "learning_rate": 1.4486000263670735e-05, "loss": 0.5058, "step": 24380 }, { "epoch": 0.7116591273894645, "grad_norm": 1.0414569084795966, "learning_rate": 1.4477144570106607e-05, "loss": 0.5166, "step": 24385 }, { "epoch": 0.7118050488837006, "grad_norm": 1.1976358702952392, "learning_rate": 1.4468291909157583e-05, "loss": 0.5531, "step": 24390 }, { "epoch": 0.7119509703779366, "grad_norm": 1.1590179723220841, "learning_rate": 1.4459442282885188e-05, "loss": 0.5616, "step": 24395 }, { "epoch": 0.7120968918721727, "grad_norm": 1.1566752242120362, "learning_rate": 1.4450595693350205e-05, "loss": 0.5233, "step": 24400 }, { "epoch": 0.7122428133664088, "grad_norm": 1.2389754022509472, "learning_rate": 1.4441752142612752e-05, "loss": 0.5789, "step": 24405 }, { "epoch": 0.712388734860645, "grad_norm": 1.1453548437096257, "learning_rate": 1.4432911632732187e-05, "loss": 0.5979, "step": 24410 }, { "epoch": 0.7125346563548811, "grad_norm": 1.1037437211897294, "learning_rate": 1.4424074165767199e-05, "loss": 0.5345, "step": 24415 }, { "epoch": 0.7126805778491172, "grad_norm": 0.9551622470134965, "learning_rate": 1.441523974377576e-05, "loss": 0.6019, "step": 24420 }, { "epoch": 0.7128264993433533, "grad_norm": 1.2130372013147819, "learning_rate": 1.4406408368815122e-05, "loss": 0.5643, "step": 24425 }, { "epoch": 0.7129724208375894, "grad_norm": 0.9712835721280971, "learning_rate": 1.4397580042941858e-05, "loss": 0.5012, "step": 24430 }, { "epoch": 0.7131183423318255, "grad_norm": 0.9058692467266198, "learning_rate": 1.4388754768211776e-05, "loss": 0.5313, "step": 24435 }, { "epoch": 0.7132642638260616, "grad_norm": 1.1425079248796624, "learning_rate": 1.4379932546680031e-05, "loss": 0.555, "step": 24440 }, { "epoch": 0.7134101853202977, "grad_norm": 0.9961630226072019, "learning_rate": 1.4371113380401036e-05, "loss": 0.5603, "step": 24445 }, { "epoch": 0.7135561068145337, "grad_norm": 0.9630539402208909, "learning_rate": 1.4362297271428487e-05, "loss": 0.534, "step": 24450 }, { "epoch": 0.7137020283087698, "grad_norm": 1.0306877211262744, "learning_rate": 1.4353484221815403e-05, "loss": 0.5331, "step": 24455 }, { "epoch": 0.713847949803006, "grad_norm": 1.0358162654136978, "learning_rate": 1.4344674233614035e-05, "loss": 0.5989, "step": 24460 }, { "epoch": 0.7139938712972421, "grad_norm": 0.8352713222910401, "learning_rate": 1.4335867308875983e-05, "loss": 0.5398, "step": 24465 }, { "epoch": 0.7141397927914782, "grad_norm": 4.946832034723737, "learning_rate": 1.4327063449652084e-05, "loss": 0.5702, "step": 24470 }, { "epoch": 0.7142857142857143, "grad_norm": 1.116624206980472, "learning_rate": 1.4318262657992504e-05, "loss": 0.5895, "step": 24475 }, { "epoch": 0.7144316357799504, "grad_norm": 1.1823160676251852, "learning_rate": 1.4309464935946642e-05, "loss": 0.5692, "step": 24480 }, { "epoch": 0.7145775572741865, "grad_norm": 0.984740016936657, "learning_rate": 1.4300670285563233e-05, "loss": 0.5259, "step": 24485 }, { "epoch": 0.7147234787684226, "grad_norm": 1.023882732110191, "learning_rate": 1.4291878708890272e-05, "loss": 0.5628, "step": 24490 }, { "epoch": 0.7148694002626587, "grad_norm": 1.166015227269716, "learning_rate": 1.428309020797503e-05, "loss": 0.5269, "step": 24495 }, { "epoch": 0.7150153217568948, "grad_norm": 1.0807054018499804, "learning_rate": 1.4274304784864098e-05, "loss": 0.5174, "step": 24500 }, { "epoch": 0.7151612432511308, "grad_norm": 0.8718926417295376, "learning_rate": 1.4265522441603296e-05, "loss": 0.5067, "step": 24505 }, { "epoch": 0.715307164745367, "grad_norm": 1.1005682628862632, "learning_rate": 1.4256743180237777e-05, "loss": 0.5539, "step": 24510 }, { "epoch": 0.7154530862396031, "grad_norm": 0.8802949528227003, "learning_rate": 1.4247967002811952e-05, "loss": 0.5229, "step": 24515 }, { "epoch": 0.7155990077338392, "grad_norm": 1.0458262784878145, "learning_rate": 1.4239193911369508e-05, "loss": 0.5406, "step": 24520 }, { "epoch": 0.7157449292280753, "grad_norm": 1.1943831586181977, "learning_rate": 1.4230423907953449e-05, "loss": 0.559, "step": 24525 }, { "epoch": 0.7158908507223114, "grad_norm": 0.983925435455423, "learning_rate": 1.4221656994605998e-05, "loss": 0.5678, "step": 24530 }, { "epoch": 0.7160367722165475, "grad_norm": 1.1176890264899106, "learning_rate": 1.4212893173368725e-05, "loss": 0.571, "step": 24535 }, { "epoch": 0.7161826937107836, "grad_norm": 1.137288587410384, "learning_rate": 1.4204132446282425e-05, "loss": 0.5562, "step": 24540 }, { "epoch": 0.7163286152050197, "grad_norm": 1.1724075084412144, "learning_rate": 1.4195374815387219e-05, "loss": 0.5735, "step": 24545 }, { "epoch": 0.7164745366992558, "grad_norm": 1.161746572130148, "learning_rate": 1.4186620282722473e-05, "loss": 0.565, "step": 24550 }, { "epoch": 0.7166204581934918, "grad_norm": 1.077664429766411, "learning_rate": 1.4177868850326851e-05, "loss": 0.5375, "step": 24555 }, { "epoch": 0.716766379687728, "grad_norm": 1.1224995518381657, "learning_rate": 1.4169120520238277e-05, "loss": 0.5275, "step": 24560 }, { "epoch": 0.7169123011819641, "grad_norm": 0.9830538347397293, "learning_rate": 1.4160375294493961e-05, "loss": 0.476, "step": 24565 }, { "epoch": 0.7170582226762002, "grad_norm": 1.1436343034411778, "learning_rate": 1.4151633175130417e-05, "loss": 0.5839, "step": 24570 }, { "epoch": 0.7172041441704363, "grad_norm": 1.085081887911008, "learning_rate": 1.4142894164183376e-05, "loss": 0.4675, "step": 24575 }, { "epoch": 0.7173500656646724, "grad_norm": 1.0413742364384941, "learning_rate": 1.4134158263687905e-05, "loss": 0.57, "step": 24580 }, { "epoch": 0.7174959871589085, "grad_norm": 1.000954814738591, "learning_rate": 1.4125425475678315e-05, "loss": 0.5328, "step": 24585 }, { "epoch": 0.7176419086531446, "grad_norm": 1.0108958039595437, "learning_rate": 1.411669580218819e-05, "loss": 0.504, "step": 24590 }, { "epoch": 0.7177878301473807, "grad_norm": 0.8884764384926542, "learning_rate": 1.4107969245250419e-05, "loss": 0.5396, "step": 24595 }, { "epoch": 0.7179337516416168, "grad_norm": 0.9453175934815446, "learning_rate": 1.4099245806897113e-05, "loss": 0.5025, "step": 24600 }, { "epoch": 0.7180796731358529, "grad_norm": 0.9482374808440415, "learning_rate": 1.4090525489159711e-05, "loss": 0.4951, "step": 24605 }, { "epoch": 0.7182255946300891, "grad_norm": 0.8915304156398259, "learning_rate": 1.4081808294068894e-05, "loss": 0.5143, "step": 24610 }, { "epoch": 0.7183715161243251, "grad_norm": 0.9699427457982547, "learning_rate": 1.4073094223654626e-05, "loss": 0.5501, "step": 24615 }, { "epoch": 0.7185174376185612, "grad_norm": 1.0017473976094748, "learning_rate": 1.4064383279946141e-05, "loss": 0.5465, "step": 24620 }, { "epoch": 0.7186633591127973, "grad_norm": 1.2777095277581743, "learning_rate": 1.4055675464971946e-05, "loss": 0.5337, "step": 24625 }, { "epoch": 0.7188092806070334, "grad_norm": 1.1011030671152342, "learning_rate": 1.4046970780759816e-05, "loss": 0.6021, "step": 24630 }, { "epoch": 0.7189552021012695, "grad_norm": 1.0296889790169528, "learning_rate": 1.4038269229336787e-05, "loss": 0.5714, "step": 24635 }, { "epoch": 0.7191011235955056, "grad_norm": 1.2811505711799551, "learning_rate": 1.4029570812729204e-05, "loss": 0.5916, "step": 24640 }, { "epoch": 0.7192470450897417, "grad_norm": 0.8802879534332432, "learning_rate": 1.402087553296264e-05, "loss": 0.5506, "step": 24645 }, { "epoch": 0.7193929665839778, "grad_norm": 0.9912588371933395, "learning_rate": 1.4012183392061956e-05, "loss": 0.5129, "step": 24650 }, { "epoch": 0.7195388880782139, "grad_norm": 1.1786932997067552, "learning_rate": 1.4003494392051267e-05, "loss": 0.6091, "step": 24655 }, { "epoch": 0.7196848095724501, "grad_norm": 1.1311139214037087, "learning_rate": 1.399480853495399e-05, "loss": 0.5185, "step": 24660 }, { "epoch": 0.7198307310666862, "grad_norm": 1.1387093840691218, "learning_rate": 1.3986125822792789e-05, "loss": 0.5914, "step": 24665 }, { "epoch": 0.7199766525609222, "grad_norm": 0.9299079241464092, "learning_rate": 1.3977446257589572e-05, "loss": 0.5925, "step": 24670 }, { "epoch": 0.7201225740551583, "grad_norm": 1.114214367232458, "learning_rate": 1.3968769841365552e-05, "loss": 0.5944, "step": 24675 }, { "epoch": 0.7202684955493944, "grad_norm": 0.9740034346974353, "learning_rate": 1.3960096576141197e-05, "loss": 0.5178, "step": 24680 }, { "epoch": 0.7204144170436305, "grad_norm": 1.0794001317087405, "learning_rate": 1.395142646393624e-05, "loss": 0.5678, "step": 24685 }, { "epoch": 0.7205603385378666, "grad_norm": 1.0489859868659006, "learning_rate": 1.3942759506769676e-05, "loss": 0.5185, "step": 24690 }, { "epoch": 0.7207062600321027, "grad_norm": 1.0772334983786078, "learning_rate": 1.3934095706659765e-05, "loss": 0.5215, "step": 24695 }, { "epoch": 0.7208521815263388, "grad_norm": 0.9102516968369698, "learning_rate": 1.3925435065624038e-05, "loss": 0.5407, "step": 24700 }, { "epoch": 0.7209981030205749, "grad_norm": 1.279974120818073, "learning_rate": 1.3916777585679275e-05, "loss": 0.5325, "step": 24705 }, { "epoch": 0.7211440245148111, "grad_norm": 0.9517698128184349, "learning_rate": 1.3908123268841552e-05, "loss": 0.5235, "step": 24710 }, { "epoch": 0.7212899460090472, "grad_norm": 0.984386956873802, "learning_rate": 1.3899472117126181e-05, "loss": 0.5812, "step": 24715 }, { "epoch": 0.7214358675032833, "grad_norm": 0.9897334991811357, "learning_rate": 1.3890824132547742e-05, "loss": 0.5646, "step": 24720 }, { "epoch": 0.7215817889975193, "grad_norm": 1.1999808261371183, "learning_rate": 1.388217931712007e-05, "loss": 0.5999, "step": 24725 }, { "epoch": 0.7217277104917554, "grad_norm": 1.1141004763722877, "learning_rate": 1.387353767285629e-05, "loss": 0.583, "step": 24730 }, { "epoch": 0.7218736319859915, "grad_norm": 1.1531681898759512, "learning_rate": 1.3864899201768761e-05, "loss": 0.5033, "step": 24735 }, { "epoch": 0.7220195534802276, "grad_norm": 1.165137918761426, "learning_rate": 1.3856263905869113e-05, "loss": 0.5505, "step": 24740 }, { "epoch": 0.7221654749744637, "grad_norm": 0.9499678365747813, "learning_rate": 1.3847631787168236e-05, "loss": 0.5691, "step": 24745 }, { "epoch": 0.7223113964686998, "grad_norm": 0.9832657700898059, "learning_rate": 1.3839002847676275e-05, "loss": 0.5458, "step": 24750 }, { "epoch": 0.7224573179629359, "grad_norm": 1.0809557200461168, "learning_rate": 1.3830377089402646e-05, "loss": 0.5766, "step": 24755 }, { "epoch": 0.7226032394571721, "grad_norm": 1.0343446224534398, "learning_rate": 1.3821754514356025e-05, "loss": 0.598, "step": 24760 }, { "epoch": 0.7227491609514082, "grad_norm": 0.9274016911121137, "learning_rate": 1.3813135124544327e-05, "loss": 0.5128, "step": 24765 }, { "epoch": 0.7228950824456443, "grad_norm": 0.9317734839774331, "learning_rate": 1.380451892197474e-05, "loss": 0.5276, "step": 24770 }, { "epoch": 0.7230410039398804, "grad_norm": 0.954064454326678, "learning_rate": 1.3795905908653706e-05, "loss": 0.5685, "step": 24775 }, { "epoch": 0.7231869254341164, "grad_norm": 1.1493734976006906, "learning_rate": 1.3787296086586937e-05, "loss": 0.5907, "step": 24780 }, { "epoch": 0.7233328469283525, "grad_norm": 0.8968722600358542, "learning_rate": 1.3778689457779384e-05, "loss": 0.5311, "step": 24785 }, { "epoch": 0.7234787684225886, "grad_norm": 1.1242738594814456, "learning_rate": 1.3770086024235263e-05, "loss": 0.6352, "step": 24790 }, { "epoch": 0.7236246899168247, "grad_norm": 1.1527975654634566, "learning_rate": 1.3761485787958028e-05, "loss": 0.6154, "step": 24795 }, { "epoch": 0.7237706114110608, "grad_norm": 1.0697957783709997, "learning_rate": 1.3752888750950433e-05, "loss": 0.4973, "step": 24800 }, { "epoch": 0.723916532905297, "grad_norm": 0.9825699910886434, "learning_rate": 1.3744294915214443e-05, "loss": 0.5287, "step": 24805 }, { "epoch": 0.7240624543995331, "grad_norm": 1.052330833100793, "learning_rate": 1.3735704282751294e-05, "loss": 0.5519, "step": 24810 }, { "epoch": 0.7242083758937692, "grad_norm": 1.0415379392135926, "learning_rate": 1.3727116855561478e-05, "loss": 0.5386, "step": 24815 }, { "epoch": 0.7243542973880053, "grad_norm": 1.0789041218116648, "learning_rate": 1.3718532635644727e-05, "loss": 0.5248, "step": 24820 }, { "epoch": 0.7245002188822414, "grad_norm": 1.1541761863394766, "learning_rate": 1.370995162500005e-05, "loss": 0.51, "step": 24825 }, { "epoch": 0.7246461403764775, "grad_norm": 0.8833589612047396, "learning_rate": 1.3701373825625694e-05, "loss": 0.5075, "step": 24830 }, { "epoch": 0.7247920618707135, "grad_norm": 1.0177952056470108, "learning_rate": 1.3692799239519155e-05, "loss": 0.5251, "step": 24835 }, { "epoch": 0.7249379833649496, "grad_norm": 1.0753824812576223, "learning_rate": 1.3684227868677191e-05, "loss": 0.4957, "step": 24840 }, { "epoch": 0.7250839048591857, "grad_norm": 1.1592437380251366, "learning_rate": 1.3675659715095793e-05, "loss": 0.5679, "step": 24845 }, { "epoch": 0.7252298263534218, "grad_norm": 1.1888215712744017, "learning_rate": 1.3667094780770234e-05, "loss": 0.5594, "step": 24850 }, { "epoch": 0.725375747847658, "grad_norm": 1.0947373918738463, "learning_rate": 1.3658533067695012e-05, "loss": 0.5442, "step": 24855 }, { "epoch": 0.7255216693418941, "grad_norm": 1.1977489526984082, "learning_rate": 1.3649974577863871e-05, "loss": 0.6208, "step": 24860 }, { "epoch": 0.7256675908361302, "grad_norm": 1.044143912132242, "learning_rate": 1.3641419313269824e-05, "loss": 0.535, "step": 24865 }, { "epoch": 0.7258135123303663, "grad_norm": 0.8902421415342231, "learning_rate": 1.3632867275905129e-05, "loss": 0.5404, "step": 24870 }, { "epoch": 0.7259594338246024, "grad_norm": 1.0042582937693387, "learning_rate": 1.3624318467761282e-05, "loss": 0.4978, "step": 24875 }, { "epoch": 0.7261053553188385, "grad_norm": 1.0522703978741763, "learning_rate": 1.3615772890829034e-05, "loss": 0.5685, "step": 24880 }, { "epoch": 0.7262512768130746, "grad_norm": 0.913587632733783, "learning_rate": 1.3607230547098382e-05, "loss": 0.5397, "step": 24885 }, { "epoch": 0.7263971983073106, "grad_norm": 0.9836496182677489, "learning_rate": 1.3598691438558564e-05, "loss": 0.5273, "step": 24890 }, { "epoch": 0.7265431198015467, "grad_norm": 1.1476049492298055, "learning_rate": 1.3590155567198082e-05, "loss": 0.5263, "step": 24895 }, { "epoch": 0.7266890412957828, "grad_norm": 1.2436486421592616, "learning_rate": 1.358162293500466e-05, "loss": 0.6255, "step": 24900 }, { "epoch": 0.726834962790019, "grad_norm": 1.0366010603056368, "learning_rate": 1.3573093543965304e-05, "loss": 0.5713, "step": 24905 }, { "epoch": 0.7269808842842551, "grad_norm": 1.0646455422092935, "learning_rate": 1.3564567396066213e-05, "loss": 0.4962, "step": 24910 }, { "epoch": 0.7271268057784912, "grad_norm": 1.0905562208383737, "learning_rate": 1.355604449329288e-05, "loss": 0.5693, "step": 24915 }, { "epoch": 0.7272727272727273, "grad_norm": 0.9392806608497347, "learning_rate": 1.3547524837630013e-05, "loss": 0.5278, "step": 24920 }, { "epoch": 0.7274186487669634, "grad_norm": 1.245128905418762, "learning_rate": 1.3539008431061576e-05, "loss": 0.56, "step": 24925 }, { "epoch": 0.7275645702611995, "grad_norm": 0.9882712513203719, "learning_rate": 1.3530495275570773e-05, "loss": 0.5162, "step": 24930 }, { "epoch": 0.7277104917554356, "grad_norm": 1.0235677160058934, "learning_rate": 1.3521985373140046e-05, "loss": 0.4985, "step": 24935 }, { "epoch": 0.7278564132496717, "grad_norm": 1.260192405412068, "learning_rate": 1.3513478725751094e-05, "loss": 0.5742, "step": 24940 }, { "epoch": 0.7280023347439077, "grad_norm": 1.0147625933917526, "learning_rate": 1.3504975335384845e-05, "loss": 0.5328, "step": 24945 }, { "epoch": 0.7281482562381438, "grad_norm": 1.0419171675453127, "learning_rate": 1.3496475204021472e-05, "loss": 0.5402, "step": 24950 }, { "epoch": 0.72829417773238, "grad_norm": 0.9945950867103498, "learning_rate": 1.3487978333640391e-05, "loss": 0.5159, "step": 24955 }, { "epoch": 0.7284400992266161, "grad_norm": 1.0193792119020708, "learning_rate": 1.3479484726220245e-05, "loss": 0.5992, "step": 24960 }, { "epoch": 0.7285860207208522, "grad_norm": 1.1210378157260457, "learning_rate": 1.3470994383738946e-05, "loss": 0.5718, "step": 24965 }, { "epoch": 0.7287319422150883, "grad_norm": 0.9106248327010925, "learning_rate": 1.3462507308173616e-05, "loss": 0.5048, "step": 24970 }, { "epoch": 0.7288778637093244, "grad_norm": 1.137208457783271, "learning_rate": 1.345402350150065e-05, "loss": 0.5303, "step": 24975 }, { "epoch": 0.7290237852035605, "grad_norm": 1.0232967254671586, "learning_rate": 1.344554296569563e-05, "loss": 0.5517, "step": 24980 }, { "epoch": 0.7291697066977966, "grad_norm": 1.084303467882424, "learning_rate": 1.343706570273343e-05, "loss": 0.5613, "step": 24985 }, { "epoch": 0.7293156281920327, "grad_norm": 1.048543664385041, "learning_rate": 1.3428591714588134e-05, "loss": 0.5635, "step": 24990 }, { "epoch": 0.7294615496862688, "grad_norm": 0.9990611698961829, "learning_rate": 1.3420121003233055e-05, "loss": 0.5502, "step": 24995 }, { "epoch": 0.7296074711805048, "grad_norm": 1.1208309789969972, "learning_rate": 1.3411653570640786e-05, "loss": 0.5497, "step": 25000 }, { "epoch": 0.729753392674741, "grad_norm": 1.1804782246716283, "learning_rate": 1.3403189418783093e-05, "loss": 0.5699, "step": 25005 }, { "epoch": 0.7298993141689771, "grad_norm": 1.2854738742147318, "learning_rate": 1.3394728549631035e-05, "loss": 0.5345, "step": 25010 }, { "epoch": 0.7300452356632132, "grad_norm": 0.8898250793964284, "learning_rate": 1.3386270965154874e-05, "loss": 0.5171, "step": 25015 }, { "epoch": 0.7301911571574493, "grad_norm": 0.9419518674295984, "learning_rate": 1.3377816667324116e-05, "loss": 0.5343, "step": 25020 }, { "epoch": 0.7303370786516854, "grad_norm": 1.0704773079951875, "learning_rate": 1.336936565810751e-05, "loss": 0.5452, "step": 25025 }, { "epoch": 0.7304830001459215, "grad_norm": 1.1023298964262838, "learning_rate": 1.3360917939473016e-05, "loss": 0.5448, "step": 25030 }, { "epoch": 0.7306289216401576, "grad_norm": 0.9900842006034813, "learning_rate": 1.335247351338786e-05, "loss": 0.5279, "step": 25035 }, { "epoch": 0.7307748431343937, "grad_norm": 0.9998328715450364, "learning_rate": 1.3344032381818469e-05, "loss": 0.5159, "step": 25040 }, { "epoch": 0.7309207646286298, "grad_norm": 1.0287709411333383, "learning_rate": 1.3335594546730545e-05, "loss": 0.5165, "step": 25045 }, { "epoch": 0.7310666861228658, "grad_norm": 1.2239768990007813, "learning_rate": 1.3327160010088958e-05, "loss": 0.612, "step": 25050 }, { "epoch": 0.731212607617102, "grad_norm": 0.992710013363525, "learning_rate": 1.3318728773857874e-05, "loss": 0.5674, "step": 25055 }, { "epoch": 0.7313585291113381, "grad_norm": 0.9288355374173342, "learning_rate": 1.3310300840000656e-05, "loss": 0.542, "step": 25060 }, { "epoch": 0.7315044506055742, "grad_norm": 0.9629195865983087, "learning_rate": 1.3301876210479896e-05, "loss": 0.5816, "step": 25065 }, { "epoch": 0.7316503720998103, "grad_norm": 1.0422849435404384, "learning_rate": 1.3293454887257456e-05, "loss": 0.5501, "step": 25070 }, { "epoch": 0.7317962935940464, "grad_norm": 0.9495058815796161, "learning_rate": 1.3285036872294359e-05, "loss": 0.5814, "step": 25075 }, { "epoch": 0.7319422150882825, "grad_norm": 1.0803645519281164, "learning_rate": 1.3276622167550928e-05, "loss": 0.5822, "step": 25080 }, { "epoch": 0.7320881365825186, "grad_norm": 0.9533636908469251, "learning_rate": 1.3268210774986673e-05, "loss": 0.5767, "step": 25085 }, { "epoch": 0.7322340580767547, "grad_norm": 1.0520121643432652, "learning_rate": 1.3259802696560333e-05, "loss": 0.5609, "step": 25090 }, { "epoch": 0.7323799795709908, "grad_norm": 1.0187550732776751, "learning_rate": 1.3251397934229914e-05, "loss": 0.5612, "step": 25095 }, { "epoch": 0.7325259010652269, "grad_norm": 0.9645179245506396, "learning_rate": 1.3242996489952585e-05, "loss": 0.5555, "step": 25100 }, { "epoch": 0.7326718225594631, "grad_norm": 1.0592009922412269, "learning_rate": 1.3234598365684808e-05, "loss": 0.509, "step": 25105 }, { "epoch": 0.7328177440536991, "grad_norm": 1.2442606341880318, "learning_rate": 1.322620356338223e-05, "loss": 0.5082, "step": 25110 }, { "epoch": 0.7329636655479352, "grad_norm": 1.0217467282651531, "learning_rate": 1.3217812084999753e-05, "loss": 0.5759, "step": 25115 }, { "epoch": 0.7331095870421713, "grad_norm": 1.0329878328551165, "learning_rate": 1.3209423932491466e-05, "loss": 0.5197, "step": 25120 }, { "epoch": 0.7332555085364074, "grad_norm": 1.1815872200404214, "learning_rate": 1.3201039107810722e-05, "loss": 0.5559, "step": 25125 }, { "epoch": 0.7334014300306435, "grad_norm": 1.0587165280268696, "learning_rate": 1.3192657612910083e-05, "loss": 0.4893, "step": 25130 }, { "epoch": 0.7335473515248796, "grad_norm": 1.0176391640366607, "learning_rate": 1.3184279449741326e-05, "loss": 0.5606, "step": 25135 }, { "epoch": 0.7336932730191157, "grad_norm": 0.9573205379674394, "learning_rate": 1.3175904620255486e-05, "loss": 0.4937, "step": 25140 }, { "epoch": 0.7338391945133518, "grad_norm": 0.9887979990747906, "learning_rate": 1.316753312640277e-05, "loss": 0.5585, "step": 25145 }, { "epoch": 0.7339851160075879, "grad_norm": 0.9923632007516789, "learning_rate": 1.315916497013266e-05, "loss": 0.5518, "step": 25150 }, { "epoch": 0.7341310375018241, "grad_norm": 1.0263889728903777, "learning_rate": 1.3150800153393816e-05, "loss": 0.5587, "step": 25155 }, { "epoch": 0.7342769589960602, "grad_norm": 0.9832763646765168, "learning_rate": 1.3142438678134161e-05, "loss": 0.6034, "step": 25160 }, { "epoch": 0.7344228804902962, "grad_norm": 0.9377201914182386, "learning_rate": 1.3134080546300815e-05, "loss": 0.5057, "step": 25165 }, { "epoch": 0.7345688019845323, "grad_norm": 1.1672845600917523, "learning_rate": 1.3125725759840123e-05, "loss": 0.5072, "step": 25170 }, { "epoch": 0.7347147234787684, "grad_norm": 1.0341457328193873, "learning_rate": 1.3117374320697656e-05, "loss": 0.5964, "step": 25175 }, { "epoch": 0.7348606449730045, "grad_norm": 1.095503482067089, "learning_rate": 1.310902623081819e-05, "loss": 0.576, "step": 25180 }, { "epoch": 0.7350065664672406, "grad_norm": 0.9048064511447627, "learning_rate": 1.3100681492145757e-05, "loss": 0.4863, "step": 25185 }, { "epoch": 0.7351524879614767, "grad_norm": 0.9930625815555961, "learning_rate": 1.309234010662356e-05, "loss": 0.5317, "step": 25190 }, { "epoch": 0.7352984094557128, "grad_norm": 0.947380651500713, "learning_rate": 1.3084002076194065e-05, "loss": 0.5415, "step": 25195 }, { "epoch": 0.7354443309499489, "grad_norm": 1.2026570428396774, "learning_rate": 1.3075667402798931e-05, "loss": 0.6066, "step": 25200 }, { "epoch": 0.7355902524441851, "grad_norm": 1.2133817124368318, "learning_rate": 1.3067336088379034e-05, "loss": 0.5386, "step": 25205 }, { "epoch": 0.7357361739384212, "grad_norm": 1.0464365770984325, "learning_rate": 1.3059008134874503e-05, "loss": 0.5677, "step": 25210 }, { "epoch": 0.7358820954326573, "grad_norm": 1.0467130848976993, "learning_rate": 1.3050683544224624e-05, "loss": 0.5598, "step": 25215 }, { "epoch": 0.7360280169268933, "grad_norm": 0.9610077681154812, "learning_rate": 1.3042362318367953e-05, "loss": 0.5113, "step": 25220 }, { "epoch": 0.7361739384211294, "grad_norm": 1.010748113549356, "learning_rate": 1.3034044459242234e-05, "loss": 0.5413, "step": 25225 }, { "epoch": 0.7363198599153655, "grad_norm": 0.9593836407782844, "learning_rate": 1.3025729968784447e-05, "loss": 0.4786, "step": 25230 }, { "epoch": 0.7364657814096016, "grad_norm": 1.01907740885405, "learning_rate": 1.3017418848930773e-05, "loss": 0.5506, "step": 25235 }, { "epoch": 0.7366117029038377, "grad_norm": 0.9389540561197258, "learning_rate": 1.3009111101616605e-05, "loss": 0.5393, "step": 25240 }, { "epoch": 0.7367576243980738, "grad_norm": 0.9366584227149366, "learning_rate": 1.300080672877656e-05, "loss": 0.5526, "step": 25245 }, { "epoch": 0.7369035458923099, "grad_norm": 1.0513284063906845, "learning_rate": 1.2992505732344463e-05, "loss": 0.5371, "step": 25250 }, { "epoch": 0.7370494673865461, "grad_norm": 1.0595101507416727, "learning_rate": 1.2984208114253366e-05, "loss": 0.514, "step": 25255 }, { "epoch": 0.7371953888807822, "grad_norm": 0.9967972199191502, "learning_rate": 1.2975913876435519e-05, "loss": 0.5836, "step": 25260 }, { "epoch": 0.7373413103750183, "grad_norm": 1.0115150034897316, "learning_rate": 1.296762302082239e-05, "loss": 0.5321, "step": 25265 }, { "epoch": 0.7374872318692544, "grad_norm": 0.9694402181154759, "learning_rate": 1.2959335549344657e-05, "loss": 0.5051, "step": 25270 }, { "epoch": 0.7376331533634904, "grad_norm": 1.2303331437095986, "learning_rate": 1.2951051463932207e-05, "loss": 0.5237, "step": 25275 }, { "epoch": 0.7377790748577265, "grad_norm": 0.9520640601815741, "learning_rate": 1.2942770766514167e-05, "loss": 0.5198, "step": 25280 }, { "epoch": 0.7379249963519626, "grad_norm": 0.9773029328247239, "learning_rate": 1.293449345901882e-05, "loss": 0.541, "step": 25285 }, { "epoch": 0.7380709178461987, "grad_norm": 1.2470725909333529, "learning_rate": 1.2926219543373718e-05, "loss": 0.5776, "step": 25290 }, { "epoch": 0.7382168393404348, "grad_norm": 1.141465175644101, "learning_rate": 1.2917949021505582e-05, "loss": 0.5972, "step": 25295 }, { "epoch": 0.7383627608346709, "grad_norm": 0.9609788573037573, "learning_rate": 1.2909681895340367e-05, "loss": 0.5649, "step": 25300 }, { "epoch": 0.7385086823289071, "grad_norm": 1.0364169636451106, "learning_rate": 1.2901418166803223e-05, "loss": 0.5828, "step": 25305 }, { "epoch": 0.7386546038231432, "grad_norm": 0.9538950234671885, "learning_rate": 1.2893157837818514e-05, "loss": 0.5583, "step": 25310 }, { "epoch": 0.7388005253173793, "grad_norm": 1.0643406247670861, "learning_rate": 1.2884900910309813e-05, "loss": 0.6018, "step": 25315 }, { "epoch": 0.7389464468116154, "grad_norm": 1.0262789379219452, "learning_rate": 1.2876647386199892e-05, "loss": 0.5783, "step": 25320 }, { "epoch": 0.7390923683058515, "grad_norm": 1.1026771368815542, "learning_rate": 1.286839726741075e-05, "loss": 0.5665, "step": 25325 }, { "epoch": 0.7392382898000875, "grad_norm": 0.924100406038911, "learning_rate": 1.2860150555863577e-05, "loss": 0.5723, "step": 25330 }, { "epoch": 0.7393842112943236, "grad_norm": 1.0483367425838177, "learning_rate": 1.2851907253478774e-05, "loss": 0.5323, "step": 25335 }, { "epoch": 0.7395301327885597, "grad_norm": 1.0042384594865397, "learning_rate": 1.2843667362175937e-05, "loss": 0.5512, "step": 25340 }, { "epoch": 0.7396760542827958, "grad_norm": 0.9384001947194012, "learning_rate": 1.2835430883873898e-05, "loss": 0.507, "step": 25345 }, { "epoch": 0.7398219757770319, "grad_norm": 1.0745238063295024, "learning_rate": 1.282719782049066e-05, "loss": 0.565, "step": 25350 }, { "epoch": 0.7399678972712681, "grad_norm": 1.090054603605742, "learning_rate": 1.2818968173943457e-05, "loss": 0.5802, "step": 25355 }, { "epoch": 0.7401138187655042, "grad_norm": 1.1182360724752936, "learning_rate": 1.2810741946148707e-05, "loss": 0.5232, "step": 25360 }, { "epoch": 0.7402597402597403, "grad_norm": 1.0473042664186767, "learning_rate": 1.2802519139022034e-05, "loss": 0.5216, "step": 25365 }, { "epoch": 0.7404056617539764, "grad_norm": 1.0216923113681062, "learning_rate": 1.279429975447829e-05, "loss": 0.5621, "step": 25370 }, { "epoch": 0.7405515832482125, "grad_norm": 1.092407081871658, "learning_rate": 1.2786083794431505e-05, "loss": 0.5767, "step": 25375 }, { "epoch": 0.7406975047424486, "grad_norm": 0.9090653668478144, "learning_rate": 1.2777871260794918e-05, "loss": 0.5293, "step": 25380 }, { "epoch": 0.7408434262366846, "grad_norm": 1.1172868093720418, "learning_rate": 1.2769662155480968e-05, "loss": 0.5669, "step": 25385 }, { "epoch": 0.7409893477309207, "grad_norm": 1.1706064875136633, "learning_rate": 1.2761456480401296e-05, "loss": 0.5388, "step": 25390 }, { "epoch": 0.7411352692251568, "grad_norm": 0.9929775739484752, "learning_rate": 1.275325423746676e-05, "loss": 0.5389, "step": 25395 }, { "epoch": 0.7412811907193929, "grad_norm": 1.3032373806818933, "learning_rate": 1.2745055428587393e-05, "loss": 0.565, "step": 25400 }, { "epoch": 0.7414271122136291, "grad_norm": 1.1310822533461034, "learning_rate": 1.2736860055672447e-05, "loss": 0.5667, "step": 25405 }, { "epoch": 0.7415730337078652, "grad_norm": 1.126759467635421, "learning_rate": 1.272866812063036e-05, "loss": 0.6085, "step": 25410 }, { "epoch": 0.7417189552021013, "grad_norm": 0.945681056225867, "learning_rate": 1.2720479625368786e-05, "loss": 0.5156, "step": 25415 }, { "epoch": 0.7418648766963374, "grad_norm": 1.2618692978413297, "learning_rate": 1.271229457179457e-05, "loss": 0.597, "step": 25420 }, { "epoch": 0.7420107981905735, "grad_norm": 1.043740837656037, "learning_rate": 1.2704112961813747e-05, "loss": 0.5383, "step": 25425 }, { "epoch": 0.7421567196848096, "grad_norm": 1.0465747646212846, "learning_rate": 1.269593479733156e-05, "loss": 0.5424, "step": 25430 }, { "epoch": 0.7423026411790457, "grad_norm": 0.8664855832434459, "learning_rate": 1.2687760080252443e-05, "loss": 0.4679, "step": 25435 }, { "epoch": 0.7424485626732817, "grad_norm": 0.8744408774993557, "learning_rate": 1.2679588812480043e-05, "loss": 0.5036, "step": 25440 }, { "epoch": 0.7425944841675178, "grad_norm": 1.031492824204485, "learning_rate": 1.2671420995917185e-05, "loss": 0.5321, "step": 25445 }, { "epoch": 0.7427404056617539, "grad_norm": 0.9338973390288998, "learning_rate": 1.2663256632465903e-05, "loss": 0.525, "step": 25450 }, { "epoch": 0.7428863271559901, "grad_norm": 1.2091478700003864, "learning_rate": 1.2655095724027414e-05, "loss": 0.5758, "step": 25455 }, { "epoch": 0.7430322486502262, "grad_norm": 1.1075708304779006, "learning_rate": 1.2646938272502134e-05, "loss": 0.5101, "step": 25460 }, { "epoch": 0.7431781701444623, "grad_norm": 1.0104157537561222, "learning_rate": 1.263878427978969e-05, "loss": 0.486, "step": 25465 }, { "epoch": 0.7433240916386984, "grad_norm": 0.9563619346991841, "learning_rate": 1.2630633747788891e-05, "loss": 0.4798, "step": 25470 }, { "epoch": 0.7434700131329345, "grad_norm": 1.118596412294096, "learning_rate": 1.2622486678397732e-05, "loss": 0.5559, "step": 25475 }, { "epoch": 0.7436159346271706, "grad_norm": 1.0867858788235167, "learning_rate": 1.2614343073513413e-05, "loss": 0.5971, "step": 25480 }, { "epoch": 0.7437618561214067, "grad_norm": 1.1108414145235843, "learning_rate": 1.260620293503233e-05, "loss": 0.4884, "step": 25485 }, { "epoch": 0.7439077776156428, "grad_norm": 1.0101355416992428, "learning_rate": 1.2598066264850061e-05, "loss": 0.5085, "step": 25490 }, { "epoch": 0.7440536991098788, "grad_norm": 1.1083711609452138, "learning_rate": 1.2589933064861382e-05, "loss": 0.5699, "step": 25495 }, { "epoch": 0.7441996206041149, "grad_norm": 1.0628758072733857, "learning_rate": 1.2581803336960262e-05, "loss": 0.5774, "step": 25500 }, { "epoch": 0.7443455420983511, "grad_norm": 0.9367883689633545, "learning_rate": 1.2573677083039856e-05, "loss": 0.5218, "step": 25505 }, { "epoch": 0.7444914635925872, "grad_norm": 1.2783136425482464, "learning_rate": 1.2565554304992522e-05, "loss": 0.6178, "step": 25510 }, { "epoch": 0.7446373850868233, "grad_norm": 0.9114066630977176, "learning_rate": 1.2557435004709796e-05, "loss": 0.5118, "step": 25515 }, { "epoch": 0.7447833065810594, "grad_norm": 0.8338505604524789, "learning_rate": 1.2549319184082408e-05, "loss": 0.5539, "step": 25520 }, { "epoch": 0.7449292280752955, "grad_norm": 0.9797838951828499, "learning_rate": 1.254120684500028e-05, "loss": 0.5078, "step": 25525 }, { "epoch": 0.7450751495695316, "grad_norm": 1.0069589685082077, "learning_rate": 1.2533097989352513e-05, "loss": 0.5356, "step": 25530 }, { "epoch": 0.7452210710637677, "grad_norm": 1.0403052166121876, "learning_rate": 1.252499261902742e-05, "loss": 0.5645, "step": 25535 }, { "epoch": 0.7453669925580038, "grad_norm": 1.0450398430405794, "learning_rate": 1.2516890735912477e-05, "loss": 0.5561, "step": 25540 }, { "epoch": 0.7455129140522399, "grad_norm": 0.9924120042741277, "learning_rate": 1.250879234189437e-05, "loss": 0.5514, "step": 25545 }, { "epoch": 0.745658835546476, "grad_norm": 0.9063258798785097, "learning_rate": 1.250069743885895e-05, "loss": 0.5632, "step": 25550 }, { "epoch": 0.7458047570407121, "grad_norm": 1.0469000263831814, "learning_rate": 1.249260602869127e-05, "loss": 0.5257, "step": 25555 }, { "epoch": 0.7459506785349482, "grad_norm": 0.9803628707261173, "learning_rate": 1.248451811327557e-05, "loss": 0.5637, "step": 25560 }, { "epoch": 0.7460966000291843, "grad_norm": 1.1460900109108125, "learning_rate": 1.2476433694495266e-05, "loss": 0.5822, "step": 25565 }, { "epoch": 0.7462425215234204, "grad_norm": 1.0089698183451101, "learning_rate": 1.246835277423297e-05, "loss": 0.5512, "step": 25570 }, { "epoch": 0.7463884430176565, "grad_norm": 0.9418966492405134, "learning_rate": 1.2460275354370467e-05, "loss": 0.6279, "step": 25575 }, { "epoch": 0.7465343645118926, "grad_norm": 1.075479539471517, "learning_rate": 1.2452201436788748e-05, "loss": 0.4726, "step": 25580 }, { "epoch": 0.7466802860061287, "grad_norm": 1.034681224530147, "learning_rate": 1.2444131023367962e-05, "loss": 0.5227, "step": 25585 }, { "epoch": 0.7468262075003648, "grad_norm": 1.0711723820905, "learning_rate": 1.243606411598748e-05, "loss": 0.5799, "step": 25590 }, { "epoch": 0.7469721289946009, "grad_norm": 1.0895783824566092, "learning_rate": 1.2428000716525799e-05, "loss": 0.5761, "step": 25595 }, { "epoch": 0.7471180504888371, "grad_norm": 0.9819716620730959, "learning_rate": 1.2419940826860655e-05, "loss": 0.5177, "step": 25600 }, { "epoch": 0.7472639719830731, "grad_norm": 0.8782455978579036, "learning_rate": 1.2411884448868937e-05, "loss": 0.5308, "step": 25605 }, { "epoch": 0.7474098934773092, "grad_norm": 1.1614015957066017, "learning_rate": 1.2403831584426716e-05, "loss": 0.539, "step": 25610 }, { "epoch": 0.7475558149715453, "grad_norm": 0.8748345954958823, "learning_rate": 1.2395782235409274e-05, "loss": 0.5738, "step": 25615 }, { "epoch": 0.7477017364657814, "grad_norm": 0.9710031945082617, "learning_rate": 1.2387736403691019e-05, "loss": 0.5729, "step": 25620 }, { "epoch": 0.7478476579600175, "grad_norm": 1.0450618853947413, "learning_rate": 1.2379694091145602e-05, "loss": 0.5896, "step": 25625 }, { "epoch": 0.7479935794542536, "grad_norm": 1.003233099959815, "learning_rate": 1.2371655299645815e-05, "loss": 0.4996, "step": 25630 }, { "epoch": 0.7481395009484897, "grad_norm": 0.9860045169036421, "learning_rate": 1.2363620031063638e-05, "loss": 0.5406, "step": 25635 }, { "epoch": 0.7482854224427258, "grad_norm": 1.1722039167321137, "learning_rate": 1.235558828727024e-05, "loss": 0.6086, "step": 25640 }, { "epoch": 0.7484313439369619, "grad_norm": 1.3099115426647863, "learning_rate": 1.2347560070135948e-05, "loss": 0.5371, "step": 25645 }, { "epoch": 0.7485772654311981, "grad_norm": 0.8645467297104173, "learning_rate": 1.2339535381530302e-05, "loss": 0.5392, "step": 25650 }, { "epoch": 0.7487231869254342, "grad_norm": 0.90651514980129, "learning_rate": 1.2331514223321985e-05, "loss": 0.496, "step": 25655 }, { "epoch": 0.7488691084196702, "grad_norm": 1.0756932529184244, "learning_rate": 1.2323496597378892e-05, "loss": 0.5699, "step": 25660 }, { "epoch": 0.7490150299139063, "grad_norm": 0.9666964534861574, "learning_rate": 1.2315482505568052e-05, "loss": 0.5679, "step": 25665 }, { "epoch": 0.7491609514081424, "grad_norm": 1.0970593532615356, "learning_rate": 1.2307471949755719e-05, "loss": 0.5727, "step": 25670 }, { "epoch": 0.7493068729023785, "grad_norm": 1.0683184475371457, "learning_rate": 1.2299464931807286e-05, "loss": 0.6314, "step": 25675 }, { "epoch": 0.7494527943966146, "grad_norm": 0.9568494891869519, "learning_rate": 1.2291461453587335e-05, "loss": 0.5228, "step": 25680 }, { "epoch": 0.7495987158908507, "grad_norm": 1.117201976766499, "learning_rate": 1.2283461516959648e-05, "loss": 0.5623, "step": 25685 }, { "epoch": 0.7497446373850868, "grad_norm": 0.9464249873501117, "learning_rate": 1.2275465123787128e-05, "loss": 0.5471, "step": 25690 }, { "epoch": 0.7498905588793229, "grad_norm": 0.9907759266612338, "learning_rate": 1.2267472275931905e-05, "loss": 0.4934, "step": 25695 }, { "epoch": 0.7500364803735591, "grad_norm": 0.9282906386658726, "learning_rate": 1.225948297525526e-05, "loss": 0.4944, "step": 25700 }, { "epoch": 0.7501824018677952, "grad_norm": 1.0302221469690025, "learning_rate": 1.225149722361764e-05, "loss": 0.5713, "step": 25705 }, { "epoch": 0.7503283233620313, "grad_norm": 0.9536677406356936, "learning_rate": 1.2243515022878699e-05, "loss": 0.4942, "step": 25710 }, { "epoch": 0.7504742448562673, "grad_norm": 1.122923194063469, "learning_rate": 1.2235536374897215e-05, "loss": 0.5914, "step": 25715 }, { "epoch": 0.7506201663505034, "grad_norm": 1.0219895713651888, "learning_rate": 1.2227561281531183e-05, "loss": 0.5612, "step": 25720 }, { "epoch": 0.7507660878447395, "grad_norm": 1.0491126926372052, "learning_rate": 1.221958974463774e-05, "loss": 0.593, "step": 25725 }, { "epoch": 0.7509120093389756, "grad_norm": 0.9893507325396295, "learning_rate": 1.2211621766073229e-05, "loss": 0.4979, "step": 25730 }, { "epoch": 0.7510579308332117, "grad_norm": 1.1725701414719172, "learning_rate": 1.2203657347693115e-05, "loss": 0.5798, "step": 25735 }, { "epoch": 0.7512038523274478, "grad_norm": 0.863698238264407, "learning_rate": 1.219569649135208e-05, "loss": 0.5066, "step": 25740 }, { "epoch": 0.7513497738216839, "grad_norm": 1.0188583985408846, "learning_rate": 1.2187739198903953e-05, "loss": 0.5638, "step": 25745 }, { "epoch": 0.7514956953159201, "grad_norm": 0.979049262163374, "learning_rate": 1.217978547220173e-05, "loss": 0.5013, "step": 25750 }, { "epoch": 0.7516416168101562, "grad_norm": 0.962044520822094, "learning_rate": 1.217183531309761e-05, "loss": 0.4915, "step": 25755 }, { "epoch": 0.7517875383043923, "grad_norm": 0.9815783493050788, "learning_rate": 1.21638887234429e-05, "loss": 0.5138, "step": 25760 }, { "epoch": 0.7519334597986284, "grad_norm": 1.098760217680226, "learning_rate": 1.215594570508814e-05, "loss": 0.5442, "step": 25765 }, { "epoch": 0.7520793812928644, "grad_norm": 1.1703149140345381, "learning_rate": 1.2148006259883001e-05, "loss": 0.6309, "step": 25770 }, { "epoch": 0.7522253027871005, "grad_norm": 0.9750847021164261, "learning_rate": 1.2140070389676322e-05, "loss": 0.565, "step": 25775 }, { "epoch": 0.7523712242813366, "grad_norm": 1.0192018431041778, "learning_rate": 1.2132138096316141e-05, "loss": 0.5489, "step": 25780 }, { "epoch": 0.7525171457755727, "grad_norm": 1.1034162032635708, "learning_rate": 1.2124209381649615e-05, "loss": 0.5704, "step": 25785 }, { "epoch": 0.7526630672698088, "grad_norm": 1.0089886523816638, "learning_rate": 1.2116284247523111e-05, "loss": 0.5292, "step": 25790 }, { "epoch": 0.7528089887640449, "grad_norm": 0.9485782122555003, "learning_rate": 1.2108362695782133e-05, "loss": 0.5778, "step": 25795 }, { "epoch": 0.7529549102582811, "grad_norm": 1.1223467935927802, "learning_rate": 1.2100444728271373e-05, "loss": 0.5411, "step": 25800 }, { "epoch": 0.7531008317525172, "grad_norm": 1.0289524117390865, "learning_rate": 1.2092530346834676e-05, "loss": 0.4867, "step": 25805 }, { "epoch": 0.7532467532467533, "grad_norm": 1.0984035194440613, "learning_rate": 1.2084619553315045e-05, "loss": 0.5269, "step": 25810 }, { "epoch": 0.7533926747409894, "grad_norm": 1.04075958371763, "learning_rate": 1.2076712349554665e-05, "loss": 0.5591, "step": 25815 }, { "epoch": 0.7535385962352255, "grad_norm": 0.9385249767136463, "learning_rate": 1.2068808737394865e-05, "loss": 0.5413, "step": 25820 }, { "epoch": 0.7536845177294615, "grad_norm": 0.9094181827674642, "learning_rate": 1.2060908718676168e-05, "loss": 0.4641, "step": 25825 }, { "epoch": 0.7538304392236976, "grad_norm": 1.164689036327355, "learning_rate": 1.2053012295238217e-05, "loss": 0.5803, "step": 25830 }, { "epoch": 0.7539763607179337, "grad_norm": 1.1286062858097257, "learning_rate": 1.204511946891986e-05, "loss": 0.56, "step": 25835 }, { "epoch": 0.7541222822121698, "grad_norm": 1.1097275886625522, "learning_rate": 1.2037230241559074e-05, "loss": 0.5715, "step": 25840 }, { "epoch": 0.7542682037064059, "grad_norm": 1.0564247130739828, "learning_rate": 1.202934461499303e-05, "loss": 0.5334, "step": 25845 }, { "epoch": 0.7544141252006421, "grad_norm": 0.9972303161086802, "learning_rate": 1.2021462591058037e-05, "loss": 0.5436, "step": 25850 }, { "epoch": 0.7545600466948782, "grad_norm": 1.169238260709802, "learning_rate": 1.2013584171589565e-05, "loss": 0.5476, "step": 25855 }, { "epoch": 0.7547059681891143, "grad_norm": 1.0615169711425816, "learning_rate": 1.2005709358422258e-05, "loss": 0.5662, "step": 25860 }, { "epoch": 0.7548518896833504, "grad_norm": 1.0080620948780274, "learning_rate": 1.1997838153389904e-05, "loss": 0.5513, "step": 25865 }, { "epoch": 0.7549978111775865, "grad_norm": 0.9875728082709048, "learning_rate": 1.1989970558325475e-05, "loss": 0.4737, "step": 25870 }, { "epoch": 0.7551437326718226, "grad_norm": 0.9693294617578385, "learning_rate": 1.1982106575061076e-05, "loss": 0.5388, "step": 25875 }, { "epoch": 0.7552896541660586, "grad_norm": 0.987254170263494, "learning_rate": 1.1974246205427989e-05, "loss": 0.529, "step": 25880 }, { "epoch": 0.7554355756602947, "grad_norm": 1.093244516130019, "learning_rate": 1.1966389451256644e-05, "loss": 0.5386, "step": 25885 }, { "epoch": 0.7555814971545308, "grad_norm": 1.0410642537186547, "learning_rate": 1.1958536314376625e-05, "loss": 0.5726, "step": 25890 }, { "epoch": 0.7557274186487669, "grad_norm": 0.9468226472007348, "learning_rate": 1.1950686796616698e-05, "loss": 0.5276, "step": 25895 }, { "epoch": 0.7558733401430031, "grad_norm": 1.162583147170506, "learning_rate": 1.1942840899804763e-05, "loss": 0.5885, "step": 25900 }, { "epoch": 0.7560192616372392, "grad_norm": 0.9937293801527998, "learning_rate": 1.1934998625767882e-05, "loss": 0.5107, "step": 25905 }, { "epoch": 0.7561651831314753, "grad_norm": 1.035658591836072, "learning_rate": 1.192715997633227e-05, "loss": 0.5818, "step": 25910 }, { "epoch": 0.7563111046257114, "grad_norm": 1.012141341232472, "learning_rate": 1.1919324953323312e-05, "loss": 0.5705, "step": 25915 }, { "epoch": 0.7564570261199475, "grad_norm": 0.9660426062870733, "learning_rate": 1.191149355856554e-05, "loss": 0.4958, "step": 25920 }, { "epoch": 0.7566029476141836, "grad_norm": 0.994359619903572, "learning_rate": 1.1903665793882633e-05, "loss": 0.6293, "step": 25925 }, { "epoch": 0.7567488691084197, "grad_norm": 1.2261718997537736, "learning_rate": 1.1895841661097438e-05, "loss": 0.5709, "step": 25930 }, { "epoch": 0.7568947906026557, "grad_norm": 0.9353713565401006, "learning_rate": 1.188802116203194e-05, "loss": 0.5279, "step": 25935 }, { "epoch": 0.7570407120968918, "grad_norm": 1.039585047909423, "learning_rate": 1.1880204298507305e-05, "loss": 0.5527, "step": 25940 }, { "epoch": 0.7571866335911279, "grad_norm": 1.0287603599490898, "learning_rate": 1.1872391072343824e-05, "loss": 0.5212, "step": 25945 }, { "epoch": 0.7573325550853641, "grad_norm": 1.0005440195775186, "learning_rate": 1.186458148536096e-05, "loss": 0.5194, "step": 25950 }, { "epoch": 0.7574784765796002, "grad_norm": 0.996583335606949, "learning_rate": 1.1856775539377316e-05, "loss": 0.5035, "step": 25955 }, { "epoch": 0.7576243980738363, "grad_norm": 1.1174549431129843, "learning_rate": 1.1848973236210644e-05, "loss": 0.5421, "step": 25960 }, { "epoch": 0.7577703195680724, "grad_norm": 1.019774627318254, "learning_rate": 1.1841174577677873e-05, "loss": 0.5255, "step": 25965 }, { "epoch": 0.7579162410623085, "grad_norm": 0.9448878674420801, "learning_rate": 1.1833379565595058e-05, "loss": 0.4771, "step": 25970 }, { "epoch": 0.7580621625565446, "grad_norm": 1.1242599847384511, "learning_rate": 1.1825588201777413e-05, "loss": 0.5653, "step": 25975 }, { "epoch": 0.7582080840507807, "grad_norm": 1.4572133065036537, "learning_rate": 1.1817800488039297e-05, "loss": 0.5845, "step": 25980 }, { "epoch": 0.7583540055450168, "grad_norm": 1.0459839130174469, "learning_rate": 1.181001642619424e-05, "loss": 0.5509, "step": 25985 }, { "epoch": 0.7584999270392528, "grad_norm": 1.1006205740381068, "learning_rate": 1.1802236018054896e-05, "loss": 0.5806, "step": 25990 }, { "epoch": 0.7586458485334889, "grad_norm": 1.0035135232840215, "learning_rate": 1.179445926543308e-05, "loss": 0.519, "step": 25995 }, { "epoch": 0.7587917700277251, "grad_norm": 0.8542348077225248, "learning_rate": 1.1786686170139753e-05, "loss": 0.5104, "step": 26000 }, { "epoch": 0.7589376915219612, "grad_norm": 1.1252292451347838, "learning_rate": 1.1778916733985017e-05, "loss": 0.5049, "step": 26005 }, { "epoch": 0.7590836130161973, "grad_norm": 0.869429282723768, "learning_rate": 1.1771150958778147e-05, "loss": 0.5532, "step": 26010 }, { "epoch": 0.7592295345104334, "grad_norm": 0.9781650608313145, "learning_rate": 1.1763388846327542e-05, "loss": 0.5441, "step": 26015 }, { "epoch": 0.7593754560046695, "grad_norm": 1.006431306882641, "learning_rate": 1.1755630398440753e-05, "loss": 0.5183, "step": 26020 }, { "epoch": 0.7595213774989056, "grad_norm": 0.957305906675393, "learning_rate": 1.1747875616924482e-05, "loss": 0.5426, "step": 26025 }, { "epoch": 0.7596672989931417, "grad_norm": 1.208702423084591, "learning_rate": 1.1740124503584567e-05, "loss": 0.5521, "step": 26030 }, { "epoch": 0.7598132204873778, "grad_norm": 0.9551373867755021, "learning_rate": 1.173237706022601e-05, "loss": 0.5069, "step": 26035 }, { "epoch": 0.7599591419816139, "grad_norm": 1.0468417229080547, "learning_rate": 1.1724633288652941e-05, "loss": 0.537, "step": 26040 }, { "epoch": 0.7601050634758499, "grad_norm": 1.069712247237243, "learning_rate": 1.1716893190668648e-05, "loss": 0.5907, "step": 26045 }, { "epoch": 0.7602509849700861, "grad_norm": 1.119615581164806, "learning_rate": 1.1709156768075547e-05, "loss": 0.5598, "step": 26050 }, { "epoch": 0.7603969064643222, "grad_norm": 1.2004525093680338, "learning_rate": 1.1701424022675223e-05, "loss": 0.5149, "step": 26055 }, { "epoch": 0.7605428279585583, "grad_norm": 1.254879113452933, "learning_rate": 1.1693694956268381e-05, "loss": 0.5606, "step": 26060 }, { "epoch": 0.7606887494527944, "grad_norm": 1.2463059205487388, "learning_rate": 1.1685969570654878e-05, "loss": 0.5539, "step": 26065 }, { "epoch": 0.7608346709470305, "grad_norm": 0.9331520088335304, "learning_rate": 1.1678247867633718e-05, "loss": 0.5122, "step": 26070 }, { "epoch": 0.7609805924412666, "grad_norm": 0.9776996854919341, "learning_rate": 1.1670529849003036e-05, "loss": 0.533, "step": 26075 }, { "epoch": 0.7611265139355027, "grad_norm": 1.1555457217301646, "learning_rate": 1.1662815516560128e-05, "loss": 0.5441, "step": 26080 }, { "epoch": 0.7612724354297388, "grad_norm": 1.0706136107791275, "learning_rate": 1.1655104872101415e-05, "loss": 0.5485, "step": 26085 }, { "epoch": 0.7614183569239749, "grad_norm": 1.2403313969508123, "learning_rate": 1.1647397917422464e-05, "loss": 0.601, "step": 26090 }, { "epoch": 0.761564278418211, "grad_norm": 1.1240766869317989, "learning_rate": 1.163969465431798e-05, "loss": 0.5524, "step": 26095 }, { "epoch": 0.7617101999124472, "grad_norm": 1.0278855921577061, "learning_rate": 1.163199508458182e-05, "loss": 0.6085, "step": 26100 }, { "epoch": 0.7618561214066832, "grad_norm": 0.8901685700546406, "learning_rate": 1.162429921000697e-05, "loss": 0.5121, "step": 26105 }, { "epoch": 0.7620020429009193, "grad_norm": 1.01239652661771, "learning_rate": 1.1616607032385555e-05, "loss": 0.5829, "step": 26110 }, { "epoch": 0.7621479643951554, "grad_norm": 1.0578567172497082, "learning_rate": 1.1608918553508844e-05, "loss": 0.5329, "step": 26115 }, { "epoch": 0.7622938858893915, "grad_norm": 0.9941253904157246, "learning_rate": 1.1601233775167235e-05, "loss": 0.5112, "step": 26120 }, { "epoch": 0.7624398073836276, "grad_norm": 1.0013257836423846, "learning_rate": 1.1593552699150287e-05, "loss": 0.5214, "step": 26125 }, { "epoch": 0.7625857288778637, "grad_norm": 1.121875655198004, "learning_rate": 1.1585875327246677e-05, "loss": 0.5291, "step": 26130 }, { "epoch": 0.7627316503720998, "grad_norm": 1.107408305727277, "learning_rate": 1.157820166124422e-05, "loss": 0.5906, "step": 26135 }, { "epoch": 0.7628775718663359, "grad_norm": 1.0897101848323532, "learning_rate": 1.1570531702929873e-05, "loss": 0.5466, "step": 26140 }, { "epoch": 0.763023493360572, "grad_norm": 1.0599471421470619, "learning_rate": 1.1562865454089728e-05, "loss": 0.5745, "step": 26145 }, { "epoch": 0.7631694148548082, "grad_norm": 1.0288093740130015, "learning_rate": 1.1555202916509022e-05, "loss": 0.6144, "step": 26150 }, { "epoch": 0.7633153363490442, "grad_norm": 0.9816796522952747, "learning_rate": 1.1547544091972107e-05, "loss": 0.5788, "step": 26155 }, { "epoch": 0.7634612578432803, "grad_norm": 0.9014640350577715, "learning_rate": 1.1539888982262504e-05, "loss": 0.5244, "step": 26160 }, { "epoch": 0.7636071793375164, "grad_norm": 0.9801727100503064, "learning_rate": 1.1532237589162824e-05, "loss": 0.497, "step": 26165 }, { "epoch": 0.7637531008317525, "grad_norm": 1.1378514682531211, "learning_rate": 1.1524589914454855e-05, "loss": 0.5681, "step": 26170 }, { "epoch": 0.7638990223259886, "grad_norm": 0.9751828940230503, "learning_rate": 1.1516945959919494e-05, "loss": 0.5117, "step": 26175 }, { "epoch": 0.7640449438202247, "grad_norm": 0.9534722582594792, "learning_rate": 1.1509305727336779e-05, "loss": 0.5297, "step": 26180 }, { "epoch": 0.7641908653144608, "grad_norm": 1.0229250415906461, "learning_rate": 1.150166921848588e-05, "loss": 0.5521, "step": 26185 }, { "epoch": 0.7643367868086969, "grad_norm": 1.1939338833197222, "learning_rate": 1.1494036435145095e-05, "loss": 0.5367, "step": 26190 }, { "epoch": 0.764482708302933, "grad_norm": 1.0443782898588452, "learning_rate": 1.148640737909187e-05, "loss": 0.4888, "step": 26195 }, { "epoch": 0.7646286297971692, "grad_norm": 1.029712909853686, "learning_rate": 1.1478782052102775e-05, "loss": 0.4944, "step": 26200 }, { "epoch": 0.7647745512914053, "grad_norm": 1.0299434999522206, "learning_rate": 1.1471160455953502e-05, "loss": 0.5602, "step": 26205 }, { "epoch": 0.7649204727856413, "grad_norm": 1.0625170889149527, "learning_rate": 1.1463542592418885e-05, "loss": 0.4982, "step": 26210 }, { "epoch": 0.7650663942798774, "grad_norm": 1.0761779506451865, "learning_rate": 1.1455928463272878e-05, "loss": 0.5888, "step": 26215 }, { "epoch": 0.7652123157741135, "grad_norm": 1.260766552199907, "learning_rate": 1.1448318070288587e-05, "loss": 0.5737, "step": 26220 }, { "epoch": 0.7653582372683496, "grad_norm": 1.0044722474088226, "learning_rate": 1.1440711415238221e-05, "loss": 0.5736, "step": 26225 }, { "epoch": 0.7655041587625857, "grad_norm": 1.1677107519312429, "learning_rate": 1.143310849989315e-05, "loss": 0.537, "step": 26230 }, { "epoch": 0.7656500802568218, "grad_norm": 0.9714881172532713, "learning_rate": 1.1425509326023832e-05, "loss": 0.5277, "step": 26235 }, { "epoch": 0.7657960017510579, "grad_norm": 0.9536014941152121, "learning_rate": 1.1417913895399896e-05, "loss": 0.5519, "step": 26240 }, { "epoch": 0.765941923245294, "grad_norm": 1.1474721367891554, "learning_rate": 1.141032220979007e-05, "loss": 0.5302, "step": 26245 }, { "epoch": 0.7660878447395302, "grad_norm": 1.311854894178345, "learning_rate": 1.1402734270962213e-05, "loss": 0.5385, "step": 26250 }, { "epoch": 0.7662337662337663, "grad_norm": 1.0170296395474896, "learning_rate": 1.1395150080683341e-05, "loss": 0.5164, "step": 26255 }, { "epoch": 0.7663796877280024, "grad_norm": 1.0142818028958265, "learning_rate": 1.1387569640719546e-05, "loss": 0.52, "step": 26260 }, { "epoch": 0.7665256092222384, "grad_norm": 1.087516983543965, "learning_rate": 1.1379992952836096e-05, "loss": 0.5252, "step": 26265 }, { "epoch": 0.7666715307164745, "grad_norm": 1.0685628522153694, "learning_rate": 1.1372420018797351e-05, "loss": 0.6054, "step": 26270 }, { "epoch": 0.7668174522107106, "grad_norm": 0.9459058502166803, "learning_rate": 1.136485084036683e-05, "loss": 0.5564, "step": 26275 }, { "epoch": 0.7669633737049467, "grad_norm": 1.0297359623145743, "learning_rate": 1.1357285419307131e-05, "loss": 0.5403, "step": 26280 }, { "epoch": 0.7671092951991828, "grad_norm": 0.9749961572036202, "learning_rate": 1.1349723757380024e-05, "loss": 0.549, "step": 26285 }, { "epoch": 0.7672552166934189, "grad_norm": 0.9622158450289131, "learning_rate": 1.1342165856346373e-05, "loss": 0.5226, "step": 26290 }, { "epoch": 0.7674011381876551, "grad_norm": 1.007275351717186, "learning_rate": 1.1334611717966173e-05, "loss": 0.5305, "step": 26295 }, { "epoch": 0.7675470596818912, "grad_norm": 0.9833263425629261, "learning_rate": 1.132706134399857e-05, "loss": 0.5031, "step": 26300 }, { "epoch": 0.7676929811761273, "grad_norm": 0.964950547844251, "learning_rate": 1.1319514736201773e-05, "loss": 0.5396, "step": 26305 }, { "epoch": 0.7678389026703634, "grad_norm": 1.0440341375740938, "learning_rate": 1.1311971896333179e-05, "loss": 0.5559, "step": 26310 }, { "epoch": 0.7679848241645995, "grad_norm": 0.986377169722253, "learning_rate": 1.130443282614927e-05, "loss": 0.5478, "step": 26315 }, { "epoch": 0.7681307456588355, "grad_norm": 1.036366520073748, "learning_rate": 1.1296897527405651e-05, "loss": 0.549, "step": 26320 }, { "epoch": 0.7682766671530716, "grad_norm": 1.0396657355979717, "learning_rate": 1.1289366001857082e-05, "loss": 0.584, "step": 26325 }, { "epoch": 0.7684225886473077, "grad_norm": 1.1659084248424367, "learning_rate": 1.1281838251257384e-05, "loss": 0.5504, "step": 26330 }, { "epoch": 0.7685685101415438, "grad_norm": 1.0224428467325715, "learning_rate": 1.1274314277359563e-05, "loss": 0.5653, "step": 26335 }, { "epoch": 0.7687144316357799, "grad_norm": 1.0821596532302247, "learning_rate": 1.12667940819157e-05, "loss": 0.5621, "step": 26340 }, { "epoch": 0.7688603531300161, "grad_norm": 0.9100038179071127, "learning_rate": 1.1259277666677023e-05, "loss": 0.5412, "step": 26345 }, { "epoch": 0.7690062746242522, "grad_norm": 1.0237176826796293, "learning_rate": 1.1251765033393868e-05, "loss": 0.4852, "step": 26350 }, { "epoch": 0.7691521961184883, "grad_norm": 0.927511991898227, "learning_rate": 1.124425618381569e-05, "loss": 0.5065, "step": 26355 }, { "epoch": 0.7692981176127244, "grad_norm": 1.0127450452466895, "learning_rate": 1.1236751119691066e-05, "loss": 0.5419, "step": 26360 }, { "epoch": 0.7694440391069605, "grad_norm": 1.0868699431838862, "learning_rate": 1.1229249842767684e-05, "loss": 0.5012, "step": 26365 }, { "epoch": 0.7695899606011966, "grad_norm": 0.854644570271715, "learning_rate": 1.1221752354792373e-05, "loss": 0.5081, "step": 26370 }, { "epoch": 0.7697358820954326, "grad_norm": 0.975006809185287, "learning_rate": 1.1214258657511037e-05, "loss": 0.511, "step": 26375 }, { "epoch": 0.7698818035896687, "grad_norm": 1.2293865477658465, "learning_rate": 1.1206768752668747e-05, "loss": 0.646, "step": 26380 }, { "epoch": 0.7700277250839048, "grad_norm": 1.0038895467301465, "learning_rate": 1.119928264200966e-05, "loss": 0.5184, "step": 26385 }, { "epoch": 0.7701736465781409, "grad_norm": 0.9551484166198256, "learning_rate": 1.1191800327277046e-05, "loss": 0.5414, "step": 26390 }, { "epoch": 0.7703195680723771, "grad_norm": 0.9301715407148176, "learning_rate": 1.1184321810213326e-05, "loss": 0.5365, "step": 26395 }, { "epoch": 0.7704654895666132, "grad_norm": 0.9191611509869474, "learning_rate": 1.1176847092559986e-05, "loss": 0.4363, "step": 26400 }, { "epoch": 0.7706114110608493, "grad_norm": 0.9442743898599173, "learning_rate": 1.1169376176057666e-05, "loss": 0.5614, "step": 26405 }, { "epoch": 0.7707573325550854, "grad_norm": 1.0637324804971586, "learning_rate": 1.1161909062446105e-05, "loss": 0.5302, "step": 26410 }, { "epoch": 0.7709032540493215, "grad_norm": 1.259719803526256, "learning_rate": 1.1154445753464168e-05, "loss": 0.602, "step": 26415 }, { "epoch": 0.7710491755435576, "grad_norm": 1.0799875321457637, "learning_rate": 1.1146986250849819e-05, "loss": 0.5964, "step": 26420 }, { "epoch": 0.7711950970377937, "grad_norm": 1.1335312158770563, "learning_rate": 1.1139530556340147e-05, "loss": 0.5964, "step": 26425 }, { "epoch": 0.7713410185320297, "grad_norm": 1.0749432291433856, "learning_rate": 1.1132078671671345e-05, "loss": 0.5592, "step": 26430 }, { "epoch": 0.7714869400262658, "grad_norm": 1.0907066776683048, "learning_rate": 1.1124630598578716e-05, "loss": 0.5381, "step": 26435 }, { "epoch": 0.7716328615205019, "grad_norm": 1.0298183424681384, "learning_rate": 1.1117186338796707e-05, "loss": 0.5272, "step": 26440 }, { "epoch": 0.7717787830147381, "grad_norm": 0.8572802119525016, "learning_rate": 1.1109745894058825e-05, "loss": 0.5091, "step": 26445 }, { "epoch": 0.7719247045089742, "grad_norm": 1.0378810935211007, "learning_rate": 1.1102309266097735e-05, "loss": 0.5422, "step": 26450 }, { "epoch": 0.7720706260032103, "grad_norm": 1.1183538579459082, "learning_rate": 1.109487645664519e-05, "loss": 0.5475, "step": 26455 }, { "epoch": 0.7722165474974464, "grad_norm": 1.008846668051106, "learning_rate": 1.1087447467432047e-05, "loss": 0.4789, "step": 26460 }, { "epoch": 0.7723624689916825, "grad_norm": 0.9548686984444584, "learning_rate": 1.1080022300188313e-05, "loss": 0.509, "step": 26465 }, { "epoch": 0.7725083904859186, "grad_norm": 1.1319156145216078, "learning_rate": 1.1072600956643046e-05, "loss": 0.5573, "step": 26470 }, { "epoch": 0.7726543119801547, "grad_norm": 1.068148796691251, "learning_rate": 1.1065183438524461e-05, "loss": 0.5305, "step": 26475 }, { "epoch": 0.7728002334743908, "grad_norm": 0.9431366109806484, "learning_rate": 1.1057769747559859e-05, "loss": 0.4844, "step": 26480 }, { "epoch": 0.7729461549686268, "grad_norm": 1.0910037856309902, "learning_rate": 1.1050359885475663e-05, "loss": 0.5952, "step": 26485 }, { "epoch": 0.7730920764628629, "grad_norm": 0.900522554912647, "learning_rate": 1.1042953853997398e-05, "loss": 0.5109, "step": 26490 }, { "epoch": 0.7732379979570991, "grad_norm": 1.192841715582392, "learning_rate": 1.103555165484969e-05, "loss": 0.5572, "step": 26495 }, { "epoch": 0.7733839194513352, "grad_norm": 1.239785568993468, "learning_rate": 1.1028153289756287e-05, "loss": 0.5923, "step": 26500 }, { "epoch": 0.7735298409455713, "grad_norm": 1.1439078975637549, "learning_rate": 1.1020758760440023e-05, "loss": 0.5617, "step": 26505 }, { "epoch": 0.7736757624398074, "grad_norm": 0.8816858779540978, "learning_rate": 1.1013368068622865e-05, "loss": 0.5054, "step": 26510 }, { "epoch": 0.7738216839340435, "grad_norm": 0.9004464248885659, "learning_rate": 1.1005981216025874e-05, "loss": 0.5363, "step": 26515 }, { "epoch": 0.7739676054282796, "grad_norm": 1.0716891887745637, "learning_rate": 1.0998598204369212e-05, "loss": 0.58, "step": 26520 }, { "epoch": 0.7741135269225157, "grad_norm": 1.2528211726648548, "learning_rate": 1.0991219035372144e-05, "loss": 0.5856, "step": 26525 }, { "epoch": 0.7742594484167518, "grad_norm": 0.9244889707258868, "learning_rate": 1.0983843710753062e-05, "loss": 0.5167, "step": 26530 }, { "epoch": 0.7744053699109879, "grad_norm": 1.001764835583752, "learning_rate": 1.097647223222944e-05, "loss": 0.544, "step": 26535 }, { "epoch": 0.7745512914052239, "grad_norm": 1.0178996596269738, "learning_rate": 1.0969104601517868e-05, "loss": 0.5406, "step": 26540 }, { "epoch": 0.7746972128994601, "grad_norm": 0.914968039839898, "learning_rate": 1.0961740820334032e-05, "loss": 0.4929, "step": 26545 }, { "epoch": 0.7748431343936962, "grad_norm": 1.0325972459841566, "learning_rate": 1.0954380890392721e-05, "loss": 0.5273, "step": 26550 }, { "epoch": 0.7749890558879323, "grad_norm": 1.0543724376917063, "learning_rate": 1.0947024813407848e-05, "loss": 0.5606, "step": 26555 }, { "epoch": 0.7751349773821684, "grad_norm": 1.0691947109121767, "learning_rate": 1.0939672591092404e-05, "loss": 0.5347, "step": 26560 }, { "epoch": 0.7752808988764045, "grad_norm": 1.067036295985987, "learning_rate": 1.093232422515849e-05, "loss": 0.5436, "step": 26565 }, { "epoch": 0.7754268203706406, "grad_norm": 0.9275120333427185, "learning_rate": 1.0924979717317313e-05, "loss": 0.4744, "step": 26570 }, { "epoch": 0.7755727418648767, "grad_norm": 1.0834631290577457, "learning_rate": 1.0917639069279166e-05, "loss": 0.5198, "step": 26575 }, { "epoch": 0.7757186633591128, "grad_norm": 0.9495215822316148, "learning_rate": 1.0910302282753477e-05, "loss": 0.5175, "step": 26580 }, { "epoch": 0.7758645848533489, "grad_norm": 0.9463328960838919, "learning_rate": 1.0902969359448747e-05, "loss": 0.4925, "step": 26585 }, { "epoch": 0.776010506347585, "grad_norm": 0.9926699150133391, "learning_rate": 1.089564030107258e-05, "loss": 0.5342, "step": 26590 }, { "epoch": 0.7761564278418212, "grad_norm": 1.100510166667213, "learning_rate": 1.0888315109331675e-05, "loss": 0.6052, "step": 26595 }, { "epoch": 0.7763023493360572, "grad_norm": 1.1930276926883863, "learning_rate": 1.0880993785931859e-05, "loss": 0.529, "step": 26600 }, { "epoch": 0.7764482708302933, "grad_norm": 0.9741370662357622, "learning_rate": 1.087367633257803e-05, "loss": 0.4951, "step": 26605 }, { "epoch": 0.7765941923245294, "grad_norm": 1.0116020218370523, "learning_rate": 1.0866362750974194e-05, "loss": 0.4975, "step": 26610 }, { "epoch": 0.7767401138187655, "grad_norm": 1.0023964323771666, "learning_rate": 1.0859053042823455e-05, "loss": 0.5178, "step": 26615 }, { "epoch": 0.7768860353130016, "grad_norm": 1.1927319121120095, "learning_rate": 1.0851747209828008e-05, "loss": 0.5681, "step": 26620 }, { "epoch": 0.7770319568072377, "grad_norm": 0.8799901206906776, "learning_rate": 1.0844445253689167e-05, "loss": 0.6037, "step": 26625 }, { "epoch": 0.7771778783014738, "grad_norm": 0.8560348918531231, "learning_rate": 1.0837147176107322e-05, "loss": 0.5216, "step": 26630 }, { "epoch": 0.7773237997957099, "grad_norm": 1.0970734945825442, "learning_rate": 1.0829852978781964e-05, "loss": 0.5243, "step": 26635 }, { "epoch": 0.777469721289946, "grad_norm": 1.0071171015594016, "learning_rate": 1.0822562663411686e-05, "loss": 0.5847, "step": 26640 }, { "epoch": 0.7776156427841822, "grad_norm": 0.9979222514006434, "learning_rate": 1.081527623169417e-05, "loss": 0.5273, "step": 26645 }, { "epoch": 0.7777615642784182, "grad_norm": 0.9230597292966153, "learning_rate": 1.08079936853262e-05, "loss": 0.5078, "step": 26650 }, { "epoch": 0.7779074857726543, "grad_norm": 1.1459497111705466, "learning_rate": 1.080071502600366e-05, "loss": 0.5336, "step": 26655 }, { "epoch": 0.7780534072668904, "grad_norm": 1.0735928601569844, "learning_rate": 1.0793440255421511e-05, "loss": 0.6352, "step": 26660 }, { "epoch": 0.7781993287611265, "grad_norm": 1.017195495499888, "learning_rate": 1.0786169375273821e-05, "loss": 0.5073, "step": 26665 }, { "epoch": 0.7783452502553626, "grad_norm": 0.9589371604026143, "learning_rate": 1.0778902387253758e-05, "loss": 0.5338, "step": 26670 }, { "epoch": 0.7784911717495987, "grad_norm": 1.0093054678252185, "learning_rate": 1.0771639293053571e-05, "loss": 0.5451, "step": 26675 }, { "epoch": 0.7786370932438348, "grad_norm": 0.9894169640693417, "learning_rate": 1.0764380094364607e-05, "loss": 0.519, "step": 26680 }, { "epoch": 0.7787830147380709, "grad_norm": 0.851796298818518, "learning_rate": 1.0757124792877306e-05, "loss": 0.4957, "step": 26685 }, { "epoch": 0.778928936232307, "grad_norm": 1.1924757451502195, "learning_rate": 1.0749873390281194e-05, "loss": 0.5593, "step": 26690 }, { "epoch": 0.7790748577265432, "grad_norm": 0.9872430020850951, "learning_rate": 1.074262588826491e-05, "loss": 0.5453, "step": 26695 }, { "epoch": 0.7792207792207793, "grad_norm": 0.9859896595323326, "learning_rate": 1.0735382288516155e-05, "loss": 0.5708, "step": 26700 }, { "epoch": 0.7793667007150153, "grad_norm": 1.0298045916555123, "learning_rate": 1.0728142592721757e-05, "loss": 0.5457, "step": 26705 }, { "epoch": 0.7795126222092514, "grad_norm": 0.9858766725513713, "learning_rate": 1.0720906802567593e-05, "loss": 0.5481, "step": 26710 }, { "epoch": 0.7796585437034875, "grad_norm": 0.9357213052473992, "learning_rate": 1.0713674919738655e-05, "loss": 0.5441, "step": 26715 }, { "epoch": 0.7798044651977236, "grad_norm": 1.1807413164539537, "learning_rate": 1.0706446945919033e-05, "loss": 0.5438, "step": 26720 }, { "epoch": 0.7799503866919597, "grad_norm": 1.2879374593889028, "learning_rate": 1.069922288279189e-05, "loss": 0.6062, "step": 26725 }, { "epoch": 0.7800963081861958, "grad_norm": 0.9065153805960428, "learning_rate": 1.0692002732039482e-05, "loss": 0.4991, "step": 26730 }, { "epoch": 0.7802422296804319, "grad_norm": 1.0029375533661282, "learning_rate": 1.0684786495343155e-05, "loss": 0.5325, "step": 26735 }, { "epoch": 0.780388151174668, "grad_norm": 0.9139557501888654, "learning_rate": 1.067757417438335e-05, "loss": 0.5361, "step": 26740 }, { "epoch": 0.7805340726689042, "grad_norm": 0.9690206571977329, "learning_rate": 1.067036577083959e-05, "loss": 0.5339, "step": 26745 }, { "epoch": 0.7806799941631403, "grad_norm": 1.0111053450051546, "learning_rate": 1.0663161286390482e-05, "loss": 0.4814, "step": 26750 }, { "epoch": 0.7808259156573764, "grad_norm": 1.0294714716851727, "learning_rate": 1.0655960722713728e-05, "loss": 0.5024, "step": 26755 }, { "epoch": 0.7809718371516124, "grad_norm": 1.1111785810328374, "learning_rate": 1.064876408148611e-05, "loss": 0.5266, "step": 26760 }, { "epoch": 0.7811177586458485, "grad_norm": 1.068480231956318, "learning_rate": 1.0641571364383503e-05, "loss": 0.5479, "step": 26765 }, { "epoch": 0.7812636801400846, "grad_norm": 1.0774347239121638, "learning_rate": 1.0634382573080862e-05, "loss": 0.5834, "step": 26770 }, { "epoch": 0.7814096016343207, "grad_norm": 1.0192380970542516, "learning_rate": 1.0627197709252248e-05, "loss": 0.4717, "step": 26775 }, { "epoch": 0.7815555231285568, "grad_norm": 1.0218460545560653, "learning_rate": 1.0620016774570765e-05, "loss": 0.5378, "step": 26780 }, { "epoch": 0.7817014446227929, "grad_norm": 0.9668280484456119, "learning_rate": 1.0612839770708646e-05, "loss": 0.5762, "step": 26785 }, { "epoch": 0.781847366117029, "grad_norm": 1.00963578638126, "learning_rate": 1.0605666699337186e-05, "loss": 0.5227, "step": 26790 }, { "epoch": 0.7819932876112652, "grad_norm": 1.1287921193671648, "learning_rate": 1.0598497562126767e-05, "loss": 0.5826, "step": 26795 }, { "epoch": 0.7821392091055013, "grad_norm": 1.0768528026817945, "learning_rate": 1.0591332360746855e-05, "loss": 0.5147, "step": 26800 }, { "epoch": 0.7822851305997374, "grad_norm": 0.960805922525826, "learning_rate": 1.0584171096865997e-05, "loss": 0.4767, "step": 26805 }, { "epoch": 0.7824310520939735, "grad_norm": 1.0255582550356812, "learning_rate": 1.057701377215184e-05, "loss": 0.5375, "step": 26810 }, { "epoch": 0.7825769735882095, "grad_norm": 1.1403794243365977, "learning_rate": 1.0569860388271095e-05, "loss": 0.5367, "step": 26815 }, { "epoch": 0.7827228950824456, "grad_norm": 0.9387852519692074, "learning_rate": 1.0562710946889557e-05, "loss": 0.5077, "step": 26820 }, { "epoch": 0.7828688165766817, "grad_norm": 1.0794088465977334, "learning_rate": 1.0555565449672111e-05, "loss": 0.5101, "step": 26825 }, { "epoch": 0.7830147380709178, "grad_norm": 0.9323215720950511, "learning_rate": 1.0548423898282711e-05, "loss": 0.5012, "step": 26830 }, { "epoch": 0.7831606595651539, "grad_norm": 0.9817844452175546, "learning_rate": 1.0541286294384414e-05, "loss": 0.5471, "step": 26835 }, { "epoch": 0.78330658105939, "grad_norm": 0.97035189562513, "learning_rate": 1.0534152639639333e-05, "loss": 0.5537, "step": 26840 }, { "epoch": 0.7834525025536262, "grad_norm": 1.0151783766404865, "learning_rate": 1.0527022935708692e-05, "loss": 0.5669, "step": 26845 }, { "epoch": 0.7835984240478623, "grad_norm": 1.151043286721633, "learning_rate": 1.0519897184252745e-05, "loss": 0.5649, "step": 26850 }, { "epoch": 0.7837443455420984, "grad_norm": 1.0827802907639867, "learning_rate": 1.051277538693088e-05, "loss": 0.4871, "step": 26855 }, { "epoch": 0.7838902670363345, "grad_norm": 0.9723327917001047, "learning_rate": 1.0505657545401534e-05, "loss": 0.5535, "step": 26860 }, { "epoch": 0.7840361885305706, "grad_norm": 0.9648375760637035, "learning_rate": 1.0498543661322224e-05, "loss": 0.5363, "step": 26865 }, { "epoch": 0.7841821100248066, "grad_norm": 1.0416308165603414, "learning_rate": 1.0491433736349566e-05, "loss": 0.5554, "step": 26870 }, { "epoch": 0.7843280315190427, "grad_norm": 1.1743857190044469, "learning_rate": 1.0484327772139215e-05, "loss": 0.5755, "step": 26875 }, { "epoch": 0.7844739530132788, "grad_norm": 0.9892857571681145, "learning_rate": 1.0477225770345952e-05, "loss": 0.5097, "step": 26880 }, { "epoch": 0.7846198745075149, "grad_norm": 0.8745383153696362, "learning_rate": 1.0470127732623596e-05, "loss": 0.4687, "step": 26885 }, { "epoch": 0.784765796001751, "grad_norm": 1.0112344363569625, "learning_rate": 1.046303366062506e-05, "loss": 0.5436, "step": 26890 }, { "epoch": 0.7849117174959872, "grad_norm": 1.1638534746943998, "learning_rate": 1.0455943556002334e-05, "loss": 0.5632, "step": 26895 }, { "epoch": 0.7850576389902233, "grad_norm": 1.0813854473829274, "learning_rate": 1.0448857420406473e-05, "loss": 0.5754, "step": 26900 }, { "epoch": 0.7852035604844594, "grad_norm": 0.9329066614128672, "learning_rate": 1.044177525548763e-05, "loss": 0.499, "step": 26905 }, { "epoch": 0.7853494819786955, "grad_norm": 1.0739361060455275, "learning_rate": 1.0434697062895005e-05, "loss": 0.5461, "step": 26910 }, { "epoch": 0.7854954034729316, "grad_norm": 1.1294853115478685, "learning_rate": 1.0427622844276905e-05, "loss": 0.5748, "step": 26915 }, { "epoch": 0.7856413249671677, "grad_norm": 1.1184331586367497, "learning_rate": 1.0420552601280671e-05, "loss": 0.5316, "step": 26920 }, { "epoch": 0.7857872464614037, "grad_norm": 1.0398845017954061, "learning_rate": 1.0413486335552764e-05, "loss": 0.5634, "step": 26925 }, { "epoch": 0.7859331679556398, "grad_norm": 0.8762766074442039, "learning_rate": 1.040642404873868e-05, "loss": 0.4935, "step": 26930 }, { "epoch": 0.7860790894498759, "grad_norm": 0.8562464857743646, "learning_rate": 1.0399365742483005e-05, "loss": 0.4839, "step": 26935 }, { "epoch": 0.786225010944112, "grad_norm": 1.0347324867914962, "learning_rate": 1.0392311418429418e-05, "loss": 0.5101, "step": 26940 }, { "epoch": 0.7863709324383482, "grad_norm": 1.0433639491144333, "learning_rate": 1.038526107822062e-05, "loss": 0.5855, "step": 26945 }, { "epoch": 0.7865168539325843, "grad_norm": 0.8667111022655034, "learning_rate": 1.0378214723498433e-05, "loss": 0.5682, "step": 26950 }, { "epoch": 0.7866627754268204, "grad_norm": 1.054413416099709, "learning_rate": 1.0371172355903724e-05, "loss": 0.5479, "step": 26955 }, { "epoch": 0.7868086969210565, "grad_norm": 1.0628771361905176, "learning_rate": 1.0364133977076452e-05, "loss": 0.53, "step": 26960 }, { "epoch": 0.7869546184152926, "grad_norm": 0.9159529243190648, "learning_rate": 1.0357099588655621e-05, "loss": 0.4473, "step": 26965 }, { "epoch": 0.7871005399095287, "grad_norm": 0.915143704474563, "learning_rate": 1.035006919227933e-05, "loss": 0.5466, "step": 26970 }, { "epoch": 0.7872464614037648, "grad_norm": 1.080810620375531, "learning_rate": 1.0343042789584732e-05, "loss": 0.5833, "step": 26975 }, { "epoch": 0.7873923828980008, "grad_norm": 1.027914903709717, "learning_rate": 1.0336020382208053e-05, "loss": 0.5604, "step": 26980 }, { "epoch": 0.7875383043922369, "grad_norm": 1.011852185977007, "learning_rate": 1.0329001971784605e-05, "loss": 0.5167, "step": 26985 }, { "epoch": 0.7876842258864731, "grad_norm": 0.9771170988856054, "learning_rate": 1.032198755994874e-05, "loss": 0.5304, "step": 26990 }, { "epoch": 0.7878301473807092, "grad_norm": 0.9985216506998189, "learning_rate": 1.0314977148333905e-05, "loss": 0.5136, "step": 26995 }, { "epoch": 0.7879760688749453, "grad_norm": 1.102586021041923, "learning_rate": 1.0307970738572603e-05, "loss": 0.4896, "step": 27000 }, { "epoch": 0.7881219903691814, "grad_norm": 1.0269495887648135, "learning_rate": 1.0300968332296401e-05, "loss": 0.5471, "step": 27005 }, { "epoch": 0.7882679118634175, "grad_norm": 0.9876185922916727, "learning_rate": 1.029396993113596e-05, "loss": 0.5055, "step": 27010 }, { "epoch": 0.7884138333576536, "grad_norm": 1.111808647339129, "learning_rate": 1.0286975536720962e-05, "loss": 0.5433, "step": 27015 }, { "epoch": 0.7885597548518897, "grad_norm": 1.0860672616241285, "learning_rate": 1.0279985150680197e-05, "loss": 0.5345, "step": 27020 }, { "epoch": 0.7887056763461258, "grad_norm": 1.0599904274563325, "learning_rate": 1.0272998774641501e-05, "loss": 0.5346, "step": 27025 }, { "epoch": 0.7888515978403619, "grad_norm": 1.0194286224398008, "learning_rate": 1.0266016410231794e-05, "loss": 0.5971, "step": 27030 }, { "epoch": 0.7889975193345979, "grad_norm": 1.0994546963287377, "learning_rate": 1.025903805907704e-05, "loss": 0.5518, "step": 27035 }, { "epoch": 0.7891434408288341, "grad_norm": 0.9579309490500107, "learning_rate": 1.0252063722802277e-05, "loss": 0.5544, "step": 27040 }, { "epoch": 0.7892893623230702, "grad_norm": 1.2102666732002723, "learning_rate": 1.0245093403031616e-05, "loss": 0.5956, "step": 27045 }, { "epoch": 0.7894352838173063, "grad_norm": 0.9490959886886922, "learning_rate": 1.0238127101388214e-05, "loss": 0.5458, "step": 27050 }, { "epoch": 0.7895812053115424, "grad_norm": 1.0129748720248426, "learning_rate": 1.0231164819494323e-05, "loss": 0.5645, "step": 27055 }, { "epoch": 0.7897271268057785, "grad_norm": 0.853697521392681, "learning_rate": 1.0224206558971229e-05, "loss": 0.5081, "step": 27060 }, { "epoch": 0.7898730483000146, "grad_norm": 1.0109532413804747, "learning_rate": 1.0217252321439296e-05, "loss": 0.5901, "step": 27065 }, { "epoch": 0.7900189697942507, "grad_norm": 1.0761715446736881, "learning_rate": 1.0210302108517944e-05, "loss": 0.583, "step": 27070 }, { "epoch": 0.7901648912884868, "grad_norm": 0.9822409724803611, "learning_rate": 1.0203355921825658e-05, "loss": 0.565, "step": 27075 }, { "epoch": 0.7903108127827229, "grad_norm": 0.991866154896098, "learning_rate": 1.0196413762980003e-05, "loss": 0.5619, "step": 27080 }, { "epoch": 0.790456734276959, "grad_norm": 1.1659941928515118, "learning_rate": 1.0189475633597568e-05, "loss": 0.5248, "step": 27085 }, { "epoch": 0.7906026557711952, "grad_norm": 1.041721830783372, "learning_rate": 1.0182541535294043e-05, "loss": 0.477, "step": 27090 }, { "epoch": 0.7907485772654312, "grad_norm": 0.9410758883506408, "learning_rate": 1.0175611469684148e-05, "loss": 0.5467, "step": 27095 }, { "epoch": 0.7908944987596673, "grad_norm": 0.9387041882717445, "learning_rate": 1.0168685438381697e-05, "loss": 0.4909, "step": 27100 }, { "epoch": 0.7910404202539034, "grad_norm": 0.9989749866872627, "learning_rate": 1.0161763442999533e-05, "loss": 0.5597, "step": 27105 }, { "epoch": 0.7911863417481395, "grad_norm": 1.0448132223225775, "learning_rate": 1.0154845485149575e-05, "loss": 0.5143, "step": 27110 }, { "epoch": 0.7913322632423756, "grad_norm": 1.0004083768223377, "learning_rate": 1.0147931566442797e-05, "loss": 0.5886, "step": 27115 }, { "epoch": 0.7914781847366117, "grad_norm": 1.311983778781264, "learning_rate": 1.0141021688489231e-05, "loss": 0.5761, "step": 27120 }, { "epoch": 0.7916241062308478, "grad_norm": 0.943616279915982, "learning_rate": 1.0134115852897979e-05, "loss": 0.5406, "step": 27125 }, { "epoch": 0.7917700277250839, "grad_norm": 1.0654865437814889, "learning_rate": 1.0127214061277191e-05, "loss": 0.5732, "step": 27130 }, { "epoch": 0.79191594921932, "grad_norm": 1.0622301473898175, "learning_rate": 1.0120316315234076e-05, "loss": 0.5184, "step": 27135 }, { "epoch": 0.7920618707135562, "grad_norm": 1.054142344866929, "learning_rate": 1.0113422616374904e-05, "loss": 0.5107, "step": 27140 }, { "epoch": 0.7922077922077922, "grad_norm": 1.1247809626017595, "learning_rate": 1.0106532966304996e-05, "loss": 0.5465, "step": 27145 }, { "epoch": 0.7923537137020283, "grad_norm": 0.8494081010169099, "learning_rate": 1.009964736662875e-05, "loss": 0.4786, "step": 27150 }, { "epoch": 0.7924996351962644, "grad_norm": 1.2234108431849424, "learning_rate": 1.0092765818949596e-05, "loss": 0.5675, "step": 27155 }, { "epoch": 0.7926455566905005, "grad_norm": 1.005798287560001, "learning_rate": 1.0085888324870032e-05, "loss": 0.5503, "step": 27160 }, { "epoch": 0.7927914781847366, "grad_norm": 0.8559603018776307, "learning_rate": 1.0079014885991609e-05, "loss": 0.5602, "step": 27165 }, { "epoch": 0.7929373996789727, "grad_norm": 1.109763629231988, "learning_rate": 1.007214550391494e-05, "loss": 0.5538, "step": 27170 }, { "epoch": 0.7930833211732088, "grad_norm": 1.0633799701930946, "learning_rate": 1.0065280180239689e-05, "loss": 0.4901, "step": 27175 }, { "epoch": 0.7932292426674449, "grad_norm": 0.9956387740402242, "learning_rate": 1.0058418916564573e-05, "loss": 0.544, "step": 27180 }, { "epoch": 0.793375164161681, "grad_norm": 1.0965245567499116, "learning_rate": 1.0051561714487365e-05, "loss": 0.6129, "step": 27185 }, { "epoch": 0.7935210856559172, "grad_norm": 1.0379328588231362, "learning_rate": 1.0044708575604887e-05, "loss": 0.5376, "step": 27190 }, { "epoch": 0.7936670071501533, "grad_norm": 1.0465975513049335, "learning_rate": 1.0037859501513033e-05, "loss": 0.6011, "step": 27195 }, { "epoch": 0.7938129286443893, "grad_norm": 1.0420389861648298, "learning_rate": 1.0031014493806728e-05, "loss": 0.5387, "step": 27200 }, { "epoch": 0.7939588501386254, "grad_norm": 1.0911596134064727, "learning_rate": 1.002417355407997e-05, "loss": 0.5997, "step": 27205 }, { "epoch": 0.7941047716328615, "grad_norm": 1.058853823446391, "learning_rate": 1.0017336683925782e-05, "loss": 0.5651, "step": 27210 }, { "epoch": 0.7942506931270976, "grad_norm": 1.0287017175405877, "learning_rate": 1.0010503884936272e-05, "loss": 0.5523, "step": 27215 }, { "epoch": 0.7943966146213337, "grad_norm": 0.9463675898832514, "learning_rate": 1.0003675158702584e-05, "loss": 0.5135, "step": 27220 }, { "epoch": 0.7945425361155698, "grad_norm": 1.0361634311254662, "learning_rate": 9.996850506814907e-06, "loss": 0.5745, "step": 27225 }, { "epoch": 0.7946884576098059, "grad_norm": 0.98522863504564, "learning_rate": 9.990029930862496e-06, "loss": 0.5792, "step": 27230 }, { "epoch": 0.794834379104042, "grad_norm": 1.093101275345474, "learning_rate": 9.983213432433639e-06, "loss": 0.58, "step": 27235 }, { "epoch": 0.7949803005982782, "grad_norm": 1.0568704157421192, "learning_rate": 9.9764010131157e-06, "loss": 0.5275, "step": 27240 }, { "epoch": 0.7951262220925143, "grad_norm": 1.1403919978120967, "learning_rate": 9.969592674495065e-06, "loss": 0.5589, "step": 27245 }, { "epoch": 0.7952721435867504, "grad_norm": 1.0712487005921623, "learning_rate": 9.962788418157192e-06, "loss": 0.5623, "step": 27250 }, { "epoch": 0.7954180650809864, "grad_norm": 1.0513781499284154, "learning_rate": 9.955988245686575e-06, "loss": 0.5061, "step": 27255 }, { "epoch": 0.7955639865752225, "grad_norm": 0.8929031989395446, "learning_rate": 9.949192158666755e-06, "loss": 0.5969, "step": 27260 }, { "epoch": 0.7957099080694586, "grad_norm": 0.8974400594187889, "learning_rate": 9.94240015868034e-06, "loss": 0.4722, "step": 27265 }, { "epoch": 0.7958558295636947, "grad_norm": 1.1803723646969366, "learning_rate": 9.935612247308965e-06, "loss": 0.501, "step": 27270 }, { "epoch": 0.7960017510579308, "grad_norm": 1.0467736708845314, "learning_rate": 9.928828426133328e-06, "loss": 0.5212, "step": 27275 }, { "epoch": 0.7961476725521669, "grad_norm": 1.088564761438336, "learning_rate": 9.92204869673316e-06, "loss": 0.6101, "step": 27280 }, { "epoch": 0.796293594046403, "grad_norm": 0.9767858884642183, "learning_rate": 9.915273060687259e-06, "loss": 0.484, "step": 27285 }, { "epoch": 0.7964395155406392, "grad_norm": 0.9842297228226178, "learning_rate": 9.90850151957345e-06, "loss": 0.5226, "step": 27290 }, { "epoch": 0.7965854370348753, "grad_norm": 1.1449838278588649, "learning_rate": 9.901734074968616e-06, "loss": 0.5761, "step": 27295 }, { "epoch": 0.7967313585291114, "grad_norm": 0.8570547886807693, "learning_rate": 9.894970728448686e-06, "loss": 0.5156, "step": 27300 }, { "epoch": 0.7968772800233475, "grad_norm": 1.2021593853417891, "learning_rate": 9.888211481588615e-06, "loss": 0.5957, "step": 27305 }, { "epoch": 0.7970232015175835, "grad_norm": 0.9139561314783105, "learning_rate": 9.881456335962444e-06, "loss": 0.5374, "step": 27310 }, { "epoch": 0.7971691230118196, "grad_norm": 1.0356776970393604, "learning_rate": 9.87470529314322e-06, "loss": 0.5348, "step": 27315 }, { "epoch": 0.7973150445060557, "grad_norm": 1.1279862710750772, "learning_rate": 9.867958354703057e-06, "loss": 0.5423, "step": 27320 }, { "epoch": 0.7974609660002918, "grad_norm": 1.0535608425941125, "learning_rate": 9.861215522213097e-06, "loss": 0.4999, "step": 27325 }, { "epoch": 0.7976068874945279, "grad_norm": 0.9606345684864482, "learning_rate": 9.854476797243537e-06, "loss": 0.5397, "step": 27330 }, { "epoch": 0.797752808988764, "grad_norm": 1.1621359622508396, "learning_rate": 9.84774218136362e-06, "loss": 0.5647, "step": 27335 }, { "epoch": 0.7978987304830002, "grad_norm": 0.9540440460656431, "learning_rate": 9.841011676141624e-06, "loss": 0.5042, "step": 27340 }, { "epoch": 0.7980446519772363, "grad_norm": 1.1311605935202607, "learning_rate": 9.834285283144875e-06, "loss": 0.5273, "step": 27345 }, { "epoch": 0.7981905734714724, "grad_norm": 0.9087268902371635, "learning_rate": 9.827563003939727e-06, "loss": 0.4966, "step": 27350 }, { "epoch": 0.7983364949657085, "grad_norm": 1.2067002378640321, "learning_rate": 9.820844840091608e-06, "loss": 0.5401, "step": 27355 }, { "epoch": 0.7984824164599446, "grad_norm": 1.0419974564728558, "learning_rate": 9.814130793164957e-06, "loss": 0.5234, "step": 27360 }, { "epoch": 0.7986283379541806, "grad_norm": 1.132389495889677, "learning_rate": 9.807420864723268e-06, "loss": 0.6014, "step": 27365 }, { "epoch": 0.7987742594484167, "grad_norm": 0.9791412942275789, "learning_rate": 9.80071505632907e-06, "loss": 0.5452, "step": 27370 }, { "epoch": 0.7989201809426528, "grad_norm": 0.9863250104020144, "learning_rate": 9.794013369543933e-06, "loss": 0.5504, "step": 27375 }, { "epoch": 0.7990661024368889, "grad_norm": 1.136941525509386, "learning_rate": 9.787315805928476e-06, "loss": 0.5147, "step": 27380 }, { "epoch": 0.799212023931125, "grad_norm": 0.9638669748080306, "learning_rate": 9.780622367042347e-06, "loss": 0.572, "step": 27385 }, { "epoch": 0.7993579454253612, "grad_norm": 1.0038674685692957, "learning_rate": 9.773933054444256e-06, "loss": 0.5211, "step": 27390 }, { "epoch": 0.7995038669195973, "grad_norm": 1.0054751085562437, "learning_rate": 9.76724786969191e-06, "loss": 0.5153, "step": 27395 }, { "epoch": 0.7996497884138334, "grad_norm": 1.2150457173490514, "learning_rate": 9.760566814342085e-06, "loss": 0.5256, "step": 27400 }, { "epoch": 0.7997957099080695, "grad_norm": 1.2763043732891401, "learning_rate": 9.753889889950595e-06, "loss": 0.5427, "step": 27405 }, { "epoch": 0.7999416314023056, "grad_norm": 1.0284306448616833, "learning_rate": 9.747217098072287e-06, "loss": 0.5248, "step": 27410 }, { "epoch": 0.8000875528965417, "grad_norm": 1.0841584801004394, "learning_rate": 9.740548440261048e-06, "loss": 0.5258, "step": 27415 }, { "epoch": 0.8002334743907777, "grad_norm": 1.0289617759438767, "learning_rate": 9.733883918069786e-06, "loss": 0.5861, "step": 27420 }, { "epoch": 0.8003793958850138, "grad_norm": 0.9702492187903877, "learning_rate": 9.727223533050472e-06, "loss": 0.5085, "step": 27425 }, { "epoch": 0.8005253173792499, "grad_norm": 1.0128043042828136, "learning_rate": 9.720567286754098e-06, "loss": 0.5902, "step": 27430 }, { "epoch": 0.800671238873486, "grad_norm": 1.0831884655380992, "learning_rate": 9.713915180730698e-06, "loss": 0.5444, "step": 27435 }, { "epoch": 0.8008171603677222, "grad_norm": 0.919850515937568, "learning_rate": 9.70726721652933e-06, "loss": 0.5114, "step": 27440 }, { "epoch": 0.8009630818619583, "grad_norm": 1.143320572954369, "learning_rate": 9.7006233956981e-06, "loss": 0.5174, "step": 27445 }, { "epoch": 0.8011090033561944, "grad_norm": 0.9673356174348477, "learning_rate": 9.69398371978415e-06, "loss": 0.5348, "step": 27450 }, { "epoch": 0.8012549248504305, "grad_norm": 0.8049751965545767, "learning_rate": 9.687348190333649e-06, "loss": 0.4988, "step": 27455 }, { "epoch": 0.8014008463446666, "grad_norm": 0.9765064182314537, "learning_rate": 9.680716808891815e-06, "loss": 0.517, "step": 27460 }, { "epoch": 0.8015467678389027, "grad_norm": 0.9913646612621594, "learning_rate": 9.674089577002868e-06, "loss": 0.5834, "step": 27465 }, { "epoch": 0.8016926893331388, "grad_norm": 0.8744162882001801, "learning_rate": 9.6674664962101e-06, "loss": 0.5467, "step": 27470 }, { "epoch": 0.8018386108273748, "grad_norm": 1.2778265176970571, "learning_rate": 9.660847568055815e-06, "loss": 0.5899, "step": 27475 }, { "epoch": 0.8019845323216109, "grad_norm": 1.0611832238433687, "learning_rate": 9.654232794081344e-06, "loss": 0.5326, "step": 27480 }, { "epoch": 0.802130453815847, "grad_norm": 1.0102272809417914, "learning_rate": 9.64762217582708e-06, "loss": 0.5744, "step": 27485 }, { "epoch": 0.8022763753100832, "grad_norm": 0.9568474941629972, "learning_rate": 9.641015714832408e-06, "loss": 0.532, "step": 27490 }, { "epoch": 0.8024222968043193, "grad_norm": 1.06775629283666, "learning_rate": 9.63441341263578e-06, "loss": 0.5586, "step": 27495 }, { "epoch": 0.8025682182985554, "grad_norm": 1.1032546758194686, "learning_rate": 9.627815270774662e-06, "loss": 0.5222, "step": 27500 }, { "epoch": 0.8027141397927915, "grad_norm": 0.9155934374699033, "learning_rate": 9.621221290785548e-06, "loss": 0.505, "step": 27505 }, { "epoch": 0.8028600612870276, "grad_norm": 0.9401719563158842, "learning_rate": 9.614631474203988e-06, "loss": 0.5134, "step": 27510 }, { "epoch": 0.8030059827812637, "grad_norm": 0.9868455245798152, "learning_rate": 9.608045822564518e-06, "loss": 0.5084, "step": 27515 }, { "epoch": 0.8031519042754998, "grad_norm": 1.1627447409892855, "learning_rate": 9.601464337400749e-06, "loss": 0.5412, "step": 27520 }, { "epoch": 0.8032978257697359, "grad_norm": 0.9617026133021087, "learning_rate": 9.594887020245292e-06, "loss": 0.473, "step": 27525 }, { "epoch": 0.8034437472639719, "grad_norm": 0.9424687912754987, "learning_rate": 9.588313872629812e-06, "loss": 0.5175, "step": 27530 }, { "epoch": 0.803589668758208, "grad_norm": 0.9679763521252085, "learning_rate": 9.581744896084971e-06, "loss": 0.5314, "step": 27535 }, { "epoch": 0.8037355902524442, "grad_norm": 1.013203551640198, "learning_rate": 9.57518009214049e-06, "loss": 0.5023, "step": 27540 }, { "epoch": 0.8038815117466803, "grad_norm": 0.9064389754273547, "learning_rate": 9.568619462325105e-06, "loss": 0.4931, "step": 27545 }, { "epoch": 0.8040274332409164, "grad_norm": 1.0578281892980055, "learning_rate": 9.56206300816657e-06, "loss": 0.5472, "step": 27550 }, { "epoch": 0.8041733547351525, "grad_norm": 1.0917287075421498, "learning_rate": 9.555510731191703e-06, "loss": 0.5168, "step": 27555 }, { "epoch": 0.8043192762293886, "grad_norm": 0.9588788769701684, "learning_rate": 9.548962632926294e-06, "loss": 0.5174, "step": 27560 }, { "epoch": 0.8044651977236247, "grad_norm": 1.0454322416561728, "learning_rate": 9.54241871489521e-06, "loss": 0.527, "step": 27565 }, { "epoch": 0.8046111192178608, "grad_norm": 1.1135051545545753, "learning_rate": 9.53587897862232e-06, "loss": 0.5066, "step": 27570 }, { "epoch": 0.8047570407120969, "grad_norm": 1.002594034012158, "learning_rate": 9.529343425630515e-06, "loss": 0.5238, "step": 27575 }, { "epoch": 0.804902962206333, "grad_norm": 0.9086356407424167, "learning_rate": 9.522812057441742e-06, "loss": 0.5562, "step": 27580 }, { "epoch": 0.805048883700569, "grad_norm": 0.9534181218130524, "learning_rate": 9.516284875576923e-06, "loss": 0.577, "step": 27585 }, { "epoch": 0.8051948051948052, "grad_norm": 1.031107239357346, "learning_rate": 9.509761881556057e-06, "loss": 0.5257, "step": 27590 }, { "epoch": 0.8053407266890413, "grad_norm": 0.9885917876951881, "learning_rate": 9.503243076898133e-06, "loss": 0.5435, "step": 27595 }, { "epoch": 0.8054866481832774, "grad_norm": 1.0626693019751252, "learning_rate": 9.496728463121188e-06, "loss": 0.5472, "step": 27600 }, { "epoch": 0.8056325696775135, "grad_norm": 0.9067944455597727, "learning_rate": 9.490218041742255e-06, "loss": 0.5945, "step": 27605 }, { "epoch": 0.8057784911717496, "grad_norm": 1.1253369720637882, "learning_rate": 9.483711814277421e-06, "loss": 0.5659, "step": 27610 }, { "epoch": 0.8059244126659857, "grad_norm": 0.9644438493688245, "learning_rate": 9.477209782241782e-06, "loss": 0.4896, "step": 27615 }, { "epoch": 0.8060703341602218, "grad_norm": 0.9585658942490324, "learning_rate": 9.470711947149442e-06, "loss": 0.5252, "step": 27620 }, { "epoch": 0.8062162556544579, "grad_norm": 1.1722305346644295, "learning_rate": 9.46421831051357e-06, "loss": 0.5723, "step": 27625 }, { "epoch": 0.806362177148694, "grad_norm": 1.1177449741135377, "learning_rate": 9.457728873846305e-06, "loss": 0.5504, "step": 27630 }, { "epoch": 0.80650809864293, "grad_norm": 0.9393704640937329, "learning_rate": 9.451243638658847e-06, "loss": 0.5277, "step": 27635 }, { "epoch": 0.8066540201371662, "grad_norm": 1.0533317383940068, "learning_rate": 9.444762606461395e-06, "loss": 0.5479, "step": 27640 }, { "epoch": 0.8067999416314023, "grad_norm": 1.1566669044090205, "learning_rate": 9.43828577876319e-06, "loss": 0.5711, "step": 27645 }, { "epoch": 0.8069458631256384, "grad_norm": 1.018216470129085, "learning_rate": 9.431813157072483e-06, "loss": 0.5252, "step": 27650 }, { "epoch": 0.8070917846198745, "grad_norm": 0.9507319286907391, "learning_rate": 9.425344742896528e-06, "loss": 0.5051, "step": 27655 }, { "epoch": 0.8072377061141106, "grad_norm": 1.0691638812976023, "learning_rate": 9.41888053774163e-06, "loss": 0.6041, "step": 27660 }, { "epoch": 0.8073836276083467, "grad_norm": 1.0972148078030926, "learning_rate": 9.41242054311309e-06, "loss": 0.5358, "step": 27665 }, { "epoch": 0.8075295491025828, "grad_norm": 1.1118987443013721, "learning_rate": 9.405964760515256e-06, "loss": 0.5667, "step": 27670 }, { "epoch": 0.8076754705968189, "grad_norm": 1.0795088052423096, "learning_rate": 9.399513191451461e-06, "loss": 0.4594, "step": 27675 }, { "epoch": 0.807821392091055, "grad_norm": 1.0591274634186896, "learning_rate": 9.393065837424084e-06, "loss": 0.5397, "step": 27680 }, { "epoch": 0.8079673135852911, "grad_norm": 1.0058882986557025, "learning_rate": 9.386622699934501e-06, "loss": 0.5361, "step": 27685 }, { "epoch": 0.8081132350795273, "grad_norm": 1.0813658145107006, "learning_rate": 9.380183780483121e-06, "loss": 0.5435, "step": 27690 }, { "epoch": 0.8082591565737633, "grad_norm": 1.0424913198809822, "learning_rate": 9.373749080569378e-06, "loss": 0.5833, "step": 27695 }, { "epoch": 0.8084050780679994, "grad_norm": 1.0099481378119568, "learning_rate": 9.367318601691689e-06, "loss": 0.4944, "step": 27700 }, { "epoch": 0.8085509995622355, "grad_norm": 1.2534436522532801, "learning_rate": 9.360892345347533e-06, "loss": 0.6047, "step": 27705 }, { "epoch": 0.8086969210564716, "grad_norm": 1.0273105256661559, "learning_rate": 9.354470313033368e-06, "loss": 0.5628, "step": 27710 }, { "epoch": 0.8088428425507077, "grad_norm": 1.0879328581582328, "learning_rate": 9.348052506244697e-06, "loss": 0.5364, "step": 27715 }, { "epoch": 0.8089887640449438, "grad_norm": 1.2056678545686688, "learning_rate": 9.341638926476021e-06, "loss": 0.568, "step": 27720 }, { "epoch": 0.8091346855391799, "grad_norm": 1.2228912264438538, "learning_rate": 9.33522957522086e-06, "loss": 0.5399, "step": 27725 }, { "epoch": 0.809280607033416, "grad_norm": 1.0243165951012978, "learning_rate": 9.328824453971752e-06, "loss": 0.5146, "step": 27730 }, { "epoch": 0.8094265285276522, "grad_norm": 1.0440796243789765, "learning_rate": 9.322423564220246e-06, "loss": 0.5282, "step": 27735 }, { "epoch": 0.8095724500218883, "grad_norm": 1.139371110599913, "learning_rate": 9.316026907456918e-06, "loss": 0.5724, "step": 27740 }, { "epoch": 0.8097183715161244, "grad_norm": 1.0597225520778788, "learning_rate": 9.309634485171342e-06, "loss": 0.5767, "step": 27745 }, { "epoch": 0.8098642930103604, "grad_norm": 0.9747881523077964, "learning_rate": 9.303246298852114e-06, "loss": 0.4782, "step": 27750 }, { "epoch": 0.8100102145045965, "grad_norm": 1.1222987751360458, "learning_rate": 9.296862349986843e-06, "loss": 0.4727, "step": 27755 }, { "epoch": 0.8101561359988326, "grad_norm": 1.0610867429011148, "learning_rate": 9.290482640062147e-06, "loss": 0.5551, "step": 27760 }, { "epoch": 0.8103020574930687, "grad_norm": 1.1151588296115174, "learning_rate": 9.284107170563666e-06, "loss": 0.5698, "step": 27765 }, { "epoch": 0.8104479789873048, "grad_norm": 1.188136795631472, "learning_rate": 9.277735942976049e-06, "loss": 0.5736, "step": 27770 }, { "epoch": 0.8105939004815409, "grad_norm": 0.9672101091187782, "learning_rate": 9.271368958782947e-06, "loss": 0.5233, "step": 27775 }, { "epoch": 0.810739821975777, "grad_norm": 0.8573582914705824, "learning_rate": 9.265006219467033e-06, "loss": 0.5033, "step": 27780 }, { "epoch": 0.8108857434700132, "grad_norm": 0.9623949590488653, "learning_rate": 9.258647726509996e-06, "loss": 0.4892, "step": 27785 }, { "epoch": 0.8110316649642493, "grad_norm": 1.0744293737302077, "learning_rate": 9.25229348139253e-06, "loss": 0.498, "step": 27790 }, { "epoch": 0.8111775864584854, "grad_norm": 1.1014697423501634, "learning_rate": 9.245943485594332e-06, "loss": 0.5976, "step": 27795 }, { "epoch": 0.8113235079527215, "grad_norm": 0.987113832028931, "learning_rate": 9.239597740594122e-06, "loss": 0.5226, "step": 27800 }, { "epoch": 0.8114694294469575, "grad_norm": 1.2630926845648756, "learning_rate": 9.233256247869621e-06, "loss": 0.5543, "step": 27805 }, { "epoch": 0.8116153509411936, "grad_norm": 1.0864837101800717, "learning_rate": 9.22691900889757e-06, "loss": 0.4879, "step": 27810 }, { "epoch": 0.8117612724354297, "grad_norm": 0.9343961114199191, "learning_rate": 9.22058602515371e-06, "loss": 0.4881, "step": 27815 }, { "epoch": 0.8119071939296658, "grad_norm": 0.9889183974951017, "learning_rate": 9.214257298112798e-06, "loss": 0.5518, "step": 27820 }, { "epoch": 0.8120531154239019, "grad_norm": 0.9559528303074131, "learning_rate": 9.207932829248592e-06, "loss": 0.5169, "step": 27825 }, { "epoch": 0.812199036918138, "grad_norm": 1.137927860617841, "learning_rate": 9.201612620033858e-06, "loss": 0.5746, "step": 27830 }, { "epoch": 0.8123449584123742, "grad_norm": 0.8940713593425644, "learning_rate": 9.19529667194039e-06, "loss": 0.4852, "step": 27835 }, { "epoch": 0.8124908799066103, "grad_norm": 0.9521947145858813, "learning_rate": 9.188984986438962e-06, "loss": 0.5448, "step": 27840 }, { "epoch": 0.8126368014008464, "grad_norm": 0.8714111086224701, "learning_rate": 9.182677564999373e-06, "loss": 0.4721, "step": 27845 }, { "epoch": 0.8127827228950825, "grad_norm": 1.0190454412461618, "learning_rate": 9.176374409090415e-06, "loss": 0.5517, "step": 27850 }, { "epoch": 0.8129286443893186, "grad_norm": 0.9827922185977388, "learning_rate": 9.170075520179911e-06, "loss": 0.581, "step": 27855 }, { "epoch": 0.8130745658835546, "grad_norm": 1.2429780460859878, "learning_rate": 9.163780899734664e-06, "loss": 0.6059, "step": 27860 }, { "epoch": 0.8132204873777907, "grad_norm": 1.1901717120842787, "learning_rate": 9.157490549220499e-06, "loss": 0.5467, "step": 27865 }, { "epoch": 0.8133664088720268, "grad_norm": 1.199586783243458, "learning_rate": 9.151204470102238e-06, "loss": 0.5854, "step": 27870 }, { "epoch": 0.8135123303662629, "grad_norm": 0.9925734983397883, "learning_rate": 9.144922663843705e-06, "loss": 0.5523, "step": 27875 }, { "epoch": 0.813658251860499, "grad_norm": 0.996518396023555, "learning_rate": 9.138645131907754e-06, "loss": 0.5009, "step": 27880 }, { "epoch": 0.8138041733547352, "grad_norm": 0.9651210286462376, "learning_rate": 9.132371875756212e-06, "loss": 0.5114, "step": 27885 }, { "epoch": 0.8139500948489713, "grad_norm": 0.9642596226734768, "learning_rate": 9.12610289684993e-06, "loss": 0.532, "step": 27890 }, { "epoch": 0.8140960163432074, "grad_norm": 0.9317355303371309, "learning_rate": 9.119838196648749e-06, "loss": 0.5634, "step": 27895 }, { "epoch": 0.8142419378374435, "grad_norm": 1.1931870554692583, "learning_rate": 9.113577776611528e-06, "loss": 0.5678, "step": 27900 }, { "epoch": 0.8143878593316796, "grad_norm": 0.9705321649499273, "learning_rate": 9.107321638196128e-06, "loss": 0.4981, "step": 27905 }, { "epoch": 0.8145337808259157, "grad_norm": 0.9848314424466137, "learning_rate": 9.101069782859397e-06, "loss": 0.5374, "step": 27910 }, { "epoch": 0.8146797023201517, "grad_norm": 0.9849529556958224, "learning_rate": 9.094822212057202e-06, "loss": 0.4833, "step": 27915 }, { "epoch": 0.8148256238143878, "grad_norm": 1.113136201208091, "learning_rate": 9.0885789272444e-06, "loss": 0.4873, "step": 27920 }, { "epoch": 0.8149715453086239, "grad_norm": 1.1096102421006975, "learning_rate": 9.082339929874866e-06, "loss": 0.5669, "step": 27925 }, { "epoch": 0.81511746680286, "grad_norm": 1.0755514707794318, "learning_rate": 9.076105221401463e-06, "loss": 0.4828, "step": 27930 }, { "epoch": 0.8152633882970962, "grad_norm": 1.3527511108946093, "learning_rate": 9.069874803276057e-06, "loss": 0.5815, "step": 27935 }, { "epoch": 0.8154093097913323, "grad_norm": 1.1278287118302506, "learning_rate": 9.06364867694952e-06, "loss": 0.5447, "step": 27940 }, { "epoch": 0.8155552312855684, "grad_norm": 1.2232301347475611, "learning_rate": 9.057426843871717e-06, "loss": 0.5642, "step": 27945 }, { "epoch": 0.8157011527798045, "grad_norm": 1.1861420741796393, "learning_rate": 9.051209305491525e-06, "loss": 0.5632, "step": 27950 }, { "epoch": 0.8158470742740406, "grad_norm": 1.0056647253659428, "learning_rate": 9.044996063256805e-06, "loss": 0.5142, "step": 27955 }, { "epoch": 0.8159929957682767, "grad_norm": 1.171597051449102, "learning_rate": 9.038787118614445e-06, "loss": 0.5709, "step": 27960 }, { "epoch": 0.8161389172625128, "grad_norm": 1.0595601349295236, "learning_rate": 9.03258247301029e-06, "loss": 0.5356, "step": 27965 }, { "epoch": 0.8162848387567488, "grad_norm": 1.0476062087564835, "learning_rate": 9.026382127889223e-06, "loss": 0.5915, "step": 27970 }, { "epoch": 0.8164307602509849, "grad_norm": 0.941600054199794, "learning_rate": 9.02018608469511e-06, "loss": 0.5205, "step": 27975 }, { "epoch": 0.816576681745221, "grad_norm": 1.0309150295288003, "learning_rate": 9.013994344870807e-06, "loss": 0.5456, "step": 27980 }, { "epoch": 0.8167226032394572, "grad_norm": 0.998801669629097, "learning_rate": 9.00780690985818e-06, "loss": 0.5401, "step": 27985 }, { "epoch": 0.8168685247336933, "grad_norm": 1.079199015163746, "learning_rate": 9.001623781098085e-06, "loss": 0.5073, "step": 27990 }, { "epoch": 0.8170144462279294, "grad_norm": 1.037364112733121, "learning_rate": 8.99544496003039e-06, "loss": 0.5135, "step": 27995 }, { "epoch": 0.8171603677221655, "grad_norm": 0.9737609701743194, "learning_rate": 8.989270448093943e-06, "loss": 0.5131, "step": 28000 }, { "epoch": 0.8173062892164016, "grad_norm": 1.0652790053330017, "learning_rate": 8.98310024672659e-06, "loss": 0.567, "step": 28005 }, { "epoch": 0.8174522107106377, "grad_norm": 1.032025735214532, "learning_rate": 8.976934357365183e-06, "loss": 0.527, "step": 28010 }, { "epoch": 0.8175981322048738, "grad_norm": 0.9831969342446276, "learning_rate": 8.970772781445557e-06, "loss": 0.5093, "step": 28015 }, { "epoch": 0.8177440536991099, "grad_norm": 1.0413412740147545, "learning_rate": 8.964615520402562e-06, "loss": 0.5775, "step": 28020 }, { "epoch": 0.8178899751933459, "grad_norm": 1.0107369579762189, "learning_rate": 8.95846257567002e-06, "loss": 0.5999, "step": 28025 }, { "epoch": 0.818035896687582, "grad_norm": 0.9411778469432223, "learning_rate": 8.952313948680773e-06, "loss": 0.502, "step": 28030 }, { "epoch": 0.8181818181818182, "grad_norm": 1.0656401668805855, "learning_rate": 8.946169640866625e-06, "loss": 0.5612, "step": 28035 }, { "epoch": 0.8183277396760543, "grad_norm": 0.956707045458958, "learning_rate": 8.940029653658407e-06, "loss": 0.4698, "step": 28040 }, { "epoch": 0.8184736611702904, "grad_norm": 0.8905059535363938, "learning_rate": 8.933893988485927e-06, "loss": 0.4924, "step": 28045 }, { "epoch": 0.8186195826645265, "grad_norm": 1.1552375069009455, "learning_rate": 8.927762646777988e-06, "loss": 0.5288, "step": 28050 }, { "epoch": 0.8187655041587626, "grad_norm": 1.1103000459352588, "learning_rate": 8.921635629962383e-06, "loss": 0.5049, "step": 28055 }, { "epoch": 0.8189114256529987, "grad_norm": 1.0887474143422102, "learning_rate": 8.915512939465903e-06, "loss": 0.5544, "step": 28060 }, { "epoch": 0.8190573471472348, "grad_norm": 1.2835730826144744, "learning_rate": 8.909394576714342e-06, "loss": 0.5687, "step": 28065 }, { "epoch": 0.8192032686414709, "grad_norm": 0.891441482662223, "learning_rate": 8.90328054313246e-06, "loss": 0.5479, "step": 28070 }, { "epoch": 0.819349190135707, "grad_norm": 1.0411416067683397, "learning_rate": 8.89717084014404e-06, "loss": 0.5512, "step": 28075 }, { "epoch": 0.819495111629943, "grad_norm": 0.9101170804735373, "learning_rate": 8.891065469171827e-06, "loss": 0.4968, "step": 28080 }, { "epoch": 0.8196410331241792, "grad_norm": 1.0574828395888316, "learning_rate": 8.88496443163757e-06, "loss": 0.5291, "step": 28085 }, { "epoch": 0.8197869546184153, "grad_norm": 1.040209554091426, "learning_rate": 8.87886772896202e-06, "loss": 0.5508, "step": 28090 }, { "epoch": 0.8199328761126514, "grad_norm": 1.088897895777659, "learning_rate": 8.872775362564897e-06, "loss": 0.5252, "step": 28095 }, { "epoch": 0.8200787976068875, "grad_norm": 1.1177754534567514, "learning_rate": 8.86668733386494e-06, "loss": 0.5308, "step": 28100 }, { "epoch": 0.8202247191011236, "grad_norm": 1.0953068065459572, "learning_rate": 8.860603644279836e-06, "loss": 0.5429, "step": 28105 }, { "epoch": 0.8203706405953597, "grad_norm": 1.1047356183143968, "learning_rate": 8.854524295226304e-06, "loss": 0.5751, "step": 28110 }, { "epoch": 0.8205165620895958, "grad_norm": 1.0343611392911816, "learning_rate": 8.848449288120028e-06, "loss": 0.5256, "step": 28115 }, { "epoch": 0.8206624835838319, "grad_norm": 0.9750535414852431, "learning_rate": 8.84237862437568e-06, "loss": 0.5346, "step": 28120 }, { "epoch": 0.820808405078068, "grad_norm": 0.9025739737203358, "learning_rate": 8.836312305406946e-06, "loss": 0.4982, "step": 28125 }, { "epoch": 0.820954326572304, "grad_norm": 1.1475953183753795, "learning_rate": 8.830250332626461e-06, "loss": 0.5314, "step": 28130 }, { "epoch": 0.8211002480665403, "grad_norm": 1.0874575516504863, "learning_rate": 8.82419270744588e-06, "loss": 0.5377, "step": 28135 }, { "epoch": 0.8212461695607763, "grad_norm": 1.095453790300035, "learning_rate": 8.818139431275829e-06, "loss": 0.5775, "step": 28140 }, { "epoch": 0.8213920910550124, "grad_norm": 1.1713486802735658, "learning_rate": 8.812090505525938e-06, "loss": 0.5893, "step": 28145 }, { "epoch": 0.8215380125492485, "grad_norm": 1.216258057259605, "learning_rate": 8.806045931604792e-06, "loss": 0.5276, "step": 28150 }, { "epoch": 0.8216839340434846, "grad_norm": 1.0214076925902091, "learning_rate": 8.80000571092e-06, "loss": 0.5746, "step": 28155 }, { "epoch": 0.8218298555377207, "grad_norm": 1.0048347057821552, "learning_rate": 8.793969844878135e-06, "loss": 0.5198, "step": 28160 }, { "epoch": 0.8219757770319568, "grad_norm": 0.9353233099340699, "learning_rate": 8.787938334884754e-06, "loss": 0.5083, "step": 28165 }, { "epoch": 0.8221216985261929, "grad_norm": 0.9939063910839164, "learning_rate": 8.781911182344426e-06, "loss": 0.5522, "step": 28170 }, { "epoch": 0.822267620020429, "grad_norm": 0.9079581624624045, "learning_rate": 8.775888388660665e-06, "loss": 0.5321, "step": 28175 }, { "epoch": 0.8224135415146651, "grad_norm": 1.0726363179867262, "learning_rate": 8.769869955236001e-06, "loss": 0.5352, "step": 28180 }, { "epoch": 0.8225594630089013, "grad_norm": 0.959918415461413, "learning_rate": 8.763855883471938e-06, "loss": 0.5042, "step": 28185 }, { "epoch": 0.8227053845031373, "grad_norm": 1.0977239243156898, "learning_rate": 8.75784617476896e-06, "loss": 0.5076, "step": 28190 }, { "epoch": 0.8228513059973734, "grad_norm": 0.970517053477958, "learning_rate": 8.751840830526551e-06, "loss": 0.5291, "step": 28195 }, { "epoch": 0.8229972274916095, "grad_norm": 1.0383910002614876, "learning_rate": 8.745839852143153e-06, "loss": 0.5608, "step": 28200 }, { "epoch": 0.8231431489858456, "grad_norm": 1.083696137125219, "learning_rate": 8.739843241016218e-06, "loss": 0.5174, "step": 28205 }, { "epoch": 0.8232890704800817, "grad_norm": 0.9979652126326642, "learning_rate": 8.733850998542159e-06, "loss": 0.5277, "step": 28210 }, { "epoch": 0.8234349919743178, "grad_norm": 1.1333915467510103, "learning_rate": 8.727863126116391e-06, "loss": 0.5568, "step": 28215 }, { "epoch": 0.8235809134685539, "grad_norm": 1.0254876309538437, "learning_rate": 8.7218796251333e-06, "loss": 0.5118, "step": 28220 }, { "epoch": 0.82372683496279, "grad_norm": 1.2602776551773491, "learning_rate": 8.71590049698625e-06, "loss": 0.5721, "step": 28225 }, { "epoch": 0.8238727564570261, "grad_norm": 1.0831427282107986, "learning_rate": 8.7099257430676e-06, "loss": 0.5355, "step": 28230 }, { "epoch": 0.8240186779512623, "grad_norm": 1.0171223226081503, "learning_rate": 8.703955364768674e-06, "loss": 0.488, "step": 28235 }, { "epoch": 0.8241645994454984, "grad_norm": 0.9708185384944034, "learning_rate": 8.697989363479802e-06, "loss": 0.5465, "step": 28240 }, { "epoch": 0.8243105209397344, "grad_norm": 1.105057452779647, "learning_rate": 8.692027740590255e-06, "loss": 0.5324, "step": 28245 }, { "epoch": 0.8244564424339705, "grad_norm": 0.9668363008976052, "learning_rate": 8.68607049748833e-06, "loss": 0.5732, "step": 28250 }, { "epoch": 0.8246023639282066, "grad_norm": 1.033607550446259, "learning_rate": 8.680117635561274e-06, "loss": 0.5439, "step": 28255 }, { "epoch": 0.8247482854224427, "grad_norm": 1.1332460262963568, "learning_rate": 8.674169156195314e-06, "loss": 0.5383, "step": 28260 }, { "epoch": 0.8248942069166788, "grad_norm": 0.9704722276677873, "learning_rate": 8.668225060775684e-06, "loss": 0.5049, "step": 28265 }, { "epoch": 0.8250401284109149, "grad_norm": 1.0734520785174781, "learning_rate": 8.662285350686555e-06, "loss": 0.5593, "step": 28270 }, { "epoch": 0.825186049905151, "grad_norm": 1.2226039397833643, "learning_rate": 8.656350027311116e-06, "loss": 0.5659, "step": 28275 }, { "epoch": 0.8253319713993871, "grad_norm": 1.0981170973257064, "learning_rate": 8.65041909203151e-06, "loss": 0.5265, "step": 28280 }, { "epoch": 0.8254778928936233, "grad_norm": 1.004740060648132, "learning_rate": 8.644492546228872e-06, "loss": 0.5655, "step": 28285 }, { "epoch": 0.8256238143878594, "grad_norm": 1.0108501955307818, "learning_rate": 8.638570391283308e-06, "loss": 0.5371, "step": 28290 }, { "epoch": 0.8257697358820955, "grad_norm": 0.899541588686353, "learning_rate": 8.632652628573895e-06, "loss": 0.4771, "step": 28295 }, { "epoch": 0.8259156573763315, "grad_norm": 1.0296300858875094, "learning_rate": 8.6267392594787e-06, "loss": 0.5124, "step": 28300 }, { "epoch": 0.8260615788705676, "grad_norm": 1.1133524034065478, "learning_rate": 8.620830285374759e-06, "loss": 0.5804, "step": 28305 }, { "epoch": 0.8262075003648037, "grad_norm": 0.9411745150253564, "learning_rate": 8.61492570763809e-06, "loss": 0.5521, "step": 28310 }, { "epoch": 0.8263534218590398, "grad_norm": 1.0599675669577968, "learning_rate": 8.609025527643686e-06, "loss": 0.5301, "step": 28315 }, { "epoch": 0.8264993433532759, "grad_norm": 1.0840057842616877, "learning_rate": 8.603129746765509e-06, "loss": 0.5521, "step": 28320 }, { "epoch": 0.826645264847512, "grad_norm": 1.0165467852720114, "learning_rate": 8.5972383663765e-06, "loss": 0.5052, "step": 28325 }, { "epoch": 0.8267911863417481, "grad_norm": 0.8989814474686102, "learning_rate": 8.591351387848587e-06, "loss": 0.5164, "step": 28330 }, { "epoch": 0.8269371078359843, "grad_norm": 0.9118256489721082, "learning_rate": 8.585468812552659e-06, "loss": 0.5644, "step": 28335 }, { "epoch": 0.8270830293302204, "grad_norm": 1.0532162727122696, "learning_rate": 8.579590641858573e-06, "loss": 0.5577, "step": 28340 }, { "epoch": 0.8272289508244565, "grad_norm": 1.257313215284296, "learning_rate": 8.573716877135183e-06, "loss": 0.5225, "step": 28345 }, { "epoch": 0.8273748723186926, "grad_norm": 1.0170258356679212, "learning_rate": 8.567847519750295e-06, "loss": 0.5631, "step": 28350 }, { "epoch": 0.8275207938129286, "grad_norm": 0.9929653597187704, "learning_rate": 8.561982571070712e-06, "loss": 0.5596, "step": 28355 }, { "epoch": 0.8276667153071647, "grad_norm": 1.0160876758399313, "learning_rate": 8.556122032462185e-06, "loss": 0.5724, "step": 28360 }, { "epoch": 0.8278126368014008, "grad_norm": 1.141845250660919, "learning_rate": 8.550265905289459e-06, "loss": 0.5187, "step": 28365 }, { "epoch": 0.8279585582956369, "grad_norm": 1.1030789903200593, "learning_rate": 8.544414190916233e-06, "loss": 0.5549, "step": 28370 }, { "epoch": 0.828104479789873, "grad_norm": 1.083840841464789, "learning_rate": 8.53856689070519e-06, "loss": 0.6088, "step": 28375 }, { "epoch": 0.8282504012841091, "grad_norm": 1.0822565265068411, "learning_rate": 8.532724006017992e-06, "loss": 0.513, "step": 28380 }, { "epoch": 0.8283963227783453, "grad_norm": 0.9196671456825369, "learning_rate": 8.526885538215253e-06, "loss": 0.5069, "step": 28385 }, { "epoch": 0.8285422442725814, "grad_norm": 1.2076770396945309, "learning_rate": 8.521051488656578e-06, "loss": 0.5434, "step": 28390 }, { "epoch": 0.8286881657668175, "grad_norm": 1.0448804213015241, "learning_rate": 8.515221858700526e-06, "loss": 0.5506, "step": 28395 }, { "epoch": 0.8288340872610536, "grad_norm": 1.1183658470134425, "learning_rate": 8.509396649704641e-06, "loss": 0.5518, "step": 28400 }, { "epoch": 0.8289800087552897, "grad_norm": 1.1529160634805338, "learning_rate": 8.503575863025433e-06, "loss": 0.5206, "step": 28405 }, { "epoch": 0.8291259302495257, "grad_norm": 0.9076697602378971, "learning_rate": 8.497759500018377e-06, "loss": 0.5025, "step": 28410 }, { "epoch": 0.8292718517437618, "grad_norm": 1.0777764594570893, "learning_rate": 8.491947562037922e-06, "loss": 0.5285, "step": 28415 }, { "epoch": 0.8294177732379979, "grad_norm": 1.0732513751571446, "learning_rate": 8.486140050437479e-06, "loss": 0.51, "step": 28420 }, { "epoch": 0.829563694732234, "grad_norm": 1.034793987011815, "learning_rate": 8.480336966569451e-06, "loss": 0.5097, "step": 28425 }, { "epoch": 0.8297096162264701, "grad_norm": 1.0730145674433134, "learning_rate": 8.474538311785188e-06, "loss": 0.5204, "step": 28430 }, { "epoch": 0.8298555377207063, "grad_norm": 1.0071562603218471, "learning_rate": 8.468744087435013e-06, "loss": 0.5266, "step": 28435 }, { "epoch": 0.8300014592149424, "grad_norm": 0.9491159383525267, "learning_rate": 8.462954294868223e-06, "loss": 0.5544, "step": 28440 }, { "epoch": 0.8301473807091785, "grad_norm": 1.0802805101983743, "learning_rate": 8.457168935433068e-06, "loss": 0.551, "step": 28445 }, { "epoch": 0.8302933022034146, "grad_norm": 1.1076149901445336, "learning_rate": 8.451388010476794e-06, "loss": 0.524, "step": 28450 }, { "epoch": 0.8304392236976507, "grad_norm": 1.0582825856257356, "learning_rate": 8.445611521345587e-06, "loss": 0.5551, "step": 28455 }, { "epoch": 0.8305851451918868, "grad_norm": 1.1098473641565707, "learning_rate": 8.439839469384614e-06, "loss": 0.5536, "step": 28460 }, { "epoch": 0.8307310666861228, "grad_norm": 1.0264150557442315, "learning_rate": 8.434071855937998e-06, "loss": 0.5425, "step": 28465 }, { "epoch": 0.8308769881803589, "grad_norm": 1.2411340187542772, "learning_rate": 8.428308682348845e-06, "loss": 0.5388, "step": 28470 }, { "epoch": 0.831022909674595, "grad_norm": 1.1225591692932502, "learning_rate": 8.422549949959216e-06, "loss": 0.5809, "step": 28475 }, { "epoch": 0.8311688311688312, "grad_norm": 1.1221056016126487, "learning_rate": 8.416795660110135e-06, "loss": 0.5534, "step": 28480 }, { "epoch": 0.8313147526630673, "grad_norm": 1.0054239316913236, "learning_rate": 8.411045814141598e-06, "loss": 0.5221, "step": 28485 }, { "epoch": 0.8314606741573034, "grad_norm": 0.96367337559212, "learning_rate": 8.40530041339256e-06, "loss": 0.5608, "step": 28490 }, { "epoch": 0.8316065956515395, "grad_norm": 1.149264757579184, "learning_rate": 8.399559459200955e-06, "loss": 0.5799, "step": 28495 }, { "epoch": 0.8317525171457756, "grad_norm": 1.1033586848665635, "learning_rate": 8.393822952903666e-06, "loss": 0.5437, "step": 28500 }, { "epoch": 0.8318984386400117, "grad_norm": 0.9867967183849311, "learning_rate": 8.388090895836542e-06, "loss": 0.544, "step": 28505 }, { "epoch": 0.8320443601342478, "grad_norm": 0.9994423911169564, "learning_rate": 8.382363289334402e-06, "loss": 0.5356, "step": 28510 }, { "epoch": 0.8321902816284839, "grad_norm": 0.8585144235536971, "learning_rate": 8.376640134731024e-06, "loss": 0.5004, "step": 28515 }, { "epoch": 0.8323362031227199, "grad_norm": 1.100973418503936, "learning_rate": 8.370921433359158e-06, "loss": 0.5689, "step": 28520 }, { "epoch": 0.832482124616956, "grad_norm": 1.10027922106769, "learning_rate": 8.365207186550507e-06, "loss": 0.5634, "step": 28525 }, { "epoch": 0.8326280461111922, "grad_norm": 0.8913974395967492, "learning_rate": 8.359497395635737e-06, "loss": 0.4718, "step": 28530 }, { "epoch": 0.8327739676054283, "grad_norm": 1.0219082854602857, "learning_rate": 8.35379206194448e-06, "loss": 0.5205, "step": 28535 }, { "epoch": 0.8329198890996644, "grad_norm": 0.9634836976040918, "learning_rate": 8.348091186805337e-06, "loss": 0.4809, "step": 28540 }, { "epoch": 0.8330658105939005, "grad_norm": 1.088646281085902, "learning_rate": 8.342394771545856e-06, "loss": 0.5374, "step": 28545 }, { "epoch": 0.8332117320881366, "grad_norm": 0.986038026809769, "learning_rate": 8.336702817492554e-06, "loss": 0.5203, "step": 28550 }, { "epoch": 0.8333576535823727, "grad_norm": 1.1086873720378472, "learning_rate": 8.331015325970912e-06, "loss": 0.5327, "step": 28555 }, { "epoch": 0.8335035750766088, "grad_norm": 0.9822926656397626, "learning_rate": 8.325332298305364e-06, "loss": 0.4861, "step": 28560 }, { "epoch": 0.8336494965708449, "grad_norm": 0.9687290575187417, "learning_rate": 8.319653735819314e-06, "loss": 0.5697, "step": 28565 }, { "epoch": 0.833795418065081, "grad_norm": 0.9589075781558747, "learning_rate": 8.313979639835115e-06, "loss": 0.5325, "step": 28570 }, { "epoch": 0.833941339559317, "grad_norm": 1.0331155781807098, "learning_rate": 8.3083100116741e-06, "loss": 0.5207, "step": 28575 }, { "epoch": 0.8340872610535532, "grad_norm": 1.059006857155687, "learning_rate": 8.30264485265653e-06, "loss": 0.5363, "step": 28580 }, { "epoch": 0.8342331825477893, "grad_norm": 1.0248103398743924, "learning_rate": 8.296984164101656e-06, "loss": 0.5517, "step": 28585 }, { "epoch": 0.8343791040420254, "grad_norm": 1.019010704777994, "learning_rate": 8.291327947327673e-06, "loss": 0.578, "step": 28590 }, { "epoch": 0.8345250255362615, "grad_norm": 1.0613855789308455, "learning_rate": 8.285676203651729e-06, "loss": 0.5535, "step": 28595 }, { "epoch": 0.8346709470304976, "grad_norm": 1.1123191800024483, "learning_rate": 8.280028934389947e-06, "loss": 0.5931, "step": 28600 }, { "epoch": 0.8348168685247337, "grad_norm": 0.9181602846127165, "learning_rate": 8.27438614085739e-06, "loss": 0.5171, "step": 28605 }, { "epoch": 0.8349627900189698, "grad_norm": 1.0824412807674593, "learning_rate": 8.268747824368098e-06, "loss": 0.4973, "step": 28610 }, { "epoch": 0.8351087115132059, "grad_norm": 1.146386789160925, "learning_rate": 8.263113986235053e-06, "loss": 0.5661, "step": 28615 }, { "epoch": 0.835254633007442, "grad_norm": 0.9747110943565919, "learning_rate": 8.257484627770198e-06, "loss": 0.5244, "step": 28620 }, { "epoch": 0.835400554501678, "grad_norm": 0.9233288735046112, "learning_rate": 8.25185975028444e-06, "loss": 0.4955, "step": 28625 }, { "epoch": 0.8355464759959143, "grad_norm": 1.050275123363343, "learning_rate": 8.246239355087629e-06, "loss": 0.5425, "step": 28630 }, { "epoch": 0.8356923974901503, "grad_norm": 0.9500637696242519, "learning_rate": 8.240623443488582e-06, "loss": 0.5454, "step": 28635 }, { "epoch": 0.8358383189843864, "grad_norm": 1.1290070942804287, "learning_rate": 8.23501201679507e-06, "loss": 0.5765, "step": 28640 }, { "epoch": 0.8359842404786225, "grad_norm": 1.0704560214774832, "learning_rate": 8.229405076313828e-06, "loss": 0.5862, "step": 28645 }, { "epoch": 0.8361301619728586, "grad_norm": 1.1897397905140032, "learning_rate": 8.223802623350517e-06, "loss": 0.5786, "step": 28650 }, { "epoch": 0.8362760834670947, "grad_norm": 1.0555545496877718, "learning_rate": 8.218204659209786e-06, "loss": 0.5486, "step": 28655 }, { "epoch": 0.8364220049613308, "grad_norm": 1.159109589314259, "learning_rate": 8.212611185195224e-06, "loss": 0.5714, "step": 28660 }, { "epoch": 0.8365679264555669, "grad_norm": 1.1027782964026718, "learning_rate": 8.207022202609371e-06, "loss": 0.5055, "step": 28665 }, { "epoch": 0.836713847949803, "grad_norm": 1.1051130134821823, "learning_rate": 8.201437712753738e-06, "loss": 0.5505, "step": 28670 }, { "epoch": 0.8368597694440391, "grad_norm": 1.0229282931915, "learning_rate": 8.195857716928759e-06, "loss": 0.4824, "step": 28675 }, { "epoch": 0.8370056909382753, "grad_norm": 1.0781369446828093, "learning_rate": 8.190282216433856e-06, "loss": 0.5272, "step": 28680 }, { "epoch": 0.8371516124325113, "grad_norm": 0.9882931415660549, "learning_rate": 8.184711212567384e-06, "loss": 0.5646, "step": 28685 }, { "epoch": 0.8372975339267474, "grad_norm": 1.03038850668961, "learning_rate": 8.179144706626653e-06, "loss": 0.529, "step": 28690 }, { "epoch": 0.8374434554209835, "grad_norm": 1.0291082492959558, "learning_rate": 8.173582699907929e-06, "loss": 0.5273, "step": 28695 }, { "epoch": 0.8375893769152196, "grad_norm": 0.9611983840125431, "learning_rate": 8.168025193706422e-06, "loss": 0.556, "step": 28700 }, { "epoch": 0.8377352984094557, "grad_norm": 1.1370373493521226, "learning_rate": 8.162472189316318e-06, "loss": 0.5355, "step": 28705 }, { "epoch": 0.8378812199036918, "grad_norm": 1.0119000872423265, "learning_rate": 8.156923688030719e-06, "loss": 0.5184, "step": 28710 }, { "epoch": 0.8380271413979279, "grad_norm": 1.1622485116258077, "learning_rate": 8.151379691141716e-06, "loss": 0.5407, "step": 28715 }, { "epoch": 0.838173062892164, "grad_norm": 1.0062494274676266, "learning_rate": 8.145840199940318e-06, "loss": 0.5152, "step": 28720 }, { "epoch": 0.8383189843864001, "grad_norm": 0.9899267035539158, "learning_rate": 8.140305215716504e-06, "loss": 0.5307, "step": 28725 }, { "epoch": 0.8384649058806363, "grad_norm": 1.0412525428999533, "learning_rate": 8.134774739759197e-06, "loss": 0.533, "step": 28730 }, { "epoch": 0.8386108273748724, "grad_norm": 1.0004537666802642, "learning_rate": 8.129248773356271e-06, "loss": 0.5007, "step": 28735 }, { "epoch": 0.8387567488691084, "grad_norm": 1.0829581922079066, "learning_rate": 8.12372731779456e-06, "loss": 0.5432, "step": 28740 }, { "epoch": 0.8389026703633445, "grad_norm": 0.9583958927864793, "learning_rate": 8.118210374359821e-06, "loss": 0.5093, "step": 28745 }, { "epoch": 0.8390485918575806, "grad_norm": 0.913992901106083, "learning_rate": 8.112697944336788e-06, "loss": 0.5101, "step": 28750 }, { "epoch": 0.8391945133518167, "grad_norm": 1.2014537477908251, "learning_rate": 8.107190029009132e-06, "loss": 0.5574, "step": 28755 }, { "epoch": 0.8393404348460528, "grad_norm": 1.0132131985581958, "learning_rate": 8.101686629659474e-06, "loss": 0.5017, "step": 28760 }, { "epoch": 0.8394863563402889, "grad_norm": 1.0710258587056813, "learning_rate": 8.09618774756939e-06, "loss": 0.5272, "step": 28765 }, { "epoch": 0.839632277834525, "grad_norm": 1.2474873027348299, "learning_rate": 8.090693384019383e-06, "loss": 0.5155, "step": 28770 }, { "epoch": 0.8397781993287611, "grad_norm": 1.20166957409939, "learning_rate": 8.085203540288929e-06, "loss": 0.5416, "step": 28775 }, { "epoch": 0.8399241208229973, "grad_norm": 1.114488990711099, "learning_rate": 8.07971821765643e-06, "loss": 0.5266, "step": 28780 }, { "epoch": 0.8400700423172334, "grad_norm": 1.0731360915713228, "learning_rate": 8.074237417399264e-06, "loss": 0.5445, "step": 28785 }, { "epoch": 0.8402159638114695, "grad_norm": 1.1664358796426688, "learning_rate": 8.068761140793715e-06, "loss": 0.5681, "step": 28790 }, { "epoch": 0.8403618853057055, "grad_norm": 1.1015874636305765, "learning_rate": 8.063289389115057e-06, "loss": 0.5387, "step": 28795 }, { "epoch": 0.8405078067999416, "grad_norm": 0.9549948952316141, "learning_rate": 8.057822163637476e-06, "loss": 0.5227, "step": 28800 }, { "epoch": 0.8406537282941777, "grad_norm": 1.029128241363529, "learning_rate": 8.052359465634122e-06, "loss": 0.5286, "step": 28805 }, { "epoch": 0.8407996497884138, "grad_norm": 0.9186237891163542, "learning_rate": 8.046901296377093e-06, "loss": 0.5178, "step": 28810 }, { "epoch": 0.8409455712826499, "grad_norm": 1.0339676015107346, "learning_rate": 8.041447657137411e-06, "loss": 0.582, "step": 28815 }, { "epoch": 0.841091492776886, "grad_norm": 1.0642172182405518, "learning_rate": 8.035998549185069e-06, "loss": 0.5635, "step": 28820 }, { "epoch": 0.8412374142711221, "grad_norm": 0.9247459867156482, "learning_rate": 8.030553973788985e-06, "loss": 0.5449, "step": 28825 }, { "epoch": 0.8413833357653583, "grad_norm": 1.224868752668849, "learning_rate": 8.025113932217043e-06, "loss": 0.5414, "step": 28830 }, { "epoch": 0.8415292572595944, "grad_norm": 0.9502953553082312, "learning_rate": 8.01967842573605e-06, "loss": 0.5298, "step": 28835 }, { "epoch": 0.8416751787538305, "grad_norm": 1.0874792108850762, "learning_rate": 8.014247455611764e-06, "loss": 0.5548, "step": 28840 }, { "epoch": 0.8418211002480666, "grad_norm": 0.9818337211906557, "learning_rate": 8.00882102310889e-06, "loss": 0.5296, "step": 28845 }, { "epoch": 0.8419670217423026, "grad_norm": 1.175721516795925, "learning_rate": 8.003399129491069e-06, "loss": 0.5581, "step": 28850 }, { "epoch": 0.8421129432365387, "grad_norm": 1.0470979128166304, "learning_rate": 7.997981776020907e-06, "loss": 0.5269, "step": 28855 }, { "epoch": 0.8422588647307748, "grad_norm": 1.1079233534035513, "learning_rate": 7.992568963959914e-06, "loss": 0.5355, "step": 28860 }, { "epoch": 0.8424047862250109, "grad_norm": 0.8833179696592207, "learning_rate": 7.987160694568576e-06, "loss": 0.5415, "step": 28865 }, { "epoch": 0.842550707719247, "grad_norm": 0.9494362405020401, "learning_rate": 7.98175696910631e-06, "loss": 0.4982, "step": 28870 }, { "epoch": 0.8426966292134831, "grad_norm": 1.2477074151620577, "learning_rate": 7.976357788831468e-06, "loss": 0.5826, "step": 28875 }, { "epoch": 0.8428425507077193, "grad_norm": 0.9811005680142201, "learning_rate": 7.970963155001362e-06, "loss": 0.5423, "step": 28880 }, { "epoch": 0.8429884722019554, "grad_norm": 1.0398149955487936, "learning_rate": 7.965573068872218e-06, "loss": 0.5023, "step": 28885 }, { "epoch": 0.8431343936961915, "grad_norm": 1.004025921231077, "learning_rate": 7.96018753169923e-06, "loss": 0.5453, "step": 28890 }, { "epoch": 0.8432803151904276, "grad_norm": 1.039886239170758, "learning_rate": 7.954806544736513e-06, "loss": 0.5124, "step": 28895 }, { "epoch": 0.8434262366846637, "grad_norm": 1.0107755703127719, "learning_rate": 7.949430109237137e-06, "loss": 0.5231, "step": 28900 }, { "epoch": 0.8435721581788997, "grad_norm": 0.990302120857384, "learning_rate": 7.944058226453102e-06, "loss": 0.5486, "step": 28905 }, { "epoch": 0.8437180796731358, "grad_norm": 1.0223309498940814, "learning_rate": 7.938690897635353e-06, "loss": 0.5899, "step": 28910 }, { "epoch": 0.8438640011673719, "grad_norm": 0.9605595533318095, "learning_rate": 7.933328124033771e-06, "loss": 0.5083, "step": 28915 }, { "epoch": 0.844009922661608, "grad_norm": 0.9504214537266125, "learning_rate": 7.927969906897173e-06, "loss": 0.5552, "step": 28920 }, { "epoch": 0.8441558441558441, "grad_norm": 1.0211576402563416, "learning_rate": 7.922616247473332e-06, "loss": 0.5527, "step": 28925 }, { "epoch": 0.8443017656500803, "grad_norm": 0.9926792275004087, "learning_rate": 7.91726714700894e-06, "loss": 0.5095, "step": 28930 }, { "epoch": 0.8444476871443164, "grad_norm": 1.0975816362819006, "learning_rate": 7.911922606749634e-06, "loss": 0.5363, "step": 28935 }, { "epoch": 0.8445936086385525, "grad_norm": 0.9984023285668479, "learning_rate": 7.906582627939994e-06, "loss": 0.5522, "step": 28940 }, { "epoch": 0.8447395301327886, "grad_norm": 1.043262724859837, "learning_rate": 7.901247211823528e-06, "loss": 0.5286, "step": 28945 }, { "epoch": 0.8448854516270247, "grad_norm": 0.9674899976136536, "learning_rate": 7.895916359642699e-06, "loss": 0.4654, "step": 28950 }, { "epoch": 0.8450313731212608, "grad_norm": 0.824003242529688, "learning_rate": 7.890590072638877e-06, "loss": 0.5189, "step": 28955 }, { "epoch": 0.8451772946154968, "grad_norm": 1.0577531787174204, "learning_rate": 7.885268352052402e-06, "loss": 0.5382, "step": 28960 }, { "epoch": 0.8453232161097329, "grad_norm": 1.0361514509787901, "learning_rate": 7.87995119912253e-06, "loss": 0.5172, "step": 28965 }, { "epoch": 0.845469137603969, "grad_norm": 1.188232480309185, "learning_rate": 7.874638615087462e-06, "loss": 0.5337, "step": 28970 }, { "epoch": 0.8456150590982051, "grad_norm": 1.0091505899203395, "learning_rate": 7.869330601184336e-06, "loss": 0.5618, "step": 28975 }, { "epoch": 0.8457609805924413, "grad_norm": 1.0137750187902712, "learning_rate": 7.864027158649215e-06, "loss": 0.5584, "step": 28980 }, { "epoch": 0.8459069020866774, "grad_norm": 0.992773766493352, "learning_rate": 7.858728288717109e-06, "loss": 0.5119, "step": 28985 }, { "epoch": 0.8460528235809135, "grad_norm": 0.920133439807406, "learning_rate": 7.853433992621953e-06, "loss": 0.5336, "step": 28990 }, { "epoch": 0.8461987450751496, "grad_norm": 0.8635377933533684, "learning_rate": 7.848144271596631e-06, "loss": 0.5141, "step": 28995 }, { "epoch": 0.8463446665693857, "grad_norm": 0.9954243159172684, "learning_rate": 7.842859126872949e-06, "loss": 0.5689, "step": 29000 }, { "epoch": 0.8464905880636218, "grad_norm": 1.0335549413132776, "learning_rate": 7.837578559681652e-06, "loss": 0.5354, "step": 29005 }, { "epoch": 0.8466365095578579, "grad_norm": 1.0131267707665483, "learning_rate": 7.832302571252418e-06, "loss": 0.5459, "step": 29010 }, { "epoch": 0.8467824310520939, "grad_norm": 0.9532925750661183, "learning_rate": 7.82703116281386e-06, "loss": 0.528, "step": 29015 }, { "epoch": 0.84692835254633, "grad_norm": 1.0011634479318117, "learning_rate": 7.821764335593524e-06, "loss": 0.4985, "step": 29020 }, { "epoch": 0.8470742740405661, "grad_norm": 0.916946067830353, "learning_rate": 7.816502090817891e-06, "loss": 0.4593, "step": 29025 }, { "epoch": 0.8472201955348023, "grad_norm": 1.2908915911985754, "learning_rate": 7.811244429712372e-06, "loss": 0.5747, "step": 29030 }, { "epoch": 0.8473661170290384, "grad_norm": 1.0475907074270305, "learning_rate": 7.805991353501303e-06, "loss": 0.5589, "step": 29035 }, { "epoch": 0.8475120385232745, "grad_norm": 1.0050993561620114, "learning_rate": 7.800742863407978e-06, "loss": 0.5374, "step": 29040 }, { "epoch": 0.8476579600175106, "grad_norm": 0.9157725715882323, "learning_rate": 7.795498960654592e-06, "loss": 0.5076, "step": 29045 }, { "epoch": 0.8478038815117467, "grad_norm": 0.9866826543310594, "learning_rate": 7.790259646462291e-06, "loss": 0.5622, "step": 29050 }, { "epoch": 0.8479498030059828, "grad_norm": 0.8709154790155849, "learning_rate": 7.785024922051148e-06, "loss": 0.5464, "step": 29055 }, { "epoch": 0.8480957245002189, "grad_norm": 1.0186272960006317, "learning_rate": 7.77979478864016e-06, "loss": 0.5288, "step": 29060 }, { "epoch": 0.848241645994455, "grad_norm": 1.181732058870617, "learning_rate": 7.77456924744727e-06, "loss": 0.5018, "step": 29065 }, { "epoch": 0.848387567488691, "grad_norm": 0.9589909165194724, "learning_rate": 7.769348299689342e-06, "loss": 0.5384, "step": 29070 }, { "epoch": 0.8485334889829271, "grad_norm": 0.8477710164307056, "learning_rate": 7.764131946582164e-06, "loss": 0.5258, "step": 29075 }, { "epoch": 0.8486794104771633, "grad_norm": 0.9208784431749927, "learning_rate": 7.758920189340462e-06, "loss": 0.5413, "step": 29080 }, { "epoch": 0.8488253319713994, "grad_norm": 1.2303515323518492, "learning_rate": 7.753713029177899e-06, "loss": 0.5726, "step": 29085 }, { "epoch": 0.8489712534656355, "grad_norm": 0.9496198275718524, "learning_rate": 7.748510467307055e-06, "loss": 0.5267, "step": 29090 }, { "epoch": 0.8491171749598716, "grad_norm": 0.9965271908352583, "learning_rate": 7.743312504939442e-06, "loss": 0.5863, "step": 29095 }, { "epoch": 0.8492630964541077, "grad_norm": 1.099989548761296, "learning_rate": 7.738119143285511e-06, "loss": 0.5207, "step": 29100 }, { "epoch": 0.8494090179483438, "grad_norm": 0.9593993920346373, "learning_rate": 7.732930383554613e-06, "loss": 0.5005, "step": 29105 }, { "epoch": 0.8495549394425799, "grad_norm": 1.0007556620337354, "learning_rate": 7.727746226955071e-06, "loss": 0.5301, "step": 29110 }, { "epoch": 0.849700860936816, "grad_norm": 0.9979739404571979, "learning_rate": 7.7225666746941e-06, "loss": 0.5144, "step": 29115 }, { "epoch": 0.849846782431052, "grad_norm": 1.0030392955243845, "learning_rate": 7.71739172797786e-06, "loss": 0.5519, "step": 29120 }, { "epoch": 0.8499927039252881, "grad_norm": 0.8989583501412702, "learning_rate": 7.712221388011432e-06, "loss": 0.5305, "step": 29125 }, { "epoch": 0.8501386254195243, "grad_norm": 1.0627584780620454, "learning_rate": 7.707055655998821e-06, "loss": 0.5292, "step": 29130 }, { "epoch": 0.8502845469137604, "grad_norm": 1.251002920468956, "learning_rate": 7.701894533142974e-06, "loss": 0.5609, "step": 29135 }, { "epoch": 0.8504304684079965, "grad_norm": 0.9849854961762665, "learning_rate": 7.696738020645751e-06, "loss": 0.5382, "step": 29140 }, { "epoch": 0.8505763899022326, "grad_norm": 1.0427387168596631, "learning_rate": 7.691586119707942e-06, "loss": 0.5319, "step": 29145 }, { "epoch": 0.8507223113964687, "grad_norm": 1.1278618413710044, "learning_rate": 7.686438831529258e-06, "loss": 0.6103, "step": 29150 }, { "epoch": 0.8508682328907048, "grad_norm": 1.057858831586323, "learning_rate": 7.681296157308353e-06, "loss": 0.5405, "step": 29155 }, { "epoch": 0.8510141543849409, "grad_norm": 1.0191740421522106, "learning_rate": 7.676158098242786e-06, "loss": 0.5173, "step": 29160 }, { "epoch": 0.851160075879177, "grad_norm": 0.9951918045696453, "learning_rate": 7.671024655529053e-06, "loss": 0.5042, "step": 29165 }, { "epoch": 0.8513059973734131, "grad_norm": 0.9846268500510177, "learning_rate": 7.66589583036257e-06, "loss": 0.563, "step": 29170 }, { "epoch": 0.8514519188676493, "grad_norm": 0.9567677584071608, "learning_rate": 7.660771623937676e-06, "loss": 0.5053, "step": 29175 }, { "epoch": 0.8515978403618853, "grad_norm": 0.9677484857063607, "learning_rate": 7.655652037447649e-06, "loss": 0.5435, "step": 29180 }, { "epoch": 0.8517437618561214, "grad_norm": 1.4106466861289337, "learning_rate": 7.65053707208467e-06, "loss": 0.6132, "step": 29185 }, { "epoch": 0.8518896833503575, "grad_norm": 0.9950799720350395, "learning_rate": 7.645426729039864e-06, "loss": 0.5199, "step": 29190 }, { "epoch": 0.8520356048445936, "grad_norm": 1.0698083048875968, "learning_rate": 7.64032100950326e-06, "loss": 0.5364, "step": 29195 }, { "epoch": 0.8521815263388297, "grad_norm": 1.0565794819331658, "learning_rate": 7.635219914663825e-06, "loss": 0.5511, "step": 29200 }, { "epoch": 0.8523274478330658, "grad_norm": 1.096010294736017, "learning_rate": 7.630123445709445e-06, "loss": 0.5493, "step": 29205 }, { "epoch": 0.8524733693273019, "grad_norm": 1.098027658757928, "learning_rate": 7.625031603826924e-06, "loss": 0.5896, "step": 29210 }, { "epoch": 0.852619290821538, "grad_norm": 0.9774815360167062, "learning_rate": 7.619944390201997e-06, "loss": 0.4918, "step": 29215 }, { "epoch": 0.8527652123157741, "grad_norm": 1.0660118285118299, "learning_rate": 7.614861806019312e-06, "loss": 0.5297, "step": 29220 }, { "epoch": 0.8529111338100103, "grad_norm": 1.105076660124108, "learning_rate": 7.60978385246245e-06, "loss": 0.4709, "step": 29225 }, { "epoch": 0.8530570553042464, "grad_norm": 0.9820736367481939, "learning_rate": 7.604710530713904e-06, "loss": 0.5087, "step": 29230 }, { "epoch": 0.8532029767984824, "grad_norm": 0.9671971371307803, "learning_rate": 7.599641841955091e-06, "loss": 0.5434, "step": 29235 }, { "epoch": 0.8533488982927185, "grad_norm": 1.2751315450238296, "learning_rate": 7.594577787366353e-06, "loss": 0.5597, "step": 29240 }, { "epoch": 0.8534948197869546, "grad_norm": 1.031491900253608, "learning_rate": 7.5895183681269385e-06, "loss": 0.5039, "step": 29245 }, { "epoch": 0.8536407412811907, "grad_norm": 1.1107097811299178, "learning_rate": 7.584463585415044e-06, "loss": 0.5452, "step": 29250 }, { "epoch": 0.8537866627754268, "grad_norm": 1.0858412343028454, "learning_rate": 7.579413440407757e-06, "loss": 0.5312, "step": 29255 }, { "epoch": 0.8539325842696629, "grad_norm": 0.9569172340780231, "learning_rate": 7.574367934281112e-06, "loss": 0.5183, "step": 29260 }, { "epoch": 0.854078505763899, "grad_norm": 0.9439550370251739, "learning_rate": 7.56932706821003e-06, "loss": 0.5064, "step": 29265 }, { "epoch": 0.8542244272581351, "grad_norm": 1.0566462796605756, "learning_rate": 7.564290843368388e-06, "loss": 0.5768, "step": 29270 }, { "epoch": 0.8543703487523713, "grad_norm": 1.0767325754111787, "learning_rate": 7.5592592609289595e-06, "loss": 0.5666, "step": 29275 }, { "epoch": 0.8545162702466074, "grad_norm": 0.971360966282966, "learning_rate": 7.554232322063436e-06, "loss": 0.5559, "step": 29280 }, { "epoch": 0.8546621917408435, "grad_norm": 0.9725763079953507, "learning_rate": 7.549210027942448e-06, "loss": 0.491, "step": 29285 }, { "epoch": 0.8548081132350795, "grad_norm": 0.9526262228538975, "learning_rate": 7.544192379735515e-06, "loss": 0.5534, "step": 29290 }, { "epoch": 0.8549540347293156, "grad_norm": 0.9350550607996951, "learning_rate": 7.5391793786111e-06, "loss": 0.5389, "step": 29295 }, { "epoch": 0.8550999562235517, "grad_norm": 1.1512225106044671, "learning_rate": 7.534171025736572e-06, "loss": 0.5555, "step": 29300 }, { "epoch": 0.8552458777177878, "grad_norm": 0.9629784255139604, "learning_rate": 7.52916732227822e-06, "loss": 0.5782, "step": 29305 }, { "epoch": 0.8553917992120239, "grad_norm": 1.1062524292030902, "learning_rate": 7.524168269401247e-06, "loss": 0.5439, "step": 29310 }, { "epoch": 0.85553772070626, "grad_norm": 1.0406242052793033, "learning_rate": 7.519173868269773e-06, "loss": 0.502, "step": 29315 }, { "epoch": 0.8556836422004961, "grad_norm": 0.8514247689166575, "learning_rate": 7.514184120046848e-06, "loss": 0.5052, "step": 29320 }, { "epoch": 0.8558295636947323, "grad_norm": 0.9981273701994329, "learning_rate": 7.509199025894416e-06, "loss": 0.5002, "step": 29325 }, { "epoch": 0.8559754851889684, "grad_norm": 0.9299183496272749, "learning_rate": 7.504218586973362e-06, "loss": 0.5689, "step": 29330 }, { "epoch": 0.8561214066832045, "grad_norm": 0.9649267101768552, "learning_rate": 7.4992428044434615e-06, "loss": 0.4939, "step": 29335 }, { "epoch": 0.8562673281774406, "grad_norm": 1.0297776506641034, "learning_rate": 7.4942716794634285e-06, "loss": 0.5825, "step": 29340 }, { "epoch": 0.8564132496716766, "grad_norm": 1.0835927166519497, "learning_rate": 7.489305213190878e-06, "loss": 0.5747, "step": 29345 }, { "epoch": 0.8565591711659127, "grad_norm": 1.1930106256009805, "learning_rate": 7.4843434067823406e-06, "loss": 0.639, "step": 29350 }, { "epoch": 0.8567050926601488, "grad_norm": 0.9035318334904757, "learning_rate": 7.479386261393277e-06, "loss": 0.4922, "step": 29355 }, { "epoch": 0.8568510141543849, "grad_norm": 1.1758228799907051, "learning_rate": 7.474433778178036e-06, "loss": 0.5867, "step": 29360 }, { "epoch": 0.856996935648621, "grad_norm": 1.1377733620064063, "learning_rate": 7.469485958289909e-06, "loss": 0.4864, "step": 29365 }, { "epoch": 0.8571428571428571, "grad_norm": 0.9786212987924846, "learning_rate": 7.464542802881081e-06, "loss": 0.5254, "step": 29370 }, { "epoch": 0.8572887786370933, "grad_norm": 1.2119135254173408, "learning_rate": 7.4596043131026566e-06, "loss": 0.5174, "step": 29375 }, { "epoch": 0.8574347001313294, "grad_norm": 0.9651031770906952, "learning_rate": 7.454670490104667e-06, "loss": 0.5086, "step": 29380 }, { "epoch": 0.8575806216255655, "grad_norm": 1.1193861403779517, "learning_rate": 7.449741335036026e-06, "loss": 0.5154, "step": 29385 }, { "epoch": 0.8577265431198016, "grad_norm": 1.1527638564244398, "learning_rate": 7.444816849044597e-06, "loss": 0.5984, "step": 29390 }, { "epoch": 0.8578724646140377, "grad_norm": 0.8773588572099367, "learning_rate": 7.439897033277126e-06, "loss": 0.481, "step": 29395 }, { "epoch": 0.8580183861082737, "grad_norm": 0.8357452491954406, "learning_rate": 7.4349818888793e-06, "loss": 0.4871, "step": 29400 }, { "epoch": 0.8581643076025098, "grad_norm": 0.9615751916353773, "learning_rate": 7.43007141699568e-06, "loss": 0.5532, "step": 29405 }, { "epoch": 0.8583102290967459, "grad_norm": 1.0489755749147818, "learning_rate": 7.42516561876978e-06, "loss": 0.5293, "step": 29410 }, { "epoch": 0.858456150590982, "grad_norm": 1.04571543714412, "learning_rate": 7.420264495343999e-06, "loss": 0.5052, "step": 29415 }, { "epoch": 0.8586020720852181, "grad_norm": 0.9461085453647297, "learning_rate": 7.415368047859651e-06, "loss": 0.5069, "step": 29420 }, { "epoch": 0.8587479935794543, "grad_norm": 0.9693742427159968, "learning_rate": 7.4104762774569775e-06, "loss": 0.548, "step": 29425 }, { "epoch": 0.8588939150736904, "grad_norm": 1.1795387010120408, "learning_rate": 7.405589185275105e-06, "loss": 0.5123, "step": 29430 }, { "epoch": 0.8590398365679265, "grad_norm": 0.9182819818812419, "learning_rate": 7.4007067724520925e-06, "loss": 0.5047, "step": 29435 }, { "epoch": 0.8591857580621626, "grad_norm": 1.1434935929700647, "learning_rate": 7.395829040124899e-06, "loss": 0.5712, "step": 29440 }, { "epoch": 0.8593316795563987, "grad_norm": 0.9869355446577552, "learning_rate": 7.390955989429392e-06, "loss": 0.5322, "step": 29445 }, { "epoch": 0.8594776010506348, "grad_norm": 0.9635511933708542, "learning_rate": 7.386087621500362e-06, "loss": 0.5117, "step": 29450 }, { "epoch": 0.8596235225448708, "grad_norm": 0.9698633334364771, "learning_rate": 7.381223937471488e-06, "loss": 0.5118, "step": 29455 }, { "epoch": 0.8597694440391069, "grad_norm": 1.1973984980916046, "learning_rate": 7.376364938475378e-06, "loss": 0.5647, "step": 29460 }, { "epoch": 0.859915365533343, "grad_norm": 0.9986681878543255, "learning_rate": 7.371510625643532e-06, "loss": 0.529, "step": 29465 }, { "epoch": 0.8600612870275791, "grad_norm": 0.9926797038063817, "learning_rate": 7.366661000106378e-06, "loss": 0.475, "step": 29470 }, { "epoch": 0.8602072085218153, "grad_norm": 1.0370216750595327, "learning_rate": 7.361816062993236e-06, "loss": 0.5075, "step": 29475 }, { "epoch": 0.8603531300160514, "grad_norm": 0.9174706267697447, "learning_rate": 7.35697581543234e-06, "loss": 0.5385, "step": 29480 }, { "epoch": 0.8604990515102875, "grad_norm": 1.1573158443910245, "learning_rate": 7.352140258550835e-06, "loss": 0.5261, "step": 29485 }, { "epoch": 0.8606449730045236, "grad_norm": 1.0980753393416467, "learning_rate": 7.347309393474763e-06, "loss": 0.5769, "step": 29490 }, { "epoch": 0.8607908944987597, "grad_norm": 0.9298358491377472, "learning_rate": 7.3424832213290965e-06, "loss": 0.5121, "step": 29495 }, { "epoch": 0.8609368159929958, "grad_norm": 1.0873244015164854, "learning_rate": 7.33766174323768e-06, "loss": 0.5138, "step": 29500 }, { "epoch": 0.8610827374872319, "grad_norm": 1.0637996349861991, "learning_rate": 7.3328449603233e-06, "loss": 0.5663, "step": 29505 }, { "epoch": 0.8612286589814679, "grad_norm": 0.8792711426318676, "learning_rate": 7.328032873707626e-06, "loss": 0.5573, "step": 29510 }, { "epoch": 0.861374580475704, "grad_norm": 1.0346410026508894, "learning_rate": 7.323225484511246e-06, "loss": 0.5422, "step": 29515 }, { "epoch": 0.8615205019699401, "grad_norm": 1.196555233866233, "learning_rate": 7.3184227938536534e-06, "loss": 0.5513, "step": 29520 }, { "epoch": 0.8616664234641763, "grad_norm": 0.9908639897417755, "learning_rate": 7.313624802853241e-06, "loss": 0.5878, "step": 29525 }, { "epoch": 0.8618123449584124, "grad_norm": 0.8701600215744093, "learning_rate": 7.308831512627308e-06, "loss": 0.4933, "step": 29530 }, { "epoch": 0.8619582664526485, "grad_norm": 1.0571731477392725, "learning_rate": 7.30404292429206e-06, "loss": 0.4879, "step": 29535 }, { "epoch": 0.8621041879468846, "grad_norm": 0.9584279710696191, "learning_rate": 7.2992590389626195e-06, "loss": 0.5681, "step": 29540 }, { "epoch": 0.8622501094411207, "grad_norm": 1.0962913336649283, "learning_rate": 7.2944798577529965e-06, "loss": 0.5326, "step": 29545 }, { "epoch": 0.8623960309353568, "grad_norm": 1.1071142582479432, "learning_rate": 7.289705381776113e-06, "loss": 0.5746, "step": 29550 }, { "epoch": 0.8625419524295929, "grad_norm": 0.9696910815006976, "learning_rate": 7.284935612143795e-06, "loss": 0.5628, "step": 29555 }, { "epoch": 0.862687873923829, "grad_norm": 0.9200833296536041, "learning_rate": 7.2801705499667695e-06, "loss": 0.522, "step": 29560 }, { "epoch": 0.862833795418065, "grad_norm": 1.1338086664391194, "learning_rate": 7.275410196354675e-06, "loss": 0.5725, "step": 29565 }, { "epoch": 0.8629797169123011, "grad_norm": 0.944663776974379, "learning_rate": 7.270654552416049e-06, "loss": 0.5263, "step": 29570 }, { "epoch": 0.8631256384065373, "grad_norm": 0.9883047085687345, "learning_rate": 7.26590361925833e-06, "loss": 0.5727, "step": 29575 }, { "epoch": 0.8632715599007734, "grad_norm": 0.9492780244695722, "learning_rate": 7.2611573979878585e-06, "loss": 0.5263, "step": 29580 }, { "epoch": 0.8634174813950095, "grad_norm": 1.1940330377591961, "learning_rate": 7.256415889709885e-06, "loss": 0.5638, "step": 29585 }, { "epoch": 0.8635634028892456, "grad_norm": 0.8963232815679787, "learning_rate": 7.25167909552856e-06, "loss": 0.5048, "step": 29590 }, { "epoch": 0.8637093243834817, "grad_norm": 1.0786162624483215, "learning_rate": 7.24694701654693e-06, "loss": 0.5413, "step": 29595 }, { "epoch": 0.8638552458777178, "grad_norm": 1.0005664536563534, "learning_rate": 7.2422196538669525e-06, "loss": 0.5115, "step": 29600 }, { "epoch": 0.8640011673719539, "grad_norm": 0.9931593721585926, "learning_rate": 7.237497008589474e-06, "loss": 0.512, "step": 29605 }, { "epoch": 0.86414708886619, "grad_norm": 1.0513484033634384, "learning_rate": 7.232779081814259e-06, "loss": 0.5588, "step": 29610 }, { "epoch": 0.864293010360426, "grad_norm": 1.0415464286457292, "learning_rate": 7.228065874639964e-06, "loss": 0.5176, "step": 29615 }, { "epoch": 0.8644389318546621, "grad_norm": 0.9109553476060037, "learning_rate": 7.223357388164144e-06, "loss": 0.5474, "step": 29620 }, { "epoch": 0.8645848533488983, "grad_norm": 1.1251893142339733, "learning_rate": 7.218653623483259e-06, "loss": 0.5073, "step": 29625 }, { "epoch": 0.8647307748431344, "grad_norm": 0.9834942075279803, "learning_rate": 7.213954581692664e-06, "loss": 0.46, "step": 29630 }, { "epoch": 0.8648766963373705, "grad_norm": 0.9584303757006949, "learning_rate": 7.2092602638866285e-06, "loss": 0.4925, "step": 29635 }, { "epoch": 0.8650226178316066, "grad_norm": 1.2057430723260245, "learning_rate": 7.204570671158305e-06, "loss": 0.5172, "step": 29640 }, { "epoch": 0.8651685393258427, "grad_norm": 1.06932847983272, "learning_rate": 7.199885804599757e-06, "loss": 0.5282, "step": 29645 }, { "epoch": 0.8653144608200788, "grad_norm": 0.8615385000288169, "learning_rate": 7.195205665301937e-06, "loss": 0.4743, "step": 29650 }, { "epoch": 0.8654603823143149, "grad_norm": 1.1677842032916155, "learning_rate": 7.190530254354709e-06, "loss": 0.5757, "step": 29655 }, { "epoch": 0.865606303808551, "grad_norm": 0.9709761593785842, "learning_rate": 7.1858595728468275e-06, "loss": 0.5458, "step": 29660 }, { "epoch": 0.8657522253027871, "grad_norm": 1.031251773091452, "learning_rate": 7.181193621865948e-06, "loss": 0.576, "step": 29665 }, { "epoch": 0.8658981467970231, "grad_norm": 1.0651348781347871, "learning_rate": 7.176532402498626e-06, "loss": 0.5243, "step": 29670 }, { "epoch": 0.8660440682912594, "grad_norm": 0.9553451754803618, "learning_rate": 7.171875915830305e-06, "loss": 0.5445, "step": 29675 }, { "epoch": 0.8661899897854954, "grad_norm": 0.9830341963690129, "learning_rate": 7.167224162945346e-06, "loss": 0.5518, "step": 29680 }, { "epoch": 0.8663359112797315, "grad_norm": 1.1750194195071062, "learning_rate": 7.162577144926996e-06, "loss": 0.5083, "step": 29685 }, { "epoch": 0.8664818327739676, "grad_norm": 0.9967875008019401, "learning_rate": 7.157934862857392e-06, "loss": 0.5868, "step": 29690 }, { "epoch": 0.8666277542682037, "grad_norm": 1.1928715392703513, "learning_rate": 7.153297317817578e-06, "loss": 0.5934, "step": 29695 }, { "epoch": 0.8667736757624398, "grad_norm": 0.9750855717417999, "learning_rate": 7.148664510887497e-06, "loss": 0.5518, "step": 29700 }, { "epoch": 0.8669195972566759, "grad_norm": 0.9146764263237812, "learning_rate": 7.1440364431459835e-06, "loss": 0.5112, "step": 29705 }, { "epoch": 0.867065518750912, "grad_norm": 0.9199484514055865, "learning_rate": 7.139413115670769e-06, "loss": 0.4639, "step": 29710 }, { "epoch": 0.8672114402451481, "grad_norm": 1.140457294429192, "learning_rate": 7.134794529538484e-06, "loss": 0.4956, "step": 29715 }, { "epoch": 0.8673573617393842, "grad_norm": 0.9739593566755906, "learning_rate": 7.130180685824643e-06, "loss": 0.5628, "step": 29720 }, { "epoch": 0.8675032832336204, "grad_norm": 0.9924272179574128, "learning_rate": 7.125571585603677e-06, "loss": 0.4985, "step": 29725 }, { "epoch": 0.8676492047278564, "grad_norm": 0.9485921433147932, "learning_rate": 7.120967229948897e-06, "loss": 0.505, "step": 29730 }, { "epoch": 0.8677951262220925, "grad_norm": 1.1359561736908115, "learning_rate": 7.116367619932513e-06, "loss": 0.5168, "step": 29735 }, { "epoch": 0.8679410477163286, "grad_norm": 0.9757973307698785, "learning_rate": 7.1117727566256305e-06, "loss": 0.5353, "step": 29740 }, { "epoch": 0.8680869692105647, "grad_norm": 0.8331331945573113, "learning_rate": 7.107182641098243e-06, "loss": 0.5122, "step": 29745 }, { "epoch": 0.8682328907048008, "grad_norm": 0.9916706961854886, "learning_rate": 7.1025972744192555e-06, "loss": 0.5731, "step": 29750 }, { "epoch": 0.8683788121990369, "grad_norm": 0.9894609679733036, "learning_rate": 7.098016657656451e-06, "loss": 0.5142, "step": 29755 }, { "epoch": 0.868524733693273, "grad_norm": 1.0143109850872247, "learning_rate": 7.093440791876513e-06, "loss": 0.4951, "step": 29760 }, { "epoch": 0.8686706551875091, "grad_norm": 0.9632059995792072, "learning_rate": 7.088869678145011e-06, "loss": 0.5362, "step": 29765 }, { "epoch": 0.8688165766817452, "grad_norm": 1.0005181321903434, "learning_rate": 7.084303317526423e-06, "loss": 0.5181, "step": 29770 }, { "epoch": 0.8689624981759814, "grad_norm": 0.8704204772259626, "learning_rate": 7.079741711084107e-06, "loss": 0.4972, "step": 29775 }, { "epoch": 0.8691084196702175, "grad_norm": 0.9274588543343439, "learning_rate": 7.075184859880321e-06, "loss": 0.5346, "step": 29780 }, { "epoch": 0.8692543411644535, "grad_norm": 0.9092844997181421, "learning_rate": 7.070632764976209e-06, "loss": 0.5339, "step": 29785 }, { "epoch": 0.8694002626586896, "grad_norm": 0.9231899287060015, "learning_rate": 7.0660854274318095e-06, "loss": 0.4998, "step": 29790 }, { "epoch": 0.8695461841529257, "grad_norm": 0.9455069001389227, "learning_rate": 7.061542848306063e-06, "loss": 0.5217, "step": 29795 }, { "epoch": 0.8696921056471618, "grad_norm": 0.9226647398019713, "learning_rate": 7.057005028656791e-06, "loss": 0.5024, "step": 29800 }, { "epoch": 0.8698380271413979, "grad_norm": 1.1095454945608603, "learning_rate": 7.052471969540707e-06, "loss": 0.5892, "step": 29805 }, { "epoch": 0.869983948635634, "grad_norm": 1.0478962505815896, "learning_rate": 7.047943672013419e-06, "loss": 0.568, "step": 29810 }, { "epoch": 0.8701298701298701, "grad_norm": 1.0584558024284247, "learning_rate": 7.043420137129422e-06, "loss": 0.5413, "step": 29815 }, { "epoch": 0.8702757916241062, "grad_norm": 0.9954731433904819, "learning_rate": 7.0389013659421135e-06, "loss": 0.5306, "step": 29820 }, { "epoch": 0.8704217131183424, "grad_norm": 0.9457259107495466, "learning_rate": 7.034387359503766e-06, "loss": 0.5554, "step": 29825 }, { "epoch": 0.8705676346125785, "grad_norm": 0.8954323724038272, "learning_rate": 7.0298781188655625e-06, "loss": 0.5393, "step": 29830 }, { "epoch": 0.8707135561068146, "grad_norm": 1.086648646672613, "learning_rate": 7.025373645077549e-06, "loss": 0.5881, "step": 29835 }, { "epoch": 0.8708594776010506, "grad_norm": 1.026161268402807, "learning_rate": 7.020873939188686e-06, "loss": 0.5475, "step": 29840 }, { "epoch": 0.8710053990952867, "grad_norm": 0.9888220065075831, "learning_rate": 7.016379002246812e-06, "loss": 0.5278, "step": 29845 }, { "epoch": 0.8711513205895228, "grad_norm": 1.2230658721543257, "learning_rate": 7.011888835298655e-06, "loss": 0.5781, "step": 29850 }, { "epoch": 0.8712972420837589, "grad_norm": 0.9751594696089496, "learning_rate": 7.007403439389835e-06, "loss": 0.5593, "step": 29855 }, { "epoch": 0.871443163577995, "grad_norm": 1.072942579304287, "learning_rate": 7.002922815564858e-06, "loss": 0.5874, "step": 29860 }, { "epoch": 0.8715890850722311, "grad_norm": 1.111257371597716, "learning_rate": 6.998446964867125e-06, "loss": 0.5341, "step": 29865 }, { "epoch": 0.8717350065664672, "grad_norm": 1.221842096238057, "learning_rate": 6.993975888338924e-06, "loss": 0.5335, "step": 29870 }, { "epoch": 0.8718809280607034, "grad_norm": 1.0993164854861834, "learning_rate": 6.989509587021421e-06, "loss": 0.55, "step": 29875 }, { "epoch": 0.8720268495549395, "grad_norm": 0.8740989334193764, "learning_rate": 6.985048061954684e-06, "loss": 0.5821, "step": 29880 }, { "epoch": 0.8721727710491756, "grad_norm": 1.0956713262952211, "learning_rate": 6.980591314177652e-06, "loss": 0.5488, "step": 29885 }, { "epoch": 0.8723186925434117, "grad_norm": 1.0180792715670386, "learning_rate": 6.976139344728178e-06, "loss": 0.565, "step": 29890 }, { "epoch": 0.8724646140376477, "grad_norm": 1.0862611416281598, "learning_rate": 6.9716921546429715e-06, "loss": 0.5653, "step": 29895 }, { "epoch": 0.8726105355318838, "grad_norm": 1.044699719376724, "learning_rate": 6.967249744957657e-06, "loss": 0.559, "step": 29900 }, { "epoch": 0.8727564570261199, "grad_norm": 0.963360536657404, "learning_rate": 6.962812116706719e-06, "loss": 0.5442, "step": 29905 }, { "epoch": 0.872902378520356, "grad_norm": 1.1747464547669655, "learning_rate": 6.958379270923553e-06, "loss": 0.5444, "step": 29910 }, { "epoch": 0.8730483000145921, "grad_norm": 1.173916687656853, "learning_rate": 6.953951208640427e-06, "loss": 0.5448, "step": 29915 }, { "epoch": 0.8731942215088283, "grad_norm": 0.8322397775807132, "learning_rate": 6.94952793088849e-06, "loss": 0.4837, "step": 29920 }, { "epoch": 0.8733401430030644, "grad_norm": 1.08338997339878, "learning_rate": 6.945109438697803e-06, "loss": 0.5691, "step": 29925 }, { "epoch": 0.8734860644973005, "grad_norm": 0.9906516867313716, "learning_rate": 6.9406957330972715e-06, "loss": 0.5113, "step": 29930 }, { "epoch": 0.8736319859915366, "grad_norm": 0.9179423426391685, "learning_rate": 6.936286815114727e-06, "loss": 0.4827, "step": 29935 }, { "epoch": 0.8737779074857727, "grad_norm": 1.0304264892773132, "learning_rate": 6.9318826857768565e-06, "loss": 0.4878, "step": 29940 }, { "epoch": 0.8739238289800088, "grad_norm": 1.1369862842478091, "learning_rate": 6.927483346109259e-06, "loss": 0.5065, "step": 29945 }, { "epoch": 0.8740697504742448, "grad_norm": 1.0204419657974106, "learning_rate": 6.923088797136385e-06, "loss": 0.5895, "step": 29950 }, { "epoch": 0.8742156719684809, "grad_norm": 1.0357751844490088, "learning_rate": 6.918699039881601e-06, "loss": 0.5442, "step": 29955 }, { "epoch": 0.874361593462717, "grad_norm": 0.8748692793847861, "learning_rate": 6.914314075367138e-06, "loss": 0.5314, "step": 29960 }, { "epoch": 0.8745075149569531, "grad_norm": 1.0199991152308767, "learning_rate": 6.9099339046141114e-06, "loss": 0.4941, "step": 29965 }, { "epoch": 0.8746534364511893, "grad_norm": 1.0946435876259222, "learning_rate": 6.905558528642541e-06, "loss": 0.5069, "step": 29970 }, { "epoch": 0.8747993579454254, "grad_norm": 1.0300316392027062, "learning_rate": 6.901187948471301e-06, "loss": 0.502, "step": 29975 }, { "epoch": 0.8749452794396615, "grad_norm": 1.195950166837118, "learning_rate": 6.8968221651181675e-06, "loss": 0.5395, "step": 29980 }, { "epoch": 0.8750912009338976, "grad_norm": 0.922518089654216, "learning_rate": 6.892461179599796e-06, "loss": 0.5127, "step": 29985 }, { "epoch": 0.8752371224281337, "grad_norm": 1.059932475396812, "learning_rate": 6.888104992931718e-06, "loss": 0.5262, "step": 29990 }, { "epoch": 0.8753830439223698, "grad_norm": 0.9833271019775266, "learning_rate": 6.883753606128365e-06, "loss": 0.5348, "step": 29995 }, { "epoch": 0.8755289654166059, "grad_norm": 1.0137711799894418, "learning_rate": 6.879407020203021e-06, "loss": 0.5511, "step": 30000 }, { "epoch": 0.8756748869108419, "grad_norm": 0.9471064424446669, "learning_rate": 6.875065236167884e-06, "loss": 0.5068, "step": 30005 }, { "epoch": 0.875820808405078, "grad_norm": 0.9028574386618482, "learning_rate": 6.870728255034013e-06, "loss": 0.4875, "step": 30010 }, { "epoch": 0.8759667298993141, "grad_norm": 1.0526732564936714, "learning_rate": 6.8663960778113645e-06, "loss": 0.5204, "step": 30015 }, { "epoch": 0.8761126513935503, "grad_norm": 0.9489331372750632, "learning_rate": 6.862068705508748e-06, "loss": 0.5062, "step": 30020 }, { "epoch": 0.8762585728877864, "grad_norm": 1.0788595693638467, "learning_rate": 6.8577461391338915e-06, "loss": 0.4838, "step": 30025 }, { "epoch": 0.8764044943820225, "grad_norm": 1.0013434365668266, "learning_rate": 6.853428379693374e-06, "loss": 0.5429, "step": 30030 }, { "epoch": 0.8765504158762586, "grad_norm": 1.0088690569590848, "learning_rate": 6.84911542819267e-06, "loss": 0.5428, "step": 30035 }, { "epoch": 0.8766963373704947, "grad_norm": 1.062222335005417, "learning_rate": 6.84480728563614e-06, "loss": 0.5232, "step": 30040 }, { "epoch": 0.8768422588647308, "grad_norm": 1.1126050866143993, "learning_rate": 6.840503953026996e-06, "loss": 0.5611, "step": 30045 }, { "epoch": 0.8769881803589669, "grad_norm": 1.0350764177098937, "learning_rate": 6.836205431367365e-06, "loss": 0.5125, "step": 30050 }, { "epoch": 0.877134101853203, "grad_norm": 0.9792411923890526, "learning_rate": 6.8319117216582355e-06, "loss": 0.511, "step": 30055 }, { "epoch": 0.877280023347439, "grad_norm": 1.090824461939905, "learning_rate": 6.827622824899468e-06, "loss": 0.5186, "step": 30060 }, { "epoch": 0.8774259448416751, "grad_norm": 0.9121237611156275, "learning_rate": 6.823338742089831e-06, "loss": 0.5134, "step": 30065 }, { "epoch": 0.8775718663359113, "grad_norm": 1.147552196790515, "learning_rate": 6.819059474226933e-06, "loss": 0.533, "step": 30070 }, { "epoch": 0.8777177878301474, "grad_norm": 0.9970530391326189, "learning_rate": 6.814785022307293e-06, "loss": 0.5163, "step": 30075 }, { "epoch": 0.8778637093243835, "grad_norm": 1.056014804221753, "learning_rate": 6.810515387326291e-06, "loss": 0.5421, "step": 30080 }, { "epoch": 0.8780096308186196, "grad_norm": 1.1091870848055414, "learning_rate": 6.806250570278198e-06, "loss": 0.5734, "step": 30085 }, { "epoch": 0.8781555523128557, "grad_norm": 1.0023019814262806, "learning_rate": 6.801990572156155e-06, "loss": 0.5374, "step": 30090 }, { "epoch": 0.8783014738070918, "grad_norm": 0.8659267476355923, "learning_rate": 6.797735393952179e-06, "loss": 0.4771, "step": 30095 }, { "epoch": 0.8784473953013279, "grad_norm": 1.0798493557772468, "learning_rate": 6.7934850366571694e-06, "loss": 0.6133, "step": 30100 }, { "epoch": 0.878593316795564, "grad_norm": 0.9723571269965117, "learning_rate": 6.7892395012608965e-06, "loss": 0.5505, "step": 30105 }, { "epoch": 0.8787392382898, "grad_norm": 1.0537390830097002, "learning_rate": 6.784998788752024e-06, "loss": 0.531, "step": 30110 }, { "epoch": 0.8788851597840361, "grad_norm": 0.9400690072094103, "learning_rate": 6.780762900118068e-06, "loss": 0.4691, "step": 30115 }, { "epoch": 0.8790310812782723, "grad_norm": 1.105529440059472, "learning_rate": 6.77653183634544e-06, "loss": 0.5454, "step": 30120 }, { "epoch": 0.8791770027725084, "grad_norm": 0.924528009053591, "learning_rate": 6.772305598419426e-06, "loss": 0.5263, "step": 30125 }, { "epoch": 0.8793229242667445, "grad_norm": 0.9610767289945021, "learning_rate": 6.7680841873241756e-06, "loss": 0.6027, "step": 30130 }, { "epoch": 0.8794688457609806, "grad_norm": 1.0567462707982134, "learning_rate": 6.763867604042737e-06, "loss": 0.5347, "step": 30135 }, { "epoch": 0.8796147672552167, "grad_norm": 1.144421321692344, "learning_rate": 6.759655849557002e-06, "loss": 0.559, "step": 30140 }, { "epoch": 0.8797606887494528, "grad_norm": 0.9496284752401181, "learning_rate": 6.755448924847771e-06, "loss": 0.5463, "step": 30145 }, { "epoch": 0.8799066102436889, "grad_norm": 0.9636762022558271, "learning_rate": 6.751246830894694e-06, "loss": 0.5455, "step": 30150 }, { "epoch": 0.880052531737925, "grad_norm": 0.9072418255046193, "learning_rate": 6.747049568676318e-06, "loss": 0.5138, "step": 30155 }, { "epoch": 0.8801984532321611, "grad_norm": 0.8771845542226931, "learning_rate": 6.74285713917005e-06, "loss": 0.4933, "step": 30160 }, { "epoch": 0.8803443747263971, "grad_norm": 1.027385444308803, "learning_rate": 6.738669543352175e-06, "loss": 0.5442, "step": 30165 }, { "epoch": 0.8804902962206334, "grad_norm": 1.0769734088098941, "learning_rate": 6.734486782197852e-06, "loss": 0.5477, "step": 30170 }, { "epoch": 0.8806362177148694, "grad_norm": 0.9652807090033174, "learning_rate": 6.730308856681112e-06, "loss": 0.4675, "step": 30175 }, { "epoch": 0.8807821392091055, "grad_norm": 1.0411863227327887, "learning_rate": 6.726135767774867e-06, "loss": 0.4684, "step": 30180 }, { "epoch": 0.8809280607033416, "grad_norm": 1.0788513084648004, "learning_rate": 6.7219675164509e-06, "loss": 0.5347, "step": 30185 }, { "epoch": 0.8810739821975777, "grad_norm": 1.052854975731894, "learning_rate": 6.717804103679865e-06, "loss": 0.5459, "step": 30190 }, { "epoch": 0.8812199036918138, "grad_norm": 0.9380997580514051, "learning_rate": 6.7136455304312865e-06, "loss": 0.5533, "step": 30195 }, { "epoch": 0.8813658251860499, "grad_norm": 1.0993360559642138, "learning_rate": 6.709491797673571e-06, "loss": 0.4865, "step": 30200 }, { "epoch": 0.881511746680286, "grad_norm": 1.0338201264669893, "learning_rate": 6.70534290637399e-06, "loss": 0.541, "step": 30205 }, { "epoch": 0.8816576681745221, "grad_norm": 1.0762800010256062, "learning_rate": 6.701198857498693e-06, "loss": 0.4765, "step": 30210 }, { "epoch": 0.8818035896687582, "grad_norm": 1.154709347065583, "learning_rate": 6.6970596520126966e-06, "loss": 0.5343, "step": 30215 }, { "epoch": 0.8819495111629944, "grad_norm": 1.0069951836980788, "learning_rate": 6.692925290879891e-06, "loss": 0.4787, "step": 30220 }, { "epoch": 0.8820954326572304, "grad_norm": 0.9903485754315747, "learning_rate": 6.688795775063044e-06, "loss": 0.4865, "step": 30225 }, { "epoch": 0.8822413541514665, "grad_norm": 1.0509969502745817, "learning_rate": 6.684671105523785e-06, "loss": 0.5364, "step": 30230 }, { "epoch": 0.8823872756457026, "grad_norm": 0.8607330113734835, "learning_rate": 6.6805512832226244e-06, "loss": 0.5167, "step": 30235 }, { "epoch": 0.8825331971399387, "grad_norm": 0.8899420270677085, "learning_rate": 6.676436309118942e-06, "loss": 0.4924, "step": 30240 }, { "epoch": 0.8826791186341748, "grad_norm": 1.0785084869859474, "learning_rate": 6.672326184170974e-06, "loss": 0.5297, "step": 30245 }, { "epoch": 0.8828250401284109, "grad_norm": 0.9834880786436881, "learning_rate": 6.6682209093358565e-06, "loss": 0.497, "step": 30250 }, { "epoch": 0.882970961622647, "grad_norm": 0.8676551705046306, "learning_rate": 6.6641204855695695e-06, "loss": 0.5584, "step": 30255 }, { "epoch": 0.8831168831168831, "grad_norm": 0.9566525293696535, "learning_rate": 6.660024913826976e-06, "loss": 0.5782, "step": 30260 }, { "epoch": 0.8832628046111192, "grad_norm": 1.0188560538696452, "learning_rate": 6.6559341950618e-06, "loss": 0.5682, "step": 30265 }, { "epoch": 0.8834087261053554, "grad_norm": 1.0300495895815487, "learning_rate": 6.6518483302266535e-06, "loss": 0.5354, "step": 30270 }, { "epoch": 0.8835546475995915, "grad_norm": 1.0097785168081432, "learning_rate": 6.6477673202730006e-06, "loss": 0.5195, "step": 30275 }, { "epoch": 0.8837005690938275, "grad_norm": 0.9682368111525732, "learning_rate": 6.643691166151178e-06, "loss": 0.4895, "step": 30280 }, { "epoch": 0.8838464905880636, "grad_norm": 1.0018786415950411, "learning_rate": 6.639619868810398e-06, "loss": 0.5206, "step": 30285 }, { "epoch": 0.8839924120822997, "grad_norm": 0.9106761919254766, "learning_rate": 6.635553429198735e-06, "loss": 0.5409, "step": 30290 }, { "epoch": 0.8841383335765358, "grad_norm": 1.1403622283456385, "learning_rate": 6.631491848263137e-06, "loss": 0.5546, "step": 30295 }, { "epoch": 0.8842842550707719, "grad_norm": 0.8927811407997241, "learning_rate": 6.6274351269494214e-06, "loss": 0.4947, "step": 30300 }, { "epoch": 0.884430176565008, "grad_norm": 0.962949364788676, "learning_rate": 6.62338326620227e-06, "loss": 0.5423, "step": 30305 }, { "epoch": 0.8845760980592441, "grad_norm": 0.9036803757803599, "learning_rate": 6.619336266965234e-06, "loss": 0.5205, "step": 30310 }, { "epoch": 0.8847220195534802, "grad_norm": 1.1031498594248523, "learning_rate": 6.6152941301807286e-06, "loss": 0.5498, "step": 30315 }, { "epoch": 0.8848679410477164, "grad_norm": 1.0576554232408852, "learning_rate": 6.611256856790048e-06, "loss": 0.5196, "step": 30320 }, { "epoch": 0.8850138625419525, "grad_norm": 0.9514805077270847, "learning_rate": 6.607224447733343e-06, "loss": 0.51, "step": 30325 }, { "epoch": 0.8851597840361886, "grad_norm": 0.9177794419307634, "learning_rate": 6.603196903949637e-06, "loss": 0.5424, "step": 30330 }, { "epoch": 0.8853057055304246, "grad_norm": 0.9285289116283991, "learning_rate": 6.5991742263768125e-06, "loss": 0.5017, "step": 30335 }, { "epoch": 0.8854516270246607, "grad_norm": 1.0784913369271725, "learning_rate": 6.595156415951636e-06, "loss": 0.5322, "step": 30340 }, { "epoch": 0.8855975485188968, "grad_norm": 1.1073906503610704, "learning_rate": 6.591143473609722e-06, "loss": 0.525, "step": 30345 }, { "epoch": 0.8857434700131329, "grad_norm": 1.1577261909116483, "learning_rate": 6.587135400285562e-06, "loss": 0.5306, "step": 30350 }, { "epoch": 0.885889391507369, "grad_norm": 1.152700078794717, "learning_rate": 6.583132196912509e-06, "loss": 0.5441, "step": 30355 }, { "epoch": 0.8860353130016051, "grad_norm": 0.9863928107563286, "learning_rate": 6.5791338644227815e-06, "loss": 0.5159, "step": 30360 }, { "epoch": 0.8861812344958412, "grad_norm": 0.9611006475972624, "learning_rate": 6.575140403747474e-06, "loss": 0.4632, "step": 30365 }, { "epoch": 0.8863271559900774, "grad_norm": 1.0772509951562268, "learning_rate": 6.5711518158165255e-06, "loss": 0.5493, "step": 30370 }, { "epoch": 0.8864730774843135, "grad_norm": 0.9417049634273706, "learning_rate": 6.56716810155877e-06, "loss": 0.5421, "step": 30375 }, { "epoch": 0.8866189989785496, "grad_norm": 1.094028832386522, "learning_rate": 6.563189261901874e-06, "loss": 0.5116, "step": 30380 }, { "epoch": 0.8867649204727857, "grad_norm": 1.1523471117240423, "learning_rate": 6.559215297772395e-06, "loss": 0.5905, "step": 30385 }, { "epoch": 0.8869108419670217, "grad_norm": 1.0385926913926515, "learning_rate": 6.555246210095739e-06, "loss": 0.5225, "step": 30390 }, { "epoch": 0.8870567634612578, "grad_norm": 1.0145460635822472, "learning_rate": 6.551281999796187e-06, "loss": 0.5548, "step": 30395 }, { "epoch": 0.8872026849554939, "grad_norm": 0.8959369649060647, "learning_rate": 6.547322667796875e-06, "loss": 0.484, "step": 30400 }, { "epoch": 0.88734860644973, "grad_norm": 1.143972763850105, "learning_rate": 6.543368215019805e-06, "loss": 0.5317, "step": 30405 }, { "epoch": 0.8874945279439661, "grad_norm": 0.9856911896592033, "learning_rate": 6.539418642385854e-06, "loss": 0.504, "step": 30410 }, { "epoch": 0.8876404494382022, "grad_norm": 1.0053296352322783, "learning_rate": 6.535473950814749e-06, "loss": 0.4891, "step": 30415 }, { "epoch": 0.8877863709324384, "grad_norm": 1.1099608467866107, "learning_rate": 6.531534141225083e-06, "loss": 0.5808, "step": 30420 }, { "epoch": 0.8879322924266745, "grad_norm": 0.9428015409725334, "learning_rate": 6.527599214534319e-06, "loss": 0.4899, "step": 30425 }, { "epoch": 0.8880782139209106, "grad_norm": 1.0130410365871099, "learning_rate": 6.5236691716587705e-06, "loss": 0.548, "step": 30430 }, { "epoch": 0.8882241354151467, "grad_norm": 1.1563452810017223, "learning_rate": 6.5197440135136305e-06, "loss": 0.5631, "step": 30435 }, { "epoch": 0.8883700569093828, "grad_norm": 1.0189653894835977, "learning_rate": 6.51582374101294e-06, "loss": 0.5582, "step": 30440 }, { "epoch": 0.8885159784036188, "grad_norm": 0.9860787064062873, "learning_rate": 6.511908355069613e-06, "loss": 0.5381, "step": 30445 }, { "epoch": 0.8886618998978549, "grad_norm": 1.0889951926286519, "learning_rate": 6.507997856595409e-06, "loss": 0.5415, "step": 30450 }, { "epoch": 0.888807821392091, "grad_norm": 0.9779808812381271, "learning_rate": 6.504092246500972e-06, "loss": 0.4962, "step": 30455 }, { "epoch": 0.8889537428863271, "grad_norm": 1.1941593001987445, "learning_rate": 6.500191525695794e-06, "loss": 0.5913, "step": 30460 }, { "epoch": 0.8890996643805632, "grad_norm": 0.8411551052934448, "learning_rate": 6.496295695088227e-06, "loss": 0.5503, "step": 30465 }, { "epoch": 0.8892455858747994, "grad_norm": 0.9416121831704969, "learning_rate": 6.492404755585488e-06, "loss": 0.5031, "step": 30470 }, { "epoch": 0.8893915073690355, "grad_norm": 1.0317188628948697, "learning_rate": 6.488518708093653e-06, "loss": 0.5422, "step": 30475 }, { "epoch": 0.8895374288632716, "grad_norm": 1.0034729077229914, "learning_rate": 6.4846375535176725e-06, "loss": 0.5646, "step": 30480 }, { "epoch": 0.8896833503575077, "grad_norm": 1.0098002117844118, "learning_rate": 6.480761292761332e-06, "loss": 0.5516, "step": 30485 }, { "epoch": 0.8898292718517438, "grad_norm": 0.9164785215798202, "learning_rate": 6.4768899267273005e-06, "loss": 0.5004, "step": 30490 }, { "epoch": 0.8899751933459799, "grad_norm": 0.9290579640605727, "learning_rate": 6.4730234563170945e-06, "loss": 0.5079, "step": 30495 }, { "epoch": 0.8901211148402159, "grad_norm": 1.089062137643188, "learning_rate": 6.469161882431089e-06, "loss": 0.5335, "step": 30500 }, { "epoch": 0.890267036334452, "grad_norm": 1.0619621333094895, "learning_rate": 6.465305205968532e-06, "loss": 0.5544, "step": 30505 }, { "epoch": 0.8904129578286881, "grad_norm": 0.9297827314654659, "learning_rate": 6.461453427827514e-06, "loss": 0.5045, "step": 30510 }, { "epoch": 0.8905588793229242, "grad_norm": 1.0819081817818819, "learning_rate": 6.457606548905006e-06, "loss": 0.5566, "step": 30515 }, { "epoch": 0.8907048008171604, "grad_norm": 1.1006931447397736, "learning_rate": 6.453764570096811e-06, "loss": 0.5574, "step": 30520 }, { "epoch": 0.8908507223113965, "grad_norm": 0.998948901241452, "learning_rate": 6.449927492297618e-06, "loss": 0.4858, "step": 30525 }, { "epoch": 0.8909966438056326, "grad_norm": 0.9345295884985156, "learning_rate": 6.446095316400954e-06, "loss": 0.4846, "step": 30530 }, { "epoch": 0.8911425652998687, "grad_norm": 1.0212378264166426, "learning_rate": 6.442268043299214e-06, "loss": 0.5222, "step": 30535 }, { "epoch": 0.8912884867941048, "grad_norm": 0.9951932532848269, "learning_rate": 6.438445673883657e-06, "loss": 0.5258, "step": 30540 }, { "epoch": 0.8914344082883409, "grad_norm": 1.0502407080762461, "learning_rate": 6.43462820904438e-06, "loss": 0.5669, "step": 30545 }, { "epoch": 0.891580329782577, "grad_norm": 1.0536413917561414, "learning_rate": 6.430815649670363e-06, "loss": 0.5646, "step": 30550 }, { "epoch": 0.891726251276813, "grad_norm": 1.0644268500888934, "learning_rate": 6.427007996649427e-06, "loss": 0.5524, "step": 30555 }, { "epoch": 0.8918721727710491, "grad_norm": 0.998014763097728, "learning_rate": 6.4232052508682545e-06, "loss": 0.5336, "step": 30560 }, { "epoch": 0.8920180942652852, "grad_norm": 0.9854768318073398, "learning_rate": 6.419407413212389e-06, "loss": 0.5575, "step": 30565 }, { "epoch": 0.8921640157595214, "grad_norm": 1.0308011294562738, "learning_rate": 6.415614484566221e-06, "loss": 0.498, "step": 30570 }, { "epoch": 0.8923099372537575, "grad_norm": 1.1074020671383793, "learning_rate": 6.41182646581301e-06, "loss": 0.555, "step": 30575 }, { "epoch": 0.8924558587479936, "grad_norm": 1.1034009482283433, "learning_rate": 6.408043357834867e-06, "loss": 0.5582, "step": 30580 }, { "epoch": 0.8926017802422297, "grad_norm": 0.9759480031010573, "learning_rate": 6.404265161512765e-06, "loss": 0.516, "step": 30585 }, { "epoch": 0.8927477017364658, "grad_norm": 1.2376174869442904, "learning_rate": 6.400491877726511e-06, "loss": 0.5826, "step": 30590 }, { "epoch": 0.8928936232307019, "grad_norm": 1.1279783951884765, "learning_rate": 6.3967235073548025e-06, "loss": 0.5925, "step": 30595 }, { "epoch": 0.893039544724938, "grad_norm": 1.1641281275413724, "learning_rate": 6.392960051275165e-06, "loss": 0.5903, "step": 30600 }, { "epoch": 0.893185466219174, "grad_norm": 1.093501993710345, "learning_rate": 6.38920151036399e-06, "loss": 0.5321, "step": 30605 }, { "epoch": 0.8933313877134101, "grad_norm": 1.0689202478975515, "learning_rate": 6.385447885496531e-06, "loss": 0.6033, "step": 30610 }, { "epoch": 0.8934773092076462, "grad_norm": 1.039256245212493, "learning_rate": 6.381699177546877e-06, "loss": 0.5536, "step": 30615 }, { "epoch": 0.8936232307018824, "grad_norm": 1.1495482286410592, "learning_rate": 6.3779553873879985e-06, "loss": 0.5047, "step": 30620 }, { "epoch": 0.8937691521961185, "grad_norm": 1.1706408482212765, "learning_rate": 6.374216515891694e-06, "loss": 0.5584, "step": 30625 }, { "epoch": 0.8939150736903546, "grad_norm": 0.9046635457155036, "learning_rate": 6.37048256392864e-06, "loss": 0.4874, "step": 30630 }, { "epoch": 0.8940609951845907, "grad_norm": 1.076673238601162, "learning_rate": 6.366753532368354e-06, "loss": 0.5154, "step": 30635 }, { "epoch": 0.8942069166788268, "grad_norm": 0.9808198535416821, "learning_rate": 6.36302942207921e-06, "loss": 0.4801, "step": 30640 }, { "epoch": 0.8943528381730629, "grad_norm": 0.9291082799777206, "learning_rate": 6.359310233928435e-06, "loss": 0.4638, "step": 30645 }, { "epoch": 0.894498759667299, "grad_norm": 0.9941111289346053, "learning_rate": 6.355595968782109e-06, "loss": 0.5129, "step": 30650 }, { "epoch": 0.8946446811615351, "grad_norm": 1.13546093091978, "learning_rate": 6.35188662750518e-06, "loss": 0.4884, "step": 30655 }, { "epoch": 0.8947906026557712, "grad_norm": 1.0981551462292058, "learning_rate": 6.348182210961423e-06, "loss": 0.548, "step": 30660 }, { "epoch": 0.8949365241500074, "grad_norm": 0.9983409027611266, "learning_rate": 6.344482720013489e-06, "loss": 0.5461, "step": 30665 }, { "epoch": 0.8950824456442434, "grad_norm": 1.1207387387030325, "learning_rate": 6.34078815552287e-06, "loss": 0.5795, "step": 30670 }, { "epoch": 0.8952283671384795, "grad_norm": 1.0027070461084964, "learning_rate": 6.337098518349914e-06, "loss": 0.5112, "step": 30675 }, { "epoch": 0.8953742886327156, "grad_norm": 0.8992752510587828, "learning_rate": 6.33341380935383e-06, "loss": 0.5356, "step": 30680 }, { "epoch": 0.8955202101269517, "grad_norm": 0.999846550642492, "learning_rate": 6.329734029392658e-06, "loss": 0.5636, "step": 30685 }, { "epoch": 0.8956661316211878, "grad_norm": 0.8461577927220665, "learning_rate": 6.326059179323314e-06, "loss": 0.4809, "step": 30690 }, { "epoch": 0.8958120531154239, "grad_norm": 1.0883345393068011, "learning_rate": 6.322389260001548e-06, "loss": 0.5363, "step": 30695 }, { "epoch": 0.89595797460966, "grad_norm": 1.0750080410173237, "learning_rate": 6.318724272281976e-06, "loss": 0.4851, "step": 30700 }, { "epoch": 0.8961038961038961, "grad_norm": 1.001055464648106, "learning_rate": 6.315064217018057e-06, "loss": 0.5549, "step": 30705 }, { "epoch": 0.8962498175981322, "grad_norm": 1.097390498263082, "learning_rate": 6.311409095062103e-06, "loss": 0.5132, "step": 30710 }, { "epoch": 0.8963957390923684, "grad_norm": 1.0217109796596096, "learning_rate": 6.307758907265275e-06, "loss": 0.5576, "step": 30715 }, { "epoch": 0.8965416605866044, "grad_norm": 1.0436872364897183, "learning_rate": 6.304113654477585e-06, "loss": 0.5685, "step": 30720 }, { "epoch": 0.8966875820808405, "grad_norm": 1.0420778674501554, "learning_rate": 6.300473337547906e-06, "loss": 0.5512, "step": 30725 }, { "epoch": 0.8968335035750766, "grad_norm": 0.9639679566618371, "learning_rate": 6.296837957323952e-06, "loss": 0.5194, "step": 30730 }, { "epoch": 0.8969794250693127, "grad_norm": 1.06983236185863, "learning_rate": 6.293207514652284e-06, "loss": 0.5219, "step": 30735 }, { "epoch": 0.8971253465635488, "grad_norm": 1.0289358069190055, "learning_rate": 6.289582010378324e-06, "loss": 0.5011, "step": 30740 }, { "epoch": 0.8972712680577849, "grad_norm": 1.1435896903479865, "learning_rate": 6.285961445346332e-06, "loss": 0.487, "step": 30745 }, { "epoch": 0.897417189552021, "grad_norm": 1.0296068100496318, "learning_rate": 6.2823458203994325e-06, "loss": 0.4947, "step": 30750 }, { "epoch": 0.8975631110462571, "grad_norm": 0.8520463791349125, "learning_rate": 6.278735136379582e-06, "loss": 0.5364, "step": 30755 }, { "epoch": 0.8977090325404932, "grad_norm": 1.1820495439998748, "learning_rate": 6.275129394127605e-06, "loss": 0.5531, "step": 30760 }, { "epoch": 0.8978549540347294, "grad_norm": 0.9335935533542379, "learning_rate": 6.271528594483159e-06, "loss": 0.5547, "step": 30765 }, { "epoch": 0.8980008755289655, "grad_norm": 1.0175668164630614, "learning_rate": 6.2679327382847595e-06, "loss": 0.496, "step": 30770 }, { "epoch": 0.8981467970232015, "grad_norm": 1.0026736727604557, "learning_rate": 6.2643418263697724e-06, "loss": 0.4938, "step": 30775 }, { "epoch": 0.8982927185174376, "grad_norm": 0.9822216686811812, "learning_rate": 6.260755859574405e-06, "loss": 0.5322, "step": 30780 }, { "epoch": 0.8984386400116737, "grad_norm": 0.9349368027537309, "learning_rate": 6.257174838733715e-06, "loss": 0.586, "step": 30785 }, { "epoch": 0.8985845615059098, "grad_norm": 0.9742324521791845, "learning_rate": 6.253598764681609e-06, "loss": 0.4639, "step": 30790 }, { "epoch": 0.8987304830001459, "grad_norm": 0.9703682221450168, "learning_rate": 6.250027638250852e-06, "loss": 0.5122, "step": 30795 }, { "epoch": 0.898876404494382, "grad_norm": 0.9846377186188879, "learning_rate": 6.246461460273038e-06, "loss": 0.5596, "step": 30800 }, { "epoch": 0.8990223259886181, "grad_norm": 1.2070277199718502, "learning_rate": 6.242900231578623e-06, "loss": 0.5209, "step": 30805 }, { "epoch": 0.8991682474828542, "grad_norm": 1.2038504457706052, "learning_rate": 6.2393439529969035e-06, "loss": 0.5228, "step": 30810 }, { "epoch": 0.8993141689770904, "grad_norm": 0.9211033639959786, "learning_rate": 6.235792625356023e-06, "loss": 0.487, "step": 30815 }, { "epoch": 0.8994600904713265, "grad_norm": 0.9888143242627709, "learning_rate": 6.23224624948298e-06, "loss": 0.5685, "step": 30820 }, { "epoch": 0.8996060119655626, "grad_norm": 1.0831973209260362, "learning_rate": 6.228704826203613e-06, "loss": 0.5212, "step": 30825 }, { "epoch": 0.8997519334597986, "grad_norm": 1.021032267486361, "learning_rate": 6.225168356342607e-06, "loss": 0.5091, "step": 30830 }, { "epoch": 0.8998978549540347, "grad_norm": 0.9391812438786102, "learning_rate": 6.2216368407234895e-06, "loss": 0.4981, "step": 30835 }, { "epoch": 0.9000437764482708, "grad_norm": 1.1233057834540503, "learning_rate": 6.218110280168651e-06, "loss": 0.5176, "step": 30840 }, { "epoch": 0.9001896979425069, "grad_norm": 0.9746962411251152, "learning_rate": 6.214588675499312e-06, "loss": 0.539, "step": 30845 }, { "epoch": 0.900335619436743, "grad_norm": 1.0070858772131686, "learning_rate": 6.211072027535543e-06, "loss": 0.5201, "step": 30850 }, { "epoch": 0.9004815409309791, "grad_norm": 1.0557704283842555, "learning_rate": 6.207560337096262e-06, "loss": 0.6069, "step": 30855 }, { "epoch": 0.9006274624252152, "grad_norm": 1.098024224395763, "learning_rate": 6.204053604999226e-06, "loss": 0.5088, "step": 30860 }, { "epoch": 0.9007733839194514, "grad_norm": 1.0813060862779902, "learning_rate": 6.200551832061055e-06, "loss": 0.5206, "step": 30865 }, { "epoch": 0.9009193054136875, "grad_norm": 1.0193421618087972, "learning_rate": 6.197055019097194e-06, "loss": 0.4993, "step": 30870 }, { "epoch": 0.9010652269079236, "grad_norm": 1.104771089747469, "learning_rate": 6.1935631669219436e-06, "loss": 0.5193, "step": 30875 }, { "epoch": 0.9012111484021597, "grad_norm": 0.921495152216103, "learning_rate": 6.190076276348443e-06, "loss": 0.522, "step": 30880 }, { "epoch": 0.9013570698963957, "grad_norm": 1.071173638171577, "learning_rate": 6.186594348188684e-06, "loss": 0.5382, "step": 30885 }, { "epoch": 0.9015029913906318, "grad_norm": 1.0051369129160548, "learning_rate": 6.183117383253499e-06, "loss": 0.4599, "step": 30890 }, { "epoch": 0.9016489128848679, "grad_norm": 1.084468010080285, "learning_rate": 6.179645382352563e-06, "loss": 0.4742, "step": 30895 }, { "epoch": 0.901794834379104, "grad_norm": 1.17305276879897, "learning_rate": 6.176178346294397e-06, "loss": 0.5709, "step": 30900 }, { "epoch": 0.9019407558733401, "grad_norm": 1.0703681576706494, "learning_rate": 6.172716275886358e-06, "loss": 0.5468, "step": 30905 }, { "epoch": 0.9020866773675762, "grad_norm": 1.0014560348550983, "learning_rate": 6.169259171934666e-06, "loss": 0.5097, "step": 30910 }, { "epoch": 0.9022325988618124, "grad_norm": 0.9161118264601708, "learning_rate": 6.165807035244364e-06, "loss": 0.5348, "step": 30915 }, { "epoch": 0.9023785203560485, "grad_norm": 1.033482234226185, "learning_rate": 6.162359866619351e-06, "loss": 0.5704, "step": 30920 }, { "epoch": 0.9025244418502846, "grad_norm": 0.8376083515932418, "learning_rate": 6.158917666862359e-06, "loss": 0.476, "step": 30925 }, { "epoch": 0.9026703633445207, "grad_norm": 0.981333761604822, "learning_rate": 6.155480436774971e-06, "loss": 0.5033, "step": 30930 }, { "epoch": 0.9028162848387568, "grad_norm": 1.0863659765634708, "learning_rate": 6.1520481771576145e-06, "loss": 0.4721, "step": 30935 }, { "epoch": 0.9029622063329928, "grad_norm": 0.9487429074383799, "learning_rate": 6.148620888809549e-06, "loss": 0.5785, "step": 30940 }, { "epoch": 0.9031081278272289, "grad_norm": 0.9893157463033818, "learning_rate": 6.145198572528888e-06, "loss": 0.4978, "step": 30945 }, { "epoch": 0.903254049321465, "grad_norm": 1.071340088807136, "learning_rate": 6.141781229112573e-06, "loss": 0.5504, "step": 30950 }, { "epoch": 0.9033999708157011, "grad_norm": 1.0187993075512538, "learning_rate": 6.138368859356408e-06, "loss": 0.5676, "step": 30955 }, { "epoch": 0.9035458923099372, "grad_norm": 0.9998555113521561, "learning_rate": 6.1349614640550195e-06, "loss": 0.4819, "step": 30960 }, { "epoch": 0.9036918138041734, "grad_norm": 0.9418079183898496, "learning_rate": 6.131559044001885e-06, "loss": 0.473, "step": 30965 }, { "epoch": 0.9038377352984095, "grad_norm": 1.1005790767556463, "learning_rate": 6.128161599989324e-06, "loss": 0.528, "step": 30970 }, { "epoch": 0.9039836567926456, "grad_norm": 1.005161609810259, "learning_rate": 6.124769132808485e-06, "loss": 0.5238, "step": 30975 }, { "epoch": 0.9041295782868817, "grad_norm": 0.8513289649031828, "learning_rate": 6.121381643249381e-06, "loss": 0.5252, "step": 30980 }, { "epoch": 0.9042754997811178, "grad_norm": 0.989182404932506, "learning_rate": 6.1179991321008456e-06, "loss": 0.5577, "step": 30985 }, { "epoch": 0.9044214212753539, "grad_norm": 0.9695530732676745, "learning_rate": 6.1146216001505596e-06, "loss": 0.4698, "step": 30990 }, { "epoch": 0.9045673427695899, "grad_norm": 1.1652209252209018, "learning_rate": 6.111249048185042e-06, "loss": 0.5786, "step": 30995 }, { "epoch": 0.904713264263826, "grad_norm": 0.9759425072951008, "learning_rate": 6.107881476989656e-06, "loss": 0.5037, "step": 31000 }, { "epoch": 0.9048591857580621, "grad_norm": 0.9957514683998013, "learning_rate": 6.10451888734861e-06, "loss": 0.5471, "step": 31005 }, { "epoch": 0.9050051072522982, "grad_norm": 0.9927921285007671, "learning_rate": 6.101161280044938e-06, "loss": 0.5007, "step": 31010 }, { "epoch": 0.9051510287465344, "grad_norm": 1.0715586324501267, "learning_rate": 6.097808655860526e-06, "loss": 0.5838, "step": 31015 }, { "epoch": 0.9052969502407705, "grad_norm": 1.0977381208203096, "learning_rate": 6.094461015576088e-06, "loss": 0.524, "step": 31020 }, { "epoch": 0.9054428717350066, "grad_norm": 1.0654985153439427, "learning_rate": 6.091118359971195e-06, "loss": 0.5705, "step": 31025 }, { "epoch": 0.9055887932292427, "grad_norm": 1.119556846864035, "learning_rate": 6.087780689824239e-06, "loss": 0.4362, "step": 31030 }, { "epoch": 0.9057347147234788, "grad_norm": 1.0810583967789849, "learning_rate": 6.084448005912463e-06, "loss": 0.5764, "step": 31035 }, { "epoch": 0.9058806362177149, "grad_norm": 1.254736426712116, "learning_rate": 6.081120309011944e-06, "loss": 0.5431, "step": 31040 }, { "epoch": 0.906026557711951, "grad_norm": 1.021062231911103, "learning_rate": 6.077797599897593e-06, "loss": 0.4926, "step": 31045 }, { "epoch": 0.906172479206187, "grad_norm": 1.0208524727279626, "learning_rate": 6.0744798793431724e-06, "loss": 0.5446, "step": 31050 }, { "epoch": 0.9063184007004231, "grad_norm": 1.0155441200013657, "learning_rate": 6.071167148121269e-06, "loss": 0.5116, "step": 31055 }, { "epoch": 0.9064643221946592, "grad_norm": 0.9578405124866007, "learning_rate": 6.0678594070033235e-06, "loss": 0.5449, "step": 31060 }, { "epoch": 0.9066102436888954, "grad_norm": 1.1023921440251614, "learning_rate": 6.064556656759596e-06, "loss": 0.4987, "step": 31065 }, { "epoch": 0.9067561651831315, "grad_norm": 1.0295573316020605, "learning_rate": 6.061258898159196e-06, "loss": 0.511, "step": 31070 }, { "epoch": 0.9069020866773676, "grad_norm": 0.9426448869519591, "learning_rate": 6.057966131970069e-06, "loss": 0.5098, "step": 31075 }, { "epoch": 0.9070480081716037, "grad_norm": 1.414166816596012, "learning_rate": 6.054678358958997e-06, "loss": 0.5892, "step": 31080 }, { "epoch": 0.9071939296658398, "grad_norm": 1.0285945298071992, "learning_rate": 6.0513955798916025e-06, "loss": 0.5078, "step": 31085 }, { "epoch": 0.9073398511600759, "grad_norm": 1.0085168820771129, "learning_rate": 6.048117795532335e-06, "loss": 0.5288, "step": 31090 }, { "epoch": 0.907485772654312, "grad_norm": 0.92540792874259, "learning_rate": 6.044845006644492e-06, "loss": 0.4996, "step": 31095 }, { "epoch": 0.907631694148548, "grad_norm": 0.9301329440583919, "learning_rate": 6.041577213990205e-06, "loss": 0.4886, "step": 31100 }, { "epoch": 0.9077776156427841, "grad_norm": 0.9458118354448951, "learning_rate": 6.03831441833044e-06, "loss": 0.5403, "step": 31105 }, { "epoch": 0.9079235371370202, "grad_norm": 0.8794406711524512, "learning_rate": 6.035056620424998e-06, "loss": 0.4868, "step": 31110 }, { "epoch": 0.9080694586312564, "grad_norm": 1.1824427520914502, "learning_rate": 6.031803821032513e-06, "loss": 0.5416, "step": 31115 }, { "epoch": 0.9082153801254925, "grad_norm": 1.0732724327646415, "learning_rate": 6.028556020910472e-06, "loss": 0.4917, "step": 31120 }, { "epoch": 0.9083613016197286, "grad_norm": 0.9178265748231079, "learning_rate": 6.0253132208151725e-06, "loss": 0.5868, "step": 31125 }, { "epoch": 0.9085072231139647, "grad_norm": 1.0284250625449678, "learning_rate": 6.022075421501778e-06, "loss": 0.5512, "step": 31130 }, { "epoch": 0.9086531446082008, "grad_norm": 1.063822860478113, "learning_rate": 6.018842623724253e-06, "loss": 0.534, "step": 31135 }, { "epoch": 0.9087990661024369, "grad_norm": 1.6631472430847554, "learning_rate": 6.015614828235426e-06, "loss": 0.5461, "step": 31140 }, { "epoch": 0.908944987596673, "grad_norm": 1.0706749562589644, "learning_rate": 6.012392035786947e-06, "loss": 0.5286, "step": 31145 }, { "epoch": 0.9090909090909091, "grad_norm": 0.9024397453829718, "learning_rate": 6.009174247129299e-06, "loss": 0.5136, "step": 31150 }, { "epoch": 0.9092368305851452, "grad_norm": 0.923137024392914, "learning_rate": 6.005961463011813e-06, "loss": 0.5525, "step": 31155 }, { "epoch": 0.9093827520793812, "grad_norm": 0.9854482912022399, "learning_rate": 6.002753684182637e-06, "loss": 0.5132, "step": 31160 }, { "epoch": 0.9095286735736174, "grad_norm": 0.9583724268021456, "learning_rate": 5.9995509113887666e-06, "loss": 0.4881, "step": 31165 }, { "epoch": 0.9096745950678535, "grad_norm": 1.1056715192931814, "learning_rate": 5.9963531453760255e-06, "loss": 0.5461, "step": 31170 }, { "epoch": 0.9098205165620896, "grad_norm": 0.9983701486504825, "learning_rate": 5.9931603868890735e-06, "loss": 0.5326, "step": 31175 }, { "epoch": 0.9099664380563257, "grad_norm": 1.0084330801741046, "learning_rate": 5.989972636671409e-06, "loss": 0.5402, "step": 31180 }, { "epoch": 0.9101123595505618, "grad_norm": 1.0645425432848696, "learning_rate": 5.986789895465348e-06, "loss": 0.5829, "step": 31185 }, { "epoch": 0.9102582810447979, "grad_norm": 1.1019803813358091, "learning_rate": 5.983612164012062e-06, "loss": 0.5166, "step": 31190 }, { "epoch": 0.910404202539034, "grad_norm": 1.164090499797717, "learning_rate": 5.980439443051539e-06, "loss": 0.5006, "step": 31195 }, { "epoch": 0.9105501240332701, "grad_norm": 1.005277959036037, "learning_rate": 5.977271733322615e-06, "loss": 0.5354, "step": 31200 }, { "epoch": 0.9106960455275062, "grad_norm": 1.033691536534947, "learning_rate": 5.9741090355629355e-06, "loss": 0.6019, "step": 31205 }, { "epoch": 0.9108419670217422, "grad_norm": 1.0253810412060618, "learning_rate": 5.970951350509006e-06, "loss": 0.553, "step": 31210 }, { "epoch": 0.9109878885159785, "grad_norm": 1.2592189362163204, "learning_rate": 5.96779867889615e-06, "loss": 0.5902, "step": 31215 }, { "epoch": 0.9111338100102145, "grad_norm": 1.0394923775141476, "learning_rate": 5.964651021458521e-06, "loss": 0.5146, "step": 31220 }, { "epoch": 0.9112797315044506, "grad_norm": 1.0153763223121535, "learning_rate": 5.961508378929117e-06, "loss": 0.5195, "step": 31225 }, { "epoch": 0.9114256529986867, "grad_norm": 0.9961698768918347, "learning_rate": 5.958370752039753e-06, "loss": 0.5614, "step": 31230 }, { "epoch": 0.9115715744929228, "grad_norm": 1.0450466103250151, "learning_rate": 5.9552381415210925e-06, "loss": 0.5321, "step": 31235 }, { "epoch": 0.9117174959871589, "grad_norm": 1.068705360286735, "learning_rate": 5.952110548102617e-06, "loss": 0.5059, "step": 31240 }, { "epoch": 0.911863417481395, "grad_norm": 0.9964858760523883, "learning_rate": 5.948987972512644e-06, "loss": 0.5057, "step": 31245 }, { "epoch": 0.9120093389756311, "grad_norm": 0.9289308056371659, "learning_rate": 5.94587041547833e-06, "loss": 0.4789, "step": 31250 }, { "epoch": 0.9121552604698672, "grad_norm": 0.9809821991870397, "learning_rate": 5.9427578777256485e-06, "loss": 0.5331, "step": 31255 }, { "epoch": 0.9123011819641033, "grad_norm": 0.9809166605745736, "learning_rate": 5.9396503599794175e-06, "loss": 0.5758, "step": 31260 }, { "epoch": 0.9124471034583395, "grad_norm": 1.084114207476625, "learning_rate": 5.9365478629632755e-06, "loss": 0.5394, "step": 31265 }, { "epoch": 0.9125930249525755, "grad_norm": 1.091027514947355, "learning_rate": 5.933450387399709e-06, "loss": 0.5398, "step": 31270 }, { "epoch": 0.9127389464468116, "grad_norm": 1.0994095365611294, "learning_rate": 5.930357934010004e-06, "loss": 0.578, "step": 31275 }, { "epoch": 0.9128848679410477, "grad_norm": 0.9351519000209817, "learning_rate": 5.927270503514312e-06, "loss": 0.5068, "step": 31280 }, { "epoch": 0.9130307894352838, "grad_norm": 1.055765290402011, "learning_rate": 5.924188096631592e-06, "loss": 0.5253, "step": 31285 }, { "epoch": 0.9131767109295199, "grad_norm": 0.952439205718905, "learning_rate": 5.9211107140796385e-06, "loss": 0.5045, "step": 31290 }, { "epoch": 0.913322632423756, "grad_norm": 0.9719293979550976, "learning_rate": 5.918038356575085e-06, "loss": 0.4997, "step": 31295 }, { "epoch": 0.9134685539179921, "grad_norm": 0.9971675384795458, "learning_rate": 5.914971024833379e-06, "loss": 0.4517, "step": 31300 }, { "epoch": 0.9136144754122282, "grad_norm": 1.0809282011065169, "learning_rate": 5.9119087195688115e-06, "loss": 0.5774, "step": 31305 }, { "epoch": 0.9137603969064643, "grad_norm": 0.9332638910340019, "learning_rate": 5.908851441494491e-06, "loss": 0.4972, "step": 31310 }, { "epoch": 0.9139063184007005, "grad_norm": 1.017298725479635, "learning_rate": 5.90579919132237e-06, "loss": 0.5519, "step": 31315 }, { "epoch": 0.9140522398949366, "grad_norm": 0.9581703860876212, "learning_rate": 5.902751969763217e-06, "loss": 0.5308, "step": 31320 }, { "epoch": 0.9141981613891726, "grad_norm": 0.9804958493778927, "learning_rate": 5.899709777526638e-06, "loss": 0.5896, "step": 31325 }, { "epoch": 0.9143440828834087, "grad_norm": 1.0702718158927564, "learning_rate": 5.89667261532106e-06, "loss": 0.5051, "step": 31330 }, { "epoch": 0.9144900043776448, "grad_norm": 1.0007373590109032, "learning_rate": 5.893640483853743e-06, "loss": 0.5801, "step": 31335 }, { "epoch": 0.9146359258718809, "grad_norm": 1.013508659308757, "learning_rate": 5.890613383830778e-06, "loss": 0.5356, "step": 31340 }, { "epoch": 0.914781847366117, "grad_norm": 1.0521093857573989, "learning_rate": 5.8875913159570855e-06, "loss": 0.5245, "step": 31345 }, { "epoch": 0.9149277688603531, "grad_norm": 0.9727732752795446, "learning_rate": 5.884574280936402e-06, "loss": 0.5143, "step": 31350 }, { "epoch": 0.9150736903545892, "grad_norm": 0.9160916668246117, "learning_rate": 5.881562279471305e-06, "loss": 0.4882, "step": 31355 }, { "epoch": 0.9152196118488254, "grad_norm": 0.9770555239823763, "learning_rate": 5.878555312263192e-06, "loss": 0.5365, "step": 31360 }, { "epoch": 0.9153655333430615, "grad_norm": 1.0827212181009942, "learning_rate": 5.8755533800123015e-06, "loss": 0.5197, "step": 31365 }, { "epoch": 0.9155114548372976, "grad_norm": 0.9711317016550641, "learning_rate": 5.872556483417676e-06, "loss": 0.5078, "step": 31370 }, { "epoch": 0.9156573763315337, "grad_norm": 0.9395837642777, "learning_rate": 5.869564623177208e-06, "loss": 0.538, "step": 31375 }, { "epoch": 0.9158032978257697, "grad_norm": 0.976118613543372, "learning_rate": 5.866577799987601e-06, "loss": 0.5407, "step": 31380 }, { "epoch": 0.9159492193200058, "grad_norm": 0.8918711864452179, "learning_rate": 5.863596014544399e-06, "loss": 0.4543, "step": 31385 }, { "epoch": 0.9160951408142419, "grad_norm": 1.0104467179834946, "learning_rate": 5.860619267541967e-06, "loss": 0.5385, "step": 31390 }, { "epoch": 0.916241062308478, "grad_norm": 0.9839933987243363, "learning_rate": 5.85764755967349e-06, "loss": 0.5159, "step": 31395 }, { "epoch": 0.9163869838027141, "grad_norm": 1.016251482201384, "learning_rate": 5.854680891630992e-06, "loss": 0.5603, "step": 31400 }, { "epoch": 0.9165329052969502, "grad_norm": 1.0128208992399501, "learning_rate": 5.8517192641053106e-06, "loss": 0.5332, "step": 31405 }, { "epoch": 0.9166788267911864, "grad_norm": 0.9604393492902995, "learning_rate": 5.848762677786121e-06, "loss": 0.5196, "step": 31410 }, { "epoch": 0.9168247482854225, "grad_norm": 0.9598333133687205, "learning_rate": 5.845811133361918e-06, "loss": 0.5027, "step": 31415 }, { "epoch": 0.9169706697796586, "grad_norm": 1.0289666002720075, "learning_rate": 5.8428646315200246e-06, "loss": 0.5226, "step": 31420 }, { "epoch": 0.9171165912738947, "grad_norm": 0.9744226413128271, "learning_rate": 5.839923172946586e-06, "loss": 0.504, "step": 31425 }, { "epoch": 0.9172625127681308, "grad_norm": 1.0100517730443925, "learning_rate": 5.836986758326578e-06, "loss": 0.512, "step": 31430 }, { "epoch": 0.9174084342623668, "grad_norm": 1.120101253174796, "learning_rate": 5.834055388343801e-06, "loss": 0.5284, "step": 31435 }, { "epoch": 0.9175543557566029, "grad_norm": 0.9778272476700308, "learning_rate": 5.8311290636808765e-06, "loss": 0.4887, "step": 31440 }, { "epoch": 0.917700277250839, "grad_norm": 1.1721872563674882, "learning_rate": 5.828207785019255e-06, "loss": 0.5162, "step": 31445 }, { "epoch": 0.9178461987450751, "grad_norm": 1.0692293926090946, "learning_rate": 5.825291553039208e-06, "loss": 0.5194, "step": 31450 }, { "epoch": 0.9179921202393112, "grad_norm": 0.9269333426138349, "learning_rate": 5.8223803684198415e-06, "loss": 0.5488, "step": 31455 }, { "epoch": 0.9181380417335474, "grad_norm": 1.1208102992878484, "learning_rate": 5.819474231839076e-06, "loss": 0.5116, "step": 31460 }, { "epoch": 0.9182839632277835, "grad_norm": 1.0488380508477457, "learning_rate": 5.816573143973657e-06, "loss": 0.5478, "step": 31465 }, { "epoch": 0.9184298847220196, "grad_norm": 0.9572381107664635, "learning_rate": 5.813677105499158e-06, "loss": 0.5214, "step": 31470 }, { "epoch": 0.9185758062162557, "grad_norm": 1.196483551614418, "learning_rate": 5.810786117089975e-06, "loss": 0.504, "step": 31475 }, { "epoch": 0.9187217277104918, "grad_norm": 1.0389718264573946, "learning_rate": 5.807900179419332e-06, "loss": 0.5248, "step": 31480 }, { "epoch": 0.9188676492047279, "grad_norm": 1.1205777267900252, "learning_rate": 5.805019293159271e-06, "loss": 0.5722, "step": 31485 }, { "epoch": 0.9190135706989639, "grad_norm": 1.2602142760453576, "learning_rate": 5.802143458980663e-06, "loss": 0.5987, "step": 31490 }, { "epoch": 0.9191594921932, "grad_norm": 0.8809563533784155, "learning_rate": 5.799272677553196e-06, "loss": 0.5534, "step": 31495 }, { "epoch": 0.9193054136874361, "grad_norm": 1.022134406859791, "learning_rate": 5.796406949545384e-06, "loss": 0.5028, "step": 31500 }, { "epoch": 0.9194513351816722, "grad_norm": 0.9958885546350226, "learning_rate": 5.79354627562457e-06, "loss": 0.5144, "step": 31505 }, { "epoch": 0.9195972566759084, "grad_norm": 0.9586393693455683, "learning_rate": 5.790690656456913e-06, "loss": 0.4971, "step": 31510 }, { "epoch": 0.9197431781701445, "grad_norm": 0.9104926669708108, "learning_rate": 5.787840092707397e-06, "loss": 0.5012, "step": 31515 }, { "epoch": 0.9198890996643806, "grad_norm": 0.8971201381839433, "learning_rate": 5.7849945850398265e-06, "loss": 0.4606, "step": 31520 }, { "epoch": 0.9200350211586167, "grad_norm": 1.0757963411310465, "learning_rate": 5.78215413411684e-06, "loss": 0.538, "step": 31525 }, { "epoch": 0.9201809426528528, "grad_norm": 0.8092683643260684, "learning_rate": 5.779318740599879e-06, "loss": 0.4993, "step": 31530 }, { "epoch": 0.9203268641470889, "grad_norm": 1.0734661349566186, "learning_rate": 5.776488405149225e-06, "loss": 0.5671, "step": 31535 }, { "epoch": 0.920472785641325, "grad_norm": 1.1005848563672969, "learning_rate": 5.773663128423971e-06, "loss": 0.5109, "step": 31540 }, { "epoch": 0.920618707135561, "grad_norm": 1.1131244445886301, "learning_rate": 5.7708429110820345e-06, "loss": 0.5196, "step": 31545 }, { "epoch": 0.9207646286297971, "grad_norm": 1.024710100947874, "learning_rate": 5.7680277537801596e-06, "loss": 0.5611, "step": 31550 }, { "epoch": 0.9209105501240332, "grad_norm": 0.9815784808476141, "learning_rate": 5.7652176571739055e-06, "loss": 0.4933, "step": 31555 }, { "epoch": 0.9210564716182694, "grad_norm": 0.9674750595692396, "learning_rate": 5.762412621917658e-06, "loss": 0.5259, "step": 31560 }, { "epoch": 0.9212023931125055, "grad_norm": 1.1112468733005603, "learning_rate": 5.759612648664617e-06, "loss": 0.5642, "step": 31565 }, { "epoch": 0.9213483146067416, "grad_norm": 0.9704411756490212, "learning_rate": 5.756817738066813e-06, "loss": 0.4597, "step": 31570 }, { "epoch": 0.9214942361009777, "grad_norm": 0.8942571739452657, "learning_rate": 5.754027890775093e-06, "loss": 0.5536, "step": 31575 }, { "epoch": 0.9216401575952138, "grad_norm": 1.0540959562003775, "learning_rate": 5.751243107439126e-06, "loss": 0.5605, "step": 31580 }, { "epoch": 0.9217860790894499, "grad_norm": 1.0479665689525084, "learning_rate": 5.748463388707398e-06, "loss": 0.497, "step": 31585 }, { "epoch": 0.921932000583686, "grad_norm": 1.015230370189131, "learning_rate": 5.745688735227215e-06, "loss": 0.5243, "step": 31590 }, { "epoch": 0.922077922077922, "grad_norm": 1.0138227540966689, "learning_rate": 5.742919147644714e-06, "loss": 0.54, "step": 31595 }, { "epoch": 0.9222238435721581, "grad_norm": 0.9213330067075063, "learning_rate": 5.740154626604845e-06, "loss": 0.5009, "step": 31600 }, { "epoch": 0.9223697650663942, "grad_norm": 0.9607157300944271, "learning_rate": 5.737395172751372e-06, "loss": 0.5205, "step": 31605 }, { "epoch": 0.9225156865606304, "grad_norm": 0.9925073745629928, "learning_rate": 5.734640786726893e-06, "loss": 0.5078, "step": 31610 }, { "epoch": 0.9226616080548665, "grad_norm": 1.0988873460142738, "learning_rate": 5.7318914691728095e-06, "loss": 0.5357, "step": 31615 }, { "epoch": 0.9228075295491026, "grad_norm": 1.1493430640061477, "learning_rate": 5.7291472207293594e-06, "loss": 0.519, "step": 31620 }, { "epoch": 0.9229534510433387, "grad_norm": 0.8916772655107527, "learning_rate": 5.726408042035586e-06, "loss": 0.481, "step": 31625 }, { "epoch": 0.9230993725375748, "grad_norm": 0.993415329232206, "learning_rate": 5.723673933729367e-06, "loss": 0.5904, "step": 31630 }, { "epoch": 0.9232452940318109, "grad_norm": 1.0223446604284172, "learning_rate": 5.720944896447379e-06, "loss": 0.5169, "step": 31635 }, { "epoch": 0.923391215526047, "grad_norm": 1.1120195717784853, "learning_rate": 5.71822093082514e-06, "loss": 0.4938, "step": 31640 }, { "epoch": 0.9235371370202831, "grad_norm": 0.9257167384486059, "learning_rate": 5.7155020374969706e-06, "loss": 0.4983, "step": 31645 }, { "epoch": 0.9236830585145192, "grad_norm": 0.9266532227557428, "learning_rate": 5.712788217096014e-06, "loss": 0.556, "step": 31650 }, { "epoch": 0.9238289800087552, "grad_norm": 1.1213606221453962, "learning_rate": 5.71007947025424e-06, "loss": 0.5566, "step": 31655 }, { "epoch": 0.9239749015029914, "grad_norm": 0.9728696939200219, "learning_rate": 5.7073757976024225e-06, "loss": 0.492, "step": 31660 }, { "epoch": 0.9241208229972275, "grad_norm": 0.9735082313707822, "learning_rate": 5.704677199770172e-06, "loss": 0.5256, "step": 31665 }, { "epoch": 0.9242667444914636, "grad_norm": 1.0611148840662614, "learning_rate": 5.701983677385903e-06, "loss": 0.5449, "step": 31670 }, { "epoch": 0.9244126659856997, "grad_norm": 0.9387481771581796, "learning_rate": 5.699295231076852e-06, "loss": 0.5736, "step": 31675 }, { "epoch": 0.9245585874799358, "grad_norm": 1.0832717938936667, "learning_rate": 5.696611861469072e-06, "loss": 0.5682, "step": 31680 }, { "epoch": 0.9247045089741719, "grad_norm": 0.8510028898215768, "learning_rate": 5.693933569187437e-06, "loss": 0.5166, "step": 31685 }, { "epoch": 0.924850430468408, "grad_norm": 1.1235643002882414, "learning_rate": 5.6912603548556405e-06, "loss": 0.5046, "step": 31690 }, { "epoch": 0.9249963519626441, "grad_norm": 0.9179832343001979, "learning_rate": 5.688592219096184e-06, "loss": 0.5033, "step": 31695 }, { "epoch": 0.9251422734568802, "grad_norm": 1.1102966969451658, "learning_rate": 5.685929162530405e-06, "loss": 0.4932, "step": 31700 }, { "epoch": 0.9252881949511162, "grad_norm": 0.969940724614742, "learning_rate": 5.683271185778432e-06, "loss": 0.4671, "step": 31705 }, { "epoch": 0.9254341164453525, "grad_norm": 0.9642585952206327, "learning_rate": 5.68061828945923e-06, "loss": 0.5072, "step": 31710 }, { "epoch": 0.9255800379395885, "grad_norm": 0.9583576991705849, "learning_rate": 5.677970474190578e-06, "loss": 0.5244, "step": 31715 }, { "epoch": 0.9257259594338246, "grad_norm": 0.9014056225841488, "learning_rate": 5.675327740589069e-06, "loss": 0.539, "step": 31720 }, { "epoch": 0.9258718809280607, "grad_norm": 1.0248490218534667, "learning_rate": 5.672690089270107e-06, "loss": 0.5196, "step": 31725 }, { "epoch": 0.9260178024222968, "grad_norm": 0.9678155273003267, "learning_rate": 5.670057520847923e-06, "loss": 0.5237, "step": 31730 }, { "epoch": 0.9261637239165329, "grad_norm": 1.0229218390382524, "learning_rate": 5.66743003593556e-06, "loss": 0.5519, "step": 31735 }, { "epoch": 0.926309645410769, "grad_norm": 1.046174719379701, "learning_rate": 5.664807635144873e-06, "loss": 0.5102, "step": 31740 }, { "epoch": 0.9264555669050051, "grad_norm": 1.071932799796896, "learning_rate": 5.662190319086546e-06, "loss": 0.5717, "step": 31745 }, { "epoch": 0.9266014883992412, "grad_norm": 1.0729315072392658, "learning_rate": 5.659578088370062e-06, "loss": 0.5779, "step": 31750 }, { "epoch": 0.9267474098934773, "grad_norm": 1.0915419069520298, "learning_rate": 5.6569709436037265e-06, "loss": 0.5585, "step": 31755 }, { "epoch": 0.9268933313877135, "grad_norm": 1.0167125764508396, "learning_rate": 5.654368885394666e-06, "loss": 0.5309, "step": 31760 }, { "epoch": 0.9270392528819495, "grad_norm": 1.0424229754296737, "learning_rate": 5.651771914348817e-06, "loss": 0.4994, "step": 31765 }, { "epoch": 0.9271851743761856, "grad_norm": 1.1027655441406234, "learning_rate": 5.649180031070937e-06, "loss": 0.5154, "step": 31770 }, { "epoch": 0.9273310958704217, "grad_norm": 0.9313753100857511, "learning_rate": 5.646593236164586e-06, "loss": 0.5091, "step": 31775 }, { "epoch": 0.9274770173646578, "grad_norm": 1.0583117055235665, "learning_rate": 5.644011530232152e-06, "loss": 0.55, "step": 31780 }, { "epoch": 0.9276229388588939, "grad_norm": 0.9143411243107543, "learning_rate": 5.6414349138748375e-06, "loss": 0.5434, "step": 31785 }, { "epoch": 0.92776886035313, "grad_norm": 1.0386798196074016, "learning_rate": 5.638863387692646e-06, "loss": 0.5158, "step": 31790 }, { "epoch": 0.9279147818473661, "grad_norm": 0.8997653072960133, "learning_rate": 5.636296952284417e-06, "loss": 0.4879, "step": 31795 }, { "epoch": 0.9280607033416022, "grad_norm": 1.1642671816665333, "learning_rate": 5.633735608247781e-06, "loss": 0.5834, "step": 31800 }, { "epoch": 0.9282066248358383, "grad_norm": 0.9874577506471407, "learning_rate": 5.631179356179204e-06, "loss": 0.4963, "step": 31805 }, { "epoch": 0.9283525463300745, "grad_norm": 0.9642178660748422, "learning_rate": 5.628628196673953e-06, "loss": 0.5338, "step": 31810 }, { "epoch": 0.9284984678243106, "grad_norm": 0.9570004738418421, "learning_rate": 5.626082130326117e-06, "loss": 0.5318, "step": 31815 }, { "epoch": 0.9286443893185466, "grad_norm": 1.186193679196771, "learning_rate": 5.623541157728586e-06, "loss": 0.5355, "step": 31820 }, { "epoch": 0.9287903108127827, "grad_norm": 0.9451195718359684, "learning_rate": 5.621005279473083e-06, "loss": 0.4632, "step": 31825 }, { "epoch": 0.9289362323070188, "grad_norm": 0.9625344111486446, "learning_rate": 5.618474496150131e-06, "loss": 0.5392, "step": 31830 }, { "epoch": 0.9290821538012549, "grad_norm": 1.0328375691556269, "learning_rate": 5.615948808349066e-06, "loss": 0.5493, "step": 31835 }, { "epoch": 0.929228075295491, "grad_norm": 1.0515395906021583, "learning_rate": 5.6134282166580525e-06, "loss": 0.4935, "step": 31840 }, { "epoch": 0.9293739967897271, "grad_norm": 1.3085672807154634, "learning_rate": 5.610912721664043e-06, "loss": 0.5334, "step": 31845 }, { "epoch": 0.9295199182839632, "grad_norm": 1.0576759196709575, "learning_rate": 5.608402323952827e-06, "loss": 0.4972, "step": 31850 }, { "epoch": 0.9296658397781993, "grad_norm": 1.0185436077379475, "learning_rate": 5.605897024108997e-06, "loss": 0.5883, "step": 31855 }, { "epoch": 0.9298117612724355, "grad_norm": 1.133586188129723, "learning_rate": 5.603396822715953e-06, "loss": 0.5504, "step": 31860 }, { "epoch": 0.9299576827666716, "grad_norm": 1.0660002075547204, "learning_rate": 5.600901720355923e-06, "loss": 0.5242, "step": 31865 }, { "epoch": 0.9301036042609077, "grad_norm": 1.038727774726783, "learning_rate": 5.5984117176099295e-06, "loss": 0.5507, "step": 31870 }, { "epoch": 0.9302495257551437, "grad_norm": 1.0055715703660268, "learning_rate": 5.595926815057819e-06, "loss": 0.5145, "step": 31875 }, { "epoch": 0.9303954472493798, "grad_norm": 0.947604687560181, "learning_rate": 5.593447013278247e-06, "loss": 0.5041, "step": 31880 }, { "epoch": 0.9305413687436159, "grad_norm": 0.9269548880671084, "learning_rate": 5.590972312848686e-06, "loss": 0.5092, "step": 31885 }, { "epoch": 0.930687290237852, "grad_norm": 0.9513306453284274, "learning_rate": 5.58850271434541e-06, "loss": 0.5386, "step": 31890 }, { "epoch": 0.9308332117320881, "grad_norm": 0.9582900018746017, "learning_rate": 5.586038218343515e-06, "loss": 0.5096, "step": 31895 }, { "epoch": 0.9309791332263242, "grad_norm": 0.8940592201219112, "learning_rate": 5.5835788254169045e-06, "loss": 0.5629, "step": 31900 }, { "epoch": 0.9311250547205603, "grad_norm": 0.9547257387754181, "learning_rate": 5.5811245361382916e-06, "loss": 0.5526, "step": 31905 }, { "epoch": 0.9312709762147965, "grad_norm": 1.0317926827619388, "learning_rate": 5.57867535107921e-06, "loss": 0.5165, "step": 31910 }, { "epoch": 0.9314168977090326, "grad_norm": 0.9533641542598994, "learning_rate": 5.576231270809987e-06, "loss": 0.514, "step": 31915 }, { "epoch": 0.9315628192032687, "grad_norm": 1.1410699736394636, "learning_rate": 5.573792295899782e-06, "loss": 0.5169, "step": 31920 }, { "epoch": 0.9317087406975048, "grad_norm": 1.009128012617985, "learning_rate": 5.571358426916551e-06, "loss": 0.5187, "step": 31925 }, { "epoch": 0.9318546621917408, "grad_norm": 1.0778917206906513, "learning_rate": 5.568929664427068e-06, "loss": 0.5613, "step": 31930 }, { "epoch": 0.9320005836859769, "grad_norm": 0.9078292135919422, "learning_rate": 5.566506008996918e-06, "loss": 0.5431, "step": 31935 }, { "epoch": 0.932146505180213, "grad_norm": 1.0442124037312723, "learning_rate": 5.564087461190488e-06, "loss": 0.4817, "step": 31940 }, { "epoch": 0.9322924266744491, "grad_norm": 1.1363502299439294, "learning_rate": 5.5616740215709884e-06, "loss": 0.5645, "step": 31945 }, { "epoch": 0.9324383481686852, "grad_norm": 0.9184147280609047, "learning_rate": 5.559265690700426e-06, "loss": 0.4927, "step": 31950 }, { "epoch": 0.9325842696629213, "grad_norm": 1.0352520867868198, "learning_rate": 5.556862469139635e-06, "loss": 0.5382, "step": 31955 }, { "epoch": 0.9327301911571575, "grad_norm": 0.9675309386048626, "learning_rate": 5.554464357448243e-06, "loss": 0.5296, "step": 31960 }, { "epoch": 0.9328761126513936, "grad_norm": 1.0196329024275952, "learning_rate": 5.552071356184701e-06, "loss": 0.5657, "step": 31965 }, { "epoch": 0.9330220341456297, "grad_norm": 0.9408299908649591, "learning_rate": 5.549683465906261e-06, "loss": 0.4653, "step": 31970 }, { "epoch": 0.9331679556398658, "grad_norm": 0.9822200148482348, "learning_rate": 5.5473006871689855e-06, "loss": 0.5159, "step": 31975 }, { "epoch": 0.9333138771341019, "grad_norm": 1.129894230800963, "learning_rate": 5.5449230205277525e-06, "loss": 0.5997, "step": 31980 }, { "epoch": 0.933459798628338, "grad_norm": 0.9775420186286264, "learning_rate": 5.5425504665362445e-06, "loss": 0.5201, "step": 31985 }, { "epoch": 0.933605720122574, "grad_norm": 0.9276292516317872, "learning_rate": 5.5401830257469565e-06, "loss": 0.5204, "step": 31990 }, { "epoch": 0.9337516416168101, "grad_norm": 1.0036440236335673, "learning_rate": 5.537820698711189e-06, "loss": 0.5344, "step": 31995 }, { "epoch": 0.9338975631110462, "grad_norm": 1.151969581015306, "learning_rate": 5.535463485979058e-06, "loss": 0.5687, "step": 32000 }, { "epoch": 0.9340434846052823, "grad_norm": 1.0729535065934772, "learning_rate": 5.533111388099481e-06, "loss": 0.5547, "step": 32005 }, { "epoch": 0.9341894060995185, "grad_norm": 0.9617476627129081, "learning_rate": 5.5307644056201865e-06, "loss": 0.4852, "step": 32010 }, { "epoch": 0.9343353275937546, "grad_norm": 0.9527595038161738, "learning_rate": 5.528422539087719e-06, "loss": 0.5022, "step": 32015 }, { "epoch": 0.9344812490879907, "grad_norm": 1.0696385905057664, "learning_rate": 5.526085789047418e-06, "loss": 0.5683, "step": 32020 }, { "epoch": 0.9346271705822268, "grad_norm": 0.9308166600790008, "learning_rate": 5.5237541560434465e-06, "loss": 0.4923, "step": 32025 }, { "epoch": 0.9347730920764629, "grad_norm": 1.1002910979082146, "learning_rate": 5.521427640618766e-06, "loss": 0.5422, "step": 32030 }, { "epoch": 0.934919013570699, "grad_norm": 1.0904448248167755, "learning_rate": 5.519106243315149e-06, "loss": 0.5455, "step": 32035 }, { "epoch": 0.935064935064935, "grad_norm": 1.012187526547558, "learning_rate": 5.516789964673177e-06, "loss": 0.506, "step": 32040 }, { "epoch": 0.9352108565591711, "grad_norm": 1.1166942946625376, "learning_rate": 5.514478805232236e-06, "loss": 0.5288, "step": 32045 }, { "epoch": 0.9353567780534072, "grad_norm": 0.9722735365239863, "learning_rate": 5.512172765530527e-06, "loss": 0.5274, "step": 32050 }, { "epoch": 0.9355026995476433, "grad_norm": 1.0099030286564756, "learning_rate": 5.509871846105051e-06, "loss": 0.5677, "step": 32055 }, { "epoch": 0.9356486210418795, "grad_norm": 1.0077647828759178, "learning_rate": 5.507576047491619e-06, "loss": 0.4838, "step": 32060 }, { "epoch": 0.9357945425361156, "grad_norm": 1.0187261792820985, "learning_rate": 5.505285370224855e-06, "loss": 0.4953, "step": 32065 }, { "epoch": 0.9359404640303517, "grad_norm": 0.9529094247193499, "learning_rate": 5.502999814838182e-06, "loss": 0.5926, "step": 32070 }, { "epoch": 0.9360863855245878, "grad_norm": 0.9988246938941732, "learning_rate": 5.500719381863837e-06, "loss": 0.5284, "step": 32075 }, { "epoch": 0.9362323070188239, "grad_norm": 0.9719893841296832, "learning_rate": 5.498444071832861e-06, "loss": 0.5038, "step": 32080 }, { "epoch": 0.93637822851306, "grad_norm": 1.105894495324846, "learning_rate": 5.496173885275104e-06, "loss": 0.5161, "step": 32085 }, { "epoch": 0.936524150007296, "grad_norm": 0.961983803468379, "learning_rate": 5.493908822719214e-06, "loss": 0.5093, "step": 32090 }, { "epoch": 0.9366700715015321, "grad_norm": 1.104508324182209, "learning_rate": 5.491648884692663e-06, "loss": 0.5786, "step": 32095 }, { "epoch": 0.9368159929957682, "grad_norm": 0.9654443278620762, "learning_rate": 5.489394071721714e-06, "loss": 0.5342, "step": 32100 }, { "epoch": 0.9369619144900044, "grad_norm": 1.0744947223939199, "learning_rate": 5.487144384331442e-06, "loss": 0.5188, "step": 32105 }, { "epoch": 0.9371078359842405, "grad_norm": 1.1634668821257546, "learning_rate": 5.484899823045731e-06, "loss": 0.5173, "step": 32110 }, { "epoch": 0.9372537574784766, "grad_norm": 0.9303682489852825, "learning_rate": 5.482660388387265e-06, "loss": 0.5217, "step": 32115 }, { "epoch": 0.9373996789727127, "grad_norm": 0.8910344252256887, "learning_rate": 5.4804260808775454e-06, "loss": 0.5511, "step": 32120 }, { "epoch": 0.9375456004669488, "grad_norm": 0.9723850296906433, "learning_rate": 5.478196901036867e-06, "loss": 0.5001, "step": 32125 }, { "epoch": 0.9376915219611849, "grad_norm": 0.9962596224624716, "learning_rate": 5.475972849384338e-06, "loss": 0.5553, "step": 32130 }, { "epoch": 0.937837443455421, "grad_norm": 1.0634195109270448, "learning_rate": 5.473753926437866e-06, "loss": 0.5193, "step": 32135 }, { "epoch": 0.9379833649496571, "grad_norm": 0.8449855096674338, "learning_rate": 5.471540132714176e-06, "loss": 0.4895, "step": 32140 }, { "epoch": 0.9381292864438932, "grad_norm": 0.9397992786899602, "learning_rate": 5.469331468728788e-06, "loss": 0.4916, "step": 32145 }, { "epoch": 0.9382752079381292, "grad_norm": 0.9235692598768837, "learning_rate": 5.46712793499603e-06, "loss": 0.6033, "step": 32150 }, { "epoch": 0.9384211294323654, "grad_norm": 1.026363759239074, "learning_rate": 5.464929532029033e-06, "loss": 0.5888, "step": 32155 }, { "epoch": 0.9385670509266015, "grad_norm": 0.989640622068954, "learning_rate": 5.46273626033974e-06, "loss": 0.5364, "step": 32160 }, { "epoch": 0.9387129724208376, "grad_norm": 1.021372727540997, "learning_rate": 5.460548120438898e-06, "loss": 0.5486, "step": 32165 }, { "epoch": 0.9388588939150737, "grad_norm": 0.9049922969362844, "learning_rate": 5.458365112836051e-06, "loss": 0.508, "step": 32170 }, { "epoch": 0.9390048154093098, "grad_norm": 0.9961778348984796, "learning_rate": 5.456187238039556e-06, "loss": 0.5453, "step": 32175 }, { "epoch": 0.9391507369035459, "grad_norm": 1.0193700775334373, "learning_rate": 5.45401449655657e-06, "loss": 0.5238, "step": 32180 }, { "epoch": 0.939296658397782, "grad_norm": 0.9176568059248664, "learning_rate": 5.451846888893054e-06, "loss": 0.4589, "step": 32185 }, { "epoch": 0.9394425798920181, "grad_norm": 1.0711435355519072, "learning_rate": 5.449684415553783e-06, "loss": 0.5262, "step": 32190 }, { "epoch": 0.9395885013862542, "grad_norm": 1.0266778237076375, "learning_rate": 5.447527077042325e-06, "loss": 0.5264, "step": 32195 }, { "epoch": 0.9397344228804902, "grad_norm": 1.0181382804539882, "learning_rate": 5.445374873861056e-06, "loss": 0.5762, "step": 32200 }, { "epoch": 0.9398803443747265, "grad_norm": 1.0351835837230354, "learning_rate": 5.443227806511157e-06, "loss": 0.5357, "step": 32205 }, { "epoch": 0.9400262658689625, "grad_norm": 1.0539822136537988, "learning_rate": 5.4410858754926166e-06, "loss": 0.5838, "step": 32210 }, { "epoch": 0.9401721873631986, "grad_norm": 1.059967724648873, "learning_rate": 5.438949081304219e-06, "loss": 0.4786, "step": 32215 }, { "epoch": 0.9403181088574347, "grad_norm": 1.093441054360934, "learning_rate": 5.436817424443558e-06, "loss": 0.5903, "step": 32220 }, { "epoch": 0.9404640303516708, "grad_norm": 1.0062845438569128, "learning_rate": 5.434690905407029e-06, "loss": 0.5805, "step": 32225 }, { "epoch": 0.9406099518459069, "grad_norm": 0.9561755804812622, "learning_rate": 5.4325695246898315e-06, "loss": 0.5293, "step": 32230 }, { "epoch": 0.940755873340143, "grad_norm": 0.9427322986943857, "learning_rate": 5.430453282785971e-06, "loss": 0.5073, "step": 32235 }, { "epoch": 0.9409017948343791, "grad_norm": 0.8729784735374143, "learning_rate": 5.428342180188248e-06, "loss": 0.4975, "step": 32240 }, { "epoch": 0.9410477163286152, "grad_norm": 1.0720648333779812, "learning_rate": 5.4262362173882835e-06, "loss": 0.5735, "step": 32245 }, { "epoch": 0.9411936378228513, "grad_norm": 0.969168330805528, "learning_rate": 5.424135394876478e-06, "loss": 0.5364, "step": 32250 }, { "epoch": 0.9413395593170875, "grad_norm": 0.9505643008936041, "learning_rate": 5.422039713142058e-06, "loss": 0.4968, "step": 32255 }, { "epoch": 0.9414854808113235, "grad_norm": 0.968023828653141, "learning_rate": 5.419949172673032e-06, "loss": 0.5219, "step": 32260 }, { "epoch": 0.9416314023055596, "grad_norm": 1.0173988966125602, "learning_rate": 5.417863773956228e-06, "loss": 0.5272, "step": 32265 }, { "epoch": 0.9417773237997957, "grad_norm": 1.0581380216305314, "learning_rate": 5.415783517477268e-06, "loss": 0.4939, "step": 32270 }, { "epoch": 0.9419232452940318, "grad_norm": 1.1092085771557247, "learning_rate": 5.413708403720577e-06, "loss": 0.5666, "step": 32275 }, { "epoch": 0.9420691667882679, "grad_norm": 1.1165806864666583, "learning_rate": 5.411638433169389e-06, "loss": 0.5455, "step": 32280 }, { "epoch": 0.942215088282504, "grad_norm": 1.0678423743130478, "learning_rate": 5.409573606305731e-06, "loss": 0.482, "step": 32285 }, { "epoch": 0.9423610097767401, "grad_norm": 0.9500444389606177, "learning_rate": 5.407513923610437e-06, "loss": 0.5205, "step": 32290 }, { "epoch": 0.9425069312709762, "grad_norm": 1.036409361618088, "learning_rate": 5.405459385563143e-06, "loss": 0.5134, "step": 32295 }, { "epoch": 0.9426528527652123, "grad_norm": 0.9331912339013145, "learning_rate": 5.4034099926422835e-06, "loss": 0.5074, "step": 32300 }, { "epoch": 0.9427987742594485, "grad_norm": 0.9503658750409534, "learning_rate": 5.401365745325106e-06, "loss": 0.5562, "step": 32305 }, { "epoch": 0.9429446957536846, "grad_norm": 0.9821297261699153, "learning_rate": 5.399326644087645e-06, "loss": 0.5508, "step": 32310 }, { "epoch": 0.9430906172479206, "grad_norm": 0.9258730356943676, "learning_rate": 5.397292689404746e-06, "loss": 0.4497, "step": 32315 }, { "epoch": 0.9432365387421567, "grad_norm": 1.0341894543120496, "learning_rate": 5.3952638817500515e-06, "loss": 0.5157, "step": 32320 }, { "epoch": 0.9433824602363928, "grad_norm": 0.8866010774437126, "learning_rate": 5.3932402215960075e-06, "loss": 0.4654, "step": 32325 }, { "epoch": 0.9435283817306289, "grad_norm": 1.030424889629682, "learning_rate": 5.391221709413862e-06, "loss": 0.522, "step": 32330 }, { "epoch": 0.943674303224865, "grad_norm": 1.1016410270209107, "learning_rate": 5.3892083456736615e-06, "loss": 0.5656, "step": 32335 }, { "epoch": 0.9438202247191011, "grad_norm": 0.9657615339305116, "learning_rate": 5.387200130844262e-06, "loss": 0.5648, "step": 32340 }, { "epoch": 0.9439661462133372, "grad_norm": 1.0085048621724384, "learning_rate": 5.385197065393303e-06, "loss": 0.4893, "step": 32345 }, { "epoch": 0.9441120677075733, "grad_norm": 0.9533405179225334, "learning_rate": 5.383199149787245e-06, "loss": 0.4724, "step": 32350 }, { "epoch": 0.9442579892018095, "grad_norm": 1.1375491503550434, "learning_rate": 5.3812063844913356e-06, "loss": 0.5123, "step": 32355 }, { "epoch": 0.9444039106960456, "grad_norm": 1.1568315364097925, "learning_rate": 5.379218769969628e-06, "loss": 0.4994, "step": 32360 }, { "epoch": 0.9445498321902817, "grad_norm": 1.031978122242187, "learning_rate": 5.377236306684977e-06, "loss": 0.5083, "step": 32365 }, { "epoch": 0.9446957536845177, "grad_norm": 0.9164732549671073, "learning_rate": 5.375258995099031e-06, "loss": 0.4753, "step": 32370 }, { "epoch": 0.9448416751787538, "grad_norm": 1.095053920064882, "learning_rate": 5.373286835672251e-06, "loss": 0.5164, "step": 32375 }, { "epoch": 0.9449875966729899, "grad_norm": 1.0993939498070473, "learning_rate": 5.371319828863883e-06, "loss": 0.5911, "step": 32380 }, { "epoch": 0.945133518167226, "grad_norm": 1.0714038720499457, "learning_rate": 5.369357975131992e-06, "loss": 0.5227, "step": 32385 }, { "epoch": 0.9452794396614621, "grad_norm": 1.0515651481500352, "learning_rate": 5.367401274933423e-06, "loss": 0.5194, "step": 32390 }, { "epoch": 0.9454253611556982, "grad_norm": 1.0114511510710738, "learning_rate": 5.365449728723835e-06, "loss": 0.5478, "step": 32395 }, { "epoch": 0.9455712826499343, "grad_norm": 1.0717098404795151, "learning_rate": 5.363503336957679e-06, "loss": 0.4959, "step": 32400 }, { "epoch": 0.9457172041441705, "grad_norm": 1.0217572457427606, "learning_rate": 5.36156210008821e-06, "loss": 0.5399, "step": 32405 }, { "epoch": 0.9458631256384066, "grad_norm": 1.048430174293286, "learning_rate": 5.359626018567484e-06, "loss": 0.5221, "step": 32410 }, { "epoch": 0.9460090471326427, "grad_norm": 1.1518842693964366, "learning_rate": 5.35769509284635e-06, "loss": 0.5202, "step": 32415 }, { "epoch": 0.9461549686268788, "grad_norm": 0.9690391741965505, "learning_rate": 5.355769323374462e-06, "loss": 0.5099, "step": 32420 }, { "epoch": 0.9463008901211148, "grad_norm": 1.1001506832752115, "learning_rate": 5.353848710600266e-06, "loss": 0.5197, "step": 32425 }, { "epoch": 0.9464468116153509, "grad_norm": 0.9128926299634978, "learning_rate": 5.351933254971022e-06, "loss": 0.5783, "step": 32430 }, { "epoch": 0.946592733109587, "grad_norm": 1.0067123464497771, "learning_rate": 5.3500229569327775e-06, "loss": 0.5875, "step": 32435 }, { "epoch": 0.9467386546038231, "grad_norm": 1.020425317530466, "learning_rate": 5.348117816930374e-06, "loss": 0.5047, "step": 32440 }, { "epoch": 0.9468845760980592, "grad_norm": 0.9901664790546404, "learning_rate": 5.346217835407464e-06, "loss": 0.5509, "step": 32445 }, { "epoch": 0.9470304975922953, "grad_norm": 0.9363910630107029, "learning_rate": 5.344323012806491e-06, "loss": 0.4902, "step": 32450 }, { "epoch": 0.9471764190865315, "grad_norm": 0.9268987380651581, "learning_rate": 5.342433349568707e-06, "loss": 0.4917, "step": 32455 }, { "epoch": 0.9473223405807676, "grad_norm": 1.0074519871226457, "learning_rate": 5.340548846134146e-06, "loss": 0.5126, "step": 32460 }, { "epoch": 0.9474682620750037, "grad_norm": 1.0024981816391079, "learning_rate": 5.338669502941655e-06, "loss": 0.5051, "step": 32465 }, { "epoch": 0.9476141835692398, "grad_norm": 1.0811704900680763, "learning_rate": 5.336795320428876e-06, "loss": 0.5108, "step": 32470 }, { "epoch": 0.9477601050634759, "grad_norm": 0.9178559495477222, "learning_rate": 5.334926299032241e-06, "loss": 0.4762, "step": 32475 }, { "epoch": 0.947906026557712, "grad_norm": 1.2133271272432984, "learning_rate": 5.333062439186995e-06, "loss": 0.5634, "step": 32480 }, { "epoch": 0.948051948051948, "grad_norm": 1.0319536826568692, "learning_rate": 5.331203741327163e-06, "loss": 0.5916, "step": 32485 }, { "epoch": 0.9481978695461841, "grad_norm": 1.0526814990908873, "learning_rate": 5.329350205885583e-06, "loss": 0.5443, "step": 32490 }, { "epoch": 0.9483437910404202, "grad_norm": 0.9725696915607186, "learning_rate": 5.3275018332938844e-06, "loss": 0.5224, "step": 32495 }, { "epoch": 0.9484897125346563, "grad_norm": 1.0901222653279823, "learning_rate": 5.3256586239824964e-06, "loss": 0.5006, "step": 32500 }, { "epoch": 0.9486356340288925, "grad_norm": 0.8570398912908441, "learning_rate": 5.323820578380645e-06, "loss": 0.5193, "step": 32505 }, { "epoch": 0.9487815555231286, "grad_norm": 1.0173662891091206, "learning_rate": 5.321987696916352e-06, "loss": 0.5386, "step": 32510 }, { "epoch": 0.9489274770173647, "grad_norm": 0.9657507906669571, "learning_rate": 5.320159980016438e-06, "loss": 0.4992, "step": 32515 }, { "epoch": 0.9490733985116008, "grad_norm": 0.9361403786008836, "learning_rate": 5.31833742810652e-06, "loss": 0.5024, "step": 32520 }, { "epoch": 0.9492193200058369, "grad_norm": 1.034992469883588, "learning_rate": 5.316520041611018e-06, "loss": 0.5307, "step": 32525 }, { "epoch": 0.949365241500073, "grad_norm": 0.9389121182420452, "learning_rate": 5.314707820953137e-06, "loss": 0.4787, "step": 32530 }, { "epoch": 0.949511162994309, "grad_norm": 1.0691424888725687, "learning_rate": 5.312900766554895e-06, "loss": 0.5613, "step": 32535 }, { "epoch": 0.9496570844885451, "grad_norm": 0.8533700947589191, "learning_rate": 5.311098878837091e-06, "loss": 0.4551, "step": 32540 }, { "epoch": 0.9498030059827812, "grad_norm": 1.0216651568801873, "learning_rate": 5.3093021582193296e-06, "loss": 0.5606, "step": 32545 }, { "epoch": 0.9499489274770173, "grad_norm": 0.9944316795042544, "learning_rate": 5.307510605120014e-06, "loss": 0.5108, "step": 32550 }, { "epoch": 0.9500948489712535, "grad_norm": 1.001027602886065, "learning_rate": 5.305724219956338e-06, "loss": 0.5028, "step": 32555 }, { "epoch": 0.9502407704654896, "grad_norm": 0.9413797617653485, "learning_rate": 5.303943003144296e-06, "loss": 0.5269, "step": 32560 }, { "epoch": 0.9503866919597257, "grad_norm": 1.0480738113292865, "learning_rate": 5.302166955098676e-06, "loss": 0.5623, "step": 32565 }, { "epoch": 0.9505326134539618, "grad_norm": 1.0271440494518247, "learning_rate": 5.300396076233069e-06, "loss": 0.533, "step": 32570 }, { "epoch": 0.9506785349481979, "grad_norm": 1.1121075776593368, "learning_rate": 5.29863036695985e-06, "loss": 0.526, "step": 32575 }, { "epoch": 0.950824456442434, "grad_norm": 1.0256968168566702, "learning_rate": 5.296869827690204e-06, "loss": 0.5257, "step": 32580 }, { "epoch": 0.95097037793667, "grad_norm": 1.0409042006338227, "learning_rate": 5.2951144588341005e-06, "loss": 0.5052, "step": 32585 }, { "epoch": 0.9511162994309061, "grad_norm": 0.912893867936593, "learning_rate": 5.293364260800312e-06, "loss": 0.5164, "step": 32590 }, { "epoch": 0.9512622209251422, "grad_norm": 1.0098995168495661, "learning_rate": 5.291619233996403e-06, "loss": 0.5931, "step": 32595 }, { "epoch": 0.9514081424193783, "grad_norm": 1.040839479143544, "learning_rate": 5.289879378828741e-06, "loss": 0.5649, "step": 32600 }, { "epoch": 0.9515540639136145, "grad_norm": 1.0551045640183863, "learning_rate": 5.288144695702476e-06, "loss": 0.5477, "step": 32605 }, { "epoch": 0.9516999854078506, "grad_norm": 0.934896284795098, "learning_rate": 5.286415185021568e-06, "loss": 0.4793, "step": 32610 }, { "epoch": 0.9518459069020867, "grad_norm": 0.9506652452349018, "learning_rate": 5.28469084718876e-06, "loss": 0.513, "step": 32615 }, { "epoch": 0.9519918283963228, "grad_norm": 1.139952722775066, "learning_rate": 5.2829716826056045e-06, "loss": 0.5275, "step": 32620 }, { "epoch": 0.9521377498905589, "grad_norm": 1.035508919818366, "learning_rate": 5.281257691672431e-06, "loss": 0.5248, "step": 32625 }, { "epoch": 0.952283671384795, "grad_norm": 1.101384977301379, "learning_rate": 5.279548874788382e-06, "loss": 0.5746, "step": 32630 }, { "epoch": 0.9524295928790311, "grad_norm": 1.1202838667513801, "learning_rate": 5.277845232351384e-06, "loss": 0.5734, "step": 32635 }, { "epoch": 0.9525755143732672, "grad_norm": 0.9949726020492728, "learning_rate": 5.276146764758161e-06, "loss": 0.5642, "step": 32640 }, { "epoch": 0.9527214358675032, "grad_norm": 0.9575314480512831, "learning_rate": 5.2744534724042395e-06, "loss": 0.4667, "step": 32645 }, { "epoch": 0.9528673573617393, "grad_norm": 1.0148875212566582, "learning_rate": 5.272765355683926e-06, "loss": 0.5618, "step": 32650 }, { "epoch": 0.9530132788559755, "grad_norm": 0.99515839850543, "learning_rate": 5.271082414990332e-06, "loss": 0.5293, "step": 32655 }, { "epoch": 0.9531592003502116, "grad_norm": 0.9904444532458596, "learning_rate": 5.269404650715365e-06, "loss": 0.5821, "step": 32660 }, { "epoch": 0.9533051218444477, "grad_norm": 1.1847514427465977, "learning_rate": 5.267732063249721e-06, "loss": 0.5702, "step": 32665 }, { "epoch": 0.9534510433386838, "grad_norm": 1.0141353687465244, "learning_rate": 5.266064652982895e-06, "loss": 0.4969, "step": 32670 }, { "epoch": 0.9535969648329199, "grad_norm": 0.9100795609414316, "learning_rate": 5.2644024203031716e-06, "loss": 0.5364, "step": 32675 }, { "epoch": 0.953742886327156, "grad_norm": 0.9902050761953441, "learning_rate": 5.262745365597634e-06, "loss": 0.5303, "step": 32680 }, { "epoch": 0.9538888078213921, "grad_norm": 0.9428925797883027, "learning_rate": 5.261093489252162e-06, "loss": 0.4707, "step": 32685 }, { "epoch": 0.9540347293156282, "grad_norm": 0.9522013198823092, "learning_rate": 5.259446791651422e-06, "loss": 0.5141, "step": 32690 }, { "epoch": 0.9541806508098643, "grad_norm": 1.0495746841068556, "learning_rate": 5.257805273178879e-06, "loss": 0.5102, "step": 32695 }, { "epoch": 0.9543265723041003, "grad_norm": 1.1007383179722245, "learning_rate": 5.256168934216791e-06, "loss": 0.5405, "step": 32700 }, { "epoch": 0.9544724937983365, "grad_norm": 0.9679425435095155, "learning_rate": 5.254537775146213e-06, "loss": 0.4906, "step": 32705 }, { "epoch": 0.9546184152925726, "grad_norm": 0.976114342532864, "learning_rate": 5.2529117963469855e-06, "loss": 0.5285, "step": 32710 }, { "epoch": 0.9547643367868087, "grad_norm": 0.9719846692548411, "learning_rate": 5.251290998197752e-06, "loss": 0.489, "step": 32715 }, { "epoch": 0.9549102582810448, "grad_norm": 0.9713635488915366, "learning_rate": 5.249675381075945e-06, "loss": 0.5648, "step": 32720 }, { "epoch": 0.9550561797752809, "grad_norm": 0.9314669581398894, "learning_rate": 5.24806494535779e-06, "loss": 0.5166, "step": 32725 }, { "epoch": 0.955202101269517, "grad_norm": 1.0382269272043272, "learning_rate": 5.2464596914183065e-06, "loss": 0.5181, "step": 32730 }, { "epoch": 0.9553480227637531, "grad_norm": 0.9144298973759393, "learning_rate": 5.244859619631312e-06, "loss": 0.4862, "step": 32735 }, { "epoch": 0.9554939442579892, "grad_norm": 1.016100758082206, "learning_rate": 5.24326473036941e-06, "loss": 0.5064, "step": 32740 }, { "epoch": 0.9556398657522253, "grad_norm": 0.9166681959333947, "learning_rate": 5.241675024003999e-06, "loss": 0.4981, "step": 32745 }, { "epoch": 0.9557857872464613, "grad_norm": 1.1089264006598776, "learning_rate": 5.240090500905271e-06, "loss": 0.5701, "step": 32750 }, { "epoch": 0.9559317087406975, "grad_norm": 0.8845834130119041, "learning_rate": 5.238511161442217e-06, "loss": 0.5381, "step": 32755 }, { "epoch": 0.9560776302349336, "grad_norm": 1.0760980263918716, "learning_rate": 5.236937005982612e-06, "loss": 0.5033, "step": 32760 }, { "epoch": 0.9562235517291697, "grad_norm": 0.93997371920883, "learning_rate": 5.23536803489303e-06, "loss": 0.4803, "step": 32765 }, { "epoch": 0.9563694732234058, "grad_norm": 1.114164484132752, "learning_rate": 5.2338042485388315e-06, "loss": 0.5201, "step": 32770 }, { "epoch": 0.9565153947176419, "grad_norm": 1.0438821301319572, "learning_rate": 5.232245647284176e-06, "loss": 0.5296, "step": 32775 }, { "epoch": 0.956661316211878, "grad_norm": 0.9643308512194376, "learning_rate": 5.230692231492011e-06, "loss": 0.5093, "step": 32780 }, { "epoch": 0.9568072377061141, "grad_norm": 0.9827159002137831, "learning_rate": 5.229144001524079e-06, "loss": 0.5094, "step": 32785 }, { "epoch": 0.9569531592003502, "grad_norm": 0.9540932215899559, "learning_rate": 5.227600957740917e-06, "loss": 0.5222, "step": 32790 }, { "epoch": 0.9570990806945863, "grad_norm": 1.0394539900180224, "learning_rate": 5.226063100501851e-06, "loss": 0.5111, "step": 32795 }, { "epoch": 0.9572450021888224, "grad_norm": 0.962093786861143, "learning_rate": 5.224530430164993e-06, "loss": 0.546, "step": 32800 }, { "epoch": 0.9573909236830586, "grad_norm": 0.9440000777362706, "learning_rate": 5.223002947087262e-06, "loss": 0.5269, "step": 32805 }, { "epoch": 0.9575368451772946, "grad_norm": 1.0633015198672835, "learning_rate": 5.221480651624359e-06, "loss": 0.5397, "step": 32810 }, { "epoch": 0.9576827666715307, "grad_norm": 0.9617027669553335, "learning_rate": 5.2199635441307775e-06, "loss": 0.5303, "step": 32815 }, { "epoch": 0.9578286881657668, "grad_norm": 0.8119501727917513, "learning_rate": 5.218451624959804e-06, "loss": 0.4668, "step": 32820 }, { "epoch": 0.9579746096600029, "grad_norm": 0.8831345798253704, "learning_rate": 5.216944894463519e-06, "loss": 0.4946, "step": 32825 }, { "epoch": 0.958120531154239, "grad_norm": 1.0502557724917378, "learning_rate": 5.215443352992791e-06, "loss": 0.5613, "step": 32830 }, { "epoch": 0.9582664526484751, "grad_norm": 0.8841140001128457, "learning_rate": 5.213947000897282e-06, "loss": 0.4991, "step": 32835 }, { "epoch": 0.9584123741427112, "grad_norm": 0.858299719075058, "learning_rate": 5.212455838525449e-06, "loss": 0.5047, "step": 32840 }, { "epoch": 0.9585582956369473, "grad_norm": 0.9821504706577947, "learning_rate": 5.210969866224531e-06, "loss": 0.5616, "step": 32845 }, { "epoch": 0.9587042171311835, "grad_norm": 1.2101543767330076, "learning_rate": 5.2094890843405696e-06, "loss": 0.6015, "step": 32850 }, { "epoch": 0.9588501386254196, "grad_norm": 0.9942147599361678, "learning_rate": 5.208013493218389e-06, "loss": 0.5285, "step": 32855 }, { "epoch": 0.9589960601196557, "grad_norm": 1.3218168419484149, "learning_rate": 5.206543093201611e-06, "loss": 0.5893, "step": 32860 }, { "epoch": 0.9591419816138917, "grad_norm": 0.8421553959586671, "learning_rate": 5.205077884632646e-06, "loss": 0.4549, "step": 32865 }, { "epoch": 0.9592879031081278, "grad_norm": 0.919440616794099, "learning_rate": 5.2036178678526904e-06, "loss": 0.4463, "step": 32870 }, { "epoch": 0.9594338246023639, "grad_norm": 0.9696298943745347, "learning_rate": 5.202163043201743e-06, "loss": 0.4907, "step": 32875 }, { "epoch": 0.9595797460966, "grad_norm": 0.8747653288765233, "learning_rate": 5.200713411018582e-06, "loss": 0.4758, "step": 32880 }, { "epoch": 0.9597256675908361, "grad_norm": 1.052103595342119, "learning_rate": 5.199268971640783e-06, "loss": 0.5292, "step": 32885 }, { "epoch": 0.9598715890850722, "grad_norm": 0.9555986822809132, "learning_rate": 5.1978297254047095e-06, "loss": 0.5132, "step": 32890 }, { "epoch": 0.9600175105793083, "grad_norm": 1.116101319679959, "learning_rate": 5.196395672645518e-06, "loss": 0.5324, "step": 32895 }, { "epoch": 0.9601634320735445, "grad_norm": 0.8891671994728823, "learning_rate": 5.1949668136971556e-06, "loss": 0.5196, "step": 32900 }, { "epoch": 0.9603093535677806, "grad_norm": 0.8898822687487035, "learning_rate": 5.193543148892358e-06, "loss": 0.5461, "step": 32905 }, { "epoch": 0.9604552750620167, "grad_norm": 0.9721937975495203, "learning_rate": 5.192124678562651e-06, "loss": 0.5088, "step": 32910 }, { "epoch": 0.9606011965562528, "grad_norm": 1.0005016735438703, "learning_rate": 5.190711403038352e-06, "loss": 0.4949, "step": 32915 }, { "epoch": 0.9607471180504888, "grad_norm": 1.0446026730281885, "learning_rate": 5.18930332264857e-06, "loss": 0.5171, "step": 32920 }, { "epoch": 0.9608930395447249, "grad_norm": 1.0611777673683729, "learning_rate": 5.187900437721199e-06, "loss": 0.484, "step": 32925 }, { "epoch": 0.961038961038961, "grad_norm": 0.9666390137632779, "learning_rate": 5.1865027485829345e-06, "loss": 0.5161, "step": 32930 }, { "epoch": 0.9611848825331971, "grad_norm": 0.9420953812880385, "learning_rate": 5.185110255559249e-06, "loss": 0.5186, "step": 32935 }, { "epoch": 0.9613308040274332, "grad_norm": 1.0845116014942744, "learning_rate": 5.1837229589744086e-06, "loss": 0.5055, "step": 32940 }, { "epoch": 0.9614767255216693, "grad_norm": 1.004766470078887, "learning_rate": 5.182340859151478e-06, "loss": 0.5698, "step": 32945 }, { "epoch": 0.9616226470159055, "grad_norm": 0.9818628144336805, "learning_rate": 5.180963956412298e-06, "loss": 0.4901, "step": 32950 }, { "epoch": 0.9617685685101416, "grad_norm": 1.0234294705338192, "learning_rate": 5.179592251077512e-06, "loss": 0.5834, "step": 32955 }, { "epoch": 0.9619144900043777, "grad_norm": 0.9008938291871342, "learning_rate": 5.178225743466542e-06, "loss": 0.5034, "step": 32960 }, { "epoch": 0.9620604114986138, "grad_norm": 1.1751660764035112, "learning_rate": 5.17686443389761e-06, "loss": 0.5235, "step": 32965 }, { "epoch": 0.9622063329928499, "grad_norm": 0.9187648440525964, "learning_rate": 5.175508322687718e-06, "loss": 0.5371, "step": 32970 }, { "epoch": 0.962352254487086, "grad_norm": 0.9558112646720623, "learning_rate": 5.174157410152661e-06, "loss": 0.5556, "step": 32975 }, { "epoch": 0.962498175981322, "grad_norm": 1.0042514547229604, "learning_rate": 5.172811696607031e-06, "loss": 0.4797, "step": 32980 }, { "epoch": 0.9626440974755581, "grad_norm": 0.9847438386513686, "learning_rate": 5.171471182364195e-06, "loss": 0.5639, "step": 32985 }, { "epoch": 0.9627900189697942, "grad_norm": 0.9932990069174642, "learning_rate": 5.170135867736322e-06, "loss": 0.4755, "step": 32990 }, { "epoch": 0.9629359404640303, "grad_norm": 1.0292678148982501, "learning_rate": 5.168805753034359e-06, "loss": 0.5335, "step": 32995 }, { "epoch": 0.9630818619582665, "grad_norm": 0.959641399038719, "learning_rate": 5.167480838568058e-06, "loss": 0.5271, "step": 33000 }, { "epoch": 0.9632277834525026, "grad_norm": 0.9690860574424337, "learning_rate": 5.1661611246459395e-06, "loss": 0.5601, "step": 33005 }, { "epoch": 0.9633737049467387, "grad_norm": 0.9848156239347772, "learning_rate": 5.164846611575328e-06, "loss": 0.5075, "step": 33010 }, { "epoch": 0.9635196264409748, "grad_norm": 1.0002301548731347, "learning_rate": 5.163537299662336e-06, "loss": 0.4866, "step": 33015 }, { "epoch": 0.9636655479352109, "grad_norm": 1.2093082617707185, "learning_rate": 5.162233189211853e-06, "loss": 0.5509, "step": 33020 }, { "epoch": 0.963811469429447, "grad_norm": 0.9514686470490332, "learning_rate": 5.160934280527574e-06, "loss": 0.5274, "step": 33025 }, { "epoch": 0.963957390923683, "grad_norm": 1.0041336624940398, "learning_rate": 5.159640573911969e-06, "loss": 0.5364, "step": 33030 }, { "epoch": 0.9641033124179191, "grad_norm": 1.091454286976059, "learning_rate": 5.158352069666304e-06, "loss": 0.5062, "step": 33035 }, { "epoch": 0.9642492339121552, "grad_norm": 1.088978983775512, "learning_rate": 5.1570687680906295e-06, "loss": 0.5349, "step": 33040 }, { "epoch": 0.9643951554063913, "grad_norm": 0.9832650522237123, "learning_rate": 5.155790669483787e-06, "loss": 0.5017, "step": 33045 }, { "epoch": 0.9645410769006275, "grad_norm": 0.9790684900733043, "learning_rate": 5.1545177741434084e-06, "loss": 0.526, "step": 33050 }, { "epoch": 0.9646869983948636, "grad_norm": 1.057200653760597, "learning_rate": 5.153250082365904e-06, "loss": 0.5167, "step": 33055 }, { "epoch": 0.9648329198890997, "grad_norm": 0.940885123767378, "learning_rate": 5.151987594446485e-06, "loss": 0.5414, "step": 33060 }, { "epoch": 0.9649788413833358, "grad_norm": 1.191675067518615, "learning_rate": 5.150730310679145e-06, "loss": 0.4929, "step": 33065 }, { "epoch": 0.9651247628775719, "grad_norm": 1.003718608937224, "learning_rate": 5.149478231356668e-06, "loss": 0.5561, "step": 33070 }, { "epoch": 0.965270684371808, "grad_norm": 0.82691596437521, "learning_rate": 5.148231356770617e-06, "loss": 0.507, "step": 33075 }, { "epoch": 0.965416605866044, "grad_norm": 1.0216014318969204, "learning_rate": 5.146989687211356e-06, "loss": 0.5369, "step": 33080 }, { "epoch": 0.9655625273602801, "grad_norm": 1.054176341278807, "learning_rate": 5.1457532229680315e-06, "loss": 0.5557, "step": 33085 }, { "epoch": 0.9657084488545162, "grad_norm": 0.9851936942970125, "learning_rate": 5.144521964328571e-06, "loss": 0.4779, "step": 33090 }, { "epoch": 0.9658543703487523, "grad_norm": 0.9838057145646232, "learning_rate": 5.143295911579706e-06, "loss": 0.4917, "step": 33095 }, { "epoch": 0.9660002918429885, "grad_norm": 1.0725837704920778, "learning_rate": 5.142075065006935e-06, "loss": 0.5353, "step": 33100 }, { "epoch": 0.9661462133372246, "grad_norm": 0.9648922133667956, "learning_rate": 5.140859424894561e-06, "loss": 0.5435, "step": 33105 }, { "epoch": 0.9662921348314607, "grad_norm": 1.0604852329179624, "learning_rate": 5.139648991525667e-06, "loss": 0.5639, "step": 33110 }, { "epoch": 0.9664380563256968, "grad_norm": 1.190154121547011, "learning_rate": 5.1384437651821265e-06, "loss": 0.5585, "step": 33115 }, { "epoch": 0.9665839778199329, "grad_norm": 0.9956171957700265, "learning_rate": 5.137243746144599e-06, "loss": 0.535, "step": 33120 }, { "epoch": 0.966729899314169, "grad_norm": 0.8802547214672635, "learning_rate": 5.136048934692528e-06, "loss": 0.5361, "step": 33125 }, { "epoch": 0.9668758208084051, "grad_norm": 1.0011791380197759, "learning_rate": 5.134859331104151e-06, "loss": 0.5344, "step": 33130 }, { "epoch": 0.9670217423026412, "grad_norm": 1.0765221212822242, "learning_rate": 5.133674935656487e-06, "loss": 0.556, "step": 33135 }, { "epoch": 0.9671676637968772, "grad_norm": 1.319692278979502, "learning_rate": 5.1324957486253464e-06, "loss": 0.5475, "step": 33140 }, { "epoch": 0.9673135852911133, "grad_norm": 1.0379433584240518, "learning_rate": 5.131321770285326e-06, "loss": 0.5471, "step": 33145 }, { "epoch": 0.9674595067853495, "grad_norm": 1.0714375389036257, "learning_rate": 5.130153000909807e-06, "loss": 0.5159, "step": 33150 }, { "epoch": 0.9676054282795856, "grad_norm": 1.1460201847533646, "learning_rate": 5.128989440770958e-06, "loss": 0.5708, "step": 33155 }, { "epoch": 0.9677513497738217, "grad_norm": 0.9951473014589465, "learning_rate": 5.127831090139736e-06, "loss": 0.5317, "step": 33160 }, { "epoch": 0.9678972712680578, "grad_norm": 1.0649119610585445, "learning_rate": 5.12667794928589e-06, "loss": 0.5425, "step": 33165 }, { "epoch": 0.9680431927622939, "grad_norm": 0.9072497002866126, "learning_rate": 5.125530018477939e-06, "loss": 0.5484, "step": 33170 }, { "epoch": 0.96818911425653, "grad_norm": 1.1678431382956997, "learning_rate": 5.12438729798321e-06, "loss": 0.5346, "step": 33175 }, { "epoch": 0.9683350357507661, "grad_norm": 0.9897926605156816, "learning_rate": 5.123249788067804e-06, "loss": 0.5004, "step": 33180 }, { "epoch": 0.9684809572450022, "grad_norm": 0.9735793420759921, "learning_rate": 5.122117488996612e-06, "loss": 0.5117, "step": 33185 }, { "epoch": 0.9686268787392383, "grad_norm": 1.0087351247524305, "learning_rate": 5.1209904010333096e-06, "loss": 0.503, "step": 33190 }, { "epoch": 0.9687728002334743, "grad_norm": 1.0583610493142284, "learning_rate": 5.119868524440364e-06, "loss": 0.5127, "step": 33195 }, { "epoch": 0.9689187217277105, "grad_norm": 1.0238923474747301, "learning_rate": 5.1187518594790174e-06, "loss": 0.4973, "step": 33200 }, { "epoch": 0.9690646432219466, "grad_norm": 0.8435041410971695, "learning_rate": 5.11764040640931e-06, "loss": 0.5011, "step": 33205 }, { "epoch": 0.9692105647161827, "grad_norm": 1.0151820301918038, "learning_rate": 5.116534165490069e-06, "loss": 0.5011, "step": 33210 }, { "epoch": 0.9693564862104188, "grad_norm": 0.9971972187325656, "learning_rate": 5.1154331369788974e-06, "loss": 0.4803, "step": 33215 }, { "epoch": 0.9695024077046549, "grad_norm": 1.1223327247478896, "learning_rate": 5.114337321132194e-06, "loss": 0.5584, "step": 33220 }, { "epoch": 0.969648329198891, "grad_norm": 1.0468772696652713, "learning_rate": 5.113246718205137e-06, "loss": 0.4865, "step": 33225 }, { "epoch": 0.9697942506931271, "grad_norm": 1.1348237523854359, "learning_rate": 5.1121613284516924e-06, "loss": 0.4956, "step": 33230 }, { "epoch": 0.9699401721873632, "grad_norm": 0.8658675113336013, "learning_rate": 5.111081152124618e-06, "loss": 0.5014, "step": 33235 }, { "epoch": 0.9700860936815993, "grad_norm": 1.0762854808496933, "learning_rate": 5.110006189475454e-06, "loss": 0.5397, "step": 33240 }, { "epoch": 0.9702320151758353, "grad_norm": 0.8423060453443036, "learning_rate": 5.108936440754519e-06, "loss": 0.4841, "step": 33245 }, { "epoch": 0.9703779366700716, "grad_norm": 0.9461089986587222, "learning_rate": 5.107871906210928e-06, "loss": 0.5691, "step": 33250 }, { "epoch": 0.9705238581643076, "grad_norm": 0.9278039398188089, "learning_rate": 5.106812586092576e-06, "loss": 0.523, "step": 33255 }, { "epoch": 0.9706697796585437, "grad_norm": 1.012146005817263, "learning_rate": 5.105758480646149e-06, "loss": 0.5332, "step": 33260 }, { "epoch": 0.9708157011527798, "grad_norm": 1.0517619488957122, "learning_rate": 5.104709590117112e-06, "loss": 0.566, "step": 33265 }, { "epoch": 0.9709616226470159, "grad_norm": 1.0288930009520139, "learning_rate": 5.103665914749721e-06, "loss": 0.5468, "step": 33270 }, { "epoch": 0.971107544141252, "grad_norm": 1.0933419406856637, "learning_rate": 5.102627454787012e-06, "loss": 0.4999, "step": 33275 }, { "epoch": 0.9712534656354881, "grad_norm": 1.0138066205381293, "learning_rate": 5.101594210470813e-06, "loss": 0.5197, "step": 33280 }, { "epoch": 0.9713993871297242, "grad_norm": 1.1901627962464099, "learning_rate": 5.100566182041731e-06, "loss": 0.5612, "step": 33285 }, { "epoch": 0.9715453086239603, "grad_norm": 1.0175863314278761, "learning_rate": 5.099543369739165e-06, "loss": 0.5398, "step": 33290 }, { "epoch": 0.9716912301181964, "grad_norm": 1.0861544032973653, "learning_rate": 5.098525773801295e-06, "loss": 0.5091, "step": 33295 }, { "epoch": 0.9718371516124326, "grad_norm": 0.9290366814685095, "learning_rate": 5.097513394465085e-06, "loss": 0.5036, "step": 33300 }, { "epoch": 0.9719830731066686, "grad_norm": 1.0448890955356152, "learning_rate": 5.096506231966289e-06, "loss": 0.581, "step": 33305 }, { "epoch": 0.9721289946009047, "grad_norm": 0.9090545822960594, "learning_rate": 5.0955042865394425e-06, "loss": 0.4972, "step": 33310 }, { "epoch": 0.9722749160951408, "grad_norm": 1.3530843560789703, "learning_rate": 5.094507558417869e-06, "loss": 0.58, "step": 33315 }, { "epoch": 0.9724208375893769, "grad_norm": 0.9659651473516816, "learning_rate": 5.093516047833673e-06, "loss": 0.4957, "step": 33320 }, { "epoch": 0.972566759083613, "grad_norm": 0.928527395145158, "learning_rate": 5.092529755017747e-06, "loss": 0.503, "step": 33325 }, { "epoch": 0.9727126805778491, "grad_norm": 0.9342770201511649, "learning_rate": 5.091548680199767e-06, "loss": 0.4841, "step": 33330 }, { "epoch": 0.9728586020720852, "grad_norm": 1.0330013272728686, "learning_rate": 5.090572823608195e-06, "loss": 0.5481, "step": 33335 }, { "epoch": 0.9730045235663213, "grad_norm": 1.021005975163304, "learning_rate": 5.089602185470278e-06, "loss": 0.4873, "step": 33340 }, { "epoch": 0.9731504450605574, "grad_norm": 0.8948410259376687, "learning_rate": 5.088636766012046e-06, "loss": 0.4945, "step": 33345 }, { "epoch": 0.9732963665547936, "grad_norm": 0.8602093129756095, "learning_rate": 5.0876765654583185e-06, "loss": 0.4972, "step": 33350 }, { "epoch": 0.9734422880490297, "grad_norm": 1.1249056725836115, "learning_rate": 5.086721584032693e-06, "loss": 0.5108, "step": 33355 }, { "epoch": 0.9735882095432657, "grad_norm": 0.8970028911947296, "learning_rate": 5.085771821957554e-06, "loss": 0.5015, "step": 33360 }, { "epoch": 0.9737341310375018, "grad_norm": 1.00764752847316, "learning_rate": 5.084827279454072e-06, "loss": 0.5445, "step": 33365 }, { "epoch": 0.9738800525317379, "grad_norm": 1.0387737602810836, "learning_rate": 5.083887956742202e-06, "loss": 0.5585, "step": 33370 }, { "epoch": 0.974025974025974, "grad_norm": 1.0749149699775726, "learning_rate": 5.082953854040683e-06, "loss": 0.6185, "step": 33375 }, { "epoch": 0.9741718955202101, "grad_norm": 1.121355266355452, "learning_rate": 5.08202497156704e-06, "loss": 0.5013, "step": 33380 }, { "epoch": 0.9743178170144462, "grad_norm": 0.993181963027802, "learning_rate": 5.081101309537578e-06, "loss": 0.4913, "step": 33385 }, { "epoch": 0.9744637385086823, "grad_norm": 1.0152078631509958, "learning_rate": 5.080182868167388e-06, "loss": 0.495, "step": 33390 }, { "epoch": 0.9746096600029184, "grad_norm": 0.9557924930629481, "learning_rate": 5.079269647670351e-06, "loss": 0.513, "step": 33395 }, { "epoch": 0.9747555814971546, "grad_norm": 0.9643025103093668, "learning_rate": 5.078361648259122e-06, "loss": 0.5102, "step": 33400 }, { "epoch": 0.9749015029913907, "grad_norm": 1.049116992295395, "learning_rate": 5.077458870145146e-06, "loss": 0.5245, "step": 33405 }, { "epoch": 0.9750474244856268, "grad_norm": 1.0067790767085887, "learning_rate": 5.076561313538658e-06, "loss": 0.5375, "step": 33410 }, { "epoch": 0.9751933459798628, "grad_norm": 1.0708950557925891, "learning_rate": 5.075668978648664e-06, "loss": 0.5352, "step": 33415 }, { "epoch": 0.9753392674740989, "grad_norm": 0.987469132979006, "learning_rate": 5.0747818656829625e-06, "loss": 0.5341, "step": 33420 }, { "epoch": 0.975485188968335, "grad_norm": 0.9560857706118973, "learning_rate": 5.0738999748481366e-06, "loss": 0.4944, "step": 33425 }, { "epoch": 0.9756311104625711, "grad_norm": 0.9393343319458181, "learning_rate": 5.073023306349552e-06, "loss": 0.482, "step": 33430 }, { "epoch": 0.9757770319568072, "grad_norm": 0.9749974002721633, "learning_rate": 5.072151860391352e-06, "loss": 0.5367, "step": 33435 }, { "epoch": 0.9759229534510433, "grad_norm": 0.9803606779047043, "learning_rate": 5.071285637176473e-06, "loss": 0.5715, "step": 33440 }, { "epoch": 0.9760688749452794, "grad_norm": 1.0588347798111502, "learning_rate": 5.070424636906629e-06, "loss": 0.5851, "step": 33445 }, { "epoch": 0.9762147964395156, "grad_norm": 0.9797603512920966, "learning_rate": 5.0695688597823225e-06, "loss": 0.5652, "step": 33450 }, { "epoch": 0.9763607179337517, "grad_norm": 0.9769923064909117, "learning_rate": 5.068718306002837e-06, "loss": 0.5213, "step": 33455 }, { "epoch": 0.9765066394279878, "grad_norm": 1.09165998209017, "learning_rate": 5.0678729757662354e-06, "loss": 0.5766, "step": 33460 }, { "epoch": 0.9766525609222239, "grad_norm": 1.0749137723608306, "learning_rate": 5.067032869269374e-06, "loss": 0.5665, "step": 33465 }, { "epoch": 0.97679848241646, "grad_norm": 1.0068148780791313, "learning_rate": 5.066197986707887e-06, "loss": 0.5195, "step": 33470 }, { "epoch": 0.976944403910696, "grad_norm": 0.9967499572431862, "learning_rate": 5.065368328276189e-06, "loss": 0.5025, "step": 33475 }, { "epoch": 0.9770903254049321, "grad_norm": 0.8860617550926022, "learning_rate": 5.0645438941674836e-06, "loss": 0.5272, "step": 33480 }, { "epoch": 0.9772362468991682, "grad_norm": 0.9670277615613261, "learning_rate": 5.0637246845737535e-06, "loss": 0.517, "step": 33485 }, { "epoch": 0.9773821683934043, "grad_norm": 1.1273733012637333, "learning_rate": 5.06291069968577e-06, "loss": 0.5639, "step": 33490 }, { "epoch": 0.9775280898876404, "grad_norm": 0.8517430494352236, "learning_rate": 5.0621019396930844e-06, "loss": 0.5022, "step": 33495 }, { "epoch": 0.9776740113818766, "grad_norm": 0.854923004141028, "learning_rate": 5.0612984047840305e-06, "loss": 0.5249, "step": 33500 }, { "epoch": 0.9778199328761127, "grad_norm": 1.1043474655152912, "learning_rate": 5.060500095145726e-06, "loss": 0.564, "step": 33505 }, { "epoch": 0.9779658543703488, "grad_norm": 1.0426307699685953, "learning_rate": 5.059707010964071e-06, "loss": 0.5621, "step": 33510 }, { "epoch": 0.9781117758645849, "grad_norm": 1.1669967905970806, "learning_rate": 5.058919152423754e-06, "loss": 0.5449, "step": 33515 }, { "epoch": 0.978257697358821, "grad_norm": 1.1715461189033711, "learning_rate": 5.058136519708239e-06, "loss": 0.5272, "step": 33520 }, { "epoch": 0.978403618853057, "grad_norm": 1.0968187603986936, "learning_rate": 5.057359112999778e-06, "loss": 0.5325, "step": 33525 }, { "epoch": 0.9785495403472931, "grad_norm": 0.8341579304262031, "learning_rate": 5.056586932479406e-06, "loss": 0.4742, "step": 33530 }, { "epoch": 0.9786954618415292, "grad_norm": 0.9399784407931918, "learning_rate": 5.055819978326936e-06, "loss": 0.5053, "step": 33535 }, { "epoch": 0.9788413833357653, "grad_norm": 1.040836098664231, "learning_rate": 5.055058250720973e-06, "loss": 0.5699, "step": 33540 }, { "epoch": 0.9789873048300015, "grad_norm": 0.9576166852777381, "learning_rate": 5.0543017498388925e-06, "loss": 0.5314, "step": 33545 }, { "epoch": 0.9791332263242376, "grad_norm": 1.0821381600671702, "learning_rate": 5.0535504758568655e-06, "loss": 0.5414, "step": 33550 }, { "epoch": 0.9792791478184737, "grad_norm": 0.8874562276236875, "learning_rate": 5.052804428949839e-06, "loss": 0.4717, "step": 33555 }, { "epoch": 0.9794250693127098, "grad_norm": 1.077466291656469, "learning_rate": 5.052063609291541e-06, "loss": 0.5072, "step": 33560 }, { "epoch": 0.9795709908069459, "grad_norm": 0.9402816687641828, "learning_rate": 5.051328017054488e-06, "loss": 0.4963, "step": 33565 }, { "epoch": 0.979716912301182, "grad_norm": 1.0665721824246712, "learning_rate": 5.05059765240998e-06, "loss": 0.5358, "step": 33570 }, { "epoch": 0.979862833795418, "grad_norm": 1.0663817878247017, "learning_rate": 5.049872515528087e-06, "loss": 0.5911, "step": 33575 }, { "epoch": 0.9800087552896541, "grad_norm": 0.9949668657119501, "learning_rate": 5.049152606577677e-06, "loss": 0.5098, "step": 33580 }, { "epoch": 0.9801546767838902, "grad_norm": 1.0831224483291853, "learning_rate": 5.048437925726394e-06, "loss": 0.5786, "step": 33585 }, { "epoch": 0.9803005982781263, "grad_norm": 1.1327200471693837, "learning_rate": 5.047728473140661e-06, "loss": 0.5327, "step": 33590 }, { "epoch": 0.9804465197723625, "grad_norm": 1.0302789352412236, "learning_rate": 5.047024248985693e-06, "loss": 0.5411, "step": 33595 }, { "epoch": 0.9805924412665986, "grad_norm": 1.1077679676381746, "learning_rate": 5.046325253425478e-06, "loss": 0.5933, "step": 33600 }, { "epoch": 0.9807383627608347, "grad_norm": 1.0753854572626638, "learning_rate": 5.045631486622792e-06, "loss": 0.5936, "step": 33605 }, { "epoch": 0.9808842842550708, "grad_norm": 1.0920498382630135, "learning_rate": 5.044942948739185e-06, "loss": 0.5008, "step": 33610 }, { "epoch": 0.9810302057493069, "grad_norm": 0.8933129497704895, "learning_rate": 5.044259639935007e-06, "loss": 0.4747, "step": 33615 }, { "epoch": 0.981176127243543, "grad_norm": 1.0305549440006463, "learning_rate": 5.043581560369372e-06, "loss": 0.527, "step": 33620 }, { "epoch": 0.9813220487377791, "grad_norm": 0.9580121087779867, "learning_rate": 5.042908710200183e-06, "loss": 0.5347, "step": 33625 }, { "epoch": 0.9814679702320152, "grad_norm": 0.9205918761564317, "learning_rate": 5.04224108958413e-06, "loss": 0.4701, "step": 33630 }, { "epoch": 0.9816138917262512, "grad_norm": 1.2236944920625357, "learning_rate": 5.041578698676678e-06, "loss": 0.5496, "step": 33635 }, { "epoch": 0.9817598132204873, "grad_norm": 0.9490949498357534, "learning_rate": 5.040921537632076e-06, "loss": 0.4985, "step": 33640 }, { "epoch": 0.9819057347147235, "grad_norm": 1.1907449418833003, "learning_rate": 5.04026960660336e-06, "loss": 0.5483, "step": 33645 }, { "epoch": 0.9820516562089596, "grad_norm": 1.110200459051081, "learning_rate": 5.039622905742343e-06, "loss": 0.4873, "step": 33650 }, { "epoch": 0.9821975777031957, "grad_norm": 1.0729031205278334, "learning_rate": 5.038981435199619e-06, "loss": 0.5015, "step": 33655 }, { "epoch": 0.9823434991974318, "grad_norm": 1.0289432450989846, "learning_rate": 5.0383451951245675e-06, "loss": 0.5541, "step": 33660 }, { "epoch": 0.9824894206916679, "grad_norm": 0.9722733112237959, "learning_rate": 5.037714185665353e-06, "loss": 0.5272, "step": 33665 }, { "epoch": 0.982635342185904, "grad_norm": 1.064043484288904, "learning_rate": 5.037088406968911e-06, "loss": 0.5103, "step": 33670 }, { "epoch": 0.9827812636801401, "grad_norm": 1.0346664860819423, "learning_rate": 5.036467859180968e-06, "loss": 0.5282, "step": 33675 }, { "epoch": 0.9829271851743762, "grad_norm": 0.9895544493447913, "learning_rate": 5.035852542446034e-06, "loss": 0.4942, "step": 33680 }, { "epoch": 0.9830731066686123, "grad_norm": 0.8857360534632025, "learning_rate": 5.035242456907394e-06, "loss": 0.5021, "step": 33685 }, { "epoch": 0.9832190281628483, "grad_norm": 1.0209309071157355, "learning_rate": 5.034637602707115e-06, "loss": 0.5533, "step": 33690 }, { "epoch": 0.9833649496570845, "grad_norm": 0.9655930898221335, "learning_rate": 5.034037979986055e-06, "loss": 0.5099, "step": 33695 }, { "epoch": 0.9835108711513206, "grad_norm": 1.108373019645238, "learning_rate": 5.033443588883844e-06, "loss": 0.4903, "step": 33700 }, { "epoch": 0.9836567926455567, "grad_norm": 1.034365216081099, "learning_rate": 5.032854429538894e-06, "loss": 0.5776, "step": 33705 }, { "epoch": 0.9838027141397928, "grad_norm": 0.9427036341063435, "learning_rate": 5.0322705020884065e-06, "loss": 0.5601, "step": 33710 }, { "epoch": 0.9839486356340289, "grad_norm": 1.1051007774815587, "learning_rate": 5.031691806668358e-06, "loss": 0.5656, "step": 33715 }, { "epoch": 0.984094557128265, "grad_norm": 1.202707695806946, "learning_rate": 5.03111834341351e-06, "loss": 0.55, "step": 33720 }, { "epoch": 0.9842404786225011, "grad_norm": 1.1083712032784958, "learning_rate": 5.030550112457401e-06, "loss": 0.4812, "step": 33725 }, { "epoch": 0.9843864001167372, "grad_norm": 1.1442206629294946, "learning_rate": 5.0299871139323565e-06, "loss": 0.5685, "step": 33730 }, { "epoch": 0.9845323216109733, "grad_norm": 0.9140169945017929, "learning_rate": 5.029429347969483e-06, "loss": 0.5002, "step": 33735 }, { "epoch": 0.9846782431052093, "grad_norm": 0.9578531251436923, "learning_rate": 5.028876814698661e-06, "loss": 0.5434, "step": 33740 }, { "epoch": 0.9848241645994456, "grad_norm": 0.9265414791068719, "learning_rate": 5.028329514248563e-06, "loss": 0.4876, "step": 33745 }, { "epoch": 0.9849700860936816, "grad_norm": 1.106580671006795, "learning_rate": 5.027787446746639e-06, "loss": 0.5594, "step": 33750 }, { "epoch": 0.9851160075879177, "grad_norm": 1.0000048943538333, "learning_rate": 5.027250612319115e-06, "loss": 0.4994, "step": 33755 }, { "epoch": 0.9852619290821538, "grad_norm": 0.9279773868928076, "learning_rate": 5.0267190110910045e-06, "loss": 0.5034, "step": 33760 }, { "epoch": 0.9854078505763899, "grad_norm": 1.0872808378834786, "learning_rate": 5.026192643186102e-06, "loss": 0.5498, "step": 33765 }, { "epoch": 0.985553772070626, "grad_norm": 0.938516462965969, "learning_rate": 5.025671508726983e-06, "loss": 0.501, "step": 33770 }, { "epoch": 0.9856996935648621, "grad_norm": 1.263096457109755, "learning_rate": 5.025155607835002e-06, "loss": 0.5403, "step": 33775 }, { "epoch": 0.9858456150590982, "grad_norm": 0.9929012980015474, "learning_rate": 5.024644940630293e-06, "loss": 0.6056, "step": 33780 }, { "epoch": 0.9859915365533343, "grad_norm": 1.206529714361955, "learning_rate": 5.024139507231782e-06, "loss": 0.539, "step": 33785 }, { "epoch": 0.9861374580475704, "grad_norm": 1.0379883567226897, "learning_rate": 5.023639307757162e-06, "loss": 0.5352, "step": 33790 }, { "epoch": 0.9862833795418066, "grad_norm": 1.1306631842019477, "learning_rate": 5.023144342322916e-06, "loss": 0.5866, "step": 33795 }, { "epoch": 0.9864293010360426, "grad_norm": 1.0108377813317388, "learning_rate": 5.022654611044306e-06, "loss": 0.572, "step": 33800 }, { "epoch": 0.9865752225302787, "grad_norm": 0.9320636134038177, "learning_rate": 5.022170114035375e-06, "loss": 0.5147, "step": 33805 }, { "epoch": 0.9867211440245148, "grad_norm": 1.0175455601670906, "learning_rate": 5.021690851408948e-06, "loss": 0.5335, "step": 33810 }, { "epoch": 0.9868670655187509, "grad_norm": 1.0505046296815395, "learning_rate": 5.021216823276629e-06, "loss": 0.548, "step": 33815 }, { "epoch": 0.987012987012987, "grad_norm": 1.0668456587494082, "learning_rate": 5.020748029748804e-06, "loss": 0.5384, "step": 33820 }, { "epoch": 0.9871589085072231, "grad_norm": 0.9007015843794126, "learning_rate": 5.020284470934643e-06, "loss": 0.4901, "step": 33825 }, { "epoch": 0.9873048300014592, "grad_norm": 1.1310276536231032, "learning_rate": 5.019826146942089e-06, "loss": 0.5645, "step": 33830 }, { "epoch": 0.9874507514956953, "grad_norm": 0.9722862756883272, "learning_rate": 5.0193730578778765e-06, "loss": 0.498, "step": 33835 }, { "epoch": 0.9875966729899314, "grad_norm": 0.9734766858183307, "learning_rate": 5.018925203847516e-06, "loss": 0.4848, "step": 33840 }, { "epoch": 0.9877425944841676, "grad_norm": 1.24537530114486, "learning_rate": 5.018482584955293e-06, "loss": 0.5845, "step": 33845 }, { "epoch": 0.9878885159784037, "grad_norm": 1.1734094489109732, "learning_rate": 5.018045201304286e-06, "loss": 0.5326, "step": 33850 }, { "epoch": 0.9880344374726397, "grad_norm": 1.0180124376027657, "learning_rate": 5.017613052996342e-06, "loss": 0.5296, "step": 33855 }, { "epoch": 0.9881803589668758, "grad_norm": 0.9177150422853549, "learning_rate": 5.0171861401321025e-06, "loss": 0.4881, "step": 33860 }, { "epoch": 0.9883262804611119, "grad_norm": 1.0814809806392454, "learning_rate": 5.016764462810975e-06, "loss": 0.5144, "step": 33865 }, { "epoch": 0.988472201955348, "grad_norm": 0.9146467195207497, "learning_rate": 5.0163480211311565e-06, "loss": 0.4895, "step": 33870 }, { "epoch": 0.9886181234495841, "grad_norm": 1.0301122390412922, "learning_rate": 5.015936815189624e-06, "loss": 0.5572, "step": 33875 }, { "epoch": 0.9887640449438202, "grad_norm": 1.0361172551464364, "learning_rate": 5.015530845082138e-06, "loss": 0.5444, "step": 33880 }, { "epoch": 0.9889099664380563, "grad_norm": 1.140939330237156, "learning_rate": 5.01513011090323e-06, "loss": 0.5709, "step": 33885 }, { "epoch": 0.9890558879322924, "grad_norm": 0.9633466382471937, "learning_rate": 5.0147346127462235e-06, "loss": 0.4635, "step": 33890 }, { "epoch": 0.9892018094265286, "grad_norm": 0.9618784257000533, "learning_rate": 5.014344350703214e-06, "loss": 0.5946, "step": 33895 }, { "epoch": 0.9893477309207647, "grad_norm": 0.9715137131827606, "learning_rate": 5.013959324865082e-06, "loss": 0.5803, "step": 33900 }, { "epoch": 0.9894936524150008, "grad_norm": 1.210351679528532, "learning_rate": 5.01357953532149e-06, "loss": 0.5834, "step": 33905 }, { "epoch": 0.9896395739092368, "grad_norm": 1.001088802477084, "learning_rate": 5.013204982160878e-06, "loss": 0.5233, "step": 33910 }, { "epoch": 0.9897854954034729, "grad_norm": 1.079061766315509, "learning_rate": 5.012835665470469e-06, "loss": 0.5374, "step": 33915 }, { "epoch": 0.989931416897709, "grad_norm": 0.9744772838191876, "learning_rate": 5.012471585336263e-06, "loss": 0.4811, "step": 33920 }, { "epoch": 0.9900773383919451, "grad_norm": 1.1886066580660766, "learning_rate": 5.012112741843044e-06, "loss": 0.5516, "step": 33925 }, { "epoch": 0.9902232598861812, "grad_norm": 1.0043509799454422, "learning_rate": 5.011759135074372e-06, "loss": 0.5405, "step": 33930 }, { "epoch": 0.9903691813804173, "grad_norm": 0.9229418285152787, "learning_rate": 5.011410765112597e-06, "loss": 0.5003, "step": 33935 }, { "epoch": 0.9905151028746534, "grad_norm": 1.037200535420287, "learning_rate": 5.0110676320388395e-06, "loss": 0.4649, "step": 33940 }, { "epoch": 0.9906610243688896, "grad_norm": 0.9104571270919357, "learning_rate": 5.010729735933006e-06, "loss": 0.5037, "step": 33945 }, { "epoch": 0.9908069458631257, "grad_norm": 1.0056870234001343, "learning_rate": 5.01039707687378e-06, "loss": 0.5149, "step": 33950 }, { "epoch": 0.9909528673573618, "grad_norm": 1.0665776610375306, "learning_rate": 5.01006965493863e-06, "loss": 0.5629, "step": 33955 }, { "epoch": 0.9910987888515979, "grad_norm": 0.9572439710461341, "learning_rate": 5.009747470203801e-06, "loss": 0.4838, "step": 33960 }, { "epoch": 0.991244710345834, "grad_norm": 1.0837510316096048, "learning_rate": 5.009430522744322e-06, "loss": 0.5326, "step": 33965 }, { "epoch": 0.99139063184007, "grad_norm": 1.007204258097148, "learning_rate": 5.009118812633996e-06, "loss": 0.5782, "step": 33970 }, { "epoch": 0.9915365533343061, "grad_norm": 1.0399550064040173, "learning_rate": 5.008812339945411e-06, "loss": 0.5193, "step": 33975 }, { "epoch": 0.9916824748285422, "grad_norm": 0.9896983949080145, "learning_rate": 5.0085111047499384e-06, "loss": 0.5305, "step": 33980 }, { "epoch": 0.9918283963227783, "grad_norm": 0.9621123880163975, "learning_rate": 5.008215107117724e-06, "loss": 0.5182, "step": 33985 }, { "epoch": 0.9919743178170144, "grad_norm": 0.8447976358203553, "learning_rate": 5.007924347117697e-06, "loss": 0.4877, "step": 33990 }, { "epoch": 0.9921202393112506, "grad_norm": 1.1596474947702167, "learning_rate": 5.007638824817563e-06, "loss": 0.5708, "step": 33995 }, { "epoch": 0.9922661608054867, "grad_norm": 1.074628382692495, "learning_rate": 5.007358540283818e-06, "loss": 0.5672, "step": 34000 }, { "epoch": 0.9924120822997228, "grad_norm": 0.9154829466566006, "learning_rate": 5.007083493581728e-06, "loss": 0.4771, "step": 34005 }, { "epoch": 0.9925580037939589, "grad_norm": 1.074246880963391, "learning_rate": 5.0068136847753385e-06, "loss": 0.5565, "step": 34010 }, { "epoch": 0.992703925288195, "grad_norm": 0.9229558777937205, "learning_rate": 5.006549113927486e-06, "loss": 0.5182, "step": 34015 }, { "epoch": 0.992849846782431, "grad_norm": 1.1462405119761574, "learning_rate": 5.006289781099778e-06, "loss": 0.5588, "step": 34020 }, { "epoch": 0.9929957682766671, "grad_norm": 1.0710422413291885, "learning_rate": 5.006035686352608e-06, "loss": 0.5618, "step": 34025 }, { "epoch": 0.9931416897709032, "grad_norm": 0.9111509681353691, "learning_rate": 5.005786829745139e-06, "loss": 0.5442, "step": 34030 }, { "epoch": 0.9932876112651393, "grad_norm": 0.9510318450701138, "learning_rate": 5.005543211335331e-06, "loss": 0.5496, "step": 34035 }, { "epoch": 0.9934335327593754, "grad_norm": 1.0008996083381918, "learning_rate": 5.005304831179909e-06, "loss": 0.5313, "step": 34040 }, { "epoch": 0.9935794542536116, "grad_norm": 1.036343656349479, "learning_rate": 5.005071689334387e-06, "loss": 0.5071, "step": 34045 }, { "epoch": 0.9937253757478477, "grad_norm": 1.0064022594062205, "learning_rate": 5.004843785853055e-06, "loss": 0.5189, "step": 34050 }, { "epoch": 0.9938712972420838, "grad_norm": 1.0727308088819358, "learning_rate": 5.0046211207889865e-06, "loss": 0.498, "step": 34055 }, { "epoch": 0.9940172187363199, "grad_norm": 1.011996656846501, "learning_rate": 5.004403694194032e-06, "loss": 0.5572, "step": 34060 }, { "epoch": 0.994163140230556, "grad_norm": 1.090875683762067, "learning_rate": 5.004191506118822e-06, "loss": 0.5221, "step": 34065 }, { "epoch": 0.994309061724792, "grad_norm": 0.9333200560570092, "learning_rate": 5.00398455661277e-06, "loss": 0.4956, "step": 34070 }, { "epoch": 0.9944549832190281, "grad_norm": 0.9882746033956104, "learning_rate": 5.003782845724071e-06, "loss": 0.5227, "step": 34075 }, { "epoch": 0.9946009047132642, "grad_norm": 0.8267576198682784, "learning_rate": 5.003586373499691e-06, "loss": 0.5028, "step": 34080 }, { "epoch": 0.9947468262075003, "grad_norm": 0.9879584677953103, "learning_rate": 5.003395139985384e-06, "loss": 0.5377, "step": 34085 }, { "epoch": 0.9948927477017364, "grad_norm": 0.9719402077931502, "learning_rate": 5.003209145225686e-06, "loss": 0.5038, "step": 34090 }, { "epoch": 0.9950386691959726, "grad_norm": 1.046885445097955, "learning_rate": 5.003028389263907e-06, "loss": 0.5311, "step": 34095 }, { "epoch": 0.9951845906902087, "grad_norm": 0.9376607022492564, "learning_rate": 5.002852872142138e-06, "loss": 0.537, "step": 34100 }, { "epoch": 0.9953305121844448, "grad_norm": 1.1437560781956682, "learning_rate": 5.0026825939012535e-06, "loss": 0.5348, "step": 34105 }, { "epoch": 0.9954764336786809, "grad_norm": 0.9876093898709207, "learning_rate": 5.002517554580903e-06, "loss": 0.5113, "step": 34110 }, { "epoch": 0.995622355172917, "grad_norm": 1.1891251598070707, "learning_rate": 5.002357754219525e-06, "loss": 0.5266, "step": 34115 }, { "epoch": 0.9957682766671531, "grad_norm": 1.199713564049697, "learning_rate": 5.002203192854326e-06, "loss": 0.551, "step": 34120 }, { "epoch": 0.9959141981613892, "grad_norm": 1.0276670045212444, "learning_rate": 5.0020538705213e-06, "loss": 0.5597, "step": 34125 }, { "epoch": 0.9960601196556252, "grad_norm": 0.8596206355535277, "learning_rate": 5.0019097872552216e-06, "loss": 0.5082, "step": 34130 }, { "epoch": 0.9962060411498613, "grad_norm": 1.1390781389722806, "learning_rate": 5.001770943089642e-06, "loss": 0.5143, "step": 34135 }, { "epoch": 0.9963519626440974, "grad_norm": 1.010091785793645, "learning_rate": 5.001637338056892e-06, "loss": 0.4944, "step": 34140 }, { "epoch": 0.9964978841383336, "grad_norm": 1.1178732238965712, "learning_rate": 5.001508972188087e-06, "loss": 0.5127, "step": 34145 }, { "epoch": 0.9966438056325697, "grad_norm": 1.061360375157705, "learning_rate": 5.001385845513118e-06, "loss": 0.5202, "step": 34150 }, { "epoch": 0.9967897271268058, "grad_norm": 0.997627126386671, "learning_rate": 5.001267958060657e-06, "loss": 0.5369, "step": 34155 }, { "epoch": 0.9969356486210419, "grad_norm": 1.0447716836138679, "learning_rate": 5.001155309858156e-06, "loss": 0.5427, "step": 34160 }, { "epoch": 0.997081570115278, "grad_norm": 0.9997743865793186, "learning_rate": 5.001047900931849e-06, "loss": 0.4906, "step": 34165 }, { "epoch": 0.9972274916095141, "grad_norm": 1.1352521600880445, "learning_rate": 5.000945731306746e-06, "loss": 0.5921, "step": 34170 }, { "epoch": 0.9973734131037502, "grad_norm": 0.9336651797943503, "learning_rate": 5.000848801006639e-06, "loss": 0.5408, "step": 34175 }, { "epoch": 0.9975193345979863, "grad_norm": 0.8528773724163246, "learning_rate": 5.000757110054101e-06, "loss": 0.5046, "step": 34180 }, { "epoch": 0.9976652560922223, "grad_norm": 1.1731211514032513, "learning_rate": 5.0006706584704865e-06, "loss": 0.5576, "step": 34185 }, { "epoch": 0.9978111775864584, "grad_norm": 1.2239020358344568, "learning_rate": 5.0005894462759244e-06, "loss": 0.547, "step": 34190 }, { "epoch": 0.9979570990806946, "grad_norm": 1.193873006903444, "learning_rate": 5.000513473489325e-06, "loss": 0.5358, "step": 34195 }, { "epoch": 0.9981030205749307, "grad_norm": 0.9949503406352171, "learning_rate": 5.000442740128385e-06, "loss": 0.5466, "step": 34200 }, { "epoch": 0.9982489420691668, "grad_norm": 1.0040847301012406, "learning_rate": 5.0003772462095715e-06, "loss": 0.5184, "step": 34205 }, { "epoch": 0.9983948635634029, "grad_norm": 0.9666355319129454, "learning_rate": 5.000316991748136e-06, "loss": 0.5559, "step": 34210 }, { "epoch": 0.998540785057639, "grad_norm": 0.9893589176004742, "learning_rate": 5.000261976758115e-06, "loss": 0.5457, "step": 34215 }, { "epoch": 0.9986867065518751, "grad_norm": 0.9431329088837169, "learning_rate": 5.000212201252313e-06, "loss": 0.5169, "step": 34220 }, { "epoch": 0.9988326280461112, "grad_norm": 1.053905405555924, "learning_rate": 5.000167665242325e-06, "loss": 0.5972, "step": 34225 }, { "epoch": 0.9989785495403473, "grad_norm": 1.1493154532957772, "learning_rate": 5.000128368738522e-06, "loss": 0.5305, "step": 34230 }, { "epoch": 0.9991244710345834, "grad_norm": 1.094965406863394, "learning_rate": 5.000094311750053e-06, "loss": 0.5338, "step": 34235 }, { "epoch": 0.9992703925288194, "grad_norm": 1.0393104106214965, "learning_rate": 5.000065494284851e-06, "loss": 0.5055, "step": 34240 }, { "epoch": 0.9994163140230556, "grad_norm": 1.0626403252982533, "learning_rate": 5.000041916349627e-06, "loss": 0.5483, "step": 34245 }, { "epoch": 0.9995622355172917, "grad_norm": 1.0130078106020866, "learning_rate": 5.000023577949868e-06, "loss": 0.5814, "step": 34250 }, { "epoch": 0.9997081570115278, "grad_norm": 1.080029261493259, "learning_rate": 5.0000104790898465e-06, "loss": 0.5956, "step": 34255 }, { "epoch": 0.9998540785057639, "grad_norm": 0.8605516671437958, "learning_rate": 5.000002619772615e-06, "loss": 0.5014, "step": 34260 }, { "epoch": 1.0, "grad_norm": 1.1940589638955315, "learning_rate": 5e-06, "loss": 0.4856, "step": 34265 }, { "epoch": 1.0, "step": 34265, "total_flos": 488058617069568.0, "train_loss": 0.5987731399213277, "train_runtime": 55800.5629, "train_samples_per_second": 2.456, "train_steps_per_second": 0.614 } ], "logging_steps": 5, "max_steps": 34265, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 488058617069568.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }