{ "best_global_step": 348000, "best_metric": 0.9896356584218223, "best_model_checkpoint": "/workspace/output/lora_r8/checkpoint-348000", "epoch": 99.91484530229917, "eval_steps": 500, "global_step": 352000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002838489923360772, "grad_norm": 7.821003437042236, "learning_rate": 9.999744535906897e-05, "loss": 7.152902221679687, "step": 10 }, { "epoch": 0.005676979846721544, "grad_norm": 7.891488075256348, "learning_rate": 9.999460686914563e-05, "loss": 6.7615203857421875, "step": 20 }, { "epoch": 0.008515469770082317, "grad_norm": 7.9415435791015625, "learning_rate": 9.999176837922227e-05, "loss": 6.361074829101563, "step": 30 }, { "epoch": 0.011353959693443088, "grad_norm": 7.913585662841797, "learning_rate": 9.99889298892989e-05, "loss": 6.256243896484375, "step": 40 }, { "epoch": 0.01419244961680386, "grad_norm": 8.053275108337402, "learning_rate": 9.998609139937554e-05, "loss": 6.039715576171875, "step": 50 }, { "epoch": 0.017030939540164634, "grad_norm": 7.5638933181762695, "learning_rate": 9.998325290945218e-05, "loss": 5.95233154296875, "step": 60 }, { "epoch": 0.019869429463525403, "grad_norm": 8.13407039642334, "learning_rate": 9.99804144195288e-05, "loss": 5.799929809570313, "step": 70 }, { "epoch": 0.022707919386886176, "grad_norm": 8.777347564697266, "learning_rate": 9.997757592960545e-05, "loss": 5.6521728515625, "step": 80 }, { "epoch": 0.02554640931024695, "grad_norm": 8.381796836853027, "learning_rate": 9.99747374396821e-05, "loss": 5.583978271484375, "step": 90 }, { "epoch": 0.02838489923360772, "grad_norm": 8.396158218383789, "learning_rate": 9.997189894975873e-05, "loss": 5.369769287109375, "step": 100 }, { "epoch": 0.031223389156968494, "grad_norm": 8.797348976135254, "learning_rate": 9.996906045983537e-05, "loss": 5.199462890625, "step": 110 }, { "epoch": 0.03406187908032927, "grad_norm": 9.543533325195312, "learning_rate": 9.996622196991201e-05, "loss": 5.12852783203125, "step": 120 }, { "epoch": 0.03690036900369004, "grad_norm": 9.48311996459961, "learning_rate": 9.996338347998865e-05, "loss": 4.912509155273438, "step": 130 }, { "epoch": 0.039738858927050806, "grad_norm": 10.031524658203125, "learning_rate": 9.996054499006528e-05, "loss": 4.811312866210938, "step": 140 }, { "epoch": 0.04257734885041158, "grad_norm": 9.952632904052734, "learning_rate": 9.995770650014194e-05, "loss": 4.6605072021484375, "step": 150 }, { "epoch": 0.04541583877377235, "grad_norm": 10.994112968444824, "learning_rate": 9.995486801021858e-05, "loss": 4.556596374511718, "step": 160 }, { "epoch": 0.04825432869713313, "grad_norm": 11.618239402770996, "learning_rate": 9.99520295202952e-05, "loss": 4.429454040527344, "step": 170 }, { "epoch": 0.0510928186204939, "grad_norm": 11.872230529785156, "learning_rate": 9.994919103037185e-05, "loss": 4.2914886474609375, "step": 180 }, { "epoch": 0.05393130854385467, "grad_norm": 11.485831260681152, "learning_rate": 9.994635254044849e-05, "loss": 4.250700378417969, "step": 190 }, { "epoch": 0.05676979846721544, "grad_norm": 11.607077598571777, "learning_rate": 9.994351405052512e-05, "loss": 4.154458618164062, "step": 200 }, { "epoch": 0.05960828839057621, "grad_norm": 11.94019603729248, "learning_rate": 9.994067556060176e-05, "loss": 4.068196105957031, "step": 210 }, { "epoch": 0.06244677831393699, "grad_norm": 12.569548606872559, "learning_rate": 9.993783707067841e-05, "loss": 3.9570571899414064, "step": 220 }, { "epoch": 0.06528526823729776, "grad_norm": 12.599874496459961, "learning_rate": 9.993499858075504e-05, "loss": 3.8858352661132813, "step": 230 }, { "epoch": 0.06812375816065853, "grad_norm": 13.218369483947754, "learning_rate": 9.993216009083168e-05, "loss": 3.9015045166015625, "step": 240 }, { "epoch": 0.0709622480840193, "grad_norm": 13.07967472076416, "learning_rate": 9.992932160090832e-05, "loss": 3.7893341064453123, "step": 250 }, { "epoch": 0.07380073800738007, "grad_norm": 12.624361038208008, "learning_rate": 9.992648311098496e-05, "loss": 3.748307800292969, "step": 260 }, { "epoch": 0.07663922793074085, "grad_norm": 14.062763214111328, "learning_rate": 9.992364462106159e-05, "loss": 3.692674255371094, "step": 270 }, { "epoch": 0.07947771785410161, "grad_norm": 11.903675079345703, "learning_rate": 9.992080613113823e-05, "loss": 3.712840270996094, "step": 280 }, { "epoch": 0.08231620777746239, "grad_norm": 12.459579467773438, "learning_rate": 9.991796764121489e-05, "loss": 3.612870788574219, "step": 290 }, { "epoch": 0.08515469770082316, "grad_norm": 12.907959938049316, "learning_rate": 9.991512915129152e-05, "loss": 3.6311363220214843, "step": 300 }, { "epoch": 0.08799318762418393, "grad_norm": 12.576180458068848, "learning_rate": 9.991229066136816e-05, "loss": 3.458906555175781, "step": 310 }, { "epoch": 0.0908316775475447, "grad_norm": 13.112584114074707, "learning_rate": 9.99094521714448e-05, "loss": 3.458609771728516, "step": 320 }, { "epoch": 0.09367016747090548, "grad_norm": 14.300445556640625, "learning_rate": 9.990661368152143e-05, "loss": 3.392350769042969, "step": 330 }, { "epoch": 0.09650865739426626, "grad_norm": 13.455063819885254, "learning_rate": 9.990377519159807e-05, "loss": 3.3832077026367187, "step": 340 }, { "epoch": 0.09934714731762702, "grad_norm": 14.730708122253418, "learning_rate": 9.990093670167472e-05, "loss": 3.305467224121094, "step": 350 }, { "epoch": 0.1021856372409878, "grad_norm": 13.685253143310547, "learning_rate": 9.989809821175135e-05, "loss": 3.2741424560546877, "step": 360 }, { "epoch": 0.10502412716434857, "grad_norm": 13.814913749694824, "learning_rate": 9.989525972182799e-05, "loss": 3.1779693603515624, "step": 370 }, { "epoch": 0.10786261708770933, "grad_norm": 12.822137832641602, "learning_rate": 9.989242123190463e-05, "loss": 3.1046115875244142, "step": 380 }, { "epoch": 0.11070110701107011, "grad_norm": 13.518203735351562, "learning_rate": 9.988958274198128e-05, "loss": 3.2064586639404298, "step": 390 }, { "epoch": 0.11353959693443089, "grad_norm": 15.115385055541992, "learning_rate": 9.98867442520579e-05, "loss": 3.0773475646972654, "step": 400 }, { "epoch": 0.11637808685779165, "grad_norm": 14.46224594116211, "learning_rate": 9.988390576213454e-05, "loss": 3.1713470458984374, "step": 410 }, { "epoch": 0.11921657678115243, "grad_norm": 15.138704299926758, "learning_rate": 9.988106727221119e-05, "loss": 3.039786529541016, "step": 420 }, { "epoch": 0.1220550667045132, "grad_norm": 16.640743255615234, "learning_rate": 9.987822878228783e-05, "loss": 3.089741516113281, "step": 430 }, { "epoch": 0.12489355662787398, "grad_norm": 14.69398021697998, "learning_rate": 9.987539029236447e-05, "loss": 3.0166248321533202, "step": 440 }, { "epoch": 0.12773204655123474, "grad_norm": 15.687174797058105, "learning_rate": 9.987255180244111e-05, "loss": 2.951476287841797, "step": 450 }, { "epoch": 0.13057053647459552, "grad_norm": 15.328666687011719, "learning_rate": 9.986971331251774e-05, "loss": 2.8555816650390624, "step": 460 }, { "epoch": 0.1334090263979563, "grad_norm": 14.644464492797852, "learning_rate": 9.986687482259438e-05, "loss": 2.81953125, "step": 470 }, { "epoch": 0.13624751632131707, "grad_norm": 14.5212984085083, "learning_rate": 9.986403633267103e-05, "loss": 2.8416458129882813, "step": 480 }, { "epoch": 0.13908600624467782, "grad_norm": 14.066869735717773, "learning_rate": 9.986119784274766e-05, "loss": 2.759745407104492, "step": 490 }, { "epoch": 0.1419244961680386, "grad_norm": 15.152949333190918, "learning_rate": 9.98583593528243e-05, "loss": 2.6981739044189452, "step": 500 }, { "epoch": 0.1419244961680386, "eval_accuracy": 0.22311947606027852, "eval_loss": 2.93100643157959, "eval_runtime": 41.9962, "eval_samples_per_second": 374.487, "eval_steps_per_second": 5.858, "step": 500 }, { "epoch": 0.14476298609139937, "grad_norm": 16.042133331298828, "learning_rate": 9.985552086290095e-05, "loss": 2.6802669525146485, "step": 510 }, { "epoch": 0.14760147601476015, "grad_norm": 14.821699142456055, "learning_rate": 9.985268237297757e-05, "loss": 2.648159408569336, "step": 520 }, { "epoch": 0.15043996593812092, "grad_norm": 13.466229438781738, "learning_rate": 9.984984388305421e-05, "loss": 2.6834999084472657, "step": 530 }, { "epoch": 0.1532784558614817, "grad_norm": 14.4769868850708, "learning_rate": 9.984700539313086e-05, "loss": 2.6614501953125, "step": 540 }, { "epoch": 0.15611694578484248, "grad_norm": 14.460672378540039, "learning_rate": 9.98441669032075e-05, "loss": 2.636008644104004, "step": 550 }, { "epoch": 0.15895543570820322, "grad_norm": 17.37153434753418, "learning_rate": 9.984132841328414e-05, "loss": 2.595663070678711, "step": 560 }, { "epoch": 0.161793925631564, "grad_norm": 18.542922973632812, "learning_rate": 9.983848992336078e-05, "loss": 2.5024566650390625, "step": 570 }, { "epoch": 0.16463241555492478, "grad_norm": 15.792057991027832, "learning_rate": 9.983565143343742e-05, "loss": 2.5503517150878907, "step": 580 }, { "epoch": 0.16747090547828555, "grad_norm": 19.011674880981445, "learning_rate": 9.983281294351405e-05, "loss": 2.5184261322021486, "step": 590 }, { "epoch": 0.17030939540164633, "grad_norm": 15.780776977539062, "learning_rate": 9.982997445359069e-05, "loss": 2.566799545288086, "step": 600 }, { "epoch": 0.1731478853250071, "grad_norm": 17.79799461364746, "learning_rate": 9.982713596366733e-05, "loss": 2.4272220611572264, "step": 610 }, { "epoch": 0.17598637524836785, "grad_norm": 15.603456497192383, "learning_rate": 9.982429747374397e-05, "loss": 2.4600830078125, "step": 620 }, { "epoch": 0.17882486517172863, "grad_norm": 16.011137008666992, "learning_rate": 9.982145898382061e-05, "loss": 2.400131607055664, "step": 630 }, { "epoch": 0.1816633550950894, "grad_norm": 15.220072746276855, "learning_rate": 9.981862049389726e-05, "loss": 2.3965965270996095, "step": 640 }, { "epoch": 0.18450184501845018, "grad_norm": 18.637725830078125, "learning_rate": 9.981578200397388e-05, "loss": 2.3779424667358398, "step": 650 }, { "epoch": 0.18734033494181096, "grad_norm": 17.142663955688477, "learning_rate": 9.981294351405053e-05, "loss": 2.3576290130615236, "step": 660 }, { "epoch": 0.19017882486517174, "grad_norm": 14.926474571228027, "learning_rate": 9.981010502412717e-05, "loss": 2.2630123138427733, "step": 670 }, { "epoch": 0.1930173147885325, "grad_norm": 13.853861808776855, "learning_rate": 9.980726653420381e-05, "loss": 2.1499168395996096, "step": 680 }, { "epoch": 0.19585580471189326, "grad_norm": 16.44382095336914, "learning_rate": 9.980442804428045e-05, "loss": 2.233046531677246, "step": 690 }, { "epoch": 0.19869429463525404, "grad_norm": 16.464563369750977, "learning_rate": 9.980158955435709e-05, "loss": 2.302341079711914, "step": 700 }, { "epoch": 0.2015327845586148, "grad_norm": 13.797673225402832, "learning_rate": 9.979875106443373e-05, "loss": 2.179940414428711, "step": 710 }, { "epoch": 0.2043712744819756, "grad_norm": 16.39546775817871, "learning_rate": 9.979591257451036e-05, "loss": 2.164119911193848, "step": 720 }, { "epoch": 0.20720976440533637, "grad_norm": 16.399290084838867, "learning_rate": 9.9793074084587e-05, "loss": 2.0616743087768556, "step": 730 }, { "epoch": 0.21004825432869714, "grad_norm": 16.050365447998047, "learning_rate": 9.979023559466364e-05, "loss": 2.1156747817993162, "step": 740 }, { "epoch": 0.21288674425205792, "grad_norm": 16.382707595825195, "learning_rate": 9.978768095373262e-05, "loss": 2.0809173583984375, "step": 750 }, { "epoch": 0.21572523417541867, "grad_norm": 21.468551635742188, "learning_rate": 9.978484246380926e-05, "loss": 2.152540397644043, "step": 760 }, { "epoch": 0.21856372409877944, "grad_norm": 17.516782760620117, "learning_rate": 9.978200397388589e-05, "loss": 2.1594987869262696, "step": 770 }, { "epoch": 0.22140221402214022, "grad_norm": 15.761514663696289, "learning_rate": 9.977916548396253e-05, "loss": 2.080038642883301, "step": 780 }, { "epoch": 0.224240703945501, "grad_norm": 15.23882007598877, "learning_rate": 9.977632699403917e-05, "loss": 2.020671844482422, "step": 790 }, { "epoch": 0.22707919386886177, "grad_norm": 16.29534149169922, "learning_rate": 9.977348850411582e-05, "loss": 2.0401065826416014, "step": 800 }, { "epoch": 0.22991768379222255, "grad_norm": 19.03228187561035, "learning_rate": 9.977065001419246e-05, "loss": 2.073627471923828, "step": 810 }, { "epoch": 0.2327561737155833, "grad_norm": 20.366952896118164, "learning_rate": 9.97678115242691e-05, "loss": 2.095037269592285, "step": 820 }, { "epoch": 0.23559466363894407, "grad_norm": 14.894079208374023, "learning_rate": 9.976497303434573e-05, "loss": 2.0705188751220702, "step": 830 }, { "epoch": 0.23843315356230485, "grad_norm": 16.35198211669922, "learning_rate": 9.976213454442237e-05, "loss": 1.9764781951904298, "step": 840 }, { "epoch": 0.24127164348566563, "grad_norm": 17.30745506286621, "learning_rate": 9.975929605449901e-05, "loss": 1.9279197692871093, "step": 850 }, { "epoch": 0.2441101334090264, "grad_norm": 20.43689727783203, "learning_rate": 9.975645756457565e-05, "loss": 1.9041332244873046, "step": 860 }, { "epoch": 0.24694862333238718, "grad_norm": 20.463424682617188, "learning_rate": 9.975361907465229e-05, "loss": 1.9567670822143555, "step": 870 }, { "epoch": 0.24978711325574796, "grad_norm": 17.491180419921875, "learning_rate": 9.975078058472893e-05, "loss": 1.9539928436279297, "step": 880 }, { "epoch": 0.2526256031791087, "grad_norm": 17.74182891845703, "learning_rate": 9.974794209480557e-05, "loss": 1.8524280548095704, "step": 890 }, { "epoch": 0.2554640931024695, "grad_norm": 18.952123641967773, "learning_rate": 9.97451036048822e-05, "loss": 1.9144311904907227, "step": 900 }, { "epoch": 0.25830258302583026, "grad_norm": 16.72834014892578, "learning_rate": 9.974226511495884e-05, "loss": 1.9486034393310547, "step": 910 }, { "epoch": 0.26114107294919103, "grad_norm": 20.225597381591797, "learning_rate": 9.973942662503549e-05, "loss": 1.8651172637939453, "step": 920 }, { "epoch": 0.2639795628725518, "grad_norm": 20.666879653930664, "learning_rate": 9.973658813511213e-05, "loss": 1.8124202728271483, "step": 930 }, { "epoch": 0.2668180527959126, "grad_norm": 17.1395263671875, "learning_rate": 9.973374964518877e-05, "loss": 1.8280616760253907, "step": 940 }, { "epoch": 0.26965654271927336, "grad_norm": 15.146110534667969, "learning_rate": 9.973091115526541e-05, "loss": 1.8311988830566406, "step": 950 }, { "epoch": 0.27249503264263414, "grad_norm": 13.71627426147461, "learning_rate": 9.972807266534204e-05, "loss": 1.7315332412719726, "step": 960 }, { "epoch": 0.2753335225659949, "grad_norm": 18.377683639526367, "learning_rate": 9.972523417541868e-05, "loss": 1.8621788024902344, "step": 970 }, { "epoch": 0.27817201248935564, "grad_norm": 15.082335472106934, "learning_rate": 9.972239568549532e-05, "loss": 1.802347183227539, "step": 980 }, { "epoch": 0.2810105024127164, "grad_norm": 23.33822250366211, "learning_rate": 9.971955719557196e-05, "loss": 1.7548988342285157, "step": 990 }, { "epoch": 0.2838489923360772, "grad_norm": 18.412450790405273, "learning_rate": 9.97167187056486e-05, "loss": 1.7109697341918946, "step": 1000 }, { "epoch": 0.2838489923360772, "eval_accuracy": 0.4163540408215171, "eval_loss": 2.034782886505127, "eval_runtime": 39.8482, "eval_samples_per_second": 394.672, "eval_steps_per_second": 6.173, "step": 1000 }, { "epoch": 0.28668748225943796, "grad_norm": 19.74506950378418, "learning_rate": 9.971388021572524e-05, "loss": 1.7505809783935546, "step": 1010 }, { "epoch": 0.28952597218279874, "grad_norm": 17.884584426879883, "learning_rate": 9.971104172580189e-05, "loss": 1.7634092330932618, "step": 1020 }, { "epoch": 0.2923644621061595, "grad_norm": 18.55858612060547, "learning_rate": 9.970820323587851e-05, "loss": 1.6369083404541016, "step": 1030 }, { "epoch": 0.2952029520295203, "grad_norm": 18.163463592529297, "learning_rate": 9.970536474595515e-05, "loss": 1.7573257446289063, "step": 1040 }, { "epoch": 0.29804144195288107, "grad_norm": 18.886241912841797, "learning_rate": 9.97025262560318e-05, "loss": 1.6954448699951172, "step": 1050 }, { "epoch": 0.30087993187624185, "grad_norm": 15.49860954284668, "learning_rate": 9.969968776610844e-05, "loss": 1.67274169921875, "step": 1060 }, { "epoch": 0.3037184217996026, "grad_norm": 20.884662628173828, "learning_rate": 9.969684927618508e-05, "loss": 1.6121959686279297, "step": 1070 }, { "epoch": 0.3065569117229634, "grad_norm": 26.780324935913086, "learning_rate": 9.969401078626172e-05, "loss": 1.7890850067138673, "step": 1080 }, { "epoch": 0.3093954016463242, "grad_norm": 17.718534469604492, "learning_rate": 9.969117229633835e-05, "loss": 1.6677974700927733, "step": 1090 }, { "epoch": 0.31223389156968495, "grad_norm": 20.076583862304688, "learning_rate": 9.968833380641499e-05, "loss": 1.599573516845703, "step": 1100 }, { "epoch": 0.3150723814930457, "grad_norm": 18.984588623046875, "learning_rate": 9.968549531649163e-05, "loss": 1.6217384338378906, "step": 1110 }, { "epoch": 0.31791087141640645, "grad_norm": 18.82953643798828, "learning_rate": 9.968265682656827e-05, "loss": 1.5965264320373536, "step": 1120 }, { "epoch": 0.3207493613397672, "grad_norm": 14.870361328125, "learning_rate": 9.967981833664491e-05, "loss": 1.6974849700927734, "step": 1130 }, { "epoch": 0.323587851263128, "grad_norm": 15.048882484436035, "learning_rate": 9.967697984672155e-05, "loss": 1.5354299545288086, "step": 1140 }, { "epoch": 0.3264263411864888, "grad_norm": 16.185405731201172, "learning_rate": 9.96741413567982e-05, "loss": 1.5419054985046388, "step": 1150 }, { "epoch": 0.32926483110984955, "grad_norm": 15.70096492767334, "learning_rate": 9.967130286687482e-05, "loss": 1.5201569557189942, "step": 1160 }, { "epoch": 0.33210332103321033, "grad_norm": 15.435335159301758, "learning_rate": 9.966846437695147e-05, "loss": 1.584012508392334, "step": 1170 }, { "epoch": 0.3349418109565711, "grad_norm": 18.325897216796875, "learning_rate": 9.96656258870281e-05, "loss": 1.492680835723877, "step": 1180 }, { "epoch": 0.3377803008799319, "grad_norm": 17.37185287475586, "learning_rate": 9.966278739710473e-05, "loss": 1.5090473175048829, "step": 1190 }, { "epoch": 0.34061879080329266, "grad_norm": 22.372676849365234, "learning_rate": 9.965994890718139e-05, "loss": 1.5193059921264649, "step": 1200 }, { "epoch": 0.34345728072665344, "grad_norm": 19.935239791870117, "learning_rate": 9.965711041725803e-05, "loss": 1.543346118927002, "step": 1210 }, { "epoch": 0.3462957706500142, "grad_norm": 18.95125961303711, "learning_rate": 9.965427192733466e-05, "loss": 1.4330853462219237, "step": 1220 }, { "epoch": 0.349134260573375, "grad_norm": 15.722933769226074, "learning_rate": 9.96514334374113e-05, "loss": 1.5415838241577149, "step": 1230 }, { "epoch": 0.3519727504967357, "grad_norm": 20.790388107299805, "learning_rate": 9.964859494748794e-05, "loss": 1.5477384567260741, "step": 1240 }, { "epoch": 0.3548112404200965, "grad_norm": 19.917011260986328, "learning_rate": 9.964575645756458e-05, "loss": 1.472802734375, "step": 1250 }, { "epoch": 0.35764973034345726, "grad_norm": 20.369104385375977, "learning_rate": 9.964291796764122e-05, "loss": 1.5026443481445313, "step": 1260 }, { "epoch": 0.36048822026681804, "grad_norm": 17.954387664794922, "learning_rate": 9.964007947771787e-05, "loss": 1.4576308250427246, "step": 1270 }, { "epoch": 0.3633267101901788, "grad_norm": 19.4952449798584, "learning_rate": 9.963724098779451e-05, "loss": 1.4504443168640138, "step": 1280 }, { "epoch": 0.3661652001135396, "grad_norm": 20.30315589904785, "learning_rate": 9.963440249787113e-05, "loss": 1.4128257751464843, "step": 1290 }, { "epoch": 0.36900369003690037, "grad_norm": 16.15687370300293, "learning_rate": 9.963156400794778e-05, "loss": 1.4064359664916992, "step": 1300 }, { "epoch": 0.37184217996026114, "grad_norm": 20.24163818359375, "learning_rate": 9.962872551802442e-05, "loss": 1.4517648696899415, "step": 1310 }, { "epoch": 0.3746806698836219, "grad_norm": 18.123584747314453, "learning_rate": 9.962588702810105e-05, "loss": 1.3267455101013184, "step": 1320 }, { "epoch": 0.3775191598069827, "grad_norm": 18.530723571777344, "learning_rate": 9.96230485381777e-05, "loss": 1.413957691192627, "step": 1330 }, { "epoch": 0.3803576497303435, "grad_norm": 15.883684158325195, "learning_rate": 9.962021004825434e-05, "loss": 1.3711063385009765, "step": 1340 }, { "epoch": 0.38319613965370425, "grad_norm": 19.54316520690918, "learning_rate": 9.961737155833097e-05, "loss": 1.3910303115844727, "step": 1350 }, { "epoch": 0.386034629577065, "grad_norm": 18.246450424194336, "learning_rate": 9.961453306840761e-05, "loss": 1.4133499145507813, "step": 1360 }, { "epoch": 0.3888731195004258, "grad_norm": 19.862640380859375, "learning_rate": 9.961169457848425e-05, "loss": 1.3705204010009766, "step": 1370 }, { "epoch": 0.3917116094237865, "grad_norm": 19.405576705932617, "learning_rate": 9.96088560885609e-05, "loss": 1.4035526275634767, "step": 1380 }, { "epoch": 0.3945500993471473, "grad_norm": 16.486438751220703, "learning_rate": 9.960601759863752e-05, "loss": 1.338155174255371, "step": 1390 }, { "epoch": 0.3973885892705081, "grad_norm": 22.547771453857422, "learning_rate": 9.960317910871418e-05, "loss": 1.347532081604004, "step": 1400 }, { "epoch": 0.40022707919386885, "grad_norm": 19.592201232910156, "learning_rate": 9.96003406187908e-05, "loss": 1.3358808517456056, "step": 1410 }, { "epoch": 0.4030655691172296, "grad_norm": 20.477746963500977, "learning_rate": 9.959750212886745e-05, "loss": 1.3819169998168945, "step": 1420 }, { "epoch": 0.4059040590405904, "grad_norm": 19.539627075195312, "learning_rate": 9.959466363894409e-05, "loss": 1.2259580612182617, "step": 1430 }, { "epoch": 0.4087425489639512, "grad_norm": 19.376806259155273, "learning_rate": 9.959182514902073e-05, "loss": 1.3533945083618164, "step": 1440 }, { "epoch": 0.41158103888731196, "grad_norm": 18.436086654663086, "learning_rate": 9.958898665909736e-05, "loss": 1.318460464477539, "step": 1450 }, { "epoch": 0.41441952881067273, "grad_norm": 16.59644317626953, "learning_rate": 9.958614816917401e-05, "loss": 1.3290637016296387, "step": 1460 }, { "epoch": 0.4172580187340335, "grad_norm": 17.732330322265625, "learning_rate": 9.958330967925065e-05, "loss": 1.292267417907715, "step": 1470 }, { "epoch": 0.4200965086573943, "grad_norm": 19.204368591308594, "learning_rate": 9.958047118932728e-05, "loss": 1.3076340675354003, "step": 1480 }, { "epoch": 0.42293499858075506, "grad_norm": 16.512409210205078, "learning_rate": 9.957763269940392e-05, "loss": 1.259412670135498, "step": 1490 }, { "epoch": 0.42577348850411584, "grad_norm": 18.08131980895996, "learning_rate": 9.957479420948056e-05, "loss": 1.3013140678405761, "step": 1500 }, { "epoch": 0.42577348850411584, "eval_accuracy": 0.5402810453360463, "eval_loss": 1.5563039779663086, "eval_runtime": 40.2443, "eval_samples_per_second": 390.788, "eval_steps_per_second": 6.113, "step": 1500 }, { "epoch": 0.42861197842747656, "grad_norm": 26.6735897064209, "learning_rate": 9.957195571955719e-05, "loss": 1.3228124618530273, "step": 1510 }, { "epoch": 0.43145046835083734, "grad_norm": 18.70392417907715, "learning_rate": 9.956911722963383e-05, "loss": 1.2279690742492675, "step": 1520 }, { "epoch": 0.4342889582741981, "grad_norm": 15.526817321777344, "learning_rate": 9.956627873971049e-05, "loss": 1.2592705726623534, "step": 1530 }, { "epoch": 0.4371274481975589, "grad_norm": 17.968738555908203, "learning_rate": 9.956344024978711e-05, "loss": 1.2204198837280273, "step": 1540 }, { "epoch": 0.43996593812091966, "grad_norm": 17.75804901123047, "learning_rate": 9.956060175986376e-05, "loss": 1.2555466651916505, "step": 1550 }, { "epoch": 0.44280442804428044, "grad_norm": 26.97852325439453, "learning_rate": 9.95577632699404e-05, "loss": 1.257744026184082, "step": 1560 }, { "epoch": 0.4456429179676412, "grad_norm": 21.982749938964844, "learning_rate": 9.955492478001704e-05, "loss": 1.2631224632263183, "step": 1570 }, { "epoch": 0.448481407891002, "grad_norm": 17.545684814453125, "learning_rate": 9.955208629009367e-05, "loss": 1.1751346588134766, "step": 1580 }, { "epoch": 0.45131989781436277, "grad_norm": 20.652318954467773, "learning_rate": 9.954924780017031e-05, "loss": 1.2249062538146973, "step": 1590 }, { "epoch": 0.45415838773772355, "grad_norm": 22.165056228637695, "learning_rate": 9.954640931024696e-05, "loss": 1.1971467971801757, "step": 1600 }, { "epoch": 0.4569968776610843, "grad_norm": 17.115999221801758, "learning_rate": 9.954357082032359e-05, "loss": 1.1528328895568847, "step": 1610 }, { "epoch": 0.4598353675844451, "grad_norm": 23.53191566467285, "learning_rate": 9.954073233040023e-05, "loss": 1.1881637573242188, "step": 1620 }, { "epoch": 0.4626738575078059, "grad_norm": 16.994874954223633, "learning_rate": 9.953789384047687e-05, "loss": 1.1595786094665528, "step": 1630 }, { "epoch": 0.4655123474311666, "grad_norm": 17.554264068603516, "learning_rate": 9.95350553505535e-05, "loss": 1.1674737930297852, "step": 1640 }, { "epoch": 0.46835083735452737, "grad_norm": 19.162006378173828, "learning_rate": 9.953221686063014e-05, "loss": 1.194182777404785, "step": 1650 }, { "epoch": 0.47118932727788815, "grad_norm": 17.91055679321289, "learning_rate": 9.95293783707068e-05, "loss": 1.1569328308105469, "step": 1660 }, { "epoch": 0.4740278172012489, "grad_norm": 16.461978912353516, "learning_rate": 9.952653988078343e-05, "loss": 1.1378480911254882, "step": 1670 }, { "epoch": 0.4768663071246097, "grad_norm": 16.126646041870117, "learning_rate": 9.952370139086007e-05, "loss": 1.1848915100097657, "step": 1680 }, { "epoch": 0.4797047970479705, "grad_norm": 17.95230484008789, "learning_rate": 9.952086290093671e-05, "loss": 1.146336841583252, "step": 1690 }, { "epoch": 0.48254328697133125, "grad_norm": 19.885774612426758, "learning_rate": 9.951802441101335e-05, "loss": 1.1500332832336426, "step": 1700 }, { "epoch": 0.48538177689469203, "grad_norm": 21.219768524169922, "learning_rate": 9.951518592108998e-05, "loss": 1.1712417602539062, "step": 1710 }, { "epoch": 0.4882202668180528, "grad_norm": 20.819007873535156, "learning_rate": 9.951234743116662e-05, "loss": 1.1643861770629882, "step": 1720 }, { "epoch": 0.4910587567414136, "grad_norm": 19.05169677734375, "learning_rate": 9.950950894124327e-05, "loss": 1.1117895126342774, "step": 1730 }, { "epoch": 0.49389724666477436, "grad_norm": 23.230318069458008, "learning_rate": 9.95066704513199e-05, "loss": 1.1456717491149901, "step": 1740 }, { "epoch": 0.49673573658813513, "grad_norm": 24.508459091186523, "learning_rate": 9.950383196139654e-05, "loss": 1.1668670654296875, "step": 1750 }, { "epoch": 0.4995742265114959, "grad_norm": 17.86039161682129, "learning_rate": 9.950099347147318e-05, "loss": 1.1437216758728028, "step": 1760 }, { "epoch": 0.5024127164348566, "grad_norm": 24.65870475769043, "learning_rate": 9.949815498154981e-05, "loss": 1.0606961250305176, "step": 1770 }, { "epoch": 0.5052512063582174, "grad_norm": 17.028989791870117, "learning_rate": 9.949531649162645e-05, "loss": 1.0809514999389649, "step": 1780 }, { "epoch": 0.5080896962815782, "grad_norm": 16.685449600219727, "learning_rate": 9.94924780017031e-05, "loss": 1.1341323852539062, "step": 1790 }, { "epoch": 0.510928186204939, "grad_norm": 18.500078201293945, "learning_rate": 9.948963951177974e-05, "loss": 1.1468262672424316, "step": 1800 }, { "epoch": 0.5137666761282997, "grad_norm": 25.65341567993164, "learning_rate": 9.948680102185638e-05, "loss": 1.0892413139343262, "step": 1810 }, { "epoch": 0.5166051660516605, "grad_norm": 17.926219940185547, "learning_rate": 9.948396253193302e-05, "loss": 1.1217551231384277, "step": 1820 }, { "epoch": 0.5194436559750213, "grad_norm": 17.785680770874023, "learning_rate": 9.948112404200966e-05, "loss": 1.072051429748535, "step": 1830 }, { "epoch": 0.5222821458983821, "grad_norm": 17.262788772583008, "learning_rate": 9.947828555208629e-05, "loss": 1.0294719696044923, "step": 1840 }, { "epoch": 0.5251206358217428, "grad_norm": 22.165456771850586, "learning_rate": 9.947544706216293e-05, "loss": 1.0827247619628906, "step": 1850 }, { "epoch": 0.5279591257451036, "grad_norm": 20.852689743041992, "learning_rate": 9.947260857223958e-05, "loss": 1.0920230865478515, "step": 1860 }, { "epoch": 0.5307976156684644, "grad_norm": 19.363285064697266, "learning_rate": 9.946977008231621e-05, "loss": 1.0516845703125, "step": 1870 }, { "epoch": 0.5336361055918252, "grad_norm": 18.80592918395996, "learning_rate": 9.946693159239285e-05, "loss": 1.044110107421875, "step": 1880 }, { "epoch": 0.536474595515186, "grad_norm": 15.790478706359863, "learning_rate": 9.94640931024695e-05, "loss": 1.0158000946044923, "step": 1890 }, { "epoch": 0.5393130854385467, "grad_norm": 17.42597770690918, "learning_rate": 9.946125461254612e-05, "loss": 1.0441325187683106, "step": 1900 }, { "epoch": 0.5421515753619075, "grad_norm": 22.460384368896484, "learning_rate": 9.945841612262276e-05, "loss": 0.9719372749328613, "step": 1910 }, { "epoch": 0.5449900652852683, "grad_norm": 16.911693572998047, "learning_rate": 9.94555776326994e-05, "loss": 0.9860975265502929, "step": 1920 }, { "epoch": 0.547828555208629, "grad_norm": 20.080764770507812, "learning_rate": 9.945273914277605e-05, "loss": 1.032511043548584, "step": 1930 }, { "epoch": 0.5506670451319898, "grad_norm": 14.045267105102539, "learning_rate": 9.944990065285269e-05, "loss": 0.9706751823425293, "step": 1940 }, { "epoch": 0.5535055350553506, "grad_norm": 17.30767059326172, "learning_rate": 9.944706216292933e-05, "loss": 1.0029872894287108, "step": 1950 }, { "epoch": 0.5563440249787113, "grad_norm": 16.45441246032715, "learning_rate": 9.944422367300597e-05, "loss": 0.9810984611511231, "step": 1960 }, { "epoch": 0.559182514902072, "grad_norm": 18.228923797607422, "learning_rate": 9.94413851830826e-05, "loss": 1.019540023803711, "step": 1970 }, { "epoch": 0.5620210048254328, "grad_norm": 22.129457473754883, "learning_rate": 9.943854669315924e-05, "loss": 0.9464988708496094, "step": 1980 }, { "epoch": 0.5648594947487936, "grad_norm": 20.144012451171875, "learning_rate": 9.943570820323588e-05, "loss": 0.9839242935180664, "step": 1990 }, { "epoch": 0.5676979846721544, "grad_norm": 23.5290470123291, "learning_rate": 9.943286971331252e-05, "loss": 1.0318458557128907, "step": 2000 }, { "epoch": 0.5676979846721544, "eval_accuracy": 0.6265021936796592, "eval_loss": 1.263211965560913, "eval_runtime": 44.3147, "eval_samples_per_second": 354.893, "eval_steps_per_second": 5.551, "step": 2000 }, { "epoch": 0.5705364745955152, "grad_norm": 18.6157169342041, "learning_rate": 9.943003122338916e-05, "loss": 0.9861863136291504, "step": 2010 }, { "epoch": 0.5733749645188759, "grad_norm": 21.073627471923828, "learning_rate": 9.94271927334658e-05, "loss": 0.9943069458007813, "step": 2020 }, { "epoch": 0.5762134544422367, "grad_norm": 19.622215270996094, "learning_rate": 9.942435424354243e-05, "loss": 0.9539078712463379, "step": 2030 }, { "epoch": 0.5790519443655975, "grad_norm": 21.462800979614258, "learning_rate": 9.942151575361908e-05, "loss": 0.9168841361999511, "step": 2040 }, { "epoch": 0.5818904342889583, "grad_norm": 24.584243774414062, "learning_rate": 9.941867726369572e-05, "loss": 0.8968141555786133, "step": 2050 }, { "epoch": 0.584728924212319, "grad_norm": 18.689815521240234, "learning_rate": 9.941583877377236e-05, "loss": 0.9827134132385253, "step": 2060 }, { "epoch": 0.5875674141356798, "grad_norm": 17.244977951049805, "learning_rate": 9.9413000283849e-05, "loss": 0.920962142944336, "step": 2070 }, { "epoch": 0.5904059040590406, "grad_norm": 14.31159782409668, "learning_rate": 9.941016179392564e-05, "loss": 0.934681510925293, "step": 2080 }, { "epoch": 0.5932443939824014, "grad_norm": 14.217025756835938, "learning_rate": 9.940732330400228e-05, "loss": 0.9197972297668457, "step": 2090 }, { "epoch": 0.5960828839057621, "grad_norm": 20.46293067932129, "learning_rate": 9.940448481407891e-05, "loss": 0.920412826538086, "step": 2100 }, { "epoch": 0.5989213738291229, "grad_norm": 19.042085647583008, "learning_rate": 9.940164632415555e-05, "loss": 0.9223427772521973, "step": 2110 }, { "epoch": 0.6017598637524837, "grad_norm": 19.6672420501709, "learning_rate": 9.939909168322453e-05, "loss": 0.9822881698608399, "step": 2120 }, { "epoch": 0.6045983536758445, "grad_norm": 15.583842277526855, "learning_rate": 9.939625319330117e-05, "loss": 0.8742053031921386, "step": 2130 }, { "epoch": 0.6074368435992052, "grad_norm": 20.443885803222656, "learning_rate": 9.939341470337781e-05, "loss": 0.9759296417236328, "step": 2140 }, { "epoch": 0.610275333522566, "grad_norm": 17.96788215637207, "learning_rate": 9.939057621345444e-05, "loss": 0.9222827911376953, "step": 2150 }, { "epoch": 0.6131138234459268, "grad_norm": 23.307939529418945, "learning_rate": 9.938773772353108e-05, "loss": 0.9233883857727051, "step": 2160 }, { "epoch": 0.6159523133692876, "grad_norm": 22.879898071289062, "learning_rate": 9.938489923360772e-05, "loss": 0.9214688301086426, "step": 2170 }, { "epoch": 0.6187908032926484, "grad_norm": 19.261865615844727, "learning_rate": 9.938206074368437e-05, "loss": 0.8991484642028809, "step": 2180 }, { "epoch": 0.6216292932160091, "grad_norm": 25.39220428466797, "learning_rate": 9.937922225376101e-05, "loss": 0.9555189132690429, "step": 2190 }, { "epoch": 0.6244677831393699, "grad_norm": 23.816425323486328, "learning_rate": 9.937638376383765e-05, "loss": 0.8966886520385742, "step": 2200 }, { "epoch": 0.6273062730627307, "grad_norm": 20.043550491333008, "learning_rate": 9.937354527391428e-05, "loss": 0.8980030059814453, "step": 2210 }, { "epoch": 0.6301447629860913, "grad_norm": 20.483091354370117, "learning_rate": 9.937070678399092e-05, "loss": 0.9139481544494629, "step": 2220 }, { "epoch": 0.6329832529094521, "grad_norm": 28.058103561401367, "learning_rate": 9.936786829406756e-05, "loss": 0.8352715492248535, "step": 2230 }, { "epoch": 0.6358217428328129, "grad_norm": 19.546688079833984, "learning_rate": 9.93650298041442e-05, "loss": 0.8772073745727539, "step": 2240 }, { "epoch": 0.6386602327561737, "grad_norm": 15.413393020629883, "learning_rate": 9.936219131422084e-05, "loss": 0.8987820625305176, "step": 2250 }, { "epoch": 0.6414987226795344, "grad_norm": 20.86405372619629, "learning_rate": 9.935935282429748e-05, "loss": 0.9168025016784668, "step": 2260 }, { "epoch": 0.6443372126028952, "grad_norm": 16.999313354492188, "learning_rate": 9.935651433437411e-05, "loss": 0.847165870666504, "step": 2270 }, { "epoch": 0.647175702526256, "grad_norm": 20.19768714904785, "learning_rate": 9.935367584445075e-05, "loss": 0.8734968185424805, "step": 2280 }, { "epoch": 0.6500141924496168, "grad_norm": 20.372451782226562, "learning_rate": 9.93508373545274e-05, "loss": 0.8983511924743652, "step": 2290 }, { "epoch": 0.6528526823729776, "grad_norm": 24.846521377563477, "learning_rate": 9.934799886460404e-05, "loss": 0.8757077217102051, "step": 2300 }, { "epoch": 0.6556911722963383, "grad_norm": 24.48916244506836, "learning_rate": 9.934516037468068e-05, "loss": 0.884884262084961, "step": 2310 }, { "epoch": 0.6585296622196991, "grad_norm": 19.697154998779297, "learning_rate": 9.934232188475732e-05, "loss": 0.8407068252563477, "step": 2320 }, { "epoch": 0.6613681521430599, "grad_norm": 22.859161376953125, "learning_rate": 9.933948339483396e-05, "loss": 0.866301441192627, "step": 2330 }, { "epoch": 0.6642066420664207, "grad_norm": 21.11536979675293, "learning_rate": 9.933664490491059e-05, "loss": 0.8537308692932128, "step": 2340 }, { "epoch": 0.6670451319897814, "grad_norm": 18.359970092773438, "learning_rate": 9.933380641498723e-05, "loss": 0.9032468795776367, "step": 2350 }, { "epoch": 0.6698836219131422, "grad_norm": 18.005191802978516, "learning_rate": 9.933096792506387e-05, "loss": 0.8364997863769531, "step": 2360 }, { "epoch": 0.672722111836503, "grad_norm": 19.723268508911133, "learning_rate": 9.93281294351405e-05, "loss": 0.8026572227478027, "step": 2370 }, { "epoch": 0.6755606017598638, "grad_norm": 20.37019157409668, "learning_rate": 9.932529094521715e-05, "loss": 0.8097442626953125, "step": 2380 }, { "epoch": 0.6783990916832245, "grad_norm": 26.74312973022461, "learning_rate": 9.93224524552938e-05, "loss": 0.7760079383850098, "step": 2390 }, { "epoch": 0.6812375816065853, "grad_norm": 20.00583839416504, "learning_rate": 9.931961396537042e-05, "loss": 0.8548799514770508, "step": 2400 }, { "epoch": 0.6840760715299461, "grad_norm": 19.305622100830078, "learning_rate": 9.931677547544706e-05, "loss": 0.9243306159973145, "step": 2410 }, { "epoch": 0.6869145614533069, "grad_norm": 21.600069046020508, "learning_rate": 9.93139369855237e-05, "loss": 0.814850902557373, "step": 2420 }, { "epoch": 0.6897530513766676, "grad_norm": 14.252060890197754, "learning_rate": 9.931109849560035e-05, "loss": 0.8301360130310058, "step": 2430 }, { "epoch": 0.6925915413000284, "grad_norm": 15.896858215332031, "learning_rate": 9.930826000567699e-05, "loss": 0.8521718978881836, "step": 2440 }, { "epoch": 0.6954300312233892, "grad_norm": 17.77695083618164, "learning_rate": 9.930542151575363e-05, "loss": 0.7978891372680664, "step": 2450 }, { "epoch": 0.69826852114675, "grad_norm": 21.16244888305664, "learning_rate": 9.930258302583027e-05, "loss": 0.8159560203552246, "step": 2460 }, { "epoch": 0.7011070110701108, "grad_norm": 16.893630981445312, "learning_rate": 9.92997445359069e-05, "loss": 0.8099790573120117, "step": 2470 }, { "epoch": 0.7039455009934714, "grad_norm": 15.397760391235352, "learning_rate": 9.929690604598354e-05, "loss": 0.8053768157958985, "step": 2480 }, { "epoch": 0.7067839909168322, "grad_norm": 14.958460807800293, "learning_rate": 9.929406755606018e-05, "loss": 0.7830320358276367, "step": 2490 }, { "epoch": 0.709622480840193, "grad_norm": 31.445096969604492, "learning_rate": 9.929122906613681e-05, "loss": 0.7881523132324219, "step": 2500 }, { "epoch": 0.709622480840193, "eval_accuracy": 0.6640808800152603, "eval_loss": 1.1102180480957031, "eval_runtime": 31.2137, "eval_samples_per_second": 503.85, "eval_steps_per_second": 7.881, "step": 2500 }, { "epoch": 0.7124609707635537, "grad_norm": 19.977161407470703, "learning_rate": 9.928839057621346e-05, "loss": 0.8247689247131348, "step": 2510 }, { "epoch": 0.7152994606869145, "grad_norm": 21.054527282714844, "learning_rate": 9.92855520862901e-05, "loss": 0.8349895477294922, "step": 2520 }, { "epoch": 0.7181379506102753, "grad_norm": 20.075908660888672, "learning_rate": 9.928271359636673e-05, "loss": 0.874781608581543, "step": 2530 }, { "epoch": 0.7209764405336361, "grad_norm": 23.04558753967285, "learning_rate": 9.927987510644337e-05, "loss": 0.8183870315551758, "step": 2540 }, { "epoch": 0.7238149304569969, "grad_norm": 18.28003692626953, "learning_rate": 9.927703661652002e-05, "loss": 0.7649356365203858, "step": 2550 }, { "epoch": 0.7266534203803576, "grad_norm": 19.41520118713379, "learning_rate": 9.927419812659666e-05, "loss": 0.8036155700683594, "step": 2560 }, { "epoch": 0.7294919103037184, "grad_norm": 15.614694595336914, "learning_rate": 9.92713596366733e-05, "loss": 0.7804097175598145, "step": 2570 }, { "epoch": 0.7323304002270792, "grad_norm": 18.83174705505371, "learning_rate": 9.926852114674994e-05, "loss": 0.7655549049377441, "step": 2580 }, { "epoch": 0.73516889015044, "grad_norm": 14.78815746307373, "learning_rate": 9.926568265682658e-05, "loss": 0.7681702136993408, "step": 2590 }, { "epoch": 0.7380073800738007, "grad_norm": 18.639469146728516, "learning_rate": 9.926284416690321e-05, "loss": 0.7835202217102051, "step": 2600 }, { "epoch": 0.7408458699971615, "grad_norm": 16.485692977905273, "learning_rate": 9.926000567697985e-05, "loss": 0.7880281448364258, "step": 2610 }, { "epoch": 0.7436843599205223, "grad_norm": 24.092708587646484, "learning_rate": 9.925716718705649e-05, "loss": 0.8002113342285156, "step": 2620 }, { "epoch": 0.7465228498438831, "grad_norm": 26.200944900512695, "learning_rate": 9.925432869713312e-05, "loss": 0.7842296600341797, "step": 2630 }, { "epoch": 0.7493613397672438, "grad_norm": 17.914724349975586, "learning_rate": 9.925149020720977e-05, "loss": 0.704561424255371, "step": 2640 }, { "epoch": 0.7521998296906046, "grad_norm": 16.393476486206055, "learning_rate": 9.924865171728642e-05, "loss": 0.7988709449768067, "step": 2650 }, { "epoch": 0.7550383196139654, "grad_norm": 21.757715225219727, "learning_rate": 9.924581322736304e-05, "loss": 0.731673288345337, "step": 2660 }, { "epoch": 0.7578768095373262, "grad_norm": 19.89030647277832, "learning_rate": 9.924297473743968e-05, "loss": 0.8036115646362305, "step": 2670 }, { "epoch": 0.760715299460687, "grad_norm": 20.420228958129883, "learning_rate": 9.924013624751633e-05, "loss": 0.781536054611206, "step": 2680 }, { "epoch": 0.7635537893840477, "grad_norm": 21.24135398864746, "learning_rate": 9.923729775759297e-05, "loss": 0.7116634845733643, "step": 2690 }, { "epoch": 0.7663922793074085, "grad_norm": 24.834274291992188, "learning_rate": 9.92344592676696e-05, "loss": 0.7020663261413574, "step": 2700 }, { "epoch": 0.7692307692307693, "grad_norm": 30.94087028503418, "learning_rate": 9.923162077774625e-05, "loss": 0.7792910575866699, "step": 2710 }, { "epoch": 0.77206925915413, "grad_norm": 16.64111328125, "learning_rate": 9.922878228782289e-05, "loss": 0.7417089939117432, "step": 2720 }, { "epoch": 0.7749077490774908, "grad_norm": 14.349987030029297, "learning_rate": 9.922594379789952e-05, "loss": 0.7197896003723144, "step": 2730 }, { "epoch": 0.7777462390008516, "grad_norm": 19.708078384399414, "learning_rate": 9.922310530797616e-05, "loss": 0.7126902103424072, "step": 2740 }, { "epoch": 0.7805847289242123, "grad_norm": 20.269861221313477, "learning_rate": 9.92202668180528e-05, "loss": 0.6795226573944092, "step": 2750 }, { "epoch": 0.783423218847573, "grad_norm": 16.270212173461914, "learning_rate": 9.921742832812943e-05, "loss": 0.6910058498382569, "step": 2760 }, { "epoch": 0.7862617087709338, "grad_norm": 21.220205307006836, "learning_rate": 9.921458983820609e-05, "loss": 0.7572475433349609, "step": 2770 }, { "epoch": 0.7891001986942946, "grad_norm": 15.586875915527344, "learning_rate": 9.921175134828273e-05, "loss": 0.7609560489654541, "step": 2780 }, { "epoch": 0.7919386886176554, "grad_norm": 18.888391494750977, "learning_rate": 9.920891285835935e-05, "loss": 0.6915780544281006, "step": 2790 }, { "epoch": 0.7947771785410161, "grad_norm": 15.691100120544434, "learning_rate": 9.9206074368436e-05, "loss": 0.6838128089904785, "step": 2800 }, { "epoch": 0.7976156684643769, "grad_norm": 22.3140869140625, "learning_rate": 9.920323587851264e-05, "loss": 0.6962751388549805, "step": 2810 }, { "epoch": 0.8004541583877377, "grad_norm": 21.001943588256836, "learning_rate": 9.920039738858928e-05, "loss": 0.7597682476043701, "step": 2820 }, { "epoch": 0.8032926483110985, "grad_norm": 16.918865203857422, "learning_rate": 9.91975588986659e-05, "loss": 0.7108233451843262, "step": 2830 }, { "epoch": 0.8061311382344593, "grad_norm": 13.406148910522461, "learning_rate": 9.919472040874256e-05, "loss": 0.6854429244995117, "step": 2840 }, { "epoch": 0.80896962815782, "grad_norm": 16.771976470947266, "learning_rate": 9.91918819188192e-05, "loss": 0.7438200950622559, "step": 2850 }, { "epoch": 0.8118081180811808, "grad_norm": 18.00636100769043, "learning_rate": 9.918904342889583e-05, "loss": 0.6548980236053467, "step": 2860 }, { "epoch": 0.8146466080045416, "grad_norm": 15.884634017944336, "learning_rate": 9.918620493897247e-05, "loss": 0.7351513862609863, "step": 2870 }, { "epoch": 0.8174850979279024, "grad_norm": 15.585739135742188, "learning_rate": 9.918336644904911e-05, "loss": 0.7094754219055176, "step": 2880 }, { "epoch": 0.8203235878512631, "grad_norm": 20.898832321166992, "learning_rate": 9.918052795912574e-05, "loss": 0.6880773067474365, "step": 2890 }, { "epoch": 0.8231620777746239, "grad_norm": 24.912071228027344, "learning_rate": 9.917768946920238e-05, "loss": 0.7234230041503906, "step": 2900 }, { "epoch": 0.8260005676979847, "grad_norm": 16.72865104675293, "learning_rate": 9.917485097927904e-05, "loss": 0.6306540966033936, "step": 2910 }, { "epoch": 0.8288390576213455, "grad_norm": 21.63758087158203, "learning_rate": 9.917201248935566e-05, "loss": 0.6547715663909912, "step": 2920 }, { "epoch": 0.8316775475447062, "grad_norm": 22.159839630126953, "learning_rate": 9.91691739994323e-05, "loss": 0.7318464756011963, "step": 2930 }, { "epoch": 0.834516037468067, "grad_norm": 21.752506256103516, "learning_rate": 9.916633550950895e-05, "loss": 0.7466944217681885, "step": 2940 }, { "epoch": 0.8373545273914278, "grad_norm": 19.540922164916992, "learning_rate": 9.916349701958559e-05, "loss": 0.7104002952575683, "step": 2950 }, { "epoch": 0.8401930173147886, "grad_norm": 30.11966323852539, "learning_rate": 9.916065852966222e-05, "loss": 0.6709997653961182, "step": 2960 }, { "epoch": 0.8430315072381493, "grad_norm": 16.262800216674805, "learning_rate": 9.915782003973887e-05, "loss": 0.6747283458709716, "step": 2970 }, { "epoch": 0.8458699971615101, "grad_norm": 20.12322998046875, "learning_rate": 9.915498154981551e-05, "loss": 0.628515100479126, "step": 2980 }, { "epoch": 0.8487084870848709, "grad_norm": 17.60300064086914, "learning_rate": 9.915214305989214e-05, "loss": 0.6279134750366211, "step": 2990 }, { "epoch": 0.8515469770082317, "grad_norm": 21.34027862548828, "learning_rate": 9.914930456996878e-05, "loss": 0.5946772575378418, "step": 3000 }, { "epoch": 0.8515469770082317, "eval_accuracy": 0.7144401348000254, "eval_loss": 0.918184757232666, "eval_runtime": 55.5841, "eval_samples_per_second": 282.941, "eval_steps_per_second": 4.426, "step": 3000 }, { "epoch": 0.8543854669315923, "grad_norm": 14.811094284057617, "learning_rate": 9.914646608004542e-05, "loss": 0.6463747024536133, "step": 3010 }, { "epoch": 0.8572239568549531, "grad_norm": 20.67406463623047, "learning_rate": 9.914362759012205e-05, "loss": 0.6222482681274414, "step": 3020 }, { "epoch": 0.8600624467783139, "grad_norm": 13.015539169311523, "learning_rate": 9.914078910019869e-05, "loss": 0.6816729545593262, "step": 3030 }, { "epoch": 0.8629009367016747, "grad_norm": 23.75094223022461, "learning_rate": 9.913795061027535e-05, "loss": 0.6649856090545654, "step": 3040 }, { "epoch": 0.8657394266250354, "grad_norm": 23.84694480895996, "learning_rate": 9.913511212035198e-05, "loss": 0.6659555912017823, "step": 3050 }, { "epoch": 0.8685779165483962, "grad_norm": 21.65428924560547, "learning_rate": 9.913227363042862e-05, "loss": 0.6275756359100342, "step": 3060 }, { "epoch": 0.871416406471757, "grad_norm": 16.181154251098633, "learning_rate": 9.912943514050526e-05, "loss": 0.6297444343566895, "step": 3070 }, { "epoch": 0.8742548963951178, "grad_norm": 14.148035049438477, "learning_rate": 9.91265966505819e-05, "loss": 0.6211448669433594, "step": 3080 }, { "epoch": 0.8770933863184786, "grad_norm": 20.469369888305664, "learning_rate": 9.912375816065853e-05, "loss": 0.608912467956543, "step": 3090 }, { "epoch": 0.8799318762418393, "grad_norm": 13.300294876098633, "learning_rate": 9.912091967073517e-05, "loss": 0.5914093971252441, "step": 3100 }, { "epoch": 0.8827703661652001, "grad_norm": 20.558774948120117, "learning_rate": 9.911808118081181e-05, "loss": 0.644989013671875, "step": 3110 }, { "epoch": 0.8856088560885609, "grad_norm": 20.723093032836914, "learning_rate": 9.911524269088845e-05, "loss": 0.625809621810913, "step": 3120 }, { "epoch": 0.8884473460119217, "grad_norm": 20.392393112182617, "learning_rate": 9.911240420096509e-05, "loss": 0.6319412231445313, "step": 3130 }, { "epoch": 0.8912858359352824, "grad_norm": 19.048803329467773, "learning_rate": 9.910956571104173e-05, "loss": 0.6518021583557129, "step": 3140 }, { "epoch": 0.8941243258586432, "grad_norm": 16.378437042236328, "learning_rate": 9.910672722111836e-05, "loss": 0.6935600280761719, "step": 3150 }, { "epoch": 0.896962815782004, "grad_norm": 19.749753952026367, "learning_rate": 9.9103888731195e-05, "loss": 0.6593052864074707, "step": 3160 }, { "epoch": 0.8998013057053648, "grad_norm": 23.971311569213867, "learning_rate": 9.910105024127166e-05, "loss": 0.6660974979400635, "step": 3170 }, { "epoch": 0.9026397956287255, "grad_norm": 21.101110458374023, "learning_rate": 9.909821175134829e-05, "loss": 0.6309810638427734, "step": 3180 }, { "epoch": 0.9054782855520863, "grad_norm": 12.924442291259766, "learning_rate": 9.909537326142493e-05, "loss": 0.6368088722229004, "step": 3190 }, { "epoch": 0.9083167754754471, "grad_norm": 21.419795989990234, "learning_rate": 9.909253477150157e-05, "loss": 0.5837503910064697, "step": 3200 }, { "epoch": 0.9111552653988079, "grad_norm": 14.982532501220703, "learning_rate": 9.90896962815782e-05, "loss": 0.5545926094055176, "step": 3210 }, { "epoch": 0.9139937553221686, "grad_norm": 20.37749481201172, "learning_rate": 9.908685779165484e-05, "loss": 0.5880260944366456, "step": 3220 }, { "epoch": 0.9168322452455294, "grad_norm": 19.062028884887695, "learning_rate": 9.908401930173148e-05, "loss": 0.6510546684265137, "step": 3230 }, { "epoch": 0.9196707351688902, "grad_norm": 16.464235305786133, "learning_rate": 9.908118081180812e-05, "loss": 0.6244417190551758, "step": 3240 }, { "epoch": 0.922509225092251, "grad_norm": 17.39070701599121, "learning_rate": 9.907834232188476e-05, "loss": 0.6301247119903565, "step": 3250 }, { "epoch": 0.9253477150156117, "grad_norm": 24.75601577758789, "learning_rate": 9.90755038319614e-05, "loss": 0.6847886085510254, "step": 3260 }, { "epoch": 0.9281862049389724, "grad_norm": 16.77629280090332, "learning_rate": 9.907266534203805e-05, "loss": 0.5947803020477295, "step": 3270 }, { "epoch": 0.9310246948623332, "grad_norm": 23.07398223876953, "learning_rate": 9.906982685211467e-05, "loss": 0.6088460922241211, "step": 3280 }, { "epoch": 0.933863184785694, "grad_norm": 14.877997398376465, "learning_rate": 9.906698836219131e-05, "loss": 0.5384617328643799, "step": 3290 }, { "epoch": 0.9367016747090547, "grad_norm": 18.736621856689453, "learning_rate": 9.906414987226796e-05, "loss": 0.6512783527374267, "step": 3300 }, { "epoch": 0.9395401646324155, "grad_norm": 20.40567970275879, "learning_rate": 9.90613113823446e-05, "loss": 0.6608942985534668, "step": 3310 }, { "epoch": 0.9423786545557763, "grad_norm": 14.604842185974121, "learning_rate": 9.905847289242124e-05, "loss": 0.5476353168487549, "step": 3320 }, { "epoch": 0.9452171444791371, "grad_norm": 18.603300094604492, "learning_rate": 9.905563440249788e-05, "loss": 0.6173339366912842, "step": 3330 }, { "epoch": 0.9480556344024978, "grad_norm": 18.148330688476562, "learning_rate": 9.905279591257451e-05, "loss": 0.6027939319610596, "step": 3340 }, { "epoch": 0.9508941243258586, "grad_norm": 18.32457733154297, "learning_rate": 9.904995742265115e-05, "loss": 0.6387212753295899, "step": 3350 }, { "epoch": 0.9537326142492194, "grad_norm": 22.256174087524414, "learning_rate": 9.904711893272779e-05, "loss": 0.6160597324371337, "step": 3360 }, { "epoch": 0.9565711041725802, "grad_norm": 18.725799560546875, "learning_rate": 9.904428044280443e-05, "loss": 0.587288761138916, "step": 3370 }, { "epoch": 0.959409594095941, "grad_norm": 22.07069969177246, "learning_rate": 9.904144195288107e-05, "loss": 0.6679742813110352, "step": 3380 }, { "epoch": 0.9622480840193017, "grad_norm": 19.24441146850586, "learning_rate": 9.903860346295771e-05, "loss": 0.6263650894165039, "step": 3390 }, { "epoch": 0.9650865739426625, "grad_norm": 21.700515747070312, "learning_rate": 9.903576497303436e-05, "loss": 0.5862777709960938, "step": 3400 }, { "epoch": 0.9679250638660233, "grad_norm": 21.52484893798828, "learning_rate": 9.903292648311098e-05, "loss": 0.6023648262023926, "step": 3410 }, { "epoch": 0.9707635537893841, "grad_norm": 18.80558967590332, "learning_rate": 9.903008799318763e-05, "loss": 0.6242080688476562, "step": 3420 }, { "epoch": 0.9736020437127448, "grad_norm": 16.581951141357422, "learning_rate": 9.902724950326427e-05, "loss": 0.5422840118408203, "step": 3430 }, { "epoch": 0.9764405336361056, "grad_norm": 17.476638793945312, "learning_rate": 9.902441101334091e-05, "loss": 0.6070442199707031, "step": 3440 }, { "epoch": 0.9792790235594664, "grad_norm": 25.190176010131836, "learning_rate": 9.902157252341755e-05, "loss": 0.624859619140625, "step": 3450 }, { "epoch": 0.9821175134828272, "grad_norm": 23.83879280090332, "learning_rate": 9.901873403349419e-05, "loss": 0.6046848297119141, "step": 3460 }, { "epoch": 0.9849560034061879, "grad_norm": 14.63314437866211, "learning_rate": 9.901589554357082e-05, "loss": 0.525738525390625, "step": 3470 }, { "epoch": 0.9877944933295487, "grad_norm": 17.613473892211914, "learning_rate": 9.901305705364746e-05, "loss": 0.5794808387756347, "step": 3480 }, { "epoch": 0.9906329832529095, "grad_norm": 17.99323844909668, "learning_rate": 9.90102185637241e-05, "loss": 0.616182279586792, "step": 3490 }, { "epoch": 0.9934714731762703, "grad_norm": 18.68892478942871, "learning_rate": 9.900738007380074e-05, "loss": 0.5668538570404053, "step": 3500 }, { "epoch": 0.9934714731762703, "eval_accuracy": 0.7562154257010237, "eval_loss": 0.7962477207183838, "eval_runtime": 42.5278, "eval_samples_per_second": 369.805, "eval_steps_per_second": 5.784, "step": 3500 }, { "epoch": 0.996309963099631, "grad_norm": 21.941871643066406, "learning_rate": 9.900454158387738e-05, "loss": 0.5505751609802246, "step": 3510 }, { "epoch": 0.9991484530229918, "grad_norm": 13.521620750427246, "learning_rate": 9.900170309395403e-05, "loss": 0.5245109081268311, "step": 3520 }, { "epoch": 1.0019869429463526, "grad_norm": 19.12834930419922, "learning_rate": 9.899886460403067e-05, "loss": 0.6087633609771729, "step": 3530 }, { "epoch": 1.0048254328697133, "grad_norm": 19.97581672668457, "learning_rate": 9.89960261141073e-05, "loss": 0.5120650291442871, "step": 3540 }, { "epoch": 1.0076639227930742, "grad_norm": 14.91316032409668, "learning_rate": 9.899318762418394e-05, "loss": 0.5297846794128418, "step": 3550 }, { "epoch": 1.0105024127164348, "grad_norm": 13.453252792358398, "learning_rate": 9.899034913426058e-05, "loss": 0.532386589050293, "step": 3560 }, { "epoch": 1.0133409026397957, "grad_norm": 23.568191528320312, "learning_rate": 9.898751064433722e-05, "loss": 0.5427926540374756, "step": 3570 }, { "epoch": 1.0161793925631564, "grad_norm": 14.801007270812988, "learning_rate": 9.898467215441386e-05, "loss": 0.5012270927429199, "step": 3580 }, { "epoch": 1.0190178824865173, "grad_norm": 21.147682189941406, "learning_rate": 9.89818336644905e-05, "loss": 0.5705623626708984, "step": 3590 }, { "epoch": 1.021856372409878, "grad_norm": 18.752601623535156, "learning_rate": 9.897899517456713e-05, "loss": 0.5731739521026611, "step": 3600 }, { "epoch": 1.0246948623332388, "grad_norm": 20.307939529418945, "learning_rate": 9.897615668464377e-05, "loss": 0.5186459064483643, "step": 3610 }, { "epoch": 1.0275333522565995, "grad_norm": 14.331483840942383, "learning_rate": 9.897331819472041e-05, "loss": 0.5228328227996826, "step": 3620 }, { "epoch": 1.0303718421799604, "grad_norm": 17.32819175720215, "learning_rate": 9.897047970479705e-05, "loss": 0.5163751125335694, "step": 3630 }, { "epoch": 1.033210332103321, "grad_norm": 14.276911735534668, "learning_rate": 9.89676412148737e-05, "loss": 0.5030517578125, "step": 3640 }, { "epoch": 1.036048822026682, "grad_norm": 18.913345336914062, "learning_rate": 9.896480272495034e-05, "loss": 0.4956358909606934, "step": 3650 }, { "epoch": 1.0388873119500426, "grad_norm": 18.45677375793457, "learning_rate": 9.896196423502698e-05, "loss": 0.5469107627868652, "step": 3660 }, { "epoch": 1.0417258018734032, "grad_norm": 16.46055793762207, "learning_rate": 9.89591257451036e-05, "loss": 0.4877197265625, "step": 3670 }, { "epoch": 1.0445642917967641, "grad_norm": 14.712738990783691, "learning_rate": 9.895628725518025e-05, "loss": 0.5098207473754883, "step": 3680 }, { "epoch": 1.0474027817201248, "grad_norm": 20.962236404418945, "learning_rate": 9.895344876525689e-05, "loss": 0.5071119785308837, "step": 3690 }, { "epoch": 1.0502412716434857, "grad_norm": 23.96198272705078, "learning_rate": 9.895061027533352e-05, "loss": 0.5450370788574219, "step": 3700 }, { "epoch": 1.0530797615668464, "grad_norm": 19.801559448242188, "learning_rate": 9.894777178541017e-05, "loss": 0.5134214401245117, "step": 3710 }, { "epoch": 1.0559182514902072, "grad_norm": 14.510756492614746, "learning_rate": 9.894493329548681e-05, "loss": 0.49899988174438475, "step": 3720 }, { "epoch": 1.058756741413568, "grad_norm": 21.988487243652344, "learning_rate": 9.894209480556344e-05, "loss": 0.5698307037353516, "step": 3730 }, { "epoch": 1.0615952313369288, "grad_norm": 17.233766555786133, "learning_rate": 9.893925631564008e-05, "loss": 0.5030499458312988, "step": 3740 }, { "epoch": 1.0644337212602895, "grad_norm": 19.080814361572266, "learning_rate": 9.893641782571672e-05, "loss": 0.5081956386566162, "step": 3750 }, { "epoch": 1.0672722111836503, "grad_norm": 13.881319999694824, "learning_rate": 9.893357933579336e-05, "loss": 0.5425588607788085, "step": 3760 }, { "epoch": 1.070110701107011, "grad_norm": 16.08487319946289, "learning_rate": 9.893074084587e-05, "loss": 0.45580644607543946, "step": 3770 }, { "epoch": 1.072949191030372, "grad_norm": 15.748553276062012, "learning_rate": 9.892790235594665e-05, "loss": 0.4938023090362549, "step": 3780 }, { "epoch": 1.0757876809537326, "grad_norm": 35.376564025878906, "learning_rate": 9.892506386602329e-05, "loss": 0.48783578872680666, "step": 3790 }, { "epoch": 1.0786261708770934, "grad_norm": 13.826508522033691, "learning_rate": 9.892222537609992e-05, "loss": 0.4448415756225586, "step": 3800 }, { "epoch": 1.0814646608004541, "grad_norm": 16.199169158935547, "learning_rate": 9.891938688617656e-05, "loss": 0.49293742179870603, "step": 3810 }, { "epoch": 1.084303150723815, "grad_norm": 20.340862274169922, "learning_rate": 9.89165483962532e-05, "loss": 0.5109017848968506, "step": 3820 }, { "epoch": 1.0871416406471757, "grad_norm": 21.554218292236328, "learning_rate": 9.891370990632983e-05, "loss": 0.5753520965576172, "step": 3830 }, { "epoch": 1.0899801305705366, "grad_norm": 34.24570846557617, "learning_rate": 9.891087141640648e-05, "loss": 0.5693881988525391, "step": 3840 }, { "epoch": 1.0928186204938972, "grad_norm": 11.320433616638184, "learning_rate": 9.890803292648312e-05, "loss": 0.5257577419281005, "step": 3850 }, { "epoch": 1.095657110417258, "grad_norm": 19.261043548583984, "learning_rate": 9.890519443655975e-05, "loss": 0.45255417823791505, "step": 3860 }, { "epoch": 1.0984956003406188, "grad_norm": 17.512012481689453, "learning_rate": 9.890235594663639e-05, "loss": 0.4597503185272217, "step": 3870 }, { "epoch": 1.1013340902639797, "grad_norm": 12.027008056640625, "learning_rate": 9.889951745671303e-05, "loss": 0.4805763244628906, "step": 3880 }, { "epoch": 1.1041725801873403, "grad_norm": 22.0339412689209, "learning_rate": 9.889667896678968e-05, "loss": 0.5380415916442871, "step": 3890 }, { "epoch": 1.1070110701107012, "grad_norm": 15.947558403015137, "learning_rate": 9.889384047686632e-05, "loss": 0.5077179908752442, "step": 3900 }, { "epoch": 1.1098495600340619, "grad_norm": 16.223264694213867, "learning_rate": 9.889100198694296e-05, "loss": 0.5195577621459961, "step": 3910 }, { "epoch": 1.1126880499574225, "grad_norm": 23.789871215820312, "learning_rate": 9.88881634970196e-05, "loss": 0.501473331451416, "step": 3920 }, { "epoch": 1.1155265398807834, "grad_norm": 15.897955894470215, "learning_rate": 9.888532500709623e-05, "loss": 0.524996280670166, "step": 3930 }, { "epoch": 1.118365029804144, "grad_norm": 13.510128021240234, "learning_rate": 9.888248651717287e-05, "loss": 0.4928281307220459, "step": 3940 }, { "epoch": 1.121203519727505, "grad_norm": 20.105274200439453, "learning_rate": 9.887964802724951e-05, "loss": 0.524894905090332, "step": 3950 }, { "epoch": 1.1240420096508656, "grad_norm": 19.325244903564453, "learning_rate": 9.887680953732614e-05, "loss": 0.49663724899291994, "step": 3960 }, { "epoch": 1.1268804995742265, "grad_norm": 18.65350914001465, "learning_rate": 9.887397104740279e-05, "loss": 0.533427619934082, "step": 3970 }, { "epoch": 1.1297189894975872, "grad_norm": 23.46189308166504, "learning_rate": 9.887113255747943e-05, "loss": 0.48971285820007326, "step": 3980 }, { "epoch": 1.132557479420948, "grad_norm": 13.092864036560059, "learning_rate": 9.886829406755606e-05, "loss": 0.43837895393371584, "step": 3990 }, { "epoch": 1.1353959693443088, "grad_norm": 15.382172584533691, "learning_rate": 9.88654555776327e-05, "loss": 0.4698935508728027, "step": 4000 }, { "epoch": 1.1353959693443088, "eval_accuracy": 0.773701278056845, "eval_loss": 0.7292551398277283, "eval_runtime": 39.926, "eval_samples_per_second": 393.903, "eval_steps_per_second": 6.161, "step": 4000 }, { "epoch": 1.1382344592676696, "grad_norm": 19.64311408996582, "learning_rate": 9.886261708770934e-05, "loss": 0.49273972511291503, "step": 4010 }, { "epoch": 1.1410729491910303, "grad_norm": 17.47618865966797, "learning_rate": 9.885977859778599e-05, "loss": 0.5418276309967041, "step": 4020 }, { "epoch": 1.1439114391143912, "grad_norm": 14.918771743774414, "learning_rate": 9.885694010786261e-05, "loss": 0.41820201873779295, "step": 4030 }, { "epoch": 1.1467499290377519, "grad_norm": 25.491987228393555, "learning_rate": 9.885410161793927e-05, "loss": 0.5034903049468994, "step": 4040 }, { "epoch": 1.1495884189611127, "grad_norm": 23.17506217956543, "learning_rate": 9.88512631280159e-05, "loss": 0.45099434852600095, "step": 4050 }, { "epoch": 1.1524269088844734, "grad_norm": 19.603757858276367, "learning_rate": 9.884842463809254e-05, "loss": 0.46950182914733884, "step": 4060 }, { "epoch": 1.1552653988078343, "grad_norm": 12.458060264587402, "learning_rate": 9.884558614816918e-05, "loss": 0.46781249046325685, "step": 4070 }, { "epoch": 1.158103888731195, "grad_norm": 22.410030364990234, "learning_rate": 9.884274765824582e-05, "loss": 0.5144707202911377, "step": 4080 }, { "epoch": 1.1609423786545559, "grad_norm": 23.12044334411621, "learning_rate": 9.883990916832245e-05, "loss": 0.5029393196105957, "step": 4090 }, { "epoch": 1.1637808685779165, "grad_norm": 22.680009841918945, "learning_rate": 9.88370706783991e-05, "loss": 0.47003870010375975, "step": 4100 }, { "epoch": 1.1666193585012774, "grad_norm": 17.165809631347656, "learning_rate": 9.883423218847574e-05, "loss": 0.488496732711792, "step": 4110 }, { "epoch": 1.169457848424638, "grad_norm": 15.452492713928223, "learning_rate": 9.883139369855237e-05, "loss": 0.4864907741546631, "step": 4120 }, { "epoch": 1.172296338347999, "grad_norm": 13.541675567626953, "learning_rate": 9.882855520862901e-05, "loss": 0.433442211151123, "step": 4130 }, { "epoch": 1.1751348282713596, "grad_norm": 17.46375274658203, "learning_rate": 9.882571671870566e-05, "loss": 0.46564226150512694, "step": 4140 }, { "epoch": 1.1779733181947205, "grad_norm": 15.959354400634766, "learning_rate": 9.882287822878228e-05, "loss": 0.4392559051513672, "step": 4150 }, { "epoch": 1.1808118081180812, "grad_norm": 19.669645309448242, "learning_rate": 9.882003973885892e-05, "loss": 0.46302380561828616, "step": 4160 }, { "epoch": 1.1836502980414418, "grad_norm": 20.372474670410156, "learning_rate": 9.881720124893558e-05, "loss": 0.5397760391235351, "step": 4170 }, { "epoch": 1.1864887879648027, "grad_norm": 26.53449058532715, "learning_rate": 9.881436275901221e-05, "loss": 0.449375581741333, "step": 4180 }, { "epoch": 1.1893272778881636, "grad_norm": 21.799882888793945, "learning_rate": 9.881152426908885e-05, "loss": 0.5368235111236572, "step": 4190 }, { "epoch": 1.1921657678115243, "grad_norm": 19.610092163085938, "learning_rate": 9.880868577916549e-05, "loss": 0.44171462059020994, "step": 4200 }, { "epoch": 1.195004257734885, "grad_norm": 16.824251174926758, "learning_rate": 9.880584728924213e-05, "loss": 0.5037104606628418, "step": 4210 }, { "epoch": 1.1978427476582458, "grad_norm": 29.541683197021484, "learning_rate": 9.880300879931876e-05, "loss": 0.46416363716125486, "step": 4220 }, { "epoch": 1.2006812375816065, "grad_norm": 18.201892852783203, "learning_rate": 9.88001703093954e-05, "loss": 0.4911099910736084, "step": 4230 }, { "epoch": 1.2035197275049674, "grad_norm": 22.455341339111328, "learning_rate": 9.879733181947206e-05, "loss": 0.47243127822875974, "step": 4240 }, { "epoch": 1.206358217428328, "grad_norm": 15.709625244140625, "learning_rate": 9.879449332954868e-05, "loss": 0.4474250316619873, "step": 4250 }, { "epoch": 1.209196707351689, "grad_norm": 16.248855590820312, "learning_rate": 9.879165483962532e-05, "loss": 0.4088413238525391, "step": 4260 }, { "epoch": 1.2120351972750496, "grad_norm": 20.279415130615234, "learning_rate": 9.878881634970197e-05, "loss": 0.4390294075012207, "step": 4270 }, { "epoch": 1.2148736871984105, "grad_norm": 14.848129272460938, "learning_rate": 9.87859778597786e-05, "loss": 0.47104034423828123, "step": 4280 }, { "epoch": 1.2177121771217712, "grad_norm": 17.995227813720703, "learning_rate": 9.878313936985524e-05, "loss": 0.4983652591705322, "step": 4290 }, { "epoch": 1.220550667045132, "grad_norm": 28.94156837463379, "learning_rate": 9.878030087993189e-05, "loss": 0.49039883613586427, "step": 4300 }, { "epoch": 1.2233891569684927, "grad_norm": 19.829608917236328, "learning_rate": 9.877774623900086e-05, "loss": 0.49552297592163086, "step": 4310 }, { "epoch": 1.2262276468918536, "grad_norm": 13.533665657043457, "learning_rate": 9.87749077490775e-05, "loss": 0.42025227546691896, "step": 4320 }, { "epoch": 1.2290661368152143, "grad_norm": 13.58283519744873, "learning_rate": 9.877206925915413e-05, "loss": 0.4470266342163086, "step": 4330 }, { "epoch": 1.2319046267385751, "grad_norm": 12.666220664978027, "learning_rate": 9.876923076923077e-05, "loss": 0.47890100479125974, "step": 4340 }, { "epoch": 1.2347431166619358, "grad_norm": 16.611799240112305, "learning_rate": 9.876639227930742e-05, "loss": 0.46288247108459474, "step": 4350 }, { "epoch": 1.2375816065852967, "grad_norm": 14.443547248840332, "learning_rate": 9.876355378938405e-05, "loss": 0.49375429153442385, "step": 4360 }, { "epoch": 1.2404200965086574, "grad_norm": 27.5756893157959, "learning_rate": 9.876071529946069e-05, "loss": 0.47679734230041504, "step": 4370 }, { "epoch": 1.2432585864320183, "grad_norm": 16.343713760375977, "learning_rate": 9.875787680953733e-05, "loss": 0.5116428375244141, "step": 4380 }, { "epoch": 1.246097076355379, "grad_norm": 13.92696762084961, "learning_rate": 9.875503831961397e-05, "loss": 0.4031768798828125, "step": 4390 }, { "epoch": 1.2489355662787398, "grad_norm": 15.281488418579102, "learning_rate": 9.87521998296906e-05, "loss": 0.47810678482055663, "step": 4400 }, { "epoch": 1.2517740562021005, "grad_norm": 22.597318649291992, "learning_rate": 9.874936133976724e-05, "loss": 0.5381937026977539, "step": 4410 }, { "epoch": 1.2546125461254611, "grad_norm": 18.461830139160156, "learning_rate": 9.87465228498439e-05, "loss": 0.41672482490539553, "step": 4420 }, { "epoch": 1.257451036048822, "grad_norm": 19.697025299072266, "learning_rate": 9.874368435992053e-05, "loss": 0.44537792205810545, "step": 4430 }, { "epoch": 1.260289525972183, "grad_norm": 30.69244384765625, "learning_rate": 9.874084586999717e-05, "loss": 0.5139982223510742, "step": 4440 }, { "epoch": 1.2631280158955436, "grad_norm": 22.342615127563477, "learning_rate": 9.873800738007381e-05, "loss": 0.47946624755859374, "step": 4450 }, { "epoch": 1.2659665058189042, "grad_norm": 17.590852737426758, "learning_rate": 9.873516889015044e-05, "loss": 0.427897310256958, "step": 4460 }, { "epoch": 1.2688049957422651, "grad_norm": 11.778168678283691, "learning_rate": 9.873233040022708e-05, "loss": 0.4399810791015625, "step": 4470 }, { "epoch": 1.271643485665626, "grad_norm": 18.719938278198242, "learning_rate": 9.872949191030373e-05, "loss": 0.43341760635375975, "step": 4480 }, { "epoch": 1.2744819755889867, "grad_norm": 18.08342933654785, "learning_rate": 9.872665342038036e-05, "loss": 0.41063952445983887, "step": 4490 }, { "epoch": 1.2773204655123473, "grad_norm": 14.269857406616211, "learning_rate": 9.8723814930457e-05, "loss": 0.3776993274688721, "step": 4500 }, { "epoch": 1.2773204655123473, "eval_accuracy": 0.7976727920137343, "eval_loss": 0.6449918150901794, "eval_runtime": 41.83, "eval_samples_per_second": 375.975, "eval_steps_per_second": 5.881, "step": 4500 }, { "epoch": 1.2801589554357082, "grad_norm": 24.839513778686523, "learning_rate": 9.872097644053364e-05, "loss": 0.4299072265625, "step": 4510 }, { "epoch": 1.282997445359069, "grad_norm": 18.745697021484375, "learning_rate": 9.871813795061028e-05, "loss": 0.3797951936721802, "step": 4520 }, { "epoch": 1.2858359352824298, "grad_norm": 16.86839485168457, "learning_rate": 9.871529946068691e-05, "loss": 0.434146785736084, "step": 4530 }, { "epoch": 1.2886744252057905, "grad_norm": 15.549766540527344, "learning_rate": 9.871246097076355e-05, "loss": 0.4012486934661865, "step": 4540 }, { "epoch": 1.2915129151291513, "grad_norm": 12.177462577819824, "learning_rate": 9.870962248084021e-05, "loss": 0.4372123718261719, "step": 4550 }, { "epoch": 1.294351405052512, "grad_norm": 11.652061462402344, "learning_rate": 9.870678399091684e-05, "loss": 0.37133424282073973, "step": 4560 }, { "epoch": 1.297189894975873, "grad_norm": 17.72026824951172, "learning_rate": 9.870394550099348e-05, "loss": 0.3895043134689331, "step": 4570 }, { "epoch": 1.3000283848992336, "grad_norm": 17.506471633911133, "learning_rate": 9.870110701107012e-05, "loss": 0.4236306190490723, "step": 4580 }, { "epoch": 1.3028668748225944, "grad_norm": 15.206886291503906, "learning_rate": 9.869826852114675e-05, "loss": 0.43027386665344236, "step": 4590 }, { "epoch": 1.305705364745955, "grad_norm": 27.73688316345215, "learning_rate": 9.869543003122339e-05, "loss": 0.4492812633514404, "step": 4600 }, { "epoch": 1.308543854669316, "grad_norm": 16.135334014892578, "learning_rate": 9.869259154130003e-05, "loss": 0.4329353332519531, "step": 4610 }, { "epoch": 1.3113823445926767, "grad_norm": 17.55569076538086, "learning_rate": 9.868975305137667e-05, "loss": 0.4021465301513672, "step": 4620 }, { "epoch": 1.3142208345160376, "grad_norm": 13.74146842956543, "learning_rate": 9.868691456145331e-05, "loss": 0.4963533401489258, "step": 4630 }, { "epoch": 1.3170593244393982, "grad_norm": 17.204097747802734, "learning_rate": 9.868407607152995e-05, "loss": 0.4530606746673584, "step": 4640 }, { "epoch": 1.319897814362759, "grad_norm": 14.275046348571777, "learning_rate": 9.86812375816066e-05, "loss": 0.42008051872253416, "step": 4650 }, { "epoch": 1.3227363042861198, "grad_norm": 14.691771507263184, "learning_rate": 9.867839909168322e-05, "loss": 0.412373685836792, "step": 4660 }, { "epoch": 1.3255747942094804, "grad_norm": 18.029897689819336, "learning_rate": 9.867556060175986e-05, "loss": 0.39974236488342285, "step": 4670 }, { "epoch": 1.3284132841328413, "grad_norm": 15.352410316467285, "learning_rate": 9.867272211183652e-05, "loss": 0.39951088428497317, "step": 4680 }, { "epoch": 1.3312517740562022, "grad_norm": 21.814775466918945, "learning_rate": 9.866988362191315e-05, "loss": 0.5020639419555664, "step": 4690 }, { "epoch": 1.3340902639795629, "grad_norm": 14.603492736816406, "learning_rate": 9.866704513198979e-05, "loss": 0.3923049449920654, "step": 4700 }, { "epoch": 1.3369287539029235, "grad_norm": 14.375753402709961, "learning_rate": 9.866420664206643e-05, "loss": 0.42412638664245605, "step": 4710 }, { "epoch": 1.3397672438262844, "grad_norm": 15.672764778137207, "learning_rate": 9.866136815214306e-05, "loss": 0.4281881332397461, "step": 4720 }, { "epoch": 1.3426057337496453, "grad_norm": 13.635367393493652, "learning_rate": 9.86585296622197e-05, "loss": 0.3929778814315796, "step": 4730 }, { "epoch": 1.345444223673006, "grad_norm": 18.512662887573242, "learning_rate": 9.865569117229634e-05, "loss": 0.40594911575317383, "step": 4740 }, { "epoch": 1.3482827135963666, "grad_norm": 12.468866348266602, "learning_rate": 9.865285268237298e-05, "loss": 0.40600290298461916, "step": 4750 }, { "epoch": 1.3511212035197275, "grad_norm": 16.17823600769043, "learning_rate": 9.865001419244962e-05, "loss": 0.48497705459594725, "step": 4760 }, { "epoch": 1.3539596934430882, "grad_norm": 19.671701431274414, "learning_rate": 9.864717570252626e-05, "loss": 0.40709939002990725, "step": 4770 }, { "epoch": 1.356798183366449, "grad_norm": 13.598276138305664, "learning_rate": 9.86443372126029e-05, "loss": 0.3509558439254761, "step": 4780 }, { "epoch": 1.3596366732898097, "grad_norm": 14.558130264282227, "learning_rate": 9.864149872267953e-05, "loss": 0.44591646194458007, "step": 4790 }, { "epoch": 1.3624751632131706, "grad_norm": 16.25985336303711, "learning_rate": 9.863866023275618e-05, "loss": 0.4526205539703369, "step": 4800 }, { "epoch": 1.3653136531365313, "grad_norm": 21.94317626953125, "learning_rate": 9.863582174283282e-05, "loss": 0.4160717487335205, "step": 4810 }, { "epoch": 1.3681521430598922, "grad_norm": 13.047098159790039, "learning_rate": 9.863298325290946e-05, "loss": 0.3879124879837036, "step": 4820 }, { "epoch": 1.3709906329832529, "grad_norm": 18.06602668762207, "learning_rate": 9.86301447629861e-05, "loss": 0.3389427661895752, "step": 4830 }, { "epoch": 1.3738291229066137, "grad_norm": 16.58571434020996, "learning_rate": 9.862730627306274e-05, "loss": 0.4360986232757568, "step": 4840 }, { "epoch": 1.3766676128299744, "grad_norm": 14.993226051330566, "learning_rate": 9.862446778313937e-05, "loss": 0.4032593250274658, "step": 4850 }, { "epoch": 1.3795061027533353, "grad_norm": 16.774778366088867, "learning_rate": 9.862162929321601e-05, "loss": 0.40879335403442385, "step": 4860 }, { "epoch": 1.382344592676696, "grad_norm": 14.45619010925293, "learning_rate": 9.861879080329265e-05, "loss": 0.4169449329376221, "step": 4870 }, { "epoch": 1.3851830826000568, "grad_norm": 18.90743637084961, "learning_rate": 9.861595231336929e-05, "loss": 0.38536412715911866, "step": 4880 }, { "epoch": 1.3880215725234175, "grad_norm": 21.150867462158203, "learning_rate": 9.861311382344593e-05, "loss": 0.4077894687652588, "step": 4890 }, { "epoch": 1.3908600624467784, "grad_norm": 12.086867332458496, "learning_rate": 9.861027533352258e-05, "loss": 0.3888492822647095, "step": 4900 }, { "epoch": 1.393698552370139, "grad_norm": 17.880489349365234, "learning_rate": 9.86074368435992e-05, "loss": 0.3660192251205444, "step": 4910 }, { "epoch": 1.3965370422934997, "grad_norm": 13.121426582336426, "learning_rate": 9.860459835367584e-05, "loss": 0.36845395565032957, "step": 4920 }, { "epoch": 1.3993755322168606, "grad_norm": 17.843067169189453, "learning_rate": 9.860175986375249e-05, "loss": 0.4696702003479004, "step": 4930 }, { "epoch": 1.4022140221402215, "grad_norm": 18.1163272857666, "learning_rate": 9.859892137382913e-05, "loss": 0.43702106475830077, "step": 4940 }, { "epoch": 1.4050525120635822, "grad_norm": 12.804633140563965, "learning_rate": 9.859608288390577e-05, "loss": 0.3442823886871338, "step": 4950 }, { "epoch": 1.4078910019869428, "grad_norm": 17.787311553955078, "learning_rate": 9.859324439398241e-05, "loss": 0.41936607360839845, "step": 4960 }, { "epoch": 1.4107294919103037, "grad_norm": 15.412761688232422, "learning_rate": 9.859040590405905e-05, "loss": 0.3484956741333008, "step": 4970 }, { "epoch": 1.4135679818336646, "grad_norm": 16.862215042114258, "learning_rate": 9.858756741413568e-05, "loss": 0.3559828281402588, "step": 4980 }, { "epoch": 1.4164064717570253, "grad_norm": 20.44011878967285, "learning_rate": 9.858472892421232e-05, "loss": 0.48659963607788087, "step": 4990 }, { "epoch": 1.419244961680386, "grad_norm": 16.897275924682617, "learning_rate": 9.858189043428896e-05, "loss": 0.36725707054138185, "step": 5000 }, { "epoch": 1.419244961680386, "eval_accuracy": 0.8010427926495836, "eval_loss": 0.6082585453987122, "eval_runtime": 53.7844, "eval_samples_per_second": 292.408, "eval_steps_per_second": 4.574, "step": 5000 }, { "epoch": 1.4220834516037468, "grad_norm": 13.983293533325195, "learning_rate": 9.85790519443656e-05, "loss": 0.40871586799621584, "step": 5010 }, { "epoch": 1.4249219415271077, "grad_norm": 14.91701889038086, "learning_rate": 9.857621345444224e-05, "loss": 0.417681360244751, "step": 5020 }, { "epoch": 1.4277604314504684, "grad_norm": 12.531983375549316, "learning_rate": 9.857337496451889e-05, "loss": 0.33758106231689455, "step": 5030 }, { "epoch": 1.430598921373829, "grad_norm": 13.472724914550781, "learning_rate": 9.857053647459551e-05, "loss": 0.4251753330230713, "step": 5040 }, { "epoch": 1.43343741129719, "grad_norm": 12.599644660949707, "learning_rate": 9.856769798467216e-05, "loss": 0.40136184692382815, "step": 5050 }, { "epoch": 1.4362759012205506, "grad_norm": 13.55285930633545, "learning_rate": 9.85648594947488e-05, "loss": 0.331775426864624, "step": 5060 }, { "epoch": 1.4391143911439115, "grad_norm": 14.26955509185791, "learning_rate": 9.856202100482544e-05, "loss": 0.40736069679260256, "step": 5070 }, { "epoch": 1.4419528810672722, "grad_norm": 18.381608963012695, "learning_rate": 9.855918251490208e-05, "loss": 0.3950993061065674, "step": 5080 }, { "epoch": 1.444791370990633, "grad_norm": 22.655729293823242, "learning_rate": 9.855634402497872e-05, "loss": 0.41382884979248047, "step": 5090 }, { "epoch": 1.4476298609139937, "grad_norm": 15.472478866577148, "learning_rate": 9.855350553505536e-05, "loss": 0.43183231353759766, "step": 5100 }, { "epoch": 1.4504683508373546, "grad_norm": 13.48812198638916, "learning_rate": 9.855066704513199e-05, "loss": 0.3794731140136719, "step": 5110 }, { "epoch": 1.4533068407607153, "grad_norm": 18.082775115966797, "learning_rate": 9.854782855520863e-05, "loss": 0.3837474822998047, "step": 5120 }, { "epoch": 1.4561453306840761, "grad_norm": 14.654272079467773, "learning_rate": 9.854499006528527e-05, "loss": 0.4034879684448242, "step": 5130 }, { "epoch": 1.4589838206074368, "grad_norm": 22.6741886138916, "learning_rate": 9.85421515753619e-05, "loss": 0.40029330253601075, "step": 5140 }, { "epoch": 1.4618223105307977, "grad_norm": 15.116012573242188, "learning_rate": 9.853931308543856e-05, "loss": 0.3753565549850464, "step": 5150 }, { "epoch": 1.4646608004541584, "grad_norm": 15.378144264221191, "learning_rate": 9.85364745955152e-05, "loss": 0.39130725860595705, "step": 5160 }, { "epoch": 1.467499290377519, "grad_norm": 18.127573013305664, "learning_rate": 9.853363610559182e-05, "loss": 0.3862200975418091, "step": 5170 }, { "epoch": 1.47033778030088, "grad_norm": 17.513565063476562, "learning_rate": 9.853079761566847e-05, "loss": 0.3739815950393677, "step": 5180 }, { "epoch": 1.4731762702242408, "grad_norm": 15.361246109008789, "learning_rate": 9.852795912574511e-05, "loss": 0.35710279941558837, "step": 5190 }, { "epoch": 1.4760147601476015, "grad_norm": 12.10280990600586, "learning_rate": 9.852512063582175e-05, "loss": 0.35443720817565916, "step": 5200 }, { "epoch": 1.4788532500709621, "grad_norm": 26.650728225708008, "learning_rate": 9.852228214589838e-05, "loss": 0.3558326005935669, "step": 5210 }, { "epoch": 1.481691739994323, "grad_norm": 15.900015830993652, "learning_rate": 9.851944365597503e-05, "loss": 0.3787539958953857, "step": 5220 }, { "epoch": 1.484530229917684, "grad_norm": 22.03745460510254, "learning_rate": 9.851660516605167e-05, "loss": 0.4174639701843262, "step": 5230 }, { "epoch": 1.4873687198410446, "grad_norm": 15.448665618896484, "learning_rate": 9.85137666761283e-05, "loss": 0.3973209857940674, "step": 5240 }, { "epoch": 1.4902072097644052, "grad_norm": 16.67487907409668, "learning_rate": 9.851092818620494e-05, "loss": 0.40494956970214846, "step": 5250 }, { "epoch": 1.4930456996877661, "grad_norm": 17.907041549682617, "learning_rate": 9.850808969628158e-05, "loss": 0.3197688102722168, "step": 5260 }, { "epoch": 1.495884189611127, "grad_norm": 14.814518928527832, "learning_rate": 9.850525120635821e-05, "loss": 0.3876335144042969, "step": 5270 }, { "epoch": 1.4987226795344877, "grad_norm": 13.294283866882324, "learning_rate": 9.850241271643487e-05, "loss": 0.4116991996765137, "step": 5280 }, { "epoch": 1.5015611694578483, "grad_norm": 20.0823917388916, "learning_rate": 9.849957422651151e-05, "loss": 0.3615564346313477, "step": 5290 }, { "epoch": 1.5043996593812092, "grad_norm": 23.35344886779785, "learning_rate": 9.849673573658814e-05, "loss": 0.3687926769256592, "step": 5300 }, { "epoch": 1.5072381493045701, "grad_norm": 23.761474609375, "learning_rate": 9.849389724666478e-05, "loss": 0.34036128520965575, "step": 5310 }, { "epoch": 1.5100766392279308, "grad_norm": 23.982097625732422, "learning_rate": 9.849105875674142e-05, "loss": 0.3700068950653076, "step": 5320 }, { "epoch": 1.5129151291512914, "grad_norm": 17.33702278137207, "learning_rate": 9.848822026681806e-05, "loss": 0.3999584913253784, "step": 5330 }, { "epoch": 1.5157536190746523, "grad_norm": 8.948805809020996, "learning_rate": 9.848538177689469e-05, "loss": 0.3227642059326172, "step": 5340 }, { "epoch": 1.518592108998013, "grad_norm": 15.926279067993164, "learning_rate": 9.848254328697134e-05, "loss": 0.32281458377838135, "step": 5350 }, { "epoch": 1.521430598921374, "grad_norm": 10.821127891540527, "learning_rate": 9.847970479704798e-05, "loss": 0.3799780607223511, "step": 5360 }, { "epoch": 1.5242690888447346, "grad_norm": 14.134794235229492, "learning_rate": 9.847686630712461e-05, "loss": 0.3531552791595459, "step": 5370 }, { "epoch": 1.5271075787680952, "grad_norm": 13.056612014770508, "learning_rate": 9.847402781720125e-05, "loss": 0.36020984649658205, "step": 5380 }, { "epoch": 1.529946068691456, "grad_norm": 17.816654205322266, "learning_rate": 9.84711893272779e-05, "loss": 0.35541572570800783, "step": 5390 }, { "epoch": 1.532784558614817, "grad_norm": 14.351065635681152, "learning_rate": 9.846835083735452e-05, "loss": 0.34698355197906494, "step": 5400 }, { "epoch": 1.5356230485381777, "grad_norm": 12.12121295928955, "learning_rate": 9.846551234743116e-05, "loss": 0.3604514360427856, "step": 5410 }, { "epoch": 1.5384615384615383, "grad_norm": 17.574230194091797, "learning_rate": 9.846267385750782e-05, "loss": 0.3579800367355347, "step": 5420 }, { "epoch": 1.5413000283848992, "grad_norm": 13.679875373840332, "learning_rate": 9.845983536758445e-05, "loss": 0.36705615520477297, "step": 5430 }, { "epoch": 1.54413851830826, "grad_norm": 18.504926681518555, "learning_rate": 9.845699687766109e-05, "loss": 0.3654495716094971, "step": 5440 }, { "epoch": 1.5469770082316208, "grad_norm": 14.018847465515137, "learning_rate": 9.845415838773773e-05, "loss": 0.3356407642364502, "step": 5450 }, { "epoch": 1.5498154981549814, "grad_norm": 16.440414428710938, "learning_rate": 9.845131989781437e-05, "loss": 0.38534762859344485, "step": 5460 }, { "epoch": 1.5526539880783423, "grad_norm": 22.964309692382812, "learning_rate": 9.8448481407891e-05, "loss": 0.3433840274810791, "step": 5470 }, { "epoch": 1.5554924780017032, "grad_norm": 14.096144676208496, "learning_rate": 9.844564291796765e-05, "loss": 0.3266350507736206, "step": 5480 }, { "epoch": 1.5583309679250639, "grad_norm": 14.709188461303711, "learning_rate": 9.84428044280443e-05, "loss": 0.3251621723175049, "step": 5490 }, { "epoch": 1.5611694578484245, "grad_norm": 20.594057083129883, "learning_rate": 9.843996593812092e-05, "loss": 0.3514657497406006, "step": 5500 }, { "epoch": 1.5611694578484245, "eval_accuracy": 0.8146499650282953, "eval_loss": 0.5595000982284546, "eval_runtime": 38.2838, "eval_samples_per_second": 410.801, "eval_steps_per_second": 6.426, "step": 5500 }, { "epoch": 1.5640079477717854, "grad_norm": 14.448941230773926, "learning_rate": 9.843712744819756e-05, "loss": 0.35098283290863036, "step": 5510 }, { "epoch": 1.5668464376951463, "grad_norm": 19.287090301513672, "learning_rate": 9.84342889582742e-05, "loss": 0.4242234230041504, "step": 5520 }, { "epoch": 1.569684927618507, "grad_norm": 12.353058815002441, "learning_rate": 9.843145046835083e-05, "loss": 0.33080244064331055, "step": 5530 }, { "epoch": 1.5725234175418676, "grad_norm": 17.430191040039062, "learning_rate": 9.842861197842747e-05, "loss": 0.3384155035018921, "step": 5540 }, { "epoch": 1.5753619074652285, "grad_norm": 13.401595115661621, "learning_rate": 9.842577348850413e-05, "loss": 0.40207901000976565, "step": 5550 }, { "epoch": 1.5782003973885894, "grad_norm": 14.580215454101562, "learning_rate": 9.842293499858076e-05, "loss": 0.3236227512359619, "step": 5560 }, { "epoch": 1.58103888731195, "grad_norm": 19.61810302734375, "learning_rate": 9.84200965086574e-05, "loss": 0.35512096881866456, "step": 5570 }, { "epoch": 1.5838773772353107, "grad_norm": 14.905487060546875, "learning_rate": 9.841725801873404e-05, "loss": 0.36668665409088136, "step": 5580 }, { "epoch": 1.5867158671586716, "grad_norm": 20.765430450439453, "learning_rate": 9.841441952881068e-05, "loss": 0.32515730857849123, "step": 5590 }, { "epoch": 1.5895543570820325, "grad_norm": 17.95665740966797, "learning_rate": 9.841158103888731e-05, "loss": 0.35763490200042725, "step": 5600 }, { "epoch": 1.5923928470053932, "grad_norm": 13.318652153015137, "learning_rate": 9.840874254896396e-05, "loss": 0.31840684413909914, "step": 5610 }, { "epoch": 1.5952313369287539, "grad_norm": 19.703763961791992, "learning_rate": 9.84059040590406e-05, "loss": 0.37608768939971926, "step": 5620 }, { "epoch": 1.5980698268521145, "grad_norm": 15.146794319152832, "learning_rate": 9.840306556911723e-05, "loss": 0.3392350435256958, "step": 5630 }, { "epoch": 1.6009083167754754, "grad_norm": 14.886026382446289, "learning_rate": 9.840022707919387e-05, "loss": 0.32283697128295896, "step": 5640 }, { "epoch": 1.6037468066988363, "grad_norm": 15.784370422363281, "learning_rate": 9.839738858927052e-05, "loss": 0.30497045516967775, "step": 5650 }, { "epoch": 1.606585296622197, "grad_norm": 11.057462692260742, "learning_rate": 9.839455009934714e-05, "loss": 0.3651214838027954, "step": 5660 }, { "epoch": 1.6094237865455576, "grad_norm": 15.726938247680664, "learning_rate": 9.839171160942379e-05, "loss": 0.40771989822387694, "step": 5670 }, { "epoch": 1.6122622764689185, "grad_norm": 15.395030975341797, "learning_rate": 9.838887311950044e-05, "loss": 0.3829269647598267, "step": 5680 }, { "epoch": 1.6151007663922794, "grad_norm": 13.953539848327637, "learning_rate": 9.838603462957707e-05, "loss": 0.3378304004669189, "step": 5690 }, { "epoch": 1.61793925631564, "grad_norm": 12.328048706054688, "learning_rate": 9.838319613965371e-05, "loss": 0.3928564786911011, "step": 5700 }, { "epoch": 1.6207777462390007, "grad_norm": 25.60142707824707, "learning_rate": 9.838035764973035e-05, "loss": 0.3597573757171631, "step": 5710 }, { "epoch": 1.6236162361623616, "grad_norm": 15.16163444519043, "learning_rate": 9.837751915980699e-05, "loss": 0.3663243293762207, "step": 5720 }, { "epoch": 1.6264547260857225, "grad_norm": 20.300569534301758, "learning_rate": 9.837468066988362e-05, "loss": 0.41712703704833987, "step": 5730 }, { "epoch": 1.6292932160090832, "grad_norm": 15.666120529174805, "learning_rate": 9.837184217996026e-05, "loss": 0.3529141664505005, "step": 5740 }, { "epoch": 1.6321317059324438, "grad_norm": 17.87799644470215, "learning_rate": 9.83690036900369e-05, "loss": 0.37146754264831544, "step": 5750 }, { "epoch": 1.6349701958558047, "grad_norm": 16.82723045349121, "learning_rate": 9.836616520011354e-05, "loss": 0.3213114023208618, "step": 5760 }, { "epoch": 1.6378086857791656, "grad_norm": 19.079776763916016, "learning_rate": 9.836332671019019e-05, "loss": 0.3558051109313965, "step": 5770 }, { "epoch": 1.6406471757025263, "grad_norm": 16.492074966430664, "learning_rate": 9.836048822026683e-05, "loss": 0.3484348297119141, "step": 5780 }, { "epoch": 1.643485665625887, "grad_norm": 12.371981620788574, "learning_rate": 9.835764973034345e-05, "loss": 0.3237560987472534, "step": 5790 }, { "epoch": 1.6463241555492478, "grad_norm": 14.955903053283691, "learning_rate": 9.83548112404201e-05, "loss": 0.39213547706604, "step": 5800 }, { "epoch": 1.6491626454726087, "grad_norm": 17.922128677368164, "learning_rate": 9.835197275049675e-05, "loss": 0.41125311851501467, "step": 5810 }, { "epoch": 1.6520011353959694, "grad_norm": 13.147631645202637, "learning_rate": 9.834913426057338e-05, "loss": 0.3657277822494507, "step": 5820 }, { "epoch": 1.65483962531933, "grad_norm": 16.529460906982422, "learning_rate": 9.834629577065002e-05, "loss": 0.32744946479797366, "step": 5830 }, { "epoch": 1.657678115242691, "grad_norm": 10.893362998962402, "learning_rate": 9.834345728072666e-05, "loss": 0.3732975959777832, "step": 5840 }, { "epoch": 1.6605166051660518, "grad_norm": 20.3452091217041, "learning_rate": 9.834061879080329e-05, "loss": 0.3589163303375244, "step": 5850 }, { "epoch": 1.6633550950894125, "grad_norm": 18.298891067504883, "learning_rate": 9.833778030087993e-05, "loss": 0.33064637184143064, "step": 5860 }, { "epoch": 1.6661935850127731, "grad_norm": 16.244760513305664, "learning_rate": 9.833494181095657e-05, "loss": 0.3712292194366455, "step": 5870 }, { "epoch": 1.6690320749361338, "grad_norm": 10.698577880859375, "learning_rate": 9.833210332103321e-05, "loss": 0.391558837890625, "step": 5880 }, { "epoch": 1.6718705648594947, "grad_norm": 29.579830169677734, "learning_rate": 9.832926483110985e-05, "loss": 0.3333099126815796, "step": 5890 }, { "epoch": 1.6747090547828556, "grad_norm": 18.938533782958984, "learning_rate": 9.83264263411865e-05, "loss": 0.33517343997955323, "step": 5900 }, { "epoch": 1.6775475447062163, "grad_norm": 16.876991271972656, "learning_rate": 9.832358785126314e-05, "loss": 0.3522335052490234, "step": 5910 }, { "epoch": 1.680386034629577, "grad_norm": 14.537792205810547, "learning_rate": 9.832074936133977e-05, "loss": 0.3172943353652954, "step": 5920 }, { "epoch": 1.6832245245529378, "grad_norm": 17.584850311279297, "learning_rate": 9.831791087141641e-05, "loss": 0.27476043701171876, "step": 5930 }, { "epoch": 1.6860630144762987, "grad_norm": 14.82959270477295, "learning_rate": 9.831507238149305e-05, "loss": 0.3249894857406616, "step": 5940 }, { "epoch": 1.6889015043996594, "grad_norm": 15.364680290222168, "learning_rate": 9.831223389156969e-05, "loss": 0.2995280742645264, "step": 5950 }, { "epoch": 1.69173999432302, "grad_norm": 19.844539642333984, "learning_rate": 9.830939540164633e-05, "loss": 0.3330647945404053, "step": 5960 }, { "epoch": 1.694578484246381, "grad_norm": 16.168331146240234, "learning_rate": 9.830655691172297e-05, "loss": 0.2712660551071167, "step": 5970 }, { "epoch": 1.6974169741697418, "grad_norm": 20.28485107421875, "learning_rate": 9.83037184217996e-05, "loss": 0.40412187576293945, "step": 5980 }, { "epoch": 1.7002554640931025, "grad_norm": 10.756804466247559, "learning_rate": 9.830087993187624e-05, "loss": 0.2926353693008423, "step": 5990 }, { "epoch": 1.7030939540164631, "grad_norm": 17.3121395111084, "learning_rate": 9.829804144195288e-05, "loss": 0.3443186283111572, "step": 6000 }, { "epoch": 1.7030939540164631, "eval_accuracy": 0.8349971386787054, "eval_loss": 0.5117692351341248, "eval_runtime": 48.8366, "eval_samples_per_second": 322.033, "eval_steps_per_second": 5.037, "step": 6000 }, { "epoch": 1.705932443939824, "grad_norm": 12.070837020874023, "learning_rate": 9.829520295202952e-05, "loss": 0.353992223739624, "step": 6010 }, { "epoch": 1.708770933863185, "grad_norm": 10.661041259765625, "learning_rate": 9.829236446210617e-05, "loss": 0.3029832601547241, "step": 6020 }, { "epoch": 1.7116094237865456, "grad_norm": 17.887121200561523, "learning_rate": 9.828952597218281e-05, "loss": 0.30340917110443116, "step": 6030 }, { "epoch": 1.7144479137099062, "grad_norm": 13.913177490234375, "learning_rate": 9.828668748225945e-05, "loss": 0.3632274866104126, "step": 6040 }, { "epoch": 1.7172864036332671, "grad_norm": 17.006633758544922, "learning_rate": 9.828384899233608e-05, "loss": 0.3082206964492798, "step": 6050 }, { "epoch": 1.720124893556628, "grad_norm": 11.711881637573242, "learning_rate": 9.828101050241272e-05, "loss": 0.32248313426971437, "step": 6060 }, { "epoch": 1.7229633834799887, "grad_norm": 16.441007614135742, "learning_rate": 9.827817201248936e-05, "loss": 0.30312275886535645, "step": 6070 }, { "epoch": 1.7258018734033493, "grad_norm": 11.873711585998535, "learning_rate": 9.8275333522566e-05, "loss": 0.27329530715942385, "step": 6080 }, { "epoch": 1.7286403633267102, "grad_norm": 16.936992645263672, "learning_rate": 9.827249503264264e-05, "loss": 0.4037932395935059, "step": 6090 }, { "epoch": 1.7314788532500711, "grad_norm": 21.285125732421875, "learning_rate": 9.826965654271928e-05, "loss": 0.2814476013183594, "step": 6100 }, { "epoch": 1.7343173431734318, "grad_norm": 16.657012939453125, "learning_rate": 9.826681805279591e-05, "loss": 0.2976027011871338, "step": 6110 }, { "epoch": 1.7371558330967924, "grad_norm": 16.67571258544922, "learning_rate": 9.826397956287255e-05, "loss": 0.3533459186553955, "step": 6120 }, { "epoch": 1.7399943230201533, "grad_norm": 13.06078052520752, "learning_rate": 9.82611410729492e-05, "loss": 0.3323373556137085, "step": 6130 }, { "epoch": 1.742832812943514, "grad_norm": 11.427002906799316, "learning_rate": 9.825830258302584e-05, "loss": 0.2924748659133911, "step": 6140 }, { "epoch": 1.7456713028668749, "grad_norm": 22.251684188842773, "learning_rate": 9.825546409310248e-05, "loss": 0.3371902942657471, "step": 6150 }, { "epoch": 1.7485097927902356, "grad_norm": 18.60978126525879, "learning_rate": 9.825262560317912e-05, "loss": 0.3415785789489746, "step": 6160 }, { "epoch": 1.7513482827135962, "grad_norm": 16.729785919189453, "learning_rate": 9.824978711325576e-05, "loss": 0.3345015287399292, "step": 6170 }, { "epoch": 1.754186772636957, "grad_norm": 17.905529022216797, "learning_rate": 9.824694862333239e-05, "loss": 0.375708794593811, "step": 6180 }, { "epoch": 1.757025262560318, "grad_norm": 16.46019172668457, "learning_rate": 9.824411013340903e-05, "loss": 0.35086350440979003, "step": 6190 }, { "epoch": 1.7598637524836787, "grad_norm": 16.950963973999023, "learning_rate": 9.824127164348567e-05, "loss": 0.31397314071655275, "step": 6200 }, { "epoch": 1.7627022424070393, "grad_norm": 20.461437225341797, "learning_rate": 9.823843315356231e-05, "loss": 0.3586702823638916, "step": 6210 }, { "epoch": 1.7655407323304002, "grad_norm": 20.182470321655273, "learning_rate": 9.823559466363895e-05, "loss": 0.2895189762115479, "step": 6220 }, { "epoch": 1.768379222253761, "grad_norm": 21.47096061706543, "learning_rate": 9.82327561737156e-05, "loss": 0.35933992862701414, "step": 6230 }, { "epoch": 1.7712177121771218, "grad_norm": 16.219079971313477, "learning_rate": 9.822991768379222e-05, "loss": 0.29049115180969237, "step": 6240 }, { "epoch": 1.7740562021004824, "grad_norm": 11.221471786499023, "learning_rate": 9.822707919386886e-05, "loss": 0.2724747657775879, "step": 6250 }, { "epoch": 1.7768946920238433, "grad_norm": 10.871788024902344, "learning_rate": 9.82242407039455e-05, "loss": 0.2815844058990479, "step": 6260 }, { "epoch": 1.7797331819472042, "grad_norm": 12.997661590576172, "learning_rate": 9.822140221402215e-05, "loss": 0.3556910753250122, "step": 6270 }, { "epoch": 1.7825716718705649, "grad_norm": 22.975894927978516, "learning_rate": 9.821856372409879e-05, "loss": 0.3292320489883423, "step": 6280 }, { "epoch": 1.7854101617939255, "grad_norm": 16.16305923461914, "learning_rate": 9.821572523417543e-05, "loss": 0.2838417053222656, "step": 6290 }, { "epoch": 1.7882486517172864, "grad_norm": 11.255255699157715, "learning_rate": 9.821288674425207e-05, "loss": 0.3113535165786743, "step": 6300 }, { "epoch": 1.7910871416406473, "grad_norm": 18.050884246826172, "learning_rate": 9.82100482543287e-05, "loss": 0.3536433935165405, "step": 6310 }, { "epoch": 1.793925631564008, "grad_norm": 15.509233474731445, "learning_rate": 9.820720976440534e-05, "loss": 0.2790750741958618, "step": 6320 }, { "epoch": 1.7967641214873686, "grad_norm": 17.322568893432617, "learning_rate": 9.820437127448198e-05, "loss": 0.30753586292266843, "step": 6330 }, { "epoch": 1.7996026114107295, "grad_norm": 16.36469841003418, "learning_rate": 9.820153278455861e-05, "loss": 0.36039557456970217, "step": 6340 }, { "epoch": 1.8024411013340904, "grad_norm": 19.770418167114258, "learning_rate": 9.819869429463526e-05, "loss": 0.3671365261077881, "step": 6350 }, { "epoch": 1.805279591257451, "grad_norm": 12.352028846740723, "learning_rate": 9.81958558047119e-05, "loss": 0.31608612537384034, "step": 6360 }, { "epoch": 1.8081180811808117, "grad_norm": 9.862142562866211, "learning_rate": 9.819301731478853e-05, "loss": 0.33545398712158203, "step": 6370 }, { "epoch": 1.8109565711041726, "grad_norm": 13.603276252746582, "learning_rate": 9.819017882486517e-05, "loss": 0.3297145366668701, "step": 6380 }, { "epoch": 1.8137950610275335, "grad_norm": 16.01658058166504, "learning_rate": 9.818734033494182e-05, "loss": 0.3139737367630005, "step": 6390 }, { "epoch": 1.8166335509508942, "grad_norm": 9.831218719482422, "learning_rate": 9.818450184501846e-05, "loss": 0.29967679977416994, "step": 6400 }, { "epoch": 1.8194720408742548, "grad_norm": 17.045936584472656, "learning_rate": 9.81816633550951e-05, "loss": 0.3175974369049072, "step": 6410 }, { "epoch": 1.8223105307976155, "grad_norm": 17.890777587890625, "learning_rate": 9.817882486517174e-05, "loss": 0.355149245262146, "step": 6420 }, { "epoch": 1.8251490207209764, "grad_norm": 13.317343711853027, "learning_rate": 9.817598637524838e-05, "loss": 0.2842238187789917, "step": 6430 }, { "epoch": 1.8279875106443373, "grad_norm": 17.035802841186523, "learning_rate": 9.817314788532501e-05, "loss": 0.2768709659576416, "step": 6440 }, { "epoch": 1.830826000567698, "grad_norm": 16.847822189331055, "learning_rate": 9.817030939540165e-05, "loss": 0.2781175374984741, "step": 6450 }, { "epoch": 1.8336644904910586, "grad_norm": 15.062447547912598, "learning_rate": 9.816747090547829e-05, "loss": 0.3768578290939331, "step": 6460 }, { "epoch": 1.8365029804144195, "grad_norm": 12.556950569152832, "learning_rate": 9.816463241555492e-05, "loss": 0.3072884798049927, "step": 6470 }, { "epoch": 1.8393414703377804, "grad_norm": 13.883233070373535, "learning_rate": 9.816179392563157e-05, "loss": 0.26720499992370605, "step": 6480 }, { "epoch": 1.842179960261141, "grad_norm": 13.54625129699707, "learning_rate": 9.815895543570822e-05, "loss": 0.28811466693878174, "step": 6490 }, { "epoch": 1.8450184501845017, "grad_norm": 18.880754470825195, "learning_rate": 9.815611694578484e-05, "loss": 0.27132861614227294, "step": 6500 }, { "epoch": 1.8450184501845017, "eval_accuracy": 0.844026196986075, "eval_loss": 0.4758400022983551, "eval_runtime": 42.5516, "eval_samples_per_second": 369.598, "eval_steps_per_second": 5.781, "step": 6500 }, { "epoch": 1.8478569401078626, "grad_norm": 15.51987361907959, "learning_rate": 9.815327845586148e-05, "loss": 0.2886358261108398, "step": 6510 }, { "epoch": 1.8506954300312235, "grad_norm": 12.485743522644043, "learning_rate": 9.815043996593813e-05, "loss": 0.3087273597717285, "step": 6520 }, { "epoch": 1.8535339199545842, "grad_norm": 15.060705184936523, "learning_rate": 9.814760147601477e-05, "loss": 0.2822369813919067, "step": 6530 }, { "epoch": 1.8563724098779448, "grad_norm": 17.263790130615234, "learning_rate": 9.81447629860914e-05, "loss": 0.3083992004394531, "step": 6540 }, { "epoch": 1.8592108998013057, "grad_norm": 21.07904052734375, "learning_rate": 9.814192449616805e-05, "loss": 0.3222493648529053, "step": 6550 }, { "epoch": 1.8620493897246666, "grad_norm": 18.38567543029785, "learning_rate": 9.813908600624469e-05, "loss": 0.3827720880508423, "step": 6560 }, { "epoch": 1.8648878796480273, "grad_norm": 18.4997615814209, "learning_rate": 9.813624751632132e-05, "loss": 0.2893622875213623, "step": 6570 }, { "epoch": 1.867726369571388, "grad_norm": 18.927072525024414, "learning_rate": 9.813340902639796e-05, "loss": 0.3140169620513916, "step": 6580 }, { "epoch": 1.8705648594947488, "grad_norm": 15.130321502685547, "learning_rate": 9.81305705364746e-05, "loss": 0.29467949867248533, "step": 6590 }, { "epoch": 1.8734033494181097, "grad_norm": 22.18245506286621, "learning_rate": 9.812773204655123e-05, "loss": 0.299084210395813, "step": 6600 }, { "epoch": 1.8762418393414704, "grad_norm": 10.239355087280273, "learning_rate": 9.812489355662788e-05, "loss": 0.3077681303024292, "step": 6610 }, { "epoch": 1.879080329264831, "grad_norm": 20.043983459472656, "learning_rate": 9.812205506670453e-05, "loss": 0.2600071668624878, "step": 6620 }, { "epoch": 1.881918819188192, "grad_norm": 23.155086517333984, "learning_rate": 9.811921657678115e-05, "loss": 0.30149168968200685, "step": 6630 }, { "epoch": 1.8847573091115528, "grad_norm": 18.500080108642578, "learning_rate": 9.81163780868578e-05, "loss": 0.3076582670211792, "step": 6640 }, { "epoch": 1.8875957990349135, "grad_norm": 18.784940719604492, "learning_rate": 9.811353959693444e-05, "loss": 0.33830249309539795, "step": 6650 }, { "epoch": 1.8904342889582741, "grad_norm": 18.382831573486328, "learning_rate": 9.811070110701108e-05, "loss": 0.35420525074005127, "step": 6660 }, { "epoch": 1.8932727788816348, "grad_norm": 15.969094276428223, "learning_rate": 9.81078626170877e-05, "loss": 0.33379483222961426, "step": 6670 }, { "epoch": 1.8961112688049957, "grad_norm": 25.82126235961914, "learning_rate": 9.810502412716436e-05, "loss": 0.33286774158477783, "step": 6680 }, { "epoch": 1.8989497587283566, "grad_norm": 19.6887149810791, "learning_rate": 9.810218563724099e-05, "loss": 0.2990121603012085, "step": 6690 }, { "epoch": 1.9017882486517173, "grad_norm": 9.597956657409668, "learning_rate": 9.809934714731763e-05, "loss": 0.28246617317199707, "step": 6700 }, { "epoch": 1.904626738575078, "grad_norm": 13.332200050354004, "learning_rate": 9.809650865739427e-05, "loss": 0.27547593116760255, "step": 6710 }, { "epoch": 1.9074652284984388, "grad_norm": 12.529211044311523, "learning_rate": 9.809367016747091e-05, "loss": 0.27085394859313966, "step": 6720 }, { "epoch": 1.9103037184217997, "grad_norm": 18.7702693939209, "learning_rate": 9.809083167754754e-05, "loss": 0.33766510486602785, "step": 6730 }, { "epoch": 1.9131422083451604, "grad_norm": 11.348345756530762, "learning_rate": 9.808799318762418e-05, "loss": 0.2926100015640259, "step": 6740 }, { "epoch": 1.915980698268521, "grad_norm": 20.05299949645996, "learning_rate": 9.808515469770084e-05, "loss": 0.33159537315368653, "step": 6750 }, { "epoch": 1.918819188191882, "grad_norm": 16.290056228637695, "learning_rate": 9.808231620777746e-05, "loss": 0.36456098556518557, "step": 6760 }, { "epoch": 1.9216576781152428, "grad_norm": 11.45508098602295, "learning_rate": 9.80794777178541e-05, "loss": 0.2974071741104126, "step": 6770 }, { "epoch": 1.9244961680386035, "grad_norm": 13.075309753417969, "learning_rate": 9.807663922793075e-05, "loss": 0.2561248064041138, "step": 6780 }, { "epoch": 1.9273346579619641, "grad_norm": 19.12933349609375, "learning_rate": 9.807380073800738e-05, "loss": 0.3470123767852783, "step": 6790 }, { "epoch": 1.930173147885325, "grad_norm": 17.105192184448242, "learning_rate": 9.807096224808402e-05, "loss": 0.28379197120666505, "step": 6800 }, { "epoch": 1.933011637808686, "grad_norm": 12.469542503356934, "learning_rate": 9.806812375816067e-05, "loss": 0.26621673107147215, "step": 6810 }, { "epoch": 1.9358501277320466, "grad_norm": 20.196378707885742, "learning_rate": 9.80652852682373e-05, "loss": 0.27137002944946287, "step": 6820 }, { "epoch": 1.9386886176554072, "grad_norm": 12.58461856842041, "learning_rate": 9.806244677831394e-05, "loss": 0.301654052734375, "step": 6830 }, { "epoch": 1.9415271075787681, "grad_norm": 17.261821746826172, "learning_rate": 9.805960828839058e-05, "loss": 0.21995868682861328, "step": 6840 }, { "epoch": 1.944365597502129, "grad_norm": 11.840019226074219, "learning_rate": 9.805676979846722e-05, "loss": 0.2743107795715332, "step": 6850 }, { "epoch": 1.9472040874254897, "grad_norm": 13.75365924835205, "learning_rate": 9.805393130854385e-05, "loss": 0.29893038272857664, "step": 6860 }, { "epoch": 1.9500425773488503, "grad_norm": 12.187987327575684, "learning_rate": 9.805109281862049e-05, "loss": 0.24702579975128175, "step": 6870 }, { "epoch": 1.9528810672722112, "grad_norm": 9.265935897827148, "learning_rate": 9.804825432869715e-05, "loss": 0.27664918899536134, "step": 6880 }, { "epoch": 1.9557195571955721, "grad_norm": 11.776315689086914, "learning_rate": 9.804541583877378e-05, "loss": 0.288569974899292, "step": 6890 }, { "epoch": 1.9585580471189328, "grad_norm": 14.991866111755371, "learning_rate": 9.804257734885042e-05, "loss": 0.2685967206954956, "step": 6900 }, { "epoch": 1.9613965370422934, "grad_norm": 17.28834342956543, "learning_rate": 9.803973885892706e-05, "loss": 0.24035120010375977, "step": 6910 }, { "epoch": 1.964235026965654, "grad_norm": 13.664270401000977, "learning_rate": 9.803690036900369e-05, "loss": 0.30886015892028806, "step": 6920 }, { "epoch": 1.967073516889015, "grad_norm": 14.245565414428711, "learning_rate": 9.803406187908033e-05, "loss": 0.2999440670013428, "step": 6930 }, { "epoch": 1.9699120068123759, "grad_norm": 16.7731990814209, "learning_rate": 9.803122338915698e-05, "loss": 0.2886981010437012, "step": 6940 }, { "epoch": 1.9727504967357365, "grad_norm": 14.409491539001465, "learning_rate": 9.802838489923361e-05, "loss": 0.25193033218383787, "step": 6950 }, { "epoch": 1.9755889866590972, "grad_norm": 14.701410293579102, "learning_rate": 9.802554640931025e-05, "loss": 0.2685612440109253, "step": 6960 }, { "epoch": 1.978427476582458, "grad_norm": 9.440011024475098, "learning_rate": 9.802270791938689e-05, "loss": 0.2816462516784668, "step": 6970 }, { "epoch": 1.981265966505819, "grad_norm": 12.224149703979492, "learning_rate": 9.801986942946353e-05, "loss": 0.28878097534179686, "step": 6980 }, { "epoch": 1.9841044564291797, "grad_norm": 17.04814910888672, "learning_rate": 9.801703093954016e-05, "loss": 0.27490208148956297, "step": 6990 }, { "epoch": 1.9869429463525403, "grad_norm": 14.30703067779541, "learning_rate": 9.80141924496168e-05, "loss": 0.34579195976257326, "step": 7000 }, { "epoch": 1.9869429463525403, "eval_accuracy": 0.8470782730336365, "eval_loss": 0.4579145014286041, "eval_runtime": 39.7762, "eval_samples_per_second": 395.387, "eval_steps_per_second": 6.185, "step": 7000 }, { "epoch": 1.9897814362759012, "grad_norm": 17.47616958618164, "learning_rate": 9.801135395969346e-05, "loss": 0.27119882106781007, "step": 7010 }, { "epoch": 1.992619926199262, "grad_norm": 19.48821258544922, "learning_rate": 9.800851546977009e-05, "loss": 0.37435662746429443, "step": 7020 }, { "epoch": 1.9954584161226228, "grad_norm": 17.410888671875, "learning_rate": 9.800567697984673e-05, "loss": 0.27069568634033203, "step": 7030 }, { "epoch": 1.9982969060459834, "grad_norm": 19.001434326171875, "learning_rate": 9.800283848992337e-05, "loss": 0.2687077522277832, "step": 7040 }, { "epoch": 2.001135395969344, "grad_norm": 18.763477325439453, "learning_rate": 9.8e-05, "loss": 0.2568866014480591, "step": 7050 }, { "epoch": 2.003973885892705, "grad_norm": 15.480113983154297, "learning_rate": 9.799716151007664e-05, "loss": 0.25344271659851075, "step": 7060 }, { "epoch": 2.006812375816066, "grad_norm": 11.270167350769043, "learning_rate": 9.799432302015328e-05, "loss": 0.2638725280761719, "step": 7070 }, { "epoch": 2.0096508657394265, "grad_norm": 15.421369552612305, "learning_rate": 9.799148453022992e-05, "loss": 0.2819019556045532, "step": 7080 }, { "epoch": 2.012489355662787, "grad_norm": 16.260509490966797, "learning_rate": 9.798864604030656e-05, "loss": 0.2392103910446167, "step": 7090 }, { "epoch": 2.0153278455861483, "grad_norm": 15.831380844116211, "learning_rate": 9.79858075503832e-05, "loss": 0.3345108270645142, "step": 7100 }, { "epoch": 2.018166335509509, "grad_norm": 11.0287446975708, "learning_rate": 9.798296906045985e-05, "loss": 0.24355390071868896, "step": 7110 }, { "epoch": 2.0210048254328696, "grad_norm": 11.077299118041992, "learning_rate": 9.798013057053647e-05, "loss": 0.23610680103302, "step": 7120 }, { "epoch": 2.0238433153562303, "grad_norm": 15.252506256103516, "learning_rate": 9.797729208061311e-05, "loss": 0.26910417079925536, "step": 7130 }, { "epoch": 2.0266818052795914, "grad_norm": 15.534907341003418, "learning_rate": 9.797445359068977e-05, "loss": 0.26159300804138186, "step": 7140 }, { "epoch": 2.029520295202952, "grad_norm": 18.039833068847656, "learning_rate": 9.79716151007664e-05, "loss": 0.23557920455932618, "step": 7150 }, { "epoch": 2.0323587851263127, "grad_norm": 9.566669464111328, "learning_rate": 9.796877661084304e-05, "loss": 0.23839621543884276, "step": 7160 }, { "epoch": 2.0351972750496734, "grad_norm": 13.85372543334961, "learning_rate": 9.796593812091968e-05, "loss": 0.24598495960235595, "step": 7170 }, { "epoch": 2.0380357649730345, "grad_norm": 18.093406677246094, "learning_rate": 9.796309963099631e-05, "loss": 0.2451035499572754, "step": 7180 }, { "epoch": 2.040874254896395, "grad_norm": 13.53355884552002, "learning_rate": 9.796026114107295e-05, "loss": 0.2538771152496338, "step": 7190 }, { "epoch": 2.043712744819756, "grad_norm": 20.300521850585938, "learning_rate": 9.795742265114959e-05, "loss": 0.267975902557373, "step": 7200 }, { "epoch": 2.0465512347431165, "grad_norm": 10.4107666015625, "learning_rate": 9.795458416122623e-05, "loss": 0.31701467037200926, "step": 7210 }, { "epoch": 2.0493897246664776, "grad_norm": 13.979427337646484, "learning_rate": 9.795174567130287e-05, "loss": 0.2772643566131592, "step": 7220 }, { "epoch": 2.0522282145898383, "grad_norm": 12.8544921875, "learning_rate": 9.794890718137951e-05, "loss": 0.2398134708404541, "step": 7230 }, { "epoch": 2.055066704513199, "grad_norm": 9.887802124023438, "learning_rate": 9.794606869145616e-05, "loss": 0.23170738220214843, "step": 7240 }, { "epoch": 2.0579051944365596, "grad_norm": 21.729307174682617, "learning_rate": 9.794323020153278e-05, "loss": 0.2159505844116211, "step": 7250 }, { "epoch": 2.0607436843599207, "grad_norm": 17.41312599182129, "learning_rate": 9.794039171160943e-05, "loss": 0.31598806381225586, "step": 7260 }, { "epoch": 2.0635821742832814, "grad_norm": 13.736372947692871, "learning_rate": 9.793755322168607e-05, "loss": 0.2732905626296997, "step": 7270 }, { "epoch": 2.066420664206642, "grad_norm": 19.480266571044922, "learning_rate": 9.793471473176271e-05, "loss": 0.24543089866638185, "step": 7280 }, { "epoch": 2.0692591541300027, "grad_norm": 14.69006633758545, "learning_rate": 9.793187624183935e-05, "loss": 0.25444531440734863, "step": 7290 }, { "epoch": 2.072097644053364, "grad_norm": 20.38078498840332, "learning_rate": 9.792903775191599e-05, "loss": 0.28119847774505613, "step": 7300 }, { "epoch": 2.0749361339767245, "grad_norm": 12.320239067077637, "learning_rate": 9.792619926199262e-05, "loss": 0.22171955108642577, "step": 7310 }, { "epoch": 2.077774623900085, "grad_norm": 16.38958740234375, "learning_rate": 9.792336077206926e-05, "loss": 0.23989379405975342, "step": 7320 }, { "epoch": 2.080613113823446, "grad_norm": 12.612602233886719, "learning_rate": 9.79205222821459e-05, "loss": 0.2657534122467041, "step": 7330 }, { "epoch": 2.0834516037468065, "grad_norm": 15.754828453063965, "learning_rate": 9.791768379222254e-05, "loss": 0.24550879001617432, "step": 7340 }, { "epoch": 2.0862900936701676, "grad_norm": 9.174315452575684, "learning_rate": 9.791484530229918e-05, "loss": 0.21711139678955077, "step": 7350 }, { "epoch": 2.0891285835935283, "grad_norm": 21.177871704101562, "learning_rate": 9.791200681237583e-05, "loss": 0.2620918035507202, "step": 7360 }, { "epoch": 2.091967073516889, "grad_norm": 14.447072982788086, "learning_rate": 9.790916832245247e-05, "loss": 0.21041111946105956, "step": 7370 }, { "epoch": 2.0948055634402496, "grad_norm": 12.282468795776367, "learning_rate": 9.79063298325291e-05, "loss": 0.20335080623626708, "step": 7380 }, { "epoch": 2.0976440533636107, "grad_norm": 11.509307861328125, "learning_rate": 9.790349134260574e-05, "loss": 0.23172810077667236, "step": 7390 }, { "epoch": 2.1004825432869714, "grad_norm": 13.297411918640137, "learning_rate": 9.790065285268238e-05, "loss": 0.20893850326538085, "step": 7400 }, { "epoch": 2.103321033210332, "grad_norm": 12.008325576782227, "learning_rate": 9.789781436275902e-05, "loss": 0.23886916637420655, "step": 7410 }, { "epoch": 2.1061595231336927, "grad_norm": 15.531465530395508, "learning_rate": 9.789497587283566e-05, "loss": 0.24417381286621093, "step": 7420 }, { "epoch": 2.108998013057054, "grad_norm": 14.146934509277344, "learning_rate": 9.78921373829123e-05, "loss": 0.25829367637634276, "step": 7430 }, { "epoch": 2.1118365029804145, "grad_norm": 13.305428504943848, "learning_rate": 9.788929889298893e-05, "loss": 0.23140008449554444, "step": 7440 }, { "epoch": 2.114674992903775, "grad_norm": 14.72077751159668, "learning_rate": 9.788646040306557e-05, "loss": 0.2569812536239624, "step": 7450 }, { "epoch": 2.117513482827136, "grad_norm": 10.722192764282227, "learning_rate": 9.788362191314221e-05, "loss": 0.24325745105743407, "step": 7460 }, { "epoch": 2.120351972750497, "grad_norm": 12.012234687805176, "learning_rate": 9.788078342321885e-05, "loss": 0.24082765579223633, "step": 7470 }, { "epoch": 2.1231904626738576, "grad_norm": 9.280113220214844, "learning_rate": 9.78779449332955e-05, "loss": 0.2417231798171997, "step": 7480 }, { "epoch": 2.1260289525972182, "grad_norm": 19.433364868164062, "learning_rate": 9.787510644337214e-05, "loss": 0.2387477159500122, "step": 7490 }, { "epoch": 2.128867442520579, "grad_norm": 20.514432907104492, "learning_rate": 9.787226795344878e-05, "loss": 0.23306105136871338, "step": 7500 }, { "epoch": 2.128867442520579, "eval_accuracy": 0.8650092198130603, "eval_loss": 0.4215228259563446, "eval_runtime": 38.7033, "eval_samples_per_second": 406.348, "eval_steps_per_second": 6.356, "step": 7500 }, { "epoch": 2.13170593244394, "grad_norm": 7.6840901374816895, "learning_rate": 9.78694294635254e-05, "loss": 0.19927889108657837, "step": 7510 }, { "epoch": 2.1345444223673007, "grad_norm": 12.73912239074707, "learning_rate": 9.786659097360205e-05, "loss": 0.2823503017425537, "step": 7520 }, { "epoch": 2.1373829122906614, "grad_norm": 12.264430046081543, "learning_rate": 9.786375248367869e-05, "loss": 0.2558239698410034, "step": 7530 }, { "epoch": 2.140221402214022, "grad_norm": 15.635944366455078, "learning_rate": 9.786091399375533e-05, "loss": 0.2264336347579956, "step": 7540 }, { "epoch": 2.1430598921373827, "grad_norm": 14.154080390930176, "learning_rate": 9.785807550383197e-05, "loss": 0.2509979009628296, "step": 7550 }, { "epoch": 2.145898382060744, "grad_norm": 23.10524559020996, "learning_rate": 9.785523701390861e-05, "loss": 0.28000941276550295, "step": 7560 }, { "epoch": 2.1487368719841045, "grad_norm": 15.437457084655762, "learning_rate": 9.785239852398524e-05, "loss": 0.24493563175201416, "step": 7570 }, { "epoch": 2.151575361907465, "grad_norm": 9.363497734069824, "learning_rate": 9.784956003406188e-05, "loss": 0.19328728914260865, "step": 7580 }, { "epoch": 2.1544138518308262, "grad_norm": 9.874027252197266, "learning_rate": 9.784672154413852e-05, "loss": 0.20350320339202882, "step": 7590 }, { "epoch": 2.157252341754187, "grad_norm": 10.85040283203125, "learning_rate": 9.784388305421516e-05, "loss": 0.25958690643310545, "step": 7600 }, { "epoch": 2.1600908316775476, "grad_norm": 10.430965423583984, "learning_rate": 9.78410445642918e-05, "loss": 0.23683767318725585, "step": 7610 }, { "epoch": 2.1629293216009082, "grad_norm": 16.316247940063477, "learning_rate": 9.783820607436845e-05, "loss": 0.2220076084136963, "step": 7620 }, { "epoch": 2.165767811524269, "grad_norm": 10.482468605041504, "learning_rate": 9.783536758444507e-05, "loss": 0.222816801071167, "step": 7630 }, { "epoch": 2.16860630144763, "grad_norm": 15.458293914794922, "learning_rate": 9.783252909452172e-05, "loss": 0.27438170909881593, "step": 7640 }, { "epoch": 2.1714447913709907, "grad_norm": 11.545936584472656, "learning_rate": 9.782969060459836e-05, "loss": 0.2191019296646118, "step": 7650 }, { "epoch": 2.1742832812943513, "grad_norm": 19.762968063354492, "learning_rate": 9.7826852114675e-05, "loss": 0.26726441383361815, "step": 7660 }, { "epoch": 2.177121771217712, "grad_norm": 11.782361030578613, "learning_rate": 9.782401362475163e-05, "loss": 0.24666857719421387, "step": 7670 }, { "epoch": 2.179960261141073, "grad_norm": 16.626890182495117, "learning_rate": 9.782117513482828e-05, "loss": 0.23461694717407228, "step": 7680 }, { "epoch": 2.1827987510644338, "grad_norm": 21.103208541870117, "learning_rate": 9.781833664490492e-05, "loss": 0.26543688774108887, "step": 7690 }, { "epoch": 2.1856372409877944, "grad_norm": 18.049556732177734, "learning_rate": 9.781549815498155e-05, "loss": 0.24013686180114746, "step": 7700 }, { "epoch": 2.188475730911155, "grad_norm": 19.514453887939453, "learning_rate": 9.781265966505819e-05, "loss": 0.23417603969573975, "step": 7710 }, { "epoch": 2.191314220834516, "grad_norm": 15.171562194824219, "learning_rate": 9.780982117513483e-05, "loss": 0.22882506847381592, "step": 7720 }, { "epoch": 2.194152710757877, "grad_norm": 14.083772659301758, "learning_rate": 9.780698268521146e-05, "loss": 0.2550989627838135, "step": 7730 }, { "epoch": 2.1969912006812375, "grad_norm": 10.938915252685547, "learning_rate": 9.780414419528812e-05, "loss": 0.2208855152130127, "step": 7740 }, { "epoch": 2.199829690604598, "grad_norm": 13.666425704956055, "learning_rate": 9.780130570536476e-05, "loss": 0.2466364860534668, "step": 7750 }, { "epoch": 2.2026681805279593, "grad_norm": 19.512454986572266, "learning_rate": 9.779846721544139e-05, "loss": 0.22983946800231933, "step": 7760 }, { "epoch": 2.20550667045132, "grad_norm": 13.40416145324707, "learning_rate": 9.779562872551803e-05, "loss": 0.26017284393310547, "step": 7770 }, { "epoch": 2.2083451603746806, "grad_norm": 10.085061073303223, "learning_rate": 9.779279023559467e-05, "loss": 0.23039724826812744, "step": 7780 }, { "epoch": 2.2111836502980413, "grad_norm": 11.621413230895996, "learning_rate": 9.778995174567131e-05, "loss": 0.2539215564727783, "step": 7790 }, { "epoch": 2.2140221402214024, "grad_norm": 11.209738731384277, "learning_rate": 9.778711325574794e-05, "loss": 0.24424631595611573, "step": 7800 }, { "epoch": 2.216860630144763, "grad_norm": 12.942281723022461, "learning_rate": 9.778427476582459e-05, "loss": 0.26870782375335694, "step": 7810 }, { "epoch": 2.2196991200681238, "grad_norm": 11.966894149780273, "learning_rate": 9.778143627590123e-05, "loss": 0.24018533229827882, "step": 7820 }, { "epoch": 2.2225376099914844, "grad_norm": 14.007256507873535, "learning_rate": 9.777859778597786e-05, "loss": 0.2118708610534668, "step": 7830 }, { "epoch": 2.225376099914845, "grad_norm": 17.311471939086914, "learning_rate": 9.77757592960545e-05, "loss": 0.27283282279968263, "step": 7840 }, { "epoch": 2.228214589838206, "grad_norm": 11.731700897216797, "learning_rate": 9.777292080613114e-05, "loss": 0.26551094055175783, "step": 7850 }, { "epoch": 2.231053079761567, "grad_norm": 14.787330627441406, "learning_rate": 9.777008231620777e-05, "loss": 0.21845736503601074, "step": 7860 }, { "epoch": 2.2338915696849275, "grad_norm": 12.507437705993652, "learning_rate": 9.776724382628441e-05, "loss": 0.27095375061035154, "step": 7870 }, { "epoch": 2.236730059608288, "grad_norm": 9.725112915039062, "learning_rate": 9.776440533636107e-05, "loss": 0.21241676807403564, "step": 7880 }, { "epoch": 2.2395685495316493, "grad_norm": 10.974902153015137, "learning_rate": 9.77615668464377e-05, "loss": 0.2869737148284912, "step": 7890 }, { "epoch": 2.24240703945501, "grad_norm": 13.41285228729248, "learning_rate": 9.775872835651434e-05, "loss": 0.23968496322631835, "step": 7900 }, { "epoch": 2.2452455293783706, "grad_norm": 17.28588104248047, "learning_rate": 9.775588986659098e-05, "loss": 0.25711774826049805, "step": 7910 }, { "epoch": 2.2480840193017313, "grad_norm": 17.951156616210938, "learning_rate": 9.775305137666762e-05, "loss": 0.20355377197265626, "step": 7920 }, { "epoch": 2.2509225092250924, "grad_norm": 13.378880500793457, "learning_rate": 9.775021288674425e-05, "loss": 0.1990459680557251, "step": 7930 }, { "epoch": 2.253760999148453, "grad_norm": 18.869457244873047, "learning_rate": 9.77473743968209e-05, "loss": 0.22361304759979247, "step": 7940 }, { "epoch": 2.2565994890718137, "grad_norm": 15.550497055053711, "learning_rate": 9.774453590689754e-05, "loss": 0.27419803142547605, "step": 7950 }, { "epoch": 2.2594379789951744, "grad_norm": 13.781098365783691, "learning_rate": 9.774169741697417e-05, "loss": 0.2384237289428711, "step": 7960 }, { "epoch": 2.2622764689185355, "grad_norm": 14.52133846282959, "learning_rate": 9.773885892705081e-05, "loss": 0.21089982986450195, "step": 7970 }, { "epoch": 2.265114958841896, "grad_norm": 14.344047546386719, "learning_rate": 9.773602043712745e-05, "loss": 0.20667712688446044, "step": 7980 }, { "epoch": 2.267953448765257, "grad_norm": 10.374445915222168, "learning_rate": 9.773318194720408e-05, "loss": 0.2222202777862549, "step": 7990 }, { "epoch": 2.2707919386886175, "grad_norm": 13.58680534362793, "learning_rate": 9.773034345728072e-05, "loss": 0.19959030151367188, "step": 8000 }, { "epoch": 2.2707919386886175, "eval_accuracy": 0.8555350670820882, "eval_loss": 0.4406719207763672, "eval_runtime": 39.4859, "eval_samples_per_second": 398.294, "eval_steps_per_second": 6.23, "step": 8000 }, { "epoch": 2.2736304286119786, "grad_norm": 12.456775665283203, "learning_rate": 9.772750496735738e-05, "loss": 0.26481449604034424, "step": 8010 }, { "epoch": 2.2764689185353393, "grad_norm": 14.801284790039062, "learning_rate": 9.772466647743401e-05, "loss": 0.2251711368560791, "step": 8020 }, { "epoch": 2.2793074084587, "grad_norm": 9.664295196533203, "learning_rate": 9.772182798751065e-05, "loss": 0.2328707218170166, "step": 8030 }, { "epoch": 2.2821458983820606, "grad_norm": 14.9539213180542, "learning_rate": 9.771898949758729e-05, "loss": 0.22372379302978515, "step": 8040 }, { "epoch": 2.2849843883054213, "grad_norm": 10.81075382232666, "learning_rate": 9.771615100766393e-05, "loss": 0.20373163223266602, "step": 8050 }, { "epoch": 2.2878228782287824, "grad_norm": 15.880054473876953, "learning_rate": 9.771331251774056e-05, "loss": 0.2619627952575684, "step": 8060 }, { "epoch": 2.290661368152143, "grad_norm": 15.887392044067383, "learning_rate": 9.77104740278172e-05, "loss": 0.2041487693786621, "step": 8070 }, { "epoch": 2.2934998580755037, "grad_norm": 15.453225135803223, "learning_rate": 9.770763553789386e-05, "loss": 0.2081437587738037, "step": 8080 }, { "epoch": 2.296338347998865, "grad_norm": 12.111998558044434, "learning_rate": 9.770479704797048e-05, "loss": 0.22728829383850097, "step": 8090 }, { "epoch": 2.2991768379222255, "grad_norm": 18.026554107666016, "learning_rate": 9.770195855804712e-05, "loss": 0.2752798795700073, "step": 8100 }, { "epoch": 2.302015327845586, "grad_norm": 11.880863189697266, "learning_rate": 9.769912006812377e-05, "loss": 0.23376755714416503, "step": 8110 }, { "epoch": 2.304853817768947, "grad_norm": 11.634269714355469, "learning_rate": 9.76962815782004e-05, "loss": 0.19113626480102539, "step": 8120 }, { "epoch": 2.3076923076923075, "grad_norm": 12.41260051727295, "learning_rate": 9.769344308827703e-05, "loss": 0.20344212055206298, "step": 8130 }, { "epoch": 2.3105307976156686, "grad_norm": 15.14867877960205, "learning_rate": 9.769060459835369e-05, "loss": 0.2156879186630249, "step": 8140 }, { "epoch": 2.3133692875390293, "grad_norm": 11.39120864868164, "learning_rate": 9.768776610843032e-05, "loss": 0.2262671947479248, "step": 8150 }, { "epoch": 2.31620777746239, "grad_norm": 11.25550365447998, "learning_rate": 9.768492761850696e-05, "loss": 0.24672155380249022, "step": 8160 }, { "epoch": 2.3190462673857506, "grad_norm": 16.816112518310547, "learning_rate": 9.76820891285836e-05, "loss": 0.2738484382629395, "step": 8170 }, { "epoch": 2.3218847573091117, "grad_norm": 12.357087135314941, "learning_rate": 9.767925063866024e-05, "loss": 0.22211170196533203, "step": 8180 }, { "epoch": 2.3247232472324724, "grad_norm": 13.130645751953125, "learning_rate": 9.767641214873687e-05, "loss": 0.18933194875717163, "step": 8190 }, { "epoch": 2.327561737155833, "grad_norm": 14.255372047424316, "learning_rate": 9.767357365881351e-05, "loss": 0.24075715541839598, "step": 8200 }, { "epoch": 2.3304002270791937, "grad_norm": 18.747760772705078, "learning_rate": 9.767073516889017e-05, "loss": 0.26655423641204834, "step": 8210 }, { "epoch": 2.333238717002555, "grad_norm": 11.621094703674316, "learning_rate": 9.76678966789668e-05, "loss": 0.22652344703674315, "step": 8220 }, { "epoch": 2.3360772069259155, "grad_norm": 13.443135261535645, "learning_rate": 9.766505818904344e-05, "loss": 0.22921421527862548, "step": 8230 }, { "epoch": 2.338915696849276, "grad_norm": 14.189286231994629, "learning_rate": 9.766221969912008e-05, "loss": 0.2856487512588501, "step": 8240 }, { "epoch": 2.341754186772637, "grad_norm": 14.915955543518066, "learning_rate": 9.76593812091967e-05, "loss": 0.21768798828125, "step": 8250 }, { "epoch": 2.344592676695998, "grad_norm": 16.286808013916016, "learning_rate": 9.765654271927335e-05, "loss": 0.24037768840789794, "step": 8260 }, { "epoch": 2.3474311666193586, "grad_norm": 14.658313751220703, "learning_rate": 9.765370422935e-05, "loss": 0.2403259515762329, "step": 8270 }, { "epoch": 2.3502696565427192, "grad_norm": 12.88731575012207, "learning_rate": 9.765086573942663e-05, "loss": 0.21351354122161864, "step": 8280 }, { "epoch": 2.35310814646608, "grad_norm": 5.670812129974365, "learning_rate": 9.764802724950327e-05, "loss": 0.21820731163024903, "step": 8290 }, { "epoch": 2.355946636389441, "grad_norm": 12.147333145141602, "learning_rate": 9.764518875957991e-05, "loss": 0.23964507579803468, "step": 8300 }, { "epoch": 2.3587851263128017, "grad_norm": Infinity, "learning_rate": 9.764235026965655e-05, "loss": 0.2964792251586914, "step": 8310 }, { "epoch": 2.3616236162361623, "grad_norm": 12.759358406066895, "learning_rate": 9.763979562872553e-05, "loss": 0.2513108253479004, "step": 8320 }, { "epoch": 2.364462106159523, "grad_norm": 13.311692237854004, "learning_rate": 9.763695713880216e-05, "loss": 0.18245779275894164, "step": 8330 }, { "epoch": 2.3673005960828837, "grad_norm": 14.516600608825684, "learning_rate": 9.76341186488788e-05, "loss": 0.1909894347190857, "step": 8340 }, { "epoch": 2.370139086006245, "grad_norm": 12.185073852539062, "learning_rate": 9.763128015895544e-05, "loss": 0.18535197973251344, "step": 8350 }, { "epoch": 2.3729775759296055, "grad_norm": 14.372466087341309, "learning_rate": 9.762844166903208e-05, "loss": 0.2473667860031128, "step": 8360 }, { "epoch": 2.375816065852966, "grad_norm": 11.368657112121582, "learning_rate": 9.762560317910871e-05, "loss": 0.20681235790252686, "step": 8370 }, { "epoch": 2.3786545557763272, "grad_norm": 8.702242851257324, "learning_rate": 9.762276468918535e-05, "loss": 0.2009157657623291, "step": 8380 }, { "epoch": 2.381493045699688, "grad_norm": 10.98477554321289, "learning_rate": 9.7619926199262e-05, "loss": 0.22559032440185547, "step": 8390 }, { "epoch": 2.3843315356230486, "grad_norm": 8.243904113769531, "learning_rate": 9.761708770933864e-05, "loss": 0.22894697189331054, "step": 8400 }, { "epoch": 2.3871700255464092, "grad_norm": 8.544402122497559, "learning_rate": 9.761424921941528e-05, "loss": 0.22806932926177978, "step": 8410 }, { "epoch": 2.39000851546977, "grad_norm": 9.876009941101074, "learning_rate": 9.761141072949192e-05, "loss": 0.18935091495513917, "step": 8420 }, { "epoch": 2.392847005393131, "grad_norm": 15.732294082641602, "learning_rate": 9.760857223956855e-05, "loss": 0.2554239988327026, "step": 8430 }, { "epoch": 2.3956854953164917, "grad_norm": 13.344656944274902, "learning_rate": 9.760573374964519e-05, "loss": 0.19380322694778443, "step": 8440 }, { "epoch": 2.3985239852398523, "grad_norm": 11.638999938964844, "learning_rate": 9.760289525972183e-05, "loss": 0.2317817449569702, "step": 8450 }, { "epoch": 2.401362475163213, "grad_norm": 15.169660568237305, "learning_rate": 9.760005676979847e-05, "loss": 0.23746285438537598, "step": 8460 }, { "epoch": 2.404200965086574, "grad_norm": 13.893798828125, "learning_rate": 9.759721827987511e-05, "loss": 0.26475510597229, "step": 8470 }, { "epoch": 2.4070394550099348, "grad_norm": 11.058196067810059, "learning_rate": 9.759437978995175e-05, "loss": 0.2457582473754883, "step": 8480 }, { "epoch": 2.4098779449332954, "grad_norm": 18.173795700073242, "learning_rate": 9.75915413000284e-05, "loss": 0.28463115692138674, "step": 8490 }, { "epoch": 2.412716434856656, "grad_norm": 12.505970001220703, "learning_rate": 9.758870281010502e-05, "loss": 0.2565460681915283, "step": 8500 }, { "epoch": 2.412716434856656, "eval_accuracy": 0.8838303554396897, "eval_loss": 0.3519446849822998, "eval_runtime": 40.9569, "eval_samples_per_second": 383.989, "eval_steps_per_second": 6.006, "step": 8500 }, { "epoch": 2.415554924780017, "grad_norm": 16.984891891479492, "learning_rate": 9.758586432018166e-05, "loss": 0.2134021997451782, "step": 8510 }, { "epoch": 2.418393414703378, "grad_norm": 17.70616912841797, "learning_rate": 9.75830258302583e-05, "loss": 0.25598580837249757, "step": 8520 }, { "epoch": 2.4212319046267385, "grad_norm": 12.828035354614258, "learning_rate": 9.758018734033495e-05, "loss": 0.2354804515838623, "step": 8530 }, { "epoch": 2.424070394550099, "grad_norm": 15.933858871459961, "learning_rate": 9.757734885041159e-05, "loss": 0.29547131061553955, "step": 8540 }, { "epoch": 2.42690888447346, "grad_norm": 11.308984756469727, "learning_rate": 9.757451036048823e-05, "loss": 0.200403094291687, "step": 8550 }, { "epoch": 2.429747374396821, "grad_norm": 11.95490550994873, "learning_rate": 9.757167187056486e-05, "loss": 0.23439064025878906, "step": 8560 }, { "epoch": 2.4325858643201816, "grad_norm": 11.638036727905273, "learning_rate": 9.75688333806415e-05, "loss": 0.19709938764572144, "step": 8570 }, { "epoch": 2.4354243542435423, "grad_norm": 8.963068962097168, "learning_rate": 9.756599489071814e-05, "loss": 0.21404340267181396, "step": 8580 }, { "epoch": 2.4382628441669034, "grad_norm": 12.34644889831543, "learning_rate": 9.756315640079478e-05, "loss": 0.22099318504333496, "step": 8590 }, { "epoch": 2.441101334090264, "grad_norm": 12.76222038269043, "learning_rate": 9.756031791087142e-05, "loss": 0.1781737446784973, "step": 8600 }, { "epoch": 2.4439398240136248, "grad_norm": 14.397987365722656, "learning_rate": 9.755747942094806e-05, "loss": 0.21575732231140138, "step": 8610 }, { "epoch": 2.4467783139369854, "grad_norm": 13.062322616577148, "learning_rate": 9.755464093102469e-05, "loss": 0.18897591829299926, "step": 8620 }, { "epoch": 2.449616803860346, "grad_norm": 8.625822067260742, "learning_rate": 9.755180244110133e-05, "loss": 0.23920953273773193, "step": 8630 }, { "epoch": 2.452455293783707, "grad_norm": 12.505965232849121, "learning_rate": 9.754896395117798e-05, "loss": 0.2033005714416504, "step": 8640 }, { "epoch": 2.455293783707068, "grad_norm": 7.27635383605957, "learning_rate": 9.754612546125462e-05, "loss": 0.22252812385559081, "step": 8650 }, { "epoch": 2.4581322736304285, "grad_norm": 10.103968620300293, "learning_rate": 9.754328697133126e-05, "loss": 0.18875436782836913, "step": 8660 }, { "epoch": 2.4609707635537896, "grad_norm": 19.710289001464844, "learning_rate": 9.75404484814079e-05, "loss": 0.2187868356704712, "step": 8670 }, { "epoch": 2.4638092534771503, "grad_norm": 13.987964630126953, "learning_rate": 9.753760999148454e-05, "loss": 0.22286603450775147, "step": 8680 }, { "epoch": 2.466647743400511, "grad_norm": 13.517807960510254, "learning_rate": 9.753477150156117e-05, "loss": 0.20452170372009276, "step": 8690 }, { "epoch": 2.4694862333238716, "grad_norm": 7.977642059326172, "learning_rate": 9.753193301163781e-05, "loss": 0.20191783905029298, "step": 8700 }, { "epoch": 2.4723247232472323, "grad_norm": 12.540297508239746, "learning_rate": 9.752909452171445e-05, "loss": 0.24068069458007812, "step": 8710 }, { "epoch": 2.4751632131705934, "grad_norm": 4.675232410430908, "learning_rate": 9.752625603179109e-05, "loss": 0.23041927814483643, "step": 8720 }, { "epoch": 2.478001703093954, "grad_norm": 14.311765670776367, "learning_rate": 9.752341754186773e-05, "loss": 0.16479647159576416, "step": 8730 }, { "epoch": 2.4808401930173147, "grad_norm": 10.065478324890137, "learning_rate": 9.752057905194438e-05, "loss": 0.22471683025360106, "step": 8740 }, { "epoch": 2.4836786829406754, "grad_norm": 16.823122024536133, "learning_rate": 9.7517740562021e-05, "loss": 0.21297831535339357, "step": 8750 }, { "epoch": 2.4865171728640365, "grad_norm": 16.649362564086914, "learning_rate": 9.751490207209764e-05, "loss": 0.20448734760284423, "step": 8760 }, { "epoch": 2.489355662787397, "grad_norm": 14.677597999572754, "learning_rate": 9.751206358217429e-05, "loss": 0.2186464786529541, "step": 8770 }, { "epoch": 2.492194152710758, "grad_norm": 12.095709800720215, "learning_rate": 9.750922509225093e-05, "loss": 0.20416460037231446, "step": 8780 }, { "epoch": 2.4950326426341185, "grad_norm": 11.569559097290039, "learning_rate": 9.750638660232757e-05, "loss": 0.22008938789367677, "step": 8790 }, { "epoch": 2.4978711325574796, "grad_norm": 22.211475372314453, "learning_rate": 9.750354811240421e-05, "loss": 0.1953047752380371, "step": 8800 }, { "epoch": 2.5007096224808403, "grad_norm": 16.14258575439453, "learning_rate": 9.750070962248085e-05, "loss": 0.23989310264587402, "step": 8810 }, { "epoch": 2.503548112404201, "grad_norm": 9.801765441894531, "learning_rate": 9.749787113255748e-05, "loss": 0.21445956230163574, "step": 8820 }, { "epoch": 2.5063866023275616, "grad_norm": 11.799942970275879, "learning_rate": 9.749503264263412e-05, "loss": 0.24443621635437013, "step": 8830 }, { "epoch": 2.5092250922509223, "grad_norm": 16.704349517822266, "learning_rate": 9.749219415271076e-05, "loss": 0.21032731533050536, "step": 8840 }, { "epoch": 2.5120635821742834, "grad_norm": 12.347841262817383, "learning_rate": 9.74893556627874e-05, "loss": 0.22559504508972167, "step": 8850 }, { "epoch": 2.514902072097644, "grad_norm": 13.337400436401367, "learning_rate": 9.748651717286404e-05, "loss": 0.21833329200744628, "step": 8860 }, { "epoch": 2.5177405620210047, "grad_norm": 9.589932441711426, "learning_rate": 9.748367868294069e-05, "loss": 0.20717840194702147, "step": 8870 }, { "epoch": 2.520579051944366, "grad_norm": 20.871400833129883, "learning_rate": 9.748084019301731e-05, "loss": 0.1868640661239624, "step": 8880 }, { "epoch": 2.5234175418677265, "grad_norm": 9.912771224975586, "learning_rate": 9.747800170309396e-05, "loss": 0.20855891704559326, "step": 8890 }, { "epoch": 2.526256031791087, "grad_norm": 18.569868087768555, "learning_rate": 9.74751632131706e-05, "loss": 0.24780693054199218, "step": 8900 }, { "epoch": 2.529094521714448, "grad_norm": 13.907896995544434, "learning_rate": 9.747232472324724e-05, "loss": 0.1933709502220154, "step": 8910 }, { "epoch": 2.5319330116378085, "grad_norm": 16.193132400512695, "learning_rate": 9.746948623332388e-05, "loss": 0.23488900661468506, "step": 8920 }, { "epoch": 2.5347715015611696, "grad_norm": 12.96514892578125, "learning_rate": 9.746664774340052e-05, "loss": 0.2596521615982056, "step": 8930 }, { "epoch": 2.5376099914845303, "grad_norm": 14.07423210144043, "learning_rate": 9.746380925347716e-05, "loss": 0.2639884948730469, "step": 8940 }, { "epoch": 2.540448481407891, "grad_norm": 10.30260944366455, "learning_rate": 9.746097076355379e-05, "loss": 0.22712502479553223, "step": 8950 }, { "epoch": 2.543286971331252, "grad_norm": 13.019634246826172, "learning_rate": 9.745813227363043e-05, "loss": 0.15905182361602782, "step": 8960 }, { "epoch": 2.5461254612546127, "grad_norm": 14.944547653198242, "learning_rate": 9.745529378370707e-05, "loss": 0.2534718751907349, "step": 8970 }, { "epoch": 2.5489639511779734, "grad_norm": 19.794174194335938, "learning_rate": 9.74524552937837e-05, "loss": 0.21436522006988526, "step": 8980 }, { "epoch": 2.551802441101334, "grad_norm": 14.711974143981934, "learning_rate": 9.744961680386036e-05, "loss": 0.2270066261291504, "step": 8990 }, { "epoch": 2.5546409310246947, "grad_norm": 14.938990592956543, "learning_rate": 9.7446778313937e-05, "loss": 0.1977144479751587, "step": 9000 }, { "epoch": 2.5546409310246947, "eval_accuracy": 0.8822407324982514, "eval_loss": 0.353567510843277, "eval_runtime": 43.9054, "eval_samples_per_second": 358.202, "eval_steps_per_second": 5.603, "step": 9000 }, { "epoch": 2.557479420948056, "grad_norm": 17.573787689208984, "learning_rate": 9.744393982401362e-05, "loss": 0.17893621921539307, "step": 9010 }, { "epoch": 2.5603179108714165, "grad_norm": 10.35443115234375, "learning_rate": 9.744110133409027e-05, "loss": 0.1904844284057617, "step": 9020 }, { "epoch": 2.563156400794777, "grad_norm": 14.9873628616333, "learning_rate": 9.743826284416691e-05, "loss": 0.19945565462112427, "step": 9030 }, { "epoch": 2.565994890718138, "grad_norm": 10.525725364685059, "learning_rate": 9.743542435424355e-05, "loss": 0.2249751567840576, "step": 9040 }, { "epoch": 2.5688333806414985, "grad_norm": 19.867877960205078, "learning_rate": 9.743258586432019e-05, "loss": 0.20047574043273925, "step": 9050 }, { "epoch": 2.5716718705648596, "grad_norm": 8.384441375732422, "learning_rate": 9.742974737439683e-05, "loss": 0.23378121852874756, "step": 9060 }, { "epoch": 2.5745103604882202, "grad_norm": 9.303631782531738, "learning_rate": 9.742690888447347e-05, "loss": 0.17323017120361328, "step": 9070 }, { "epoch": 2.577348850411581, "grad_norm": 16.218801498413086, "learning_rate": 9.74240703945501e-05, "loss": 0.24482152462005616, "step": 9080 }, { "epoch": 2.580187340334942, "grad_norm": 15.998780250549316, "learning_rate": 9.742123190462674e-05, "loss": 0.21995923519134522, "step": 9090 }, { "epoch": 2.5830258302583027, "grad_norm": 14.225312232971191, "learning_rate": 9.741839341470338e-05, "loss": 0.23223438262939453, "step": 9100 }, { "epoch": 2.5858643201816633, "grad_norm": 10.478775024414062, "learning_rate": 9.741555492478001e-05, "loss": 0.19520981311798097, "step": 9110 }, { "epoch": 2.588702810105024, "grad_norm": 21.042734146118164, "learning_rate": 9.741271643485667e-05, "loss": 0.1689271569252014, "step": 9120 }, { "epoch": 2.5915413000283847, "grad_norm": 15.272537231445312, "learning_rate": 9.740987794493331e-05, "loss": 0.22515742778778075, "step": 9130 }, { "epoch": 2.594379789951746, "grad_norm": 13.931267738342285, "learning_rate": 9.740703945500994e-05, "loss": 0.1854565978050232, "step": 9140 }, { "epoch": 2.5972182798751065, "grad_norm": 8.049050331115723, "learning_rate": 9.740420096508658e-05, "loss": 0.23579180240631104, "step": 9150 }, { "epoch": 2.600056769798467, "grad_norm": 6.980400562286377, "learning_rate": 9.740136247516322e-05, "loss": 0.2065615177154541, "step": 9160 }, { "epoch": 2.6028952597218282, "grad_norm": 12.80195426940918, "learning_rate": 9.739852398523986e-05, "loss": 0.22150819301605223, "step": 9170 }, { "epoch": 2.605733749645189, "grad_norm": 14.673661231994629, "learning_rate": 9.739568549531649e-05, "loss": 0.17861660718917846, "step": 9180 }, { "epoch": 2.6085722395685496, "grad_norm": 11.372749328613281, "learning_rate": 9.739284700539314e-05, "loss": 0.2051511287689209, "step": 9190 }, { "epoch": 2.61141072949191, "grad_norm": 10.73395824432373, "learning_rate": 9.739000851546978e-05, "loss": 0.21081225872039794, "step": 9200 }, { "epoch": 2.614249219415271, "grad_norm": 15.322111129760742, "learning_rate": 9.738717002554641e-05, "loss": 0.288724684715271, "step": 9210 }, { "epoch": 2.617087709338632, "grad_norm": 14.391902923583984, "learning_rate": 9.738433153562305e-05, "loss": 0.21633071899414064, "step": 9220 }, { "epoch": 2.6199261992619927, "grad_norm": 16.249467849731445, "learning_rate": 9.73814930456997e-05, "loss": 0.21728301048278809, "step": 9230 }, { "epoch": 2.6227646891853533, "grad_norm": 11.481300354003906, "learning_rate": 9.737865455577632e-05, "loss": 0.195639967918396, "step": 9240 }, { "epoch": 2.6256031791087144, "grad_norm": 15.505470275878906, "learning_rate": 9.737581606585298e-05, "loss": 0.19282753467559816, "step": 9250 }, { "epoch": 2.628441669032075, "grad_norm": 9.053607940673828, "learning_rate": 9.737297757592962e-05, "loss": 0.1883820414543152, "step": 9260 }, { "epoch": 2.6312801589554358, "grad_norm": 8.635129928588867, "learning_rate": 9.737013908600625e-05, "loss": 0.25074615478515627, "step": 9270 }, { "epoch": 2.6341186488787964, "grad_norm": 18.625333786010742, "learning_rate": 9.736730059608289e-05, "loss": 0.22978038787841798, "step": 9280 }, { "epoch": 2.636957138802157, "grad_norm": 13.299293518066406, "learning_rate": 9.736446210615953e-05, "loss": 0.2617096662521362, "step": 9290 }, { "epoch": 2.639795628725518, "grad_norm": 11.98487377166748, "learning_rate": 9.736162361623617e-05, "loss": 0.20859298706054688, "step": 9300 }, { "epoch": 2.642634118648879, "grad_norm": 9.191095352172852, "learning_rate": 9.73587851263128e-05, "loss": 0.14842817783355713, "step": 9310 }, { "epoch": 2.6454726085722395, "grad_norm": 15.684033393859863, "learning_rate": 9.735594663638945e-05, "loss": 0.19613560438156127, "step": 9320 }, { "epoch": 2.6483110984956, "grad_norm": 9.7940092086792, "learning_rate": 9.735310814646608e-05, "loss": 0.21483032703399657, "step": 9330 }, { "epoch": 2.651149588418961, "grad_norm": 12.653528213500977, "learning_rate": 9.735026965654272e-05, "loss": 0.19207831621170043, "step": 9340 }, { "epoch": 2.653988078342322, "grad_norm": 19.49503517150879, "learning_rate": 9.734743116661936e-05, "loss": 0.17162203788757324, "step": 9350 }, { "epoch": 2.6568265682656826, "grad_norm": 9.636651992797852, "learning_rate": 9.7344592676696e-05, "loss": 0.22635285854339598, "step": 9360 }, { "epoch": 2.6596650581890433, "grad_norm": 16.960296630859375, "learning_rate": 9.734175418677263e-05, "loss": 0.2506766080856323, "step": 9370 }, { "epoch": 2.6625035481124044, "grad_norm": 11.412928581237793, "learning_rate": 9.733891569684927e-05, "loss": 0.22718396186828613, "step": 9380 }, { "epoch": 2.665342038035765, "grad_norm": 14.557952880859375, "learning_rate": 9.733607720692593e-05, "loss": 0.2045851469039917, "step": 9390 }, { "epoch": 2.6681805279591257, "grad_norm": 19.043903350830078, "learning_rate": 9.733323871700256e-05, "loss": 0.24429850578308104, "step": 9400 }, { "epoch": 2.6710190178824864, "grad_norm": 13.181796073913574, "learning_rate": 9.73304002270792e-05, "loss": 0.18220219612121583, "step": 9410 }, { "epoch": 2.673857507805847, "grad_norm": 19.13165283203125, "learning_rate": 9.732756173715584e-05, "loss": 0.21629371643066406, "step": 9420 }, { "epoch": 2.676695997729208, "grad_norm": 15.630627632141113, "learning_rate": 9.732472324723247e-05, "loss": 0.23067193031311034, "step": 9430 }, { "epoch": 2.679534487652569, "grad_norm": 13.47204875946045, "learning_rate": 9.732188475730911e-05, "loss": 0.194401752948761, "step": 9440 }, { "epoch": 2.6823729775759295, "grad_norm": 13.019672393798828, "learning_rate": 9.731904626738576e-05, "loss": 0.16775140762329102, "step": 9450 }, { "epoch": 2.6852114674992906, "grad_norm": 11.322114944458008, "learning_rate": 9.731620777746239e-05, "loss": 0.19435698986053468, "step": 9460 }, { "epoch": 2.6880499574226513, "grad_norm": 12.000550270080566, "learning_rate": 9.731336928753903e-05, "loss": 0.1899910807609558, "step": 9470 }, { "epoch": 2.690888447346012, "grad_norm": 15.239422798156738, "learning_rate": 9.731053079761567e-05, "loss": 0.17662038803100585, "step": 9480 }, { "epoch": 2.6937269372693726, "grad_norm": 16.10559844970703, "learning_rate": 9.730769230769232e-05, "loss": 0.22034332752227784, "step": 9490 }, { "epoch": 2.6965654271927333, "grad_norm": 12.43129825592041, "learning_rate": 9.730485381776894e-05, "loss": 0.1864484429359436, "step": 9500 }, { "epoch": 2.6965654271927333, "eval_accuracy": 0.88948941311121, "eval_loss": 0.3264746367931366, "eval_runtime": 38.3969, "eval_samples_per_second": 409.59, "eval_steps_per_second": 6.407, "step": 9500 }, { "epoch": 2.6994039171160944, "grad_norm": 12.36488151550293, "learning_rate": 9.730201532784559e-05, "loss": 0.21383161544799806, "step": 9510 }, { "epoch": 2.702242407039455, "grad_norm": 14.455981254577637, "learning_rate": 9.729917683792224e-05, "loss": 0.18577158451080322, "step": 9520 }, { "epoch": 2.7050808969628157, "grad_norm": 17.768827438354492, "learning_rate": 9.729633834799887e-05, "loss": 0.23360369205474854, "step": 9530 }, { "epoch": 2.7079193868861764, "grad_norm": 14.889660835266113, "learning_rate": 9.729349985807551e-05, "loss": 0.20406002998352052, "step": 9540 }, { "epoch": 2.710757876809537, "grad_norm": 14.61486530303955, "learning_rate": 9.729066136815215e-05, "loss": 0.19975624084472657, "step": 9550 }, { "epoch": 2.713596366732898, "grad_norm": 14.953705787658691, "learning_rate": 9.728782287822878e-05, "loss": 0.18793165683746338, "step": 9560 }, { "epoch": 2.716434856656259, "grad_norm": 8.680607795715332, "learning_rate": 9.728498438830542e-05, "loss": 0.20148806571960448, "step": 9570 }, { "epoch": 2.7192733465796195, "grad_norm": 13.567627906799316, "learning_rate": 9.728214589838206e-05, "loss": 0.21266634464263917, "step": 9580 }, { "epoch": 2.7221118365029806, "grad_norm": 15.743047714233398, "learning_rate": 9.72793074084587e-05, "loss": 0.20140488147735597, "step": 9590 }, { "epoch": 2.7249503264263413, "grad_norm": 12.143503189086914, "learning_rate": 9.727646891853534e-05, "loss": 0.20761444568634033, "step": 9600 }, { "epoch": 2.727788816349702, "grad_norm": 17.1187686920166, "learning_rate": 9.727363042861199e-05, "loss": 0.2508599042892456, "step": 9610 }, { "epoch": 2.7306273062730626, "grad_norm": 18.378902435302734, "learning_rate": 9.727079193868863e-05, "loss": 0.20834770202636718, "step": 9620 }, { "epoch": 2.7334657961964233, "grad_norm": 10.98671817779541, "learning_rate": 9.726795344876525e-05, "loss": 0.18055317401885987, "step": 9630 }, { "epoch": 2.7363042861197844, "grad_norm": 17.853260040283203, "learning_rate": 9.72651149588419e-05, "loss": 0.17769830226898192, "step": 9640 }, { "epoch": 2.739142776043145, "grad_norm": 13.123461723327637, "learning_rate": 9.726227646891855e-05, "loss": 0.19806002378463744, "step": 9650 }, { "epoch": 2.7419812659665057, "grad_norm": 12.203272819519043, "learning_rate": 9.725943797899518e-05, "loss": 0.20119891166687012, "step": 9660 }, { "epoch": 2.744819755889867, "grad_norm": 11.445073127746582, "learning_rate": 9.725659948907182e-05, "loss": 0.22633757591247558, "step": 9670 }, { "epoch": 2.7476582458132275, "grad_norm": 9.575220108032227, "learning_rate": 9.725376099914846e-05, "loss": 0.1777011752128601, "step": 9680 }, { "epoch": 2.750496735736588, "grad_norm": 13.495684623718262, "learning_rate": 9.725092250922509e-05, "loss": 0.21813290119171141, "step": 9690 }, { "epoch": 2.753335225659949, "grad_norm": 10.102472305297852, "learning_rate": 9.724808401930173e-05, "loss": 0.16254419088363647, "step": 9700 }, { "epoch": 2.7561737155833095, "grad_norm": 7.479804039001465, "learning_rate": 9.724524552937837e-05, "loss": 0.22302415370941162, "step": 9710 }, { "epoch": 2.7590122055066706, "grad_norm": 14.44089412689209, "learning_rate": 9.724240703945501e-05, "loss": 0.20151712894439697, "step": 9720 }, { "epoch": 2.7618506954300313, "grad_norm": 8.986812591552734, "learning_rate": 9.723956854953165e-05, "loss": 0.2087409019470215, "step": 9730 }, { "epoch": 2.764689185353392, "grad_norm": 9.511686325073242, "learning_rate": 9.72367300596083e-05, "loss": 0.18686021566390992, "step": 9740 }, { "epoch": 2.767527675276753, "grad_norm": 11.262073516845703, "learning_rate": 9.723389156968494e-05, "loss": 0.1853498935699463, "step": 9750 }, { "epoch": 2.7703661652001137, "grad_norm": 10.389833450317383, "learning_rate": 9.723105307976157e-05, "loss": 0.20570597648620606, "step": 9760 }, { "epoch": 2.7732046551234744, "grad_norm": 11.568097114562988, "learning_rate": 9.72282145898382e-05, "loss": 0.17183173894882203, "step": 9770 }, { "epoch": 2.776043145046835, "grad_norm": 28.320449829101562, "learning_rate": 9.722537609991485e-05, "loss": 0.18325958251953126, "step": 9780 }, { "epoch": 2.7788816349701957, "grad_norm": 12.151317596435547, "learning_rate": 9.722253760999149e-05, "loss": 0.19182183742523193, "step": 9790 }, { "epoch": 2.781720124893557, "grad_norm": 20.550113677978516, "learning_rate": 9.721969912006813e-05, "loss": 0.19728609323501586, "step": 9800 }, { "epoch": 2.7845586148169175, "grad_norm": 12.950318336486816, "learning_rate": 9.721686063014477e-05, "loss": 0.17154136896133423, "step": 9810 }, { "epoch": 2.787397104740278, "grad_norm": 10.786693572998047, "learning_rate": 9.72140221402214e-05, "loss": 0.1816237211227417, "step": 9820 }, { "epoch": 2.790235594663639, "grad_norm": 5.81512451171875, "learning_rate": 9.721118365029804e-05, "loss": 0.1979580044746399, "step": 9830 }, { "epoch": 2.7930740845869995, "grad_norm": 10.185220718383789, "learning_rate": 9.720834516037468e-05, "loss": 0.1997803807258606, "step": 9840 }, { "epoch": 2.7959125745103606, "grad_norm": 12.703685760498047, "learning_rate": 9.720550667045132e-05, "loss": 0.16134064197540282, "step": 9850 }, { "epoch": 2.7987510644337212, "grad_norm": 10.016107559204102, "learning_rate": 9.720266818052797e-05, "loss": 0.14471782445907594, "step": 9860 }, { "epoch": 2.801589554357082, "grad_norm": 13.028759956359863, "learning_rate": 9.71998296906046e-05, "loss": 0.17819924354553224, "step": 9870 }, { "epoch": 2.804428044280443, "grad_norm": 18.600008010864258, "learning_rate": 9.719699120068125e-05, "loss": 0.2001711368560791, "step": 9880 }, { "epoch": 2.8072665342038037, "grad_norm": 12.779607772827148, "learning_rate": 9.719415271075788e-05, "loss": 0.18103951215744019, "step": 9890 }, { "epoch": 2.8101050241271643, "grad_norm": 9.941447257995605, "learning_rate": 9.719131422083452e-05, "loss": 0.2060023784637451, "step": 9900 }, { "epoch": 2.812943514050525, "grad_norm": 12.857272148132324, "learning_rate": 9.718847573091116e-05, "loss": 0.1498393177986145, "step": 9910 }, { "epoch": 2.8157820039738857, "grad_norm": 11.979256629943848, "learning_rate": 9.71856372409878e-05, "loss": 0.16578680276870728, "step": 9920 }, { "epoch": 2.818620493897247, "grad_norm": 12.55955982208252, "learning_rate": 9.718279875106444e-05, "loss": 0.20763626098632812, "step": 9930 }, { "epoch": 2.8214589838206074, "grad_norm": 17.617786407470703, "learning_rate": 9.717996026114108e-05, "loss": 0.2542205810546875, "step": 9940 }, { "epoch": 2.824297473743968, "grad_norm": 17.59824562072754, "learning_rate": 9.717712177121771e-05, "loss": 0.21328301429748536, "step": 9950 }, { "epoch": 2.827135963667329, "grad_norm": 15.806827545166016, "learning_rate": 9.717428328129435e-05, "loss": 0.24740214347839357, "step": 9960 }, { "epoch": 2.82997445359069, "grad_norm": 10.871063232421875, "learning_rate": 9.7171444791371e-05, "loss": 0.21028294563293456, "step": 9970 }, { "epoch": 2.8328129435140506, "grad_norm": 12.224925994873047, "learning_rate": 9.716860630144763e-05, "loss": 0.19209096431732178, "step": 9980 }, { "epoch": 2.835651433437411, "grad_norm": 16.853662490844727, "learning_rate": 9.716576781152428e-05, "loss": 0.17414381504058837, "step": 9990 }, { "epoch": 2.838489923360772, "grad_norm": 16.15870475769043, "learning_rate": 9.716292932160092e-05, "loss": 0.21242589950561525, "step": 10000 }, { "epoch": 2.838489923360772, "eval_accuracy": 0.8946397914414701, "eval_loss": 0.3199341595172882, "eval_runtime": 48.0638, "eval_samples_per_second": 327.211, "eval_steps_per_second": 5.118, "step": 10000 }, { "epoch": 2.841328413284133, "grad_norm": 9.000622749328613, "learning_rate": 9.716009083167756e-05, "loss": 0.1970919966697693, "step": 10010 }, { "epoch": 2.8441669032074937, "grad_norm": 13.955005645751953, "learning_rate": 9.715725234175419e-05, "loss": 0.18227863311767578, "step": 10020 }, { "epoch": 2.8470053931308543, "grad_norm": 19.79986000061035, "learning_rate": 9.715441385183083e-05, "loss": 0.170460844039917, "step": 10030 }, { "epoch": 2.8498438830542154, "grad_norm": 14.54212474822998, "learning_rate": 9.715157536190747e-05, "loss": 0.20407140254974365, "step": 10040 }, { "epoch": 2.8526823729775757, "grad_norm": 9.706725120544434, "learning_rate": 9.714873687198411e-05, "loss": 0.19685860872268676, "step": 10050 }, { "epoch": 2.8555208629009368, "grad_norm": 11.717854499816895, "learning_rate": 9.714589838206075e-05, "loss": 0.20014140605926514, "step": 10060 }, { "epoch": 2.8583593528242974, "grad_norm": 11.705230712890625, "learning_rate": 9.71430598921374e-05, "loss": 0.21692922115325927, "step": 10070 }, { "epoch": 2.861197842747658, "grad_norm": 11.02046012878418, "learning_rate": 9.714022140221402e-05, "loss": 0.20340824127197266, "step": 10080 }, { "epoch": 2.864036332671019, "grad_norm": 11.58757209777832, "learning_rate": 9.713738291229066e-05, "loss": 0.21384308338165284, "step": 10090 }, { "epoch": 2.86687482259438, "grad_norm": 14.86462688446045, "learning_rate": 9.71345444223673e-05, "loss": 0.17497984170913697, "step": 10100 }, { "epoch": 2.8697133125177405, "grad_norm": 11.93954086303711, "learning_rate": 9.713170593244395e-05, "loss": 0.21073694229125978, "step": 10110 }, { "epoch": 2.872551802441101, "grad_norm": 13.602648735046387, "learning_rate": 9.712886744252059e-05, "loss": 0.15395116806030273, "step": 10120 }, { "epoch": 2.875390292364462, "grad_norm": 10.515848159790039, "learning_rate": 9.712602895259723e-05, "loss": 0.16803421974182128, "step": 10130 }, { "epoch": 2.878228782287823, "grad_norm": 10.30586051940918, "learning_rate": 9.712319046267387e-05, "loss": 0.20867342948913575, "step": 10140 }, { "epoch": 2.8810672722111836, "grad_norm": 13.783424377441406, "learning_rate": 9.71203519727505e-05, "loss": 0.17218780517578125, "step": 10150 }, { "epoch": 2.8839057621345443, "grad_norm": 9.815206527709961, "learning_rate": 9.711751348282714e-05, "loss": 0.1610799551010132, "step": 10160 }, { "epoch": 2.8867442520579054, "grad_norm": 16.649036407470703, "learning_rate": 9.711467499290378e-05, "loss": 0.18551867008209227, "step": 10170 }, { "epoch": 2.889582741981266, "grad_norm": 17.429306030273438, "learning_rate": 9.711183650298042e-05, "loss": 0.18831170797348024, "step": 10180 }, { "epoch": 2.8924212319046267, "grad_norm": 16.486108779907227, "learning_rate": 9.710899801305706e-05, "loss": 0.18170238733291627, "step": 10190 }, { "epoch": 2.8952597218279874, "grad_norm": 10.224630355834961, "learning_rate": 9.71061595231337e-05, "loss": 0.19410319328308107, "step": 10200 }, { "epoch": 2.898098211751348, "grad_norm": 15.0123929977417, "learning_rate": 9.710332103321033e-05, "loss": 0.19285236597061156, "step": 10210 }, { "epoch": 2.900936701674709, "grad_norm": 13.844090461730957, "learning_rate": 9.710048254328697e-05, "loss": 0.17872003316879273, "step": 10220 }, { "epoch": 2.90377519159807, "grad_norm": 11.936907768249512, "learning_rate": 9.709764405336361e-05, "loss": 0.15176676511764525, "step": 10230 }, { "epoch": 2.9066136815214305, "grad_norm": 14.45914077758789, "learning_rate": 9.709480556344026e-05, "loss": 0.19523416757583617, "step": 10240 }, { "epoch": 2.9094521714447916, "grad_norm": 13.420283317565918, "learning_rate": 9.70919670735169e-05, "loss": 0.14284124374389648, "step": 10250 }, { "epoch": 2.9122906613681523, "grad_norm": 8.028953552246094, "learning_rate": 9.708912858359354e-05, "loss": 0.19477298259735107, "step": 10260 }, { "epoch": 2.915129151291513, "grad_norm": 11.467706680297852, "learning_rate": 9.708629009367017e-05, "loss": 0.2148937463760376, "step": 10270 }, { "epoch": 2.9179676412148736, "grad_norm": 22.087190628051758, "learning_rate": 9.708345160374681e-05, "loss": 0.20989012718200684, "step": 10280 }, { "epoch": 2.9208061311382343, "grad_norm": 5.273431777954102, "learning_rate": 9.708061311382345e-05, "loss": 0.1701538681983948, "step": 10290 }, { "epoch": 2.9236446210615954, "grad_norm": 10.89460563659668, "learning_rate": 9.707777462390009e-05, "loss": 0.1644642949104309, "step": 10300 }, { "epoch": 2.926483110984956, "grad_norm": 10.595633506774902, "learning_rate": 9.707493613397672e-05, "loss": 0.20249826908111573, "step": 10310 }, { "epoch": 2.9293216009083167, "grad_norm": 16.167003631591797, "learning_rate": 9.70723814930457e-05, "loss": 0.21293470859527588, "step": 10320 }, { "epoch": 2.9321600908316774, "grad_norm": 9.83825397491455, "learning_rate": 9.706954300312234e-05, "loss": 0.1847701907157898, "step": 10330 }, { "epoch": 2.934998580755038, "grad_norm": 12.895318031311035, "learning_rate": 9.706670451319898e-05, "loss": 0.1469442367553711, "step": 10340 }, { "epoch": 2.937837070678399, "grad_norm": 13.721134185791016, "learning_rate": 9.706386602327562e-05, "loss": 0.22408618927001953, "step": 10350 }, { "epoch": 2.94067556060176, "grad_norm": 21.259695053100586, "learning_rate": 9.706102753335226e-05, "loss": 0.1932833671569824, "step": 10360 }, { "epoch": 2.9435140505251205, "grad_norm": 11.293501853942871, "learning_rate": 9.70581890434289e-05, "loss": 0.16970994472503662, "step": 10370 }, { "epoch": 2.9463525404484816, "grad_norm": 13.465764045715332, "learning_rate": 9.705535055350555e-05, "loss": 0.1656751036643982, "step": 10380 }, { "epoch": 2.9491910303718423, "grad_norm": 10.799820899963379, "learning_rate": 9.705251206358217e-05, "loss": 0.1472199082374573, "step": 10390 }, { "epoch": 2.952029520295203, "grad_norm": 10.170804977416992, "learning_rate": 9.704967357365882e-05, "loss": 0.15275402069091798, "step": 10400 }, { "epoch": 2.9548680102185636, "grad_norm": 16.666641235351562, "learning_rate": 9.704683508373546e-05, "loss": 0.23409066200256348, "step": 10410 }, { "epoch": 2.9577065001419243, "grad_norm": 8.886507987976074, "learning_rate": 9.704399659381209e-05, "loss": 0.18265349864959718, "step": 10420 }, { "epoch": 2.9605449900652854, "grad_norm": 8.293532371520996, "learning_rate": 9.704115810388874e-05, "loss": 0.16409800052642823, "step": 10430 }, { "epoch": 2.963383479988646, "grad_norm": 9.201800346374512, "learning_rate": 9.703831961396538e-05, "loss": 0.16031235456466675, "step": 10440 }, { "epoch": 2.9662219699120067, "grad_norm": 16.222553253173828, "learning_rate": 9.703548112404201e-05, "loss": 0.17410545349121093, "step": 10450 }, { "epoch": 2.969060459835368, "grad_norm": 10.362982749938965, "learning_rate": 9.703264263411865e-05, "loss": 0.1900336503982544, "step": 10460 }, { "epoch": 2.9718989497587285, "grad_norm": 8.775266647338867, "learning_rate": 9.702980414419529e-05, "loss": 0.17638566493988037, "step": 10470 }, { "epoch": 2.974737439682089, "grad_norm": 14.377142906188965, "learning_rate": 9.702696565427193e-05, "loss": 0.18094249963760375, "step": 10480 }, { "epoch": 2.97757592960545, "grad_norm": 12.368301391601562, "learning_rate": 9.702412716434856e-05, "loss": 0.20637946128845214, "step": 10490 }, { "epoch": 2.9804144195288105, "grad_norm": 15.63573932647705, "learning_rate": 9.702128867442522e-05, "loss": 0.20046114921569824, "step": 10500 }, { "epoch": 2.9804144195288105, "eval_accuracy": 0.9050677179373052, "eval_loss": 0.2900501489639282, "eval_runtime": 38.755, "eval_samples_per_second": 405.806, "eval_steps_per_second": 6.348, "step": 10500 }, { "epoch": 2.9832529094521716, "grad_norm": 13.364728927612305, "learning_rate": 9.701845018450186e-05, "loss": 0.2024156093597412, "step": 10510 }, { "epoch": 2.9860913993755323, "grad_norm": 11.432146072387695, "learning_rate": 9.701561169457849e-05, "loss": 0.1900703191757202, "step": 10520 }, { "epoch": 2.988929889298893, "grad_norm": 18.868886947631836, "learning_rate": 9.701277320465513e-05, "loss": 0.17574896812438964, "step": 10530 }, { "epoch": 2.991768379222254, "grad_norm": 13.4249267578125, "learning_rate": 9.700993471473177e-05, "loss": 0.1861947536468506, "step": 10540 }, { "epoch": 2.9946068691456147, "grad_norm": 11.705652236938477, "learning_rate": 9.70070962248084e-05, "loss": 0.18529784679412842, "step": 10550 }, { "epoch": 2.9974453590689754, "grad_norm": 13.404207229614258, "learning_rate": 9.700425773488505e-05, "loss": 0.20898010730743408, "step": 10560 }, { "epoch": 3.000283848992336, "grad_norm": 9.469964981079102, "learning_rate": 9.700141924496169e-05, "loss": 0.16186460256576538, "step": 10570 }, { "epoch": 3.0031223389156967, "grad_norm": 7.788647651672363, "learning_rate": 9.699858075503832e-05, "loss": 0.1344990611076355, "step": 10580 }, { "epoch": 3.005960828839058, "grad_norm": 14.064414024353027, "learning_rate": 9.699574226511496e-05, "loss": 0.15102314949035645, "step": 10590 }, { "epoch": 3.0087993187624185, "grad_norm": 12.568145751953125, "learning_rate": 9.69929037751916e-05, "loss": 0.1689762830734253, "step": 10600 }, { "epoch": 3.011637808685779, "grad_norm": 8.75515365600586, "learning_rate": 9.699006528526824e-05, "loss": 0.14115209579467775, "step": 10610 }, { "epoch": 3.01447629860914, "grad_norm": 11.703523635864258, "learning_rate": 9.698722679534487e-05, "loss": 0.1554561138153076, "step": 10620 }, { "epoch": 3.017314788532501, "grad_norm": 9.05704402923584, "learning_rate": 9.698467215441385e-05, "loss": 0.14567044973373414, "step": 10630 }, { "epoch": 3.0201532784558616, "grad_norm": 10.068679809570312, "learning_rate": 9.69818336644905e-05, "loss": 0.15005919933319092, "step": 10640 }, { "epoch": 3.0229917683792222, "grad_norm": 10.81230354309082, "learning_rate": 9.697899517456713e-05, "loss": 0.18711551427841186, "step": 10650 }, { "epoch": 3.025830258302583, "grad_norm": 12.706050872802734, "learning_rate": 9.697615668464378e-05, "loss": 0.14177069664001465, "step": 10660 }, { "epoch": 3.028668748225944, "grad_norm": 14.536253929138184, "learning_rate": 9.69733181947204e-05, "loss": 0.17402513027191163, "step": 10670 }, { "epoch": 3.0315072381493047, "grad_norm": 7.0582780838012695, "learning_rate": 9.697047970479706e-05, "loss": 0.16019707918167114, "step": 10680 }, { "epoch": 3.0343457280726653, "grad_norm": 15.192237854003906, "learning_rate": 9.69676412148737e-05, "loss": 0.21348438262939454, "step": 10690 }, { "epoch": 3.037184217996026, "grad_norm": 11.243151664733887, "learning_rate": 9.696480272495033e-05, "loss": 0.17934962511062622, "step": 10700 }, { "epoch": 3.0400227079193867, "grad_norm": 9.425424575805664, "learning_rate": 9.696196423502697e-05, "loss": 0.12439442873001098, "step": 10710 }, { "epoch": 3.042861197842748, "grad_norm": 11.799715042114258, "learning_rate": 9.695912574510361e-05, "loss": 0.13997917175292968, "step": 10720 }, { "epoch": 3.0456996877661084, "grad_norm": 15.707511901855469, "learning_rate": 9.695628725518024e-05, "loss": 0.15817757844924926, "step": 10730 }, { "epoch": 3.048538177689469, "grad_norm": 9.371976852416992, "learning_rate": 9.69534487652569e-05, "loss": 0.14122697114944457, "step": 10740 }, { "epoch": 3.0513766676128298, "grad_norm": 10.534281730651855, "learning_rate": 9.695061027533354e-05, "loss": 0.14637815952301025, "step": 10750 }, { "epoch": 3.054215157536191, "grad_norm": 11.18614387512207, "learning_rate": 9.694777178541016e-05, "loss": 0.16466747522354125, "step": 10760 }, { "epoch": 3.0570536474595515, "grad_norm": 8.910508155822754, "learning_rate": 9.69449332954868e-05, "loss": 0.14135489463806153, "step": 10770 }, { "epoch": 3.059892137382912, "grad_norm": 13.297048568725586, "learning_rate": 9.694209480556345e-05, "loss": 0.16892271041870116, "step": 10780 }, { "epoch": 3.062730627306273, "grad_norm": 20.510889053344727, "learning_rate": 9.693925631564009e-05, "loss": 0.17377058267593384, "step": 10790 }, { "epoch": 3.065569117229634, "grad_norm": 8.946203231811523, "learning_rate": 9.693641782571671e-05, "loss": 0.15090299844741822, "step": 10800 }, { "epoch": 3.0684076071529947, "grad_norm": 13.670655250549316, "learning_rate": 9.693357933579337e-05, "loss": 0.1799259066581726, "step": 10810 }, { "epoch": 3.0712460970763553, "grad_norm": 11.198412895202637, "learning_rate": 9.693074084587001e-05, "loss": 0.1814640760421753, "step": 10820 }, { "epoch": 3.074084586999716, "grad_norm": 11.021087646484375, "learning_rate": 9.692790235594664e-05, "loss": 0.14725111722946166, "step": 10830 }, { "epoch": 3.076923076923077, "grad_norm": 4.642458438873291, "learning_rate": 9.692506386602328e-05, "loss": 0.0880886435508728, "step": 10840 }, { "epoch": 3.0797615668464378, "grad_norm": 8.138908386230469, "learning_rate": 9.692222537609992e-05, "loss": 0.13502250909805297, "step": 10850 }, { "epoch": 3.0826000567697984, "grad_norm": 10.249444007873535, "learning_rate": 9.691938688617655e-05, "loss": 0.127088725566864, "step": 10860 }, { "epoch": 3.085438546693159, "grad_norm": 14.046932220458984, "learning_rate": 9.691654839625319e-05, "loss": 0.16166425943374635, "step": 10870 }, { "epoch": 3.08827703661652, "grad_norm": 8.6829252243042, "learning_rate": 9.691370990632985e-05, "loss": 0.15032883882522582, "step": 10880 }, { "epoch": 3.091115526539881, "grad_norm": 7.250696182250977, "learning_rate": 9.691087141640647e-05, "loss": 0.13808951377868653, "step": 10890 }, { "epoch": 3.0939540164632415, "grad_norm": 11.446696281433105, "learning_rate": 9.690803292648312e-05, "loss": 0.16500025987625122, "step": 10900 }, { "epoch": 3.096792506386602, "grad_norm": 12.937329292297363, "learning_rate": 9.690519443655976e-05, "loss": 0.16294169425964355, "step": 10910 }, { "epoch": 3.0996309963099633, "grad_norm": 13.984382629394531, "learning_rate": 9.69023559466364e-05, "loss": 0.14896880388259887, "step": 10920 }, { "epoch": 3.102469486233324, "grad_norm": 9.657512664794922, "learning_rate": 9.689951745671303e-05, "loss": 0.12691539525985718, "step": 10930 }, { "epoch": 3.1053079761566846, "grad_norm": 9.15675163269043, "learning_rate": 9.689667896678968e-05, "loss": 0.141265344619751, "step": 10940 }, { "epoch": 3.1081464660800453, "grad_norm": 16.440513610839844, "learning_rate": 9.689384047686632e-05, "loss": 0.14725964069366454, "step": 10950 }, { "epoch": 3.1109849560034064, "grad_norm": 7.351463317871094, "learning_rate": 9.689100198694295e-05, "loss": 0.13106188774108887, "step": 10960 }, { "epoch": 3.113823445926767, "grad_norm": 10.075414657592773, "learning_rate": 9.688816349701959e-05, "loss": 0.1500296711921692, "step": 10970 }, { "epoch": 3.1166619358501277, "grad_norm": 11.4528226852417, "learning_rate": 9.688532500709623e-05, "loss": 0.14042552709579467, "step": 10980 }, { "epoch": 3.1195004257734884, "grad_norm": 7.897055625915527, "learning_rate": 9.688248651717286e-05, "loss": 0.1427959680557251, "step": 10990 }, { "epoch": 3.122338915696849, "grad_norm": 9.173778533935547, "learning_rate": 9.68796480272495e-05, "loss": 0.1363622546195984, "step": 11000 }, { "epoch": 3.122338915696849, "eval_accuracy": 0.9078654543142367, "eval_loss": 0.27136972546577454, "eval_runtime": 45.3531, "eval_samples_per_second": 346.768, "eval_steps_per_second": 5.424, "step": 11000 }, { "epoch": 3.12517740562021, "grad_norm": 14.362506866455078, "learning_rate": 9.687680953732616e-05, "loss": 0.17341303825378418, "step": 11010 }, { "epoch": 3.128015895543571, "grad_norm": 11.589618682861328, "learning_rate": 9.687397104740278e-05, "loss": 0.15291993618011473, "step": 11020 }, { "epoch": 3.1308543854669315, "grad_norm": 12.063961029052734, "learning_rate": 9.687113255747943e-05, "loss": 0.20076916217803956, "step": 11030 }, { "epoch": 3.133692875390292, "grad_norm": 12.555753707885742, "learning_rate": 9.686829406755607e-05, "loss": 0.15198826789855957, "step": 11040 }, { "epoch": 3.1365313653136533, "grad_norm": 10.2974214553833, "learning_rate": 9.686545557763271e-05, "loss": 0.10995292663574219, "step": 11050 }, { "epoch": 3.139369855237014, "grad_norm": 9.610437393188477, "learning_rate": 9.686261708770934e-05, "loss": 0.17646002769470215, "step": 11060 }, { "epoch": 3.1422083451603746, "grad_norm": 13.212011337280273, "learning_rate": 9.685977859778598e-05, "loss": 0.1628280758857727, "step": 11070 }, { "epoch": 3.1450468350837353, "grad_norm": 14.695028305053711, "learning_rate": 9.685694010786262e-05, "loss": 0.15219953060150146, "step": 11080 }, { "epoch": 3.1478853250070964, "grad_norm": 9.574193954467773, "learning_rate": 9.685410161793926e-05, "loss": 0.14558756351470947, "step": 11090 }, { "epoch": 3.150723814930457, "grad_norm": 8.09005069732666, "learning_rate": 9.68512631280159e-05, "loss": 0.1328153967857361, "step": 11100 }, { "epoch": 3.1535623048538177, "grad_norm": 8.114734649658203, "learning_rate": 9.684842463809254e-05, "loss": 0.14638772010803222, "step": 11110 }, { "epoch": 3.1564007947771784, "grad_norm": 13.470097541809082, "learning_rate": 9.684558614816917e-05, "loss": 0.1983910918235779, "step": 11120 }, { "epoch": 3.1592392847005395, "grad_norm": 12.799702644348145, "learning_rate": 9.684274765824581e-05, "loss": 0.1719268560409546, "step": 11130 }, { "epoch": 3.1620777746239, "grad_norm": 12.271860122680664, "learning_rate": 9.683990916832247e-05, "loss": 0.17405003309249878, "step": 11140 }, { "epoch": 3.164916264547261, "grad_norm": 7.507524490356445, "learning_rate": 9.68370706783991e-05, "loss": 0.1475110411643982, "step": 11150 }, { "epoch": 3.1677547544706215, "grad_norm": 10.062581062316895, "learning_rate": 9.683423218847574e-05, "loss": 0.1581020951271057, "step": 11160 }, { "epoch": 3.1705932443939826, "grad_norm": 11.50074291229248, "learning_rate": 9.683139369855238e-05, "loss": 0.14824583530426025, "step": 11170 }, { "epoch": 3.1734317343173433, "grad_norm": 14.899484634399414, "learning_rate": 9.682855520862902e-05, "loss": 0.17019470930099487, "step": 11180 }, { "epoch": 3.176270224240704, "grad_norm": 11.381678581237793, "learning_rate": 9.682571671870565e-05, "loss": 0.1021158218383789, "step": 11190 }, { "epoch": 3.1791087141640646, "grad_norm": 11.494942665100098, "learning_rate": 9.682287822878229e-05, "loss": 0.12169458866119384, "step": 11200 }, { "epoch": 3.1819472040874253, "grad_norm": 8.903223037719727, "learning_rate": 9.682003973885893e-05, "loss": 0.14039865732192994, "step": 11210 }, { "epoch": 3.1847856940107864, "grad_norm": 18.794811248779297, "learning_rate": 9.681720124893557e-05, "loss": 0.1679140329360962, "step": 11220 }, { "epoch": 3.187624183934147, "grad_norm": 10.842596054077148, "learning_rate": 9.681436275901221e-05, "loss": 0.1710715413093567, "step": 11230 }, { "epoch": 3.1904626738575077, "grad_norm": 10.292693138122559, "learning_rate": 9.681152426908885e-05, "loss": 0.1616393208503723, "step": 11240 }, { "epoch": 3.193301163780869, "grad_norm": 11.655790328979492, "learning_rate": 9.680868577916548e-05, "loss": 0.1318575143814087, "step": 11250 }, { "epoch": 3.1961396537042295, "grad_norm": 8.940766334533691, "learning_rate": 9.680584728924212e-05, "loss": 0.12075049877166748, "step": 11260 }, { "epoch": 3.19897814362759, "grad_norm": 11.279410362243652, "learning_rate": 9.680300879931876e-05, "loss": 0.13568800687789917, "step": 11270 }, { "epoch": 3.201816633550951, "grad_norm": 12.064679145812988, "learning_rate": 9.68001703093954e-05, "loss": 0.1762060761451721, "step": 11280 }, { "epoch": 3.2046551234743115, "grad_norm": 20.73368263244629, "learning_rate": 9.679733181947205e-05, "loss": 0.15397260189056397, "step": 11290 }, { "epoch": 3.2074936133976726, "grad_norm": 11.312993049621582, "learning_rate": 9.679449332954869e-05, "loss": 0.15002591609954835, "step": 11300 }, { "epoch": 3.2103321033210332, "grad_norm": 9.954587936401367, "learning_rate": 9.679165483962532e-05, "loss": 0.17322758436203003, "step": 11310 }, { "epoch": 3.213170593244394, "grad_norm": 13.547497749328613, "learning_rate": 9.678881634970196e-05, "loss": 0.17230950593948363, "step": 11320 }, { "epoch": 3.2160090831677546, "grad_norm": 23.023361206054688, "learning_rate": 9.67859778597786e-05, "loss": 0.17132192850112915, "step": 11330 }, { "epoch": 3.2188475730911157, "grad_norm": 13.228828430175781, "learning_rate": 9.678313936985524e-05, "loss": 0.17723389863967895, "step": 11340 }, { "epoch": 3.2216860630144764, "grad_norm": 10.713096618652344, "learning_rate": 9.678030087993188e-05, "loss": 0.13268792629241943, "step": 11350 }, { "epoch": 3.224524552937837, "grad_norm": 11.145319938659668, "learning_rate": 9.677746239000852e-05, "loss": 0.14324551820755005, "step": 11360 }, { "epoch": 3.2273630428611977, "grad_norm": 12.382258415222168, "learning_rate": 9.677462390008516e-05, "loss": 0.20424387454986573, "step": 11370 }, { "epoch": 3.230201532784559, "grad_norm": 13.948812484741211, "learning_rate": 9.677178541016179e-05, "loss": 0.1765672206878662, "step": 11380 }, { "epoch": 3.2330400227079195, "grad_norm": 12.375029563903809, "learning_rate": 9.676894692023843e-05, "loss": 0.17043259143829345, "step": 11390 }, { "epoch": 3.23587851263128, "grad_norm": 5.840155601501465, "learning_rate": 9.676610843031508e-05, "loss": 0.14688397645950318, "step": 11400 }, { "epoch": 3.238717002554641, "grad_norm": 13.304166793823242, "learning_rate": 9.676326994039172e-05, "loss": 0.12399591207504272, "step": 11410 }, { "epoch": 3.241555492478002, "grad_norm": 8.98900318145752, "learning_rate": 9.676043145046836e-05, "loss": 0.14647464752197265, "step": 11420 }, { "epoch": 3.2443939824013626, "grad_norm": 12.95863151550293, "learning_rate": 9.6757592960545e-05, "loss": 0.18155845403671264, "step": 11430 }, { "epoch": 3.2472324723247232, "grad_norm": 8.929644584655762, "learning_rate": 9.675475447062163e-05, "loss": 0.1678781032562256, "step": 11440 }, { "epoch": 3.250070962248084, "grad_norm": 18.598772048950195, "learning_rate": 9.675191598069827e-05, "loss": 0.18768832683563233, "step": 11450 }, { "epoch": 3.252909452171445, "grad_norm": 15.839473724365234, "learning_rate": 9.674907749077491e-05, "loss": 0.18556362390518188, "step": 11460 }, { "epoch": 3.2557479420948057, "grad_norm": 7.3281426429748535, "learning_rate": 9.674623900085155e-05, "loss": 0.15311427116394044, "step": 11470 }, { "epoch": 3.2585864320181663, "grad_norm": 8.276244163513184, "learning_rate": 9.674340051092819e-05, "loss": 0.12923117876052856, "step": 11480 }, { "epoch": 3.261424921941527, "grad_norm": 10.475846290588379, "learning_rate": 9.674056202100483e-05, "loss": 0.14876191616058348, "step": 11490 }, { "epoch": 3.2642634118648877, "grad_norm": 9.121557235717773, "learning_rate": 9.673772353108148e-05, "loss": 0.13356937170028688, "step": 11500 }, { "epoch": 3.2642634118648877, "eval_accuracy": 0.9032237553252368, "eval_loss": 0.2833595275878906, "eval_runtime": 46.3423, "eval_samples_per_second": 339.366, "eval_steps_per_second": 5.308, "step": 11500 }, { "epoch": 3.2671019017882488, "grad_norm": 6.445477485656738, "learning_rate": 9.67348850411581e-05, "loss": 0.1814670443534851, "step": 11510 }, { "epoch": 3.2699403917116094, "grad_norm": 11.865300178527832, "learning_rate": 9.673204655123474e-05, "loss": 0.13304414749145507, "step": 11520 }, { "epoch": 3.27277888163497, "grad_norm": 6.172824859619141, "learning_rate": 9.672920806131139e-05, "loss": 0.10327214002609253, "step": 11530 }, { "epoch": 3.275617371558331, "grad_norm": 13.082752227783203, "learning_rate": 9.672636957138803e-05, "loss": 0.1435145616531372, "step": 11540 }, { "epoch": 3.278455861481692, "grad_norm": 10.705196380615234, "learning_rate": 9.672353108146467e-05, "loss": 0.12174123525619507, "step": 11550 }, { "epoch": 3.2812943514050525, "grad_norm": 6.458785533905029, "learning_rate": 9.672069259154131e-05, "loss": 0.1434764862060547, "step": 11560 }, { "epoch": 3.284132841328413, "grad_norm": 11.696215629577637, "learning_rate": 9.671785410161794e-05, "loss": 0.18122624158859252, "step": 11570 }, { "epoch": 3.286971331251774, "grad_norm": 16.7047061920166, "learning_rate": 9.671501561169458e-05, "loss": 0.17361868619918824, "step": 11580 }, { "epoch": 3.289809821175135, "grad_norm": 6.451972007751465, "learning_rate": 9.671217712177122e-05, "loss": 0.16989243030548096, "step": 11590 }, { "epoch": 3.2926483110984957, "grad_norm": 5.240332126617432, "learning_rate": 9.670933863184786e-05, "loss": 0.15108165740966797, "step": 11600 }, { "epoch": 3.2954868010218563, "grad_norm": 9.199390411376953, "learning_rate": 9.67065001419245e-05, "loss": 0.1350209355354309, "step": 11610 }, { "epoch": 3.298325290945217, "grad_norm": 11.945877075195312, "learning_rate": 9.670366165200115e-05, "loss": 0.1771214246749878, "step": 11620 }, { "epoch": 3.301163780868578, "grad_norm": 10.108057022094727, "learning_rate": 9.670082316207779e-05, "loss": 0.14468319416046144, "step": 11630 }, { "epoch": 3.3040022707919388, "grad_norm": 6.3723464012146, "learning_rate": 9.669798467215441e-05, "loss": 0.13560099601745607, "step": 11640 }, { "epoch": 3.3068407607152994, "grad_norm": 11.101216316223145, "learning_rate": 9.669514618223106e-05, "loss": 0.14702677726745605, "step": 11650 }, { "epoch": 3.30967925063866, "grad_norm": 11.121957778930664, "learning_rate": 9.66923076923077e-05, "loss": 0.16260606050491333, "step": 11660 }, { "epoch": 3.312517740562021, "grad_norm": 13.649311065673828, "learning_rate": 9.668946920238432e-05, "loss": 0.14691877365112305, "step": 11670 }, { "epoch": 3.315356230485382, "grad_norm": 9.027546882629395, "learning_rate": 9.668663071246098e-05, "loss": 0.13184075355529784, "step": 11680 }, { "epoch": 3.3181947204087425, "grad_norm": 12.529989242553711, "learning_rate": 9.668379222253762e-05, "loss": 0.14988337755203246, "step": 11690 }, { "epoch": 3.321033210332103, "grad_norm": 5.061054706573486, "learning_rate": 9.668095373261425e-05, "loss": 0.1402359962463379, "step": 11700 }, { "epoch": 3.323871700255464, "grad_norm": 17.001625061035156, "learning_rate": 9.667811524269089e-05, "loss": 0.15122188329696656, "step": 11710 }, { "epoch": 3.326710190178825, "grad_norm": 12.765850067138672, "learning_rate": 9.667527675276753e-05, "loss": 0.15265630483627318, "step": 11720 }, { "epoch": 3.3295486801021856, "grad_norm": 10.951409339904785, "learning_rate": 9.667243826284417e-05, "loss": 0.11330270767211914, "step": 11730 }, { "epoch": 3.3323871700255463, "grad_norm": 7.664775371551514, "learning_rate": 9.666959977292081e-05, "loss": 0.14588812589645386, "step": 11740 }, { "epoch": 3.3352256599489074, "grad_norm": 17.489843368530273, "learning_rate": 9.666676128299746e-05, "loss": 0.15147066116333008, "step": 11750 }, { "epoch": 3.338064149872268, "grad_norm": 7.865429878234863, "learning_rate": 9.66639227930741e-05, "loss": 0.1377892255783081, "step": 11760 }, { "epoch": 3.3409026397956287, "grad_norm": 6.333840370178223, "learning_rate": 9.666108430315072e-05, "loss": 0.13421481847763062, "step": 11770 }, { "epoch": 3.3437411297189894, "grad_norm": 8.636858940124512, "learning_rate": 9.665824581322737e-05, "loss": 0.1649019956588745, "step": 11780 }, { "epoch": 3.34657961964235, "grad_norm": 13.328131675720215, "learning_rate": 9.665540732330401e-05, "loss": 0.15546318292617797, "step": 11790 }, { "epoch": 3.349418109565711, "grad_norm": 10.0093355178833, "learning_rate": 9.665256883338064e-05, "loss": 0.14049407243728637, "step": 11800 }, { "epoch": 3.352256599489072, "grad_norm": 16.31190299987793, "learning_rate": 9.664973034345729e-05, "loss": 0.15132972002029418, "step": 11810 }, { "epoch": 3.3550950894124325, "grad_norm": 11.242594718933105, "learning_rate": 9.664689185353393e-05, "loss": 0.2063140392303467, "step": 11820 }, { "epoch": 3.357933579335793, "grad_norm": 11.833036422729492, "learning_rate": 9.664405336361056e-05, "loss": 0.16879596710205078, "step": 11830 }, { "epoch": 3.3607720692591543, "grad_norm": 12.876978874206543, "learning_rate": 9.66412148736872e-05, "loss": 0.12961652278900146, "step": 11840 }, { "epoch": 3.363610559182515, "grad_norm": 18.58479881286621, "learning_rate": 9.663837638376384e-05, "loss": 0.1831730842590332, "step": 11850 }, { "epoch": 3.3664490491058756, "grad_norm": 9.960158348083496, "learning_rate": 9.663553789384048e-05, "loss": 0.12919691801071168, "step": 11860 }, { "epoch": 3.3692875390292363, "grad_norm": 10.840004920959473, "learning_rate": 9.663269940391711e-05, "loss": 0.16478891372680665, "step": 11870 }, { "epoch": 3.3721260289525974, "grad_norm": 19.694351196289062, "learning_rate": 9.662986091399377e-05, "loss": 0.15939066410064698, "step": 11880 }, { "epoch": 3.374964518875958, "grad_norm": 11.789313316345215, "learning_rate": 9.662702242407041e-05, "loss": 0.19280238151550294, "step": 11890 }, { "epoch": 3.3778030087993187, "grad_norm": 11.413835525512695, "learning_rate": 9.662418393414704e-05, "loss": 0.15336452722549437, "step": 11900 }, { "epoch": 3.3806414987226794, "grad_norm": 10.671963691711426, "learning_rate": 9.662134544422368e-05, "loss": 0.13012861013412474, "step": 11910 }, { "epoch": 3.3834799886460405, "grad_norm": 14.077119827270508, "learning_rate": 9.661850695430032e-05, "loss": 0.15267839431762695, "step": 11920 }, { "epoch": 3.386318478569401, "grad_norm": 9.638855934143066, "learning_rate": 9.661566846437695e-05, "loss": 0.14778499603271483, "step": 11930 }, { "epoch": 3.389156968492762, "grad_norm": 15.305093765258789, "learning_rate": 9.66128299744536e-05, "loss": 0.16846277713775634, "step": 11940 }, { "epoch": 3.3919954584161225, "grad_norm": 14.396567344665527, "learning_rate": 9.660999148453024e-05, "loss": 0.1654866099357605, "step": 11950 }, { "epoch": 3.3948339483394836, "grad_norm": 9.350953102111816, "learning_rate": 9.660715299460687e-05, "loss": 0.18241506814956665, "step": 11960 }, { "epoch": 3.3976724382628443, "grad_norm": 12.4446439743042, "learning_rate": 9.660431450468351e-05, "loss": 0.15514891147613524, "step": 11970 }, { "epoch": 3.400510928186205, "grad_norm": 11.974370002746582, "learning_rate": 9.660147601476015e-05, "loss": 0.13358899354934692, "step": 11980 }, { "epoch": 3.4033494181095656, "grad_norm": 6.528241157531738, "learning_rate": 9.65986375248368e-05, "loss": 0.16397557258605958, "step": 11990 }, { "epoch": 3.4061879080329263, "grad_norm": 8.474517822265625, "learning_rate": 9.659579903491342e-05, "loss": 0.11033121347427369, "step": 12000 }, { "epoch": 3.4061879080329263, "eval_accuracy": 0.9119348890443187, "eval_loss": 0.2643587589263916, "eval_runtime": 48.987, "eval_samples_per_second": 321.044, "eval_steps_per_second": 5.022, "step": 12000 }, { "epoch": 3.4090263979562874, "grad_norm": 12.411487579345703, "learning_rate": 9.659296054499008e-05, "loss": 0.14438670873641968, "step": 12010 }, { "epoch": 3.411864887879648, "grad_norm": 8.046468734741211, "learning_rate": 9.65901220550667e-05, "loss": 0.136596941947937, "step": 12020 }, { "epoch": 3.4147033778030087, "grad_norm": 6.668715476989746, "learning_rate": 9.658728356514335e-05, "loss": 0.15121634006500245, "step": 12030 }, { "epoch": 3.41754186772637, "grad_norm": 15.477483749389648, "learning_rate": 9.658444507521999e-05, "loss": 0.15167651176452637, "step": 12040 }, { "epoch": 3.4203803576497305, "grad_norm": 11.307235717773438, "learning_rate": 9.658160658529663e-05, "loss": 0.16462146043777465, "step": 12050 }, { "epoch": 3.423218847573091, "grad_norm": 16.42156219482422, "learning_rate": 9.657876809537326e-05, "loss": 0.1635150909423828, "step": 12060 }, { "epoch": 3.426057337496452, "grad_norm": 15.635255813598633, "learning_rate": 9.657592960544991e-05, "loss": 0.2087777614593506, "step": 12070 }, { "epoch": 3.4288958274198125, "grad_norm": 13.600241661071777, "learning_rate": 9.657309111552655e-05, "loss": 0.13118594884872437, "step": 12080 }, { "epoch": 3.4317343173431736, "grad_norm": 6.2261762619018555, "learning_rate": 9.657025262560318e-05, "loss": 0.11638147830963134, "step": 12090 }, { "epoch": 3.4345728072665342, "grad_norm": 10.106011390686035, "learning_rate": 9.656741413567982e-05, "loss": 0.13335254192352294, "step": 12100 }, { "epoch": 3.437411297189895, "grad_norm": 11.471528053283691, "learning_rate": 9.656457564575646e-05, "loss": 0.1739979386329651, "step": 12110 }, { "epoch": 3.4402497871132556, "grad_norm": 10.41161823272705, "learning_rate": 9.656173715583309e-05, "loss": 0.13894728422164918, "step": 12120 }, { "epoch": 3.4430882770366167, "grad_norm": 5.42951774597168, "learning_rate": 9.655889866590973e-05, "loss": 0.1250569701194763, "step": 12130 }, { "epoch": 3.4459267669599773, "grad_norm": 14.104445457458496, "learning_rate": 9.655606017598639e-05, "loss": 0.15783209800720216, "step": 12140 }, { "epoch": 3.448765256883338, "grad_norm": 11.670680046081543, "learning_rate": 9.655322168606302e-05, "loss": 0.11987025737762451, "step": 12150 }, { "epoch": 3.4516037468066987, "grad_norm": 7.261173248291016, "learning_rate": 9.655038319613966e-05, "loss": 0.12436482906341553, "step": 12160 }, { "epoch": 3.45444223673006, "grad_norm": 10.725381851196289, "learning_rate": 9.65475447062163e-05, "loss": 0.13203428983688353, "step": 12170 }, { "epoch": 3.4572807266534205, "grad_norm": 13.40053653717041, "learning_rate": 9.654470621629294e-05, "loss": 0.15290054082870483, "step": 12180 }, { "epoch": 3.460119216576781, "grad_norm": 5.119351387023926, "learning_rate": 9.654186772636957e-05, "loss": 0.12637677192687988, "step": 12190 }, { "epoch": 3.462957706500142, "grad_norm": 13.761574745178223, "learning_rate": 9.653902923644621e-05, "loss": 0.1636880874633789, "step": 12200 }, { "epoch": 3.4657961964235025, "grad_norm": 10.164427757263184, "learning_rate": 9.653619074652286e-05, "loss": 0.12211843729019164, "step": 12210 }, { "epoch": 3.4686346863468636, "grad_norm": 10.64016342163086, "learning_rate": 9.653335225659949e-05, "loss": 0.12546112537384033, "step": 12220 }, { "epoch": 3.4714731762702242, "grad_norm": 10.817879676818848, "learning_rate": 9.653051376667613e-05, "loss": 0.14835540056228638, "step": 12230 }, { "epoch": 3.474311666193585, "grad_norm": 12.837498664855957, "learning_rate": 9.652767527675277e-05, "loss": 0.092954820394516, "step": 12240 }, { "epoch": 3.477150156116946, "grad_norm": 16.125133514404297, "learning_rate": 9.65248367868294e-05, "loss": 0.10975875854492187, "step": 12250 }, { "epoch": 3.4799886460403067, "grad_norm": 12.137495994567871, "learning_rate": 9.652199829690604e-05, "loss": 0.1667574167251587, "step": 12260 }, { "epoch": 3.4828271359636673, "grad_norm": 12.842817306518555, "learning_rate": 9.65191598069827e-05, "loss": 0.13560017347335815, "step": 12270 }, { "epoch": 3.485665625887028, "grad_norm": 14.219481468200684, "learning_rate": 9.651632131705933e-05, "loss": 0.16405098438262938, "step": 12280 }, { "epoch": 3.4885041158103887, "grad_norm": 9.038618087768555, "learning_rate": 9.651348282713597e-05, "loss": 0.12552303075790405, "step": 12290 }, { "epoch": 3.4913426057337498, "grad_norm": 5.055552005767822, "learning_rate": 9.651064433721261e-05, "loss": 0.12906919717788695, "step": 12300 }, { "epoch": 3.4941810956571104, "grad_norm": 7.11513614654541, "learning_rate": 9.650780584728925e-05, "loss": 0.16867920160293579, "step": 12310 }, { "epoch": 3.497019585580471, "grad_norm": 12.284077644348145, "learning_rate": 9.650496735736588e-05, "loss": 0.18003761768341064, "step": 12320 }, { "epoch": 3.499858075503832, "grad_norm": 14.935282707214355, "learning_rate": 9.650212886744252e-05, "loss": 0.1326635718345642, "step": 12330 }, { "epoch": 3.502696565427193, "grad_norm": 10.751022338867188, "learning_rate": 9.649929037751917e-05, "loss": 0.17290703058242798, "step": 12340 }, { "epoch": 3.5055350553505535, "grad_norm": 9.223835945129395, "learning_rate": 9.64964518875958e-05, "loss": 0.15955530405044555, "step": 12350 }, { "epoch": 3.508373545273914, "grad_norm": 14.708523750305176, "learning_rate": 9.649361339767244e-05, "loss": 0.15324472188949584, "step": 12360 }, { "epoch": 3.511212035197275, "grad_norm": 13.388874053955078, "learning_rate": 9.649077490774909e-05, "loss": 0.18602864742279052, "step": 12370 }, { "epoch": 3.514050525120636, "grad_norm": 6.74630069732666, "learning_rate": 9.648793641782571e-05, "loss": 0.09275709390640259, "step": 12380 }, { "epoch": 3.5168890150439966, "grad_norm": 12.86048412322998, "learning_rate": 9.648509792790235e-05, "loss": 0.12048506736755371, "step": 12390 }, { "epoch": 3.5197275049673573, "grad_norm": 14.406123161315918, "learning_rate": 9.6482259437979e-05, "loss": 0.1484326720237732, "step": 12400 }, { "epoch": 3.522565994890718, "grad_norm": 11.865017890930176, "learning_rate": 9.647942094805564e-05, "loss": 0.19406474828720094, "step": 12410 }, { "epoch": 3.5254044848140786, "grad_norm": 12.934172630310059, "learning_rate": 9.647658245813228e-05, "loss": 0.14454905986785888, "step": 12420 }, { "epoch": 3.5282429747374398, "grad_norm": 6.276732921600342, "learning_rate": 9.647374396820892e-05, "loss": 0.13644802570343018, "step": 12430 }, { "epoch": 3.5310814646608004, "grad_norm": 12.897501945495605, "learning_rate": 9.647090547828556e-05, "loss": 0.12124760150909424, "step": 12440 }, { "epoch": 3.533919954584161, "grad_norm": 10.201253890991211, "learning_rate": 9.646806698836219e-05, "loss": 0.12724809646606444, "step": 12450 }, { "epoch": 3.536758444507522, "grad_norm": 12.802302360534668, "learning_rate": 9.646522849843883e-05, "loss": 0.12096166610717773, "step": 12460 }, { "epoch": 3.539596934430883, "grad_norm": 8.79740047454834, "learning_rate": 9.646239000851549e-05, "loss": 0.10809562206268311, "step": 12470 }, { "epoch": 3.5424354243542435, "grad_norm": 7.564057350158691, "learning_rate": 9.645955151859211e-05, "loss": 0.1411568284034729, "step": 12480 }, { "epoch": 3.545273914277604, "grad_norm": 7.985098361968994, "learning_rate": 9.645671302866875e-05, "loss": 0.13520214557647706, "step": 12490 }, { "epoch": 3.548112404200965, "grad_norm": 8.886762619018555, "learning_rate": 9.64538745387454e-05, "loss": 0.12970778942108155, "step": 12500 }, { "epoch": 3.548112404200965, "eval_accuracy": 0.9035416799135245, "eval_loss": 0.2802216708660126, "eval_runtime": 40.2941, "eval_samples_per_second": 390.305, "eval_steps_per_second": 6.105, "step": 12500 }, { "epoch": 3.550950894124326, "grad_norm": 14.358380317687988, "learning_rate": 9.645103604882202e-05, "loss": 0.16223762035369874, "step": 12510 }, { "epoch": 3.5537893840476866, "grad_norm": 10.07137680053711, "learning_rate": 9.644819755889867e-05, "loss": 0.13493602275848388, "step": 12520 }, { "epoch": 3.5566278739710473, "grad_norm": 22.109006881713867, "learning_rate": 9.644535906897531e-05, "loss": 0.14561716318130494, "step": 12530 }, { "epoch": 3.5594663638944084, "grad_norm": 3.5231778621673584, "learning_rate": 9.644252057905195e-05, "loss": 0.14778690338134765, "step": 12540 }, { "epoch": 3.562304853817769, "grad_norm": 8.588858604431152, "learning_rate": 9.643968208912859e-05, "loss": 0.12953715324401854, "step": 12550 }, { "epoch": 3.5651433437411297, "grad_norm": 7.9311957359313965, "learning_rate": 9.643684359920523e-05, "loss": 0.16757124662399292, "step": 12560 }, { "epoch": 3.5679818336644904, "grad_norm": 4.043559551239014, "learning_rate": 9.643400510928187e-05, "loss": 0.1119659423828125, "step": 12570 }, { "epoch": 3.570820323587851, "grad_norm": 7.296187877655029, "learning_rate": 9.64311666193585e-05, "loss": 0.15805888175964355, "step": 12580 }, { "epoch": 3.573658813511212, "grad_norm": 8.013771057128906, "learning_rate": 9.642832812943514e-05, "loss": 0.1364515542984009, "step": 12590 }, { "epoch": 3.576497303434573, "grad_norm": 8.823838233947754, "learning_rate": 9.642548963951178e-05, "loss": 0.14326345920562744, "step": 12600 }, { "epoch": 3.5793357933579335, "grad_norm": 13.172597885131836, "learning_rate": 9.642265114958842e-05, "loss": 0.1583712577819824, "step": 12610 }, { "epoch": 3.5821742832812946, "grad_norm": 9.35103702545166, "learning_rate": 9.641981265966507e-05, "loss": 0.1317357063293457, "step": 12620 }, { "epoch": 3.5850127732046553, "grad_norm": 12.366024017333984, "learning_rate": 9.641697416974171e-05, "loss": 0.1660614490509033, "step": 12630 }, { "epoch": 3.587851263128016, "grad_norm": 11.392767906188965, "learning_rate": 9.641413567981833e-05, "loss": 0.15276314020156861, "step": 12640 }, { "epoch": 3.5906897530513766, "grad_norm": 10.289289474487305, "learning_rate": 9.641129718989498e-05, "loss": 0.16830862760543824, "step": 12650 }, { "epoch": 3.5935282429747373, "grad_norm": 12.024338722229004, "learning_rate": 9.640845869997162e-05, "loss": 0.12334600687026978, "step": 12660 }, { "epoch": 3.5963667328980984, "grad_norm": 7.976048946380615, "learning_rate": 9.640562021004826e-05, "loss": 0.11718854904174805, "step": 12670 }, { "epoch": 3.599205222821459, "grad_norm": 15.10072135925293, "learning_rate": 9.64027817201249e-05, "loss": 0.13472546339035035, "step": 12680 }, { "epoch": 3.6020437127448197, "grad_norm": 9.130041122436523, "learning_rate": 9.639994323020154e-05, "loss": 0.13344259262084962, "step": 12690 }, { "epoch": 3.6048822026681804, "grad_norm": 10.196064949035645, "learning_rate": 9.639710474027818e-05, "loss": 0.1438470721244812, "step": 12700 }, { "epoch": 3.607720692591541, "grad_norm": 12.85401439666748, "learning_rate": 9.639426625035481e-05, "loss": 0.14456666707992555, "step": 12710 }, { "epoch": 3.610559182514902, "grad_norm": 17.068899154663086, "learning_rate": 9.639142776043145e-05, "loss": 0.16659666299819947, "step": 12720 }, { "epoch": 3.613397672438263, "grad_norm": 15.0862455368042, "learning_rate": 9.63885892705081e-05, "loss": 0.1558910608291626, "step": 12730 }, { "epoch": 3.6162361623616235, "grad_norm": 10.724343299865723, "learning_rate": 9.638575078058474e-05, "loss": 0.11232625246047974, "step": 12740 }, { "epoch": 3.6190746522849846, "grad_norm": 8.526631355285645, "learning_rate": 9.638291229066138e-05, "loss": 0.11282163858413696, "step": 12750 }, { "epoch": 3.6219131422083453, "grad_norm": 7.790524959564209, "learning_rate": 9.638007380073802e-05, "loss": 0.12941310405731202, "step": 12760 }, { "epoch": 3.624751632131706, "grad_norm": 8.933027267456055, "learning_rate": 9.637723531081465e-05, "loss": 0.15615637302398683, "step": 12770 }, { "epoch": 3.6275901220550666, "grad_norm": 8.623051643371582, "learning_rate": 9.637439682089129e-05, "loss": 0.13030364513397216, "step": 12780 }, { "epoch": 3.6304286119784273, "grad_norm": 4.148929119110107, "learning_rate": 9.637155833096793e-05, "loss": 0.10451631546020508, "step": 12790 }, { "epoch": 3.6332671019017884, "grad_norm": 10.418755531311035, "learning_rate": 9.636871984104457e-05, "loss": 0.1058707594871521, "step": 12800 }, { "epoch": 3.636105591825149, "grad_norm": 14.228039741516113, "learning_rate": 9.636588135112121e-05, "loss": 0.14562081098556517, "step": 12810 }, { "epoch": 3.6389440817485097, "grad_norm": 21.89490509033203, "learning_rate": 9.636304286119785e-05, "loss": 0.18454208374023437, "step": 12820 }, { "epoch": 3.641782571671871, "grad_norm": 10.988327026367188, "learning_rate": 9.63602043712745e-05, "loss": 0.1800578474998474, "step": 12830 }, { "epoch": 3.6446210615952315, "grad_norm": 9.786598205566406, "learning_rate": 9.635736588135112e-05, "loss": 0.14112477302551268, "step": 12840 }, { "epoch": 3.647459551518592, "grad_norm": 8.201181411743164, "learning_rate": 9.635452739142776e-05, "loss": 0.13922977447509766, "step": 12850 }, { "epoch": 3.650298041441953, "grad_norm": 11.358736038208008, "learning_rate": 9.63516889015044e-05, "loss": 0.13420796394348145, "step": 12860 }, { "epoch": 3.6531365313653135, "grad_norm": 12.910623550415039, "learning_rate": 9.634885041158105e-05, "loss": 0.1824745535850525, "step": 12870 }, { "epoch": 3.6559750212886746, "grad_norm": 13.174339294433594, "learning_rate": 9.634601192165769e-05, "loss": 0.15212289094924927, "step": 12880 }, { "epoch": 3.6588135112120352, "grad_norm": 9.043191909790039, "learning_rate": 9.634317343173433e-05, "loss": 0.12412445545196533, "step": 12890 }, { "epoch": 3.661652001135396, "grad_norm": 7.539735317230225, "learning_rate": 9.634033494181096e-05, "loss": 0.11652603149414062, "step": 12900 }, { "epoch": 3.664490491058757, "grad_norm": 5.98252010345459, "learning_rate": 9.63374964518876e-05, "loss": 0.12573798894882202, "step": 12910 }, { "epoch": 3.6673289809821172, "grad_norm": 12.798454284667969, "learning_rate": 9.633465796196424e-05, "loss": 0.11952928304672242, "step": 12920 }, { "epoch": 3.6701674709054783, "grad_norm": 13.444751739501953, "learning_rate": 9.633181947204088e-05, "loss": 0.09641913771629333, "step": 12930 }, { "epoch": 3.673005960828839, "grad_norm": 11.645222663879395, "learning_rate": 9.632898098211752e-05, "loss": 0.12786115407943727, "step": 12940 }, { "epoch": 3.6758444507521997, "grad_norm": 5.454982757568359, "learning_rate": 9.632614249219416e-05, "loss": 0.10404405593872071, "step": 12950 }, { "epoch": 3.678682940675561, "grad_norm": 6.196654796600342, "learning_rate": 9.632330400227079e-05, "loss": 0.14941078424453735, "step": 12960 }, { "epoch": 3.6815214305989215, "grad_norm": 10.651656150817871, "learning_rate": 9.632046551234743e-05, "loss": 0.12012120485305786, "step": 12970 }, { "epoch": 3.684359920522282, "grad_norm": 6.000016689300537, "learning_rate": 9.631762702242407e-05, "loss": 0.1179958462715149, "step": 12980 }, { "epoch": 3.687198410445643, "grad_norm": 16.31242561340332, "learning_rate": 9.631478853250072e-05, "loss": 0.19535436630249023, "step": 12990 }, { "epoch": 3.6900369003690034, "grad_norm": 7.7373366355896, "learning_rate": 9.631195004257734e-05, "loss": 0.1359477996826172, "step": 13000 }, { "epoch": 3.6900369003690034, "eval_accuracy": 0.9204552680104279, "eval_loss": 0.23924162983894348, "eval_runtime": 43.7632, "eval_samples_per_second": 359.366, "eval_steps_per_second": 5.621, "step": 13000 }, { "epoch": 3.6928753902923646, "grad_norm": 13.128469467163086, "learning_rate": 9.6309111552654e-05, "loss": 0.13705849647521973, "step": 13010 }, { "epoch": 3.6957138802157252, "grad_norm": 4.269360065460205, "learning_rate": 9.630627306273064e-05, "loss": 0.151450514793396, "step": 13020 }, { "epoch": 3.698552370139086, "grad_norm": 17.193418502807617, "learning_rate": 9.630343457280727e-05, "loss": 0.1382644534111023, "step": 13030 }, { "epoch": 3.701390860062447, "grad_norm": 16.792936325073242, "learning_rate": 9.630059608288391e-05, "loss": 0.15913288593292235, "step": 13040 }, { "epoch": 3.7042293499858077, "grad_norm": 7.32480525970459, "learning_rate": 9.629775759296055e-05, "loss": 0.11819169521331788, "step": 13050 }, { "epoch": 3.7070678399091683, "grad_norm": 11.53783130645752, "learning_rate": 9.629491910303718e-05, "loss": 0.1410749673843384, "step": 13060 }, { "epoch": 3.709906329832529, "grad_norm": 11.016429901123047, "learning_rate": 9.629208061311383e-05, "loss": 0.08910006284713745, "step": 13070 }, { "epoch": 3.7127448197558897, "grad_norm": 8.25999641418457, "learning_rate": 9.628924212319047e-05, "loss": 0.12432101964950562, "step": 13080 }, { "epoch": 3.7155833096792508, "grad_norm": 9.341691017150879, "learning_rate": 9.62864036332671e-05, "loss": 0.1892438054084778, "step": 13090 }, { "epoch": 3.7184217996026114, "grad_norm": 11.550322532653809, "learning_rate": 9.628356514334374e-05, "loss": 0.14536328315734864, "step": 13100 }, { "epoch": 3.721260289525972, "grad_norm": 12.449294090270996, "learning_rate": 9.628072665342038e-05, "loss": 0.09549451470375062, "step": 13110 }, { "epoch": 3.724098779449333, "grad_norm": 11.374482154846191, "learning_rate": 9.627788816349703e-05, "loss": 0.14721710681915284, "step": 13120 }, { "epoch": 3.726937269372694, "grad_norm": 8.726109504699707, "learning_rate": 9.627504967357365e-05, "loss": 0.11882315874099732, "step": 13130 }, { "epoch": 3.7297757592960545, "grad_norm": 7.35357141494751, "learning_rate": 9.627221118365031e-05, "loss": 0.123272705078125, "step": 13140 }, { "epoch": 3.732614249219415, "grad_norm": 8.318432807922363, "learning_rate": 9.626937269372695e-05, "loss": 0.1153672218322754, "step": 13150 }, { "epoch": 3.735452739142776, "grad_norm": 7.076030731201172, "learning_rate": 9.626653420380358e-05, "loss": 0.14233273267745972, "step": 13160 }, { "epoch": 3.738291229066137, "grad_norm": 11.465832710266113, "learning_rate": 9.626369571388022e-05, "loss": 0.13837801218032836, "step": 13170 }, { "epoch": 3.7411297189894976, "grad_norm": 13.135468482971191, "learning_rate": 9.626085722395686e-05, "loss": 0.1558758497238159, "step": 13180 }, { "epoch": 3.7439682089128583, "grad_norm": 11.968530654907227, "learning_rate": 9.625801873403349e-05, "loss": 0.16677323579788209, "step": 13190 }, { "epoch": 3.746806698836219, "grad_norm": 11.770331382751465, "learning_rate": 9.625518024411014e-05, "loss": 0.12444937229156494, "step": 13200 }, { "epoch": 3.7496451887595796, "grad_norm": 13.442785263061523, "learning_rate": 9.625234175418678e-05, "loss": 0.1786947727203369, "step": 13210 }, { "epoch": 3.7524836786829407, "grad_norm": 12.296490669250488, "learning_rate": 9.624950326426341e-05, "loss": 0.1548254370689392, "step": 13220 }, { "epoch": 3.7553221686063014, "grad_norm": 9.341365814208984, "learning_rate": 9.624666477434005e-05, "loss": 0.12017190456390381, "step": 13230 }, { "epoch": 3.758160658529662, "grad_norm": 7.341797351837158, "learning_rate": 9.624411013340902e-05, "loss": 0.16488142013549806, "step": 13240 }, { "epoch": 3.760999148453023, "grad_norm": 5.137293815612793, "learning_rate": 9.624127164348568e-05, "loss": 0.1583306908607483, "step": 13250 }, { "epoch": 3.763837638376384, "grad_norm": 12.23542594909668, "learning_rate": 9.623843315356232e-05, "loss": 0.1462108612060547, "step": 13260 }, { "epoch": 3.7666761282997445, "grad_norm": 6.2007527351379395, "learning_rate": 9.623559466363894e-05, "loss": 0.09977462291717529, "step": 13270 }, { "epoch": 3.769514618223105, "grad_norm": 10.17895221710205, "learning_rate": 9.623275617371559e-05, "loss": 0.12059725522994995, "step": 13280 }, { "epoch": 3.772353108146466, "grad_norm": 14.211423873901367, "learning_rate": 9.622991768379223e-05, "loss": 0.1303005337715149, "step": 13290 }, { "epoch": 3.775191598069827, "grad_norm": 9.772232055664062, "learning_rate": 9.622707919386887e-05, "loss": 0.14880988597869874, "step": 13300 }, { "epoch": 3.7780300879931876, "grad_norm": 13.374626159667969, "learning_rate": 9.62242407039455e-05, "loss": 0.1577385902404785, "step": 13310 }, { "epoch": 3.7808685779165483, "grad_norm": 8.198537826538086, "learning_rate": 9.622140221402215e-05, "loss": 0.10466568470001221, "step": 13320 }, { "epoch": 3.7837070678399094, "grad_norm": 5.434967994689941, "learning_rate": 9.621856372409879e-05, "loss": 0.11640899181365967, "step": 13330 }, { "epoch": 3.78654555776327, "grad_norm": 6.071686744689941, "learning_rate": 9.621572523417542e-05, "loss": 0.11794052124023438, "step": 13340 }, { "epoch": 3.7893840476866307, "grad_norm": 9.97398567199707, "learning_rate": 9.621288674425206e-05, "loss": 0.15355685949325562, "step": 13350 }, { "epoch": 3.7922225376099914, "grad_norm": 12.7194185256958, "learning_rate": 9.62100482543287e-05, "loss": 0.1407650351524353, "step": 13360 }, { "epoch": 3.795061027533352, "grad_norm": 16.020071029663086, "learning_rate": 9.620720976440533e-05, "loss": 0.17545218467712403, "step": 13370 }, { "epoch": 3.797899517456713, "grad_norm": 9.311351776123047, "learning_rate": 9.620437127448197e-05, "loss": 0.11551628112792969, "step": 13380 }, { "epoch": 3.800738007380074, "grad_norm": 12.669973373413086, "learning_rate": 9.620153278455863e-05, "loss": 0.1381611704826355, "step": 13390 }, { "epoch": 3.8035764973034345, "grad_norm": 11.55171012878418, "learning_rate": 9.619869429463526e-05, "loss": 0.12705569267272948, "step": 13400 }, { "epoch": 3.8064149872267956, "grad_norm": 8.563316345214844, "learning_rate": 9.61958558047119e-05, "loss": 0.1359581708908081, "step": 13410 }, { "epoch": 3.8092534771501563, "grad_norm": 24.51914405822754, "learning_rate": 9.619301731478854e-05, "loss": 0.13123935461044312, "step": 13420 }, { "epoch": 3.812091967073517, "grad_norm": 14.923455238342285, "learning_rate": 9.619017882486518e-05, "loss": 0.13299981355667115, "step": 13430 }, { "epoch": 3.8149304569968776, "grad_norm": 9.67190170288086, "learning_rate": 9.618734033494181e-05, "loss": 0.10709612369537354, "step": 13440 }, { "epoch": 3.8177689469202383, "grad_norm": 7.92209005355835, "learning_rate": 9.618450184501846e-05, "loss": 0.14101274013519288, "step": 13450 }, { "epoch": 3.8206074368435994, "grad_norm": 14.323643684387207, "learning_rate": 9.61816633550951e-05, "loss": 0.15004523992538452, "step": 13460 }, { "epoch": 3.82344592676696, "grad_norm": 9.850910186767578, "learning_rate": 9.617882486517173e-05, "loss": 0.15016478300094604, "step": 13470 }, { "epoch": 3.8262844166903207, "grad_norm": 12.276317596435547, "learning_rate": 9.617598637524837e-05, "loss": 0.17407671213150025, "step": 13480 }, { "epoch": 3.8291229066136814, "grad_norm": 13.975410461425781, "learning_rate": 9.617314788532501e-05, "loss": 0.1120498776435852, "step": 13490 }, { "epoch": 3.831961396537042, "grad_norm": 10.185418128967285, "learning_rate": 9.617030939540164e-05, "loss": 0.1282976508140564, "step": 13500 }, { "epoch": 3.831961396537042, "eval_accuracy": 0.916640172950976, "eval_loss": 0.2500058114528656, "eval_runtime": 36.7391, "eval_samples_per_second": 428.073, "eval_steps_per_second": 6.696, "step": 13500 }, { "epoch": 3.834799886460403, "grad_norm": 3.4010097980499268, "learning_rate": 9.616747090547828e-05, "loss": 0.12859276533126832, "step": 13510 }, { "epoch": 3.837638376383764, "grad_norm": 11.004711151123047, "learning_rate": 9.616463241555494e-05, "loss": 0.11757502555847169, "step": 13520 }, { "epoch": 3.8404768663071245, "grad_norm": 9.052450180053711, "learning_rate": 9.616179392563157e-05, "loss": 0.11572097539901734, "step": 13530 }, { "epoch": 3.8433153562304856, "grad_norm": 11.217466354370117, "learning_rate": 9.615895543570821e-05, "loss": 0.12781227827072145, "step": 13540 }, { "epoch": 3.8461538461538463, "grad_norm": 10.035443305969238, "learning_rate": 9.615611694578485e-05, "loss": 0.10720311403274536, "step": 13550 }, { "epoch": 3.848992336077207, "grad_norm": 9.492477416992188, "learning_rate": 9.615327845586149e-05, "loss": 0.18364393711090088, "step": 13560 }, { "epoch": 3.8518308260005676, "grad_norm": 6.851222991943359, "learning_rate": 9.615043996593812e-05, "loss": 0.09978563785552978, "step": 13570 }, { "epoch": 3.8546693159239283, "grad_norm": 14.143752098083496, "learning_rate": 9.614760147601477e-05, "loss": 0.12027443647384643, "step": 13580 }, { "epoch": 3.8575078058472894, "grad_norm": 10.357497215270996, "learning_rate": 9.614476298609141e-05, "loss": 0.12287615537643433, "step": 13590 }, { "epoch": 3.86034629577065, "grad_norm": 11.030355453491211, "learning_rate": 9.614192449616804e-05, "loss": 0.13575853109359742, "step": 13600 }, { "epoch": 3.8631847856940107, "grad_norm": 11.74402904510498, "learning_rate": 9.613908600624468e-05, "loss": 0.14983344078063965, "step": 13610 }, { "epoch": 3.866023275617372, "grad_norm": 12.32616901397705, "learning_rate": 9.613624751632132e-05, "loss": 0.12064799070358276, "step": 13620 }, { "epoch": 3.8688617655407325, "grad_norm": 15.7556734085083, "learning_rate": 9.613340902639795e-05, "loss": 0.12453874349594116, "step": 13630 }, { "epoch": 3.871700255464093, "grad_norm": 5.788139820098877, "learning_rate": 9.61305705364746e-05, "loss": 0.10892635583877563, "step": 13640 }, { "epoch": 3.874538745387454, "grad_norm": 8.357176780700684, "learning_rate": 9.612773204655125e-05, "loss": 0.11689653396606445, "step": 13650 }, { "epoch": 3.8773772353108145, "grad_norm": 11.559595108032227, "learning_rate": 9.612489355662788e-05, "loss": 0.13534941673278808, "step": 13660 }, { "epoch": 3.8802157252341756, "grad_norm": 7.657944202423096, "learning_rate": 9.612205506670452e-05, "loss": 0.14152661561965943, "step": 13670 }, { "epoch": 3.8830542151575362, "grad_norm": 10.012310981750488, "learning_rate": 9.611921657678116e-05, "loss": 0.1328120231628418, "step": 13680 }, { "epoch": 3.885892705080897, "grad_norm": 12.822077751159668, "learning_rate": 9.61163780868578e-05, "loss": 0.19572813510894777, "step": 13690 }, { "epoch": 3.888731195004258, "grad_norm": 8.302614212036133, "learning_rate": 9.611353959693443e-05, "loss": 0.11375526189804078, "step": 13700 }, { "epoch": 3.8915696849276182, "grad_norm": 9.149529457092285, "learning_rate": 9.611070110701107e-05, "loss": 0.11319116353988648, "step": 13710 }, { "epoch": 3.8944081748509793, "grad_norm": 11.06827449798584, "learning_rate": 9.610786261708773e-05, "loss": 0.13225321769714354, "step": 13720 }, { "epoch": 3.89724666477434, "grad_norm": 8.413561820983887, "learning_rate": 9.610502412716435e-05, "loss": 0.09244733452796935, "step": 13730 }, { "epoch": 3.9000851546977007, "grad_norm": 12.452582359313965, "learning_rate": 9.6102185637241e-05, "loss": 0.15070028305053712, "step": 13740 }, { "epoch": 3.902923644621062, "grad_norm": 8.5281343460083, "learning_rate": 9.609934714731764e-05, "loss": 0.09001613259315491, "step": 13750 }, { "epoch": 3.9057621345444224, "grad_norm": 10.673900604248047, "learning_rate": 9.609650865739426e-05, "loss": 0.12970414161682128, "step": 13760 }, { "epoch": 3.908600624467783, "grad_norm": 5.440392971038818, "learning_rate": 9.60936701674709e-05, "loss": 0.1380963444709778, "step": 13770 }, { "epoch": 3.911439114391144, "grad_norm": 7.268813133239746, "learning_rate": 9.609083167754756e-05, "loss": 0.12927582263946533, "step": 13780 }, { "epoch": 3.9142776043145044, "grad_norm": 5.837165832519531, "learning_rate": 9.608799318762419e-05, "loss": 0.15271462202072145, "step": 13790 }, { "epoch": 3.9171160942378656, "grad_norm": 12.03630256652832, "learning_rate": 9.608515469770083e-05, "loss": 0.19636542797088624, "step": 13800 }, { "epoch": 3.919954584161226, "grad_norm": 16.867380142211914, "learning_rate": 9.608231620777747e-05, "loss": 0.1506428003311157, "step": 13810 }, { "epoch": 3.922793074084587, "grad_norm": 19.567331314086914, "learning_rate": 9.607947771785411e-05, "loss": 0.11044533252716064, "step": 13820 }, { "epoch": 3.925631564007948, "grad_norm": 12.904621124267578, "learning_rate": 9.607663922793074e-05, "loss": 0.14581849575042724, "step": 13830 }, { "epoch": 3.9284700539313087, "grad_norm": 7.585264205932617, "learning_rate": 9.607380073800738e-05, "loss": 0.1511662483215332, "step": 13840 }, { "epoch": 3.9313085438546693, "grad_norm": 10.484241485595703, "learning_rate": 9.607096224808402e-05, "loss": 0.10768641233444214, "step": 13850 }, { "epoch": 3.93414703377803, "grad_norm": 8.28950023651123, "learning_rate": 9.606812375816066e-05, "loss": 0.15528329610824584, "step": 13860 }, { "epoch": 3.9369855237013907, "grad_norm": 5.430914402008057, "learning_rate": 9.60652852682373e-05, "loss": 0.10620771646499634, "step": 13870 }, { "epoch": 3.9398240136247518, "grad_norm": 12.264892578125, "learning_rate": 9.606244677831395e-05, "loss": 0.14809378385543823, "step": 13880 }, { "epoch": 3.9426625035481124, "grad_norm": 5.505810737609863, "learning_rate": 9.605960828839057e-05, "loss": 0.13955754041671753, "step": 13890 }, { "epoch": 3.945500993471473, "grad_norm": 7.392472743988037, "learning_rate": 9.605676979846722e-05, "loss": 0.12620739936828612, "step": 13900 }, { "epoch": 3.948339483394834, "grad_norm": 6.124483108520508, "learning_rate": 9.605393130854386e-05, "loss": 0.1296523928642273, "step": 13910 }, { "epoch": 3.951177973318195, "grad_norm": 12.68109130859375, "learning_rate": 9.60510928186205e-05, "loss": 0.13769983053207396, "step": 13920 }, { "epoch": 3.9540164632415555, "grad_norm": 5.922175407409668, "learning_rate": 9.604825432869714e-05, "loss": 0.1507207989692688, "step": 13930 }, { "epoch": 3.956854953164916, "grad_norm": 9.0308256149292, "learning_rate": 9.604541583877378e-05, "loss": 0.12400763034820557, "step": 13940 }, { "epoch": 3.959693443088277, "grad_norm": 13.728779792785645, "learning_rate": 9.604257734885041e-05, "loss": 0.13627406358718872, "step": 13950 }, { "epoch": 3.962531933011638, "grad_norm": 4.769062519073486, "learning_rate": 9.603973885892705e-05, "loss": 0.1264578104019165, "step": 13960 }, { "epoch": 3.9653704229349986, "grad_norm": 3.5640993118286133, "learning_rate": 9.603690036900369e-05, "loss": 0.15145952701568605, "step": 13970 }, { "epoch": 3.9682089128583593, "grad_norm": 14.57795524597168, "learning_rate": 9.603406187908033e-05, "loss": 0.14383717775344848, "step": 13980 }, { "epoch": 3.97104740278172, "grad_norm": 13.673648834228516, "learning_rate": 9.603122338915697e-05, "loss": 0.14478977918624877, "step": 13990 }, { "epoch": 3.9738858927050806, "grad_norm": 9.978239059448242, "learning_rate": 9.602838489923362e-05, "loss": 0.1557931661605835, "step": 14000 }, { "epoch": 3.9738858927050806, "eval_accuracy": 0.9202645132574553, "eval_loss": 0.23880994319915771, "eval_runtime": 40.4117, "eval_samples_per_second": 389.17, "eval_steps_per_second": 6.087, "step": 14000 }, { "epoch": 3.9767243826284417, "grad_norm": 18.19049072265625, "learning_rate": 9.602554640931026e-05, "loss": 0.13450791835784912, "step": 14010 }, { "epoch": 3.9795628725518024, "grad_norm": 9.949593544006348, "learning_rate": 9.602270791938688e-05, "loss": 0.14858608245849608, "step": 14020 }, { "epoch": 3.982401362475163, "grad_norm": 13.145724296569824, "learning_rate": 9.601986942946353e-05, "loss": 0.13436529636383057, "step": 14030 }, { "epoch": 3.985239852398524, "grad_norm": 15.63553524017334, "learning_rate": 9.601703093954017e-05, "loss": 0.17865006923675536, "step": 14040 }, { "epoch": 3.988078342321885, "grad_norm": 11.877776145935059, "learning_rate": 9.601419244961681e-05, "loss": 0.13950564861297607, "step": 14050 }, { "epoch": 3.9909168322452455, "grad_norm": 15.486270904541016, "learning_rate": 9.601135395969345e-05, "loss": 0.17712441682815552, "step": 14060 }, { "epoch": 3.993755322168606, "grad_norm": 13.327706336975098, "learning_rate": 9.600851546977009e-05, "loss": 0.17885204553604125, "step": 14070 }, { "epoch": 3.996593812091967, "grad_norm": 4.493618965148926, "learning_rate": 9.600567697984672e-05, "loss": 0.07905619144439698, "step": 14080 }, { "epoch": 3.999432302015328, "grad_norm": 10.75156021118164, "learning_rate": 9.600283848992336e-05, "loss": 0.11969027519226075, "step": 14090 }, { "epoch": 4.002270791938688, "grad_norm": 8.150691032409668, "learning_rate": 9.6e-05, "loss": 0.1255323052406311, "step": 14100 }, { "epoch": 4.005109281862049, "grad_norm": 4.878571033477783, "learning_rate": 9.599716151007664e-05, "loss": 0.09251973628997803, "step": 14110 }, { "epoch": 4.00794777178541, "grad_norm": 13.427812576293945, "learning_rate": 9.599432302015329e-05, "loss": 0.10075161457061768, "step": 14120 }, { "epoch": 4.010786261708771, "grad_norm": 16.238224029541016, "learning_rate": 9.599148453022993e-05, "loss": 0.102085280418396, "step": 14130 }, { "epoch": 4.013624751632132, "grad_norm": 9.057024955749512, "learning_rate": 9.598864604030657e-05, "loss": 0.09474047422409057, "step": 14140 }, { "epoch": 4.016463241555493, "grad_norm": 10.832261085510254, "learning_rate": 9.59858075503832e-05, "loss": 0.10924746990203857, "step": 14150 }, { "epoch": 4.019301731478853, "grad_norm": 9.383221626281738, "learning_rate": 9.598296906045984e-05, "loss": 0.06316727995872498, "step": 14160 }, { "epoch": 4.022140221402214, "grad_norm": 7.136850833892822, "learning_rate": 9.598013057053648e-05, "loss": 0.10823802947998047, "step": 14170 }, { "epoch": 4.024978711325574, "grad_norm": 7.814727783203125, "learning_rate": 9.597729208061312e-05, "loss": 0.13547511100769044, "step": 14180 }, { "epoch": 4.0278172012489355, "grad_norm": 16.021039962768555, "learning_rate": 9.597445359068976e-05, "loss": 0.13839480876922608, "step": 14190 }, { "epoch": 4.030655691172297, "grad_norm": 10.43539047241211, "learning_rate": 9.59716151007664e-05, "loss": 0.08196156620979309, "step": 14200 }, { "epoch": 4.033494181095657, "grad_norm": 4.834102153778076, "learning_rate": 9.596877661084303e-05, "loss": 0.11097515821456909, "step": 14210 }, { "epoch": 4.036332671019018, "grad_norm": 7.360057353973389, "learning_rate": 9.596593812091967e-05, "loss": 0.11906614303588867, "step": 14220 }, { "epoch": 4.039171160942379, "grad_norm": 3.9440691471099854, "learning_rate": 9.596309963099631e-05, "loss": 0.09003963470458984, "step": 14230 }, { "epoch": 4.042009650865739, "grad_norm": 10.266974449157715, "learning_rate": 9.596026114107295e-05, "loss": 0.11627607345581055, "step": 14240 }, { "epoch": 4.0448481407891, "grad_norm": 10.75854206085205, "learning_rate": 9.59574226511496e-05, "loss": 0.12331702709197997, "step": 14250 }, { "epoch": 4.047686630712461, "grad_norm": 5.659377574920654, "learning_rate": 9.595458416122624e-05, "loss": 0.08914806246757508, "step": 14260 }, { "epoch": 4.050525120635822, "grad_norm": 8.639955520629883, "learning_rate": 9.595174567130288e-05, "loss": 0.10007381439208984, "step": 14270 }, { "epoch": 4.053363610559183, "grad_norm": 12.293832778930664, "learning_rate": 9.59489071813795e-05, "loss": 0.12101688385009765, "step": 14280 }, { "epoch": 4.056202100482543, "grad_norm": 18.912532806396484, "learning_rate": 9.594606869145615e-05, "loss": 0.12863388061523437, "step": 14290 }, { "epoch": 4.059040590405904, "grad_norm": 14.071619987487793, "learning_rate": 9.594323020153279e-05, "loss": 0.09669498205184937, "step": 14300 }, { "epoch": 4.061879080329265, "grad_norm": 9.622110366821289, "learning_rate": 9.594039171160942e-05, "loss": 0.08368233442306519, "step": 14310 }, { "epoch": 4.0647175702526255, "grad_norm": 11.370525360107422, "learning_rate": 9.593755322168607e-05, "loss": 0.1263554573059082, "step": 14320 }, { "epoch": 4.067556060175987, "grad_norm": 8.008711814880371, "learning_rate": 9.593471473176271e-05, "loss": 0.10123660564422607, "step": 14330 }, { "epoch": 4.070394550099347, "grad_norm": 5.035618305206299, "learning_rate": 9.593187624183934e-05, "loss": 0.09410436153411865, "step": 14340 }, { "epoch": 4.073233040022708, "grad_norm": 7.120819568634033, "learning_rate": 9.592903775191598e-05, "loss": 0.09953033328056335, "step": 14350 }, { "epoch": 4.076071529946069, "grad_norm": 3.1404309272766113, "learning_rate": 9.592619926199262e-05, "loss": 0.09761457443237305, "step": 14360 }, { "epoch": 4.078910019869429, "grad_norm": 10.691572189331055, "learning_rate": 9.592336077206927e-05, "loss": 0.08937427997589112, "step": 14370 }, { "epoch": 4.08174850979279, "grad_norm": 4.949476718902588, "learning_rate": 9.59205222821459e-05, "loss": 0.08243495225906372, "step": 14380 }, { "epoch": 4.084586999716151, "grad_norm": 3.1258387565612793, "learning_rate": 9.591768379222255e-05, "loss": 0.107956063747406, "step": 14390 }, { "epoch": 4.087425489639512, "grad_norm": 11.155573844909668, "learning_rate": 9.591484530229919e-05, "loss": 0.11006866693496704, "step": 14400 }, { "epoch": 4.090263979562873, "grad_norm": 7.246969699859619, "learning_rate": 9.591200681237582e-05, "loss": 0.1284692406654358, "step": 14410 }, { "epoch": 4.093102469486233, "grad_norm": 10.234097480773926, "learning_rate": 9.590916832245246e-05, "loss": 0.11945974826812744, "step": 14420 }, { "epoch": 4.095940959409594, "grad_norm": 6.303606033325195, "learning_rate": 9.59063298325291e-05, "loss": 0.12363855838775635, "step": 14430 }, { "epoch": 4.098779449332955, "grad_norm": 8.154223442077637, "learning_rate": 9.590349134260573e-05, "loss": 0.08484888672828675, "step": 14440 }, { "epoch": 4.1016179392563155, "grad_norm": 6.062149524688721, "learning_rate": 9.590065285268238e-05, "loss": 0.08848806023597718, "step": 14450 }, { "epoch": 4.104456429179677, "grad_norm": 3.6650664806365967, "learning_rate": 9.589781436275902e-05, "loss": 0.07113722562789918, "step": 14460 }, { "epoch": 4.107294919103037, "grad_norm": 13.319270133972168, "learning_rate": 9.589497587283565e-05, "loss": 0.12020624876022339, "step": 14470 }, { "epoch": 4.110133409026398, "grad_norm": 8.548646926879883, "learning_rate": 9.58921373829123e-05, "loss": 0.1243698239326477, "step": 14480 }, { "epoch": 4.112971898949759, "grad_norm": 10.457490921020508, "learning_rate": 9.588929889298893e-05, "loss": 0.12181462049484253, "step": 14490 }, { "epoch": 4.115810388873119, "grad_norm": 3.7827720642089844, "learning_rate": 9.588646040306558e-05, "loss": 0.11501580476760864, "step": 14500 }, { "epoch": 4.115810388873119, "eval_accuracy": 0.9259235709289756, "eval_loss": 0.2222551852464676, "eval_runtime": 46.0534, "eval_samples_per_second": 341.495, "eval_steps_per_second": 5.342, "step": 14500 }, { "epoch": 4.11864887879648, "grad_norm": 6.77955961227417, "learning_rate": 9.58836219131422e-05, "loss": 0.0998927891254425, "step": 14510 }, { "epoch": 4.1214873687198414, "grad_norm": 9.285117149353027, "learning_rate": 9.588078342321886e-05, "loss": 0.09262888431549073, "step": 14520 }, { "epoch": 4.124325858643202, "grad_norm": 13.294258117675781, "learning_rate": 9.58779449332955e-05, "loss": 0.12045708894729615, "step": 14530 }, { "epoch": 4.127164348566563, "grad_norm": 3.119527578353882, "learning_rate": 9.587510644337213e-05, "loss": 0.07593669891357421, "step": 14540 }, { "epoch": 4.130002838489923, "grad_norm": 3.950878620147705, "learning_rate": 9.587226795344877e-05, "loss": 0.09898312091827392, "step": 14550 }, { "epoch": 4.132841328413284, "grad_norm": 11.092055320739746, "learning_rate": 9.586942946352541e-05, "loss": 0.13109955787658692, "step": 14560 }, { "epoch": 4.135679818336645, "grad_norm": 9.894498825073242, "learning_rate": 9.586659097360204e-05, "loss": 0.10246489048004151, "step": 14570 }, { "epoch": 4.138518308260005, "grad_norm": 12.938497543334961, "learning_rate": 9.58637524836787e-05, "loss": 0.11322982311248779, "step": 14580 }, { "epoch": 4.1413567981833665, "grad_norm": 10.294341087341309, "learning_rate": 9.586091399375533e-05, "loss": 0.1258512020111084, "step": 14590 }, { "epoch": 4.144195288106728, "grad_norm": 17.97700309753418, "learning_rate": 9.585807550383196e-05, "loss": 0.11007544994354249, "step": 14600 }, { "epoch": 4.147033778030088, "grad_norm": 11.919865608215332, "learning_rate": 9.58552370139086e-05, "loss": 0.13854563236236572, "step": 14610 }, { "epoch": 4.149872267953449, "grad_norm": 7.199741363525391, "learning_rate": 9.585239852398525e-05, "loss": 0.1072045087814331, "step": 14620 }, { "epoch": 4.152710757876809, "grad_norm": 11.083148956298828, "learning_rate": 9.584956003406189e-05, "loss": 0.11457937955856323, "step": 14630 }, { "epoch": 4.15554924780017, "grad_norm": 7.713132858276367, "learning_rate": 9.584672154413851e-05, "loss": 0.08804317712783813, "step": 14640 }, { "epoch": 4.158387737723531, "grad_norm": 8.071778297424316, "learning_rate": 9.584388305421517e-05, "loss": 0.10408281087875366, "step": 14650 }, { "epoch": 4.161226227646892, "grad_norm": 7.58452033996582, "learning_rate": 9.584104456429181e-05, "loss": 0.11020106077194214, "step": 14660 }, { "epoch": 4.164064717570253, "grad_norm": 4.063427925109863, "learning_rate": 9.583820607436844e-05, "loss": 0.09614802598953247, "step": 14670 }, { "epoch": 4.166903207493613, "grad_norm": 11.892534255981445, "learning_rate": 9.583536758444508e-05, "loss": 0.08040065169334412, "step": 14680 }, { "epoch": 4.169741697416974, "grad_norm": 17.587987899780273, "learning_rate": 9.583252909452172e-05, "loss": 0.09476807713508606, "step": 14690 }, { "epoch": 4.172580187340335, "grad_norm": 6.535255432128906, "learning_rate": 9.582969060459835e-05, "loss": 0.10618381500244141, "step": 14700 }, { "epoch": 4.175418677263695, "grad_norm": 4.665090560913086, "learning_rate": 9.582685211467499e-05, "loss": 0.09169800877571106, "step": 14710 }, { "epoch": 4.1782571671870565, "grad_norm": 12.078035354614258, "learning_rate": 9.582401362475165e-05, "loss": 0.10968313217163086, "step": 14720 }, { "epoch": 4.181095657110418, "grad_norm": 1.8821532726287842, "learning_rate": 9.582117513482827e-05, "loss": 0.10320936441421509, "step": 14730 }, { "epoch": 4.183934147033778, "grad_norm": 10.412105560302734, "learning_rate": 9.581833664490491e-05, "loss": 0.1313454508781433, "step": 14740 }, { "epoch": 4.186772636957139, "grad_norm": 13.401899337768555, "learning_rate": 9.581549815498156e-05, "loss": 0.09337538480758667, "step": 14750 }, { "epoch": 4.189611126880499, "grad_norm": 8.803839683532715, "learning_rate": 9.58126596650582e-05, "loss": 0.09171583652496337, "step": 14760 }, { "epoch": 4.19244961680386, "grad_norm": 6.475306510925293, "learning_rate": 9.580982117513483e-05, "loss": 0.10607587099075318, "step": 14770 }, { "epoch": 4.195288106727221, "grad_norm": 10.158600807189941, "learning_rate": 9.580698268521148e-05, "loss": 0.13583807945251464, "step": 14780 }, { "epoch": 4.198126596650582, "grad_norm": 11.948307037353516, "learning_rate": 9.580414419528811e-05, "loss": 0.1415409564971924, "step": 14790 }, { "epoch": 4.200965086573943, "grad_norm": 8.073081016540527, "learning_rate": 9.580130570536475e-05, "loss": 0.09804023504257202, "step": 14800 }, { "epoch": 4.203803576497304, "grad_norm": 5.781335830688477, "learning_rate": 9.579846721544139e-05, "loss": 0.12191305160522461, "step": 14810 }, { "epoch": 4.206642066420664, "grad_norm": 11.49725341796875, "learning_rate": 9.579562872551803e-05, "loss": 0.09396051168441773, "step": 14820 }, { "epoch": 4.209480556344025, "grad_norm": 8.281519889831543, "learning_rate": 9.579279023559466e-05, "loss": 0.09673430919647216, "step": 14830 }, { "epoch": 4.212319046267385, "grad_norm": 11.10311508178711, "learning_rate": 9.57899517456713e-05, "loss": 0.09752503037452698, "step": 14840 }, { "epoch": 4.2151575361907465, "grad_norm": 8.122870445251465, "learning_rate": 9.578711325574796e-05, "loss": 0.09364856481552124, "step": 14850 }, { "epoch": 4.217996026114108, "grad_norm": 11.108592987060547, "learning_rate": 9.578427476582458e-05, "loss": 0.12444288730621338, "step": 14860 }, { "epoch": 4.220834516037468, "grad_norm": 5.374812126159668, "learning_rate": 9.578143627590123e-05, "loss": 0.09746533036231994, "step": 14870 }, { "epoch": 4.223673005960829, "grad_norm": 9.096115112304688, "learning_rate": 9.577859778597787e-05, "loss": 0.15855931043624877, "step": 14880 }, { "epoch": 4.226511495884189, "grad_norm": 5.0284857749938965, "learning_rate": 9.57757592960545e-05, "loss": 0.11276011466979981, "step": 14890 }, { "epoch": 4.22934998580755, "grad_norm": 7.8205180168151855, "learning_rate": 9.577292080613114e-05, "loss": 0.07677354216575623, "step": 14900 }, { "epoch": 4.232188475730911, "grad_norm": 16.193645477294922, "learning_rate": 9.577008231620779e-05, "loss": 0.13194407224655152, "step": 14910 }, { "epoch": 4.235026965654272, "grad_norm": 15.98956298828125, "learning_rate": 9.576724382628442e-05, "loss": 0.12185338735580445, "step": 14920 }, { "epoch": 4.237865455577633, "grad_norm": 6.659210205078125, "learning_rate": 9.576440533636106e-05, "loss": 0.10797693729400634, "step": 14930 }, { "epoch": 4.240703945500994, "grad_norm": 14.982155799865723, "learning_rate": 9.57615668464377e-05, "loss": 0.10793132781982422, "step": 14940 }, { "epoch": 4.243542435424354, "grad_norm": 11.39868450164795, "learning_rate": 9.575872835651434e-05, "loss": 0.09230276346206664, "step": 14950 }, { "epoch": 4.246380925347715, "grad_norm": 6.698498249053955, "learning_rate": 9.575588986659097e-05, "loss": 0.1091640830039978, "step": 14960 }, { "epoch": 4.249219415271075, "grad_norm": 11.06921672821045, "learning_rate": 9.575305137666761e-05, "loss": 0.09979037642478943, "step": 14970 }, { "epoch": 4.2520579051944365, "grad_norm": 2.263554811477661, "learning_rate": 9.575021288674427e-05, "loss": 0.07672253251075745, "step": 14980 }, { "epoch": 4.254896395117798, "grad_norm": 7.199281692504883, "learning_rate": 9.57473743968209e-05, "loss": 0.10653339624404908, "step": 14990 }, { "epoch": 4.257734885041158, "grad_norm": 6.498597145080566, "learning_rate": 9.574453590689754e-05, "loss": 0.10281004905700683, "step": 15000 }, { "epoch": 4.257734885041158, "eval_accuracy": 0.9280854581293317, "eval_loss": 0.21723805367946625, "eval_runtime": 66.477, "eval_samples_per_second": 236.578, "eval_steps_per_second": 3.701, "step": 15000 }, { "epoch": 4.260573374964519, "grad_norm": 10.73794174194336, "learning_rate": 9.574169741697418e-05, "loss": 0.10162538290023804, "step": 15010 }, { "epoch": 4.26341186488788, "grad_norm": 8.504993438720703, "learning_rate": 9.57388589270508e-05, "loss": 0.10412778854370117, "step": 15020 }, { "epoch": 4.26625035481124, "grad_norm": 6.92310094833374, "learning_rate": 9.573602043712745e-05, "loss": 0.11273119449615479, "step": 15030 }, { "epoch": 4.269088844734601, "grad_norm": 7.16041374206543, "learning_rate": 9.573318194720409e-05, "loss": 0.10654997825622559, "step": 15040 }, { "epoch": 4.271927334657962, "grad_norm": 5.499131202697754, "learning_rate": 9.573034345728073e-05, "loss": 0.0879385769367218, "step": 15050 }, { "epoch": 4.274765824581323, "grad_norm": 9.4363431930542, "learning_rate": 9.572750496735737e-05, "loss": 0.11866852045059204, "step": 15060 }, { "epoch": 4.277604314504684, "grad_norm": 8.838692665100098, "learning_rate": 9.572466647743401e-05, "loss": 0.09277896285057068, "step": 15070 }, { "epoch": 4.280442804428044, "grad_norm": 7.413661003112793, "learning_rate": 9.572182798751065e-05, "loss": 0.08880077004432678, "step": 15080 }, { "epoch": 4.283281294351405, "grad_norm": 9.91930103302002, "learning_rate": 9.571898949758728e-05, "loss": 0.13681248426437378, "step": 15090 }, { "epoch": 4.286119784274765, "grad_norm": 10.419453620910645, "learning_rate": 9.571615100766392e-05, "loss": 0.09644099473953247, "step": 15100 }, { "epoch": 4.2889582741981265, "grad_norm": 16.121124267578125, "learning_rate": 9.571331251774058e-05, "loss": 0.09612476229667663, "step": 15110 }, { "epoch": 4.291796764121488, "grad_norm": 11.754042625427246, "learning_rate": 9.57104740278172e-05, "loss": 0.1271878719329834, "step": 15120 }, { "epoch": 4.294635254044848, "grad_norm": 7.8950886726379395, "learning_rate": 9.570763553789385e-05, "loss": 0.10952348709106445, "step": 15130 }, { "epoch": 4.297473743968209, "grad_norm": 11.527775764465332, "learning_rate": 9.570479704797049e-05, "loss": 0.10275592803955078, "step": 15140 }, { "epoch": 4.30031223389157, "grad_norm": 7.265465259552002, "learning_rate": 9.570195855804712e-05, "loss": 0.11690117120742798, "step": 15150 }, { "epoch": 4.30315072381493, "grad_norm": 9.822203636169434, "learning_rate": 9.569912006812376e-05, "loss": 0.1152570128440857, "step": 15160 }, { "epoch": 4.305989213738291, "grad_norm": 14.509187698364258, "learning_rate": 9.56962815782004e-05, "loss": 0.09673151969909669, "step": 15170 }, { "epoch": 4.3088277036616525, "grad_norm": 8.456305503845215, "learning_rate": 9.569344308827704e-05, "loss": 0.08582110404968261, "step": 15180 }, { "epoch": 4.311666193585013, "grad_norm": 10.612581253051758, "learning_rate": 9.569060459835368e-05, "loss": 0.09570643901824952, "step": 15190 }, { "epoch": 4.314504683508374, "grad_norm": 6.780313968658447, "learning_rate": 9.568776610843032e-05, "loss": 0.08396963477134704, "step": 15200 }, { "epoch": 4.317343173431734, "grad_norm": 9.964248657226562, "learning_rate": 9.568492761850696e-05, "loss": 0.14010148048400878, "step": 15210 }, { "epoch": 4.320181663355095, "grad_norm": 6.536813735961914, "learning_rate": 9.568208912858359e-05, "loss": 0.12184340953826904, "step": 15220 }, { "epoch": 4.323020153278456, "grad_norm": 11.19168758392334, "learning_rate": 9.567925063866023e-05, "loss": 0.07435535788536071, "step": 15230 }, { "epoch": 4.3258586432018165, "grad_norm": 12.899444580078125, "learning_rate": 9.567641214873688e-05, "loss": 0.13097312450408935, "step": 15240 }, { "epoch": 4.328697133125178, "grad_norm": 12.158329963684082, "learning_rate": 9.567357365881352e-05, "loss": 0.10922545194625854, "step": 15250 }, { "epoch": 4.331535623048538, "grad_norm": 7.487823486328125, "learning_rate": 9.567073516889016e-05, "loss": 0.09415889978408813, "step": 15260 }, { "epoch": 4.334374112971899, "grad_norm": 14.7211275100708, "learning_rate": 9.56678966789668e-05, "loss": 0.12029809951782226, "step": 15270 }, { "epoch": 4.33721260289526, "grad_norm": 7.123003005981445, "learning_rate": 9.566505818904343e-05, "loss": 0.11168547868728637, "step": 15280 }, { "epoch": 4.34005109281862, "grad_norm": 10.750041007995605, "learning_rate": 9.566221969912007e-05, "loss": 0.10698223114013672, "step": 15290 }, { "epoch": 4.342889582741981, "grad_norm": 5.5995192527771, "learning_rate": 9.565938120919671e-05, "loss": 0.08130642175674438, "step": 15300 }, { "epoch": 4.3457280726653424, "grad_norm": 6.202361106872559, "learning_rate": 9.565654271927335e-05, "loss": 0.12642371654510498, "step": 15310 }, { "epoch": 4.348566562588703, "grad_norm": 13.233956336975098, "learning_rate": 9.565370422934999e-05, "loss": 0.09336087703704835, "step": 15320 }, { "epoch": 4.351405052512064, "grad_norm": 9.915778160095215, "learning_rate": 9.565086573942663e-05, "loss": 0.11775330305099488, "step": 15330 }, { "epoch": 4.354243542435424, "grad_norm": 6.237540245056152, "learning_rate": 9.564802724950328e-05, "loss": 0.09939075112342835, "step": 15340 }, { "epoch": 4.357082032358785, "grad_norm": 5.106442451477051, "learning_rate": 9.56451887595799e-05, "loss": 0.08672484159469604, "step": 15350 }, { "epoch": 4.359920522282146, "grad_norm": 11.800057411193848, "learning_rate": 9.564235026965654e-05, "loss": 0.10281562805175781, "step": 15360 }, { "epoch": 4.362759012205506, "grad_norm": 8.074190139770508, "learning_rate": 9.563951177973319e-05, "loss": 0.10443708896636963, "step": 15370 }, { "epoch": 4.3655975021288675, "grad_norm": 10.224120140075684, "learning_rate": 9.563667328980983e-05, "loss": 0.11569275856018066, "step": 15380 }, { "epoch": 4.368435992052229, "grad_norm": 17.160524368286133, "learning_rate": 9.563383479988647e-05, "loss": 0.1310238718986511, "step": 15390 }, { "epoch": 4.371274481975589, "grad_norm": 8.138664245605469, "learning_rate": 9.563099630996311e-05, "loss": 0.11139894723892212, "step": 15400 }, { "epoch": 4.37411297189895, "grad_norm": 8.116637229919434, "learning_rate": 9.562815782003974e-05, "loss": 0.10230048894882202, "step": 15410 }, { "epoch": 4.37695146182231, "grad_norm": 10.149262428283691, "learning_rate": 9.562531933011638e-05, "loss": 0.09183356165885925, "step": 15420 }, { "epoch": 4.379789951745671, "grad_norm": 18.210248947143555, "learning_rate": 9.562248084019302e-05, "loss": 0.1334926724433899, "step": 15430 }, { "epoch": 4.382628441669032, "grad_norm": 4.218972682952881, "learning_rate": 9.561964235026966e-05, "loss": 0.06529752016067505, "step": 15440 }, { "epoch": 4.385466931592393, "grad_norm": 7.509181976318359, "learning_rate": 9.56168038603463e-05, "loss": 0.08690014481544495, "step": 15450 }, { "epoch": 4.388305421515754, "grad_norm": 14.305695533752441, "learning_rate": 9.561396537042294e-05, "loss": 0.11545631885528565, "step": 15460 }, { "epoch": 4.391143911439114, "grad_norm": 12.572425842285156, "learning_rate": 9.561112688049959e-05, "loss": 0.11966493129730224, "step": 15470 }, { "epoch": 4.393982401362475, "grad_norm": 7.853279113769531, "learning_rate": 9.560828839057621e-05, "loss": 0.11678638458251953, "step": 15480 }, { "epoch": 4.396820891285836, "grad_norm": 7.1537089347839355, "learning_rate": 9.560544990065286e-05, "loss": 0.095060533285141, "step": 15490 }, { "epoch": 4.399659381209196, "grad_norm": 13.091660499572754, "learning_rate": 9.56026114107295e-05, "loss": 0.14481035470962525, "step": 15500 }, { "epoch": 4.399659381209196, "eval_accuracy": 0.9160043237744007, "eval_loss": 0.25544074177742004, "eval_runtime": 51.6478, "eval_samples_per_second": 304.505, "eval_steps_per_second": 4.763, "step": 15500 }, { "epoch": 4.4024978711325575, "grad_norm": 7.660592555999756, "learning_rate": 9.559977292080614e-05, "loss": 0.11808959245681763, "step": 15510 }, { "epoch": 4.405336361055919, "grad_norm": 6.976760387420654, "learning_rate": 9.559693443088278e-05, "loss": 0.12914351224899293, "step": 15520 }, { "epoch": 4.408174850979279, "grad_norm": 15.215959548950195, "learning_rate": 9.559409594095942e-05, "loss": 0.08484911322593688, "step": 15530 }, { "epoch": 4.41101334090264, "grad_norm": 7.180747985839844, "learning_rate": 9.559125745103605e-05, "loss": 0.10736932754516601, "step": 15540 }, { "epoch": 4.413851830826, "grad_norm": 10.312606811523438, "learning_rate": 9.558841896111269e-05, "loss": 0.10439006090164185, "step": 15550 }, { "epoch": 4.416690320749361, "grad_norm": 6.2049479484558105, "learning_rate": 9.558558047118933e-05, "loss": 0.09250456094741821, "step": 15560 }, { "epoch": 4.419528810672722, "grad_norm": 12.195984840393066, "learning_rate": 9.558274198126597e-05, "loss": 0.10497046709060669, "step": 15570 }, { "epoch": 4.422367300596083, "grad_norm": 13.351627349853516, "learning_rate": 9.557990349134261e-05, "loss": 0.12346873283386231, "step": 15580 }, { "epoch": 4.425205790519444, "grad_norm": 7.082498550415039, "learning_rate": 9.557706500141926e-05, "loss": 0.10150505304336548, "step": 15590 }, { "epoch": 4.428044280442805, "grad_norm": 9.033550262451172, "learning_rate": 9.557422651149588e-05, "loss": 0.09153882265090943, "step": 15600 }, { "epoch": 4.430882770366165, "grad_norm": 12.988853454589844, "learning_rate": 9.557138802157252e-05, "loss": 0.11239298582077026, "step": 15610 }, { "epoch": 4.433721260289526, "grad_norm": 11.72825813293457, "learning_rate": 9.556854953164917e-05, "loss": 0.12145980596542358, "step": 15620 }, { "epoch": 4.436559750212886, "grad_norm": 16.388145446777344, "learning_rate": 9.556571104172581e-05, "loss": 0.11548004150390626, "step": 15630 }, { "epoch": 4.4393982401362475, "grad_norm": 12.729074478149414, "learning_rate": 9.556287255180244e-05, "loss": 0.12082574367523194, "step": 15640 }, { "epoch": 4.442236730059609, "grad_norm": 5.23103141784668, "learning_rate": 9.556031791087142e-05, "loss": 0.10686925649642945, "step": 15650 }, { "epoch": 4.445075219982969, "grad_norm": 10.979079246520996, "learning_rate": 9.555747942094806e-05, "loss": 0.10044347047805786, "step": 15660 }, { "epoch": 4.44791370990633, "grad_norm": 7.9336748123168945, "learning_rate": 9.55546409310247e-05, "loss": 0.09248520135879516, "step": 15670 }, { "epoch": 4.45075219982969, "grad_norm": 8.848562240600586, "learning_rate": 9.555180244110134e-05, "loss": 0.09197693467140197, "step": 15680 }, { "epoch": 4.453590689753051, "grad_norm": 10.84079360961914, "learning_rate": 9.554896395117798e-05, "loss": 0.11211545467376709, "step": 15690 }, { "epoch": 4.456429179676412, "grad_norm": 5.874711990356445, "learning_rate": 9.554612546125462e-05, "loss": 0.09090997576713562, "step": 15700 }, { "epoch": 4.459267669599773, "grad_norm": 10.225217819213867, "learning_rate": 9.554328697133126e-05, "loss": 0.088396555185318, "step": 15710 }, { "epoch": 4.462106159523134, "grad_norm": 9.501543045043945, "learning_rate": 9.554044848140789e-05, "loss": 0.09322645664215087, "step": 15720 }, { "epoch": 4.464944649446495, "grad_norm": 10.869462966918945, "learning_rate": 9.553760999148453e-05, "loss": 0.10622620582580566, "step": 15730 }, { "epoch": 4.467783139369855, "grad_norm": 10.668783187866211, "learning_rate": 9.553477150156117e-05, "loss": 0.08440297842025757, "step": 15740 }, { "epoch": 4.470621629293216, "grad_norm": 8.058917045593262, "learning_rate": 9.55319330116378e-05, "loss": 0.08994375467300415, "step": 15750 }, { "epoch": 4.473460119216576, "grad_norm": 5.312722206115723, "learning_rate": 9.552909452171446e-05, "loss": 0.11537469625473022, "step": 15760 }, { "epoch": 4.4762986091399375, "grad_norm": 9.1978120803833, "learning_rate": 9.55262560317911e-05, "loss": 0.07520809173583984, "step": 15770 }, { "epoch": 4.479137099063299, "grad_norm": 10.398944854736328, "learning_rate": 9.552341754186773e-05, "loss": 0.10270328521728515, "step": 15780 }, { "epoch": 4.481975588986659, "grad_norm": 9.813103675842285, "learning_rate": 9.552057905194437e-05, "loss": 0.1330260992050171, "step": 15790 }, { "epoch": 4.48481407891002, "grad_norm": 15.782865524291992, "learning_rate": 9.551774056202101e-05, "loss": 0.11012259721755982, "step": 15800 }, { "epoch": 4.487652568833381, "grad_norm": 4.21843147277832, "learning_rate": 9.551490207209765e-05, "loss": 0.10828667879104614, "step": 15810 }, { "epoch": 4.490491058756741, "grad_norm": 11.461812973022461, "learning_rate": 9.551206358217428e-05, "loss": 0.08962449431419373, "step": 15820 }, { "epoch": 4.493329548680102, "grad_norm": 18.50031852722168, "learning_rate": 9.550922509225093e-05, "loss": 0.11121621131896972, "step": 15830 }, { "epoch": 4.496168038603463, "grad_norm": 10.496804237365723, "learning_rate": 9.550638660232757e-05, "loss": 0.10565978288650513, "step": 15840 }, { "epoch": 4.499006528526824, "grad_norm": 11.859594345092773, "learning_rate": 9.55035481124042e-05, "loss": 0.12185925245285034, "step": 15850 }, { "epoch": 4.501845018450185, "grad_norm": 5.2325615882873535, "learning_rate": 9.550070962248084e-05, "loss": 0.07704566717147827, "step": 15860 }, { "epoch": 4.504683508373545, "grad_norm": 5.022075176239014, "learning_rate": 9.549787113255748e-05, "loss": 0.11739245653152466, "step": 15870 }, { "epoch": 4.507521998296906, "grad_norm": 4.426034450531006, "learning_rate": 9.549503264263411e-05, "loss": 0.10426708459854125, "step": 15880 }, { "epoch": 4.510360488220266, "grad_norm": 6.994664669036865, "learning_rate": 9.549219415271077e-05, "loss": 0.08456276059150696, "step": 15890 }, { "epoch": 4.5131989781436275, "grad_norm": 10.892842292785645, "learning_rate": 9.548935566278741e-05, "loss": 0.12684619426727295, "step": 15900 }, { "epoch": 4.516037468066989, "grad_norm": 7.4198713302612305, "learning_rate": 9.548651717286404e-05, "loss": 0.09960526823997498, "step": 15910 }, { "epoch": 4.518875957990349, "grad_norm": 9.729351043701172, "learning_rate": 9.548367868294068e-05, "loss": 0.09258853197097779, "step": 15920 }, { "epoch": 4.52171444791371, "grad_norm": 6.9711012840271, "learning_rate": 9.548084019301732e-05, "loss": 0.11070196628570557, "step": 15930 }, { "epoch": 4.524552937837071, "grad_norm": 10.35857105255127, "learning_rate": 9.547800170309396e-05, "loss": 0.07786663174629212, "step": 15940 }, { "epoch": 4.527391427760431, "grad_norm": 10.002998352050781, "learning_rate": 9.547516321317059e-05, "loss": 0.14362573623657227, "step": 15950 }, { "epoch": 4.530229917683792, "grad_norm": 8.165070533752441, "learning_rate": 9.547232472324724e-05, "loss": 0.13483221530914308, "step": 15960 }, { "epoch": 4.5330684076071535, "grad_norm": 7.437763690948486, "learning_rate": 9.546948623332389e-05, "loss": 0.11803457736968995, "step": 15970 }, { "epoch": 4.535906897530514, "grad_norm": 7.22627067565918, "learning_rate": 9.546664774340051e-05, "loss": 0.1191522240638733, "step": 15980 }, { "epoch": 4.538745387453875, "grad_norm": 5.201384544372559, "learning_rate": 9.546380925347715e-05, "loss": 0.08088930249214173, "step": 15990 }, { "epoch": 4.541583877377235, "grad_norm": 8.076692581176758, "learning_rate": 9.54609707635538e-05, "loss": 0.0829664945602417, "step": 16000 }, { "epoch": 4.541583877377235, "eval_accuracy": 0.9283397977999619, "eval_loss": 0.21699507534503937, "eval_runtime": 94.914, "eval_samples_per_second": 165.697, "eval_steps_per_second": 2.592, "step": 16000 }, { "epoch": 4.544422367300596, "grad_norm": 6.8143157958984375, "learning_rate": 9.545813227363042e-05, "loss": 0.10992350578308105, "step": 16010 }, { "epoch": 4.547260857223957, "grad_norm": 12.826355934143066, "learning_rate": 9.545529378370706e-05, "loss": 0.08966001272201538, "step": 16020 }, { "epoch": 4.5500993471473175, "grad_norm": 9.055809020996094, "learning_rate": 9.545245529378372e-05, "loss": 0.07982750535011292, "step": 16030 }, { "epoch": 4.552937837070679, "grad_norm": 5.074742317199707, "learning_rate": 9.544961680386035e-05, "loss": 0.13852918148040771, "step": 16040 }, { "epoch": 4.555776326994039, "grad_norm": 14.630254745483398, "learning_rate": 9.544677831393699e-05, "loss": 0.13280603885650635, "step": 16050 }, { "epoch": 4.5586148169174, "grad_norm": 7.514127731323242, "learning_rate": 9.544393982401363e-05, "loss": 0.08939498662948608, "step": 16060 }, { "epoch": 4.561453306840761, "grad_norm": 6.497294902801514, "learning_rate": 9.544110133409027e-05, "loss": 0.08693058490753174, "step": 16070 }, { "epoch": 4.564291796764121, "grad_norm": 9.195015907287598, "learning_rate": 9.54382628441669e-05, "loss": 0.0890343189239502, "step": 16080 }, { "epoch": 4.567130286687482, "grad_norm": 9.167508125305176, "learning_rate": 9.543542435424355e-05, "loss": 0.09251469373703003, "step": 16090 }, { "epoch": 4.5699687766108426, "grad_norm": 7.859913349151611, "learning_rate": 9.54325858643202e-05, "loss": 0.09591525793075562, "step": 16100 }, { "epoch": 4.572807266534204, "grad_norm": 16.815797805786133, "learning_rate": 9.542974737439682e-05, "loss": 0.10137869119644165, "step": 16110 }, { "epoch": 4.575645756457565, "grad_norm": 7.435506820678711, "learning_rate": 9.542690888447347e-05, "loss": 0.1018248200416565, "step": 16120 }, { "epoch": 4.578484246380925, "grad_norm": 6.60835599899292, "learning_rate": 9.54240703945501e-05, "loss": 0.0896972358226776, "step": 16130 }, { "epoch": 4.581322736304286, "grad_norm": 8.406904220581055, "learning_rate": 9.542123190462673e-05, "loss": 0.08845735788345337, "step": 16140 }, { "epoch": 4.584161226227647, "grad_norm": 9.789113998413086, "learning_rate": 9.541839341470338e-05, "loss": 0.11816247701644897, "step": 16150 }, { "epoch": 4.586999716151007, "grad_norm": 4.5440802574157715, "learning_rate": 9.541555492478003e-05, "loss": 0.12903318405151368, "step": 16160 }, { "epoch": 4.5898382060743685, "grad_norm": 5.641026020050049, "learning_rate": 9.541271643485666e-05, "loss": 0.10740396976470948, "step": 16170 }, { "epoch": 4.59267669599773, "grad_norm": 8.69283390045166, "learning_rate": 9.54098779449333e-05, "loss": 0.12431821823120118, "step": 16180 }, { "epoch": 4.59551518592109, "grad_norm": 13.662983894348145, "learning_rate": 9.540703945500994e-05, "loss": 0.09815744161605836, "step": 16190 }, { "epoch": 4.598353675844451, "grad_norm": 11.815173149108887, "learning_rate": 9.540420096508658e-05, "loss": 0.11861116886138916, "step": 16200 }, { "epoch": 4.601192165767811, "grad_norm": 15.07276439666748, "learning_rate": 9.540136247516321e-05, "loss": 0.10368105173110961, "step": 16210 }, { "epoch": 4.604030655691172, "grad_norm": 11.935704231262207, "learning_rate": 9.539852398523985e-05, "loss": 0.10962073802947998, "step": 16220 }, { "epoch": 4.606869145614533, "grad_norm": 8.340388298034668, "learning_rate": 9.53956854953165e-05, "loss": 0.10156797170639038, "step": 16230 }, { "epoch": 4.609707635537894, "grad_norm": 14.518442153930664, "learning_rate": 9.539284700539313e-05, "loss": 0.15380810499191283, "step": 16240 }, { "epoch": 4.612546125461255, "grad_norm": 12.047292709350586, "learning_rate": 9.539000851546978e-05, "loss": 0.13962408304214477, "step": 16250 }, { "epoch": 4.615384615384615, "grad_norm": 8.846763610839844, "learning_rate": 9.538717002554642e-05, "loss": 0.11269948482513428, "step": 16260 }, { "epoch": 4.618223105307976, "grad_norm": 10.527706146240234, "learning_rate": 9.538433153562304e-05, "loss": 0.11503825187683106, "step": 16270 }, { "epoch": 4.621061595231337, "grad_norm": 6.792575836181641, "learning_rate": 9.538149304569969e-05, "loss": 0.08874737620353698, "step": 16280 }, { "epoch": 4.623900085154697, "grad_norm": 6.758511066436768, "learning_rate": 9.537865455577634e-05, "loss": 0.10492103099822998, "step": 16290 }, { "epoch": 4.6267385750780585, "grad_norm": 7.591922760009766, "learning_rate": 9.537581606585297e-05, "loss": 0.14146231412887572, "step": 16300 }, { "epoch": 4.62957706500142, "grad_norm": 5.613837242126465, "learning_rate": 9.537297757592961e-05, "loss": 0.06702241897583008, "step": 16310 }, { "epoch": 4.63241555492478, "grad_norm": 5.019285678863525, "learning_rate": 9.537013908600625e-05, "loss": 0.1083527684211731, "step": 16320 }, { "epoch": 4.635254044848141, "grad_norm": 1.5560178756713867, "learning_rate": 9.53673005960829e-05, "loss": 0.06599014401435851, "step": 16330 }, { "epoch": 4.638092534771501, "grad_norm": 11.054651260375977, "learning_rate": 9.536446210615952e-05, "loss": 0.11762373447418213, "step": 16340 }, { "epoch": 4.640931024694862, "grad_norm": 3.175903797149658, "learning_rate": 9.536162361623616e-05, "loss": 0.11723690032958985, "step": 16350 }, { "epoch": 4.643769514618223, "grad_norm": 6.72786808013916, "learning_rate": 9.535878512631282e-05, "loss": 0.07933568954467773, "step": 16360 }, { "epoch": 4.646608004541584, "grad_norm": 9.460695266723633, "learning_rate": 9.535594663638945e-05, "loss": 0.10729955434799195, "step": 16370 }, { "epoch": 4.649446494464945, "grad_norm": 6.052169322967529, "learning_rate": 9.535310814646609e-05, "loss": 0.07405093312263489, "step": 16380 }, { "epoch": 4.652284984388306, "grad_norm": 7.573793888092041, "learning_rate": 9.535026965654273e-05, "loss": 0.09423447251319886, "step": 16390 }, { "epoch": 4.655123474311666, "grad_norm": 6.456345081329346, "learning_rate": 9.534743116661936e-05, "loss": 0.1209423303604126, "step": 16400 }, { "epoch": 4.657961964235027, "grad_norm": 13.176081657409668, "learning_rate": 9.5344592676696e-05, "loss": 0.12907494306564332, "step": 16410 }, { "epoch": 4.660800454158387, "grad_norm": 6.255336284637451, "learning_rate": 9.534175418677264e-05, "loss": 0.09483216404914856, "step": 16420 }, { "epoch": 4.6636389440817485, "grad_norm": 14.503129959106445, "learning_rate": 9.533891569684928e-05, "loss": 0.08981877565383911, "step": 16430 }, { "epoch": 4.66647743400511, "grad_norm": 8.697125434875488, "learning_rate": 9.533607720692592e-05, "loss": 0.09626601934432984, "step": 16440 }, { "epoch": 4.66931592392847, "grad_norm": 4.9715375900268555, "learning_rate": 9.533323871700256e-05, "loss": 0.12096467018127441, "step": 16450 }, { "epoch": 4.672154413851831, "grad_norm": 9.409915924072266, "learning_rate": 9.53304002270792e-05, "loss": 0.14950315952301024, "step": 16460 }, { "epoch": 4.674992903775191, "grad_norm": 8.096994400024414, "learning_rate": 9.532756173715583e-05, "loss": 0.10059374570846558, "step": 16470 }, { "epoch": 4.677831393698552, "grad_norm": 8.544726371765137, "learning_rate": 9.532472324723247e-05, "loss": 0.06606690883636475, "step": 16480 }, { "epoch": 4.680669883621913, "grad_norm": 18.25759506225586, "learning_rate": 9.532188475730911e-05, "loss": 0.15642318725585938, "step": 16490 }, { "epoch": 4.683508373545274, "grad_norm": 13.105119705200195, "learning_rate": 9.531904626738576e-05, "loss": 0.12553293704986573, "step": 16500 }, { "epoch": 4.683508373545274, "eval_accuracy": 0.9269409296114961, "eval_loss": 0.2191101759672165, "eval_runtime": 104.5052, "eval_samples_per_second": 150.49, "eval_steps_per_second": 2.354, "step": 16500 }, { "epoch": 4.686346863468635, "grad_norm": 7.655740737915039, "learning_rate": 9.53162077774624e-05, "loss": 0.14437127113342285, "step": 16510 }, { "epoch": 4.689185353391996, "grad_norm": 11.356976509094238, "learning_rate": 9.531336928753904e-05, "loss": 0.13273264169692994, "step": 16520 }, { "epoch": 4.692023843315356, "grad_norm": 9.923696517944336, "learning_rate": 9.531053079761567e-05, "loss": 0.10701746940612793, "step": 16530 }, { "epoch": 4.694862333238717, "grad_norm": 10.309243202209473, "learning_rate": 9.530769230769231e-05, "loss": 0.11092044115066528, "step": 16540 }, { "epoch": 4.697700823162078, "grad_norm": 14.704728126525879, "learning_rate": 9.530485381776895e-05, "loss": 0.09466351270675659, "step": 16550 }, { "epoch": 4.7005393130854385, "grad_norm": 10.297354698181152, "learning_rate": 9.530201532784559e-05, "loss": 0.10505775213241578, "step": 16560 }, { "epoch": 4.7033778030088, "grad_norm": 6.728969097137451, "learning_rate": 9.529917683792223e-05, "loss": 0.07867704629898072, "step": 16570 }, { "epoch": 4.70621629293216, "grad_norm": 8.465327262878418, "learning_rate": 9.529633834799887e-05, "loss": 0.07905448079109192, "step": 16580 }, { "epoch": 4.709054782855521, "grad_norm": 14.698272705078125, "learning_rate": 9.52934998580755e-05, "loss": 0.14318947792053222, "step": 16590 }, { "epoch": 4.711893272778882, "grad_norm": 7.873364448547363, "learning_rate": 9.529066136815214e-05, "loss": 0.1462700843811035, "step": 16600 }, { "epoch": 4.714731762702242, "grad_norm": 8.836233139038086, "learning_rate": 9.528782287822878e-05, "loss": 0.08657097816467285, "step": 16610 }, { "epoch": 4.717570252625603, "grad_norm": 7.747163772583008, "learning_rate": 9.528498438830543e-05, "loss": 0.06765082478523254, "step": 16620 }, { "epoch": 4.720408742548964, "grad_norm": 7.814065933227539, "learning_rate": 9.528214589838207e-05, "loss": 0.11931720972061158, "step": 16630 }, { "epoch": 4.723247232472325, "grad_norm": 8.074331283569336, "learning_rate": 9.527930740845871e-05, "loss": 0.11197446584701538, "step": 16640 }, { "epoch": 4.726085722395686, "grad_norm": 13.66728687286377, "learning_rate": 9.527646891853535e-05, "loss": 0.10868241786956787, "step": 16650 }, { "epoch": 4.728924212319046, "grad_norm": 8.978878021240234, "learning_rate": 9.527363042861198e-05, "loss": 0.11614512205123902, "step": 16660 }, { "epoch": 4.731762702242407, "grad_norm": 10.64276123046875, "learning_rate": 9.527079193868862e-05, "loss": 0.09515693187713622, "step": 16670 }, { "epoch": 4.734601192165767, "grad_norm": 20.021202087402344, "learning_rate": 9.526795344876526e-05, "loss": 0.11200168132781982, "step": 16680 }, { "epoch": 4.7374396820891285, "grad_norm": 5.69077205657959, "learning_rate": 9.52651149588419e-05, "loss": 0.11815887689590454, "step": 16690 }, { "epoch": 4.74027817201249, "grad_norm": 16.375200271606445, "learning_rate": 9.526227646891854e-05, "loss": 0.09055788516998291, "step": 16700 }, { "epoch": 4.74311666193585, "grad_norm": 10.385828971862793, "learning_rate": 9.525943797899518e-05, "loss": 0.11615899801254273, "step": 16710 }, { "epoch": 4.745955151859211, "grad_norm": 18.358211517333984, "learning_rate": 9.525659948907181e-05, "loss": 0.11552170515060425, "step": 16720 }, { "epoch": 4.748793641782572, "grad_norm": 20.3819522857666, "learning_rate": 9.525376099914845e-05, "loss": 0.09404461979866027, "step": 16730 }, { "epoch": 4.751632131705932, "grad_norm": 15.501614570617676, "learning_rate": 9.52509225092251e-05, "loss": 0.11058355569839477, "step": 16740 }, { "epoch": 4.754470621629293, "grad_norm": 4.156524658203125, "learning_rate": 9.524808401930174e-05, "loss": 0.1007753849029541, "step": 16750 }, { "epoch": 4.7573091115526545, "grad_norm": 4.872131824493408, "learning_rate": 9.524524552937838e-05, "loss": 0.09929580092430115, "step": 16760 }, { "epoch": 4.760147601476015, "grad_norm": 7.64676570892334, "learning_rate": 9.524240703945502e-05, "loss": 0.10682765245437623, "step": 16770 }, { "epoch": 4.762986091399376, "grad_norm": 18.387691497802734, "learning_rate": 9.523956854953166e-05, "loss": 0.12672107219696044, "step": 16780 }, { "epoch": 4.765824581322736, "grad_norm": 8.545414924621582, "learning_rate": 9.523673005960829e-05, "loss": 0.09666278958320618, "step": 16790 }, { "epoch": 4.768663071246097, "grad_norm": 11.832864761352539, "learning_rate": 9.523389156968493e-05, "loss": 0.1282070279121399, "step": 16800 }, { "epoch": 4.771501561169458, "grad_norm": 2.034998655319214, "learning_rate": 9.523105307976157e-05, "loss": 0.10468051433563233, "step": 16810 }, { "epoch": 4.7743400510928184, "grad_norm": 3.9222159385681152, "learning_rate": 9.522821458983821e-05, "loss": 0.09091046452522278, "step": 16820 }, { "epoch": 4.77717854101618, "grad_norm": 9.841185569763184, "learning_rate": 9.522537609991485e-05, "loss": 0.09908325672149658, "step": 16830 }, { "epoch": 4.78001703093954, "grad_norm": 7.8181657791137695, "learning_rate": 9.52225376099915e-05, "loss": 0.10607476234436035, "step": 16840 }, { "epoch": 4.782855520862901, "grad_norm": 8.056228637695312, "learning_rate": 9.521969912006812e-05, "loss": 0.08934326171875, "step": 16850 }, { "epoch": 4.785694010786262, "grad_norm": 5.64236307144165, "learning_rate": 9.521686063014476e-05, "loss": 0.08309676051139832, "step": 16860 }, { "epoch": 4.788532500709622, "grad_norm": 10.038912773132324, "learning_rate": 9.52140221402214e-05, "loss": 0.09991850852966308, "step": 16870 }, { "epoch": 4.791370990632983, "grad_norm": 5.57243537902832, "learning_rate": 9.521118365029805e-05, "loss": 0.09784092903137206, "step": 16880 }, { "epoch": 4.7942094805563436, "grad_norm": 4.520884037017822, "learning_rate": 9.520834516037469e-05, "loss": 0.10817173719406128, "step": 16890 }, { "epoch": 4.797047970479705, "grad_norm": 8.909920692443848, "learning_rate": 9.520550667045133e-05, "loss": 0.1041751742362976, "step": 16900 }, { "epoch": 4.799886460403066, "grad_norm": 10.240732192993164, "learning_rate": 9.520266818052797e-05, "loss": 0.18071430921554565, "step": 16910 }, { "epoch": 4.802724950326426, "grad_norm": 5.066999912261963, "learning_rate": 9.51998296906046e-05, "loss": 0.1086575984954834, "step": 16920 }, { "epoch": 4.805563440249787, "grad_norm": 3.035792827606201, "learning_rate": 9.519699120068124e-05, "loss": 0.10182220935821533, "step": 16930 }, { "epoch": 4.808401930173148, "grad_norm": 10.97976303100586, "learning_rate": 9.519415271075788e-05, "loss": 0.08665647506713867, "step": 16940 }, { "epoch": 4.811240420096508, "grad_norm": 12.87671947479248, "learning_rate": 9.519131422083451e-05, "loss": 0.09950867295265198, "step": 16950 }, { "epoch": 4.8140789100198695, "grad_norm": 9.703461647033691, "learning_rate": 9.518847573091116e-05, "loss": 0.09405292272567749, "step": 16960 }, { "epoch": 4.816917399943231, "grad_norm": 6.016464710235596, "learning_rate": 9.51856372409878e-05, "loss": 0.10929626226425171, "step": 16970 }, { "epoch": 4.819755889866591, "grad_norm": 10.534263610839844, "learning_rate": 9.518279875106443e-05, "loss": 0.1280822515487671, "step": 16980 }, { "epoch": 4.822594379789952, "grad_norm": 9.862433433532715, "learning_rate": 9.517996026114107e-05, "loss": 0.1020018458366394, "step": 16990 }, { "epoch": 4.825432869713312, "grad_norm": 7.955530166625977, "learning_rate": 9.517712177121772e-05, "loss": 0.10977147817611695, "step": 17000 }, { "epoch": 4.825432869713312, "eval_accuracy": 0.9362879125071533, "eval_loss": 0.19154486060142517, "eval_runtime": 95.6569, "eval_samples_per_second": 164.411, "eval_steps_per_second": 2.572, "step": 17000 }, { "epoch": 4.828271359636673, "grad_norm": 10.125717163085938, "learning_rate": 9.517428328129436e-05, "loss": 0.11574218273162842, "step": 17010 }, { "epoch": 4.831109849560034, "grad_norm": 4.094853401184082, "learning_rate": 9.5171444791371e-05, "loss": 0.08689155578613281, "step": 17020 }, { "epoch": 4.833948339483395, "grad_norm": 9.622533798217773, "learning_rate": 9.516860630144764e-05, "loss": 0.10938496589660644, "step": 17030 }, { "epoch": 4.836786829406756, "grad_norm": 10.39592456817627, "learning_rate": 9.516576781152428e-05, "loss": 0.10825848579406738, "step": 17040 }, { "epoch": 4.839625319330116, "grad_norm": 14.282358169555664, "learning_rate": 9.516292932160091e-05, "loss": 0.13254456520080565, "step": 17050 }, { "epoch": 4.842463809253477, "grad_norm": 9.785965919494629, "learning_rate": 9.516009083167755e-05, "loss": 0.1096080183982849, "step": 17060 }, { "epoch": 4.845302299176838, "grad_norm": 6.729855537414551, "learning_rate": 9.515725234175419e-05, "loss": 0.09165678024291993, "step": 17070 }, { "epoch": 4.848140789100198, "grad_norm": 2.0501980781555176, "learning_rate": 9.515441385183082e-05, "loss": 0.11290193796157837, "step": 17080 }, { "epoch": 4.8509792790235595, "grad_norm": 4.317163467407227, "learning_rate": 9.515157536190748e-05, "loss": 0.11050604581832886, "step": 17090 }, { "epoch": 4.85381776894692, "grad_norm": 12.650547981262207, "learning_rate": 9.514873687198412e-05, "loss": 0.09160925149917602, "step": 17100 }, { "epoch": 4.856656258870281, "grad_norm": 7.288372993469238, "learning_rate": 9.514589838206074e-05, "loss": 0.10064893960952759, "step": 17110 }, { "epoch": 4.859494748793642, "grad_norm": 7.357313632965088, "learning_rate": 9.514305989213739e-05, "loss": 0.11276999711990357, "step": 17120 }, { "epoch": 4.862333238717002, "grad_norm": 14.667037010192871, "learning_rate": 9.514022140221403e-05, "loss": 0.1277229905128479, "step": 17130 }, { "epoch": 4.865171728640363, "grad_norm": 6.252410411834717, "learning_rate": 9.513738291229067e-05, "loss": 0.10603890419006348, "step": 17140 }, { "epoch": 4.868010218563724, "grad_norm": 5.582968235015869, "learning_rate": 9.51345444223673e-05, "loss": 0.10447918176651001, "step": 17150 }, { "epoch": 4.870848708487085, "grad_norm": 2.8924121856689453, "learning_rate": 9.513170593244395e-05, "loss": 0.08672342896461487, "step": 17160 }, { "epoch": 4.873687198410446, "grad_norm": 10.146109580993652, "learning_rate": 9.512886744252059e-05, "loss": 0.07783749103546142, "step": 17170 }, { "epoch": 4.876525688333807, "grad_norm": 7.575291633605957, "learning_rate": 9.512602895259722e-05, "loss": 0.07380979657173156, "step": 17180 }, { "epoch": 4.879364178257167, "grad_norm": 11.607744216918945, "learning_rate": 9.512319046267386e-05, "loss": 0.11043701171875, "step": 17190 }, { "epoch": 4.882202668180528, "grad_norm": 4.639012813568115, "learning_rate": 9.51203519727505e-05, "loss": 0.07277289628982545, "step": 17200 }, { "epoch": 4.885041158103888, "grad_norm": 12.187844276428223, "learning_rate": 9.511751348282713e-05, "loss": 0.1335480809211731, "step": 17210 }, { "epoch": 4.8878796480272495, "grad_norm": 5.165884017944336, "learning_rate": 9.511467499290379e-05, "loss": 0.0840934693813324, "step": 17220 }, { "epoch": 4.890718137950611, "grad_norm": 10.953727722167969, "learning_rate": 9.511183650298043e-05, "loss": 0.10846861600875854, "step": 17230 }, { "epoch": 4.893556627873971, "grad_norm": 6.698594093322754, "learning_rate": 9.510899801305706e-05, "loss": 0.07865909337997437, "step": 17240 }, { "epoch": 4.896395117797332, "grad_norm": 9.646087646484375, "learning_rate": 9.51061595231337e-05, "loss": 0.09259881377220154, "step": 17250 }, { "epoch": 4.899233607720692, "grad_norm": 11.060342788696289, "learning_rate": 9.510332103321034e-05, "loss": 0.09942519664764404, "step": 17260 }, { "epoch": 4.902072097644053, "grad_norm": 9.568615913391113, "learning_rate": 9.510048254328698e-05, "loss": 0.0934409737586975, "step": 17270 }, { "epoch": 4.904910587567414, "grad_norm": 8.678301811218262, "learning_rate": 9.509764405336361e-05, "loss": 0.10730793476104736, "step": 17280 }, { "epoch": 4.907749077490775, "grad_norm": 13.774466514587402, "learning_rate": 9.509480556344026e-05, "loss": 0.12326433658599853, "step": 17290 }, { "epoch": 4.910587567414136, "grad_norm": 7.41526985168457, "learning_rate": 9.50919670735169e-05, "loss": 0.09856268167495727, "step": 17300 }, { "epoch": 4.913426057337497, "grad_norm": 14.93381404876709, "learning_rate": 9.508912858359353e-05, "loss": 0.12167043685913086, "step": 17310 }, { "epoch": 4.916264547260857, "grad_norm": 5.814024448394775, "learning_rate": 9.508629009367017e-05, "loss": 0.12186210155487061, "step": 17320 }, { "epoch": 4.919103037184218, "grad_norm": 6.952630519866943, "learning_rate": 9.508345160374681e-05, "loss": 0.11538687944412232, "step": 17330 }, { "epoch": 4.921941527107579, "grad_norm": 16.18046760559082, "learning_rate": 9.508061311382344e-05, "loss": 0.07667739391326904, "step": 17340 }, { "epoch": 4.9247800170309395, "grad_norm": 12.478224754333496, "learning_rate": 9.507777462390008e-05, "loss": 0.09416518807411194, "step": 17350 }, { "epoch": 4.927618506954301, "grad_norm": 5.829733848571777, "learning_rate": 9.507493613397674e-05, "loss": 0.11254727840423584, "step": 17360 }, { "epoch": 4.930456996877661, "grad_norm": 7.897740364074707, "learning_rate": 9.507209764405337e-05, "loss": 0.09863484501838685, "step": 17370 }, { "epoch": 4.933295486801022, "grad_norm": 11.0557222366333, "learning_rate": 9.506925915413001e-05, "loss": 0.08681448698043823, "step": 17380 }, { "epoch": 4.936133976724383, "grad_norm": 6.653707981109619, "learning_rate": 9.506642066420665e-05, "loss": 0.0904123306274414, "step": 17390 }, { "epoch": 4.938972466647743, "grad_norm": 11.574283599853516, "learning_rate": 9.506358217428329e-05, "loss": 0.09812538623809815, "step": 17400 }, { "epoch": 4.941810956571104, "grad_norm": 5.577938079833984, "learning_rate": 9.506074368435992e-05, "loss": 0.09067865014076233, "step": 17410 }, { "epoch": 4.944649446494465, "grad_norm": 13.252918243408203, "learning_rate": 9.505790519443657e-05, "loss": 0.09812102317810059, "step": 17420 }, { "epoch": 4.947487936417826, "grad_norm": 6.941417217254639, "learning_rate": 9.50550667045132e-05, "loss": 0.08538872003555298, "step": 17430 }, { "epoch": 4.950326426341187, "grad_norm": 9.448984146118164, "learning_rate": 9.505222821458984e-05, "loss": 0.10518577098846435, "step": 17440 }, { "epoch": 4.953164916264547, "grad_norm": 9.265104293823242, "learning_rate": 9.504938972466648e-05, "loss": 0.10004875659942628, "step": 17450 }, { "epoch": 4.956003406187908, "grad_norm": 4.81005859375, "learning_rate": 9.504655123474312e-05, "loss": 0.0894004225730896, "step": 17460 }, { "epoch": 4.958841896111268, "grad_norm": 10.237979888916016, "learning_rate": 9.504371274481975e-05, "loss": 0.1018558144569397, "step": 17470 }, { "epoch": 4.9616803860346295, "grad_norm": 16.29758071899414, "learning_rate": 9.50408742548964e-05, "loss": 0.10791411399841308, "step": 17480 }, { "epoch": 4.964518875957991, "grad_norm": 8.558045387268066, "learning_rate": 9.503803576497305e-05, "loss": 0.10049484968185425, "step": 17490 }, { "epoch": 4.967357365881351, "grad_norm": 15.209638595581055, "learning_rate": 9.503519727504968e-05, "loss": 0.11658862829208375, "step": 17500 }, { "epoch": 4.967357365881351, "eval_accuracy": 0.9319641381064412, "eval_loss": 0.19923432171344757, "eval_runtime": 43.9232, "eval_samples_per_second": 358.056, "eval_steps_per_second": 5.601, "step": 17500 }, { "epoch": 4.970195855804712, "grad_norm": 7.136631488800049, "learning_rate": 9.503235878512632e-05, "loss": 0.08826234340667724, "step": 17510 }, { "epoch": 4.973034345728073, "grad_norm": 7.698460578918457, "learning_rate": 9.502952029520296e-05, "loss": 0.07713820338249207, "step": 17520 }, { "epoch": 4.975872835651433, "grad_norm": 12.848273277282715, "learning_rate": 9.502668180527959e-05, "loss": 0.06579479575157166, "step": 17530 }, { "epoch": 4.978711325574794, "grad_norm": 6.341490745544434, "learning_rate": 9.502384331535623e-05, "loss": 0.0986249327659607, "step": 17540 }, { "epoch": 4.9815498154981555, "grad_norm": 15.848865509033203, "learning_rate": 9.502100482543287e-05, "loss": 0.12302224636077881, "step": 17550 }, { "epoch": 4.984388305421516, "grad_norm": 7.15885066986084, "learning_rate": 9.501816633550951e-05, "loss": 0.0922247290611267, "step": 17560 }, { "epoch": 4.987226795344877, "grad_norm": 18.393217086791992, "learning_rate": 9.501532784558615e-05, "loss": 0.11364421844482422, "step": 17570 }, { "epoch": 4.990065285268237, "grad_norm": 7.784830570220947, "learning_rate": 9.50124893556628e-05, "loss": 0.08862462043762206, "step": 17580 }, { "epoch": 4.992903775191598, "grad_norm": 11.34899616241455, "learning_rate": 9.500965086573944e-05, "loss": 0.11847195625305176, "step": 17590 }, { "epoch": 4.995742265114959, "grad_norm": 10.266997337341309, "learning_rate": 9.500681237581606e-05, "loss": 0.08919727206230163, "step": 17600 }, { "epoch": 4.9985807550383194, "grad_norm": 12.564249992370605, "learning_rate": 9.50039738858927e-05, "loss": 0.09603608250617982, "step": 17610 }, { "epoch": 5.0014192449616806, "grad_norm": 10.379829406738281, "learning_rate": 9.500113539596936e-05, "loss": 0.10056259632110595, "step": 17620 }, { "epoch": 5.004257734885041, "grad_norm": 4.2141571044921875, "learning_rate": 9.499829690604599e-05, "loss": 0.058902549743652347, "step": 17630 }, { "epoch": 5.007096224808402, "grad_norm": 9.410126686096191, "learning_rate": 9.499545841612263e-05, "loss": 0.07901622653007508, "step": 17640 }, { "epoch": 5.009934714731763, "grad_norm": 2.986795663833618, "learning_rate": 9.499261992619927e-05, "loss": 0.08686585426330566, "step": 17650 }, { "epoch": 5.012773204655123, "grad_norm": 4.975697994232178, "learning_rate": 9.49897814362759e-05, "loss": 0.07038358449935914, "step": 17660 }, { "epoch": 5.015611694578484, "grad_norm": 7.428198337554932, "learning_rate": 9.498694294635254e-05, "loss": 0.06350514888763428, "step": 17670 }, { "epoch": 5.018450184501845, "grad_norm": 5.369809150695801, "learning_rate": 9.498410445642918e-05, "loss": 0.05474106669425964, "step": 17680 }, { "epoch": 5.021288674425206, "grad_norm": 18.248376846313477, "learning_rate": 9.498126596650582e-05, "loss": 0.08777884244918824, "step": 17690 }, { "epoch": 5.024127164348567, "grad_norm": 8.888175964355469, "learning_rate": 9.497842747658246e-05, "loss": 0.06761594414710999, "step": 17700 }, { "epoch": 5.026965654271927, "grad_norm": 5.724506378173828, "learning_rate": 9.49755889866591e-05, "loss": 0.048832985758781436, "step": 17710 }, { "epoch": 5.029804144195288, "grad_norm": 12.64881706237793, "learning_rate": 9.497275049673575e-05, "loss": 0.07469272017478942, "step": 17720 }, { "epoch": 5.032642634118649, "grad_norm": 10.861228942871094, "learning_rate": 9.496991200681237e-05, "loss": 0.07855167388916015, "step": 17730 }, { "epoch": 5.035481124042009, "grad_norm": 8.118185997009277, "learning_rate": 9.496707351688902e-05, "loss": 0.11277334690093994, "step": 17740 }, { "epoch": 5.0383196139653705, "grad_norm": 4.291882514953613, "learning_rate": 9.496423502696566e-05, "loss": 0.0736359179019928, "step": 17750 }, { "epoch": 5.041158103888731, "grad_norm": 6.38245964050293, "learning_rate": 9.49613965370423e-05, "loss": 0.06490515470504761, "step": 17760 }, { "epoch": 5.043996593812092, "grad_norm": 9.034252166748047, "learning_rate": 9.495855804711894e-05, "loss": 0.09125692248344422, "step": 17770 }, { "epoch": 5.046835083735453, "grad_norm": 4.985113143920898, "learning_rate": 9.495571955719558e-05, "loss": 0.07188963294029235, "step": 17780 }, { "epoch": 5.049673573658813, "grad_norm": 6.34281587600708, "learning_rate": 9.495288106727221e-05, "loss": 0.08345025181770324, "step": 17790 }, { "epoch": 5.052512063582174, "grad_norm": 4.090058326721191, "learning_rate": 9.495004257734885e-05, "loss": 0.06745479702949524, "step": 17800 }, { "epoch": 5.055350553505535, "grad_norm": 10.353370666503906, "learning_rate": 9.494720408742549e-05, "loss": 0.08422017693519593, "step": 17810 }, { "epoch": 5.058189043428896, "grad_norm": 7.8296966552734375, "learning_rate": 9.494436559750213e-05, "loss": 0.0588695228099823, "step": 17820 }, { "epoch": 5.061027533352257, "grad_norm": 9.754130363464355, "learning_rate": 9.494152710757877e-05, "loss": 0.07040626406669617, "step": 17830 }, { "epoch": 5.063866023275617, "grad_norm": 7.78750467300415, "learning_rate": 9.493868861765542e-05, "loss": 0.10027550458908081, "step": 17840 }, { "epoch": 5.066704513198978, "grad_norm": 11.22624397277832, "learning_rate": 9.493585012773206e-05, "loss": 0.09625286459922791, "step": 17850 }, { "epoch": 5.069543003122339, "grad_norm": 12.896248817443848, "learning_rate": 9.493301163780868e-05, "loss": 0.09566358923912048, "step": 17860 }, { "epoch": 5.072381493045699, "grad_norm": 5.279540061950684, "learning_rate": 9.493017314788533e-05, "loss": 0.07742425203323364, "step": 17870 }, { "epoch": 5.0752199829690605, "grad_norm": 3.275238037109375, "learning_rate": 9.492733465796197e-05, "loss": 0.0683150589466095, "step": 17880 }, { "epoch": 5.078058472892422, "grad_norm": 7.348101615905762, "learning_rate": 9.492449616803861e-05, "loss": 0.08151776790618896, "step": 17890 }, { "epoch": 5.080896962815782, "grad_norm": 13.485532760620117, "learning_rate": 9.492165767811525e-05, "loss": 0.07649669051170349, "step": 17900 }, { "epoch": 5.083735452739143, "grad_norm": 7.857591152191162, "learning_rate": 9.491881918819189e-05, "loss": 0.06742680668830872, "step": 17910 }, { "epoch": 5.086573942662503, "grad_norm": 6.590219497680664, "learning_rate": 9.491598069826852e-05, "loss": 0.05884557366371155, "step": 17920 }, { "epoch": 5.089412432585864, "grad_norm": 2.055638313293457, "learning_rate": 9.491314220834516e-05, "loss": 0.0855446696281433, "step": 17930 }, { "epoch": 5.092250922509225, "grad_norm": 2.430316925048828, "learning_rate": 9.49103037184218e-05, "loss": 0.06274958848953247, "step": 17940 }, { "epoch": 5.095089412432586, "grad_norm": 7.170204162597656, "learning_rate": 9.490746522849844e-05, "loss": 0.07805115580558777, "step": 17950 }, { "epoch": 5.097927902355947, "grad_norm": 10.279401779174805, "learning_rate": 9.490462673857508e-05, "loss": 0.07871707677841186, "step": 17960 }, { "epoch": 5.100766392279308, "grad_norm": 15.920262336730957, "learning_rate": 9.490178824865173e-05, "loss": 0.10540463924407958, "step": 17970 }, { "epoch": 5.103604882202668, "grad_norm": 10.054527282714844, "learning_rate": 9.489894975872837e-05, "loss": 0.07206755876541138, "step": 17980 }, { "epoch": 5.106443372126029, "grad_norm": 6.092567443847656, "learning_rate": 9.4896111268805e-05, "loss": 0.07173948287963867, "step": 17990 }, { "epoch": 5.109281862049389, "grad_norm": 11.205162048339844, "learning_rate": 9.489327277888164e-05, "loss": 0.102176034450531, "step": 18000 }, { "epoch": 5.109281862049389, "eval_accuracy": 0.9210911171870032, "eval_loss": 0.23296763002872467, "eval_runtime": 38.9243, "eval_samples_per_second": 404.04, "eval_steps_per_second": 6.32, "step": 18000 }, { "epoch": 5.1121203519727505, "grad_norm": 4.0921406745910645, "learning_rate": 9.489043428895828e-05, "loss": 0.09659543633460999, "step": 18010 }, { "epoch": 5.114958841896112, "grad_norm": 6.710626602172852, "learning_rate": 9.488759579903492e-05, "loss": 0.08607231974601745, "step": 18020 }, { "epoch": 5.117797331819472, "grad_norm": 6.633852958679199, "learning_rate": 9.488475730911156e-05, "loss": 0.06362637877464294, "step": 18030 }, { "epoch": 5.120635821742833, "grad_norm": 2.3176827430725098, "learning_rate": 9.48819188191882e-05, "loss": 0.06823750138282776, "step": 18040 }, { "epoch": 5.123474311666193, "grad_norm": 7.994673728942871, "learning_rate": 9.487908032926483e-05, "loss": 0.09097947478294373, "step": 18050 }, { "epoch": 5.126312801589554, "grad_norm": 9.616576194763184, "learning_rate": 9.487624183934147e-05, "loss": 0.06737141013145446, "step": 18060 }, { "epoch": 5.129151291512915, "grad_norm": 11.313285827636719, "learning_rate": 9.487340334941811e-05, "loss": 0.09072917103767394, "step": 18070 }, { "epoch": 5.131989781436276, "grad_norm": 1.5065031051635742, "learning_rate": 9.487056485949475e-05, "loss": 0.05679771304130554, "step": 18080 }, { "epoch": 5.134828271359637, "grad_norm": 11.541824340820312, "learning_rate": 9.48677263695714e-05, "loss": 0.08107281923294067, "step": 18090 }, { "epoch": 5.137666761282998, "grad_norm": 4.936645030975342, "learning_rate": 9.486488787964804e-05, "loss": 0.0851235032081604, "step": 18100 }, { "epoch": 5.140505251206358, "grad_norm": 8.537007331848145, "learning_rate": 9.486204938972468e-05, "loss": 0.08810591697692871, "step": 18110 }, { "epoch": 5.143343741129719, "grad_norm": 8.245386123657227, "learning_rate": 9.48592108998013e-05, "loss": 0.0913909912109375, "step": 18120 }, { "epoch": 5.146182231053079, "grad_norm": 3.9210731983184814, "learning_rate": 9.485637240987795e-05, "loss": 0.07565485239028931, "step": 18130 }, { "epoch": 5.1490207209764405, "grad_norm": 7.8380022048950195, "learning_rate": 9.485353391995459e-05, "loss": 0.07229968905448914, "step": 18140 }, { "epoch": 5.151859210899802, "grad_norm": 8.569650650024414, "learning_rate": 9.485069543003123e-05, "loss": 0.06638562083244323, "step": 18150 }, { "epoch": 5.154697700823162, "grad_norm": 5.786457538604736, "learning_rate": 9.484785694010787e-05, "loss": 0.07354280352592468, "step": 18160 }, { "epoch": 5.157536190746523, "grad_norm": 7.18919038772583, "learning_rate": 9.484501845018451e-05, "loss": 0.06895474791526794, "step": 18170 }, { "epoch": 5.160374680669884, "grad_norm": 10.221485137939453, "learning_rate": 9.484217996026114e-05, "loss": 0.0609832763671875, "step": 18180 }, { "epoch": 5.163213170593244, "grad_norm": 7.741446495056152, "learning_rate": 9.483934147033778e-05, "loss": 0.12070943117141723, "step": 18190 }, { "epoch": 5.166051660516605, "grad_norm": 6.015775203704834, "learning_rate": 9.483650298041442e-05, "loss": 0.062222766876220706, "step": 18200 }, { "epoch": 5.168890150439966, "grad_norm": 12.257545471191406, "learning_rate": 9.483366449049107e-05, "loss": 0.08937386274337769, "step": 18210 }, { "epoch": 5.171728640363327, "grad_norm": 5.993994235992432, "learning_rate": 9.48308260005677e-05, "loss": 0.059513908624649045, "step": 18220 }, { "epoch": 5.174567130286688, "grad_norm": 1.7038859128952026, "learning_rate": 9.482798751064435e-05, "loss": 0.06859742403030396, "step": 18230 }, { "epoch": 5.177405620210048, "grad_norm": 3.7338712215423584, "learning_rate": 9.482514902072099e-05, "loss": 0.06382508277893066, "step": 18240 }, { "epoch": 5.180244110133409, "grad_norm": 3.770702838897705, "learning_rate": 9.482231053079762e-05, "loss": 0.06885420680046081, "step": 18250 }, { "epoch": 5.183082600056769, "grad_norm": 4.901632785797119, "learning_rate": 9.481947204087426e-05, "loss": 0.06940459609031677, "step": 18260 }, { "epoch": 5.1859210899801305, "grad_norm": 4.641170501708984, "learning_rate": 9.48166335509509e-05, "loss": 0.10013949871063232, "step": 18270 }, { "epoch": 5.188759579903492, "grad_norm": 12.095184326171875, "learning_rate": 9.481379506102753e-05, "loss": 0.113499116897583, "step": 18280 }, { "epoch": 5.191598069826852, "grad_norm": 10.730377197265625, "learning_rate": 9.481124042009651e-05, "loss": 0.0910386860370636, "step": 18290 }, { "epoch": 5.194436559750213, "grad_norm": 8.615971565246582, "learning_rate": 9.480840193017315e-05, "loss": 0.09274806976318359, "step": 18300 }, { "epoch": 5.197275049673574, "grad_norm": 10.263895034790039, "learning_rate": 9.480556344024979e-05, "loss": 0.07925695776939393, "step": 18310 }, { "epoch": 5.200113539596934, "grad_norm": 8.16684341430664, "learning_rate": 9.480272495032643e-05, "loss": 0.07992810606956482, "step": 18320 }, { "epoch": 5.202952029520295, "grad_norm": 16.601282119750977, "learning_rate": 9.479988646040307e-05, "loss": 0.077848881483078, "step": 18330 }, { "epoch": 5.205790519443656, "grad_norm": 19.564897537231445, "learning_rate": 9.479704797047971e-05, "loss": 0.09368284940719604, "step": 18340 }, { "epoch": 5.208629009367017, "grad_norm": 13.548897743225098, "learning_rate": 9.479420948055636e-05, "loss": 0.07074714303016663, "step": 18350 }, { "epoch": 5.211467499290378, "grad_norm": 12.592287063598633, "learning_rate": 9.479137099063298e-05, "loss": 0.11481066942214965, "step": 18360 }, { "epoch": 5.214305989213738, "grad_norm": 12.380085945129395, "learning_rate": 9.478853250070963e-05, "loss": 0.07343819737434387, "step": 18370 }, { "epoch": 5.217144479137099, "grad_norm": 8.411995887756348, "learning_rate": 9.478569401078627e-05, "loss": 0.08523416519165039, "step": 18380 }, { "epoch": 5.21998296906046, "grad_norm": 7.19748067855835, "learning_rate": 9.47828555208629e-05, "loss": 0.05684340000152588, "step": 18390 }, { "epoch": 5.22282145898382, "grad_norm": 7.9836835861206055, "learning_rate": 9.478001703093955e-05, "loss": 0.07688609957695007, "step": 18400 }, { "epoch": 5.2256599489071816, "grad_norm": 6.801393508911133, "learning_rate": 9.477717854101619e-05, "loss": 0.07400145530700683, "step": 18410 }, { "epoch": 5.228498438830542, "grad_norm": 15.341517448425293, "learning_rate": 9.477434005109282e-05, "loss": 0.08876971006393433, "step": 18420 }, { "epoch": 5.231336928753903, "grad_norm": 8.892108917236328, "learning_rate": 9.477150156116946e-05, "loss": 0.1133413314819336, "step": 18430 }, { "epoch": 5.234175418677264, "grad_norm": 9.530908584594727, "learning_rate": 9.47686630712461e-05, "loss": 0.07957600951194763, "step": 18440 }, { "epoch": 5.237013908600624, "grad_norm": 10.234977722167969, "learning_rate": 9.476582458132274e-05, "loss": 0.07572659850120544, "step": 18450 }, { "epoch": 5.239852398523985, "grad_norm": 4.83993673324585, "learning_rate": 9.476298609139937e-05, "loss": 0.06785738468170166, "step": 18460 }, { "epoch": 5.242690888447346, "grad_norm": 7.219738483428955, "learning_rate": 9.476014760147603e-05, "loss": 0.08247049450874329, "step": 18470 }, { "epoch": 5.245529378370707, "grad_norm": 9.332255363464355, "learning_rate": 9.475730911155267e-05, "loss": 0.12435424327850342, "step": 18480 }, { "epoch": 5.248367868294068, "grad_norm": 8.041120529174805, "learning_rate": 9.47544706216293e-05, "loss": 0.07584156990051269, "step": 18490 }, { "epoch": 5.251206358217428, "grad_norm": 8.58516788482666, "learning_rate": 9.475163213170594e-05, "loss": 0.07202554941177368, "step": 18500 }, { "epoch": 5.251206358217428, "eval_accuracy": 0.9422648947669613, "eval_loss": 0.17457884550094604, "eval_runtime": 50.8641, "eval_samples_per_second": 309.197, "eval_steps_per_second": 4.836, "step": 18500 }, { "epoch": 5.254044848140789, "grad_norm": 12.778895378112793, "learning_rate": 9.474879364178258e-05, "loss": 0.08528786897659302, "step": 18510 }, { "epoch": 5.25688333806415, "grad_norm": 8.699050903320312, "learning_rate": 9.47459551518592e-05, "loss": 0.07395046949386597, "step": 18520 }, { "epoch": 5.25972182798751, "grad_norm": 4.717103481292725, "learning_rate": 9.474311666193586e-05, "loss": 0.06763979196548461, "step": 18530 }, { "epoch": 5.2625603179108715, "grad_norm": 10.392642974853516, "learning_rate": 9.47402781720125e-05, "loss": 0.08027667999267578, "step": 18540 }, { "epoch": 5.265398807834233, "grad_norm": 7.460955619812012, "learning_rate": 9.473743968208913e-05, "loss": 0.08559210300445556, "step": 18550 }, { "epoch": 5.268237297757593, "grad_norm": 8.935466766357422, "learning_rate": 9.473460119216577e-05, "loss": 0.08033855557441712, "step": 18560 }, { "epoch": 5.271075787680954, "grad_norm": 8.478368759155273, "learning_rate": 9.473176270224241e-05, "loss": 0.09030812382698059, "step": 18570 }, { "epoch": 5.273914277604314, "grad_norm": 16.435333251953125, "learning_rate": 9.472892421231905e-05, "loss": 0.10910791158676147, "step": 18580 }, { "epoch": 5.276752767527675, "grad_norm": 11.21521282196045, "learning_rate": 9.472608572239568e-05, "loss": 0.08007351160049439, "step": 18590 }, { "epoch": 5.279591257451036, "grad_norm": 11.123760223388672, "learning_rate": 9.472324723247234e-05, "loss": 0.10969436168670654, "step": 18600 }, { "epoch": 5.282429747374397, "grad_norm": 11.591766357421875, "learning_rate": 9.472040874254898e-05, "loss": 0.07666656970977784, "step": 18610 }, { "epoch": 5.285268237297758, "grad_norm": 7.157061576843262, "learning_rate": 9.47175702526256e-05, "loss": 0.10225881338119507, "step": 18620 }, { "epoch": 5.288106727221118, "grad_norm": 7.429928779602051, "learning_rate": 9.471473176270225e-05, "loss": 0.08736824989318848, "step": 18630 }, { "epoch": 5.290945217144479, "grad_norm": 12.958579063415527, "learning_rate": 9.471189327277889e-05, "loss": 0.09359899759292603, "step": 18640 }, { "epoch": 5.29378370706784, "grad_norm": 4.989264011383057, "learning_rate": 9.470905478285552e-05, "loss": 0.07549943923950195, "step": 18650 }, { "epoch": 5.2966221969912, "grad_norm": 18.896257400512695, "learning_rate": 9.470621629293216e-05, "loss": 0.07630518674850464, "step": 18660 }, { "epoch": 5.2994606869145615, "grad_norm": 6.147333145141602, "learning_rate": 9.470337780300881e-05, "loss": 0.0783186674118042, "step": 18670 }, { "epoch": 5.302299176837923, "grad_norm": 9.742904663085938, "learning_rate": 9.470053931308544e-05, "loss": 0.10020580291748046, "step": 18680 }, { "epoch": 5.305137666761283, "grad_norm": 12.073601722717285, "learning_rate": 9.469770082316208e-05, "loss": 0.08531382083892822, "step": 18690 }, { "epoch": 5.307976156684644, "grad_norm": 17.650005340576172, "learning_rate": 9.469486233323872e-05, "loss": 0.12030344009399414, "step": 18700 }, { "epoch": 5.310814646608004, "grad_norm": 13.1454496383667, "learning_rate": 9.469202384331536e-05, "loss": 0.062100136280059816, "step": 18710 }, { "epoch": 5.313653136531365, "grad_norm": 5.906651020050049, "learning_rate": 9.468918535339199e-05, "loss": 0.06617650985717774, "step": 18720 }, { "epoch": 5.316491626454726, "grad_norm": 9.046698570251465, "learning_rate": 9.468634686346865e-05, "loss": 0.07618563175201416, "step": 18730 }, { "epoch": 5.319330116378087, "grad_norm": 8.500767707824707, "learning_rate": 9.468350837354529e-05, "loss": 0.07679342627525329, "step": 18740 }, { "epoch": 5.322168606301448, "grad_norm": 7.238258361816406, "learning_rate": 9.468066988362192e-05, "loss": 0.076738840341568, "step": 18750 }, { "epoch": 5.325007096224809, "grad_norm": 2.7573652267456055, "learning_rate": 9.467783139369856e-05, "loss": 0.06035377383232117, "step": 18760 }, { "epoch": 5.327845586148169, "grad_norm": 3.8139407634735107, "learning_rate": 9.46749929037752e-05, "loss": 0.08762522935867309, "step": 18770 }, { "epoch": 5.33068407607153, "grad_norm": 12.29344367980957, "learning_rate": 9.467215441385183e-05, "loss": 0.08349004983901978, "step": 18780 }, { "epoch": 5.33352256599489, "grad_norm": 8.24168586730957, "learning_rate": 9.466931592392847e-05, "loss": 0.057842719554901126, "step": 18790 }, { "epoch": 5.3363610559182515, "grad_norm": 7.376888751983643, "learning_rate": 9.466647743400512e-05, "loss": 0.0895460605621338, "step": 18800 }, { "epoch": 5.339199545841613, "grad_norm": 7.441391944885254, "learning_rate": 9.466363894408175e-05, "loss": 0.0740753710269928, "step": 18810 }, { "epoch": 5.342038035764973, "grad_norm": 7.36415958404541, "learning_rate": 9.466080045415839e-05, "loss": 0.0717194676399231, "step": 18820 }, { "epoch": 5.344876525688334, "grad_norm": 7.024552822113037, "learning_rate": 9.465796196423503e-05, "loss": 0.07917510867118835, "step": 18830 }, { "epoch": 5.347715015611694, "grad_norm": 4.087230682373047, "learning_rate": 9.465512347431167e-05, "loss": 0.09903403520584106, "step": 18840 }, { "epoch": 5.350553505535055, "grad_norm": 7.015305042266846, "learning_rate": 9.46522849843883e-05, "loss": 0.09222846627235412, "step": 18850 }, { "epoch": 5.353391995458416, "grad_norm": 12.812353134155273, "learning_rate": 9.464944649446494e-05, "loss": 0.10756261348724365, "step": 18860 }, { "epoch": 5.356230485381777, "grad_norm": 5.955140113830566, "learning_rate": 9.46466080045416e-05, "loss": 0.08172371387481689, "step": 18870 }, { "epoch": 5.359068975305138, "grad_norm": 6.889424800872803, "learning_rate": 9.464376951461823e-05, "loss": 0.08319158554077148, "step": 18880 }, { "epoch": 5.361907465228499, "grad_norm": 11.521003723144531, "learning_rate": 9.464093102469487e-05, "loss": 0.0747744858264923, "step": 18890 }, { "epoch": 5.364745955151859, "grad_norm": 4.933936595916748, "learning_rate": 9.463809253477151e-05, "loss": 0.06362287402153015, "step": 18900 }, { "epoch": 5.36758444507522, "grad_norm": 20.800384521484375, "learning_rate": 9.463525404484814e-05, "loss": 0.10495679378509522, "step": 18910 }, { "epoch": 5.37042293499858, "grad_norm": 2.5494186878204346, "learning_rate": 9.463241555492478e-05, "loss": 0.07361495494842529, "step": 18920 }, { "epoch": 5.3732614249219415, "grad_norm": 6.367360591888428, "learning_rate": 9.462957706500143e-05, "loss": 0.07116778492927552, "step": 18930 }, { "epoch": 5.376099914845303, "grad_norm": 3.8966290950775146, "learning_rate": 9.462673857507806e-05, "loss": 0.07487550973892212, "step": 18940 }, { "epoch": 5.378938404768663, "grad_norm": 13.369152069091797, "learning_rate": 9.46239000851547e-05, "loss": 0.10263134241104126, "step": 18950 }, { "epoch": 5.381776894692024, "grad_norm": 13.917024612426758, "learning_rate": 9.462106159523134e-05, "loss": 0.09078729152679443, "step": 18960 }, { "epoch": 5.384615384615385, "grad_norm": 6.691281318664551, "learning_rate": 9.461822310530799e-05, "loss": 0.07896417975425721, "step": 18970 }, { "epoch": 5.387453874538745, "grad_norm": 10.000167846679688, "learning_rate": 9.461538461538461e-05, "loss": 0.08428423404693604, "step": 18980 }, { "epoch": 5.390292364462106, "grad_norm": 8.958215713500977, "learning_rate": 9.461254612546125e-05, "loss": 0.08342361450195312, "step": 18990 }, { "epoch": 5.393130854385467, "grad_norm": 7.94756555557251, "learning_rate": 9.460970763553791e-05, "loss": 0.10720465183258057, "step": 19000 }, { "epoch": 5.393130854385467, "eval_accuracy": 0.9427735741082215, "eval_loss": 0.17933644354343414, "eval_runtime": 51.539, "eval_samples_per_second": 305.147, "eval_steps_per_second": 4.773, "step": 19000 }, { "epoch": 5.395969344308828, "grad_norm": 6.878994464874268, "learning_rate": 9.460686914561454e-05, "loss": 0.05874641537666321, "step": 19010 }, { "epoch": 5.398807834232189, "grad_norm": 3.2151708602905273, "learning_rate": 9.460403065569118e-05, "loss": 0.07626590132713318, "step": 19020 }, { "epoch": 5.401646324155549, "grad_norm": 8.450891494750977, "learning_rate": 9.460119216576782e-05, "loss": 0.07106446623802185, "step": 19030 }, { "epoch": 5.40448481407891, "grad_norm": 9.667518615722656, "learning_rate": 9.459835367584445e-05, "loss": 0.07302677631378174, "step": 19040 }, { "epoch": 5.40732330400227, "grad_norm": 8.096644401550293, "learning_rate": 9.459551518592109e-05, "loss": 0.10217257738113403, "step": 19050 }, { "epoch": 5.4101617939256315, "grad_norm": 8.352413177490234, "learning_rate": 9.459267669599773e-05, "loss": 0.08236924409866334, "step": 19060 }, { "epoch": 5.413000283848993, "grad_norm": 2.507559299468994, "learning_rate": 9.458983820607437e-05, "loss": 0.06328428387641907, "step": 19070 }, { "epoch": 5.415838773772353, "grad_norm": 4.837892055511475, "learning_rate": 9.458699971615101e-05, "loss": 0.09069536328315735, "step": 19080 }, { "epoch": 5.418677263695714, "grad_norm": 15.89395809173584, "learning_rate": 9.458416122622765e-05, "loss": 0.07400401830673217, "step": 19090 }, { "epoch": 5.421515753619075, "grad_norm": 11.02748966217041, "learning_rate": 9.45813227363043e-05, "loss": 0.07365716099739075, "step": 19100 }, { "epoch": 5.424354243542435, "grad_norm": 14.338333129882812, "learning_rate": 9.457848424638092e-05, "loss": 0.09927078485488891, "step": 19110 }, { "epoch": 5.427192733465796, "grad_norm": 8.652446746826172, "learning_rate": 9.457564575645757e-05, "loss": 0.08792399168014527, "step": 19120 }, { "epoch": 5.430031223389157, "grad_norm": 7.211245059967041, "learning_rate": 9.457280726653421e-05, "loss": 0.0920968234539032, "step": 19130 }, { "epoch": 5.432869713312518, "grad_norm": 9.831609725952148, "learning_rate": 9.456996877661085e-05, "loss": 0.0960320234298706, "step": 19140 }, { "epoch": 5.435708203235879, "grad_norm": 6.089266777038574, "learning_rate": 9.456713028668749e-05, "loss": 0.12647815942764282, "step": 19150 }, { "epoch": 5.438546693159239, "grad_norm": 8.162260055541992, "learning_rate": 9.456429179676413e-05, "loss": 0.09688466787338257, "step": 19160 }, { "epoch": 5.4413851830826, "grad_norm": 6.646251678466797, "learning_rate": 9.456145330684076e-05, "loss": 0.07046921849250794, "step": 19170 }, { "epoch": 5.444223673005961, "grad_norm": 10.844340324401855, "learning_rate": 9.45586148169174e-05, "loss": 0.07728798389434814, "step": 19180 }, { "epoch": 5.447062162929321, "grad_norm": 7.466623306274414, "learning_rate": 9.455577632699404e-05, "loss": 0.08667004108428955, "step": 19190 }, { "epoch": 5.4499006528526825, "grad_norm": 10.069533348083496, "learning_rate": 9.455293783707068e-05, "loss": 0.08872150778770446, "step": 19200 }, { "epoch": 5.452739142776043, "grad_norm": 10.04992389678955, "learning_rate": 9.455009934714732e-05, "loss": 0.1075783371925354, "step": 19210 }, { "epoch": 5.455577632699404, "grad_norm": 6.856022834777832, "learning_rate": 9.454726085722397e-05, "loss": 0.05453166365623474, "step": 19220 }, { "epoch": 5.458416122622765, "grad_norm": 10.104236602783203, "learning_rate": 9.45444223673006e-05, "loss": 0.10069869756698609, "step": 19230 }, { "epoch": 5.461254612546125, "grad_norm": 5.524036884307861, "learning_rate": 9.454158387737723e-05, "loss": 0.07077946066856385, "step": 19240 }, { "epoch": 5.464093102469486, "grad_norm": 18.045108795166016, "learning_rate": 9.453874538745388e-05, "loss": 0.10300452709197998, "step": 19250 }, { "epoch": 5.4669315923928465, "grad_norm": 3.159855365753174, "learning_rate": 9.453590689753052e-05, "loss": 0.09097833037376404, "step": 19260 }, { "epoch": 5.469770082316208, "grad_norm": 7.375906467437744, "learning_rate": 9.453306840760716e-05, "loss": 0.08084736466407776, "step": 19270 }, { "epoch": 5.472608572239569, "grad_norm": 10.022363662719727, "learning_rate": 9.45302299176838e-05, "loss": 0.09462616443634034, "step": 19280 }, { "epoch": 5.475447062162929, "grad_norm": 2.4281177520751953, "learning_rate": 9.452739142776044e-05, "loss": 0.08815036416053772, "step": 19290 }, { "epoch": 5.47828555208629, "grad_norm": 11.097614288330078, "learning_rate": 9.452455293783707e-05, "loss": 0.1076703429222107, "step": 19300 }, { "epoch": 5.481124042009651, "grad_norm": 8.454005241394043, "learning_rate": 9.452171444791371e-05, "loss": 0.07722907066345215, "step": 19310 }, { "epoch": 5.483962531933011, "grad_norm": 7.675615310668945, "learning_rate": 9.451887595799035e-05, "loss": 0.05459824204444885, "step": 19320 }, { "epoch": 5.4868010218563725, "grad_norm": 12.410074234008789, "learning_rate": 9.4516037468067e-05, "loss": 0.08613243699073792, "step": 19330 }, { "epoch": 5.489639511779734, "grad_norm": 16.268434524536133, "learning_rate": 9.451319897814364e-05, "loss": 0.10436093807220459, "step": 19340 }, { "epoch": 5.492478001703094, "grad_norm": 9.588899612426758, "learning_rate": 9.451036048822028e-05, "loss": 0.08647354841232299, "step": 19350 }, { "epoch": 5.495316491626455, "grad_norm": 1.6752121448516846, "learning_rate": 9.45075219982969e-05, "loss": 0.057551604509353635, "step": 19360 }, { "epoch": 5.498154981549815, "grad_norm": 9.188848495483398, "learning_rate": 9.450468350837355e-05, "loss": 0.08146671652793884, "step": 19370 }, { "epoch": 5.500993471473176, "grad_norm": 10.282560348510742, "learning_rate": 9.450184501845019e-05, "loss": 0.0790541410446167, "step": 19380 }, { "epoch": 5.503831961396537, "grad_norm": 3.5971035957336426, "learning_rate": 9.449900652852683e-05, "loss": 0.08922047019004822, "step": 19390 }, { "epoch": 5.506670451319898, "grad_norm": 11.138239860534668, "learning_rate": 9.449616803860347e-05, "loss": 0.0908033549785614, "step": 19400 }, { "epoch": 5.509508941243259, "grad_norm": 7.2023606300354, "learning_rate": 9.449332954868011e-05, "loss": 0.10645496845245361, "step": 19410 }, { "epoch": 5.512347431166619, "grad_norm": 6.792098522186279, "learning_rate": 9.449049105875675e-05, "loss": 0.07852301597595215, "step": 19420 }, { "epoch": 5.51518592108998, "grad_norm": 6.498943328857422, "learning_rate": 9.448765256883338e-05, "loss": 0.08675245642662048, "step": 19430 }, { "epoch": 5.518024411013341, "grad_norm": 10.863751411437988, "learning_rate": 9.448481407891002e-05, "loss": 0.07290551662445069, "step": 19440 }, { "epoch": 5.520862900936701, "grad_norm": 10.324902534484863, "learning_rate": 9.448197558898666e-05, "loss": 0.08392128348350525, "step": 19450 }, { "epoch": 5.5237013908600625, "grad_norm": 9.560992240905762, "learning_rate": 9.447913709906329e-05, "loss": 0.08865192532539368, "step": 19460 }, { "epoch": 5.526539880783423, "grad_norm": 4.9796366691589355, "learning_rate": 9.447629860913995e-05, "loss": 0.08133686780929565, "step": 19470 }, { "epoch": 5.529378370706784, "grad_norm": 7.798362731933594, "learning_rate": 9.447346011921659e-05, "loss": 0.08588886857032776, "step": 19480 }, { "epoch": 5.532216860630145, "grad_norm": 4.308609485626221, "learning_rate": 9.447062162929322e-05, "loss": 0.11932425498962403, "step": 19490 }, { "epoch": 5.535055350553505, "grad_norm": 7.0214104652404785, "learning_rate": 9.446778313936986e-05, "loss": 0.093926602602005, "step": 19500 }, { "epoch": 5.535055350553505, "eval_accuracy": 0.9460799898264132, "eval_loss": 0.1689659208059311, "eval_runtime": 41.8052, "eval_samples_per_second": 376.197, "eval_steps_per_second": 5.884, "step": 19500 }, { "epoch": 5.537893840476866, "grad_norm": 13.88133430480957, "learning_rate": 9.44649446494465e-05, "loss": 0.09504415988922119, "step": 19510 }, { "epoch": 5.540732330400227, "grad_norm": 8.42488956451416, "learning_rate": 9.446210615952314e-05, "loss": 0.08991296887397766, "step": 19520 }, { "epoch": 5.543570820323588, "grad_norm": 11.717544555664062, "learning_rate": 9.445926766959978e-05, "loss": 0.06964786052703857, "step": 19530 }, { "epoch": 5.546409310246949, "grad_norm": 2.9819517135620117, "learning_rate": 9.445642917967642e-05, "loss": 0.10066670179367065, "step": 19540 }, { "epoch": 5.54924780017031, "grad_norm": 9.772299766540527, "learning_rate": 9.445359068975306e-05, "loss": 0.09909443259239196, "step": 19550 }, { "epoch": 5.55208629009367, "grad_norm": 11.05834674835205, "learning_rate": 9.445075219982969e-05, "loss": 0.07698040008544922, "step": 19560 }, { "epoch": 5.554924780017031, "grad_norm": 7.039271354675293, "learning_rate": 9.444791370990633e-05, "loss": 0.07922827005386353, "step": 19570 }, { "epoch": 5.557763269940391, "grad_norm": 12.414461135864258, "learning_rate": 9.444507521998297e-05, "loss": 0.06945996284484864, "step": 19580 }, { "epoch": 5.5606017598637525, "grad_norm": 7.738710880279541, "learning_rate": 9.44422367300596e-05, "loss": 0.1258586287498474, "step": 19590 }, { "epoch": 5.563440249787114, "grad_norm": 9.37479305267334, "learning_rate": 9.443939824013626e-05, "loss": 0.10373703241348267, "step": 19600 }, { "epoch": 5.566278739710474, "grad_norm": 11.564993858337402, "learning_rate": 9.44365597502129e-05, "loss": 0.08072373867034913, "step": 19610 }, { "epoch": 5.569117229633835, "grad_norm": 6.324765205383301, "learning_rate": 9.443372126028953e-05, "loss": 0.06887639760971069, "step": 19620 }, { "epoch": 5.571955719557195, "grad_norm": 12.113024711608887, "learning_rate": 9.443088277036617e-05, "loss": 0.07203938961029052, "step": 19630 }, { "epoch": 5.574794209480556, "grad_norm": 9.82325267791748, "learning_rate": 9.442804428044281e-05, "loss": 0.09342219233512879, "step": 19640 }, { "epoch": 5.577632699403917, "grad_norm": 18.202713012695312, "learning_rate": 9.442520579051945e-05, "loss": 0.10701614618301392, "step": 19650 }, { "epoch": 5.580471189327278, "grad_norm": 12.043167114257812, "learning_rate": 9.442236730059609e-05, "loss": 0.11591868400573731, "step": 19660 }, { "epoch": 5.583309679250639, "grad_norm": 7.7379984855651855, "learning_rate": 9.441952881067273e-05, "loss": 0.10180948972702027, "step": 19670 }, { "epoch": 5.586148169174, "grad_norm": 7.023738384246826, "learning_rate": 9.441669032074937e-05, "loss": 0.07058555483818055, "step": 19680 }, { "epoch": 5.58898665909736, "grad_norm": 7.2600860595703125, "learning_rate": 9.4413851830826e-05, "loss": 0.08538170456886292, "step": 19690 }, { "epoch": 5.591825149020721, "grad_norm": 10.528064727783203, "learning_rate": 9.441101334090264e-05, "loss": 0.0882144570350647, "step": 19700 }, { "epoch": 5.594663638944081, "grad_norm": 8.968792915344238, "learning_rate": 9.440817485097928e-05, "loss": 0.08134956359863281, "step": 19710 }, { "epoch": 5.5975021288674425, "grad_norm": 14.498093605041504, "learning_rate": 9.440533636105591e-05, "loss": 0.07299937009811401, "step": 19720 }, { "epoch": 5.600340618790804, "grad_norm": 11.587307929992676, "learning_rate": 9.440249787113257e-05, "loss": 0.07862650752067565, "step": 19730 }, { "epoch": 5.603179108714164, "grad_norm": 7.937727451324463, "learning_rate": 9.439965938120921e-05, "loss": 0.08454519510269165, "step": 19740 }, { "epoch": 5.606017598637525, "grad_norm": 10.590116500854492, "learning_rate": 9.439682089128584e-05, "loss": 0.06396015286445618, "step": 19750 }, { "epoch": 5.608856088560886, "grad_norm": 12.260401725769043, "learning_rate": 9.439398240136248e-05, "loss": 0.08060122728347778, "step": 19760 }, { "epoch": 5.611694578484246, "grad_norm": 11.924155235290527, "learning_rate": 9.439114391143912e-05, "loss": 0.06814630031585693, "step": 19770 }, { "epoch": 5.614533068407607, "grad_norm": 9.407285690307617, "learning_rate": 9.438830542151576e-05, "loss": 0.08907255530357361, "step": 19780 }, { "epoch": 5.617371558330968, "grad_norm": 9.259258270263672, "learning_rate": 9.438546693159239e-05, "loss": 0.07667644619941712, "step": 19790 }, { "epoch": 5.620210048254329, "grad_norm": 9.626527786254883, "learning_rate": 9.438262844166904e-05, "loss": 0.08153380751609803, "step": 19800 }, { "epoch": 5.62304853817769, "grad_norm": 15.405110359191895, "learning_rate": 9.437978995174568e-05, "loss": 0.14006257057189941, "step": 19810 }, { "epoch": 5.62588702810105, "grad_norm": 10.22412109375, "learning_rate": 9.437695146182231e-05, "loss": 0.09710671305656433, "step": 19820 }, { "epoch": 5.628725518024411, "grad_norm": 10.65337085723877, "learning_rate": 9.437411297189895e-05, "loss": 0.05488495826721192, "step": 19830 }, { "epoch": 5.631564007947771, "grad_norm": 6.062845230102539, "learning_rate": 9.43712744819756e-05, "loss": 0.06433424949645997, "step": 19840 }, { "epoch": 5.6344024978711325, "grad_norm": 7.994802951812744, "learning_rate": 9.436843599205222e-05, "loss": 0.07674207091331482, "step": 19850 }, { "epoch": 5.637240987794494, "grad_norm": 11.868600845336914, "learning_rate": 9.436559750212888e-05, "loss": 0.10992245674133301, "step": 19860 }, { "epoch": 5.640079477717854, "grad_norm": 6.023365497589111, "learning_rate": 9.436275901220552e-05, "loss": 0.09051301479339599, "step": 19870 }, { "epoch": 5.642917967641215, "grad_norm": 6.248188018798828, "learning_rate": 9.435992052228215e-05, "loss": 0.0887942910194397, "step": 19880 }, { "epoch": 5.645756457564576, "grad_norm": 9.147370338439941, "learning_rate": 9.435708203235879e-05, "loss": 0.07907897233963013, "step": 19890 }, { "epoch": 5.648594947487936, "grad_norm": 9.794856071472168, "learning_rate": 9.435424354243543e-05, "loss": 0.07521876692771912, "step": 19900 }, { "epoch": 5.651433437411297, "grad_norm": 8.990909576416016, "learning_rate": 9.435140505251207e-05, "loss": 0.07150543332099915, "step": 19910 }, { "epoch": 5.654271927334658, "grad_norm": 14.639640808105469, "learning_rate": 9.43485665625887e-05, "loss": 0.08275507688522339, "step": 19920 }, { "epoch": 5.657110417258019, "grad_norm": 3.136373519897461, "learning_rate": 9.434572807266535e-05, "loss": 0.08125224113464355, "step": 19930 }, { "epoch": 5.65994890718138, "grad_norm": 2.2366764545440674, "learning_rate": 9.4342889582742e-05, "loss": 0.09115605950355529, "step": 19940 }, { "epoch": 5.66278739710474, "grad_norm": 7.60429573059082, "learning_rate": 9.434005109281862e-05, "loss": 0.09158273339271546, "step": 19950 }, { "epoch": 5.665625887028101, "grad_norm": 3.092881441116333, "learning_rate": 9.433721260289526e-05, "loss": 0.08649827837944031, "step": 19960 }, { "epoch": 5.668464376951462, "grad_norm": 13.94498348236084, "learning_rate": 9.43343741129719e-05, "loss": 0.07618074417114258, "step": 19970 }, { "epoch": 5.671302866874822, "grad_norm": 4.923437118530273, "learning_rate": 9.433153562304853e-05, "loss": 0.0879828929901123, "step": 19980 }, { "epoch": 5.6741413567981835, "grad_norm": 16.00627326965332, "learning_rate": 9.432869713312518e-05, "loss": 0.07524152994155883, "step": 19990 }, { "epoch": 5.676979846721544, "grad_norm": 8.307519912719727, "learning_rate": 9.432585864320183e-05, "loss": 0.0855475664138794, "step": 20000 }, { "epoch": 5.676979846721544, "eval_accuracy": 0.9425192344375914, "eval_loss": 0.1697309911251068, "eval_runtime": 45.4908, "eval_samples_per_second": 345.718, "eval_steps_per_second": 5.408, "step": 20000 }, { "epoch": 5.679818336644905, "grad_norm": 8.362937927246094, "learning_rate": 9.432302015327846e-05, "loss": 0.08852348327636719, "step": 20010 }, { "epoch": 5.682656826568266, "grad_norm": 1.1705883741378784, "learning_rate": 9.43201816633551e-05, "loss": 0.06738465428352355, "step": 20020 }, { "epoch": 5.685495316491626, "grad_norm": 9.12266731262207, "learning_rate": 9.431734317343174e-05, "loss": 0.08514622449874878, "step": 20030 }, { "epoch": 5.688333806414987, "grad_norm": 14.439252853393555, "learning_rate": 9.431450468350838e-05, "loss": 0.08904629349708557, "step": 20040 }, { "epoch": 5.6911722963383475, "grad_norm": 6.256050109863281, "learning_rate": 9.431166619358501e-05, "loss": 0.08196524381637574, "step": 20050 }, { "epoch": 5.694010786261709, "grad_norm": 3.9947049617767334, "learning_rate": 9.430882770366166e-05, "loss": 0.052776938676834105, "step": 20060 }, { "epoch": 5.69684927618507, "grad_norm": 7.696474552154541, "learning_rate": 9.430598921373829e-05, "loss": 0.10913490056991577, "step": 20070 }, { "epoch": 5.69968776610843, "grad_norm": 4.450081825256348, "learning_rate": 9.430315072381493e-05, "loss": 0.059974652528762815, "step": 20080 }, { "epoch": 5.702526256031791, "grad_norm": 11.90709400177002, "learning_rate": 9.430031223389158e-05, "loss": 0.10661106109619141, "step": 20090 }, { "epoch": 5.705364745955152, "grad_norm": 7.665729999542236, "learning_rate": 9.429747374396822e-05, "loss": 0.0708132266998291, "step": 20100 }, { "epoch": 5.708203235878512, "grad_norm": 4.998851299285889, "learning_rate": 9.429463525404484e-05, "loss": 0.0532051682472229, "step": 20110 }, { "epoch": 5.7110417258018735, "grad_norm": 3.0816426277160645, "learning_rate": 9.429179676412149e-05, "loss": 0.08052088022232055, "step": 20120 }, { "epoch": 5.713880215725235, "grad_norm": 11.284135818481445, "learning_rate": 9.428895827419814e-05, "loss": 0.1282043218612671, "step": 20130 }, { "epoch": 5.716718705648595, "grad_norm": 6.322018146514893, "learning_rate": 9.428611978427477e-05, "loss": 0.08017283082008361, "step": 20140 }, { "epoch": 5.719557195571956, "grad_norm": 10.811723709106445, "learning_rate": 9.428328129435141e-05, "loss": 0.08887165784835815, "step": 20150 }, { "epoch": 5.722395685495316, "grad_norm": 12.223605155944824, "learning_rate": 9.428044280442805e-05, "loss": 0.08472169041633607, "step": 20160 }, { "epoch": 5.725234175418677, "grad_norm": 10.805670738220215, "learning_rate": 9.427760431450468e-05, "loss": 0.06547505855560302, "step": 20170 }, { "epoch": 5.728072665342038, "grad_norm": 7.128765106201172, "learning_rate": 9.427476582458132e-05, "loss": 0.09494673013687134, "step": 20180 }, { "epoch": 5.730911155265399, "grad_norm": 9.190949440002441, "learning_rate": 9.427192733465796e-05, "loss": 0.07238447666168213, "step": 20190 }, { "epoch": 5.73374964518876, "grad_norm": 16.909034729003906, "learning_rate": 9.42690888447346e-05, "loss": 0.08167011141777039, "step": 20200 }, { "epoch": 5.73658813511212, "grad_norm": 8.070013999938965, "learning_rate": 9.426625035481124e-05, "loss": 0.0710361659526825, "step": 20210 }, { "epoch": 5.739426625035481, "grad_norm": 5.874579429626465, "learning_rate": 9.426341186488789e-05, "loss": 0.06286275386810303, "step": 20220 }, { "epoch": 5.742265114958842, "grad_norm": 8.36144733428955, "learning_rate": 9.426057337496453e-05, "loss": 0.09279226064682007, "step": 20230 }, { "epoch": 5.745103604882202, "grad_norm": 7.022001266479492, "learning_rate": 9.425773488504116e-05, "loss": 0.07916021943092347, "step": 20240 }, { "epoch": 5.7479420948055635, "grad_norm": 4.247718811035156, "learning_rate": 9.42548963951178e-05, "loss": 0.06966397166252136, "step": 20250 }, { "epoch": 5.750780584728924, "grad_norm": 7.583504676818848, "learning_rate": 9.425205790519445e-05, "loss": 0.06687934398651123, "step": 20260 }, { "epoch": 5.753619074652285, "grad_norm": 10.267077445983887, "learning_rate": 9.424921941527108e-05, "loss": 0.09304417371749878, "step": 20270 }, { "epoch": 5.756457564575646, "grad_norm": 11.244630813598633, "learning_rate": 9.424638092534772e-05, "loss": 0.06960177421569824, "step": 20280 }, { "epoch": 5.759296054499006, "grad_norm": 13.201896667480469, "learning_rate": 9.424354243542436e-05, "loss": 0.10238891839981079, "step": 20290 }, { "epoch": 5.762134544422367, "grad_norm": 6.290003299713135, "learning_rate": 9.424070394550099e-05, "loss": 0.08183891773223877, "step": 20300 }, { "epoch": 5.764973034345728, "grad_norm": 9.067564964294434, "learning_rate": 9.423786545557763e-05, "loss": 0.06799808740615845, "step": 20310 }, { "epoch": 5.767811524269089, "grad_norm": 6.7885894775390625, "learning_rate": 9.423502696565427e-05, "loss": 0.05139153599739075, "step": 20320 }, { "epoch": 5.77065001419245, "grad_norm": 10.689641952514648, "learning_rate": 9.423218847573091e-05, "loss": 0.07360635995864868, "step": 20330 }, { "epoch": 5.773488504115811, "grad_norm": 16.24526596069336, "learning_rate": 9.422934998580756e-05, "loss": 0.10305349826812744, "step": 20340 }, { "epoch": 5.776326994039171, "grad_norm": 7.384768486022949, "learning_rate": 9.42265114958842e-05, "loss": 0.08453144431114197, "step": 20350 }, { "epoch": 5.779165483962532, "grad_norm": 8.917681694030762, "learning_rate": 9.422367300596084e-05, "loss": 0.0804623007774353, "step": 20360 }, { "epoch": 5.782003973885892, "grad_norm": 6.427260398864746, "learning_rate": 9.422083451603747e-05, "loss": 0.08580575585365295, "step": 20370 }, { "epoch": 5.7848424638092535, "grad_norm": 8.31935977935791, "learning_rate": 9.421799602611411e-05, "loss": 0.08821266293525695, "step": 20380 }, { "epoch": 5.787680953732615, "grad_norm": 6.603612899780273, "learning_rate": 9.421515753619075e-05, "loss": 0.10696513652801513, "step": 20390 }, { "epoch": 5.790519443655975, "grad_norm": 9.612587928771973, "learning_rate": 9.421231904626739e-05, "loss": 0.07530587911605835, "step": 20400 }, { "epoch": 5.793357933579336, "grad_norm": 12.032959938049316, "learning_rate": 9.420948055634403e-05, "loss": 0.056288611888885495, "step": 20410 }, { "epoch": 5.796196423502696, "grad_norm": 9.89351749420166, "learning_rate": 9.420664206642067e-05, "loss": 0.06583508253097534, "step": 20420 }, { "epoch": 5.799034913426057, "grad_norm": 9.445446968078613, "learning_rate": 9.42038035764973e-05, "loss": 0.10640065670013428, "step": 20430 }, { "epoch": 5.801873403349418, "grad_norm": 5.38732385635376, "learning_rate": 9.420096508657394e-05, "loss": 0.06409531831741333, "step": 20440 }, { "epoch": 5.804711893272779, "grad_norm": 8.62579345703125, "learning_rate": 9.419812659665058e-05, "loss": 0.0671120047569275, "step": 20450 }, { "epoch": 5.80755038319614, "grad_norm": 13.489418029785156, "learning_rate": 9.419528810672723e-05, "loss": 0.07729152441024781, "step": 20460 }, { "epoch": 5.810388873119501, "grad_norm": 8.329533576965332, "learning_rate": 9.419244961680387e-05, "loss": 0.09585180878639221, "step": 20470 }, { "epoch": 5.813227363042861, "grad_norm": 3.19197940826416, "learning_rate": 9.418961112688051e-05, "loss": 0.07309839725494385, "step": 20480 }, { "epoch": 5.816065852966222, "grad_norm": 13.151877403259277, "learning_rate": 9.418677263695715e-05, "loss": 0.07978783249855041, "step": 20490 }, { "epoch": 5.818904342889582, "grad_norm": 16.06995964050293, "learning_rate": 9.418393414703378e-05, "loss": 0.07461789846420289, "step": 20500 }, { "epoch": 5.818904342889582, "eval_accuracy": 0.9306924397532905, "eval_loss": 0.2063288688659668, "eval_runtime": 41.3601, "eval_samples_per_second": 380.246, "eval_steps_per_second": 5.948, "step": 20500 }, { "epoch": 5.8217428328129435, "grad_norm": 3.355853319168091, "learning_rate": 9.418109565711042e-05, "loss": 0.06107552051544189, "step": 20510 }, { "epoch": 5.824581322736305, "grad_norm": 16.63850975036621, "learning_rate": 9.417825716718706e-05, "loss": 0.0969734787940979, "step": 20520 }, { "epoch": 5.827419812659665, "grad_norm": 5.820771217346191, "learning_rate": 9.41754186772637e-05, "loss": 0.10112478733062744, "step": 20530 }, { "epoch": 5.830258302583026, "grad_norm": 13.096980094909668, "learning_rate": 9.417258018734034e-05, "loss": 0.06524617671966552, "step": 20540 }, { "epoch": 5.833096792506387, "grad_norm": 8.250523567199707, "learning_rate": 9.416974169741698e-05, "loss": 0.09841796159744262, "step": 20550 }, { "epoch": 5.835935282429747, "grad_norm": 15.811319351196289, "learning_rate": 9.416690320749361e-05, "loss": 0.140497624874115, "step": 20560 }, { "epoch": 5.838773772353108, "grad_norm": 7.819662570953369, "learning_rate": 9.416406471757025e-05, "loss": 0.07606329917907714, "step": 20570 }, { "epoch": 5.841612262276469, "grad_norm": 9.491522789001465, "learning_rate": 9.41612262276469e-05, "loss": 0.1022137999534607, "step": 20580 }, { "epoch": 5.84445075219983, "grad_norm": 8.069787979125977, "learning_rate": 9.415838773772354e-05, "loss": 0.09838594198226928, "step": 20590 }, { "epoch": 5.847289242123191, "grad_norm": 5.5568766593933105, "learning_rate": 9.415554924780018e-05, "loss": 0.09576981067657471, "step": 20600 }, { "epoch": 5.850127732046551, "grad_norm": 8.781682968139648, "learning_rate": 9.415271075787682e-05, "loss": 0.08717960715293885, "step": 20610 }, { "epoch": 5.852966221969912, "grad_norm": 8.89035415649414, "learning_rate": 9.414987226795346e-05, "loss": 0.07351911664009095, "step": 20620 }, { "epoch": 5.855804711893272, "grad_norm": 14.073203086853027, "learning_rate": 9.414703377803009e-05, "loss": 0.11070513725280762, "step": 20630 }, { "epoch": 5.8586432018166335, "grad_norm": 8.588897705078125, "learning_rate": 9.414419528810673e-05, "loss": 0.06462211608886718, "step": 20640 }, { "epoch": 5.861481691739995, "grad_norm": 12.54620361328125, "learning_rate": 9.414135679818337e-05, "loss": 0.07106034159660339, "step": 20650 }, { "epoch": 5.864320181663355, "grad_norm": 7.94242525100708, "learning_rate": 9.413851830826001e-05, "loss": 0.0705458641052246, "step": 20660 }, { "epoch": 5.867158671586716, "grad_norm": 6.975644588470459, "learning_rate": 9.413567981833665e-05, "loss": 0.08306703567504883, "step": 20670 }, { "epoch": 5.869997161510077, "grad_norm": 8.576547622680664, "learning_rate": 9.41328413284133e-05, "loss": 0.09936286211013794, "step": 20680 }, { "epoch": 5.872835651433437, "grad_norm": 3.8242883682250977, "learning_rate": 9.413000283848992e-05, "loss": 0.06709930896759034, "step": 20690 }, { "epoch": 5.875674141356798, "grad_norm": 12.594011306762695, "learning_rate": 9.412716434856656e-05, "loss": 0.08002779483795167, "step": 20700 }, { "epoch": 5.878512631280159, "grad_norm": 9.929564476013184, "learning_rate": 9.41243258586432e-05, "loss": 0.07368727326393128, "step": 20710 }, { "epoch": 5.88135112120352, "grad_norm": 9.353164672851562, "learning_rate": 9.412148736871985e-05, "loss": 0.07653219699859619, "step": 20720 }, { "epoch": 5.884189611126881, "grad_norm": 11.253243446350098, "learning_rate": 9.411864887879649e-05, "loss": 0.09326536655426025, "step": 20730 }, { "epoch": 5.887028101050241, "grad_norm": 12.768586158752441, "learning_rate": 9.411581038887313e-05, "loss": 0.13587732315063478, "step": 20740 }, { "epoch": 5.889866590973602, "grad_norm": 11.104660987854004, "learning_rate": 9.411297189894977e-05, "loss": 0.1269753694534302, "step": 20750 }, { "epoch": 5.892705080896963, "grad_norm": 9.045692443847656, "learning_rate": 9.41101334090264e-05, "loss": 0.0779412865638733, "step": 20760 }, { "epoch": 5.895543570820323, "grad_norm": 8.618453025817871, "learning_rate": 9.410729491910304e-05, "loss": 0.08266773819923401, "step": 20770 }, { "epoch": 5.8983820607436845, "grad_norm": 13.867982864379883, "learning_rate": 9.410445642917968e-05, "loss": 0.09153485298156738, "step": 20780 }, { "epoch": 5.901220550667045, "grad_norm": 5.584874629974365, "learning_rate": 9.410161793925631e-05, "loss": 0.08373277187347412, "step": 20790 }, { "epoch": 5.904059040590406, "grad_norm": 4.162350654602051, "learning_rate": 9.409877944933296e-05, "loss": 0.08566312789916992, "step": 20800 }, { "epoch": 5.906897530513767, "grad_norm": 8.214615821838379, "learning_rate": 9.40959409594096e-05, "loss": 0.0850801169872284, "step": 20810 }, { "epoch": 5.909736020437127, "grad_norm": 5.733292102813721, "learning_rate": 9.409310246948623e-05, "loss": 0.06173244118690491, "step": 20820 }, { "epoch": 5.912574510360488, "grad_norm": 6.981990337371826, "learning_rate": 9.409026397956287e-05, "loss": 0.0775724470615387, "step": 20830 }, { "epoch": 5.9154130002838485, "grad_norm": 12.068794250488281, "learning_rate": 9.408742548963952e-05, "loss": 0.06148160099983215, "step": 20840 }, { "epoch": 5.91825149020721, "grad_norm": 8.005067825317383, "learning_rate": 9.408458699971616e-05, "loss": 0.09404718279838561, "step": 20850 }, { "epoch": 5.921089980130571, "grad_norm": 16.975383758544922, "learning_rate": 9.40817485097928e-05, "loss": 0.08774300813674926, "step": 20860 }, { "epoch": 5.923928470053931, "grad_norm": 8.47479248046875, "learning_rate": 9.407891001986944e-05, "loss": 0.07542625665664673, "step": 20870 }, { "epoch": 5.926766959977292, "grad_norm": 4.810630798339844, "learning_rate": 9.407607152994608e-05, "loss": 0.056763529777526855, "step": 20880 }, { "epoch": 5.929605449900653, "grad_norm": 6.83657693862915, "learning_rate": 9.407323304002271e-05, "loss": 0.08233998417854309, "step": 20890 }, { "epoch": 5.932443939824013, "grad_norm": 8.625046730041504, "learning_rate": 9.407039455009935e-05, "loss": 0.10489623546600342, "step": 20900 }, { "epoch": 5.9352824297473745, "grad_norm": 10.964731216430664, "learning_rate": 9.406755606017599e-05, "loss": 0.08318195343017579, "step": 20910 }, { "epoch": 5.938120919670736, "grad_norm": 7.15880823135376, "learning_rate": 9.406471757025262e-05, "loss": 0.06529826521873475, "step": 20920 }, { "epoch": 5.940959409594096, "grad_norm": 11.773893356323242, "learning_rate": 9.406187908032927e-05, "loss": 0.0699180543422699, "step": 20930 }, { "epoch": 5.943797899517457, "grad_norm": 11.637399673461914, "learning_rate": 9.405904059040592e-05, "loss": 0.08833930492401124, "step": 20940 }, { "epoch": 5.946636389440817, "grad_norm": 5.403414726257324, "learning_rate": 9.405620210048254e-05, "loss": 0.06694583296775818, "step": 20950 }, { "epoch": 5.949474879364178, "grad_norm": 2.136225700378418, "learning_rate": 9.405336361055919e-05, "loss": 0.056777161359786985, "step": 20960 }, { "epoch": 5.952313369287539, "grad_norm": 6.255948543548584, "learning_rate": 9.405052512063583e-05, "loss": 0.09560567736625672, "step": 20970 }, { "epoch": 5.9551518592109, "grad_norm": 14.952638626098633, "learning_rate": 9.404768663071247e-05, "loss": 0.11277000904083252, "step": 20980 }, { "epoch": 5.957990349134261, "grad_norm": 8.258495330810547, "learning_rate": 9.404484814078911e-05, "loss": 0.0907693326473236, "step": 20990 }, { "epoch": 5.960828839057621, "grad_norm": 6.687869548797607, "learning_rate": 9.404200965086575e-05, "loss": 0.07166925668716431, "step": 21000 }, { "epoch": 5.960828839057621, "eval_accuracy": 0.9476696127678514, "eval_loss": 0.15244710445404053, "eval_runtime": 50.9711, "eval_samples_per_second": 308.548, "eval_steps_per_second": 4.826, "step": 21000 }, { "epoch": 5.963667328980982, "grad_norm": 9.658256530761719, "learning_rate": 9.403917116094238e-05, "loss": 0.08539697527885437, "step": 21010 }, { "epoch": 5.966505818904343, "grad_norm": 4.117014408111572, "learning_rate": 9.403633267101902e-05, "loss": 0.06931743025779724, "step": 21020 }, { "epoch": 5.969344308827703, "grad_norm": 8.32268238067627, "learning_rate": 9.403349418109566e-05, "loss": 0.06338712573051453, "step": 21030 }, { "epoch": 5.9721827987510645, "grad_norm": 7.0213775634765625, "learning_rate": 9.40306556911723e-05, "loss": 0.09329544901847839, "step": 21040 }, { "epoch": 5.975021288674425, "grad_norm": 7.176434516906738, "learning_rate": 9.402781720124893e-05, "loss": 0.08601340055465698, "step": 21050 }, { "epoch": 5.977859778597786, "grad_norm": 11.9928617477417, "learning_rate": 9.402497871132559e-05, "loss": 0.10251727104187011, "step": 21060 }, { "epoch": 5.980698268521147, "grad_norm": 10.96060848236084, "learning_rate": 9.402214022140223e-05, "loss": 0.07568201422691345, "step": 21070 }, { "epoch": 5.983536758444507, "grad_norm": 11.50032901763916, "learning_rate": 9.401930173147885e-05, "loss": 0.0763308823108673, "step": 21080 }, { "epoch": 5.986375248367868, "grad_norm": 5.848087310791016, "learning_rate": 9.40164632415555e-05, "loss": 0.07836049795150757, "step": 21090 }, { "epoch": 5.989213738291229, "grad_norm": 16.430204391479492, "learning_rate": 9.401362475163214e-05, "loss": 0.09596822261810303, "step": 21100 }, { "epoch": 5.99205222821459, "grad_norm": 9.427444458007812, "learning_rate": 9.401078626170877e-05, "loss": 0.07155163288116455, "step": 21110 }, { "epoch": 5.994890718137951, "grad_norm": 5.242671012878418, "learning_rate": 9.40079477717854e-05, "loss": 0.07231312990188599, "step": 21120 }, { "epoch": 5.997729208061312, "grad_norm": 10.684367179870605, "learning_rate": 9.400510928186206e-05, "loss": 0.08340182304382324, "step": 21130 }, { "epoch": 6.000567697984672, "grad_norm": 9.143176078796387, "learning_rate": 9.400227079193869e-05, "loss": 0.06981292366981506, "step": 21140 }, { "epoch": 6.003406187908033, "grad_norm": 3.9658005237579346, "learning_rate": 9.399943230201533e-05, "loss": 0.03612378537654877, "step": 21150 }, { "epoch": 6.006244677831393, "grad_norm": 7.317986011505127, "learning_rate": 9.399659381209197e-05, "loss": 0.052134573459625244, "step": 21160 }, { "epoch": 6.0090831677547545, "grad_norm": 7.173664569854736, "learning_rate": 9.399375532216861e-05, "loss": 0.05946729183197021, "step": 21170 }, { "epoch": 6.011921657678116, "grad_norm": 6.992497444152832, "learning_rate": 9.399091683224524e-05, "loss": 0.06091173887252808, "step": 21180 }, { "epoch": 6.014760147601476, "grad_norm": 3.434575080871582, "learning_rate": 9.39880783423219e-05, "loss": 0.05389639139175415, "step": 21190 }, { "epoch": 6.017598637524837, "grad_norm": 5.238525867462158, "learning_rate": 9.398523985239854e-05, "loss": 0.04209000170230866, "step": 21200 }, { "epoch": 6.020437127448197, "grad_norm": 13.759931564331055, "learning_rate": 9.398240136247517e-05, "loss": 0.05310367345809937, "step": 21210 }, { "epoch": 6.023275617371558, "grad_norm": 8.639272689819336, "learning_rate": 9.397956287255181e-05, "loss": 0.09074180722236633, "step": 21220 }, { "epoch": 6.026114107294919, "grad_norm": 8.604941368103027, "learning_rate": 9.397672438262845e-05, "loss": 0.061110538244247434, "step": 21230 }, { "epoch": 6.02895259721828, "grad_norm": 18.405363082885742, "learning_rate": 9.397388589270508e-05, "loss": 0.07753486633300781, "step": 21240 }, { "epoch": 6.031791087141641, "grad_norm": 5.4760823249816895, "learning_rate": 9.397104740278172e-05, "loss": 0.06202937960624695, "step": 21250 }, { "epoch": 6.034629577065002, "grad_norm": 7.983343601226807, "learning_rate": 9.396820891285837e-05, "loss": 0.058184468746185304, "step": 21260 }, { "epoch": 6.037468066988362, "grad_norm": 6.724315643310547, "learning_rate": 9.3965370422935e-05, "loss": 0.06080639362335205, "step": 21270 }, { "epoch": 6.040306556911723, "grad_norm": 4.235963821411133, "learning_rate": 9.396253193301164e-05, "loss": 0.06076875329017639, "step": 21280 }, { "epoch": 6.043145046835083, "grad_norm": 6.702730655670166, "learning_rate": 9.395969344308828e-05, "loss": 0.06199674010276794, "step": 21290 }, { "epoch": 6.0459835367584445, "grad_norm": 13.8462553024292, "learning_rate": 9.395685495316492e-05, "loss": 0.08117430210113526, "step": 21300 }, { "epoch": 6.048822026681806, "grad_norm": 3.5921738147735596, "learning_rate": 9.395401646324155e-05, "loss": 0.07103267312049866, "step": 21310 }, { "epoch": 6.051660516605166, "grad_norm": 7.459458351135254, "learning_rate": 9.39511779733182e-05, "loss": 0.045485228300094604, "step": 21320 }, { "epoch": 6.054499006528527, "grad_norm": 10.054925918579102, "learning_rate": 9.394833948339485e-05, "loss": 0.06371062994003296, "step": 21330 }, { "epoch": 6.057337496451888, "grad_norm": 5.37416410446167, "learning_rate": 9.394550099347148e-05, "loss": 0.05777629613876343, "step": 21340 }, { "epoch": 6.060175986375248, "grad_norm": 4.027499198913574, "learning_rate": 9.394266250354812e-05, "loss": 0.06969146132469177, "step": 21350 }, { "epoch": 6.063014476298609, "grad_norm": 12.246729850769043, "learning_rate": 9.393982401362476e-05, "loss": 0.04309948980808258, "step": 21360 }, { "epoch": 6.06585296622197, "grad_norm": 8.102431297302246, "learning_rate": 9.393698552370139e-05, "loss": 0.08400939702987671, "step": 21370 }, { "epoch": 6.068691456145331, "grad_norm": 5.957435131072998, "learning_rate": 9.393414703377803e-05, "loss": 0.06218685507774353, "step": 21380 }, { "epoch": 6.071529946068692, "grad_norm": 7.365063667297363, "learning_rate": 9.393130854385468e-05, "loss": 0.04049176871776581, "step": 21390 }, { "epoch": 6.074368435992052, "grad_norm": 9.017918586730957, "learning_rate": 9.392847005393131e-05, "loss": 0.06810371279716491, "step": 21400 }, { "epoch": 6.077206925915413, "grad_norm": 6.680815696716309, "learning_rate": 9.392563156400795e-05, "loss": 0.05874435901641846, "step": 21410 }, { "epoch": 6.080045415838773, "grad_norm": 6.949630260467529, "learning_rate": 9.39227930740846e-05, "loss": 0.07051147818565369, "step": 21420 }, { "epoch": 6.0828839057621344, "grad_norm": 2.8873131275177, "learning_rate": 9.391995458416124e-05, "loss": 0.05488317608833313, "step": 21430 }, { "epoch": 6.085722395685496, "grad_norm": 3.4227747917175293, "learning_rate": 9.391711609423786e-05, "loss": 0.0542338490486145, "step": 21440 }, { "epoch": 6.088560885608856, "grad_norm": 9.397969245910645, "learning_rate": 9.39142776043145e-05, "loss": 0.03927270174026489, "step": 21450 }, { "epoch": 6.091399375532217, "grad_norm": 6.178247928619385, "learning_rate": 9.391143911439116e-05, "loss": 0.05281578302383423, "step": 21460 }, { "epoch": 6.094237865455578, "grad_norm": 2.9313151836395264, "learning_rate": 9.390860062446779e-05, "loss": 0.03856506943702698, "step": 21470 }, { "epoch": 6.097076355378938, "grad_norm": 10.740840911865234, "learning_rate": 9.390576213454443e-05, "loss": 0.0790773868560791, "step": 21480 }, { "epoch": 6.099914845302299, "grad_norm": 2.1163671016693115, "learning_rate": 9.390292364462107e-05, "loss": 0.04837349057197571, "step": 21490 }, { "epoch": 6.1027533352256595, "grad_norm": 8.00338077545166, "learning_rate": 9.39000851546977e-05, "loss": 0.06134638786315918, "step": 21500 }, { "epoch": 6.1027533352256595, "eval_accuracy": 0.9460799898264132, "eval_loss": 0.1592402458190918, "eval_runtime": 38.0855, "eval_samples_per_second": 412.939, "eval_steps_per_second": 6.459, "step": 21500 }, { "epoch": 6.105591825149021, "grad_norm": 12.506088256835938, "learning_rate": 9.389724666477434e-05, "loss": 0.053288054466247556, "step": 21510 }, { "epoch": 6.108430315072382, "grad_norm": 6.686629772186279, "learning_rate": 9.389440817485098e-05, "loss": 0.06144238710403442, "step": 21520 }, { "epoch": 6.111268804995742, "grad_norm": 6.704835414886475, "learning_rate": 9.389156968492762e-05, "loss": 0.06716190576553345, "step": 21530 }, { "epoch": 6.114107294919103, "grad_norm": 5.94556999206543, "learning_rate": 9.388873119500426e-05, "loss": 0.04503162205219269, "step": 21540 }, { "epoch": 6.116945784842464, "grad_norm": 8.303624153137207, "learning_rate": 9.38858927050809e-05, "loss": 0.07723650932312012, "step": 21550 }, { "epoch": 6.119784274765824, "grad_norm": 12.635313987731934, "learning_rate": 9.388305421515755e-05, "loss": 0.10071357488632202, "step": 21560 }, { "epoch": 6.1226227646891855, "grad_norm": 9.264017105102539, "learning_rate": 9.388021572523417e-05, "loss": 0.061448365449905396, "step": 21570 }, { "epoch": 6.125461254612546, "grad_norm": 4.064350128173828, "learning_rate": 9.387737723531082e-05, "loss": 0.06523907780647278, "step": 21580 }, { "epoch": 6.128299744535907, "grad_norm": 4.6602678298950195, "learning_rate": 9.387453874538747e-05, "loss": 0.06008039116859436, "step": 21590 }, { "epoch": 6.131138234459268, "grad_norm": 0.9431376457214355, "learning_rate": 9.38717002554641e-05, "loss": 0.03485822677612305, "step": 21600 }, { "epoch": 6.133976724382628, "grad_norm": 6.068197250366211, "learning_rate": 9.386886176554074e-05, "loss": 0.06565170288085938, "step": 21610 }, { "epoch": 6.136815214305989, "grad_norm": 6.566659450531006, "learning_rate": 9.386602327561738e-05, "loss": 0.05589370727539063, "step": 21620 }, { "epoch": 6.1396537042293495, "grad_norm": 5.246420860290527, "learning_rate": 9.386318478569401e-05, "loss": 0.07652181386947632, "step": 21630 }, { "epoch": 6.142492194152711, "grad_norm": 9.014434814453125, "learning_rate": 9.386034629577065e-05, "loss": 0.08360793590545654, "step": 21640 }, { "epoch": 6.145330684076072, "grad_norm": 8.633270263671875, "learning_rate": 9.385750780584729e-05, "loss": 0.060514068603515624, "step": 21650 }, { "epoch": 6.148169173999432, "grad_norm": 12.748857498168945, "learning_rate": 9.385466931592393e-05, "loss": 0.07230349183082581, "step": 21660 }, { "epoch": 6.151007663922793, "grad_norm": 11.839465141296387, "learning_rate": 9.385183082600057e-05, "loss": 0.06926686763763427, "step": 21670 }, { "epoch": 6.153846153846154, "grad_norm": 3.5303401947021484, "learning_rate": 9.384899233607722e-05, "loss": 0.08087220788002014, "step": 21680 }, { "epoch": 6.156684643769514, "grad_norm": 6.665874481201172, "learning_rate": 9.384615384615386e-05, "loss": 0.05260986089706421, "step": 21690 }, { "epoch": 6.1595231336928755, "grad_norm": 14.623100280761719, "learning_rate": 9.384331535623048e-05, "loss": 0.07398749589920044, "step": 21700 }, { "epoch": 6.162361623616236, "grad_norm": 9.885360717773438, "learning_rate": 9.384047686630713e-05, "loss": 0.08134155869483947, "step": 21710 }, { "epoch": 6.165200113539597, "grad_norm": 9.322347640991211, "learning_rate": 9.383763837638377e-05, "loss": 0.06841292977333069, "step": 21720 }, { "epoch": 6.168038603462958, "grad_norm": 5.876733303070068, "learning_rate": 9.383479988646041e-05, "loss": 0.07037004232406616, "step": 21730 }, { "epoch": 6.170877093386318, "grad_norm": 5.702275276184082, "learning_rate": 9.383196139653705e-05, "loss": 0.08429449796676636, "step": 21740 }, { "epoch": 6.173715583309679, "grad_norm": 0.8919403553009033, "learning_rate": 9.382912290661369e-05, "loss": 0.042805686593055725, "step": 21750 }, { "epoch": 6.17655407323304, "grad_norm": 7.829222202301025, "learning_rate": 9.382628441669032e-05, "loss": 0.04724862575531006, "step": 21760 }, { "epoch": 6.179392563156401, "grad_norm": 6.580927848815918, "learning_rate": 9.382344592676696e-05, "loss": 0.047700130939483644, "step": 21770 }, { "epoch": 6.182231053079762, "grad_norm": 12.813886642456055, "learning_rate": 9.38206074368436e-05, "loss": 0.05651273727416992, "step": 21780 }, { "epoch": 6.185069543003122, "grad_norm": 13.307184219360352, "learning_rate": 9.381776894692024e-05, "loss": 0.0698512077331543, "step": 21790 }, { "epoch": 6.187908032926483, "grad_norm": 5.310762405395508, "learning_rate": 9.381493045699688e-05, "loss": 0.0495415449142456, "step": 21800 }, { "epoch": 6.190746522849844, "grad_norm": 8.390474319458008, "learning_rate": 9.381209196707353e-05, "loss": 0.051758086681365965, "step": 21810 }, { "epoch": 6.193585012773204, "grad_norm": 6.816893577575684, "learning_rate": 9.380925347715017e-05, "loss": 0.06044039726257324, "step": 21820 }, { "epoch": 6.1964235026965655, "grad_norm": 11.483200073242188, "learning_rate": 9.38064149872268e-05, "loss": 0.0762772798538208, "step": 21830 }, { "epoch": 6.199261992619927, "grad_norm": 4.795910835266113, "learning_rate": 9.380357649730344e-05, "loss": 0.11087045669555665, "step": 21840 }, { "epoch": 6.202100482543287, "grad_norm": 10.047807693481445, "learning_rate": 9.380073800738008e-05, "loss": 0.08291746377944946, "step": 21850 }, { "epoch": 6.204938972466648, "grad_norm": 7.711918830871582, "learning_rate": 9.379789951745672e-05, "loss": 0.07162445783615112, "step": 21860 }, { "epoch": 6.207777462390008, "grad_norm": 6.133021354675293, "learning_rate": 9.379506102753336e-05, "loss": 0.05415757298469544, "step": 21870 }, { "epoch": 6.210615952313369, "grad_norm": 4.782773971557617, "learning_rate": 9.379222253761e-05, "loss": 0.047295740246772765, "step": 21880 }, { "epoch": 6.21345444223673, "grad_norm": 4.928102493286133, "learning_rate": 9.378938404768663e-05, "loss": 0.055120646953582764, "step": 21890 }, { "epoch": 6.216292932160091, "grad_norm": 18.775083541870117, "learning_rate": 9.378654555776327e-05, "loss": 0.09375993609428405, "step": 21900 }, { "epoch": 6.219131422083452, "grad_norm": 8.380999565124512, "learning_rate": 9.378370706783991e-05, "loss": 0.07934355735778809, "step": 21910 }, { "epoch": 6.221969912006813, "grad_norm": 6.047534942626953, "learning_rate": 9.378086857791655e-05, "loss": 0.06796725988388061, "step": 21920 }, { "epoch": 6.224808401930173, "grad_norm": 2.744061231613159, "learning_rate": 9.37780300879932e-05, "loss": 0.06760486364364623, "step": 21930 }, { "epoch": 6.227646891853534, "grad_norm": 2.5729331970214844, "learning_rate": 9.377519159806984e-05, "loss": 0.047090229392051694, "step": 21940 }, { "epoch": 6.230485381776894, "grad_norm": 9.27940559387207, "learning_rate": 9.377235310814646e-05, "loss": 0.06615420579910278, "step": 21950 }, { "epoch": 6.2333238717002555, "grad_norm": 5.282214641571045, "learning_rate": 9.37695146182231e-05, "loss": 0.09526888728141784, "step": 21960 }, { "epoch": 6.236162361623617, "grad_norm": 5.387200832366943, "learning_rate": 9.376667612829975e-05, "loss": 0.06022730469703674, "step": 21970 }, { "epoch": 6.239000851546977, "grad_norm": 13.30256462097168, "learning_rate": 9.376383763837639e-05, "loss": 0.06158702373504639, "step": 21980 }, { "epoch": 6.241839341470338, "grad_norm": 10.274910926818848, "learning_rate": 9.376099914845303e-05, "loss": 0.08581958413124084, "step": 21990 }, { "epoch": 6.244677831393698, "grad_norm": 8.439434051513672, "learning_rate": 9.375816065852967e-05, "loss": 0.07260887622833252, "step": 22000 }, { "epoch": 6.244677831393698, "eval_accuracy": 0.9471609334265912, "eval_loss": 0.16688671708106995, "eval_runtime": 34.6796, "eval_samples_per_second": 453.494, "eval_steps_per_second": 7.094, "step": 22000 }, { "epoch": 6.247516321317059, "grad_norm": 6.867032051086426, "learning_rate": 9.375532216860631e-05, "loss": 0.07284599542617798, "step": 22010 }, { "epoch": 6.25035481124042, "grad_norm": 4.742612361907959, "learning_rate": 9.375248367868294e-05, "loss": 0.08616930842399598, "step": 22020 }, { "epoch": 6.253193301163781, "grad_norm": 5.108035564422607, "learning_rate": 9.374964518875958e-05, "loss": 0.05477837324142456, "step": 22030 }, { "epoch": 6.256031791087142, "grad_norm": 10.690719604492188, "learning_rate": 9.374680669883622e-05, "loss": 0.06615567803382874, "step": 22040 }, { "epoch": 6.258870281010503, "grad_norm": 10.818451881408691, "learning_rate": 9.374396820891285e-05, "loss": 0.06813653111457825, "step": 22050 }, { "epoch": 6.261708770933863, "grad_norm": 10.210262298583984, "learning_rate": 9.37411297189895e-05, "loss": 0.0598949670791626, "step": 22060 }, { "epoch": 6.264547260857224, "grad_norm": 10.788928031921387, "learning_rate": 9.373829122906615e-05, "loss": 0.06132752895355224, "step": 22070 }, { "epoch": 6.267385750780584, "grad_norm": 6.490523338317871, "learning_rate": 9.373545273914278e-05, "loss": 0.04417368769645691, "step": 22080 }, { "epoch": 6.2702242407039455, "grad_norm": 8.8082914352417, "learning_rate": 9.373261424921942e-05, "loss": 0.06670319437980651, "step": 22090 }, { "epoch": 6.273062730627307, "grad_norm": 2.959226131439209, "learning_rate": 9.372977575929606e-05, "loss": 0.07923347353935242, "step": 22100 }, { "epoch": 6.275901220550667, "grad_norm": 12.141778945922852, "learning_rate": 9.37269372693727e-05, "loss": 0.06709091067314148, "step": 22110 }, { "epoch": 6.278739710474028, "grad_norm": 11.344168663024902, "learning_rate": 9.372409877944933e-05, "loss": 0.052655524015426634, "step": 22120 }, { "epoch": 6.281578200397389, "grad_norm": 6.694931983947754, "learning_rate": 9.372126028952598e-05, "loss": 0.05782669186592102, "step": 22130 }, { "epoch": 6.284416690320749, "grad_norm": 12.090874671936035, "learning_rate": 9.371842179960262e-05, "loss": 0.05066774487495422, "step": 22140 }, { "epoch": 6.28725518024411, "grad_norm": 7.020180702209473, "learning_rate": 9.371558330967925e-05, "loss": 0.05867326259613037, "step": 22150 }, { "epoch": 6.290093670167471, "grad_norm": 6.099162578582764, "learning_rate": 9.371274481975589e-05, "loss": 0.039280617237091066, "step": 22160 }, { "epoch": 6.292932160090832, "grad_norm": 5.683497428894043, "learning_rate": 9.370990632983253e-05, "loss": 0.03736328482627869, "step": 22170 }, { "epoch": 6.295770650014193, "grad_norm": 14.352115631103516, "learning_rate": 9.370706783990916e-05, "loss": 0.07569064497947693, "step": 22180 }, { "epoch": 6.298609139937553, "grad_norm": 7.286081314086914, "learning_rate": 9.370422934998582e-05, "loss": 0.07664320468902588, "step": 22190 }, { "epoch": 6.301447629860914, "grad_norm": 6.916255474090576, "learning_rate": 9.370139086006246e-05, "loss": 0.061144834756851195, "step": 22200 }, { "epoch": 6.304286119784274, "grad_norm": 9.28487777709961, "learning_rate": 9.369855237013909e-05, "loss": 0.061165356636047365, "step": 22210 }, { "epoch": 6.307124609707635, "grad_norm": 8.437467575073242, "learning_rate": 9.369571388021573e-05, "loss": 0.0696850836277008, "step": 22220 }, { "epoch": 6.3099630996309966, "grad_norm": 9.30038833618164, "learning_rate": 9.369287539029237e-05, "loss": 0.05020316243171692, "step": 22230 }, { "epoch": 6.312801589554357, "grad_norm": 5.338715553283691, "learning_rate": 9.369003690036901e-05, "loss": 0.02660304605960846, "step": 22240 }, { "epoch": 6.315640079477718, "grad_norm": 6.360271453857422, "learning_rate": 9.368719841044564e-05, "loss": 0.05079078674316406, "step": 22250 }, { "epoch": 6.318478569401079, "grad_norm": 8.450762748718262, "learning_rate": 9.368435992052229e-05, "loss": 0.09086836576461792, "step": 22260 }, { "epoch": 6.321317059324439, "grad_norm": 2.7322285175323486, "learning_rate": 9.368152143059893e-05, "loss": 0.062165313959121705, "step": 22270 }, { "epoch": 6.3241555492478, "grad_norm": 7.443412780761719, "learning_rate": 9.367868294067556e-05, "loss": 0.09542383551597595, "step": 22280 }, { "epoch": 6.3269940391711605, "grad_norm": 22.451448440551758, "learning_rate": 9.367612829974454e-05, "loss": 0.11114087104797363, "step": 22290 }, { "epoch": 6.329832529094522, "grad_norm": 7.026437282562256, "learning_rate": 9.367328980982117e-05, "loss": 0.0892063856124878, "step": 22300 }, { "epoch": 6.332671019017883, "grad_norm": 2.0154995918273926, "learning_rate": 9.367045131989782e-05, "loss": 0.051486074924468994, "step": 22310 }, { "epoch": 6.335509508941243, "grad_norm": 12.72110652923584, "learning_rate": 9.366761282997447e-05, "loss": 0.0906631350517273, "step": 22320 }, { "epoch": 6.338347998864604, "grad_norm": 11.31689739227295, "learning_rate": 9.36647743400511e-05, "loss": 0.08576540350914001, "step": 22330 }, { "epoch": 6.341186488787965, "grad_norm": 18.1953182220459, "learning_rate": 9.366193585012774e-05, "loss": 0.12178623676300049, "step": 22340 }, { "epoch": 6.344024978711325, "grad_norm": 7.352984428405762, "learning_rate": 9.365909736020438e-05, "loss": 0.046098509430885316, "step": 22350 }, { "epoch": 6.3468634686346865, "grad_norm": 7.2171549797058105, "learning_rate": 9.3656258870281e-05, "loss": 0.058814513683319095, "step": 22360 }, { "epoch": 6.349701958558047, "grad_norm": 7.939261436462402, "learning_rate": 9.365342038035766e-05, "loss": 0.07087867259979248, "step": 22370 }, { "epoch": 6.352540448481408, "grad_norm": 10.563776016235352, "learning_rate": 9.36505818904343e-05, "loss": 0.10347676277160645, "step": 22380 }, { "epoch": 6.355378938404769, "grad_norm": 6.023850917816162, "learning_rate": 9.364774340051093e-05, "loss": 0.0544758677482605, "step": 22390 }, { "epoch": 6.358217428328129, "grad_norm": 1.775632381439209, "learning_rate": 9.364490491058757e-05, "loss": 0.07961366772651672, "step": 22400 }, { "epoch": 6.36105591825149, "grad_norm": 2.9035961627960205, "learning_rate": 9.364206642066421e-05, "loss": 0.07895339727401733, "step": 22410 }, { "epoch": 6.3638944081748505, "grad_norm": 5.34147834777832, "learning_rate": 9.363922793074085e-05, "loss": 0.05466833710670471, "step": 22420 }, { "epoch": 6.366732898098212, "grad_norm": 4.108320236206055, "learning_rate": 9.363638944081748e-05, "loss": 0.040611368417739865, "step": 22430 }, { "epoch": 6.369571388021573, "grad_norm": 10.88947868347168, "learning_rate": 9.363355095089414e-05, "loss": 0.04755908250808716, "step": 22440 }, { "epoch": 6.372409877944933, "grad_norm": 9.157010078430176, "learning_rate": 9.363071246097078e-05, "loss": 0.044393569231033325, "step": 22450 }, { "epoch": 6.375248367868294, "grad_norm": 10.94423770904541, "learning_rate": 9.36278739710474e-05, "loss": 0.09142636656761169, "step": 22460 }, { "epoch": 6.378086857791655, "grad_norm": 8.95127010345459, "learning_rate": 9.362503548112405e-05, "loss": 0.06740921139717101, "step": 22470 }, { "epoch": 6.380925347715015, "grad_norm": 6.406826496124268, "learning_rate": 9.362219699120069e-05, "loss": 0.06246722936630249, "step": 22480 }, { "epoch": 6.3837638376383765, "grad_norm": 3.8596785068511963, "learning_rate": 9.361935850127732e-05, "loss": 0.06993348598480224, "step": 22490 }, { "epoch": 6.386602327561738, "grad_norm": 3.868046760559082, "learning_rate": 9.361652001135396e-05, "loss": 0.05077237486839294, "step": 22500 }, { "epoch": 6.386602327561738, "eval_accuracy": 0.9489413111210021, "eval_loss": 0.15440458059310913, "eval_runtime": 34.4296, "eval_samples_per_second": 456.787, "eval_steps_per_second": 7.145, "step": 22500 }, { "epoch": 6.389440817485098, "grad_norm": 7.673839092254639, "learning_rate": 9.361368152143061e-05, "loss": 0.04912700653076172, "step": 22510 }, { "epoch": 6.392279307408459, "grad_norm": 7.57460355758667, "learning_rate": 9.361084303150724e-05, "loss": 0.08726553916931153, "step": 22520 }, { "epoch": 6.395117797331819, "grad_norm": 6.056525230407715, "learning_rate": 9.360800454158388e-05, "loss": 0.05921440124511719, "step": 22530 }, { "epoch": 6.39795628725518, "grad_norm": 12.275922775268555, "learning_rate": 9.360516605166052e-05, "loss": 0.07270147204399109, "step": 22540 }, { "epoch": 6.400794777178541, "grad_norm": 2.5723276138305664, "learning_rate": 9.360232756173716e-05, "loss": 0.06916555762290955, "step": 22550 }, { "epoch": 6.403633267101902, "grad_norm": 5.219125270843506, "learning_rate": 9.359948907181379e-05, "loss": 0.037217536568641664, "step": 22560 }, { "epoch": 6.406471757025263, "grad_norm": 6.662599563598633, "learning_rate": 9.359665058189045e-05, "loss": 0.04299522340297699, "step": 22570 }, { "epoch": 6.409310246948623, "grad_norm": 11.610570907592773, "learning_rate": 9.359381209196709e-05, "loss": 0.061191052198410034, "step": 22580 }, { "epoch": 6.412148736871984, "grad_norm": 20.38079833984375, "learning_rate": 9.359097360204372e-05, "loss": 0.08254905939102172, "step": 22590 }, { "epoch": 6.414987226795345, "grad_norm": 4.718470096588135, "learning_rate": 9.358813511212036e-05, "loss": 0.06650714874267578, "step": 22600 }, { "epoch": 6.417825716718705, "grad_norm": 9.892739295959473, "learning_rate": 9.3585296622197e-05, "loss": 0.04024434387683869, "step": 22610 }, { "epoch": 6.4206642066420665, "grad_norm": 3.019038677215576, "learning_rate": 9.358245813227363e-05, "loss": 0.05385313630104065, "step": 22620 }, { "epoch": 6.423502696565428, "grad_norm": 0.7461014986038208, "learning_rate": 9.357961964235027e-05, "loss": 0.04549807906150818, "step": 22630 }, { "epoch": 6.426341186488788, "grad_norm": 8.462605476379395, "learning_rate": 9.357678115242692e-05, "loss": 0.06555944681167603, "step": 22640 }, { "epoch": 6.429179676412149, "grad_norm": 4.956081867218018, "learning_rate": 9.357394266250355e-05, "loss": 0.08454323410987855, "step": 22650 }, { "epoch": 6.432018166335509, "grad_norm": 8.934706687927246, "learning_rate": 9.357110417258019e-05, "loss": 0.08213617205619812, "step": 22660 }, { "epoch": 6.43485665625887, "grad_norm": 3.2778453826904297, "learning_rate": 9.356826568265683e-05, "loss": 0.0651232123374939, "step": 22670 }, { "epoch": 6.437695146182231, "grad_norm": 2.472931146621704, "learning_rate": 9.356542719273347e-05, "loss": 0.057369965314865115, "step": 22680 }, { "epoch": 6.440533636105592, "grad_norm": 2.977193832397461, "learning_rate": 9.35625887028101e-05, "loss": 0.04924647510051727, "step": 22690 }, { "epoch": 6.443372126028953, "grad_norm": 6.130849838256836, "learning_rate": 9.355975021288676e-05, "loss": 0.046964558959007266, "step": 22700 }, { "epoch": 6.446210615952314, "grad_norm": 13.655348777770996, "learning_rate": 9.355691172296339e-05, "loss": 0.06613050699234009, "step": 22710 }, { "epoch": 6.449049105875674, "grad_norm": 6.915074825286865, "learning_rate": 9.355407323304003e-05, "loss": 0.04502607882022858, "step": 22720 }, { "epoch": 6.451887595799035, "grad_norm": 7.559329509735107, "learning_rate": 9.355123474311667e-05, "loss": 0.09521877765655518, "step": 22730 }, { "epoch": 6.454726085722395, "grad_norm": 9.595179557800293, "learning_rate": 9.354839625319331e-05, "loss": 0.07335448265075684, "step": 22740 }, { "epoch": 6.4575645756457565, "grad_norm": 5.416038990020752, "learning_rate": 9.354555776326994e-05, "loss": 0.08466384410858155, "step": 22750 }, { "epoch": 6.460403065569118, "grad_norm": 7.062206268310547, "learning_rate": 9.354271927334658e-05, "loss": 0.0828620195388794, "step": 22760 }, { "epoch": 6.463241555492478, "grad_norm": 5.461550235748291, "learning_rate": 9.353988078342323e-05, "loss": 0.05741425156593323, "step": 22770 }, { "epoch": 6.466080045415839, "grad_norm": 4.0456976890563965, "learning_rate": 9.353704229349986e-05, "loss": 0.05333778858184814, "step": 22780 }, { "epoch": 6.468918535339199, "grad_norm": 4.230417728424072, "learning_rate": 9.35342038035765e-05, "loss": 0.04704639613628388, "step": 22790 }, { "epoch": 6.47175702526256, "grad_norm": 9.533324241638184, "learning_rate": 9.353136531365314e-05, "loss": 0.0691888153553009, "step": 22800 }, { "epoch": 6.474595515185921, "grad_norm": 11.466306686401367, "learning_rate": 9.352852682372977e-05, "loss": 0.06571805477142334, "step": 22810 }, { "epoch": 6.477434005109282, "grad_norm": 16.72477912902832, "learning_rate": 9.352568833380641e-05, "loss": 0.09377620220184327, "step": 22820 }, { "epoch": 6.480272495032643, "grad_norm": 6.972917079925537, "learning_rate": 9.352284984388305e-05, "loss": 0.048062455654144284, "step": 22830 }, { "epoch": 6.483110984956004, "grad_norm": 11.324723243713379, "learning_rate": 9.35200113539597e-05, "loss": 0.06709098815917969, "step": 22840 }, { "epoch": 6.485949474879364, "grad_norm": 11.720114707946777, "learning_rate": 9.351717286403634e-05, "loss": 0.09053636789321899, "step": 22850 }, { "epoch": 6.488787964802725, "grad_norm": 7.206921100616455, "learning_rate": 9.351433437411298e-05, "loss": 0.055981510877609254, "step": 22860 }, { "epoch": 6.491626454726085, "grad_norm": 5.044811725616455, "learning_rate": 9.351149588418962e-05, "loss": 0.060987788438797, "step": 22870 }, { "epoch": 6.4944649446494465, "grad_norm": 1.8012968301773071, "learning_rate": 9.350865739426625e-05, "loss": 0.07404155731201172, "step": 22880 }, { "epoch": 6.497303434572808, "grad_norm": 8.857561111450195, "learning_rate": 9.350581890434289e-05, "loss": 0.059181541204452515, "step": 22890 }, { "epoch": 6.500141924496168, "grad_norm": 8.685898780822754, "learning_rate": 9.350298041441954e-05, "loss": 0.05578880310058594, "step": 22900 }, { "epoch": 6.502980414419529, "grad_norm": 7.340598106384277, "learning_rate": 9.350014192449617e-05, "loss": 0.06968445777893066, "step": 22910 }, { "epoch": 6.50581890434289, "grad_norm": 3.718942403793335, "learning_rate": 9.349730343457281e-05, "loss": 0.06084020137786865, "step": 22920 }, { "epoch": 6.50865739426625, "grad_norm": 4.06213903427124, "learning_rate": 9.349446494464945e-05, "loss": 0.09132059812545776, "step": 22930 }, { "epoch": 6.511495884189611, "grad_norm": 1.3623285293579102, "learning_rate": 9.349162645472608e-05, "loss": 0.07380326390266419, "step": 22940 }, { "epoch": 6.514334374112972, "grad_norm": 4.53208065032959, "learning_rate": 9.348878796480272e-05, "loss": 0.05824970602989197, "step": 22950 }, { "epoch": 6.517172864036333, "grad_norm": 14.288229942321777, "learning_rate": 9.348594947487937e-05, "loss": 0.07372841835021973, "step": 22960 }, { "epoch": 6.520011353959694, "grad_norm": 6.926364421844482, "learning_rate": 9.3483110984956e-05, "loss": 0.10001689195632935, "step": 22970 }, { "epoch": 6.522849843883054, "grad_norm": 2.893476963043213, "learning_rate": 9.348027249503265e-05, "loss": 0.0644639790058136, "step": 22980 }, { "epoch": 6.525688333806415, "grad_norm": 5.910089015960693, "learning_rate": 9.347743400510929e-05, "loss": 0.05644515752792358, "step": 22990 }, { "epoch": 6.528526823729775, "grad_norm": 6.5973920822143555, "learning_rate": 9.347459551518593e-05, "loss": 0.05937712192535401, "step": 23000 }, { "epoch": 6.528526823729775, "eval_accuracy": 0.9457620652381256, "eval_loss": 0.1592908501625061, "eval_runtime": 31.7302, "eval_samples_per_second": 495.648, "eval_steps_per_second": 7.753, "step": 23000 }, { "epoch": 6.531365313653136, "grad_norm": 13.856776237487793, "learning_rate": 9.347175702526256e-05, "loss": 0.08250008821487427, "step": 23010 }, { "epoch": 6.5342038035764975, "grad_norm": 7.635278701782227, "learning_rate": 9.34689185353392e-05, "loss": 0.09952807426452637, "step": 23020 }, { "epoch": 6.537042293499858, "grad_norm": 3.603209972381592, "learning_rate": 9.346608004541584e-05, "loss": 0.052066695690155027, "step": 23030 }, { "epoch": 6.539880783423219, "grad_norm": 5.518956184387207, "learning_rate": 9.346324155549248e-05, "loss": 0.06417028903961182, "step": 23040 }, { "epoch": 6.54271927334658, "grad_norm": 5.390430450439453, "learning_rate": 9.346040306556912e-05, "loss": 0.072234046459198, "step": 23050 }, { "epoch": 6.54555776326994, "grad_norm": 6.387752056121826, "learning_rate": 9.345756457564577e-05, "loss": 0.09717770218849182, "step": 23060 }, { "epoch": 6.548396253193301, "grad_norm": 10.630523681640625, "learning_rate": 9.34547260857224e-05, "loss": 0.05742814540863037, "step": 23070 }, { "epoch": 6.551234743116662, "grad_norm": 5.284035682678223, "learning_rate": 9.345188759579903e-05, "loss": 0.05698575973510742, "step": 23080 }, { "epoch": 6.554073233040023, "grad_norm": 7.6259846687316895, "learning_rate": 9.344904910587568e-05, "loss": 0.07448934912681579, "step": 23090 }, { "epoch": 6.556911722963384, "grad_norm": 1.6665700674057007, "learning_rate": 9.344621061595232e-05, "loss": 0.06123071312904358, "step": 23100 }, { "epoch": 6.559750212886744, "grad_norm": 5.3264546394348145, "learning_rate": 9.344337212602896e-05, "loss": 0.06302662491798401, "step": 23110 }, { "epoch": 6.562588702810105, "grad_norm": 12.584602355957031, "learning_rate": 9.34405336361056e-05, "loss": 0.0614501953125, "step": 23120 }, { "epoch": 6.565427192733466, "grad_norm": 9.477799415588379, "learning_rate": 9.343769514618224e-05, "loss": 0.07931509613990784, "step": 23130 }, { "epoch": 6.568265682656826, "grad_norm": 7.702375888824463, "learning_rate": 9.343485665625887e-05, "loss": 0.08060720562934875, "step": 23140 }, { "epoch": 6.5711041725801875, "grad_norm": 13.733010292053223, "learning_rate": 9.343201816633551e-05, "loss": 0.07649382948875427, "step": 23150 }, { "epoch": 6.573942662503548, "grad_norm": 7.085424423217773, "learning_rate": 9.342917967641215e-05, "loss": 0.05833662748336792, "step": 23160 }, { "epoch": 6.576781152426909, "grad_norm": 2.7155370712280273, "learning_rate": 9.34263411864888e-05, "loss": 0.045491567254066466, "step": 23170 }, { "epoch": 6.57961964235027, "grad_norm": 10.599963188171387, "learning_rate": 9.342350269656543e-05, "loss": 0.0598272442817688, "step": 23180 }, { "epoch": 6.58245813227363, "grad_norm": 4.592675685882568, "learning_rate": 9.342066420664208e-05, "loss": 0.07122341990470886, "step": 23190 }, { "epoch": 6.585296622196991, "grad_norm": 3.126769781112671, "learning_rate": 9.34178257167187e-05, "loss": 0.060324561595916745, "step": 23200 }, { "epoch": 6.5881351121203515, "grad_norm": 8.101386070251465, "learning_rate": 9.341498722679535e-05, "loss": 0.07873019576072693, "step": 23210 }, { "epoch": 6.590973602043713, "grad_norm": 18.560606002807617, "learning_rate": 9.341214873687199e-05, "loss": 0.0772042453289032, "step": 23220 }, { "epoch": 6.593812091967074, "grad_norm": 8.216347694396973, "learning_rate": 9.340931024694863e-05, "loss": 0.07716535329818726, "step": 23230 }, { "epoch": 6.596650581890434, "grad_norm": 13.700494766235352, "learning_rate": 9.340647175702527e-05, "loss": 0.07237286567687988, "step": 23240 }, { "epoch": 6.599489071813795, "grad_norm": 7.993924617767334, "learning_rate": 9.340363326710191e-05, "loss": 0.04505000412464142, "step": 23250 }, { "epoch": 6.602327561737156, "grad_norm": 3.5395963191986084, "learning_rate": 9.340079477717855e-05, "loss": 0.07847476601600648, "step": 23260 }, { "epoch": 6.605166051660516, "grad_norm": 9.88886833190918, "learning_rate": 9.339795628725518e-05, "loss": 0.05986697673797607, "step": 23270 }, { "epoch": 6.6080045415838775, "grad_norm": 3.9333889484405518, "learning_rate": 9.339511779733182e-05, "loss": 0.05744473338127136, "step": 23280 }, { "epoch": 6.610843031507239, "grad_norm": 12.704845428466797, "learning_rate": 9.339227930740846e-05, "loss": 0.08831512928009033, "step": 23290 }, { "epoch": 6.613681521430599, "grad_norm": 3.600497007369995, "learning_rate": 9.33894408174851e-05, "loss": 0.048995202779769896, "step": 23300 }, { "epoch": 6.61652001135396, "grad_norm": 9.86794376373291, "learning_rate": 9.338660232756175e-05, "loss": 0.07233185172080994, "step": 23310 }, { "epoch": 6.61935850127732, "grad_norm": 4.327376365661621, "learning_rate": 9.338376383763839e-05, "loss": 0.047129711508750914, "step": 23320 }, { "epoch": 6.622196991200681, "grad_norm": 10.496603012084961, "learning_rate": 9.338092534771501e-05, "loss": 0.05952298641204834, "step": 23330 }, { "epoch": 6.625035481124042, "grad_norm": 4.155147552490234, "learning_rate": 9.337808685779166e-05, "loss": 0.06855723857879639, "step": 23340 }, { "epoch": 6.627873971047403, "grad_norm": 4.5846757888793945, "learning_rate": 9.33752483678683e-05, "loss": 0.057047796249389646, "step": 23350 }, { "epoch": 6.630712460970764, "grad_norm": 4.938830375671387, "learning_rate": 9.337240987794494e-05, "loss": 0.05028601884841919, "step": 23360 }, { "epoch": 6.633550950894124, "grad_norm": 4.80103874206543, "learning_rate": 9.336957138802158e-05, "loss": 0.07054493427276612, "step": 23370 }, { "epoch": 6.636389440817485, "grad_norm": 9.108521461486816, "learning_rate": 9.336673289809822e-05, "loss": 0.074306720495224, "step": 23380 }, { "epoch": 6.639227930740846, "grad_norm": 6.8885579109191895, "learning_rate": 9.336389440817486e-05, "loss": 0.045844274759292605, "step": 23390 }, { "epoch": 6.642066420664206, "grad_norm": 4.292250156402588, "learning_rate": 9.336105591825149e-05, "loss": 0.04974898099899292, "step": 23400 }, { "epoch": 6.6449049105875675, "grad_norm": 8.093226432800293, "learning_rate": 9.335821742832813e-05, "loss": 0.07901933193206787, "step": 23410 }, { "epoch": 6.647743400510928, "grad_norm": 4.231894493103027, "learning_rate": 9.335537893840477e-05, "loss": 0.03552080094814301, "step": 23420 }, { "epoch": 6.650581890434289, "grad_norm": 7.029517650604248, "learning_rate": 9.33525404484814e-05, "loss": 0.04669412970542908, "step": 23430 }, { "epoch": 6.65342038035765, "grad_norm": 7.102984428405762, "learning_rate": 9.334970195855806e-05, "loss": 0.042848610877990724, "step": 23440 }, { "epoch": 6.65625887028101, "grad_norm": 8.387617111206055, "learning_rate": 9.33468634686347e-05, "loss": 0.059354788064956664, "step": 23450 }, { "epoch": 6.659097360204371, "grad_norm": 7.155029773712158, "learning_rate": 9.334402497871133e-05, "loss": 0.055545616149902347, "step": 23460 }, { "epoch": 6.661935850127732, "grad_norm": 9.339471817016602, "learning_rate": 9.334118648878797e-05, "loss": 0.08752689957618713, "step": 23470 }, { "epoch": 6.664774340051093, "grad_norm": 6.880680561065674, "learning_rate": 9.333834799886461e-05, "loss": 0.06102652549743652, "step": 23480 }, { "epoch": 6.667612829974454, "grad_norm": 7.5426459312438965, "learning_rate": 9.333550950894125e-05, "loss": 0.06150177717208862, "step": 23490 }, { "epoch": 6.670451319897815, "grad_norm": 4.849217891693115, "learning_rate": 9.333267101901789e-05, "loss": 0.05756954550743103, "step": 23500 }, { "epoch": 6.670451319897815, "eval_accuracy": 0.9514847078273033, "eval_loss": 0.14655059576034546, "eval_runtime": 33.147, "eval_samples_per_second": 474.463, "eval_steps_per_second": 7.421, "step": 23500 }, { "epoch": 6.673289809821175, "grad_norm": 6.187044143676758, "learning_rate": 9.332983252909453e-05, "loss": 0.07227462530136108, "step": 23510 }, { "epoch": 6.676128299744536, "grad_norm": 16.089933395385742, "learning_rate": 9.332699403917117e-05, "loss": 0.07938938736915588, "step": 23520 }, { "epoch": 6.678966789667896, "grad_norm": 3.152357578277588, "learning_rate": 9.33241555492478e-05, "loss": 0.10665467977523804, "step": 23530 }, { "epoch": 6.6818052795912575, "grad_norm": 17.384761810302734, "learning_rate": 9.332131705932444e-05, "loss": 0.07915918827056885, "step": 23540 }, { "epoch": 6.684643769514619, "grad_norm": 2.6649155616760254, "learning_rate": 9.331847856940108e-05, "loss": 0.06024469137191772, "step": 23550 }, { "epoch": 6.687482259437979, "grad_norm": 4.032787322998047, "learning_rate": 9.331564007947771e-05, "loss": 0.06250726580619811, "step": 23560 }, { "epoch": 6.69032074936134, "grad_norm": 3.4619028568267822, "learning_rate": 9.331280158955437e-05, "loss": 0.05120217204093933, "step": 23570 }, { "epoch": 6.6931592392847, "grad_norm": 8.397167205810547, "learning_rate": 9.330996309963101e-05, "loss": 0.08176122903823853, "step": 23580 }, { "epoch": 6.695997729208061, "grad_norm": 10.10000991821289, "learning_rate": 9.330712460970764e-05, "loss": 0.07552049160003663, "step": 23590 }, { "epoch": 6.698836219131422, "grad_norm": 7.2446393966674805, "learning_rate": 9.330428611978428e-05, "loss": 0.07014150619506836, "step": 23600 }, { "epoch": 6.701674709054783, "grad_norm": 7.493098735809326, "learning_rate": 9.330144762986092e-05, "loss": 0.08240560293197632, "step": 23610 }, { "epoch": 6.704513198978144, "grad_norm": 3.448248863220215, "learning_rate": 9.329860913993756e-05, "loss": 0.057129818201065066, "step": 23620 }, { "epoch": 6.707351688901504, "grad_norm": 5.865104675292969, "learning_rate": 9.329577065001419e-05, "loss": 0.0698682963848114, "step": 23630 }, { "epoch": 6.710190178824865, "grad_norm": 12.394220352172852, "learning_rate": 9.329293216009084e-05, "loss": 0.06459053754806518, "step": 23640 }, { "epoch": 6.713028668748226, "grad_norm": 11.626504898071289, "learning_rate": 9.329009367016747e-05, "loss": 0.07687658667564393, "step": 23650 }, { "epoch": 6.715867158671586, "grad_norm": 10.299532890319824, "learning_rate": 9.328725518024411e-05, "loss": 0.07676170468330383, "step": 23660 }, { "epoch": 6.7187056485949475, "grad_norm": 9.131816864013672, "learning_rate": 9.328441669032075e-05, "loss": 0.05657409429550171, "step": 23670 }, { "epoch": 6.721544138518309, "grad_norm": 8.327226638793945, "learning_rate": 9.32815782003974e-05, "loss": 0.07256149053573609, "step": 23680 }, { "epoch": 6.724382628441669, "grad_norm": 10.367690086364746, "learning_rate": 9.327873971047402e-05, "loss": 0.061317074298858645, "step": 23690 }, { "epoch": 6.72722111836503, "grad_norm": 7.896406650543213, "learning_rate": 9.327590122055068e-05, "loss": 0.039841434359550475, "step": 23700 }, { "epoch": 6.730059608288391, "grad_norm": 5.893136501312256, "learning_rate": 9.327306273062732e-05, "loss": 0.0643136203289032, "step": 23710 }, { "epoch": 6.732898098211751, "grad_norm": 15.357850074768066, "learning_rate": 9.327022424070395e-05, "loss": 0.044034743309021, "step": 23720 }, { "epoch": 6.735736588135112, "grad_norm": 4.253803730010986, "learning_rate": 9.326738575078059e-05, "loss": 0.06915061473846436, "step": 23730 }, { "epoch": 6.738575078058473, "grad_norm": 6.650298118591309, "learning_rate": 9.326454726085723e-05, "loss": 0.07395084500312805, "step": 23740 }, { "epoch": 6.741413567981834, "grad_norm": 14.894808769226074, "learning_rate": 9.326170877093386e-05, "loss": 0.054925835132598876, "step": 23750 }, { "epoch": 6.744252057905195, "grad_norm": 5.8432488441467285, "learning_rate": 9.32588702810105e-05, "loss": 0.06558787822723389, "step": 23760 }, { "epoch": 6.747090547828555, "grad_norm": 8.550345420837402, "learning_rate": 9.325603179108715e-05, "loss": 0.04870989322662354, "step": 23770 }, { "epoch": 6.749929037751916, "grad_norm": 4.980798244476318, "learning_rate": 9.325319330116378e-05, "loss": 0.054956299066543576, "step": 23780 }, { "epoch": 6.752767527675276, "grad_norm": 11.759502410888672, "learning_rate": 9.325035481124042e-05, "loss": 0.05898239016532898, "step": 23790 }, { "epoch": 6.755606017598637, "grad_norm": 6.150168418884277, "learning_rate": 9.324751632131706e-05, "loss": 0.10020248889923096, "step": 23800 }, { "epoch": 6.7584445075219985, "grad_norm": 12.145456314086914, "learning_rate": 9.32446778313937e-05, "loss": 0.0766824722290039, "step": 23810 }, { "epoch": 6.761282997445359, "grad_norm": 10.591904640197754, "learning_rate": 9.324183934147033e-05, "loss": 0.08159635066986085, "step": 23820 }, { "epoch": 6.76412148736872, "grad_norm": 11.481216430664062, "learning_rate": 9.323900085154698e-05, "loss": 0.06558504104614257, "step": 23830 }, { "epoch": 6.766959977292081, "grad_norm": 15.58019733428955, "learning_rate": 9.323616236162363e-05, "loss": 0.061024832725524905, "step": 23840 }, { "epoch": 6.769798467215441, "grad_norm": 4.141224384307861, "learning_rate": 9.323332387170026e-05, "loss": 0.07618950009346008, "step": 23850 }, { "epoch": 6.772636957138802, "grad_norm": 9.452756881713867, "learning_rate": 9.32304853817769e-05, "loss": 0.09108626246452331, "step": 23860 }, { "epoch": 6.775475447062163, "grad_norm": 4.69041633605957, "learning_rate": 9.322764689185354e-05, "loss": 0.06962264180183411, "step": 23870 }, { "epoch": 6.778313936985524, "grad_norm": 8.827424049377441, "learning_rate": 9.322480840193017e-05, "loss": 0.08700149059295655, "step": 23880 }, { "epoch": 6.781152426908885, "grad_norm": 3.8884668350219727, "learning_rate": 9.322196991200681e-05, "loss": 0.04838784635066986, "step": 23890 }, { "epoch": 6.783990916832245, "grad_norm": 8.162054061889648, "learning_rate": 9.321913142208346e-05, "loss": 0.09343326091766357, "step": 23900 }, { "epoch": 6.786829406755606, "grad_norm": 9.480267524719238, "learning_rate": 9.321629293216009e-05, "loss": 0.06143661141395569, "step": 23910 }, { "epoch": 6.789667896678967, "grad_norm": 2.3653621673583984, "learning_rate": 9.321345444223673e-05, "loss": 0.05765641331672668, "step": 23920 }, { "epoch": 6.792506386602327, "grad_norm": 5.893814563751221, "learning_rate": 9.321061595231338e-05, "loss": 0.05132954716682434, "step": 23930 }, { "epoch": 6.7953448765256885, "grad_norm": 1.579954981803894, "learning_rate": 9.320777746239002e-05, "loss": 0.045589813590049745, "step": 23940 }, { "epoch": 6.798183366449049, "grad_norm": 12.159889221191406, "learning_rate": 9.320493897246664e-05, "loss": 0.07311989068984985, "step": 23950 }, { "epoch": 6.80102185637241, "grad_norm": 5.736168384552002, "learning_rate": 9.320210048254329e-05, "loss": 0.06712495088577271, "step": 23960 }, { "epoch": 6.803860346295771, "grad_norm": 7.2729172706604, "learning_rate": 9.319926199261994e-05, "loss": 0.06673898100852967, "step": 23970 }, { "epoch": 6.806698836219131, "grad_norm": 6.675695419311523, "learning_rate": 9.319642350269657e-05, "loss": 0.04361908137798309, "step": 23980 }, { "epoch": 6.809537326142492, "grad_norm": 8.166933059692383, "learning_rate": 9.319358501277321e-05, "loss": 0.08151805996894837, "step": 23990 }, { "epoch": 6.8123758160658525, "grad_norm": 4.803466796875, "learning_rate": 9.319074652284985e-05, "loss": 0.06466462016105652, "step": 24000 }, { "epoch": 6.8123758160658525, "eval_accuracy": 0.9383226298721943, "eval_loss": 0.18636152148246765, "eval_runtime": 33.7681, "eval_samples_per_second": 465.735, "eval_steps_per_second": 7.285, "step": 24000 }, { "epoch": 6.815214305989214, "grad_norm": 2.2818028926849365, "learning_rate": 9.318790803292648e-05, "loss": 0.04944177865982056, "step": 24010 }, { "epoch": 6.818052795912575, "grad_norm": 12.350053787231445, "learning_rate": 9.318506954300312e-05, "loss": 0.059216052293777466, "step": 24020 }, { "epoch": 6.820891285835935, "grad_norm": 9.576706886291504, "learning_rate": 9.318223105307978e-05, "loss": 0.06533696055412293, "step": 24030 }, { "epoch": 6.823729775759296, "grad_norm": 5.1772565841674805, "learning_rate": 9.31793925631564e-05, "loss": 0.0948998749256134, "step": 24040 }, { "epoch": 6.826568265682657, "grad_norm": 2.3310294151306152, "learning_rate": 9.317655407323304e-05, "loss": 0.05270999073982239, "step": 24050 }, { "epoch": 6.829406755606017, "grad_norm": 3.0147361755371094, "learning_rate": 9.317371558330969e-05, "loss": 0.051987087726593016, "step": 24060 }, { "epoch": 6.8322452455293785, "grad_norm": 6.523325443267822, "learning_rate": 9.317087709338633e-05, "loss": 0.06754974126815796, "step": 24070 }, { "epoch": 6.83508373545274, "grad_norm": 8.244543075561523, "learning_rate": 9.316803860346296e-05, "loss": 0.05100887417793274, "step": 24080 }, { "epoch": 6.8379222253761, "grad_norm": 17.17783546447754, "learning_rate": 9.31652001135396e-05, "loss": 0.060204333066940306, "step": 24090 }, { "epoch": 6.840760715299461, "grad_norm": 12.304818153381348, "learning_rate": 9.316236162361625e-05, "loss": 0.04456921219825745, "step": 24100 }, { "epoch": 6.843599205222821, "grad_norm": Infinity, "learning_rate": 9.315952313369288e-05, "loss": 0.11072671413421631, "step": 24110 }, { "epoch": 6.846437695146182, "grad_norm": 7.027221202850342, "learning_rate": 9.315696849276186e-05, "loss": 0.07693027257919312, "step": 24120 }, { "epoch": 6.849276185069543, "grad_norm": 8.223810195922852, "learning_rate": 9.315413000283849e-05, "loss": 0.07276902198791504, "step": 24130 }, { "epoch": 6.852114674992904, "grad_norm": 1.4403507709503174, "learning_rate": 9.315129151291513e-05, "loss": 0.056750905513763425, "step": 24140 }, { "epoch": 6.854953164916265, "grad_norm": 11.356216430664062, "learning_rate": 9.314845302299178e-05, "loss": 0.06659359335899354, "step": 24150 }, { "epoch": 6.857791654839625, "grad_norm": 5.918548583984375, "learning_rate": 9.314561453306841e-05, "loss": 0.060737121105194095, "step": 24160 }, { "epoch": 6.860630144762986, "grad_norm": 11.800745964050293, "learning_rate": 9.314277604314505e-05, "loss": 0.08534255623817444, "step": 24170 }, { "epoch": 6.863468634686347, "grad_norm": 8.280691146850586, "learning_rate": 9.31399375532217e-05, "loss": 0.06120050549507141, "step": 24180 }, { "epoch": 6.866307124609707, "grad_norm": 4.526638031005859, "learning_rate": 9.313709906329832e-05, "loss": 0.06568164825439453, "step": 24190 }, { "epoch": 6.8691456145330685, "grad_norm": 5.921614646911621, "learning_rate": 9.313426057337496e-05, "loss": 0.08289343714714051, "step": 24200 }, { "epoch": 6.871984104456429, "grad_norm": 2.7340195178985596, "learning_rate": 9.31314220834516e-05, "loss": 0.06718329191207886, "step": 24210 }, { "epoch": 6.87482259437979, "grad_norm": 11.177754402160645, "learning_rate": 9.312858359352825e-05, "loss": 0.09456788897514343, "step": 24220 }, { "epoch": 6.877661084303151, "grad_norm": 10.81622314453125, "learning_rate": 9.312574510360489e-05, "loss": 0.06475539207458496, "step": 24230 }, { "epoch": 6.880499574226511, "grad_norm": 5.968855857849121, "learning_rate": 9.312290661368153e-05, "loss": 0.09015412330627441, "step": 24240 }, { "epoch": 6.883338064149872, "grad_norm": 2.5746445655822754, "learning_rate": 9.312006812375817e-05, "loss": 0.05000457763671875, "step": 24250 }, { "epoch": 6.886176554073233, "grad_norm": 5.538957595825195, "learning_rate": 9.31172296338348e-05, "loss": 0.05506855249404907, "step": 24260 }, { "epoch": 6.889015043996594, "grad_norm": 2.7452573776245117, "learning_rate": 9.311439114391144e-05, "loss": 0.054644709825515746, "step": 24270 }, { "epoch": 6.891853533919955, "grad_norm": 10.594897270202637, "learning_rate": 9.31115526539881e-05, "loss": 0.04821797907352447, "step": 24280 }, { "epoch": 6.894692023843316, "grad_norm": 16.681991577148438, "learning_rate": 9.310871416406472e-05, "loss": 0.1160806655883789, "step": 24290 }, { "epoch": 6.897530513766676, "grad_norm": 12.527297019958496, "learning_rate": 9.310587567414136e-05, "loss": 0.09003409147262573, "step": 24300 }, { "epoch": 6.900369003690037, "grad_norm": 4.325875282287598, "learning_rate": 9.3103037184218e-05, "loss": 0.06712737679481506, "step": 24310 }, { "epoch": 6.903207493613397, "grad_norm": 8.215907096862793, "learning_rate": 9.310019869429463e-05, "loss": 0.0737831175327301, "step": 24320 }, { "epoch": 6.9060459835367585, "grad_norm": 10.527514457702637, "learning_rate": 9.309736020437127e-05, "loss": 0.07706440687179565, "step": 24330 }, { "epoch": 6.90888447346012, "grad_norm": 2.4420652389526367, "learning_rate": 9.309452171444792e-05, "loss": 0.04704823791980743, "step": 24340 }, { "epoch": 6.91172296338348, "grad_norm": 2.9547078609466553, "learning_rate": 9.309168322452456e-05, "loss": 0.08130730390548706, "step": 24350 }, { "epoch": 6.914561453306841, "grad_norm": 1.2650052309036255, "learning_rate": 9.30888447346012e-05, "loss": 0.07290411591529847, "step": 24360 }, { "epoch": 6.917399943230201, "grad_norm": 7.135451793670654, "learning_rate": 9.308600624467784e-05, "loss": 0.05239912271499634, "step": 24370 }, { "epoch": 6.920238433153562, "grad_norm": 8.295967102050781, "learning_rate": 9.308316775475448e-05, "loss": 0.05378873348236084, "step": 24380 }, { "epoch": 6.923076923076923, "grad_norm": 10.893587112426758, "learning_rate": 9.308032926483111e-05, "loss": 0.07633600831031799, "step": 24390 }, { "epoch": 6.925915413000284, "grad_norm": 16.41339111328125, "learning_rate": 9.307749077490775e-05, "loss": 0.09146432280540466, "step": 24400 }, { "epoch": 6.928753902923645, "grad_norm": 8.952627182006836, "learning_rate": 9.30746522849844e-05, "loss": 0.04182305932044983, "step": 24410 }, { "epoch": 6.931592392847005, "grad_norm": 5.530761241912842, "learning_rate": 9.307181379506103e-05, "loss": 0.09493128061294556, "step": 24420 }, { "epoch": 6.934430882770366, "grad_norm": 8.752564430236816, "learning_rate": 9.306897530513767e-05, "loss": 0.0806997537612915, "step": 24430 }, { "epoch": 6.937269372693727, "grad_norm": 10.969762802124023, "learning_rate": 9.306613681521432e-05, "loss": 0.052368170022964476, "step": 24440 }, { "epoch": 6.940107862617087, "grad_norm": 8.80172348022461, "learning_rate": 9.306329832529094e-05, "loss": 0.08778879642486573, "step": 24450 }, { "epoch": 6.9429463525404485, "grad_norm": 6.12731409072876, "learning_rate": 9.306045983536758e-05, "loss": 0.04437322318553925, "step": 24460 }, { "epoch": 6.94578484246381, "grad_norm": 3.8472542762756348, "learning_rate": 9.305762134544423e-05, "loss": 0.08740729689598084, "step": 24470 }, { "epoch": 6.94862333238717, "grad_norm": 5.500892639160156, "learning_rate": 9.305478285552087e-05, "loss": 0.04763898551464081, "step": 24480 }, { "epoch": 6.951461822310531, "grad_norm": 6.8370747566223145, "learning_rate": 9.305194436559751e-05, "loss": 0.032831281423568726, "step": 24490 }, { "epoch": 6.954300312233892, "grad_norm": 6.4407525062561035, "learning_rate": 9.304910587567415e-05, "loss": 0.08058438301086426, "step": 24500 }, { "epoch": 6.954300312233892, "eval_accuracy": 0.9469065937559611, "eval_loss": 0.16141512989997864, "eval_runtime": 35.4067, "eval_samples_per_second": 444.181, "eval_steps_per_second": 6.948, "step": 24500 }, { "epoch": 6.957138802157252, "grad_norm": 5.352799415588379, "learning_rate": 9.304626738575079e-05, "loss": 0.06497411131858825, "step": 24510 }, { "epoch": 6.959977292080613, "grad_norm": 17.31061363220215, "learning_rate": 9.304342889582742e-05, "loss": 0.06541476249694825, "step": 24520 }, { "epoch": 6.9628157820039736, "grad_norm": 6.634043216705322, "learning_rate": 9.304059040590406e-05, "loss": 0.09170109629631043, "step": 24530 }, { "epoch": 6.965654271927335, "grad_norm": 4.040133953094482, "learning_rate": 9.30377519159807e-05, "loss": 0.05694687366485596, "step": 24540 }, { "epoch": 6.968492761850696, "grad_norm": 5.846604824066162, "learning_rate": 9.303491342605734e-05, "loss": 0.05806567668914795, "step": 24550 }, { "epoch": 6.971331251774056, "grad_norm": 15.648218154907227, "learning_rate": 9.303207493613398e-05, "loss": 0.0880549669265747, "step": 24560 }, { "epoch": 6.974169741697417, "grad_norm": 2.9076297283172607, "learning_rate": 9.302923644621063e-05, "loss": 0.0504547119140625, "step": 24570 }, { "epoch": 6.977008231620777, "grad_norm": 6.881335735321045, "learning_rate": 9.302639795628725e-05, "loss": 0.08548152446746826, "step": 24580 }, { "epoch": 6.979846721544138, "grad_norm": 6.170026779174805, "learning_rate": 9.30235594663639e-05, "loss": 0.06910431385040283, "step": 24590 }, { "epoch": 6.9826852114674995, "grad_norm": 4.721861839294434, "learning_rate": 9.302072097644054e-05, "loss": 0.08530585169792175, "step": 24600 }, { "epoch": 6.98552370139086, "grad_norm": 9.78558349609375, "learning_rate": 9.301788248651718e-05, "loss": 0.05749080181121826, "step": 24610 }, { "epoch": 6.988362191314221, "grad_norm": 2.584488868713379, "learning_rate": 9.301504399659382e-05, "loss": 0.09398843050003051, "step": 24620 }, { "epoch": 6.991200681237582, "grad_norm": 4.39893913269043, "learning_rate": 9.301220550667046e-05, "loss": 0.057060784101486205, "step": 24630 }, { "epoch": 6.994039171160942, "grad_norm": 7.3392744064331055, "learning_rate": 9.300936701674709e-05, "loss": 0.07128552198410035, "step": 24640 }, { "epoch": 6.996877661084303, "grad_norm": 8.737076759338379, "learning_rate": 9.300652852682373e-05, "loss": 0.08348881602287292, "step": 24650 }, { "epoch": 6.999716151007664, "grad_norm": 8.829516410827637, "learning_rate": 9.300369003690037e-05, "loss": 0.06868114471435546, "step": 24660 }, { "epoch": 7.002554640931025, "grad_norm": 6.369356155395508, "learning_rate": 9.300085154697701e-05, "loss": 0.06261990666389465, "step": 24670 }, { "epoch": 7.005393130854386, "grad_norm": 6.648372173309326, "learning_rate": 9.299801305705365e-05, "loss": 0.06997549533843994, "step": 24680 }, { "epoch": 7.008231620777746, "grad_norm": 4.513826847076416, "learning_rate": 9.29951745671303e-05, "loss": 0.045345228910446164, "step": 24690 }, { "epoch": 7.011070110701107, "grad_norm": 4.970345497131348, "learning_rate": 9.299233607720694e-05, "loss": 0.035926374793052676, "step": 24700 }, { "epoch": 7.013908600624468, "grad_norm": 11.32082748413086, "learning_rate": 9.298949758728356e-05, "loss": 0.04963710904121399, "step": 24710 }, { "epoch": 7.016747090547828, "grad_norm": 11.831169128417969, "learning_rate": 9.29866590973602e-05, "loss": 0.06724552512168884, "step": 24720 }, { "epoch": 7.0195855804711895, "grad_norm": 12.475922584533691, "learning_rate": 9.298382060743685e-05, "loss": 0.08935072422027587, "step": 24730 }, { "epoch": 7.02242407039455, "grad_norm": 9.521151542663574, "learning_rate": 9.298098211751348e-05, "loss": 0.06677224040031433, "step": 24740 }, { "epoch": 7.025262560317911, "grad_norm": 5.951154708862305, "learning_rate": 9.297814362759013e-05, "loss": 0.05203843116760254, "step": 24750 }, { "epoch": 7.028101050241272, "grad_norm": 7.176534175872803, "learning_rate": 9.297530513766677e-05, "loss": 0.06692192554473878, "step": 24760 }, { "epoch": 7.030939540164632, "grad_norm": 5.034002780914307, "learning_rate": 9.29724666477434e-05, "loss": 0.05921297669410706, "step": 24770 }, { "epoch": 7.033778030087993, "grad_norm": 4.602486610412598, "learning_rate": 9.296962815782004e-05, "loss": 0.05034801363945007, "step": 24780 }, { "epoch": 7.0366165200113535, "grad_norm": 11.830596923828125, "learning_rate": 9.296678966789668e-05, "loss": 0.06265047788619996, "step": 24790 }, { "epoch": 7.039455009934715, "grad_norm": 5.74329948425293, "learning_rate": 9.296395117797332e-05, "loss": 0.07640516757965088, "step": 24800 }, { "epoch": 7.042293499858076, "grad_norm": 11.603799819946289, "learning_rate": 9.296111268804997e-05, "loss": 0.052840524911880495, "step": 24810 }, { "epoch": 7.045131989781436, "grad_norm": 9.645184516906738, "learning_rate": 9.29582741981266e-05, "loss": 0.0471128910779953, "step": 24820 }, { "epoch": 7.047970479704797, "grad_norm": 5.5880327224731445, "learning_rate": 9.295543570820325e-05, "loss": 0.03766669631004334, "step": 24830 }, { "epoch": 7.050808969628158, "grad_norm": 13.030423164367676, "learning_rate": 9.295259721827988e-05, "loss": 0.048774874210357665, "step": 24840 }, { "epoch": 7.053647459551518, "grad_norm": 3.6985366344451904, "learning_rate": 9.294975872835652e-05, "loss": 0.049732381105422975, "step": 24850 }, { "epoch": 7.0564859494748795, "grad_norm": 6.274226188659668, "learning_rate": 9.294692023843316e-05, "loss": 0.03882281482219696, "step": 24860 }, { "epoch": 7.05932443939824, "grad_norm": 5.406718730926514, "learning_rate": 9.294408174850979e-05, "loss": 0.03609597384929657, "step": 24870 }, { "epoch": 7.062162929321601, "grad_norm": 5.6192121505737305, "learning_rate": 9.294124325858644e-05, "loss": 0.04787580966949463, "step": 24880 }, { "epoch": 7.065001419244962, "grad_norm": 5.610540390014648, "learning_rate": 9.293840476866308e-05, "loss": 0.04276954233646393, "step": 24890 }, { "epoch": 7.067839909168322, "grad_norm": 8.313901901245117, "learning_rate": 9.293556627873971e-05, "loss": 0.0655287504196167, "step": 24900 }, { "epoch": 7.070678399091683, "grad_norm": 10.651018142700195, "learning_rate": 9.293272778881635e-05, "loss": 0.04535415768623352, "step": 24910 }, { "epoch": 7.073516889015044, "grad_norm": 3.412822961807251, "learning_rate": 9.292988929889299e-05, "loss": 0.04194101095199585, "step": 24920 }, { "epoch": 7.076355378938405, "grad_norm": 5.707177639007568, "learning_rate": 9.292705080896963e-05, "loss": 0.07320045828819274, "step": 24930 }, { "epoch": 7.079193868861766, "grad_norm": 11.662551879882812, "learning_rate": 9.292421231904626e-05, "loss": 0.0646411120891571, "step": 24940 }, { "epoch": 7.082032358785126, "grad_norm": 6.257136344909668, "learning_rate": 9.292137382912292e-05, "loss": 0.04653863906860352, "step": 24950 }, { "epoch": 7.084870848708487, "grad_norm": 7.245937347412109, "learning_rate": 9.291853533919956e-05, "loss": 0.0467953234910965, "step": 24960 }, { "epoch": 7.087709338631848, "grad_norm": 7.966516494750977, "learning_rate": 9.291569684927619e-05, "loss": 0.04062827229499817, "step": 24970 }, { "epoch": 7.090547828555208, "grad_norm": 6.651494026184082, "learning_rate": 9.291285835935283e-05, "loss": 0.055345076322555545, "step": 24980 }, { "epoch": 7.0933863184785695, "grad_norm": 14.029841423034668, "learning_rate": 9.291001986942947e-05, "loss": 0.05360539555549622, "step": 24990 }, { "epoch": 7.096224808401931, "grad_norm": 7.768344402313232, "learning_rate": 9.29071813795061e-05, "loss": 0.060749602317810056, "step": 25000 }, { "epoch": 7.096224808401931, "eval_accuracy": 0.9455077255674954, "eval_loss": 0.15831151604652405, "eval_runtime": 31.6483, "eval_samples_per_second": 496.93, "eval_steps_per_second": 7.773, "step": 25000 }, { "epoch": 7.099063298325291, "grad_norm": 8.892114639282227, "learning_rate": 9.290434288958275e-05, "loss": 0.048785635828971864, "step": 25010 }, { "epoch": 7.101901788248652, "grad_norm": 7.339568614959717, "learning_rate": 9.29015043996594e-05, "loss": 0.04398076832294464, "step": 25020 }, { "epoch": 7.104740278172012, "grad_norm": 6.270732879638672, "learning_rate": 9.289866590973602e-05, "loss": 0.051661229133605956, "step": 25030 }, { "epoch": 7.107578768095373, "grad_norm": 8.59290885925293, "learning_rate": 9.289582741981266e-05, "loss": 0.0611577570438385, "step": 25040 }, { "epoch": 7.110417258018734, "grad_norm": 9.069544792175293, "learning_rate": 9.28929889298893e-05, "loss": 0.05329734683036804, "step": 25050 }, { "epoch": 7.113255747942095, "grad_norm": 11.613066673278809, "learning_rate": 9.289015043996595e-05, "loss": 0.04913933873176575, "step": 25060 }, { "epoch": 7.116094237865456, "grad_norm": 38.75361251831055, "learning_rate": 9.288731195004257e-05, "loss": 0.0766391098499298, "step": 25070 }, { "epoch": 7.118932727788816, "grad_norm": 2.1658103466033936, "learning_rate": 9.288447346011923e-05, "loss": 0.04753919243812561, "step": 25080 }, { "epoch": 7.121771217712177, "grad_norm": 8.675074577331543, "learning_rate": 9.288163497019587e-05, "loss": 0.04546944797039032, "step": 25090 }, { "epoch": 7.124609707635538, "grad_norm": 6.669610977172852, "learning_rate": 9.28787964802725e-05, "loss": 0.048946937918663024, "step": 25100 }, { "epoch": 7.127448197558898, "grad_norm": 2.184880495071411, "learning_rate": 9.287595799034914e-05, "loss": 0.06939942240715027, "step": 25110 }, { "epoch": 7.1302866874822595, "grad_norm": 10.331969261169434, "learning_rate": 9.287311950042578e-05, "loss": 0.06810682415962219, "step": 25120 }, { "epoch": 7.133125177405621, "grad_norm": 1.9729667901992798, "learning_rate": 9.287028101050241e-05, "loss": 0.055098289251327516, "step": 25130 }, { "epoch": 7.135963667328981, "grad_norm": 7.606093883514404, "learning_rate": 9.286744252057905e-05, "loss": 0.05109448432922363, "step": 25140 }, { "epoch": 7.138802157252342, "grad_norm": 2.2090489864349365, "learning_rate": 9.28646040306557e-05, "loss": 0.046227681636810306, "step": 25150 }, { "epoch": 7.141640647175702, "grad_norm": 1.9346799850463867, "learning_rate": 9.286176554073233e-05, "loss": 0.06358041763305664, "step": 25160 }, { "epoch": 7.144479137099063, "grad_norm": 5.790378093719482, "learning_rate": 9.285892705080897e-05, "loss": 0.05250394344329834, "step": 25170 }, { "epoch": 7.147317627022424, "grad_norm": 3.859467029571533, "learning_rate": 9.285608856088561e-05, "loss": 0.052905941009521486, "step": 25180 }, { "epoch": 7.150156116945785, "grad_norm": 5.857624530792236, "learning_rate": 9.285325007096226e-05, "loss": 0.043177032470703126, "step": 25190 }, { "epoch": 7.152994606869146, "grad_norm": 6.686002254486084, "learning_rate": 9.285041158103888e-05, "loss": 0.042976924777030946, "step": 25200 }, { "epoch": 7.155833096792507, "grad_norm": 2.3265557289123535, "learning_rate": 9.284757309111554e-05, "loss": 0.05738587379455566, "step": 25210 }, { "epoch": 7.158671586715867, "grad_norm": 19.857702255249023, "learning_rate": 9.284473460119218e-05, "loss": 0.06300274133682252, "step": 25220 }, { "epoch": 7.161510076639228, "grad_norm": 3.007843494415283, "learning_rate": 9.284189611126881e-05, "loss": 0.06716567873954774, "step": 25230 }, { "epoch": 7.164348566562588, "grad_norm": 7.331758975982666, "learning_rate": 9.283905762134545e-05, "loss": 0.03742673397064209, "step": 25240 }, { "epoch": 7.1671870564859494, "grad_norm": 5.23754358291626, "learning_rate": 9.283621913142209e-05, "loss": 0.08978217840194702, "step": 25250 }, { "epoch": 7.170025546409311, "grad_norm": 7.695954322814941, "learning_rate": 9.283338064149872e-05, "loss": 0.042345684766769406, "step": 25260 }, { "epoch": 7.172864036332671, "grad_norm": 8.342477798461914, "learning_rate": 9.283054215157536e-05, "loss": 0.05771209597587586, "step": 25270 }, { "epoch": 7.175702526256032, "grad_norm": 8.542526245117188, "learning_rate": 9.282770366165201e-05, "loss": 0.057384973764419554, "step": 25280 }, { "epoch": 7.178541016179393, "grad_norm": 15.985373497009277, "learning_rate": 9.282486517172864e-05, "loss": 0.055515825748443604, "step": 25290 }, { "epoch": 7.181379506102753, "grad_norm": 9.437061309814453, "learning_rate": 9.282202668180528e-05, "loss": 0.0848008632659912, "step": 25300 }, { "epoch": 7.184217996026114, "grad_norm": 6.897600173950195, "learning_rate": 9.281918819188193e-05, "loss": 0.053754502534866334, "step": 25310 }, { "epoch": 7.1870564859494745, "grad_norm": 4.315338611602783, "learning_rate": 9.281634970195857e-05, "loss": 0.05226545333862305, "step": 25320 }, { "epoch": 7.189894975872836, "grad_norm": 4.4751973152160645, "learning_rate": 9.28135112120352e-05, "loss": 0.03448910415172577, "step": 25330 }, { "epoch": 7.192733465796197, "grad_norm": 6.052961349487305, "learning_rate": 9.281067272211184e-05, "loss": 0.04740365445613861, "step": 25340 }, { "epoch": 7.195571955719557, "grad_norm": 3.3505899906158447, "learning_rate": 9.280783423218848e-05, "loss": 0.054388236999511716, "step": 25350 }, { "epoch": 7.198410445642918, "grad_norm": 7.653215408325195, "learning_rate": 9.280499574226512e-05, "loss": 0.047686788439750674, "step": 25360 }, { "epoch": 7.201248935566278, "grad_norm": 10.919050216674805, "learning_rate": 9.280215725234176e-05, "loss": 0.07312428951263428, "step": 25370 }, { "epoch": 7.204087425489639, "grad_norm": 2.653695821762085, "learning_rate": 9.27993187624184e-05, "loss": 0.033460021018981934, "step": 25380 }, { "epoch": 7.2069259154130005, "grad_norm": 5.892426490783691, "learning_rate": 9.279648027249503e-05, "loss": 0.04747145771980286, "step": 25390 }, { "epoch": 7.209764405336361, "grad_norm": 17.315731048583984, "learning_rate": 9.279364178257167e-05, "loss": 0.06377516984939575, "step": 25400 }, { "epoch": 7.212602895259722, "grad_norm": 0.25403892993927, "learning_rate": 9.279080329264833e-05, "loss": 0.04014536142349243, "step": 25410 }, { "epoch": 7.215441385183083, "grad_norm": 8.831771850585938, "learning_rate": 9.278796480272495e-05, "loss": 0.03756490349769592, "step": 25420 }, { "epoch": 7.218279875106443, "grad_norm": 6.84322452545166, "learning_rate": 9.27851263128016e-05, "loss": 0.04901188611984253, "step": 25430 }, { "epoch": 7.221118365029804, "grad_norm": 3.412856340408325, "learning_rate": 9.278228782287824e-05, "loss": 0.04063327014446259, "step": 25440 }, { "epoch": 7.2239568549531645, "grad_norm": 6.648988246917725, "learning_rate": 9.277944933295488e-05, "loss": 0.05273228883743286, "step": 25450 }, { "epoch": 7.226795344876526, "grad_norm": 6.485245704650879, "learning_rate": 9.27766108430315e-05, "loss": 0.06967499256134033, "step": 25460 }, { "epoch": 7.229633834799887, "grad_norm": 7.168206214904785, "learning_rate": 9.277377235310815e-05, "loss": 0.03418740034103394, "step": 25470 }, { "epoch": 7.232472324723247, "grad_norm": 8.098873138427734, "learning_rate": 9.277093386318479e-05, "loss": 0.06575109958648681, "step": 25480 }, { "epoch": 7.235310814646608, "grad_norm": 8.611875534057617, "learning_rate": 9.276809537326143e-05, "loss": 0.04655812680721283, "step": 25490 }, { "epoch": 7.238149304569969, "grad_norm": 13.001770973205566, "learning_rate": 9.276525688333807e-05, "loss": 0.051898378133773806, "step": 25500 }, { "epoch": 7.238149304569969, "eval_accuracy": 0.9485598016150569, "eval_loss": 0.16719238460063934, "eval_runtime": 34.3, "eval_samples_per_second": 458.513, "eval_steps_per_second": 7.172, "step": 25500 }, { "epoch": 7.240987794493329, "grad_norm": 11.041200637817383, "learning_rate": 9.276241839341471e-05, "loss": 0.05748438835144043, "step": 25510 }, { "epoch": 7.2438262844166905, "grad_norm": 2.1396360397338867, "learning_rate": 9.275957990349134e-05, "loss": 0.04392762184143066, "step": 25520 }, { "epoch": 7.246664774340051, "grad_norm": 18.292173385620117, "learning_rate": 9.275674141356798e-05, "loss": 0.06498112678527831, "step": 25530 }, { "epoch": 7.249503264263412, "grad_norm": 3.305121421813965, "learning_rate": 9.275390292364462e-05, "loss": 0.05211912989616394, "step": 25540 }, { "epoch": 7.252341754186773, "grad_norm": 9.468502044677734, "learning_rate": 9.275106443372126e-05, "loss": 0.05754003524780273, "step": 25550 }, { "epoch": 7.255180244110133, "grad_norm": 9.822763442993164, "learning_rate": 9.27482259437979e-05, "loss": 0.06063700914382934, "step": 25560 }, { "epoch": 7.258018734033494, "grad_norm": 5.103598117828369, "learning_rate": 9.274538745387455e-05, "loss": 0.05015867352485657, "step": 25570 }, { "epoch": 7.2608572239568545, "grad_norm": 7.100358486175537, "learning_rate": 9.274254896395117e-05, "loss": 0.03589507639408111, "step": 25580 }, { "epoch": 7.263695713880216, "grad_norm": 9.4750337600708, "learning_rate": 9.273971047402782e-05, "loss": 0.07078198194503785, "step": 25590 }, { "epoch": 7.266534203803577, "grad_norm": 7.472751140594482, "learning_rate": 9.273687198410446e-05, "loss": 0.04798069298267364, "step": 25600 }, { "epoch": 7.269372693726937, "grad_norm": 10.863080024719238, "learning_rate": 9.27340334941811e-05, "loss": 0.06839432716369628, "step": 25610 }, { "epoch": 7.272211183650298, "grad_norm": 7.607190132141113, "learning_rate": 9.273119500425774e-05, "loss": 0.05385514497756958, "step": 25620 }, { "epoch": 7.275049673573659, "grad_norm": 9.10322380065918, "learning_rate": 9.272835651433438e-05, "loss": 0.05100385546684265, "step": 25630 }, { "epoch": 7.277888163497019, "grad_norm": 1.9858859777450562, "learning_rate": 9.272551802441102e-05, "loss": 0.07281947135925293, "step": 25640 }, { "epoch": 7.2807266534203805, "grad_norm": 5.673005104064941, "learning_rate": 9.272267953448765e-05, "loss": 0.056974828243255615, "step": 25650 }, { "epoch": 7.283565143343741, "grad_norm": 5.662898063659668, "learning_rate": 9.271984104456429e-05, "loss": 0.05099035501480102, "step": 25660 }, { "epoch": 7.286403633267102, "grad_norm": 2.6869547367095947, "learning_rate": 9.271700255464093e-05, "loss": 0.03324490487575531, "step": 25670 }, { "epoch": 7.289242123190463, "grad_norm": 11.466360092163086, "learning_rate": 9.271416406471758e-05, "loss": 0.06529830694198609, "step": 25680 }, { "epoch": 7.292080613113823, "grad_norm": 11.04913330078125, "learning_rate": 9.271132557479422e-05, "loss": 0.06986249685287475, "step": 25690 }, { "epoch": 7.294919103037184, "grad_norm": 3.1632933616638184, "learning_rate": 9.270848708487086e-05, "loss": 0.08596933484077454, "step": 25700 }, { "epoch": 7.297757592960545, "grad_norm": 5.88351583480835, "learning_rate": 9.270564859494749e-05, "loss": 0.06333709359169007, "step": 25710 }, { "epoch": 7.300596082883906, "grad_norm": 5.693306922912598, "learning_rate": 9.270281010502413e-05, "loss": 0.05406801104545593, "step": 25720 }, { "epoch": 7.303434572807267, "grad_norm": 7.691220760345459, "learning_rate": 9.269997161510077e-05, "loss": 0.07567648887634278, "step": 25730 }, { "epoch": 7.306273062730627, "grad_norm": 6.808253765106201, "learning_rate": 9.269713312517741e-05, "loss": 0.044033128023147586, "step": 25740 }, { "epoch": 7.309111552653988, "grad_norm": 2.334197521209717, "learning_rate": 9.269429463525405e-05, "loss": 0.05037899017333984, "step": 25750 }, { "epoch": 7.311950042577349, "grad_norm": 6.979591369628906, "learning_rate": 9.269145614533069e-05, "loss": 0.04243600070476532, "step": 25760 }, { "epoch": 7.314788532500709, "grad_norm": 4.646600723266602, "learning_rate": 9.268861765540733e-05, "loss": 0.05679367184638977, "step": 25770 }, { "epoch": 7.3176270224240705, "grad_norm": 6.724841117858887, "learning_rate": 9.268577916548396e-05, "loss": 0.06517183184623718, "step": 25780 }, { "epoch": 7.320465512347431, "grad_norm": 14.773314476013184, "learning_rate": 9.26829406755606e-05, "loss": 0.05767623782157898, "step": 25790 }, { "epoch": 7.323304002270792, "grad_norm": 8.17959213256836, "learning_rate": 9.268010218563724e-05, "loss": 0.05301222801208496, "step": 25800 }, { "epoch": 7.326142492194153, "grad_norm": 3.029024600982666, "learning_rate": 9.267726369571389e-05, "loss": 0.06920971870422363, "step": 25810 }, { "epoch": 7.328980982117513, "grad_norm": 4.359516620635986, "learning_rate": 9.267442520579053e-05, "loss": 0.03485364019870758, "step": 25820 }, { "epoch": 7.331819472040874, "grad_norm": 5.757990837097168, "learning_rate": 9.267158671586717e-05, "loss": 0.0553173303604126, "step": 25830 }, { "epoch": 7.334657961964235, "grad_norm": 12.129940032958984, "learning_rate": 9.26687482259438e-05, "loss": 0.0741362750530243, "step": 25840 }, { "epoch": 7.337496451887596, "grad_norm": 13.367217063903809, "learning_rate": 9.266590973602044e-05, "loss": 0.05916757583618164, "step": 25850 }, { "epoch": 7.340334941810957, "grad_norm": 6.276948928833008, "learning_rate": 9.266307124609708e-05, "loss": 0.03768422901630401, "step": 25860 }, { "epoch": 7.343173431734318, "grad_norm": 8.803367614746094, "learning_rate": 9.266023275617372e-05, "loss": 0.04764154851436615, "step": 25870 }, { "epoch": 7.346011921657678, "grad_norm": 5.356255054473877, "learning_rate": 9.265739426625036e-05, "loss": 0.0503578245639801, "step": 25880 }, { "epoch": 7.348850411581039, "grad_norm": 3.2357609272003174, "learning_rate": 9.2654555776327e-05, "loss": 0.05958862900733948, "step": 25890 }, { "epoch": 7.351688901504399, "grad_norm": 9.581884384155273, "learning_rate": 9.265171728640364e-05, "loss": 0.05555160045623779, "step": 25900 }, { "epoch": 7.3545273914277605, "grad_norm": 4.653804779052734, "learning_rate": 9.264887879648027e-05, "loss": 0.04167143404483795, "step": 25910 }, { "epoch": 7.357365881351122, "grad_norm": 7.334325313568115, "learning_rate": 9.264604030655691e-05, "loss": 0.05609765648841858, "step": 25920 }, { "epoch": 7.360204371274482, "grad_norm": 9.76183795928955, "learning_rate": 9.264320181663356e-05, "loss": 0.06169427633285522, "step": 25930 }, { "epoch": 7.363042861197843, "grad_norm": 7.549182415008545, "learning_rate": 9.26403633267102e-05, "loss": 0.047147554159164426, "step": 25940 }, { "epoch": 7.365881351121203, "grad_norm": 2.332552909851074, "learning_rate": 9.263752483678684e-05, "loss": 0.05108364820480347, "step": 25950 }, { "epoch": 7.368719841044564, "grad_norm": 10.569953918457031, "learning_rate": 9.263468634686348e-05, "loss": 0.060970598459243776, "step": 25960 }, { "epoch": 7.371558330967925, "grad_norm": 8.399297714233398, "learning_rate": 9.263184785694011e-05, "loss": 0.06844512224197388, "step": 25970 }, { "epoch": 7.374396820891286, "grad_norm": 12.272492408752441, "learning_rate": 9.262900936701675e-05, "loss": 0.04748618006706238, "step": 25980 }, { "epoch": 7.377235310814647, "grad_norm": 7.4991350173950195, "learning_rate": 9.262617087709339e-05, "loss": 0.0700838804244995, "step": 25990 }, { "epoch": 7.380073800738008, "grad_norm": 8.575705528259277, "learning_rate": 9.262333238717003e-05, "loss": 0.06265588998794555, "step": 26000 }, { "epoch": 7.380073800738008, "eval_accuracy": 0.94976791505055, "eval_loss": 0.15770244598388672, "eval_runtime": 30.4846, "eval_samples_per_second": 515.9, "eval_steps_per_second": 8.07, "step": 26000 }, { "epoch": 7.382912290661368, "grad_norm": 4.742435455322266, "learning_rate": 9.262049389724667e-05, "loss": 0.09360763430595398, "step": 26010 }, { "epoch": 7.385750780584729, "grad_norm": 2.7856550216674805, "learning_rate": 9.261765540732331e-05, "loss": 0.060385727882385255, "step": 26020 }, { "epoch": 7.388589270508089, "grad_norm": 1.016340970993042, "learning_rate": 9.261481691739996e-05, "loss": 0.048832231760025026, "step": 26030 }, { "epoch": 7.3914277604314504, "grad_norm": 7.86880350112915, "learning_rate": 9.261197842747658e-05, "loss": 0.052027225494384766, "step": 26040 }, { "epoch": 7.3942662503548116, "grad_norm": 10.811531066894531, "learning_rate": 9.260913993755322e-05, "loss": 0.08172963857650757, "step": 26050 }, { "epoch": 7.397104740278172, "grad_norm": 15.862709045410156, "learning_rate": 9.260630144762987e-05, "loss": 0.05207148194313049, "step": 26060 }, { "epoch": 7.399943230201533, "grad_norm": 15.626310348510742, "learning_rate": 9.26034629577065e-05, "loss": 0.11002457141876221, "step": 26070 }, { "epoch": 7.402781720124894, "grad_norm": 6.95619535446167, "learning_rate": 9.260062446778315e-05, "loss": 0.05656765699386597, "step": 26080 }, { "epoch": 7.405620210048254, "grad_norm": 8.819171905517578, "learning_rate": 9.259778597785979e-05, "loss": 0.06208986043930054, "step": 26090 }, { "epoch": 7.408458699971615, "grad_norm": 4.154296875, "learning_rate": 9.259494748793642e-05, "loss": 0.05832427740097046, "step": 26100 }, { "epoch": 7.4112971898949755, "grad_norm": 6.696263790130615, "learning_rate": 9.259210899801306e-05, "loss": 0.046842488646507266, "step": 26110 }, { "epoch": 7.414135679818337, "grad_norm": 3.302532911300659, "learning_rate": 9.25892705080897e-05, "loss": 0.03938955366611481, "step": 26120 }, { "epoch": 7.416974169741698, "grad_norm": 5.040694713592529, "learning_rate": 9.258643201816634e-05, "loss": 0.07872294187545777, "step": 26130 }, { "epoch": 7.419812659665058, "grad_norm": 3.5507800579071045, "learning_rate": 9.258359352824298e-05, "loss": 0.049850231409072875, "step": 26140 }, { "epoch": 7.422651149588419, "grad_norm": 8.606950759887695, "learning_rate": 9.258075503831962e-05, "loss": 0.05307169556617737, "step": 26150 }, { "epoch": 7.425489639511779, "grad_norm": 8.806314468383789, "learning_rate": 9.257791654839627e-05, "loss": 0.047733640670776366, "step": 26160 }, { "epoch": 7.42832812943514, "grad_norm": 10.485142707824707, "learning_rate": 9.25750780584729e-05, "loss": 0.049794360995292664, "step": 26170 }, { "epoch": 7.4311666193585015, "grad_norm": 2.8967790603637695, "learning_rate": 9.257223956854954e-05, "loss": 0.05222916007041931, "step": 26180 }, { "epoch": 7.434005109281862, "grad_norm": 6.090600967407227, "learning_rate": 9.256940107862618e-05, "loss": 0.04957270622253418, "step": 26190 }, { "epoch": 7.436843599205223, "grad_norm": 3.291701555252075, "learning_rate": 9.25665625887028e-05, "loss": 0.05944572687149048, "step": 26200 }, { "epoch": 7.439682089128584, "grad_norm": 2.3226003646850586, "learning_rate": 9.256372409877946e-05, "loss": 0.07182380557060242, "step": 26210 }, { "epoch": 7.442520579051944, "grad_norm": 8.635110855102539, "learning_rate": 9.25608856088561e-05, "loss": 0.058639693260192874, "step": 26220 }, { "epoch": 7.445359068975305, "grad_norm": 3.0947983264923096, "learning_rate": 9.255804711893273e-05, "loss": 0.04923056960105896, "step": 26230 }, { "epoch": 7.4481975588986655, "grad_norm": Infinity, "learning_rate": 9.255520862900937e-05, "loss": 0.07544021606445313, "step": 26240 }, { "epoch": 7.451036048822027, "grad_norm": 13.142388343811035, "learning_rate": 9.255265398807834e-05, "loss": 0.06932682991027832, "step": 26250 }, { "epoch": 7.453874538745388, "grad_norm": 8.136691093444824, "learning_rate": 9.254981549815499e-05, "loss": 0.047928225994110105, "step": 26260 }, { "epoch": 7.456713028668748, "grad_norm": 12.899751663208008, "learning_rate": 9.254697700823163e-05, "loss": 0.06659846901893615, "step": 26270 }, { "epoch": 7.459551518592109, "grad_norm": 1.5240484476089478, "learning_rate": 9.254413851830826e-05, "loss": 0.05584169030189514, "step": 26280 }, { "epoch": 7.46239000851547, "grad_norm": 1.2604084014892578, "learning_rate": 9.25413000283849e-05, "loss": 0.0526470422744751, "step": 26290 }, { "epoch": 7.46522849843883, "grad_norm": 11.345165252685547, "learning_rate": 9.253846153846154e-05, "loss": 0.07551120519638062, "step": 26300 }, { "epoch": 7.4680669883621915, "grad_norm": 9.882131576538086, "learning_rate": 9.253562304853818e-05, "loss": 0.057638198137283325, "step": 26310 }, { "epoch": 7.470905478285552, "grad_norm": 9.353256225585938, "learning_rate": 9.253278455861483e-05, "loss": 0.05799208283424377, "step": 26320 }, { "epoch": 7.473743968208913, "grad_norm": 10.500579833984375, "learning_rate": 9.252994606869147e-05, "loss": 0.05137408375740051, "step": 26330 }, { "epoch": 7.476582458132274, "grad_norm": 6.1534953117370605, "learning_rate": 9.25271075787681e-05, "loss": 0.07049535512924195, "step": 26340 }, { "epoch": 7.479420948055634, "grad_norm": 5.8664422035217285, "learning_rate": 9.252426908884474e-05, "loss": 0.06326422095298767, "step": 26350 }, { "epoch": 7.482259437978995, "grad_norm": 1.8013609647750854, "learning_rate": 9.252143059892138e-05, "loss": 0.06020296216011047, "step": 26360 }, { "epoch": 7.4850979279023555, "grad_norm": 8.17503547668457, "learning_rate": 9.251859210899802e-05, "loss": 0.0697300374507904, "step": 26370 }, { "epoch": 7.487936417825717, "grad_norm": 8.01875114440918, "learning_rate": 9.251575361907465e-05, "loss": 0.05817473530769348, "step": 26380 }, { "epoch": 7.490774907749078, "grad_norm": 6.810417175292969, "learning_rate": 9.25129151291513e-05, "loss": 0.06495989561080932, "step": 26390 }, { "epoch": 7.493613397672438, "grad_norm": 6.789105415344238, "learning_rate": 9.251007663922794e-05, "loss": 0.05076800584793091, "step": 26400 }, { "epoch": 7.496451887595799, "grad_norm": 5.360595703125, "learning_rate": 9.250723814930457e-05, "loss": 0.0562969982624054, "step": 26410 }, { "epoch": 7.49929037751916, "grad_norm": 5.132838726043701, "learning_rate": 9.250439965938121e-05, "loss": 0.07120607495307922, "step": 26420 }, { "epoch": 7.50212886744252, "grad_norm": 7.1580915451049805, "learning_rate": 9.250156116945785e-05, "loss": 0.06001583337783813, "step": 26430 }, { "epoch": 7.5049673573658815, "grad_norm": 3.7924418449401855, "learning_rate": 9.249872267953448e-05, "loss": 0.06384726166725159, "step": 26440 }, { "epoch": 7.507805847289243, "grad_norm": 4.2323317527771, "learning_rate": 9.249588418961112e-05, "loss": 0.04672531485557556, "step": 26450 }, { "epoch": 7.510644337212603, "grad_norm": 11.968902587890625, "learning_rate": 9.249304569968778e-05, "loss": 0.06323580145835876, "step": 26460 }, { "epoch": 7.513482827135964, "grad_norm": 6.856075286865234, "learning_rate": 9.24902072097644e-05, "loss": 0.0636141836643219, "step": 26470 }, { "epoch": 7.516321317059324, "grad_norm": 8.09052848815918, "learning_rate": 9.248736871984105e-05, "loss": 0.04009096622467041, "step": 26480 }, { "epoch": 7.519159806982685, "grad_norm": 4.189321994781494, "learning_rate": 9.248453022991769e-05, "loss": 0.047170969843864444, "step": 26490 }, { "epoch": 7.521998296906046, "grad_norm": 7.124314308166504, "learning_rate": 9.248169173999433e-05, "loss": 0.06043018698692322, "step": 26500 }, { "epoch": 7.521998296906046, "eval_accuracy": 0.9473516881795638, "eval_loss": 0.1630299985408783, "eval_runtime": 32.8424, "eval_samples_per_second": 478.862, "eval_steps_per_second": 7.49, "step": 26500 }, { "epoch": 7.524836786829407, "grad_norm": 2.742633581161499, "learning_rate": 9.247885325007096e-05, "loss": 0.07077868580818177, "step": 26510 }, { "epoch": 7.527675276752768, "grad_norm": 7.041285514831543, "learning_rate": 9.247601476014761e-05, "loss": 0.04672999978065491, "step": 26520 }, { "epoch": 7.530513766676128, "grad_norm": 5.757533073425293, "learning_rate": 9.247317627022425e-05, "loss": 0.02860719561576843, "step": 26530 }, { "epoch": 7.533352256599489, "grad_norm": 3.7189266681671143, "learning_rate": 9.247033778030088e-05, "loss": 0.04800913333892822, "step": 26540 }, { "epoch": 7.53619074652285, "grad_norm": 7.216089248657227, "learning_rate": 9.246749929037752e-05, "loss": 0.06915775537490845, "step": 26550 }, { "epoch": 7.53902923644621, "grad_norm": 7.926145076751709, "learning_rate": 9.246466080045416e-05, "loss": 0.07104028463363647, "step": 26560 }, { "epoch": 7.5418677263695715, "grad_norm": 4.38716459274292, "learning_rate": 9.246182231053079e-05, "loss": 0.031456807255744935, "step": 26570 }, { "epoch": 7.544706216292932, "grad_norm": 5.167555332183838, "learning_rate": 9.245898382060743e-05, "loss": 0.028284734487533568, "step": 26580 }, { "epoch": 7.547544706216293, "grad_norm": 10.232625961303711, "learning_rate": 9.245614533068409e-05, "loss": 0.04959501028060913, "step": 26590 }, { "epoch": 7.550383196139654, "grad_norm": 14.02999210357666, "learning_rate": 9.245330684076072e-05, "loss": 0.0811251938343048, "step": 26600 }, { "epoch": 7.553221686063014, "grad_norm": 5.1616668701171875, "learning_rate": 9.245046835083736e-05, "loss": 0.04219097197055817, "step": 26610 }, { "epoch": 7.556060175986375, "grad_norm": 4.923098087310791, "learning_rate": 9.2447629860914e-05, "loss": 0.04550593495368958, "step": 26620 }, { "epoch": 7.558898665909736, "grad_norm": 9.60577392578125, "learning_rate": 9.244479137099064e-05, "loss": 0.057870739698410036, "step": 26630 }, { "epoch": 7.561737155833097, "grad_norm": 5.841795444488525, "learning_rate": 9.244195288106727e-05, "loss": 0.04393775463104248, "step": 26640 }, { "epoch": 7.564575645756458, "grad_norm": 3.8294336795806885, "learning_rate": 9.243911439114391e-05, "loss": 0.07476555109024048, "step": 26650 }, { "epoch": 7.567414135679819, "grad_norm": 1.9979289770126343, "learning_rate": 9.243627590122057e-05, "loss": 0.06097663640975952, "step": 26660 }, { "epoch": 7.570252625603179, "grad_norm": 7.357421398162842, "learning_rate": 9.243343741129719e-05, "loss": 0.0699048638343811, "step": 26670 }, { "epoch": 7.57309111552654, "grad_norm": 11.159279823303223, "learning_rate": 9.243059892137383e-05, "loss": 0.08299893140792847, "step": 26680 }, { "epoch": 7.5759296054499, "grad_norm": 1.455036997795105, "learning_rate": 9.242776043145048e-05, "loss": 0.044960391521453855, "step": 26690 }, { "epoch": 7.5787680953732615, "grad_norm": 8.555489540100098, "learning_rate": 9.24249219415271e-05, "loss": 0.03317358493804932, "step": 26700 }, { "epoch": 7.581606585296623, "grad_norm": 2.2475056648254395, "learning_rate": 9.242208345160374e-05, "loss": 0.08536184430122376, "step": 26710 }, { "epoch": 7.584445075219983, "grad_norm": 6.194825649261475, "learning_rate": 9.24192449616804e-05, "loss": 0.04888980686664581, "step": 26720 }, { "epoch": 7.587283565143344, "grad_norm": 10.57650089263916, "learning_rate": 9.241640647175703e-05, "loss": 0.08976618051528931, "step": 26730 }, { "epoch": 7.590122055066704, "grad_norm": 4.703174591064453, "learning_rate": 9.241356798183367e-05, "loss": 0.03950670063495636, "step": 26740 }, { "epoch": 7.592960544990065, "grad_norm": 3.15185546875, "learning_rate": 9.241072949191031e-05, "loss": 0.046169915795326234, "step": 26750 }, { "epoch": 7.595799034913426, "grad_norm": 11.534300804138184, "learning_rate": 9.240789100198695e-05, "loss": 0.08072076439857483, "step": 26760 }, { "epoch": 7.598637524836787, "grad_norm": 5.243678569793701, "learning_rate": 9.240505251206358e-05, "loss": 0.026982125639915467, "step": 26770 }, { "epoch": 7.601476014760148, "grad_norm": 4.844478607177734, "learning_rate": 9.240221402214022e-05, "loss": 0.06256864070892335, "step": 26780 }, { "epoch": 7.604314504683508, "grad_norm": 7.448295593261719, "learning_rate": 9.239937553221688e-05, "loss": 0.05735560655593872, "step": 26790 }, { "epoch": 7.607152994606869, "grad_norm": 10.694254875183105, "learning_rate": 9.23965370422935e-05, "loss": 0.04796874523162842, "step": 26800 }, { "epoch": 7.60999148453023, "grad_norm": 0.7753211855888367, "learning_rate": 9.239369855237014e-05, "loss": 0.03773359656333923, "step": 26810 }, { "epoch": 7.61282997445359, "grad_norm": 1.5660134553909302, "learning_rate": 9.239086006244679e-05, "loss": 0.05503919720649719, "step": 26820 }, { "epoch": 7.615668464376951, "grad_norm": 5.733978271484375, "learning_rate": 9.238802157252341e-05, "loss": 0.06999952793121338, "step": 26830 }, { "epoch": 7.6185069543003126, "grad_norm": 3.1165099143981934, "learning_rate": 9.238518308260006e-05, "loss": 0.07066283226013184, "step": 26840 }, { "epoch": 7.621345444223673, "grad_norm": 15.801298141479492, "learning_rate": 9.23823445926767e-05, "loss": 0.0988669753074646, "step": 26850 }, { "epoch": 7.624183934147034, "grad_norm": 8.006951332092285, "learning_rate": 9.237950610275334e-05, "loss": 0.051775014400482176, "step": 26860 }, { "epoch": 7.627022424070395, "grad_norm": 9.770270347595215, "learning_rate": 9.237666761282998e-05, "loss": 0.04186524152755737, "step": 26870 }, { "epoch": 7.629860913993755, "grad_norm": 12.776824951171875, "learning_rate": 9.237382912290662e-05, "loss": 0.07661487460136414, "step": 26880 }, { "epoch": 7.632699403917116, "grad_norm": 5.992150783538818, "learning_rate": 9.237099063298326e-05, "loss": 0.06097249984741211, "step": 26890 }, { "epoch": 7.6355378938404765, "grad_norm": 10.514327049255371, "learning_rate": 9.236815214305989e-05, "loss": 0.0651603102684021, "step": 26900 }, { "epoch": 7.638376383763838, "grad_norm": 11.377123832702637, "learning_rate": 9.236531365313653e-05, "loss": 0.05237002372741699, "step": 26910 }, { "epoch": 7.641214873687199, "grad_norm": 2.094951629638672, "learning_rate": 9.236247516321319e-05, "loss": 0.041029584407806394, "step": 26920 }, { "epoch": 7.644053363610559, "grad_norm": 9.397889137268066, "learning_rate": 9.235963667328981e-05, "loss": 0.05316284894943237, "step": 26930 }, { "epoch": 7.64689185353392, "grad_norm": 11.195029258728027, "learning_rate": 9.235679818336646e-05, "loss": 0.09512031078338623, "step": 26940 }, { "epoch": 7.64973034345728, "grad_norm": 6.35661506652832, "learning_rate": 9.23539596934431e-05, "loss": 0.03971976041793823, "step": 26950 }, { "epoch": 7.652568833380641, "grad_norm": 3.834334373474121, "learning_rate": 9.235112120351972e-05, "loss": 0.03920655846595764, "step": 26960 }, { "epoch": 7.6554073233040025, "grad_norm": 12.671218872070312, "learning_rate": 9.234828271359637e-05, "loss": 0.04359520673751831, "step": 26970 }, { "epoch": 7.658245813227363, "grad_norm": 4.942806720733643, "learning_rate": 9.234544422367301e-05, "loss": 0.06651021838188172, "step": 26980 }, { "epoch": 7.661084303150724, "grad_norm": 11.000497817993164, "learning_rate": 9.234260573374965e-05, "loss": 0.04165796637535095, "step": 26990 }, { "epoch": 7.663922793074085, "grad_norm": 14.282633781433105, "learning_rate": 9.233976724382629e-05, "loss": 0.04657668173313141, "step": 27000 }, { "epoch": 7.663922793074085, "eval_accuracy": 0.9493228206269473, "eval_loss": 0.15022501349449158, "eval_runtime": 31.1086, "eval_samples_per_second": 505.552, "eval_steps_per_second": 7.908, "step": 27000 }, { "epoch": 7.666761282997445, "grad_norm": 7.603346347808838, "learning_rate": 9.233692875390293e-05, "loss": 0.036307454109191895, "step": 27010 }, { "epoch": 7.669599772920806, "grad_norm": 7.113778591156006, "learning_rate": 9.233409026397957e-05, "loss": 0.12619259357452392, "step": 27020 }, { "epoch": 7.6724382628441665, "grad_norm": 7.61203670501709, "learning_rate": 9.23312517740562e-05, "loss": 0.08531588315963745, "step": 27030 }, { "epoch": 7.675276752767528, "grad_norm": 7.09785795211792, "learning_rate": 9.232841328413284e-05, "loss": 0.07404013872146606, "step": 27040 }, { "epoch": 7.678115242690889, "grad_norm": 7.97544527053833, "learning_rate": 9.232557479420948e-05, "loss": 0.04864976406097412, "step": 27050 }, { "epoch": 7.680953732614249, "grad_norm": 11.078845024108887, "learning_rate": 9.232273630428613e-05, "loss": 0.05180302858352661, "step": 27060 }, { "epoch": 7.68379222253761, "grad_norm": 10.929143905639648, "learning_rate": 9.231989781436277e-05, "loss": 0.05205378532409668, "step": 27070 }, { "epoch": 7.686630712460971, "grad_norm": 10.623173713684082, "learning_rate": 9.231705932443941e-05, "loss": 0.049158453941345215, "step": 27080 }, { "epoch": 7.689469202384331, "grad_norm": 10.110910415649414, "learning_rate": 9.231422083451604e-05, "loss": 0.0820845365524292, "step": 27090 }, { "epoch": 7.6923076923076925, "grad_norm": 5.106496334075928, "learning_rate": 9.231138234459268e-05, "loss": 0.03694283366203308, "step": 27100 }, { "epoch": 7.695146182231053, "grad_norm": 8.69005298614502, "learning_rate": 9.230854385466932e-05, "loss": 0.059427410364151, "step": 27110 }, { "epoch": 7.697984672154414, "grad_norm": 2.92171311378479, "learning_rate": 9.230570536474596e-05, "loss": 0.05924317240715027, "step": 27120 }, { "epoch": 7.700823162077775, "grad_norm": 8.174116134643555, "learning_rate": 9.23028668748226e-05, "loss": 0.05856873393058777, "step": 27130 }, { "epoch": 7.703661652001135, "grad_norm": 6.366161346435547, "learning_rate": 9.230002838489924e-05, "loss": 0.07504392266273499, "step": 27140 }, { "epoch": 7.706500141924496, "grad_norm": 4.173027038574219, "learning_rate": 9.229718989497588e-05, "loss": 0.04638764560222626, "step": 27150 }, { "epoch": 7.7093386318478565, "grad_norm": 8.70082950592041, "learning_rate": 9.229435140505251e-05, "loss": 0.032094866037368774, "step": 27160 }, { "epoch": 7.712177121771218, "grad_norm": 3.0265629291534424, "learning_rate": 9.229151291512915e-05, "loss": 0.0639643907546997, "step": 27170 }, { "epoch": 7.715015611694579, "grad_norm": 0.8069329857826233, "learning_rate": 9.22886744252058e-05, "loss": 0.03996948003768921, "step": 27180 }, { "epoch": 7.717854101617939, "grad_norm": 3.650559186935425, "learning_rate": 9.228583593528244e-05, "loss": 0.04203440546989441, "step": 27190 }, { "epoch": 7.7206925915413, "grad_norm": 5.180119037628174, "learning_rate": 9.228299744535908e-05, "loss": 0.03876271545886993, "step": 27200 }, { "epoch": 7.723531081464661, "grad_norm": 9.50401496887207, "learning_rate": 9.228015895543572e-05, "loss": 0.03959118723869324, "step": 27210 }, { "epoch": 7.726369571388021, "grad_norm": 2.433487892150879, "learning_rate": 9.227732046551235e-05, "loss": 0.0331896185874939, "step": 27220 }, { "epoch": 7.7292080613113825, "grad_norm": 5.518050193786621, "learning_rate": 9.227448197558899e-05, "loss": 0.054225629568099974, "step": 27230 }, { "epoch": 7.732046551234744, "grad_norm": 11.64424991607666, "learning_rate": 9.227164348566563e-05, "loss": 0.06916648745536805, "step": 27240 }, { "epoch": 7.734885041158104, "grad_norm": 10.16318130493164, "learning_rate": 9.226880499574227e-05, "loss": 0.0570537269115448, "step": 27250 }, { "epoch": 7.737723531081465, "grad_norm": 8.633238792419434, "learning_rate": 9.226596650581891e-05, "loss": 0.040804418921470645, "step": 27260 }, { "epoch": 7.740562021004825, "grad_norm": 9.693405151367188, "learning_rate": 9.226312801589555e-05, "loss": 0.08746941089630127, "step": 27270 }, { "epoch": 7.743400510928186, "grad_norm": 5.4285078048706055, "learning_rate": 9.226028952597218e-05, "loss": 0.07406668066978454, "step": 27280 }, { "epoch": 7.746239000851547, "grad_norm": 2.9961752891540527, "learning_rate": 9.225745103604882e-05, "loss": 0.04477266669273376, "step": 27290 }, { "epoch": 7.749077490774908, "grad_norm": 0.8375430703163147, "learning_rate": 9.225461254612546e-05, "loss": 0.047383731603622435, "step": 27300 }, { "epoch": 7.751915980698269, "grad_norm": 12.115437507629395, "learning_rate": 9.22517740562021e-05, "loss": 0.07108668088912964, "step": 27310 }, { "epoch": 7.754754470621629, "grad_norm": 7.579631805419922, "learning_rate": 9.224893556627875e-05, "loss": 0.0364434152841568, "step": 27320 }, { "epoch": 7.75759296054499, "grad_norm": 3.444655656814575, "learning_rate": 9.224609707635539e-05, "loss": 0.04222921431064606, "step": 27330 }, { "epoch": 7.760431450468351, "grad_norm": 3.925614595413208, "learning_rate": 9.224325858643203e-05, "loss": 0.06069764494895935, "step": 27340 }, { "epoch": 7.763269940391711, "grad_norm": 9.931295394897461, "learning_rate": 9.224042009650866e-05, "loss": 0.06606180667877197, "step": 27350 }, { "epoch": 7.7661084303150725, "grad_norm": 9.406881332397461, "learning_rate": 9.22375816065853e-05, "loss": 0.04022941887378693, "step": 27360 }, { "epoch": 7.768946920238433, "grad_norm": 7.645729064941406, "learning_rate": 9.223474311666194e-05, "loss": 0.08354773521423339, "step": 27370 }, { "epoch": 7.771785410161794, "grad_norm": 5.908488750457764, "learning_rate": 9.223190462673857e-05, "loss": 0.060514253377914426, "step": 27380 }, { "epoch": 7.774623900085155, "grad_norm": 6.174692153930664, "learning_rate": 9.222906613681522e-05, "loss": 0.06654593348503113, "step": 27390 }, { "epoch": 7.777462390008515, "grad_norm": 4.848662376403809, "learning_rate": 9.222622764689186e-05, "loss": 0.04285626709461212, "step": 27400 }, { "epoch": 7.780300879931876, "grad_norm": 10.472878456115723, "learning_rate": 9.222338915696849e-05, "loss": 0.09213249683380127, "step": 27410 }, { "epoch": 7.783139369855237, "grad_norm": 5.2125115394592285, "learning_rate": 9.222055066704513e-05, "loss": 0.05633129477500916, "step": 27420 }, { "epoch": 7.785977859778598, "grad_norm": 10.373481750488281, "learning_rate": 9.221771217712177e-05, "loss": 0.05509376525878906, "step": 27430 }, { "epoch": 7.788816349701959, "grad_norm": 2.918355941772461, "learning_rate": 9.221487368719842e-05, "loss": 0.04273054897785187, "step": 27440 }, { "epoch": 7.79165483962532, "grad_norm": 6.115579128265381, "learning_rate": 9.221203519727506e-05, "loss": 0.04877748191356659, "step": 27450 }, { "epoch": 7.79449332954868, "grad_norm": 5.734519958496094, "learning_rate": 9.22091967073517e-05, "loss": 0.06170972585678101, "step": 27460 }, { "epoch": 7.797331819472041, "grad_norm": 9.593297004699707, "learning_rate": 9.220635821742834e-05, "loss": 0.04703909754753113, "step": 27470 }, { "epoch": 7.800170309395401, "grad_norm": 7.246795177459717, "learning_rate": 9.220351972750497e-05, "loss": 0.052379781007766725, "step": 27480 }, { "epoch": 7.8030087993187625, "grad_norm": 10.030564308166504, "learning_rate": 9.220068123758161e-05, "loss": 0.06408230662345886, "step": 27490 }, { "epoch": 7.805847289242124, "grad_norm": 8.329371452331543, "learning_rate": 9.219784274765825e-05, "loss": 0.04352632761001587, "step": 27500 }, { "epoch": 7.805847289242124, "eval_accuracy": 0.949895084885865, "eval_loss": 0.1502717137336731, "eval_runtime": 34.7304, "eval_samples_per_second": 452.831, "eval_steps_per_second": 7.083, "step": 27500 }, { "epoch": 7.808685779165484, "grad_norm": 7.813311576843262, "learning_rate": 9.219500425773488e-05, "loss": 0.07399975061416626, "step": 27510 }, { "epoch": 7.811524269088845, "grad_norm": 5.832639694213867, "learning_rate": 9.219216576781153e-05, "loss": 0.04957616329193115, "step": 27520 }, { "epoch": 7.814362759012205, "grad_norm": 4.746920108795166, "learning_rate": 9.218932727788817e-05, "loss": 0.07829773426055908, "step": 27530 }, { "epoch": 7.817201248935566, "grad_norm": 1.993067741394043, "learning_rate": 9.21864887879648e-05, "loss": 0.0534460723400116, "step": 27540 }, { "epoch": 7.820039738858927, "grad_norm": 8.796961784362793, "learning_rate": 9.218365029804144e-05, "loss": 0.05994941592216492, "step": 27550 }, { "epoch": 7.822878228782288, "grad_norm": 12.875123023986816, "learning_rate": 9.218081180811809e-05, "loss": 0.060650980472564696, "step": 27560 }, { "epoch": 7.825716718705649, "grad_norm": 6.832249641418457, "learning_rate": 9.217797331819473e-05, "loss": 0.05215809345245361, "step": 27570 }, { "epoch": 7.828555208629009, "grad_norm": 9.29125690460205, "learning_rate": 9.217513482827135e-05, "loss": 0.061763995885849, "step": 27580 }, { "epoch": 7.83139369855237, "grad_norm": 5.927080154418945, "learning_rate": 9.217229633834801e-05, "loss": 0.049843046069145205, "step": 27590 }, { "epoch": 7.834232188475731, "grad_norm": 4.575740337371826, "learning_rate": 9.216945784842465e-05, "loss": 0.05336930751800537, "step": 27600 }, { "epoch": 7.837070678399091, "grad_norm": 4.323022365570068, "learning_rate": 9.216661935850128e-05, "loss": 0.04172036349773407, "step": 27610 }, { "epoch": 7.839909168322452, "grad_norm": 7.452962398529053, "learning_rate": 9.216378086857792e-05, "loss": 0.056446892023086545, "step": 27620 }, { "epoch": 7.8427476582458135, "grad_norm": 3.6570537090301514, "learning_rate": 9.216094237865456e-05, "loss": 0.0909299373626709, "step": 27630 }, { "epoch": 7.845586148169174, "grad_norm": 12.73221492767334, "learning_rate": 9.215810388873119e-05, "loss": 0.06791030168533325, "step": 27640 }, { "epoch": 7.848424638092535, "grad_norm": 7.571012496948242, "learning_rate": 9.215526539880784e-05, "loss": 0.06704255938529968, "step": 27650 }, { "epoch": 7.851263128015896, "grad_norm": 2.268461227416992, "learning_rate": 9.215242690888449e-05, "loss": 0.0493740439414978, "step": 27660 }, { "epoch": 7.854101617939256, "grad_norm": 3.347226142883301, "learning_rate": 9.214958841896111e-05, "loss": 0.05148876905441284, "step": 27670 }, { "epoch": 7.856940107862617, "grad_norm": 4.002326011657715, "learning_rate": 9.214674992903775e-05, "loss": 0.06097562909126282, "step": 27680 }, { "epoch": 7.8597785977859775, "grad_norm": 8.279675483703613, "learning_rate": 9.21439114391144e-05, "loss": 0.04657678306102753, "step": 27690 }, { "epoch": 7.862617087709339, "grad_norm": 17.320144653320312, "learning_rate": 9.214107294919104e-05, "loss": 0.06607290506362914, "step": 27700 }, { "epoch": 7.8654555776327, "grad_norm": 13.966056823730469, "learning_rate": 9.213823445926767e-05, "loss": 0.07697736620903015, "step": 27710 }, { "epoch": 7.86829406755606, "grad_norm": 7.888243198394775, "learning_rate": 9.213539596934432e-05, "loss": 0.042550137639045714, "step": 27720 }, { "epoch": 7.871132557479421, "grad_norm": 8.64561653137207, "learning_rate": 9.213255747942096e-05, "loss": 0.0545678973197937, "step": 27730 }, { "epoch": 7.873971047402781, "grad_norm": 5.617148399353027, "learning_rate": 9.212971898949759e-05, "loss": 0.03850301206111908, "step": 27740 }, { "epoch": 7.876809537326142, "grad_norm": 3.7417802810668945, "learning_rate": 9.212688049957423e-05, "loss": 0.03650761246681213, "step": 27750 }, { "epoch": 7.8796480272495035, "grad_norm": 5.586979389190674, "learning_rate": 9.212404200965087e-05, "loss": 0.05674940347671509, "step": 27760 }, { "epoch": 7.882486517172864, "grad_norm": 7.4521613121032715, "learning_rate": 9.21212035197275e-05, "loss": 0.04903608858585358, "step": 27770 }, { "epoch": 7.885325007096225, "grad_norm": 3.4446208477020264, "learning_rate": 9.211836502980414e-05, "loss": 0.05853551030158997, "step": 27780 }, { "epoch": 7.888163497019586, "grad_norm": 8.112581253051758, "learning_rate": 9.21155265398808e-05, "loss": 0.05667376518249512, "step": 27790 }, { "epoch": 7.891001986942946, "grad_norm": 4.480325698852539, "learning_rate": 9.211268804995742e-05, "loss": 0.044754183292388915, "step": 27800 }, { "epoch": 7.893840476866307, "grad_norm": 4.393670082092285, "learning_rate": 9.210984956003407e-05, "loss": 0.050616466999053956, "step": 27810 }, { "epoch": 7.8966789667896675, "grad_norm": 9.462894439697266, "learning_rate": 9.210701107011071e-05, "loss": 0.04917446374893188, "step": 27820 }, { "epoch": 7.899517456713029, "grad_norm": 4.955223083496094, "learning_rate": 9.210417258018735e-05, "loss": 0.049644047021865846, "step": 27830 }, { "epoch": 7.90235594663639, "grad_norm": 7.134303092956543, "learning_rate": 9.210133409026398e-05, "loss": 0.04633998572826385, "step": 27840 }, { "epoch": 7.90519443655975, "grad_norm": 9.44558048248291, "learning_rate": 9.209849560034063e-05, "loss": 0.05542999505996704, "step": 27850 }, { "epoch": 7.908032926483111, "grad_norm": 5.316269397735596, "learning_rate": 9.209565711041727e-05, "loss": 0.03897375762462616, "step": 27860 }, { "epoch": 7.910871416406472, "grad_norm": 7.607237339019775, "learning_rate": 9.20928186204939e-05, "loss": 0.031464028358459475, "step": 27870 }, { "epoch": 7.913709906329832, "grad_norm": 2.2420737743377686, "learning_rate": 9.208998013057054e-05, "loss": 0.058660632371902464, "step": 27880 }, { "epoch": 7.9165483962531935, "grad_norm": 3.431729316711426, "learning_rate": 9.208714164064718e-05, "loss": 0.0716614305973053, "step": 27890 }, { "epoch": 7.919386886176554, "grad_norm": 6.447558879852295, "learning_rate": 9.208430315072381e-05, "loss": 0.06112261414527893, "step": 27900 }, { "epoch": 7.922225376099915, "grad_norm": 12.647773742675781, "learning_rate": 9.208146466080045e-05, "loss": 0.05065518021583557, "step": 27910 }, { "epoch": 7.925063866023276, "grad_norm": 8.252717018127441, "learning_rate": 9.207862617087711e-05, "loss": 0.0577195942401886, "step": 27920 }, { "epoch": 7.927902355946636, "grad_norm": 5.817713737487793, "learning_rate": 9.207578768095373e-05, "loss": 0.048374733328819274, "step": 27930 }, { "epoch": 7.930740845869997, "grad_norm": 8.077741622924805, "learning_rate": 9.207294919103038e-05, "loss": 0.06330137848854064, "step": 27940 }, { "epoch": 7.9335793357933575, "grad_norm": 9.133475303649902, "learning_rate": 9.207011070110702e-05, "loss": 0.0679597795009613, "step": 27950 }, { "epoch": 7.936417825716719, "grad_norm": 7.546574592590332, "learning_rate": 9.206727221118366e-05, "loss": 0.0436888724565506, "step": 27960 }, { "epoch": 7.93925631564008, "grad_norm": 4.666861057281494, "learning_rate": 9.206443372126029e-05, "loss": 0.025900983810424806, "step": 27970 }, { "epoch": 7.94209480556344, "grad_norm": 6.884302139282227, "learning_rate": 9.206159523133693e-05, "loss": 0.03398088216781616, "step": 27980 }, { "epoch": 7.944933295486801, "grad_norm": 16.236988067626953, "learning_rate": 9.205875674141358e-05, "loss": 0.07444546222686768, "step": 27990 }, { "epoch": 7.947771785410162, "grad_norm": 9.773675918579102, "learning_rate": 9.205591825149021e-05, "loss": 0.03838198184967041, "step": 28000 }, { "epoch": 7.947771785410162, "eval_accuracy": 0.94976791505055, "eval_loss": 0.14682091772556305, "eval_runtime": 34.6033, "eval_samples_per_second": 454.495, "eval_steps_per_second": 7.109, "step": 28000 }, { "epoch": 7.950610275333522, "grad_norm": 5.911800384521484, "learning_rate": 9.205307976156685e-05, "loss": 0.04279097318649292, "step": 28010 }, { "epoch": 7.9534487652568835, "grad_norm": 5.5161614418029785, "learning_rate": 9.20502412716435e-05, "loss": 0.043392288684844973, "step": 28020 }, { "epoch": 7.956287255180245, "grad_norm": 14.02033805847168, "learning_rate": 9.204740278172012e-05, "loss": 0.0775744616985321, "step": 28030 }, { "epoch": 7.959125745103605, "grad_norm": 8.225926399230957, "learning_rate": 9.204456429179676e-05, "loss": 0.048159831762313844, "step": 28040 }, { "epoch": 7.961964235026966, "grad_norm": 4.773603916168213, "learning_rate": 9.204172580187342e-05, "loss": 0.06353996992111206, "step": 28050 }, { "epoch": 7.964802724950326, "grad_norm": 7.425128936767578, "learning_rate": 9.203888731195005e-05, "loss": 0.06563827395439148, "step": 28060 }, { "epoch": 7.967641214873687, "grad_norm": 1.2420096397399902, "learning_rate": 9.203604882202669e-05, "loss": 0.044851064682006836, "step": 28070 }, { "epoch": 7.970479704797048, "grad_norm": 0.46403560042381287, "learning_rate": 9.203321033210333e-05, "loss": 0.05992472767829895, "step": 28080 }, { "epoch": 7.973318194720409, "grad_norm": 3.339629650115967, "learning_rate": 9.203037184217997e-05, "loss": 0.057371479272842404, "step": 28090 }, { "epoch": 7.97615668464377, "grad_norm": 4.332496166229248, "learning_rate": 9.20275333522566e-05, "loss": 0.058891397714614865, "step": 28100 }, { "epoch": 7.97899517456713, "grad_norm": 5.52352237701416, "learning_rate": 9.202469486233324e-05, "loss": 0.04769936501979828, "step": 28110 }, { "epoch": 7.981833664490491, "grad_norm": 5.481972694396973, "learning_rate": 9.202185637240988e-05, "loss": 0.06935304999351502, "step": 28120 }, { "epoch": 7.984672154413852, "grad_norm": 10.53383731842041, "learning_rate": 9.201901788248652e-05, "loss": 0.0607934832572937, "step": 28130 }, { "epoch": 7.987510644337212, "grad_norm": 2.5462541580200195, "learning_rate": 9.201617939256316e-05, "loss": 0.05384949445724487, "step": 28140 }, { "epoch": 7.9903491342605735, "grad_norm": 3.49173903465271, "learning_rate": 9.20133409026398e-05, "loss": 0.06262941956520081, "step": 28150 }, { "epoch": 7.993187624183934, "grad_norm": 6.712621688842773, "learning_rate": 9.201050241271643e-05, "loss": 0.04716585576534271, "step": 28160 }, { "epoch": 7.996026114107295, "grad_norm": 5.424021244049072, "learning_rate": 9.200766392279307e-05, "loss": 0.06175909638404846, "step": 28170 }, { "epoch": 7.998864604030656, "grad_norm": 15.831409454345703, "learning_rate": 9.200482543286972e-05, "loss": 0.061083626747131345, "step": 28180 }, { "epoch": 8.001703093954017, "grad_norm": 2.762171745300293, "learning_rate": 9.200198694294636e-05, "loss": 0.04051099717617035, "step": 28190 }, { "epoch": 8.004541583877376, "grad_norm": 13.821151733398438, "learning_rate": 9.1999148453023e-05, "loss": 0.0484976589679718, "step": 28200 }, { "epoch": 8.007380073800737, "grad_norm": 2.8958840370178223, "learning_rate": 9.199630996309964e-05, "loss": 0.0331342339515686, "step": 28210 }, { "epoch": 8.010218563724099, "grad_norm": 6.545111179351807, "learning_rate": 9.199347147317627e-05, "loss": 0.05169202089309692, "step": 28220 }, { "epoch": 8.01305705364746, "grad_norm": 20.96704864501953, "learning_rate": 9.199063298325291e-05, "loss": 0.05308531522750855, "step": 28230 }, { "epoch": 8.01589554357082, "grad_norm": 2.9279415607452393, "learning_rate": 9.198779449332955e-05, "loss": 0.05656053423881531, "step": 28240 }, { "epoch": 8.018734033494182, "grad_norm": 3.5465779304504395, "learning_rate": 9.198495600340619e-05, "loss": 0.03492711186408996, "step": 28250 }, { "epoch": 8.021572523417541, "grad_norm": 3.1364502906799316, "learning_rate": 9.198211751348283e-05, "loss": 0.052075517177581784, "step": 28260 }, { "epoch": 8.024411013340902, "grad_norm": 2.4820046424865723, "learning_rate": 9.197927902355947e-05, "loss": 0.02367791086435318, "step": 28270 }, { "epoch": 8.027249503264263, "grad_norm": 4.6682939529418945, "learning_rate": 9.197644053363612e-05, "loss": 0.03664646744728088, "step": 28280 }, { "epoch": 8.030087993187625, "grad_norm": 3.0296170711517334, "learning_rate": 9.197360204371274e-05, "loss": 0.03100764751434326, "step": 28290 }, { "epoch": 8.032926483110986, "grad_norm": 6.33265495300293, "learning_rate": 9.197076355378938e-05, "loss": 0.05985856056213379, "step": 28300 }, { "epoch": 8.035764973034345, "grad_norm": 6.911393165588379, "learning_rate": 9.196820891285836e-05, "loss": 0.06504781842231751, "step": 28310 }, { "epoch": 8.038603462957706, "grad_norm": 8.955972671508789, "learning_rate": 9.1965370422935e-05, "loss": 0.05373931527137756, "step": 28320 }, { "epoch": 8.041441952881067, "grad_norm": 5.232260227203369, "learning_rate": 9.196253193301165e-05, "loss": 0.043723270297050476, "step": 28330 }, { "epoch": 8.044280442804428, "grad_norm": 2.268800973892212, "learning_rate": 9.195969344308828e-05, "loss": 0.03270927667617798, "step": 28340 }, { "epoch": 8.04711893272779, "grad_norm": 10.794477462768555, "learning_rate": 9.195685495316492e-05, "loss": 0.041232308745384215, "step": 28350 }, { "epoch": 8.049957422651149, "grad_norm": 4.0697808265686035, "learning_rate": 9.195401646324156e-05, "loss": 0.04476373791694641, "step": 28360 }, { "epoch": 8.05279591257451, "grad_norm": 9.340675354003906, "learning_rate": 9.19511779733182e-05, "loss": 0.044361650943756104, "step": 28370 }, { "epoch": 8.055634402497871, "grad_norm": 6.157769203186035, "learning_rate": 9.194833948339484e-05, "loss": 0.06706232428550721, "step": 28380 }, { "epoch": 8.058472892421232, "grad_norm": 1.6204864978790283, "learning_rate": 9.194550099347148e-05, "loss": 0.0604050874710083, "step": 28390 }, { "epoch": 8.061311382344593, "grad_norm": 11.640283584594727, "learning_rate": 9.194266250354811e-05, "loss": 0.051392877101898195, "step": 28400 }, { "epoch": 8.064149872267954, "grad_norm": 7.895516872406006, "learning_rate": 9.193982401362475e-05, "loss": 0.03971342742443085, "step": 28410 }, { "epoch": 8.066988362191314, "grad_norm": 9.128664016723633, "learning_rate": 9.193698552370139e-05, "loss": 0.04163123071193695, "step": 28420 }, { "epoch": 8.069826852114675, "grad_norm": 4.264697551727295, "learning_rate": 9.193414703377803e-05, "loss": 0.033362981677055356, "step": 28430 }, { "epoch": 8.072665342038036, "grad_norm": 1.0565276145935059, "learning_rate": 9.193130854385468e-05, "loss": 0.04537658393383026, "step": 28440 }, { "epoch": 8.075503831961397, "grad_norm": 6.020825386047363, "learning_rate": 9.192847005393132e-05, "loss": 0.038863441348075865, "step": 28450 }, { "epoch": 8.078342321884758, "grad_norm": 7.248174667358398, "learning_rate": 9.192563156400796e-05, "loss": 0.06177175045013428, "step": 28460 }, { "epoch": 8.081180811808117, "grad_norm": 3.1284103393554688, "learning_rate": 9.192279307408459e-05, "loss": 0.030463793873786928, "step": 28470 }, { "epoch": 8.084019301731479, "grad_norm": 5.543802738189697, "learning_rate": 9.191995458416123e-05, "loss": 0.031813687086105345, "step": 28480 }, { "epoch": 8.08685779165484, "grad_norm": 9.826733589172363, "learning_rate": 9.191711609423787e-05, "loss": 0.046603769063949585, "step": 28490 }, { "epoch": 8.0896962815782, "grad_norm": 1.3911833763122559, "learning_rate": 9.191427760431451e-05, "loss": 0.05172548294067383, "step": 28500 }, { "epoch": 8.0896962815782, "eval_accuracy": 0.9283397977999619, "eval_loss": 0.22108909487724304, "eval_runtime": 38.1478, "eval_samples_per_second": 412.265, "eval_steps_per_second": 6.449, "step": 28500 }, { "epoch": 8.092534771501562, "grad_norm": 7.278095245361328, "learning_rate": 9.191143911439115e-05, "loss": 0.06798923611640931, "step": 28510 }, { "epoch": 8.095373261424921, "grad_norm": 4.418974876403809, "learning_rate": 9.190860062446779e-05, "loss": 0.0382205456495285, "step": 28520 }, { "epoch": 8.098211751348282, "grad_norm": 9.568801879882812, "learning_rate": 9.190576213454442e-05, "loss": 0.04901007115840912, "step": 28530 }, { "epoch": 8.101050241271643, "grad_norm": 8.048992156982422, "learning_rate": 9.190292364462106e-05, "loss": 0.03402008414268494, "step": 28540 }, { "epoch": 8.103888731195005, "grad_norm": 4.480045795440674, "learning_rate": 9.19000851546977e-05, "loss": 0.04799588918685913, "step": 28550 }, { "epoch": 8.106727221118366, "grad_norm": 4.374752044677734, "learning_rate": 9.189724666477434e-05, "loss": 0.04597575068473816, "step": 28560 }, { "epoch": 8.109565711041725, "grad_norm": 3.2998430728912354, "learning_rate": 9.189440817485099e-05, "loss": 0.04668662250041962, "step": 28570 }, { "epoch": 8.112404200965086, "grad_norm": 6.237977981567383, "learning_rate": 9.189156968492763e-05, "loss": 0.04269538223743439, "step": 28580 }, { "epoch": 8.115242690888447, "grad_norm": 13.340614318847656, "learning_rate": 9.188873119500427e-05, "loss": 0.07286246418952942, "step": 28590 }, { "epoch": 8.118081180811808, "grad_norm": 9.157264709472656, "learning_rate": 9.18858927050809e-05, "loss": 0.03772386908531189, "step": 28600 }, { "epoch": 8.12091967073517, "grad_norm": 9.036079406738281, "learning_rate": 9.188305421515754e-05, "loss": 0.05378863215446472, "step": 28610 }, { "epoch": 8.12375816065853, "grad_norm": 6.299286365509033, "learning_rate": 9.188021572523418e-05, "loss": 0.04998157024383545, "step": 28620 }, { "epoch": 8.12659665058189, "grad_norm": 5.707374572753906, "learning_rate": 9.187737723531082e-05, "loss": 0.04355730712413788, "step": 28630 }, { "epoch": 8.129435140505251, "grad_norm": 3.698911428451538, "learning_rate": 9.187453874538746e-05, "loss": 0.04606961905956268, "step": 28640 }, { "epoch": 8.132273630428612, "grad_norm": 10.855918884277344, "learning_rate": 9.18717002554641e-05, "loss": 0.053164446353912355, "step": 28650 }, { "epoch": 8.135112120351973, "grad_norm": 4.809197425842285, "learning_rate": 9.186886176554073e-05, "loss": 0.04212176501750946, "step": 28660 }, { "epoch": 8.137950610275334, "grad_norm": 7.144283294677734, "learning_rate": 9.186602327561737e-05, "loss": 0.03446503281593323, "step": 28670 }, { "epoch": 8.140789100198694, "grad_norm": 7.3925580978393555, "learning_rate": 9.186318478569401e-05, "loss": 0.04779331386089325, "step": 28680 }, { "epoch": 8.143627590122055, "grad_norm": 5.121337890625, "learning_rate": 9.186034629577066e-05, "loss": 0.05646657943725586, "step": 28690 }, { "epoch": 8.146466080045416, "grad_norm": 5.386987686157227, "learning_rate": 9.18575078058473e-05, "loss": 0.032058247923851015, "step": 28700 }, { "epoch": 8.149304569968777, "grad_norm": 8.279790878295898, "learning_rate": 9.185466931592394e-05, "loss": 0.030203068256378175, "step": 28710 }, { "epoch": 8.152143059892138, "grad_norm": 5.5714850425720215, "learning_rate": 9.185183082600058e-05, "loss": 0.0558240532875061, "step": 28720 }, { "epoch": 8.154981549815497, "grad_norm": 9.226639747619629, "learning_rate": 9.184899233607721e-05, "loss": 0.05623010396957397, "step": 28730 }, { "epoch": 8.157820039738858, "grad_norm": 5.341446876525879, "learning_rate": 9.184615384615385e-05, "loss": 0.04455822706222534, "step": 28740 }, { "epoch": 8.16065852966222, "grad_norm": 7.046427249908447, "learning_rate": 9.184331535623049e-05, "loss": 0.04213506579399109, "step": 28750 }, { "epoch": 8.16349701958558, "grad_norm": 1.7368651628494263, "learning_rate": 9.184047686630712e-05, "loss": 0.06034585237503052, "step": 28760 }, { "epoch": 8.166335509508942, "grad_norm": 3.8199892044067383, "learning_rate": 9.183763837638377e-05, "loss": 0.04108112454414368, "step": 28770 }, { "epoch": 8.169173999432301, "grad_norm": 6.163265705108643, "learning_rate": 9.183479988646041e-05, "loss": 0.04049583673477173, "step": 28780 }, { "epoch": 8.172012489355662, "grad_norm": 6.918125152587891, "learning_rate": 9.183196139653704e-05, "loss": 0.04785979390144348, "step": 28790 }, { "epoch": 8.174850979279023, "grad_norm": 10.490229606628418, "learning_rate": 9.182912290661368e-05, "loss": 0.04642013907432556, "step": 28800 }, { "epoch": 8.177689469202384, "grad_norm": 4.313780307769775, "learning_rate": 9.182628441669032e-05, "loss": 0.056650400161743164, "step": 28810 }, { "epoch": 8.180527959125746, "grad_norm": 9.007664680480957, "learning_rate": 9.182344592676697e-05, "loss": 0.05141129493713379, "step": 28820 }, { "epoch": 8.183366449049107, "grad_norm": 1.85612154006958, "learning_rate": 9.182060743684361e-05, "loss": 0.036400502920150755, "step": 28830 }, { "epoch": 8.186204938972466, "grad_norm": 8.766794204711914, "learning_rate": 9.181776894692025e-05, "loss": 0.034586849808692935, "step": 28840 }, { "epoch": 8.189043428895827, "grad_norm": 10.433965682983398, "learning_rate": 9.181493045699689e-05, "loss": 0.048642227053642274, "step": 28850 }, { "epoch": 8.191881918819188, "grad_norm": 8.191515922546387, "learning_rate": 9.181209196707352e-05, "loss": 0.04376324117183685, "step": 28860 }, { "epoch": 8.19472040874255, "grad_norm": 7.875390529632568, "learning_rate": 9.180925347715016e-05, "loss": 0.05505368113517761, "step": 28870 }, { "epoch": 8.19755889866591, "grad_norm": 9.02625846862793, "learning_rate": 9.18064149872268e-05, "loss": 0.05025151968002319, "step": 28880 }, { "epoch": 8.20039738858927, "grad_norm": 5.015901565551758, "learning_rate": 9.180357649730343e-05, "loss": 0.030300998687744142, "step": 28890 }, { "epoch": 8.203235878512631, "grad_norm": 7.343891620635986, "learning_rate": 9.180073800738008e-05, "loss": 0.05139339566230774, "step": 28900 }, { "epoch": 8.206074368435992, "grad_norm": 4.068420886993408, "learning_rate": 9.179789951745673e-05, "loss": 0.047973239421844484, "step": 28910 }, { "epoch": 8.208912858359353, "grad_norm": 12.822044372558594, "learning_rate": 9.179506102753335e-05, "loss": 0.030669409036636352, "step": 28920 }, { "epoch": 8.211751348282714, "grad_norm": 8.481575012207031, "learning_rate": 9.179222253761e-05, "loss": 0.044131124019622804, "step": 28930 }, { "epoch": 8.214589838206074, "grad_norm": 3.3098909854888916, "learning_rate": 9.178938404768664e-05, "loss": 0.02932083308696747, "step": 28940 }, { "epoch": 8.217428328129435, "grad_norm": 6.633694171905518, "learning_rate": 9.178654555776328e-05, "loss": 0.03570869565010071, "step": 28950 }, { "epoch": 8.220266818052796, "grad_norm": 3.5205917358398438, "learning_rate": 9.17837070678399e-05, "loss": 0.03723951280117035, "step": 28960 }, { "epoch": 8.223105307976157, "grad_norm": 7.0210418701171875, "learning_rate": 9.178086857791656e-05, "loss": 0.0466498851776123, "step": 28970 }, { "epoch": 8.225943797899518, "grad_norm": 7.236568450927734, "learning_rate": 9.177803008799319e-05, "loss": 0.05295954346656799, "step": 28980 }, { "epoch": 8.228782287822877, "grad_norm": 4.857354164123535, "learning_rate": 9.177519159806983e-05, "loss": 0.039018043875694276, "step": 28990 }, { "epoch": 8.231620777746238, "grad_norm": 1.7247179746627808, "learning_rate": 9.177235310814647e-05, "loss": 0.027162781357765196, "step": 29000 }, { "epoch": 8.231620777746238, "eval_accuracy": 0.9539009346982895, "eval_loss": 0.1418570578098297, "eval_runtime": 32.2, "eval_samples_per_second": 488.416, "eval_steps_per_second": 7.64, "step": 29000 }, { "epoch": 8.2344592676696, "grad_norm": 5.467371463775635, "learning_rate": 9.176951461822311e-05, "loss": 0.03821054995059967, "step": 29010 }, { "epoch": 8.23729775759296, "grad_norm": 3.9097723960876465, "learning_rate": 9.176667612829974e-05, "loss": 0.033811265230178834, "step": 29020 }, { "epoch": 8.240136247516322, "grad_norm": 6.967401504516602, "learning_rate": 9.17638376383764e-05, "loss": 0.053939533233642575, "step": 29030 }, { "epoch": 8.242974737439683, "grad_norm": 3.343384265899658, "learning_rate": 9.176099914845304e-05, "loss": 0.054558980464935306, "step": 29040 }, { "epoch": 8.245813227363042, "grad_norm": 2.335385322570801, "learning_rate": 9.175816065852966e-05, "loss": 0.0774939477443695, "step": 29050 }, { "epoch": 8.248651717286403, "grad_norm": 10.861949920654297, "learning_rate": 9.17553221686063e-05, "loss": 0.049200204014778134, "step": 29060 }, { "epoch": 8.251490207209764, "grad_norm": 5.711300373077393, "learning_rate": 9.175248367868295e-05, "loss": 0.040151742100715634, "step": 29070 }, { "epoch": 8.254328697133126, "grad_norm": 2.2377774715423584, "learning_rate": 9.174964518875957e-05, "loss": 0.021332843601703642, "step": 29080 }, { "epoch": 8.257167187056487, "grad_norm": 2.891509771347046, "learning_rate": 9.174680669883622e-05, "loss": 0.037894412875175476, "step": 29090 }, { "epoch": 8.260005676979846, "grad_norm": 5.976760387420654, "learning_rate": 9.174396820891287e-05, "loss": 0.037930235266685486, "step": 29100 }, { "epoch": 8.262844166903207, "grad_norm": 4.79329252243042, "learning_rate": 9.17411297189895e-05, "loss": 0.028436940908432008, "step": 29110 }, { "epoch": 8.265682656826568, "grad_norm": 3.197251558303833, "learning_rate": 9.173829122906614e-05, "loss": 0.02249145358800888, "step": 29120 }, { "epoch": 8.26852114674993, "grad_norm": 3.007537603378296, "learning_rate": 9.173545273914278e-05, "loss": 0.026993483304977417, "step": 29130 }, { "epoch": 8.27135963667329, "grad_norm": 10.848280906677246, "learning_rate": 9.173261424921942e-05, "loss": 0.05157219767570496, "step": 29140 }, { "epoch": 8.27419812659665, "grad_norm": 4.921207427978516, "learning_rate": 9.172977575929605e-05, "loss": 0.04146789014339447, "step": 29150 }, { "epoch": 8.27703661652001, "grad_norm": 1.9822973012924194, "learning_rate": 9.17269372693727e-05, "loss": 0.02445521056652069, "step": 29160 }, { "epoch": 8.279875106443372, "grad_norm": 7.023909568786621, "learning_rate": 9.172409877944935e-05, "loss": 0.034274926781654357, "step": 29170 }, { "epoch": 8.282713596366733, "grad_norm": 8.618539810180664, "learning_rate": 9.172126028952597e-05, "loss": 0.05661180019378662, "step": 29180 }, { "epoch": 8.285552086290094, "grad_norm": 4.813262462615967, "learning_rate": 9.171842179960262e-05, "loss": 0.04537460505962372, "step": 29190 }, { "epoch": 8.288390576213455, "grad_norm": 7.1207685470581055, "learning_rate": 9.171558330967926e-05, "loss": 0.0473751038312912, "step": 29200 }, { "epoch": 8.291229066136815, "grad_norm": 9.682143211364746, "learning_rate": 9.171274481975588e-05, "loss": 0.05127524733543396, "step": 29210 }, { "epoch": 8.294067556060176, "grad_norm": 4.758260250091553, "learning_rate": 9.170990632983253e-05, "loss": 0.03563633263111114, "step": 29220 }, { "epoch": 8.296906045983537, "grad_norm": 4.543030261993408, "learning_rate": 9.170706783990918e-05, "loss": 0.04117048680782318, "step": 29230 }, { "epoch": 8.299744535906898, "grad_norm": 2.172999620437622, "learning_rate": 9.170422934998581e-05, "loss": 0.04413871765136719, "step": 29240 }, { "epoch": 8.302583025830259, "grad_norm": 4.496009349822998, "learning_rate": 9.170139086006245e-05, "loss": 0.03158558905124664, "step": 29250 }, { "epoch": 8.305421515753618, "grad_norm": 6.9417724609375, "learning_rate": 9.169855237013909e-05, "loss": 0.06448270082473755, "step": 29260 }, { "epoch": 8.30826000567698, "grad_norm": 3.8521358966827393, "learning_rate": 9.169571388021573e-05, "loss": 0.06258092522621155, "step": 29270 }, { "epoch": 8.31109849560034, "grad_norm": 9.800846099853516, "learning_rate": 9.169287539029236e-05, "loss": 0.07962459325790405, "step": 29280 }, { "epoch": 8.313936985523702, "grad_norm": 7.083983421325684, "learning_rate": 9.1690036900369e-05, "loss": 0.05007003545761109, "step": 29290 }, { "epoch": 8.316775475447063, "grad_norm": 9.533513069152832, "learning_rate": 9.168719841044566e-05, "loss": 0.06358844637870789, "step": 29300 }, { "epoch": 8.319613965370422, "grad_norm": 5.306739330291748, "learning_rate": 9.168435992052229e-05, "loss": 0.05043999552726745, "step": 29310 }, { "epoch": 8.322452455293783, "grad_norm": 13.880953788757324, "learning_rate": 9.168152143059893e-05, "loss": 0.044310107827186584, "step": 29320 }, { "epoch": 8.325290945217144, "grad_norm": 1.6150132417678833, "learning_rate": 9.167868294067557e-05, "loss": 0.021419061720371245, "step": 29330 }, { "epoch": 8.328129435140506, "grad_norm": 17.129220962524414, "learning_rate": 9.16758444507522e-05, "loss": 0.061433333158493045, "step": 29340 }, { "epoch": 8.330967925063867, "grad_norm": 6.641970634460449, "learning_rate": 9.167300596082884e-05, "loss": 0.04935640096664429, "step": 29350 }, { "epoch": 8.333806414987226, "grad_norm": 1.777321696281433, "learning_rate": 9.167016747090549e-05, "loss": 0.05690605044364929, "step": 29360 }, { "epoch": 8.336644904910587, "grad_norm": 8.53802490234375, "learning_rate": 9.166732898098212e-05, "loss": 0.04172219038009643, "step": 29370 }, { "epoch": 8.339483394833948, "grad_norm": 5.205832004547119, "learning_rate": 9.166449049105876e-05, "loss": 0.06130287051200867, "step": 29380 }, { "epoch": 8.34232188475731, "grad_norm": 4.565342426300049, "learning_rate": 9.16616520011354e-05, "loss": 0.05527377724647522, "step": 29390 }, { "epoch": 8.34516037468067, "grad_norm": 8.795940399169922, "learning_rate": 9.165881351121204e-05, "loss": 0.027453899383544922, "step": 29400 }, { "epoch": 8.347998864604032, "grad_norm": 12.80562973022461, "learning_rate": 9.165597502128867e-05, "loss": 0.04659712314605713, "step": 29410 }, { "epoch": 8.35083735452739, "grad_norm": 4.213021278381348, "learning_rate": 9.165313653136531e-05, "loss": 0.024847716093063354, "step": 29420 }, { "epoch": 8.353675844450752, "grad_norm": 3.0600576400756836, "learning_rate": 9.165029804144197e-05, "loss": 0.036026188731193544, "step": 29430 }, { "epoch": 8.356514334374113, "grad_norm": 5.542091369628906, "learning_rate": 9.16474595515186e-05, "loss": 0.058727341890335086, "step": 29440 }, { "epoch": 8.359352824297474, "grad_norm": 0.5456578135490417, "learning_rate": 9.164462106159524e-05, "loss": 0.06286336779594422, "step": 29450 }, { "epoch": 8.362191314220835, "grad_norm": 1.6723270416259766, "learning_rate": 9.164178257167188e-05, "loss": 0.07350655794143676, "step": 29460 }, { "epoch": 8.365029804144195, "grad_norm": 5.674473285675049, "learning_rate": 9.16389440817485e-05, "loss": 0.05541070103645325, "step": 29470 }, { "epoch": 8.367868294067556, "grad_norm": 10.766094207763672, "learning_rate": 9.163610559182515e-05, "loss": 0.04677499234676361, "step": 29480 }, { "epoch": 8.370706783990917, "grad_norm": 11.36935806274414, "learning_rate": 9.163326710190179e-05, "loss": 0.03447652757167816, "step": 29490 }, { "epoch": 8.373545273914278, "grad_norm": 8.745195388793945, "learning_rate": 9.163042861197843e-05, "loss": 0.05394824743270874, "step": 29500 }, { "epoch": 8.373545273914278, "eval_accuracy": 0.9397850829783175, "eval_loss": 0.19986635446548462, "eval_runtime": 38.1069, "eval_samples_per_second": 412.707, "eval_steps_per_second": 6.456, "step": 29500 }, { "epoch": 8.376383763837639, "grad_norm": 13.509871482849121, "learning_rate": 9.162759012205507e-05, "loss": 0.08519450426101685, "step": 29510 }, { "epoch": 8.379222253760998, "grad_norm": 12.060954093933105, "learning_rate": 9.162475163213171e-05, "loss": 0.05631096363067627, "step": 29520 }, { "epoch": 8.38206074368436, "grad_norm": 11.396729469299316, "learning_rate": 9.162191314220835e-05, "loss": 0.04942348897457123, "step": 29530 }, { "epoch": 8.38489923360772, "grad_norm": 6.362123012542725, "learning_rate": 9.161907465228498e-05, "loss": 0.04782220721244812, "step": 29540 }, { "epoch": 8.387737723531082, "grad_norm": 9.573616981506348, "learning_rate": 9.161623616236162e-05, "loss": 0.05171979665756225, "step": 29550 }, { "epoch": 8.390576213454443, "grad_norm": 6.288716793060303, "learning_rate": 9.161339767243828e-05, "loss": 0.054399871826171876, "step": 29560 }, { "epoch": 8.393414703377802, "grad_norm": 3.6049697399139404, "learning_rate": 9.16105591825149e-05, "loss": 0.06263463497161866, "step": 29570 }, { "epoch": 8.396253193301163, "grad_norm": 3.9927899837493896, "learning_rate": 9.160772069259155e-05, "loss": 0.029347407817840575, "step": 29580 }, { "epoch": 8.399091683224524, "grad_norm": 1.4540461301803589, "learning_rate": 9.160488220266819e-05, "loss": 0.031480103731155396, "step": 29590 }, { "epoch": 8.401930173147885, "grad_norm": 11.211627006530762, "learning_rate": 9.160204371274482e-05, "loss": 0.06344976425170898, "step": 29600 }, { "epoch": 8.404768663071247, "grad_norm": 3.085808515548706, "learning_rate": 9.159920522282146e-05, "loss": 0.050990891456604, "step": 29610 }, { "epoch": 8.407607152994608, "grad_norm": 6.883896350860596, "learning_rate": 9.15963667328981e-05, "loss": 0.0398075670003891, "step": 29620 }, { "epoch": 8.410445642917967, "grad_norm": 5.3507771492004395, "learning_rate": 9.159352824297474e-05, "loss": 0.04338812232017517, "step": 29630 }, { "epoch": 8.413284132841328, "grad_norm": 10.677408218383789, "learning_rate": 9.159068975305138e-05, "loss": 0.08124479055404663, "step": 29640 }, { "epoch": 8.41612262276469, "grad_norm": 8.70841121673584, "learning_rate": 9.158785126312802e-05, "loss": 0.0804270088672638, "step": 29650 }, { "epoch": 8.41896111268805, "grad_norm": 1.5135940313339233, "learning_rate": 9.158501277320467e-05, "loss": 0.058144545555114745, "step": 29660 }, { "epoch": 8.421799602611411, "grad_norm": 4.556616306304932, "learning_rate": 9.15821742832813e-05, "loss": 0.03743485510349274, "step": 29670 }, { "epoch": 8.42463809253477, "grad_norm": 9.649806022644043, "learning_rate": 9.157933579335793e-05, "loss": 0.0543703556060791, "step": 29680 }, { "epoch": 8.427476582458132, "grad_norm": 3.257772445678711, "learning_rate": 9.157649730343458e-05, "loss": 0.04200856685638428, "step": 29690 }, { "epoch": 8.430315072381493, "grad_norm": 1.5643953084945679, "learning_rate": 9.157365881351122e-05, "loss": 0.03914765417575836, "step": 29700 }, { "epoch": 8.433153562304854, "grad_norm": 7.1360039710998535, "learning_rate": 9.157082032358786e-05, "loss": 0.05011311173439026, "step": 29710 }, { "epoch": 8.435992052228215, "grad_norm": 6.519112586975098, "learning_rate": 9.15679818336645e-05, "loss": 0.037799695134162904, "step": 29720 }, { "epoch": 8.438830542151575, "grad_norm": 9.256790161132812, "learning_rate": 9.156514334374113e-05, "loss": 0.0934967815876007, "step": 29730 }, { "epoch": 8.441669032074936, "grad_norm": 1.1970384120941162, "learning_rate": 9.156230485381777e-05, "loss": 0.059510219097137454, "step": 29740 }, { "epoch": 8.444507521998297, "grad_norm": 7.748683929443359, "learning_rate": 9.155946636389441e-05, "loss": 0.03996397852897644, "step": 29750 }, { "epoch": 8.447346011921658, "grad_norm": 5.370083808898926, "learning_rate": 9.155662787397105e-05, "loss": 0.023628032207489012, "step": 29760 }, { "epoch": 8.450184501845019, "grad_norm": 3.1009252071380615, "learning_rate": 9.15537893840477e-05, "loss": 0.05738246440887451, "step": 29770 }, { "epoch": 8.453022991768378, "grad_norm": 0.9692186117172241, "learning_rate": 9.155095089412433e-05, "loss": 0.033957117795944215, "step": 29780 }, { "epoch": 8.45586148169174, "grad_norm": 5.651897430419922, "learning_rate": 9.154811240420098e-05, "loss": 0.03240921497344971, "step": 29790 }, { "epoch": 8.4586999716151, "grad_norm": 11.409942626953125, "learning_rate": 9.15452739142776e-05, "loss": 0.05659984350204468, "step": 29800 }, { "epoch": 8.461538461538462, "grad_norm": 7.090261936187744, "learning_rate": 9.154243542435425e-05, "loss": 0.048052427172660825, "step": 29810 }, { "epoch": 8.464376951461823, "grad_norm": 17.69740867614746, "learning_rate": 9.153959693443089e-05, "loss": 0.07158581614494323, "step": 29820 }, { "epoch": 8.467215441385184, "grad_norm": 8.989297866821289, "learning_rate": 9.153675844450753e-05, "loss": 0.04450439214706421, "step": 29830 }, { "epoch": 8.470053931308543, "grad_norm": 6.515008449554443, "learning_rate": 9.153391995458417e-05, "loss": 0.03546598553657532, "step": 29840 }, { "epoch": 8.472892421231904, "grad_norm": 9.532102584838867, "learning_rate": 9.153108146466081e-05, "loss": 0.04528917670249939, "step": 29850 }, { "epoch": 8.475730911155265, "grad_norm": 7.359951972961426, "learning_rate": 9.152824297473744e-05, "loss": 0.06176455616950989, "step": 29860 }, { "epoch": 8.478569401078627, "grad_norm": 9.77370548248291, "learning_rate": 9.152540448481408e-05, "loss": 0.043275696039199826, "step": 29870 }, { "epoch": 8.481407891001988, "grad_norm": 3.292433738708496, "learning_rate": 9.152256599489072e-05, "loss": 0.029536953568458556, "step": 29880 }, { "epoch": 8.484246380925347, "grad_norm": 7.120720386505127, "learning_rate": 9.151972750496736e-05, "loss": 0.0586143434047699, "step": 29890 }, { "epoch": 8.487084870848708, "grad_norm": 11.070266723632812, "learning_rate": 9.1516889015044e-05, "loss": 0.048237144947052, "step": 29900 }, { "epoch": 8.48992336077207, "grad_norm": 2.7079741954803467, "learning_rate": 9.151405052512065e-05, "loss": 0.057972532510757444, "step": 29910 }, { "epoch": 8.49276185069543, "grad_norm": 7.952143669128418, "learning_rate": 9.151121203519727e-05, "loss": 0.03985680639743805, "step": 29920 }, { "epoch": 8.495600340618791, "grad_norm": 12.695466041564941, "learning_rate": 9.150837354527391e-05, "loss": 0.05104623436927795, "step": 29930 }, { "epoch": 8.49843883054215, "grad_norm": 5.448912620544434, "learning_rate": 9.150553505535056e-05, "loss": 0.03667646646499634, "step": 29940 }, { "epoch": 8.501277320465512, "grad_norm": 12.308127403259277, "learning_rate": 9.15026965654272e-05, "loss": 0.04891490936279297, "step": 29950 }, { "epoch": 8.504115810388873, "grad_norm": 2.9608662128448486, "learning_rate": 9.149985807550384e-05, "loss": 0.02779913544654846, "step": 29960 }, { "epoch": 8.506954300312234, "grad_norm": 8.527922630310059, "learning_rate": 9.149701958558048e-05, "loss": 0.05596394538879394, "step": 29970 }, { "epoch": 8.509792790235595, "grad_norm": 16.295148849487305, "learning_rate": 9.149418109565712e-05, "loss": 0.06117507219314575, "step": 29980 }, { "epoch": 8.512631280158956, "grad_norm": 5.432333469390869, "learning_rate": 9.149134260573375e-05, "loss": 0.0326551616191864, "step": 29990 }, { "epoch": 8.515469770082316, "grad_norm": 2.96633243560791, "learning_rate": 9.148850411581039e-05, "loss": 0.048088130354881284, "step": 30000 }, { "epoch": 8.515469770082316, "eval_accuracy": 0.9552998028867553, "eval_loss": 0.1317010223865509, "eval_runtime": 46.839, "eval_samples_per_second": 335.768, "eval_steps_per_second": 5.252, "step": 30000 }, { "epoch": 8.518308260005677, "grad_norm": 6.169906139373779, "learning_rate": 9.148566562588703e-05, "loss": 0.03283742368221283, "step": 30010 }, { "epoch": 8.521146749929038, "grad_norm": 9.512548446655273, "learning_rate": 9.148282713596366e-05, "loss": 0.047890061140060426, "step": 30020 }, { "epoch": 8.523985239852399, "grad_norm": 1.1769137382507324, "learning_rate": 9.147998864604032e-05, "loss": 0.03881982564926147, "step": 30030 }, { "epoch": 8.52682372977576, "grad_norm": 0.6856592297554016, "learning_rate": 9.147715015611696e-05, "loss": 0.04445732831954956, "step": 30040 }, { "epoch": 8.52966221969912, "grad_norm": 2.947347402572632, "learning_rate": 9.147431166619358e-05, "loss": 0.044934514164924624, "step": 30050 }, { "epoch": 8.53250070962248, "grad_norm": 11.417508125305176, "learning_rate": 9.147147317627023e-05, "loss": 0.06425618529319763, "step": 30060 }, { "epoch": 8.535339199545842, "grad_norm": 2.255972385406494, "learning_rate": 9.146863468634687e-05, "loss": 0.028419864177703858, "step": 30070 }, { "epoch": 8.538177689469203, "grad_norm": 4.253436088562012, "learning_rate": 9.146579619642351e-05, "loss": 0.033470749855041504, "step": 30080 }, { "epoch": 8.541016179392564, "grad_norm": 4.127338409423828, "learning_rate": 9.146295770650014e-05, "loss": 0.04060733914375305, "step": 30090 }, { "epoch": 8.543854669315923, "grad_norm": 9.556683540344238, "learning_rate": 9.146011921657679e-05, "loss": 0.04756430983543396, "step": 30100 }, { "epoch": 8.546693159239284, "grad_norm": 7.972593307495117, "learning_rate": 9.145728072665343e-05, "loss": 0.06813076138496399, "step": 30110 }, { "epoch": 8.549531649162645, "grad_norm": 5.6918792724609375, "learning_rate": 9.145444223673006e-05, "loss": 0.04487829506397247, "step": 30120 }, { "epoch": 8.552370139086007, "grad_norm": 2.9428954124450684, "learning_rate": 9.14516037468067e-05, "loss": 0.04081617295742035, "step": 30130 }, { "epoch": 8.555208629009368, "grad_norm": 3.1528444290161133, "learning_rate": 9.144876525688334e-05, "loss": 0.03730931878089905, "step": 30140 }, { "epoch": 8.558047118932727, "grad_norm": 3.261887788772583, "learning_rate": 9.144592676695997e-05, "loss": 0.03768889605998993, "step": 30150 }, { "epoch": 8.560885608856088, "grad_norm": 4.170751571655273, "learning_rate": 9.144308827703663e-05, "loss": 0.055479174852371214, "step": 30160 }, { "epoch": 8.56372409877945, "grad_norm": 10.640676498413086, "learning_rate": 9.144024978711327e-05, "loss": 0.05951871871948242, "step": 30170 }, { "epoch": 8.56656258870281, "grad_norm": 3.052016496658325, "learning_rate": 9.14374112971899e-05, "loss": 0.04160612225532532, "step": 30180 }, { "epoch": 8.569401078626171, "grad_norm": 5.5320281982421875, "learning_rate": 9.143457280726654e-05, "loss": 0.053351306915283205, "step": 30190 }, { "epoch": 8.57223956854953, "grad_norm": 11.076940536499023, "learning_rate": 9.143173431734318e-05, "loss": 0.05688897967338562, "step": 30200 }, { "epoch": 8.575078058472892, "grad_norm": 6.101572513580322, "learning_rate": 9.142889582741982e-05, "loss": 0.06946473717689514, "step": 30210 }, { "epoch": 8.577916548396253, "grad_norm": 9.25157642364502, "learning_rate": 9.142605733749645e-05, "loss": 0.039718320965766905, "step": 30220 }, { "epoch": 8.580755038319614, "grad_norm": 11.986663818359375, "learning_rate": 9.14232188475731e-05, "loss": 0.06082029342651367, "step": 30230 }, { "epoch": 8.583593528242975, "grad_norm": 4.038545608520508, "learning_rate": 9.142038035764974e-05, "loss": 0.056028473377227786, "step": 30240 }, { "epoch": 8.586432018166336, "grad_norm": 8.111284255981445, "learning_rate": 9.141754186772637e-05, "loss": 0.07507873177528382, "step": 30250 }, { "epoch": 8.589270508089696, "grad_norm": 4.428032875061035, "learning_rate": 9.141470337780301e-05, "loss": 0.03863259255886078, "step": 30260 }, { "epoch": 8.592108998013057, "grad_norm": 6.324302673339844, "learning_rate": 9.141186488787965e-05, "loss": 0.03118553161621094, "step": 30270 }, { "epoch": 8.594947487936418, "grad_norm": 8.19318962097168, "learning_rate": 9.140902639795628e-05, "loss": 0.037725919485092164, "step": 30280 }, { "epoch": 8.597785977859779, "grad_norm": 6.916101932525635, "learning_rate": 9.140618790803294e-05, "loss": 0.051826751232147215, "step": 30290 }, { "epoch": 8.60062446778314, "grad_norm": 7.639231204986572, "learning_rate": 9.140334941810958e-05, "loss": 0.055793917179107665, "step": 30300 }, { "epoch": 8.6034629577065, "grad_norm": 11.870710372924805, "learning_rate": 9.14005109281862e-05, "loss": 0.05330667495727539, "step": 30310 }, { "epoch": 8.60630144762986, "grad_norm": 6.836239814758301, "learning_rate": 9.139767243826285e-05, "loss": 0.0415511429309845, "step": 30320 }, { "epoch": 8.609139937553222, "grad_norm": 3.526616096496582, "learning_rate": 9.139483394833949e-05, "loss": 0.05651192665100098, "step": 30330 }, { "epoch": 8.611978427476583, "grad_norm": 13.539823532104492, "learning_rate": 9.139199545841613e-05, "loss": 0.03461110591888428, "step": 30340 }, { "epoch": 8.614816917399944, "grad_norm": 0.9407704472541809, "learning_rate": 9.138915696849276e-05, "loss": 0.05941576361656189, "step": 30350 }, { "epoch": 8.617655407323305, "grad_norm": 5.082376956939697, "learning_rate": 9.138631847856941e-05, "loss": 0.04816948175430298, "step": 30360 }, { "epoch": 8.620493897246664, "grad_norm": 2.490755081176758, "learning_rate": 9.138347998864605e-05, "loss": 0.05836004018783569, "step": 30370 }, { "epoch": 8.623332387170025, "grad_norm": 0.29796460270881653, "learning_rate": 9.138064149872268e-05, "loss": 0.02883499562740326, "step": 30380 }, { "epoch": 8.626170877093386, "grad_norm": 7.8096232414245605, "learning_rate": 9.137780300879932e-05, "loss": 0.04400044083595276, "step": 30390 }, { "epoch": 8.629009367016748, "grad_norm": 3.0272037982940674, "learning_rate": 9.137496451887596e-05, "loss": 0.03767255246639252, "step": 30400 }, { "epoch": 8.631847856940109, "grad_norm": 4.269240856170654, "learning_rate": 9.137212602895259e-05, "loss": 0.04836492538452149, "step": 30410 }, { "epoch": 8.634686346863468, "grad_norm": 8.511878967285156, "learning_rate": 9.136928753902923e-05, "loss": 0.06798691749572754, "step": 30420 }, { "epoch": 8.63752483678683, "grad_norm": 6.096120357513428, "learning_rate": 9.136644904910589e-05, "loss": 0.03542257845401764, "step": 30430 }, { "epoch": 8.64036332671019, "grad_norm": 2.250990390777588, "learning_rate": 9.136361055918252e-05, "loss": 0.0451587975025177, "step": 30440 }, { "epoch": 8.643201816633551, "grad_norm": 8.620338439941406, "learning_rate": 9.136077206925916e-05, "loss": 0.05207977294921875, "step": 30450 }, { "epoch": 8.646040306556912, "grad_norm": 2.5281805992126465, "learning_rate": 9.13579335793358e-05, "loss": 0.05005844235420227, "step": 30460 }, { "epoch": 8.648878796480272, "grad_norm": 6.085796356201172, "learning_rate": 9.135509508941244e-05, "loss": 0.030826079845428466, "step": 30470 }, { "epoch": 8.651717286403633, "grad_norm": 9.094573020935059, "learning_rate": 9.135225659948907e-05, "loss": 0.06682581901550293, "step": 30480 }, { "epoch": 8.654555776326994, "grad_norm": 10.547871589660645, "learning_rate": 9.134941810956572e-05, "loss": 0.03287749290466309, "step": 30490 }, { "epoch": 8.657394266250355, "grad_norm": 1.650418758392334, "learning_rate": 9.134657961964236e-05, "loss": 0.0423515647649765, "step": 30500 }, { "epoch": 8.657394266250355, "eval_accuracy": 0.956953010745851, "eval_loss": 0.1324549913406372, "eval_runtime": 39.8947, "eval_samples_per_second": 394.213, "eval_steps_per_second": 6.166, "step": 30500 }, { "epoch": 8.660232756173716, "grad_norm": 3.7851920127868652, "learning_rate": 9.134374112971899e-05, "loss": 0.045845454931259154, "step": 30510 }, { "epoch": 8.663071246097076, "grad_norm": 7.978453636169434, "learning_rate": 9.134090263979563e-05, "loss": 0.06563513278961182, "step": 30520 }, { "epoch": 8.665909736020437, "grad_norm": 8.796338081359863, "learning_rate": 9.133806414987228e-05, "loss": 0.03826130926609039, "step": 30530 }, { "epoch": 8.668748225943798, "grad_norm": 8.59777545928955, "learning_rate": 9.13352256599489e-05, "loss": 0.05177372694015503, "step": 30540 }, { "epoch": 8.671586715867159, "grad_norm": 6.59361457824707, "learning_rate": 9.133238717002554e-05, "loss": 0.035846710205078125, "step": 30550 }, { "epoch": 8.67442520579052, "grad_norm": 6.614710330963135, "learning_rate": 9.13295486801022e-05, "loss": 0.04112863838672638, "step": 30560 }, { "epoch": 8.67726369571388, "grad_norm": 0.6049400568008423, "learning_rate": 9.132671019017883e-05, "loss": 0.046689575910568236, "step": 30570 }, { "epoch": 8.68010218563724, "grad_norm": 7.010617733001709, "learning_rate": 9.132387170025547e-05, "loss": 0.026452934741973876, "step": 30580 }, { "epoch": 8.682940675560602, "grad_norm": 0.9395793080329895, "learning_rate": 9.132103321033211e-05, "loss": 0.042896679043769835, "step": 30590 }, { "epoch": 8.685779165483963, "grad_norm": 4.594294548034668, "learning_rate": 9.131819472040875e-05, "loss": 0.03038897514343262, "step": 30600 }, { "epoch": 8.688617655407324, "grad_norm": 4.4711127281188965, "learning_rate": 9.131535623048538e-05, "loss": 0.03481318354606629, "step": 30610 }, { "epoch": 8.691456145330685, "grad_norm": 0.9071669578552246, "learning_rate": 9.131251774056202e-05, "loss": 0.04252186119556427, "step": 30620 }, { "epoch": 8.694294635254044, "grad_norm": 10.158233642578125, "learning_rate": 9.130967925063868e-05, "loss": 0.028781330585479735, "step": 30630 }, { "epoch": 8.697133125177405, "grad_norm": 10.644608497619629, "learning_rate": 9.13068407607153e-05, "loss": 0.0586283266544342, "step": 30640 }, { "epoch": 8.699971615100766, "grad_norm": 2.290597677230835, "learning_rate": 9.130400227079194e-05, "loss": 0.04732573628425598, "step": 30650 }, { "epoch": 8.702810105024128, "grad_norm": 4.306064605712891, "learning_rate": 9.130116378086859e-05, "loss": 0.03248575627803803, "step": 30660 }, { "epoch": 8.705648594947489, "grad_norm": 8.390830039978027, "learning_rate": 9.129832529094521e-05, "loss": 0.06286463141441345, "step": 30670 }, { "epoch": 8.708487084870848, "grad_norm": 5.925351619720459, "learning_rate": 9.129548680102186e-05, "loss": 0.05585277080535889, "step": 30680 }, { "epoch": 8.711325574794209, "grad_norm": 1.1202750205993652, "learning_rate": 9.129264831109851e-05, "loss": 0.022962085902690887, "step": 30690 }, { "epoch": 8.71416406471757, "grad_norm": 5.8176045417785645, "learning_rate": 9.128980982117514e-05, "loss": 0.037390729784965514, "step": 30700 }, { "epoch": 8.717002554640931, "grad_norm": 10.547014236450195, "learning_rate": 9.128697133125178e-05, "loss": 0.0622535765171051, "step": 30710 }, { "epoch": 8.719841044564292, "grad_norm": 3.654615879058838, "learning_rate": 9.128413284132842e-05, "loss": 0.07196910381317138, "step": 30720 }, { "epoch": 8.722679534487652, "grad_norm": 11.086658477783203, "learning_rate": 9.128129435140506e-05, "loss": 0.0509859561920166, "step": 30730 }, { "epoch": 8.725518024411013, "grad_norm": 1.375793695449829, "learning_rate": 9.127845586148169e-05, "loss": 0.03449099063873291, "step": 30740 }, { "epoch": 8.728356514334374, "grad_norm": 3.581342935562134, "learning_rate": 9.127561737155833e-05, "loss": 0.053788882493972776, "step": 30750 }, { "epoch": 8.731195004257735, "grad_norm": 8.907295227050781, "learning_rate": 9.127277888163497e-05, "loss": 0.05027716159820557, "step": 30760 }, { "epoch": 8.734033494181096, "grad_norm": 4.0627617835998535, "learning_rate": 9.126994039171161e-05, "loss": 0.040040013194084165, "step": 30770 }, { "epoch": 8.736871984104457, "grad_norm": 8.469511985778809, "learning_rate": 9.126710190178826e-05, "loss": 0.038476786017417906, "step": 30780 }, { "epoch": 8.739710474027817, "grad_norm": 1.2669187784194946, "learning_rate": 9.12642634118649e-05, "loss": 0.041292032599449156, "step": 30790 }, { "epoch": 8.742548963951178, "grad_norm": 5.627805709838867, "learning_rate": 9.126142492194152e-05, "loss": 0.048104244470596316, "step": 30800 }, { "epoch": 8.745387453874539, "grad_norm": 3.511007070541382, "learning_rate": 9.125858643201817e-05, "loss": 0.04589286744594574, "step": 30810 }, { "epoch": 8.7482259437979, "grad_norm": 5.939490795135498, "learning_rate": 9.125574794209481e-05, "loss": 0.02923204004764557, "step": 30820 }, { "epoch": 8.751064433721261, "grad_norm": 3.235530376434326, "learning_rate": 9.125290945217145e-05, "loss": 0.04679418504238129, "step": 30830 }, { "epoch": 8.75390292364462, "grad_norm": 10.401141166687012, "learning_rate": 9.125007096224809e-05, "loss": 0.08264702558517456, "step": 30840 }, { "epoch": 8.756741413567982, "grad_norm": 7.923050403594971, "learning_rate": 9.124723247232473e-05, "loss": 0.037309563159942626, "step": 30850 }, { "epoch": 8.759579903491343, "grad_norm": 13.72253704071045, "learning_rate": 9.124439398240136e-05, "loss": 0.025329554080963136, "step": 30860 }, { "epoch": 8.762418393414704, "grad_norm": 5.763720512390137, "learning_rate": 9.1241555492478e-05, "loss": 0.03561375439167023, "step": 30870 }, { "epoch": 8.765256883338065, "grad_norm": 14.93305492401123, "learning_rate": 9.123871700255464e-05, "loss": 0.07395615577697753, "step": 30880 }, { "epoch": 8.768095373261424, "grad_norm": 0.8058933615684509, "learning_rate": 9.123587851263128e-05, "loss": 0.03478099107742309, "step": 30890 }, { "epoch": 8.770933863184785, "grad_norm": 8.433737754821777, "learning_rate": 9.123304002270792e-05, "loss": 0.06309868693351746, "step": 30900 }, { "epoch": 8.773772353108146, "grad_norm": 15.31053352355957, "learning_rate": 9.123020153278457e-05, "loss": 0.06764643788337707, "step": 30910 }, { "epoch": 8.776610843031508, "grad_norm": 4.8403096199035645, "learning_rate": 9.122736304286121e-05, "loss": 0.06397066712379455, "step": 30920 }, { "epoch": 8.779449332954869, "grad_norm": 11.884984016418457, "learning_rate": 9.122452455293784e-05, "loss": 0.07331853508949279, "step": 30930 }, { "epoch": 8.782287822878228, "grad_norm": 0.7419648170471191, "learning_rate": 9.122168606301448e-05, "loss": 0.02677904665470123, "step": 30940 }, { "epoch": 8.785126312801589, "grad_norm": 3.4939794540405273, "learning_rate": 9.121884757309112e-05, "loss": 0.04078682959079742, "step": 30950 }, { "epoch": 8.78796480272495, "grad_norm": 4.183099746704102, "learning_rate": 9.121600908316776e-05, "loss": 0.060636645555496214, "step": 30960 }, { "epoch": 8.790803292648311, "grad_norm": 5.435800075531006, "learning_rate": 9.12131705932444e-05, "loss": 0.056917828321456906, "step": 30970 }, { "epoch": 8.793641782571672, "grad_norm": 7.0630412101745605, "learning_rate": 9.121033210332104e-05, "loss": 0.04384594559669495, "step": 30980 }, { "epoch": 8.796480272495032, "grad_norm": 5.202294826507568, "learning_rate": 9.120749361339767e-05, "loss": 0.05628109574317932, "step": 30990 }, { "epoch": 8.799318762418393, "grad_norm": 2.1396195888519287, "learning_rate": 9.120465512347431e-05, "loss": 0.07933416962623596, "step": 31000 }, { "epoch": 8.799318762418393, "eval_accuracy": 0.9562535766516183, "eval_loss": 0.131442591547966, "eval_runtime": 36.6649, "eval_samples_per_second": 428.938, "eval_steps_per_second": 6.709, "step": 31000 }, { "epoch": 8.802157252341754, "grad_norm": 6.989975929260254, "learning_rate": 9.120181663355095e-05, "loss": 0.05137381553649902, "step": 31010 }, { "epoch": 8.804995742265115, "grad_norm": 9.497977256774902, "learning_rate": 9.11989781436276e-05, "loss": 0.04207700192928314, "step": 31020 }, { "epoch": 8.807834232188476, "grad_norm": 6.491014003753662, "learning_rate": 9.119613965370424e-05, "loss": 0.05536182522773743, "step": 31030 }, { "epoch": 8.810672722111837, "grad_norm": 5.461264610290527, "learning_rate": 9.119330116378088e-05, "loss": 0.03556413650512695, "step": 31040 }, { "epoch": 8.813511212035197, "grad_norm": 4.604427337646484, "learning_rate": 9.119046267385752e-05, "loss": 0.050791627168655394, "step": 31050 }, { "epoch": 8.816349701958558, "grad_norm": 7.259810924530029, "learning_rate": 9.118762418393415e-05, "loss": 0.05528286099433899, "step": 31060 }, { "epoch": 8.819188191881919, "grad_norm": 9.48355484008789, "learning_rate": 9.118478569401079e-05, "loss": 0.0376103937625885, "step": 31070 }, { "epoch": 8.82202668180528, "grad_norm": 3.3178870677948, "learning_rate": 9.118194720408743e-05, "loss": 0.04858998656272888, "step": 31080 }, { "epoch": 8.824865171728641, "grad_norm": 6.453464508056641, "learning_rate": 9.117910871416407e-05, "loss": 0.037974387407302856, "step": 31090 }, { "epoch": 8.827703661652, "grad_norm": 8.403301239013672, "learning_rate": 9.117627022424071e-05, "loss": 0.03437066674232483, "step": 31100 }, { "epoch": 8.830542151575361, "grad_norm": 2.3387739658355713, "learning_rate": 9.117343173431735e-05, "loss": 0.05114240050315857, "step": 31110 }, { "epoch": 8.833380641498723, "grad_norm": 1.7305792570114136, "learning_rate": 9.117059324439398e-05, "loss": 0.04011023640632629, "step": 31120 }, { "epoch": 8.836219131422084, "grad_norm": 8.259313583374023, "learning_rate": 9.116775475447062e-05, "loss": 0.046813356876373294, "step": 31130 }, { "epoch": 8.839057621345445, "grad_norm": 1.5456148386001587, "learning_rate": 9.116491626454726e-05, "loss": 0.02553802728652954, "step": 31140 }, { "epoch": 8.841896111268806, "grad_norm": 4.752388000488281, "learning_rate": 9.11620777746239e-05, "loss": 0.038710737228393556, "step": 31150 }, { "epoch": 8.844734601192165, "grad_norm": 12.646970748901367, "learning_rate": 9.115923928470055e-05, "loss": 0.04051471054553986, "step": 31160 }, { "epoch": 8.847573091115526, "grad_norm": 1.2890928983688354, "learning_rate": 9.115640079477719e-05, "loss": 0.04144401252269745, "step": 31170 }, { "epoch": 8.850411581038887, "grad_norm": 7.739059925079346, "learning_rate": 9.115356230485383e-05, "loss": 0.05310244560241699, "step": 31180 }, { "epoch": 8.853250070962249, "grad_norm": 3.445984125137329, "learning_rate": 9.115072381493046e-05, "loss": 0.04254533350467682, "step": 31190 }, { "epoch": 8.85608856088561, "grad_norm": 9.581927299499512, "learning_rate": 9.11478853250071e-05, "loss": 0.0633292555809021, "step": 31200 }, { "epoch": 8.858927050808969, "grad_norm": 10.765357971191406, "learning_rate": 9.114504683508374e-05, "loss": 0.06697494983673095, "step": 31210 }, { "epoch": 8.86176554073233, "grad_norm": 1.929891586303711, "learning_rate": 9.114220834516037e-05, "loss": 0.05385153889656067, "step": 31220 }, { "epoch": 8.864604030655691, "grad_norm": 6.623532295227051, "learning_rate": 9.113936985523702e-05, "loss": 0.06776501536369324, "step": 31230 }, { "epoch": 8.867442520579052, "grad_norm": 2.7128143310546875, "learning_rate": 9.113653136531366e-05, "loss": 0.06694952845573425, "step": 31240 }, { "epoch": 8.870281010502413, "grad_norm": 3.3999416828155518, "learning_rate": 9.113369287539029e-05, "loss": 0.04315488934516907, "step": 31250 }, { "epoch": 8.873119500425773, "grad_norm": 12.498030662536621, "learning_rate": 9.113085438546693e-05, "loss": 0.05693553686141968, "step": 31260 }, { "epoch": 8.875957990349134, "grad_norm": 18.40390396118164, "learning_rate": 9.112801589554357e-05, "loss": 0.05248856544494629, "step": 31270 }, { "epoch": 8.878796480272495, "grad_norm": 9.249555587768555, "learning_rate": 9.112517740562022e-05, "loss": 0.038873481750488284, "step": 31280 }, { "epoch": 8.881634970195856, "grad_norm": 12.084673881530762, "learning_rate": 9.112233891569686e-05, "loss": 0.030299532413482665, "step": 31290 }, { "epoch": 8.884473460119217, "grad_norm": 4.595461845397949, "learning_rate": 9.11195004257735e-05, "loss": 0.03169003129005432, "step": 31300 }, { "epoch": 8.887311950042577, "grad_norm": 8.002089500427246, "learning_rate": 9.111666193585014e-05, "loss": 0.0564944326877594, "step": 31310 }, { "epoch": 8.890150439965938, "grad_norm": 0.7920204401016235, "learning_rate": 9.111382344592677e-05, "loss": 0.054355663061141965, "step": 31320 }, { "epoch": 8.892988929889299, "grad_norm": 3.0147061347961426, "learning_rate": 9.111098495600341e-05, "loss": 0.043650656938552856, "step": 31330 }, { "epoch": 8.89582741981266, "grad_norm": 9.262863159179688, "learning_rate": 9.110814646608005e-05, "loss": 0.049213367700576785, "step": 31340 }, { "epoch": 8.898665909736021, "grad_norm": 8.359726905822754, "learning_rate": 9.110530797615668e-05, "loss": 0.06825129389762878, "step": 31350 }, { "epoch": 8.90150439965938, "grad_norm": 8.55466365814209, "learning_rate": 9.110246948623333e-05, "loss": 0.03654097318649292, "step": 31360 }, { "epoch": 8.904342889582741, "grad_norm": 7.481513500213623, "learning_rate": 9.109963099630997e-05, "loss": 0.060463827848434445, "step": 31370 }, { "epoch": 8.907181379506103, "grad_norm": 11.288320541381836, "learning_rate": 9.10967925063866e-05, "loss": 0.04659051895141601, "step": 31380 }, { "epoch": 8.910019869429464, "grad_norm": 13.42967414855957, "learning_rate": 9.109395401646324e-05, "loss": 0.056679540872573854, "step": 31390 }, { "epoch": 8.912858359352825, "grad_norm": 2.9953291416168213, "learning_rate": 9.109111552653989e-05, "loss": 0.05546015501022339, "step": 31400 }, { "epoch": 8.915696849276186, "grad_norm": 9.741242408752441, "learning_rate": 9.108827703661653e-05, "loss": 0.04378204345703125, "step": 31410 }, { "epoch": 8.918535339199545, "grad_norm": 7.4349446296691895, "learning_rate": 9.108543854669315e-05, "loss": 0.052530455589294436, "step": 31420 }, { "epoch": 8.921373829122906, "grad_norm": 1.7641465663909912, "learning_rate": 9.108260005676981e-05, "loss": 0.045007678866386416, "step": 31430 }, { "epoch": 8.924212319046267, "grad_norm": 0.9988229274749756, "learning_rate": 9.107976156684645e-05, "loss": 0.0408647358417511, "step": 31440 }, { "epoch": 8.927050808969629, "grad_norm": 3.8731284141540527, "learning_rate": 9.107692307692308e-05, "loss": 0.0400809645652771, "step": 31450 }, { "epoch": 8.92988929889299, "grad_norm": 9.588872909545898, "learning_rate": 9.107408458699972e-05, "loss": 0.052127277851104735, "step": 31460 }, { "epoch": 8.932727788816349, "grad_norm": 15.750364303588867, "learning_rate": 9.107124609707636e-05, "loss": 0.0724337100982666, "step": 31470 }, { "epoch": 8.93556627873971, "grad_norm": 5.610533237457275, "learning_rate": 9.106840760715299e-05, "loss": 0.04589313268661499, "step": 31480 }, { "epoch": 8.938404768663071, "grad_norm": 7.699913501739502, "learning_rate": 9.106556911722964e-05, "loss": 0.05013878345489502, "step": 31490 }, { "epoch": 8.941243258586432, "grad_norm": 10.831008911132812, "learning_rate": 9.106273062730629e-05, "loss": 0.04161070883274078, "step": 31500 }, { "epoch": 8.941243258586432, "eval_accuracy": 0.9550454632161252, "eval_loss": 0.13428544998168945, "eval_runtime": 36.5484, "eval_samples_per_second": 430.306, "eval_steps_per_second": 6.731, "step": 31500 }, { "epoch": 8.944081748509793, "grad_norm": 3.586207866668701, "learning_rate": 9.105989213738291e-05, "loss": 0.031861457228660586, "step": 31510 }, { "epoch": 8.946920238433153, "grad_norm": 18.6157283782959, "learning_rate": 9.105705364745955e-05, "loss": 0.059053951501846315, "step": 31520 }, { "epoch": 8.949758728356514, "grad_norm": 9.792448997497559, "learning_rate": 9.10542151575362e-05, "loss": 0.04085907936096191, "step": 31530 }, { "epoch": 8.952597218279875, "grad_norm": 7.631518363952637, "learning_rate": 9.105137666761284e-05, "loss": 0.046115002036094664, "step": 31540 }, { "epoch": 8.955435708203236, "grad_norm": 15.854352951049805, "learning_rate": 9.104853817768947e-05, "loss": 0.060493475198745726, "step": 31550 }, { "epoch": 8.958274198126597, "grad_norm": 8.724392890930176, "learning_rate": 9.104569968776612e-05, "loss": 0.0477115124464035, "step": 31560 }, { "epoch": 8.961112688049958, "grad_norm": 3.875062942504883, "learning_rate": 9.104286119784276e-05, "loss": 0.04567428827285767, "step": 31570 }, { "epoch": 8.963951177973318, "grad_norm": 7.746779918670654, "learning_rate": 9.104002270791939e-05, "loss": 0.05544371604919433, "step": 31580 }, { "epoch": 8.966789667896679, "grad_norm": 6.027293682098389, "learning_rate": 9.103718421799603e-05, "loss": 0.037753325700759885, "step": 31590 }, { "epoch": 8.96962815782004, "grad_norm": 7.421422004699707, "learning_rate": 9.103434572807267e-05, "loss": 0.05751690864562988, "step": 31600 }, { "epoch": 8.972466647743401, "grad_norm": 2.450174570083618, "learning_rate": 9.10315072381493e-05, "loss": 0.05104315876960754, "step": 31610 }, { "epoch": 8.975305137666762, "grad_norm": 2.8282415866851807, "learning_rate": 9.102866874822595e-05, "loss": 0.04864087402820587, "step": 31620 }, { "epoch": 8.978143627590121, "grad_norm": 8.558664321899414, "learning_rate": 9.10258302583026e-05, "loss": 0.04701390862464905, "step": 31630 }, { "epoch": 8.980982117513483, "grad_norm": 4.145365238189697, "learning_rate": 9.102299176837922e-05, "loss": 0.040590491890907285, "step": 31640 }, { "epoch": 8.983820607436844, "grad_norm": 2.954225540161133, "learning_rate": 9.102015327845587e-05, "loss": 0.05479607582092285, "step": 31650 }, { "epoch": 8.986659097360205, "grad_norm": 9.534770965576172, "learning_rate": 9.10173147885325e-05, "loss": 0.06734203100204468, "step": 31660 }, { "epoch": 8.989497587283566, "grad_norm": 8.998602867126465, "learning_rate": 9.101447629860915e-05, "loss": 0.05343976616859436, "step": 31670 }, { "epoch": 8.992336077206925, "grad_norm": 6.742722511291504, "learning_rate": 9.101163780868578e-05, "loss": 0.038447698950767516, "step": 31680 }, { "epoch": 8.995174567130286, "grad_norm": 10.084271430969238, "learning_rate": 9.100879931876243e-05, "loss": 0.047462275624275206, "step": 31690 }, { "epoch": 8.998013057053647, "grad_norm": 6.214217185974121, "learning_rate": 9.100596082883906e-05, "loss": 0.06680863499641418, "step": 31700 }, { "epoch": 9.000851546977009, "grad_norm": 1.4589204788208008, "learning_rate": 9.10031223389157e-05, "loss": 0.04198989272117615, "step": 31710 }, { "epoch": 9.00369003690037, "grad_norm": 1.0230083465576172, "learning_rate": 9.100028384899234e-05, "loss": 0.018249234557151793, "step": 31720 }, { "epoch": 9.006528526823729, "grad_norm": 7.67701530456543, "learning_rate": 9.099744535906898e-05, "loss": 0.027402028441429138, "step": 31730 }, { "epoch": 9.00936701674709, "grad_norm": 7.839910984039307, "learning_rate": 9.099460686914561e-05, "loss": 0.03484986424446106, "step": 31740 }, { "epoch": 9.012205506670451, "grad_norm": 11.999646186828613, "learning_rate": 9.099176837922225e-05, "loss": 0.050564533472061156, "step": 31750 }, { "epoch": 9.015043996593812, "grad_norm": 6.184772968292236, "learning_rate": 9.098892988929891e-05, "loss": 0.038797608017921446, "step": 31760 }, { "epoch": 9.017882486517173, "grad_norm": 6.395945072174072, "learning_rate": 9.098609139937553e-05, "loss": 0.06241859197616577, "step": 31770 }, { "epoch": 9.020720976440534, "grad_norm": 5.369216442108154, "learning_rate": 9.098325290945218e-05, "loss": 0.022164538502693176, "step": 31780 }, { "epoch": 9.023559466363894, "grad_norm": 7.291435241699219, "learning_rate": 9.098041441952882e-05, "loss": 0.026201301813125612, "step": 31790 }, { "epoch": 9.026397956287255, "grad_norm": 5.885504245758057, "learning_rate": 9.097757592960545e-05, "loss": 0.03249071538448334, "step": 31800 }, { "epoch": 9.029236446210616, "grad_norm": 7.000491619110107, "learning_rate": 9.097473743968209e-05, "loss": 0.03307543992996216, "step": 31810 }, { "epoch": 9.032074936133977, "grad_norm": 3.084064245223999, "learning_rate": 9.097189894975874e-05, "loss": 0.035820034146308896, "step": 31820 }, { "epoch": 9.034913426057338, "grad_norm": 4.166110038757324, "learning_rate": 9.096906045983537e-05, "loss": 0.016914038360118865, "step": 31830 }, { "epoch": 9.037751915980698, "grad_norm": 8.605620384216309, "learning_rate": 9.096622196991201e-05, "loss": 0.03397142291069031, "step": 31840 }, { "epoch": 9.040590405904059, "grad_norm": 7.653633117675781, "learning_rate": 9.096338347998865e-05, "loss": 0.027692413330078124, "step": 31850 }, { "epoch": 9.04342889582742, "grad_norm": 0.9327400922775269, "learning_rate": 9.09605449900653e-05, "loss": 0.03240158259868622, "step": 31860 }, { "epoch": 9.046267385750781, "grad_norm": 12.380126953125, "learning_rate": 9.095770650014192e-05, "loss": 0.05096262097358704, "step": 31870 }, { "epoch": 9.049105875674142, "grad_norm": 7.328427314758301, "learning_rate": 9.095486801021856e-05, "loss": 0.0532070517539978, "step": 31880 }, { "epoch": 9.051944365597501, "grad_norm": 9.894255638122559, "learning_rate": 9.095202952029522e-05, "loss": 0.03881247639656067, "step": 31890 }, { "epoch": 9.054782855520862, "grad_norm": 5.252124309539795, "learning_rate": 9.094919103037185e-05, "loss": 0.024706484377384187, "step": 31900 }, { "epoch": 9.057621345444224, "grad_norm": 0.8562063574790955, "learning_rate": 9.094635254044849e-05, "loss": 0.04025550186634064, "step": 31910 }, { "epoch": 9.060459835367585, "grad_norm": 7.7702178955078125, "learning_rate": 9.094351405052513e-05, "loss": 0.036770790815353394, "step": 31920 }, { "epoch": 9.063298325290946, "grad_norm": 10.77419662475586, "learning_rate": 9.094067556060176e-05, "loss": 0.06690344214439392, "step": 31930 }, { "epoch": 9.066136815214305, "grad_norm": 7.208384990692139, "learning_rate": 9.09378370706784e-05, "loss": 0.02831999063491821, "step": 31940 }, { "epoch": 9.068975305137666, "grad_norm": 2.0755372047424316, "learning_rate": 9.093499858075504e-05, "loss": 0.04241290092468262, "step": 31950 }, { "epoch": 9.071813795061027, "grad_norm": 9.709894180297852, "learning_rate": 9.093216009083168e-05, "loss": 0.03581871390342713, "step": 31960 }, { "epoch": 9.074652284984388, "grad_norm": 8.585312843322754, "learning_rate": 9.092932160090832e-05, "loss": 0.02075933963060379, "step": 31970 }, { "epoch": 9.07749077490775, "grad_norm": 4.5492777824401855, "learning_rate": 9.092648311098496e-05, "loss": 0.03713195025920868, "step": 31980 }, { "epoch": 9.08032926483111, "grad_norm": 9.842798233032227, "learning_rate": 9.09236446210616e-05, "loss": 0.05139003992080689, "step": 31990 }, { "epoch": 9.08316775475447, "grad_norm": 2.0159356594085693, "learning_rate": 9.092080613113823e-05, "loss": 0.046948489546775815, "step": 32000 }, { "epoch": 9.08316775475447, "eval_accuracy": 0.9597507471227825, "eval_loss": 0.12335571646690369, "eval_runtime": 32.8542, "eval_samples_per_second": 478.691, "eval_steps_per_second": 7.488, "step": 32000 }, { "epoch": 9.086006244677831, "grad_norm": 3.430224895477295, "learning_rate": 9.091796764121487e-05, "loss": 0.020360584557056426, "step": 32010 }, { "epoch": 9.088844734601192, "grad_norm": 1.7622594833374023, "learning_rate": 9.091512915129153e-05, "loss": 0.037364518642425536, "step": 32020 }, { "epoch": 9.091683224524553, "grad_norm": 6.31217622756958, "learning_rate": 9.091229066136816e-05, "loss": 0.03992066979408264, "step": 32030 }, { "epoch": 9.094521714447914, "grad_norm": 2.618670701980591, "learning_rate": 9.09094521714448e-05, "loss": 0.053883600234985354, "step": 32040 }, { "epoch": 9.097360204371274, "grad_norm": 6.833083152770996, "learning_rate": 9.090661368152144e-05, "loss": 0.058245742321014406, "step": 32050 }, { "epoch": 9.100198694294635, "grad_norm": 4.540513515472412, "learning_rate": 9.090377519159807e-05, "loss": 0.0410744309425354, "step": 32060 }, { "epoch": 9.103037184217996, "grad_norm": 5.8971991539001465, "learning_rate": 9.090093670167471e-05, "loss": 0.05181349515914917, "step": 32070 }, { "epoch": 9.105875674141357, "grad_norm": 12.110474586486816, "learning_rate": 9.089809821175135e-05, "loss": 0.029780897498130798, "step": 32080 }, { "epoch": 9.108714164064718, "grad_norm": 10.550119400024414, "learning_rate": 9.089525972182799e-05, "loss": 0.016818594932556153, "step": 32090 }, { "epoch": 9.111552653988078, "grad_norm": 13.275566101074219, "learning_rate": 9.089242123190463e-05, "loss": 0.03771753311157226, "step": 32100 }, { "epoch": 9.114391143911439, "grad_norm": 2.9394333362579346, "learning_rate": 9.088958274198127e-05, "loss": 0.02406800091266632, "step": 32110 }, { "epoch": 9.1172296338348, "grad_norm": 1.8092851638793945, "learning_rate": 9.088674425205792e-05, "loss": 0.02586439847946167, "step": 32120 }, { "epoch": 9.120068123758161, "grad_norm": 6.3781890869140625, "learning_rate": 9.088390576213454e-05, "loss": 0.04452702105045318, "step": 32130 }, { "epoch": 9.122906613681522, "grad_norm": 10.13783073425293, "learning_rate": 9.088106727221118e-05, "loss": 0.03045453429222107, "step": 32140 }, { "epoch": 9.125745103604881, "grad_norm": 0.9327914118766785, "learning_rate": 9.087822878228783e-05, "loss": 0.03160166442394256, "step": 32150 }, { "epoch": 9.128583593528242, "grad_norm": 5.085025787353516, "learning_rate": 9.087539029236447e-05, "loss": 0.03440426588058472, "step": 32160 }, { "epoch": 9.131422083451604, "grad_norm": 3.798240900039673, "learning_rate": 9.087255180244111e-05, "loss": 0.019728973507881165, "step": 32170 }, { "epoch": 9.134260573374965, "grad_norm": 8.315292358398438, "learning_rate": 9.086971331251775e-05, "loss": 0.026944386959075927, "step": 32180 }, { "epoch": 9.137099063298326, "grad_norm": 7.366305351257324, "learning_rate": 9.086687482259438e-05, "loss": 0.056515824794769284, "step": 32190 }, { "epoch": 9.139937553221687, "grad_norm": 7.898773670196533, "learning_rate": 9.086403633267102e-05, "loss": 0.04857366681098938, "step": 32200 }, { "epoch": 9.142776043145046, "grad_norm": 1.398860216140747, "learning_rate": 9.086119784274766e-05, "loss": 0.05141681432723999, "step": 32210 }, { "epoch": 9.145614533068407, "grad_norm": 12.657358169555664, "learning_rate": 9.08583593528243e-05, "loss": 0.04619079828262329, "step": 32220 }, { "epoch": 9.148453022991768, "grad_norm": 6.277275562286377, "learning_rate": 9.085552086290094e-05, "loss": 0.03843480944633484, "step": 32230 }, { "epoch": 9.15129151291513, "grad_norm": 10.31881332397461, "learning_rate": 9.085268237297758e-05, "loss": 0.049346122145652774, "step": 32240 }, { "epoch": 9.15413000283849, "grad_norm": 7.293776512145996, "learning_rate": 9.084984388305423e-05, "loss": 0.04325521290302277, "step": 32250 }, { "epoch": 9.15696849276185, "grad_norm": 8.397859573364258, "learning_rate": 9.084700539313085e-05, "loss": 0.04536004662513733, "step": 32260 }, { "epoch": 9.159806982685211, "grad_norm": 5.810582637786865, "learning_rate": 9.08441669032075e-05, "loss": 0.03206147253513336, "step": 32270 }, { "epoch": 9.162645472608572, "grad_norm": 9.789913177490234, "learning_rate": 9.084132841328414e-05, "loss": 0.045726227760314944, "step": 32280 }, { "epoch": 9.165483962531933, "grad_norm": 1.4725143909454346, "learning_rate": 9.083848992336078e-05, "loss": 0.029670083522796632, "step": 32290 }, { "epoch": 9.168322452455294, "grad_norm": 1.7058342695236206, "learning_rate": 9.083565143343742e-05, "loss": 0.029241159558296204, "step": 32300 }, { "epoch": 9.171160942378654, "grad_norm": 4.869115352630615, "learning_rate": 9.083281294351406e-05, "loss": 0.040372779965400694, "step": 32310 }, { "epoch": 9.173999432302015, "grad_norm": 13.250131607055664, "learning_rate": 9.082997445359069e-05, "loss": 0.053754180669784546, "step": 32320 }, { "epoch": 9.176837922225376, "grad_norm": 3.084260940551758, "learning_rate": 9.082713596366733e-05, "loss": 0.026004487276077272, "step": 32330 }, { "epoch": 9.179676412148737, "grad_norm": 7.091236591339111, "learning_rate": 9.082429747374397e-05, "loss": 0.029289424419403076, "step": 32340 }, { "epoch": 9.182514902072098, "grad_norm": 9.52546501159668, "learning_rate": 9.082145898382061e-05, "loss": 0.03566540479660034, "step": 32350 }, { "epoch": 9.18535339199546, "grad_norm": 2.6723852157592773, "learning_rate": 9.081862049389725e-05, "loss": 0.030942189693450927, "step": 32360 }, { "epoch": 9.188191881918819, "grad_norm": 2.2812659740448, "learning_rate": 9.08157820039739e-05, "loss": 0.018845225870609283, "step": 32370 }, { "epoch": 9.19103037184218, "grad_norm": 1.556849718093872, "learning_rate": 9.081294351405054e-05, "loss": 0.018857795000076293, "step": 32380 }, { "epoch": 9.19386886176554, "grad_norm": 2.601822853088379, "learning_rate": 9.081010502412716e-05, "loss": 0.01543736904859543, "step": 32390 }, { "epoch": 9.196707351688902, "grad_norm": 3.6239283084869385, "learning_rate": 9.08072665342038e-05, "loss": 0.029795706272125244, "step": 32400 }, { "epoch": 9.199545841612263, "grad_norm": 2.4916577339172363, "learning_rate": 9.080442804428045e-05, "loss": 0.03616428673267365, "step": 32410 }, { "epoch": 9.202384331535622, "grad_norm": 2.4417362213134766, "learning_rate": 9.080158955435709e-05, "loss": 0.05504724383354187, "step": 32420 }, { "epoch": 9.205222821458984, "grad_norm": 7.166131496429443, "learning_rate": 9.079875106443373e-05, "loss": 0.038576582074165346, "step": 32430 }, { "epoch": 9.208061311382345, "grad_norm": 11.00830078125, "learning_rate": 9.07961964235027e-05, "loss": 0.06214960813522339, "step": 32440 }, { "epoch": 9.210899801305706, "grad_norm": 12.441069602966309, "learning_rate": 9.079335793357934e-05, "loss": 0.05436771512031555, "step": 32450 }, { "epoch": 9.213738291229067, "grad_norm": 9.336738586425781, "learning_rate": 9.079051944365598e-05, "loss": 0.04007699489593506, "step": 32460 }, { "epoch": 9.216576781152426, "grad_norm": 5.197333812713623, "learning_rate": 9.078768095373262e-05, "loss": 0.030173128843307494, "step": 32470 }, { "epoch": 9.219415271075787, "grad_norm": 7.479092597961426, "learning_rate": 9.078484246380926e-05, "loss": 0.04533488154411316, "step": 32480 }, { "epoch": 9.222253760999148, "grad_norm": 7.2428669929504395, "learning_rate": 9.07820039738859e-05, "loss": 0.04252261519432068, "step": 32490 }, { "epoch": 9.22509225092251, "grad_norm": 1.520056128501892, "learning_rate": 9.077916548396253e-05, "loss": 0.04779677987098694, "step": 32500 }, { "epoch": 9.22509225092251, "eval_accuracy": 0.9626756533350289, "eval_loss": 0.11282890290021896, "eval_runtime": 34.4707, "eval_samples_per_second": 456.243, "eval_steps_per_second": 7.137, "step": 32500 }, { "epoch": 9.22793074084587, "grad_norm": 3.61552357673645, "learning_rate": 9.077632699403917e-05, "loss": 0.02722768485546112, "step": 32510 }, { "epoch": 9.23076923076923, "grad_norm": 3.5022122859954834, "learning_rate": 9.077348850411581e-05, "loss": 0.03738508820533752, "step": 32520 }, { "epoch": 9.233607720692591, "grad_norm": 9.099479675292969, "learning_rate": 9.077065001419246e-05, "loss": 0.04983468651771546, "step": 32530 }, { "epoch": 9.236446210615952, "grad_norm": 5.728768825531006, "learning_rate": 9.07678115242691e-05, "loss": 0.035278132557868956, "step": 32540 }, { "epoch": 9.239284700539313, "grad_norm": 1.33148193359375, "learning_rate": 9.076497303434574e-05, "loss": 0.04145012199878693, "step": 32550 }, { "epoch": 9.242123190462674, "grad_norm": 16.933958053588867, "learning_rate": 9.076213454442237e-05, "loss": 0.041950976848602294, "step": 32560 }, { "epoch": 9.244961680386035, "grad_norm": 12.994586944580078, "learning_rate": 9.075929605449901e-05, "loss": 0.05546303391456604, "step": 32570 }, { "epoch": 9.247800170309395, "grad_norm": 5.706315517425537, "learning_rate": 9.075645756457565e-05, "loss": 0.03730215728282928, "step": 32580 }, { "epoch": 9.250638660232756, "grad_norm": 4.581672668457031, "learning_rate": 9.075361907465229e-05, "loss": 0.04317361116409302, "step": 32590 }, { "epoch": 9.253477150156117, "grad_norm": 5.51473331451416, "learning_rate": 9.075078058472893e-05, "loss": 0.019707317650318145, "step": 32600 }, { "epoch": 9.256315640079478, "grad_norm": 3.4004223346710205, "learning_rate": 9.074794209480557e-05, "loss": 0.033481156826019286, "step": 32610 }, { "epoch": 9.25915413000284, "grad_norm": 2.405076265335083, "learning_rate": 9.074510360488221e-05, "loss": 0.030858239531517027, "step": 32620 }, { "epoch": 9.261992619926199, "grad_norm": 3.090541124343872, "learning_rate": 9.074226511495884e-05, "loss": 0.04065389931201935, "step": 32630 }, { "epoch": 9.26483110984956, "grad_norm": 10.210100173950195, "learning_rate": 9.073942662503548e-05, "loss": 0.043060287833213806, "step": 32640 }, { "epoch": 9.26766959977292, "grad_norm": 0.9598020911216736, "learning_rate": 9.073658813511212e-05, "loss": 0.040640637278556824, "step": 32650 }, { "epoch": 9.270508089696282, "grad_norm": 3.916776657104492, "learning_rate": 9.073374964518877e-05, "loss": 0.058247816562652585, "step": 32660 }, { "epoch": 9.273346579619643, "grad_norm": 1.0415369272232056, "learning_rate": 9.073091115526541e-05, "loss": 0.03692522346973419, "step": 32670 }, { "epoch": 9.276185069543002, "grad_norm": 10.626914978027344, "learning_rate": 9.072807266534205e-05, "loss": 0.015696039795875548, "step": 32680 }, { "epoch": 9.279023559466363, "grad_norm": 8.792520523071289, "learning_rate": 9.072523417541868e-05, "loss": 0.0338861346244812, "step": 32690 }, { "epoch": 9.281862049389725, "grad_norm": 3.98820424079895, "learning_rate": 9.072239568549532e-05, "loss": 0.020896807312965393, "step": 32700 }, { "epoch": 9.284700539313086, "grad_norm": 2.347292900085449, "learning_rate": 9.071955719557196e-05, "loss": 0.03813081681728363, "step": 32710 }, { "epoch": 9.287539029236447, "grad_norm": 1.616136908531189, "learning_rate": 9.07167187056486e-05, "loss": 0.022858905792236327, "step": 32720 }, { "epoch": 9.290377519159806, "grad_norm": 1.4422340393066406, "learning_rate": 9.071388021572523e-05, "loss": 0.03144991397857666, "step": 32730 }, { "epoch": 9.293216009083167, "grad_norm": 5.7519683837890625, "learning_rate": 9.071104172580188e-05, "loss": 0.053111737966537474, "step": 32740 }, { "epoch": 9.296054499006528, "grad_norm": 5.419184684753418, "learning_rate": 9.070820323587852e-05, "loss": 0.023108962178230285, "step": 32750 }, { "epoch": 9.29889298892989, "grad_norm": 8.902000427246094, "learning_rate": 9.070536474595515e-05, "loss": 0.04625536203384399, "step": 32760 }, { "epoch": 9.30173147885325, "grad_norm": 11.900615692138672, "learning_rate": 9.07025262560318e-05, "loss": 0.038126188516616824, "step": 32770 }, { "epoch": 9.304569968776612, "grad_norm": 1.768319845199585, "learning_rate": 9.069968776610844e-05, "loss": 0.0440629631280899, "step": 32780 }, { "epoch": 9.307408458699971, "grad_norm": 14.587743759155273, "learning_rate": 9.069684927618506e-05, "loss": 0.055778682231903076, "step": 32790 }, { "epoch": 9.310246948623332, "grad_norm": 6.891566276550293, "learning_rate": 9.069401078626172e-05, "loss": 0.02838665246963501, "step": 32800 }, { "epoch": 9.313085438546693, "grad_norm": 3.50175404548645, "learning_rate": 9.069117229633836e-05, "loss": 0.015564295649528503, "step": 32810 }, { "epoch": 9.315923928470054, "grad_norm": 9.1006441116333, "learning_rate": 9.068833380641499e-05, "loss": 0.03435078263282776, "step": 32820 }, { "epoch": 9.318762418393415, "grad_norm": 7.961798667907715, "learning_rate": 9.068549531649163e-05, "loss": 0.04204523861408234, "step": 32830 }, { "epoch": 9.321600908316775, "grad_norm": 7.220952987670898, "learning_rate": 9.068265682656827e-05, "loss": 0.03828922212123871, "step": 32840 }, { "epoch": 9.324439398240136, "grad_norm": 5.3738179206848145, "learning_rate": 9.067981833664491e-05, "loss": 0.0561903715133667, "step": 32850 }, { "epoch": 9.327277888163497, "grad_norm": 9.843934059143066, "learning_rate": 9.067697984672154e-05, "loss": 0.03359183669090271, "step": 32860 }, { "epoch": 9.330116378086858, "grad_norm": 10.17780876159668, "learning_rate": 9.06741413567982e-05, "loss": 0.053371214866638185, "step": 32870 }, { "epoch": 9.33295486801022, "grad_norm": 4.117978096008301, "learning_rate": 9.067130286687484e-05, "loss": 0.034124940633773804, "step": 32880 }, { "epoch": 9.335793357933579, "grad_norm": 9.051239967346191, "learning_rate": 9.066846437695146e-05, "loss": 0.034045299887657164, "step": 32890 }, { "epoch": 9.33863184785694, "grad_norm": 3.954942226409912, "learning_rate": 9.06656258870281e-05, "loss": 0.02651660442352295, "step": 32900 }, { "epoch": 9.3414703377803, "grad_norm": 6.418094635009766, "learning_rate": 9.066278739710475e-05, "loss": 0.043460071086883545, "step": 32910 }, { "epoch": 9.344308827703662, "grad_norm": 7.711916446685791, "learning_rate": 9.065994890718137e-05, "loss": 0.053401333093643186, "step": 32920 }, { "epoch": 9.347147317627023, "grad_norm": 14.13762378692627, "learning_rate": 9.065711041725802e-05, "loss": 0.06232413649559021, "step": 32930 }, { "epoch": 9.349985807550382, "grad_norm": 6.756962776184082, "learning_rate": 9.065427192733467e-05, "loss": 0.07275983691215515, "step": 32940 }, { "epoch": 9.352824297473743, "grad_norm": 4.084293365478516, "learning_rate": 9.06514334374113e-05, "loss": 0.031978747248649596, "step": 32950 }, { "epoch": 9.355662787397105, "grad_norm": 2.337986469268799, "learning_rate": 9.064859494748794e-05, "loss": 0.021571289002895355, "step": 32960 }, { "epoch": 9.358501277320466, "grad_norm": 6.329718589782715, "learning_rate": 9.064575645756458e-05, "loss": 0.04712373316287995, "step": 32970 }, { "epoch": 9.361339767243827, "grad_norm": 10.265377044677734, "learning_rate": 9.064291796764122e-05, "loss": 0.056342899799346924, "step": 32980 }, { "epoch": 9.364178257167188, "grad_norm": 2.9421544075012207, "learning_rate": 9.064007947771785e-05, "loss": 0.04247595965862274, "step": 32990 }, { "epoch": 9.367016747090547, "grad_norm": 1.8413949012756348, "learning_rate": 9.06372409877945e-05, "loss": 0.03341174125671387, "step": 33000 }, { "epoch": 9.367016747090547, "eval_accuracy": 0.9541552743689197, "eval_loss": 0.1444767266511917, "eval_runtime": 35.4021, "eval_samples_per_second": 444.239, "eval_steps_per_second": 6.949, "step": 33000 }, { "epoch": 9.369855237013908, "grad_norm": 9.4076566696167, "learning_rate": 9.063440249787115e-05, "loss": 0.031842243671417234, "step": 33010 }, { "epoch": 9.37269372693727, "grad_norm": 7.114620208740234, "learning_rate": 9.063156400794777e-05, "loss": 0.029140204191207886, "step": 33020 }, { "epoch": 9.37553221686063, "grad_norm": 7.0897040367126465, "learning_rate": 9.062872551802442e-05, "loss": 0.02467966228723526, "step": 33030 }, { "epoch": 9.378370706783992, "grad_norm": 5.201611042022705, "learning_rate": 9.062588702810106e-05, "loss": 0.04776231348514557, "step": 33040 }, { "epoch": 9.381209196707351, "grad_norm": 6.096937656402588, "learning_rate": 9.062304853817768e-05, "loss": 0.026355132460594177, "step": 33050 }, { "epoch": 9.384047686630712, "grad_norm": 5.474670886993408, "learning_rate": 9.062021004825433e-05, "loss": 0.029049372673034667, "step": 33060 }, { "epoch": 9.386886176554073, "grad_norm": 9.540634155273438, "learning_rate": 9.061737155833098e-05, "loss": 0.06607010960578918, "step": 33070 }, { "epoch": 9.389724666477434, "grad_norm": 2.851853847503662, "learning_rate": 9.061453306840761e-05, "loss": 0.0404718816280365, "step": 33080 }, { "epoch": 9.392563156400795, "grad_norm": 6.747595310211182, "learning_rate": 9.061169457848425e-05, "loss": 0.03616315722465515, "step": 33090 }, { "epoch": 9.395401646324155, "grad_norm": 1.1550198793411255, "learning_rate": 9.060885608856089e-05, "loss": 0.049955052137374875, "step": 33100 }, { "epoch": 9.398240136247516, "grad_norm": 5.937556743621826, "learning_rate": 9.060601759863753e-05, "loss": 0.06274673938751221, "step": 33110 }, { "epoch": 9.401078626170877, "grad_norm": 11.114684104919434, "learning_rate": 9.060317910871416e-05, "loss": 0.06595404744148255, "step": 33120 }, { "epoch": 9.403917116094238, "grad_norm": 3.7054638862609863, "learning_rate": 9.06003406187908e-05, "loss": 0.03684907257556915, "step": 33130 }, { "epoch": 9.4067556060176, "grad_norm": 0.9444613456726074, "learning_rate": 9.059750212886746e-05, "loss": 0.0598833441734314, "step": 33140 }, { "epoch": 9.40959409594096, "grad_norm": 8.583207130432129, "learning_rate": 9.059466363894408e-05, "loss": 0.032632356882095336, "step": 33150 }, { "epoch": 9.41243258586432, "grad_norm": 4.824705600738525, "learning_rate": 9.059182514902073e-05, "loss": 0.03455105721950531, "step": 33160 }, { "epoch": 9.41527107578768, "grad_norm": 6.859195709228516, "learning_rate": 9.058898665909737e-05, "loss": 0.028941354155540465, "step": 33170 }, { "epoch": 9.418109565711042, "grad_norm": 4.087761878967285, "learning_rate": 9.0586148169174e-05, "loss": 0.03194277882575989, "step": 33180 }, { "epoch": 9.420948055634403, "grad_norm": 8.916923522949219, "learning_rate": 9.058330967925064e-05, "loss": 0.042536163330078126, "step": 33190 }, { "epoch": 9.423786545557764, "grad_norm": 2.127915620803833, "learning_rate": 9.058047118932729e-05, "loss": 0.04258140325546265, "step": 33200 }, { "epoch": 9.426625035481123, "grad_norm": 0.7151313424110413, "learning_rate": 9.057791654839626e-05, "loss": 0.06843194365501404, "step": 33210 }, { "epoch": 9.429463525404485, "grad_norm": 12.185401916503906, "learning_rate": 9.05750780584729e-05, "loss": 0.04813770055770874, "step": 33220 }, { "epoch": 9.432302015327846, "grad_norm": 2.599138021469116, "learning_rate": 9.057223956854953e-05, "loss": 0.026743701100349425, "step": 33230 }, { "epoch": 9.435140505251207, "grad_norm": 2.326446771621704, "learning_rate": 9.056940107862617e-05, "loss": 0.033201602101325986, "step": 33240 }, { "epoch": 9.437978995174568, "grad_norm": 7.677329063415527, "learning_rate": 9.056656258870282e-05, "loss": 0.04028915464878082, "step": 33250 }, { "epoch": 9.440817485097927, "grad_norm": 5.444786071777344, "learning_rate": 9.056372409877945e-05, "loss": 0.03433045446872711, "step": 33260 }, { "epoch": 9.443655975021288, "grad_norm": 8.08906078338623, "learning_rate": 9.056088560885609e-05, "loss": 0.03829061985015869, "step": 33270 }, { "epoch": 9.44649446494465, "grad_norm": 4.421164512634277, "learning_rate": 9.055804711893273e-05, "loss": 0.05907021164894104, "step": 33280 }, { "epoch": 9.44933295486801, "grad_norm": 0.23350568115711212, "learning_rate": 9.055520862900938e-05, "loss": 0.03412868082523346, "step": 33290 }, { "epoch": 9.452171444791372, "grad_norm": 6.77054500579834, "learning_rate": 9.0552370139086e-05, "loss": 0.04794325828552246, "step": 33300 }, { "epoch": 9.455009934714731, "grad_norm": 6.185222625732422, "learning_rate": 9.054953164916264e-05, "loss": 0.033584460616111755, "step": 33310 }, { "epoch": 9.457848424638092, "grad_norm": 9.665891647338867, "learning_rate": 9.05466931592393e-05, "loss": 0.04589351713657379, "step": 33320 }, { "epoch": 9.460686914561453, "grad_norm": 0.8519517779350281, "learning_rate": 9.054385466931593e-05, "loss": 0.0348354309797287, "step": 33330 }, { "epoch": 9.463525404484814, "grad_norm": 6.577652454376221, "learning_rate": 9.054101617939257e-05, "loss": 0.05248621702194214, "step": 33340 }, { "epoch": 9.466363894408175, "grad_norm": 5.779728412628174, "learning_rate": 9.053817768946921e-05, "loss": 0.03517221212387085, "step": 33350 }, { "epoch": 9.469202384331535, "grad_norm": 5.463120460510254, "learning_rate": 9.053533919954584e-05, "loss": 0.02680180370807648, "step": 33360 }, { "epoch": 9.472040874254896, "grad_norm": 7.439458847045898, "learning_rate": 9.053250070962248e-05, "loss": 0.0325348287820816, "step": 33370 }, { "epoch": 9.474879364178257, "grad_norm": 3.7902910709381104, "learning_rate": 9.052966221969913e-05, "loss": 0.018751257658004762, "step": 33380 }, { "epoch": 9.477717854101618, "grad_norm": 9.099034309387207, "learning_rate": 9.052682372977576e-05, "loss": 0.04602106809616089, "step": 33390 }, { "epoch": 9.48055634402498, "grad_norm": 7.809533596038818, "learning_rate": 9.05239852398524e-05, "loss": 0.028283408284187316, "step": 33400 }, { "epoch": 9.48339483394834, "grad_norm": 2.1843693256378174, "learning_rate": 9.052114674992905e-05, "loss": 0.051069968938827516, "step": 33410 }, { "epoch": 9.4862333238717, "grad_norm": 4.366445541381836, "learning_rate": 9.051830826000569e-05, "loss": 0.05728021264076233, "step": 33420 }, { "epoch": 9.48907181379506, "grad_norm": 7.037250995635986, "learning_rate": 9.051546977008231e-05, "loss": 0.05696791410446167, "step": 33430 }, { "epoch": 9.491910303718422, "grad_norm": 9.646135330200195, "learning_rate": 9.051263128015896e-05, "loss": 0.03192871809005737, "step": 33440 }, { "epoch": 9.494748793641783, "grad_norm": 11.774679183959961, "learning_rate": 9.05097927902356e-05, "loss": 0.05010857582092285, "step": 33450 }, { "epoch": 9.497587283565144, "grad_norm": 3.6648123264312744, "learning_rate": 9.050695430031224e-05, "loss": 0.03766113817691803, "step": 33460 }, { "epoch": 9.500425773488503, "grad_norm": 13.892163276672363, "learning_rate": 9.050411581038888e-05, "loss": 0.04227945506572724, "step": 33470 }, { "epoch": 9.503264263411864, "grad_norm": 3.7332606315612793, "learning_rate": 9.050127732046552e-05, "loss": 0.04861484169960022, "step": 33480 }, { "epoch": 9.506102753335226, "grad_norm": 11.879192352294922, "learning_rate": 9.049843883054215e-05, "loss": 0.044326949119567874, "step": 33490 }, { "epoch": 9.508941243258587, "grad_norm": 5.697183609008789, "learning_rate": 9.049560034061879e-05, "loss": 0.03038971424102783, "step": 33500 }, { "epoch": 9.508941243258587, "eval_accuracy": 0.9575252750047689, "eval_loss": 0.13387979567050934, "eval_runtime": 31.934, "eval_samples_per_second": 492.485, "eval_steps_per_second": 7.703, "step": 33500 }, { "epoch": 9.511779733181948, "grad_norm": 3.468780517578125, "learning_rate": 9.049276185069543e-05, "loss": 0.028647753596305846, "step": 33510 }, { "epoch": 9.514618223105309, "grad_norm": 8.438267707824707, "learning_rate": 9.048992336077207e-05, "loss": 0.04265975654125213, "step": 33520 }, { "epoch": 9.517456713028668, "grad_norm": 6.174384117126465, "learning_rate": 9.048708487084871e-05, "loss": 0.03336701989173889, "step": 33530 }, { "epoch": 9.52029520295203, "grad_norm": 4.557669162750244, "learning_rate": 9.048424638092536e-05, "loss": 0.0412913054227829, "step": 33540 }, { "epoch": 9.52313369287539, "grad_norm": 7.508936405181885, "learning_rate": 9.048140789100198e-05, "loss": 0.042896991968154906, "step": 33550 }, { "epoch": 9.525972182798752, "grad_norm": 17.26679229736328, "learning_rate": 9.047856940107862e-05, "loss": 0.06571347713470459, "step": 33560 }, { "epoch": 9.528810672722113, "grad_norm": 7.838509559631348, "learning_rate": 9.047573091115527e-05, "loss": 0.05835697054862976, "step": 33570 }, { "epoch": 9.531649162645472, "grad_norm": 2.220446825027466, "learning_rate": 9.047289242123191e-05, "loss": 0.021180360019207, "step": 33580 }, { "epoch": 9.534487652568833, "grad_norm": 6.490535259246826, "learning_rate": 9.047005393130855e-05, "loss": 0.03668650686740875, "step": 33590 }, { "epoch": 9.537326142492194, "grad_norm": 5.157556056976318, "learning_rate": 9.046721544138519e-05, "loss": 0.02836046814918518, "step": 33600 }, { "epoch": 9.540164632415555, "grad_norm": 5.882872581481934, "learning_rate": 9.046437695146183e-05, "loss": 0.03420082926750183, "step": 33610 }, { "epoch": 9.543003122338916, "grad_norm": 12.08295726776123, "learning_rate": 9.046153846153846e-05, "loss": 0.03880531489849091, "step": 33620 }, { "epoch": 9.545841612262276, "grad_norm": 5.673193454742432, "learning_rate": 9.04586999716151e-05, "loss": 0.023988029360771178, "step": 33630 }, { "epoch": 9.548680102185637, "grad_norm": 3.7244186401367188, "learning_rate": 9.045586148169174e-05, "loss": 0.02903994619846344, "step": 33640 }, { "epoch": 9.551518592108998, "grad_norm": 6.055826663970947, "learning_rate": 9.045302299176838e-05, "loss": 0.04008515775203705, "step": 33650 }, { "epoch": 9.55435708203236, "grad_norm": 3.615166425704956, "learning_rate": 9.045018450184503e-05, "loss": 0.03184187412261963, "step": 33660 }, { "epoch": 9.55719557195572, "grad_norm": 2.443392276763916, "learning_rate": 9.044734601192167e-05, "loss": 0.024801641702651978, "step": 33670 }, { "epoch": 9.56003406187908, "grad_norm": 1.3751469850540161, "learning_rate": 9.04445075219983e-05, "loss": 0.04181051552295685, "step": 33680 }, { "epoch": 9.56287255180244, "grad_norm": 5.534793853759766, "learning_rate": 9.044166903207494e-05, "loss": 0.02756980061531067, "step": 33690 }, { "epoch": 9.565711041725802, "grad_norm": 5.793987274169922, "learning_rate": 9.043883054215158e-05, "loss": 0.041147470474243164, "step": 33700 }, { "epoch": 9.568549531649163, "grad_norm": 0.7501510381698608, "learning_rate": 9.043599205222822e-05, "loss": 0.04357251822948456, "step": 33710 }, { "epoch": 9.571388021572524, "grad_norm": 2.81779146194458, "learning_rate": 9.043315356230486e-05, "loss": 0.047640708088874814, "step": 33720 }, { "epoch": 9.574226511495883, "grad_norm": 5.3420796394348145, "learning_rate": 9.04303150723815e-05, "loss": 0.04275528788566589, "step": 33730 }, { "epoch": 9.577065001419244, "grad_norm": 15.536238670349121, "learning_rate": 9.042747658245814e-05, "loss": 0.04495518803596497, "step": 33740 }, { "epoch": 9.579903491342606, "grad_norm": 8.860390663146973, "learning_rate": 9.042463809253477e-05, "loss": 0.04080719947814941, "step": 33750 }, { "epoch": 9.582741981265967, "grad_norm": 9.44821548461914, "learning_rate": 9.042179960261141e-05, "loss": 0.05664714574813843, "step": 33760 }, { "epoch": 9.585580471189328, "grad_norm": 9.610262870788574, "learning_rate": 9.041896111268805e-05, "loss": 0.07494034171104431, "step": 33770 }, { "epoch": 9.588418961112689, "grad_norm": 3.1243135929107666, "learning_rate": 9.04161226227647e-05, "loss": 0.021959762275218963, "step": 33780 }, { "epoch": 9.591257451036048, "grad_norm": 1.1565051078796387, "learning_rate": 9.041328413284134e-05, "loss": 0.02729032337665558, "step": 33790 }, { "epoch": 9.59409594095941, "grad_norm": 11.15577220916748, "learning_rate": 9.041044564291798e-05, "loss": 0.03151533901691437, "step": 33800 }, { "epoch": 9.59693443088277, "grad_norm": 9.443313598632812, "learning_rate": 9.04076071529946e-05, "loss": 0.046026194095611574, "step": 33810 }, { "epoch": 9.599772920806132, "grad_norm": 2.8890037536621094, "learning_rate": 9.040476866307125e-05, "loss": 0.04294655919075012, "step": 33820 }, { "epoch": 9.602611410729493, "grad_norm": 10.791956901550293, "learning_rate": 9.040193017314789e-05, "loss": 0.040559834241867064, "step": 33830 }, { "epoch": 9.605449900652852, "grad_norm": 7.838287830352783, "learning_rate": 9.039909168322453e-05, "loss": 0.045685595273971556, "step": 33840 }, { "epoch": 9.608288390576213, "grad_norm": 7.415994644165039, "learning_rate": 9.039625319330117e-05, "loss": 0.03832406997680664, "step": 33850 }, { "epoch": 9.611126880499574, "grad_norm": 7.476558685302734, "learning_rate": 9.039341470337781e-05, "loss": 0.058903664350509644, "step": 33860 }, { "epoch": 9.613965370422935, "grad_norm": 4.936344146728516, "learning_rate": 9.039057621345445e-05, "loss": 0.04167869091033936, "step": 33870 }, { "epoch": 9.616803860346296, "grad_norm": 13.136256217956543, "learning_rate": 9.038773772353108e-05, "loss": 0.047945669293403624, "step": 33880 }, { "epoch": 9.619642350269656, "grad_norm": 8.841238975524902, "learning_rate": 9.038489923360772e-05, "loss": 0.040363076329231265, "step": 33890 }, { "epoch": 9.622480840193017, "grad_norm": 6.543757915496826, "learning_rate": 9.038206074368436e-05, "loss": 0.04535337090492249, "step": 33900 }, { "epoch": 9.625319330116378, "grad_norm": 1.3899139165878296, "learning_rate": 9.0379222253761e-05, "loss": 0.04646455347537994, "step": 33910 }, { "epoch": 9.628157820039739, "grad_norm": 0.40657347440719604, "learning_rate": 9.037638376383765e-05, "loss": 0.029906392097473145, "step": 33920 }, { "epoch": 9.6309963099631, "grad_norm": 6.862537860870361, "learning_rate": 9.037354527391429e-05, "loss": 0.06086822748184204, "step": 33930 }, { "epoch": 9.633834799886461, "grad_norm": 10.930984497070312, "learning_rate": 9.037070678399092e-05, "loss": 0.03553206026554108, "step": 33940 }, { "epoch": 9.63667328980982, "grad_norm": 15.197175979614258, "learning_rate": 9.036786829406756e-05, "loss": 0.054810887575149535, "step": 33950 }, { "epoch": 9.639511779733182, "grad_norm": 4.819674968719482, "learning_rate": 9.03650298041442e-05, "loss": 0.041165906190872195, "step": 33960 }, { "epoch": 9.642350269656543, "grad_norm": 4.773018836975098, "learning_rate": 9.036219131422084e-05, "loss": 0.045457509160041806, "step": 33970 }, { "epoch": 9.645188759579904, "grad_norm": 4.253553867340088, "learning_rate": 9.035935282429748e-05, "loss": 0.03687196671962738, "step": 33980 }, { "epoch": 9.648027249503265, "grad_norm": 6.763425350189209, "learning_rate": 9.035651433437412e-05, "loss": 0.02428765892982483, "step": 33990 }, { "epoch": 9.650865739426624, "grad_norm": 2.3768458366394043, "learning_rate": 9.035367584445076e-05, "loss": 0.04737922847270966, "step": 34000 }, { "epoch": 9.650865739426624, "eval_accuracy": 0.9580975392636867, "eval_loss": 0.13609649240970612, "eval_runtime": 33.4297, "eval_samples_per_second": 470.45, "eval_steps_per_second": 7.359, "step": 34000 }, { "epoch": 9.653704229349986, "grad_norm": 4.608358383178711, "learning_rate": 9.035083735452739e-05, "loss": 0.05985658168792725, "step": 34010 }, { "epoch": 9.656542719273347, "grad_norm": 6.124251365661621, "learning_rate": 9.034799886460403e-05, "loss": 0.04412680268287659, "step": 34020 }, { "epoch": 9.659381209196708, "grad_norm": 4.959427833557129, "learning_rate": 9.034516037468067e-05, "loss": 0.027800750732421876, "step": 34030 }, { "epoch": 9.662219699120069, "grad_norm": 3.1895689964294434, "learning_rate": 9.03423218847573e-05, "loss": 0.027206403017044068, "step": 34040 }, { "epoch": 9.665058189043428, "grad_norm": 7.463231086730957, "learning_rate": 9.033948339483396e-05, "loss": 0.05610639452934265, "step": 34050 }, { "epoch": 9.66789667896679, "grad_norm": 2.4203760623931885, "learning_rate": 9.03366449049106e-05, "loss": 0.03701602816581726, "step": 34060 }, { "epoch": 9.67073516889015, "grad_norm": 1.2597800493240356, "learning_rate": 9.033380641498723e-05, "loss": 0.01692022830247879, "step": 34070 }, { "epoch": 9.673573658813511, "grad_norm": 5.837272644042969, "learning_rate": 9.033096792506387e-05, "loss": 0.03995631039142609, "step": 34080 }, { "epoch": 9.676412148736873, "grad_norm": 1.987341046333313, "learning_rate": 9.032812943514051e-05, "loss": 0.03113879859447479, "step": 34090 }, { "epoch": 9.679250638660232, "grad_norm": 7.756437301635742, "learning_rate": 9.032529094521715e-05, "loss": 0.023784950375556946, "step": 34100 }, { "epoch": 9.682089128583593, "grad_norm": 7.320964813232422, "learning_rate": 9.032245245529379e-05, "loss": 0.05391584634780884, "step": 34110 }, { "epoch": 9.684927618506954, "grad_norm": 1.1724367141723633, "learning_rate": 9.031961396537043e-05, "loss": 0.052960509061813356, "step": 34120 }, { "epoch": 9.687766108430315, "grad_norm": 5.721899032592773, "learning_rate": 9.031677547544707e-05, "loss": 0.046803629398345946, "step": 34130 }, { "epoch": 9.690604598353676, "grad_norm": 8.062273979187012, "learning_rate": 9.03139369855237e-05, "loss": 0.034913060069084165, "step": 34140 }, { "epoch": 9.693443088277036, "grad_norm": 0.6009421348571777, "learning_rate": 9.031109849560034e-05, "loss": 0.031809601187705996, "step": 34150 }, { "epoch": 9.696281578200397, "grad_norm": 7.731359004974365, "learning_rate": 9.030826000567699e-05, "loss": 0.03187408149242401, "step": 34160 }, { "epoch": 9.699120068123758, "grad_norm": 9.24056339263916, "learning_rate": 9.030542151575361e-05, "loss": 0.047576618194580075, "step": 34170 }, { "epoch": 9.701958558047119, "grad_norm": 10.47912883758545, "learning_rate": 9.030258302583027e-05, "loss": 0.06617329716682434, "step": 34180 }, { "epoch": 9.70479704797048, "grad_norm": 9.581725120544434, "learning_rate": 9.029974453590691e-05, "loss": 0.05017186403274536, "step": 34190 }, { "epoch": 9.707635537893841, "grad_norm": 2.2349345684051514, "learning_rate": 9.029690604598354e-05, "loss": 0.03913734257221222, "step": 34200 }, { "epoch": 9.7104740278172, "grad_norm": 9.559431076049805, "learning_rate": 9.029406755606018e-05, "loss": 0.0346607506275177, "step": 34210 }, { "epoch": 9.713312517740562, "grad_norm": 2.2018871307373047, "learning_rate": 9.029122906613682e-05, "loss": 0.07170119881629944, "step": 34220 }, { "epoch": 9.716151007663923, "grad_norm": 2.2907776832580566, "learning_rate": 9.028839057621346e-05, "loss": 0.02509925365447998, "step": 34230 }, { "epoch": 9.718989497587284, "grad_norm": 2.5342588424682617, "learning_rate": 9.028555208629009e-05, "loss": 0.03195511400699615, "step": 34240 }, { "epoch": 9.721827987510645, "grad_norm": 1.3210655450820923, "learning_rate": 9.028271359636674e-05, "loss": 0.03900220990180969, "step": 34250 }, { "epoch": 9.724666477434004, "grad_norm": 9.656792640686035, "learning_rate": 9.027987510644339e-05, "loss": 0.02643497884273529, "step": 34260 }, { "epoch": 9.727504967357365, "grad_norm": 1.5588232278823853, "learning_rate": 9.027703661652001e-05, "loss": 0.03875682651996613, "step": 34270 }, { "epoch": 9.730343457280727, "grad_norm": 9.861798286437988, "learning_rate": 9.027419812659665e-05, "loss": 0.07505159378051758, "step": 34280 }, { "epoch": 9.733181947204088, "grad_norm": 8.121441841125488, "learning_rate": 9.02713596366733e-05, "loss": 0.054827880859375, "step": 34290 }, { "epoch": 9.736020437127449, "grad_norm": 0.6774362325668335, "learning_rate": 9.026852114674992e-05, "loss": 0.027951955795288086, "step": 34300 }, { "epoch": 9.73885892705081, "grad_norm": 2.326490879058838, "learning_rate": 9.026568265682658e-05, "loss": 0.028879976272583006, "step": 34310 }, { "epoch": 9.74169741697417, "grad_norm": 6.065489768981934, "learning_rate": 9.026284416690322e-05, "loss": 0.02530324161052704, "step": 34320 }, { "epoch": 9.74453590689753, "grad_norm": 6.668369770050049, "learning_rate": 9.026000567697985e-05, "loss": 0.03940196335315704, "step": 34330 }, { "epoch": 9.747374396820891, "grad_norm": 7.518253326416016, "learning_rate": 9.025716718705649e-05, "loss": 0.04603544175624848, "step": 34340 }, { "epoch": 9.750212886744253, "grad_norm": 14.980262756347656, "learning_rate": 9.025432869713313e-05, "loss": 0.023533719778060912, "step": 34350 }, { "epoch": 9.753051376667614, "grad_norm": 9.096830368041992, "learning_rate": 9.025149020720977e-05, "loss": 0.04572895169258118, "step": 34360 }, { "epoch": 9.755889866590973, "grad_norm": 4.245804786682129, "learning_rate": 9.02486517172864e-05, "loss": 0.05351103544235229, "step": 34370 }, { "epoch": 9.758728356514334, "grad_norm": 8.785393714904785, "learning_rate": 9.024581322736306e-05, "loss": 0.05559138059616089, "step": 34380 }, { "epoch": 9.761566846437695, "grad_norm": 4.550991535186768, "learning_rate": 9.024297473743968e-05, "loss": 0.04080321192741394, "step": 34390 }, { "epoch": 9.764405336361056, "grad_norm": 13.535555839538574, "learning_rate": 9.024013624751632e-05, "loss": 0.027775102853775026, "step": 34400 }, { "epoch": 9.767243826284417, "grad_norm": 3.501755714416504, "learning_rate": 9.023729775759297e-05, "loss": 0.03395796418190002, "step": 34410 }, { "epoch": 9.770082316207777, "grad_norm": 2.1978719234466553, "learning_rate": 9.023445926766961e-05, "loss": 0.0392682671546936, "step": 34420 }, { "epoch": 9.772920806131138, "grad_norm": 6.529596328735352, "learning_rate": 9.023162077774623e-05, "loss": 0.04988230466842651, "step": 34430 }, { "epoch": 9.775759296054499, "grad_norm": 1.989915132522583, "learning_rate": 9.022878228782288e-05, "loss": 0.05419514179229736, "step": 34440 }, { "epoch": 9.77859778597786, "grad_norm": 1.2544078826904297, "learning_rate": 9.022594379789953e-05, "loss": 0.01975938379764557, "step": 34450 }, { "epoch": 9.781436275901221, "grad_norm": 2.9216854572296143, "learning_rate": 9.022310530797616e-05, "loss": 0.03405232429504394, "step": 34460 }, { "epoch": 9.78427476582458, "grad_norm": 8.582023620605469, "learning_rate": 9.02202668180528e-05, "loss": 0.03230764269828797, "step": 34470 }, { "epoch": 9.787113255747942, "grad_norm": 5.963564395904541, "learning_rate": 9.021742832812944e-05, "loss": 0.05612349510192871, "step": 34480 }, { "epoch": 9.789951745671303, "grad_norm": 6.603904724121094, "learning_rate": 9.021458983820607e-05, "loss": 0.04207336604595184, "step": 34490 }, { "epoch": 9.792790235594664, "grad_norm": 7.639875888824463, "learning_rate": 9.021175134828271e-05, "loss": 0.037329816818237306, "step": 34500 }, { "epoch": 9.792790235594664, "eval_accuracy": 0.9586698035226044, "eval_loss": 0.12896819412708282, "eval_runtime": 33.107, "eval_samples_per_second": 475.035, "eval_steps_per_second": 7.43, "step": 34500 }, { "epoch": 9.795628725518025, "grad_norm": 13.937736511230469, "learning_rate": 9.020891285835937e-05, "loss": 0.05667625665664673, "step": 34510 }, { "epoch": 9.798467215441384, "grad_norm": 13.180200576782227, "learning_rate": 9.0206074368436e-05, "loss": 0.03593425750732422, "step": 34520 }, { "epoch": 9.801305705364745, "grad_norm": 5.362333297729492, "learning_rate": 9.020323587851264e-05, "loss": 0.03793310821056366, "step": 34530 }, { "epoch": 9.804144195288107, "grad_norm": 11.555890083312988, "learning_rate": 9.020039738858928e-05, "loss": 0.0397509753704071, "step": 34540 }, { "epoch": 9.806982685211468, "grad_norm": 4.374037742614746, "learning_rate": 9.019755889866592e-05, "loss": 0.036021357774734496, "step": 34550 }, { "epoch": 9.809821175134829, "grad_norm": 7.874999523162842, "learning_rate": 9.019472040874255e-05, "loss": 0.056783640384674074, "step": 34560 }, { "epoch": 9.81265966505819, "grad_norm": 17.913972854614258, "learning_rate": 9.019188191881919e-05, "loss": 0.032859507203102115, "step": 34570 }, { "epoch": 9.81549815498155, "grad_norm": 4.717205047607422, "learning_rate": 9.018904342889584e-05, "loss": 0.028204640746116637, "step": 34580 }, { "epoch": 9.81833664490491, "grad_norm": 6.732220649719238, "learning_rate": 9.018620493897247e-05, "loss": 0.06731069087982178, "step": 34590 }, { "epoch": 9.821175134828271, "grad_norm": 5.308062553405762, "learning_rate": 9.018336644904911e-05, "loss": 0.06169851422309876, "step": 34600 }, { "epoch": 9.824013624751633, "grad_norm": 6.349146366119385, "learning_rate": 9.018052795912575e-05, "loss": 0.021794678270816804, "step": 34610 }, { "epoch": 9.826852114674994, "grad_norm": 8.936722755432129, "learning_rate": 9.017768946920238e-05, "loss": 0.04095582365989685, "step": 34620 }, { "epoch": 9.829690604598353, "grad_norm": 0.37106698751449585, "learning_rate": 9.017485097927902e-05, "loss": 0.031246039271354675, "step": 34630 }, { "epoch": 9.832529094521714, "grad_norm": 8.553706169128418, "learning_rate": 9.017201248935566e-05, "loss": 0.03655058741569519, "step": 34640 }, { "epoch": 9.835367584445075, "grad_norm": 3.0822067260742188, "learning_rate": 9.01691739994323e-05, "loss": 0.031194382905960084, "step": 34650 }, { "epoch": 9.838206074368436, "grad_norm": 5.298854351043701, "learning_rate": 9.016633550950895e-05, "loss": 0.03705215752124787, "step": 34660 }, { "epoch": 9.841044564291797, "grad_norm": 2.422327756881714, "learning_rate": 9.016349701958559e-05, "loss": 0.03469339907169342, "step": 34670 }, { "epoch": 9.843883054215157, "grad_norm": 4.401023864746094, "learning_rate": 9.016065852966223e-05, "loss": 0.042608579993247984, "step": 34680 }, { "epoch": 9.846721544138518, "grad_norm": 4.065598011016846, "learning_rate": 9.015782003973886e-05, "loss": 0.0962814748287201, "step": 34690 }, { "epoch": 9.849560034061879, "grad_norm": 15.094704627990723, "learning_rate": 9.01549815498155e-05, "loss": 0.06412912011146546, "step": 34700 }, { "epoch": 9.85239852398524, "grad_norm": 2.0951614379882812, "learning_rate": 9.015214305989215e-05, "loss": 0.04183577597141266, "step": 34710 }, { "epoch": 9.855237013908601, "grad_norm": 13.23237133026123, "learning_rate": 9.014930456996878e-05, "loss": 0.055329978466033936, "step": 34720 }, { "epoch": 9.858075503831962, "grad_norm": 12.889482498168945, "learning_rate": 9.014646608004542e-05, "loss": 0.05064680576324463, "step": 34730 }, { "epoch": 9.860913993755322, "grad_norm": 7.088404178619385, "learning_rate": 9.014362759012206e-05, "loss": 0.04409278035163879, "step": 34740 }, { "epoch": 9.863752483678683, "grad_norm": 9.816404342651367, "learning_rate": 9.014078910019869e-05, "loss": 0.0387785404920578, "step": 34750 }, { "epoch": 9.866590973602044, "grad_norm": 5.354479789733887, "learning_rate": 9.013795061027533e-05, "loss": 0.048231500387191775, "step": 34760 }, { "epoch": 9.869429463525405, "grad_norm": 7.224972248077393, "learning_rate": 9.013511212035197e-05, "loss": 0.0595244288444519, "step": 34770 }, { "epoch": 9.872267953448766, "grad_norm": 1.6899718046188354, "learning_rate": 9.013227363042862e-05, "loss": 0.038418203592300415, "step": 34780 }, { "epoch": 9.875106443372125, "grad_norm": 10.518245697021484, "learning_rate": 9.012943514050526e-05, "loss": 0.06840636730194091, "step": 34790 }, { "epoch": 9.877944933295487, "grad_norm": 2.9735124111175537, "learning_rate": 9.01265966505819e-05, "loss": 0.04749214351177215, "step": 34800 }, { "epoch": 9.880783423218848, "grad_norm": 7.260819911956787, "learning_rate": 9.012375816065854e-05, "loss": 0.04455476105213165, "step": 34810 }, { "epoch": 9.883621913142209, "grad_norm": 8.75222110748291, "learning_rate": 9.012091967073517e-05, "loss": 0.046039119362831116, "step": 34820 }, { "epoch": 9.88646040306557, "grad_norm": 0.6138511896133423, "learning_rate": 9.011808118081181e-05, "loss": 0.03411709070205689, "step": 34830 }, { "epoch": 9.88929889298893, "grad_norm": 5.51369047164917, "learning_rate": 9.011524269088845e-05, "loss": 0.04940845370292664, "step": 34840 }, { "epoch": 9.89213738291229, "grad_norm": 4.484133720397949, "learning_rate": 9.011240420096509e-05, "loss": 0.040586608648300174, "step": 34850 }, { "epoch": 9.894975872835651, "grad_norm": 2.2267560958862305, "learning_rate": 9.010956571104173e-05, "loss": 0.04715525209903717, "step": 34860 }, { "epoch": 9.897814362759012, "grad_norm": 4.639035224914551, "learning_rate": 9.010672722111837e-05, "loss": 0.039865562319755556, "step": 34870 }, { "epoch": 9.900652852682374, "grad_norm": 1.4928566217422485, "learning_rate": 9.0103888731195e-05, "loss": 0.03186411559581757, "step": 34880 }, { "epoch": 9.903491342605733, "grad_norm": 7.622300624847412, "learning_rate": 9.010105024127164e-05, "loss": 0.03934841454029083, "step": 34890 }, { "epoch": 9.906329832529094, "grad_norm": 7.729339599609375, "learning_rate": 9.009821175134828e-05, "loss": 0.04945856928825378, "step": 34900 }, { "epoch": 9.909168322452455, "grad_norm": 7.210046768188477, "learning_rate": 9.009537326142493e-05, "loss": 0.03677929937839508, "step": 34910 }, { "epoch": 9.912006812375816, "grad_norm": 12.899341583251953, "learning_rate": 9.009253477150157e-05, "loss": 0.06350288391113282, "step": 34920 }, { "epoch": 9.914845302299177, "grad_norm": 4.080487251281738, "learning_rate": 9.008969628157821e-05, "loss": 0.02994237542152405, "step": 34930 }, { "epoch": 9.917683792222537, "grad_norm": 9.297195434570312, "learning_rate": 9.008685779165485e-05, "loss": 0.03190662562847137, "step": 34940 }, { "epoch": 9.920522282145898, "grad_norm": 8.299120903015137, "learning_rate": 9.008401930173148e-05, "loss": 0.020569103956222533, "step": 34950 }, { "epoch": 9.923360772069259, "grad_norm": 8.229928970336914, "learning_rate": 9.008118081180812e-05, "loss": 0.06504533290863038, "step": 34960 }, { "epoch": 9.92619926199262, "grad_norm": 6.321653366088867, "learning_rate": 9.007834232188476e-05, "loss": 0.025967910885810852, "step": 34970 }, { "epoch": 9.929037751915981, "grad_norm": 11.438131332397461, "learning_rate": 9.00755038319614e-05, "loss": 0.0362243115901947, "step": 34980 }, { "epoch": 9.931876241839342, "grad_norm": 2.1078298091888428, "learning_rate": 9.007266534203804e-05, "loss": 0.046492525935173036, "step": 34990 }, { "epoch": 9.934714731762702, "grad_norm": 3.575303792953491, "learning_rate": 9.006982685211468e-05, "loss": 0.04248427450656891, "step": 35000 }, { "epoch": 9.934714731762702, "eval_accuracy": 0.9601322566287277, "eval_loss": 0.12422654777765274, "eval_runtime": 32.5445, "eval_samples_per_second": 483.246, "eval_steps_per_second": 7.559, "step": 35000 }, { "epoch": 9.937553221686063, "grad_norm": 14.13197135925293, "learning_rate": 9.006698836219131e-05, "loss": 0.037842321395874026, "step": 35010 }, { "epoch": 9.940391711609424, "grad_norm": 1.002303123474121, "learning_rate": 9.006414987226795e-05, "loss": 0.03454540073871613, "step": 35020 }, { "epoch": 9.943230201532785, "grad_norm": 9.0866117477417, "learning_rate": 9.00613113823446e-05, "loss": 0.06897977590560914, "step": 35030 }, { "epoch": 9.946068691456146, "grad_norm": 11.986724853515625, "learning_rate": 9.005847289242124e-05, "loss": 0.0504921555519104, "step": 35040 }, { "epoch": 9.948907181379505, "grad_norm": 2.956545114517212, "learning_rate": 9.005563440249788e-05, "loss": 0.03312873542308807, "step": 35050 }, { "epoch": 9.951745671302866, "grad_norm": 5.018165588378906, "learning_rate": 9.005279591257452e-05, "loss": 0.03710829615592957, "step": 35060 }, { "epoch": 9.954584161226228, "grad_norm": 5.058415412902832, "learning_rate": 9.004995742265116e-05, "loss": 0.0395652562379837, "step": 35070 }, { "epoch": 9.957422651149589, "grad_norm": 3.025658369064331, "learning_rate": 9.004711893272779e-05, "loss": 0.023204973340034483, "step": 35080 }, { "epoch": 9.96026114107295, "grad_norm": 6.235620498657227, "learning_rate": 9.004428044280443e-05, "loss": 0.025902071595191957, "step": 35090 }, { "epoch": 9.96309963099631, "grad_norm": 16.168088912963867, "learning_rate": 9.004144195288107e-05, "loss": 0.03883004188537598, "step": 35100 }, { "epoch": 9.96593812091967, "grad_norm": 5.036298751831055, "learning_rate": 9.003860346295771e-05, "loss": 0.032814204692840576, "step": 35110 }, { "epoch": 9.968776610843031, "grad_norm": 12.805106163024902, "learning_rate": 9.003576497303435e-05, "loss": 0.05057127475738525, "step": 35120 }, { "epoch": 9.971615100766392, "grad_norm": 8.813213348388672, "learning_rate": 9.0032926483111e-05, "loss": 0.03358563184738159, "step": 35130 }, { "epoch": 9.974453590689754, "grad_norm": 11.621519088745117, "learning_rate": 9.003008799318762e-05, "loss": 0.04921534061431885, "step": 35140 }, { "epoch": 9.977292080613115, "grad_norm": 9.895565032958984, "learning_rate": 9.002724950326426e-05, "loss": 0.05029029846191406, "step": 35150 }, { "epoch": 9.980130570536474, "grad_norm": 3.8346831798553467, "learning_rate": 9.00244110133409e-05, "loss": 0.05150827169418335, "step": 35160 }, { "epoch": 9.982969060459835, "grad_norm": 10.825848579406738, "learning_rate": 9.002157252341755e-05, "loss": 0.04780220985412598, "step": 35170 }, { "epoch": 9.985807550383196, "grad_norm": 7.786962509155273, "learning_rate": 9.001873403349419e-05, "loss": 0.030110731720924377, "step": 35180 }, { "epoch": 9.988646040306557, "grad_norm": 8.129878997802734, "learning_rate": 9.001589554357083e-05, "loss": 0.04617004692554474, "step": 35190 }, { "epoch": 9.991484530229918, "grad_norm": 6.897689342498779, "learning_rate": 9.001305705364747e-05, "loss": 0.043616437911987306, "step": 35200 }, { "epoch": 9.994323020153278, "grad_norm": 2.1354312896728516, "learning_rate": 9.00102185637241e-05, "loss": 0.04138109087944031, "step": 35210 }, { "epoch": 9.997161510076639, "grad_norm": 10.23293685913086, "learning_rate": 9.000738007380074e-05, "loss": 0.06601381897926331, "step": 35220 }, { "epoch": 10.0, "grad_norm": 4.907530784606934, "learning_rate": 9.000454158387738e-05, "loss": 0.03132961690425873, "step": 35230 }, { "epoch": 10.002838489923361, "grad_norm": 6.095825672149658, "learning_rate": 9.000170309395402e-05, "loss": 0.034474506974220276, "step": 35240 }, { "epoch": 10.005676979846722, "grad_norm": 1.0833871364593506, "learning_rate": 8.999886460403066e-05, "loss": 0.03468805849552155, "step": 35250 }, { "epoch": 10.008515469770082, "grad_norm": 1.9794788360595703, "learning_rate": 8.99960261141073e-05, "loss": 0.018191394209861756, "step": 35260 }, { "epoch": 10.011353959693443, "grad_norm": 7.589320182800293, "learning_rate": 8.999318762418393e-05, "loss": 0.044070661067962646, "step": 35270 }, { "epoch": 10.014192449616804, "grad_norm": 4.70731782913208, "learning_rate": 8.999034913426058e-05, "loss": 0.03236395716667175, "step": 35280 }, { "epoch": 10.017030939540165, "grad_norm": 10.598764419555664, "learning_rate": 8.998751064433722e-05, "loss": 0.04661480188369751, "step": 35290 }, { "epoch": 10.019869429463526, "grad_norm": 2.7645905017852783, "learning_rate": 8.998467215441386e-05, "loss": 0.020618355274200438, "step": 35300 }, { "epoch": 10.022707919386885, "grad_norm": 3.3781700134277344, "learning_rate": 8.99818336644905e-05, "loss": 0.03017669916152954, "step": 35310 }, { "epoch": 10.025546409310246, "grad_norm": 4.626172065734863, "learning_rate": 8.997899517456714e-05, "loss": 0.025808876752853392, "step": 35320 }, { "epoch": 10.028384899233608, "grad_norm": 5.283383846282959, "learning_rate": 8.997615668464377e-05, "loss": 0.023609749972820282, "step": 35330 }, { "epoch": 10.031223389156969, "grad_norm": 1.2535128593444824, "learning_rate": 8.997331819472041e-05, "loss": 0.015572080016136169, "step": 35340 }, { "epoch": 10.03406187908033, "grad_norm": 6.29826021194458, "learning_rate": 8.997047970479705e-05, "loss": 0.026496103405952452, "step": 35350 }, { "epoch": 10.03690036900369, "grad_norm": 9.106131553649902, "learning_rate": 8.996764121487369e-05, "loss": 0.03776850402355194, "step": 35360 }, { "epoch": 10.03973885892705, "grad_norm": 4.188176155090332, "learning_rate": 8.996480272495032e-05, "loss": 0.017377933859825133, "step": 35370 }, { "epoch": 10.042577348850411, "grad_norm": 4.6593828201293945, "learning_rate": 8.996196423502698e-05, "loss": 0.031389948725700376, "step": 35380 }, { "epoch": 10.045415838773772, "grad_norm": 13.23885726928711, "learning_rate": 8.995912574510362e-05, "loss": 0.04422411322593689, "step": 35390 }, { "epoch": 10.048254328697134, "grad_norm": 5.3530402183532715, "learning_rate": 8.995628725518024e-05, "loss": 0.023479487001895904, "step": 35400 }, { "epoch": 10.051092818620495, "grad_norm": 5.335650444030762, "learning_rate": 8.995344876525689e-05, "loss": 0.02284470647573471, "step": 35410 }, { "epoch": 10.053931308543854, "grad_norm": 11.428906440734863, "learning_rate": 8.995061027533353e-05, "loss": 0.02944795787334442, "step": 35420 }, { "epoch": 10.056769798467215, "grad_norm": 0.9574472308158875, "learning_rate": 8.994777178541016e-05, "loss": 0.037615126371383666, "step": 35430 }, { "epoch": 10.059608288390576, "grad_norm": 12.252052307128906, "learning_rate": 8.994493329548681e-05, "loss": 0.04808579981327057, "step": 35440 }, { "epoch": 10.062446778313937, "grad_norm": 4.629715919494629, "learning_rate": 8.994209480556345e-05, "loss": 0.028264811635017394, "step": 35450 }, { "epoch": 10.065285268237298, "grad_norm": 1.797143816947937, "learning_rate": 8.993925631564008e-05, "loss": 0.021342653036117553, "step": 35460 }, { "epoch": 10.068123758160658, "grad_norm": 5.235736846923828, "learning_rate": 8.993641782571672e-05, "loss": 0.042367678880691526, "step": 35470 }, { "epoch": 10.070962248084019, "grad_norm": 3.0441195964813232, "learning_rate": 8.993357933579336e-05, "loss": 0.030853268504142762, "step": 35480 }, { "epoch": 10.07380073800738, "grad_norm": 2.8096425533294678, "learning_rate": 8.993074084587e-05, "loss": 0.03237650096416474, "step": 35490 }, { "epoch": 10.076639227930741, "grad_norm": 9.221693992614746, "learning_rate": 8.992790235594663e-05, "loss": 0.02778027355670929, "step": 35500 }, { "epoch": 10.076639227930741, "eval_accuracy": 0.9633750874292618, "eval_loss": 0.11557666212320328, "eval_runtime": 33.7785, "eval_samples_per_second": 465.592, "eval_steps_per_second": 7.283, "step": 35500 }, { "epoch": 10.079477717854102, "grad_norm": 8.569828987121582, "learning_rate": 8.992506386602329e-05, "loss": 0.03530609607696533, "step": 35510 }, { "epoch": 10.082316207777462, "grad_norm": 0.7592981457710266, "learning_rate": 8.992222537609993e-05, "loss": 0.02679634392261505, "step": 35520 }, { "epoch": 10.085154697700823, "grad_norm": 2.5994648933410645, "learning_rate": 8.991938688617656e-05, "loss": 0.018612052500247955, "step": 35530 }, { "epoch": 10.087993187624184, "grad_norm": 0.402659147977829, "learning_rate": 8.99165483962532e-05, "loss": 0.022032080590724944, "step": 35540 }, { "epoch": 10.090831677547545, "grad_norm": 6.377989292144775, "learning_rate": 8.991370990632984e-05, "loss": 0.03731480836868286, "step": 35550 }, { "epoch": 10.093670167470906, "grad_norm": 7.10392427444458, "learning_rate": 8.991087141640647e-05, "loss": 0.01801188290119171, "step": 35560 }, { "epoch": 10.096508657394267, "grad_norm": 2.914478063583374, "learning_rate": 8.990803292648311e-05, "loss": 0.02553568184375763, "step": 35570 }, { "epoch": 10.099347147317626, "grad_norm": 5.724979400634766, "learning_rate": 8.990519443655976e-05, "loss": 0.025281333923339845, "step": 35580 }, { "epoch": 10.102185637240988, "grad_norm": 7.512077331542969, "learning_rate": 8.990235594663639e-05, "loss": 0.07237924933433533, "step": 35590 }, { "epoch": 10.105024127164349, "grad_norm": 3.969888210296631, "learning_rate": 8.989951745671303e-05, "loss": 0.06016642451286316, "step": 35600 }, { "epoch": 10.10786261708771, "grad_norm": 10.489106178283691, "learning_rate": 8.989667896678967e-05, "loss": 0.05233728885650635, "step": 35610 }, { "epoch": 10.11070110701107, "grad_norm": 18.430604934692383, "learning_rate": 8.989384047686631e-05, "loss": 0.04052420258522034, "step": 35620 }, { "epoch": 10.11353959693443, "grad_norm": 5.950994491577148, "learning_rate": 8.989100198694294e-05, "loss": 0.03734069168567657, "step": 35630 }, { "epoch": 10.116378086857791, "grad_norm": 3.2801907062530518, "learning_rate": 8.98881634970196e-05, "loss": 0.03596199750900268, "step": 35640 }, { "epoch": 10.119216576781152, "grad_norm": 2.2445530891418457, "learning_rate": 8.988532500709624e-05, "loss": 0.026268157362937927, "step": 35650 }, { "epoch": 10.122055066704513, "grad_norm": 8.860625267028809, "learning_rate": 8.988248651717287e-05, "loss": 0.06126977205276489, "step": 35660 }, { "epoch": 10.124893556627875, "grad_norm": 1.1546177864074707, "learning_rate": 8.987964802724951e-05, "loss": 0.02712719440460205, "step": 35670 }, { "epoch": 10.127732046551234, "grad_norm": 3.214816093444824, "learning_rate": 8.987680953732615e-05, "loss": 0.04404184818267822, "step": 35680 }, { "epoch": 10.130570536474595, "grad_norm": 13.443633079528809, "learning_rate": 8.987397104740278e-05, "loss": 0.04980318546295166, "step": 35690 }, { "epoch": 10.133409026397956, "grad_norm": 3.9765381813049316, "learning_rate": 8.987113255747942e-05, "loss": 0.016919739544391632, "step": 35700 }, { "epoch": 10.136247516321317, "grad_norm": 8.197875022888184, "learning_rate": 8.98685779165484e-05, "loss": 0.07017661929130554, "step": 35710 }, { "epoch": 10.139086006244678, "grad_norm": 3.1088390350341797, "learning_rate": 8.986573942662504e-05, "loss": 0.06942081451416016, "step": 35720 }, { "epoch": 10.141924496168038, "grad_norm": 8.817517280578613, "learning_rate": 8.986290093670168e-05, "loss": 0.03665735721588135, "step": 35730 }, { "epoch": 10.144762986091399, "grad_norm": 1.1861857175827026, "learning_rate": 8.986006244677831e-05, "loss": 0.036595305800437926, "step": 35740 }, { "epoch": 10.14760147601476, "grad_norm": 0.8763297200202942, "learning_rate": 8.985722395685495e-05, "loss": 0.040348517894744876, "step": 35750 }, { "epoch": 10.150439965938121, "grad_norm": 9.285924911499023, "learning_rate": 8.98543854669316e-05, "loss": 0.013706232607364654, "step": 35760 }, { "epoch": 10.153278455861482, "grad_norm": 4.529600143432617, "learning_rate": 8.985154697700823e-05, "loss": 0.02067701518535614, "step": 35770 }, { "epoch": 10.156116945784843, "grad_norm": 3.654876232147217, "learning_rate": 8.984870848708487e-05, "loss": 0.04686549603939057, "step": 35780 }, { "epoch": 10.158955435708203, "grad_norm": 1.0600578784942627, "learning_rate": 8.984586999716152e-05, "loss": 0.052944880723953244, "step": 35790 }, { "epoch": 10.161793925631564, "grad_norm": 0.597686767578125, "learning_rate": 8.984303150723816e-05, "loss": 0.04448198676109314, "step": 35800 }, { "epoch": 10.164632415554925, "grad_norm": 2.9176697731018066, "learning_rate": 8.984019301731478e-05, "loss": 0.027156203985214233, "step": 35810 }, { "epoch": 10.167470905478286, "grad_norm": 4.992652893066406, "learning_rate": 8.983735452739144e-05, "loss": 0.06421912908554077, "step": 35820 }, { "epoch": 10.170309395401647, "grad_norm": 16.977439880371094, "learning_rate": 8.983451603746808e-05, "loss": 0.04995571374893189, "step": 35830 }, { "epoch": 10.173147885325006, "grad_norm": 5.753499507904053, "learning_rate": 8.983167754754471e-05, "loss": 0.05789737701416016, "step": 35840 }, { "epoch": 10.175986375248367, "grad_norm": 2.9677059650421143, "learning_rate": 8.982883905762135e-05, "loss": 0.020697629451751708, "step": 35850 }, { "epoch": 10.178824865171729, "grad_norm": 6.80134391784668, "learning_rate": 8.982600056769799e-05, "loss": 0.037233474850654605, "step": 35860 }, { "epoch": 10.18166335509509, "grad_norm": 7.588155746459961, "learning_rate": 8.982316207777462e-05, "loss": 0.052743804454803464, "step": 35870 }, { "epoch": 10.18450184501845, "grad_norm": 1.4563530683517456, "learning_rate": 8.982032358785126e-05, "loss": 0.025019359588623048, "step": 35880 }, { "epoch": 10.18734033494181, "grad_norm": 5.358053207397461, "learning_rate": 8.981748509792792e-05, "loss": 0.03238402307033539, "step": 35890 }, { "epoch": 10.190178824865171, "grad_norm": 3.033215284347534, "learning_rate": 8.981464660800454e-05, "loss": 0.031496307253837584, "step": 35900 }, { "epoch": 10.193017314788532, "grad_norm": 3.0171499252319336, "learning_rate": 8.981180811808119e-05, "loss": 0.024950800836086272, "step": 35910 }, { "epoch": 10.195855804711893, "grad_norm": 3.161802291870117, "learning_rate": 8.980896962815783e-05, "loss": 0.04481050074100494, "step": 35920 }, { "epoch": 10.198694294635255, "grad_norm": 3.17761492729187, "learning_rate": 8.980613113823447e-05, "loss": 0.030840969085693358, "step": 35930 }, { "epoch": 10.201532784558616, "grad_norm": 7.365265369415283, "learning_rate": 8.98032926483111e-05, "loss": 0.022648504376411437, "step": 35940 }, { "epoch": 10.204371274481975, "grad_norm": 2.589474678039551, "learning_rate": 8.980045415838774e-05, "loss": 0.026707589626312256, "step": 35950 }, { "epoch": 10.207209764405336, "grad_norm": 4.069021224975586, "learning_rate": 8.979761566846439e-05, "loss": 0.03705121576786041, "step": 35960 }, { "epoch": 10.210048254328697, "grad_norm": 0.24393557012081146, "learning_rate": 8.979477717854102e-05, "loss": 0.03254885673522949, "step": 35970 }, { "epoch": 10.212886744252058, "grad_norm": 1.2594830989837646, "learning_rate": 8.979193868861766e-05, "loss": 0.03080446124076843, "step": 35980 }, { "epoch": 10.21572523417542, "grad_norm": 8.93659782409668, "learning_rate": 8.97891001986943e-05, "loss": 0.02370862364768982, "step": 35990 }, { "epoch": 10.218563724098779, "grad_norm": 3.832294464111328, "learning_rate": 8.978626170877093e-05, "loss": 0.017827652394771576, "step": 36000 }, { "epoch": 10.218563724098779, "eval_accuracy": 0.957779614675399, "eval_loss": 0.132136270403862, "eval_runtime": 37.7629, "eval_samples_per_second": 416.467, "eval_steps_per_second": 6.514, "step": 36000 }, { "epoch": 10.22140221402214, "grad_norm": 8.465381622314453, "learning_rate": 8.978342321884757e-05, "loss": 0.038775470852851865, "step": 36010 }, { "epoch": 10.224240703945501, "grad_norm": 4.375872611999512, "learning_rate": 8.978058472892423e-05, "loss": 0.036762386560440063, "step": 36020 }, { "epoch": 10.227079193868862, "grad_norm": 9.900911331176758, "learning_rate": 8.977774623900085e-05, "loss": 0.03091752529144287, "step": 36030 }, { "epoch": 10.229917683792223, "grad_norm": 4.054807186126709, "learning_rate": 8.97749077490775e-05, "loss": 0.029804617166519165, "step": 36040 }, { "epoch": 10.232756173715583, "grad_norm": 0.9119287133216858, "learning_rate": 8.977206925915414e-05, "loss": 0.025563180446624756, "step": 36050 }, { "epoch": 10.235594663638944, "grad_norm": 6.995174884796143, "learning_rate": 8.976923076923078e-05, "loss": 0.04624827802181244, "step": 36060 }, { "epoch": 10.238433153562305, "grad_norm": 12.61793041229248, "learning_rate": 8.97663922793074e-05, "loss": 0.02561209499835968, "step": 36070 }, { "epoch": 10.241271643485666, "grad_norm": 3.6618480682373047, "learning_rate": 8.976355378938405e-05, "loss": 0.03663640320301056, "step": 36080 }, { "epoch": 10.244110133409027, "grad_norm": 2.6220040321350098, "learning_rate": 8.976071529946069e-05, "loss": 0.03488240838050842, "step": 36090 }, { "epoch": 10.246948623332386, "grad_norm": 6.596401214599609, "learning_rate": 8.975787680953733e-05, "loss": 0.04570221304893494, "step": 36100 }, { "epoch": 10.249787113255747, "grad_norm": 14.18932056427002, "learning_rate": 8.975503831961397e-05, "loss": 0.05138771533966065, "step": 36110 }, { "epoch": 10.252625603179109, "grad_norm": 8.373106002807617, "learning_rate": 8.975219982969061e-05, "loss": 0.03060210645198822, "step": 36120 }, { "epoch": 10.25546409310247, "grad_norm": 7.532520771026611, "learning_rate": 8.974936133976724e-05, "loss": 0.050930947065353394, "step": 36130 }, { "epoch": 10.25830258302583, "grad_norm": 6.857524871826172, "learning_rate": 8.974652284984388e-05, "loss": 0.034497812390327454, "step": 36140 }, { "epoch": 10.261141072949192, "grad_norm": 8.227092742919922, "learning_rate": 8.974368435992052e-05, "loss": 0.02581099569797516, "step": 36150 }, { "epoch": 10.263979562872551, "grad_norm": 5.448975086212158, "learning_rate": 8.974084586999717e-05, "loss": 0.037546226382255556, "step": 36160 }, { "epoch": 10.266818052795912, "grad_norm": 9.228538513183594, "learning_rate": 8.97380073800738e-05, "loss": 0.028178223967552186, "step": 36170 }, { "epoch": 10.269656542719273, "grad_norm": 10.540557861328125, "learning_rate": 8.973516889015045e-05, "loss": 0.05025039911270142, "step": 36180 }, { "epoch": 10.272495032642635, "grad_norm": 9.857961654663086, "learning_rate": 8.973233040022708e-05, "loss": 0.036443567276000975, "step": 36190 }, { "epoch": 10.275333522565996, "grad_norm": 2.3980181217193604, "learning_rate": 8.972949191030372e-05, "loss": 0.03443137109279633, "step": 36200 }, { "epoch": 10.278172012489355, "grad_norm": 6.983287334442139, "learning_rate": 8.972665342038036e-05, "loss": 0.022737984359264374, "step": 36210 }, { "epoch": 10.281010502412716, "grad_norm": 4.939234733581543, "learning_rate": 8.9723814930457e-05, "loss": 0.035260212421417234, "step": 36220 }, { "epoch": 10.283848992336077, "grad_norm": 9.760107040405273, "learning_rate": 8.972097644053364e-05, "loss": 0.04070761799812317, "step": 36230 }, { "epoch": 10.286687482259438, "grad_norm": 2.460111618041992, "learning_rate": 8.971813795061028e-05, "loss": 0.027867740392684935, "step": 36240 }, { "epoch": 10.2895259721828, "grad_norm": 9.781012535095215, "learning_rate": 8.971529946068692e-05, "loss": 0.029616537690162658, "step": 36250 }, { "epoch": 10.292364462106159, "grad_norm": 0.9655983448028564, "learning_rate": 8.971246097076355e-05, "loss": 0.030763041973114014, "step": 36260 }, { "epoch": 10.29520295202952, "grad_norm": 2.457181930541992, "learning_rate": 8.97096224808402e-05, "loss": 0.039024433493614195, "step": 36270 }, { "epoch": 10.298041441952881, "grad_norm": 3.4686529636383057, "learning_rate": 8.970678399091683e-05, "loss": 0.01536424309015274, "step": 36280 }, { "epoch": 10.300879931876242, "grad_norm": 0.4543377757072449, "learning_rate": 8.970394550099348e-05, "loss": 0.042859780788421634, "step": 36290 }, { "epoch": 10.303718421799603, "grad_norm": 11.570600509643555, "learning_rate": 8.970110701107012e-05, "loss": 0.028550463914871215, "step": 36300 }, { "epoch": 10.306556911722964, "grad_norm": 1.4489885568618774, "learning_rate": 8.969826852114676e-05, "loss": 0.08185790181159973, "step": 36310 }, { "epoch": 10.309395401646324, "grad_norm": 8.538164138793945, "learning_rate": 8.969543003122339e-05, "loss": 0.02990005016326904, "step": 36320 }, { "epoch": 10.312233891569685, "grad_norm": 3.6925809383392334, "learning_rate": 8.969259154130003e-05, "loss": 0.024834844470024108, "step": 36330 }, { "epoch": 10.315072381493046, "grad_norm": 3.9602081775665283, "learning_rate": 8.968975305137667e-05, "loss": 0.016393564641475677, "step": 36340 }, { "epoch": 10.317910871416407, "grad_norm": 5.625429153442383, "learning_rate": 8.968691456145331e-05, "loss": 0.043322625756263736, "step": 36350 }, { "epoch": 10.320749361339768, "grad_norm": 8.384807586669922, "learning_rate": 8.968407607152995e-05, "loss": 0.04883542954921723, "step": 36360 }, { "epoch": 10.323587851263127, "grad_norm": 6.630516052246094, "learning_rate": 8.96812375816066e-05, "loss": 0.02265990823507309, "step": 36370 }, { "epoch": 10.326426341186489, "grad_norm": 0.9711533784866333, "learning_rate": 8.967839909168323e-05, "loss": 0.03165134489536285, "step": 36380 }, { "epoch": 10.32926483110985, "grad_norm": 0.77884441614151, "learning_rate": 8.967556060175986e-05, "loss": 0.03928411602973938, "step": 36390 }, { "epoch": 10.33210332103321, "grad_norm": 3.6236801147460938, "learning_rate": 8.96727221118365e-05, "loss": 0.031386172771453856, "step": 36400 }, { "epoch": 10.334941810956572, "grad_norm": 3.32375168800354, "learning_rate": 8.966988362191315e-05, "loss": 0.03435225486755371, "step": 36410 }, { "epoch": 10.337780300879931, "grad_norm": 12.93134593963623, "learning_rate": 8.966704513198979e-05, "loss": 0.048977863788604734, "step": 36420 }, { "epoch": 10.340618790803292, "grad_norm": 11.424259185791016, "learning_rate": 8.966420664206643e-05, "loss": 0.030730390548706056, "step": 36430 }, { "epoch": 10.343457280726653, "grad_norm": 4.101449012756348, "learning_rate": 8.966136815214307e-05, "loss": 0.037335431575775145, "step": 36440 }, { "epoch": 10.346295770650014, "grad_norm": 10.820184707641602, "learning_rate": 8.96585296622197e-05, "loss": 0.0793159544467926, "step": 36450 }, { "epoch": 10.349134260573376, "grad_norm": 11.433136940002441, "learning_rate": 8.965569117229634e-05, "loss": 0.054524165391921994, "step": 36460 }, { "epoch": 10.351972750496735, "grad_norm": 3.697908401489258, "learning_rate": 8.965285268237298e-05, "loss": 0.029478156566619874, "step": 36470 }, { "epoch": 10.354811240420096, "grad_norm": 1.2975342273712158, "learning_rate": 8.965001419244962e-05, "loss": 0.043016102910041806, "step": 36480 }, { "epoch": 10.357649730343457, "grad_norm": 5.513762950897217, "learning_rate": 8.964717570252626e-05, "loss": 0.025089555978775026, "step": 36490 }, { "epoch": 10.360488220266818, "grad_norm": 2.189058780670166, "learning_rate": 8.96443372126029e-05, "loss": 0.05551120638847351, "step": 36500 }, { "epoch": 10.360488220266818, "eval_accuracy": 0.9619126343231386, "eval_loss": 0.11916238814592361, "eval_runtime": 34.1412, "eval_samples_per_second": 460.646, "eval_steps_per_second": 7.205, "step": 36500 }, { "epoch": 10.36332671019018, "grad_norm": 1.9355348348617554, "learning_rate": 8.964149872267955e-05, "loss": 0.03180611729621887, "step": 36510 }, { "epoch": 10.366165200113539, "grad_norm": 4.5752854347229, "learning_rate": 8.963866023275617e-05, "loss": 0.03767521977424622, "step": 36520 }, { "epoch": 10.3690036900369, "grad_norm": 7.519384860992432, "learning_rate": 8.963582174283281e-05, "loss": 0.034062325954437256, "step": 36530 }, { "epoch": 10.371842179960261, "grad_norm": 6.742743492126465, "learning_rate": 8.963298325290946e-05, "loss": 0.02505907416343689, "step": 36540 }, { "epoch": 10.374680669883622, "grad_norm": 5.050496578216553, "learning_rate": 8.963014476298608e-05, "loss": 0.02737344801425934, "step": 36550 }, { "epoch": 10.377519159806983, "grad_norm": 8.849011421203613, "learning_rate": 8.962730627306274e-05, "loss": 0.028218457102775575, "step": 36560 }, { "epoch": 10.380357649730344, "grad_norm": 8.680930137634277, "learning_rate": 8.962446778313938e-05, "loss": 0.040789878368377684, "step": 36570 }, { "epoch": 10.383196139653704, "grad_norm": 12.74616813659668, "learning_rate": 8.962162929321601e-05, "loss": 0.04644421935081482, "step": 36580 }, { "epoch": 10.386034629577065, "grad_norm": 10.74756145477295, "learning_rate": 8.961879080329265e-05, "loss": 0.04794375002384186, "step": 36590 }, { "epoch": 10.388873119500426, "grad_norm": 9.5908203125, "learning_rate": 8.961595231336929e-05, "loss": 0.040392139554023744, "step": 36600 }, { "epoch": 10.391711609423787, "grad_norm": 0.7493981719017029, "learning_rate": 8.961311382344593e-05, "loss": 0.03460003137588501, "step": 36610 }, { "epoch": 10.394550099347148, "grad_norm": 7.8514227867126465, "learning_rate": 8.961027533352257e-05, "loss": 0.01938253939151764, "step": 36620 }, { "epoch": 10.397388589270507, "grad_norm": 5.107845306396484, "learning_rate": 8.960743684359922e-05, "loss": 0.025116553902626036, "step": 36630 }, { "epoch": 10.400227079193868, "grad_norm": 0.30939024686813354, "learning_rate": 8.960459835367586e-05, "loss": 0.0547336220741272, "step": 36640 }, { "epoch": 10.40306556911723, "grad_norm": 1.5137938261032104, "learning_rate": 8.960175986375248e-05, "loss": 0.028294265270233154, "step": 36650 }, { "epoch": 10.40590405904059, "grad_norm": 7.480425834655762, "learning_rate": 8.959892137382913e-05, "loss": 0.045936211943626404, "step": 36660 }, { "epoch": 10.408742548963952, "grad_norm": 0.7122107744216919, "learning_rate": 8.959608288390577e-05, "loss": 0.03044787049293518, "step": 36670 }, { "epoch": 10.411581038887311, "grad_norm": 6.348662376403809, "learning_rate": 8.95932443939824e-05, "loss": 0.03552055358886719, "step": 36680 }, { "epoch": 10.414419528810672, "grad_norm": 1.8868720531463623, "learning_rate": 8.959040590405905e-05, "loss": 0.02734040915966034, "step": 36690 }, { "epoch": 10.417258018734033, "grad_norm": 4.4465861320495605, "learning_rate": 8.958756741413569e-05, "loss": 0.0271445631980896, "step": 36700 }, { "epoch": 10.420096508657394, "grad_norm": 1.1745775938034058, "learning_rate": 8.958472892421232e-05, "loss": 0.025948095321655273, "step": 36710 }, { "epoch": 10.422934998580756, "grad_norm": 3.7686893939971924, "learning_rate": 8.958189043428896e-05, "loss": 0.04117902815341949, "step": 36720 }, { "epoch": 10.425773488504117, "grad_norm": 5.71990442276001, "learning_rate": 8.95790519443656e-05, "loss": 0.029452455043792725, "step": 36730 }, { "epoch": 10.428611978427476, "grad_norm": 15.257085800170898, "learning_rate": 8.957621345444224e-05, "loss": 0.04664063155651092, "step": 36740 }, { "epoch": 10.431450468350837, "grad_norm": 5.793542861938477, "learning_rate": 8.957337496451888e-05, "loss": 0.045833060145378114, "step": 36750 }, { "epoch": 10.434288958274198, "grad_norm": 4.972627639770508, "learning_rate": 8.957053647459553e-05, "loss": 0.04821635484695434, "step": 36760 }, { "epoch": 10.43712744819756, "grad_norm": 7.34898042678833, "learning_rate": 8.956769798467217e-05, "loss": 0.03028062582015991, "step": 36770 }, { "epoch": 10.43996593812092, "grad_norm": 1.595918893814087, "learning_rate": 8.95648594947488e-05, "loss": 0.021723525226116182, "step": 36780 }, { "epoch": 10.44280442804428, "grad_norm": 11.401412010192871, "learning_rate": 8.956202100482544e-05, "loss": 0.037694025039672854, "step": 36790 }, { "epoch": 10.44564291796764, "grad_norm": 10.21303939819336, "learning_rate": 8.955918251490208e-05, "loss": 0.0879721462726593, "step": 36800 }, { "epoch": 10.448481407891002, "grad_norm": 0.9249193072319031, "learning_rate": 8.95563440249787e-05, "loss": 0.04869570434093475, "step": 36810 }, { "epoch": 10.451319897814363, "grad_norm": 8.879692077636719, "learning_rate": 8.955350553505536e-05, "loss": 0.03243623971939087, "step": 36820 }, { "epoch": 10.454158387737724, "grad_norm": 1.5014649629592896, "learning_rate": 8.9550667045132e-05, "loss": 0.03798250257968903, "step": 36830 }, { "epoch": 10.456996877661084, "grad_norm": 0.6087681651115417, "learning_rate": 8.954782855520863e-05, "loss": 0.022580021619796754, "step": 36840 }, { "epoch": 10.459835367584445, "grad_norm": 16.626922607421875, "learning_rate": 8.954499006528527e-05, "loss": 0.036199328303337094, "step": 36850 }, { "epoch": 10.462673857507806, "grad_norm": 4.7802815437316895, "learning_rate": 8.954215157536191e-05, "loss": 0.04271156787872314, "step": 36860 }, { "epoch": 10.465512347431167, "grad_norm": 9.64822769165039, "learning_rate": 8.953931308543855e-05, "loss": 0.051044100522994997, "step": 36870 }, { "epoch": 10.468350837354528, "grad_norm": 7.6476054191589355, "learning_rate": 8.953647459551518e-05, "loss": 0.028209307789802553, "step": 36880 }, { "epoch": 10.471189327277887, "grad_norm": 3.2450921535491943, "learning_rate": 8.953363610559184e-05, "loss": 0.037076184153556825, "step": 36890 }, { "epoch": 10.474027817201248, "grad_norm": 6.099301338195801, "learning_rate": 8.953079761566848e-05, "loss": 0.0527698814868927, "step": 36900 }, { "epoch": 10.47686630712461, "grad_norm": 3.0445823669433594, "learning_rate": 8.95279591257451e-05, "loss": 0.04613305926322937, "step": 36910 }, { "epoch": 10.47970479704797, "grad_norm": 2.3866021633148193, "learning_rate": 8.952512063582175e-05, "loss": 0.031197765469551088, "step": 36920 }, { "epoch": 10.482543286971332, "grad_norm": 0.7550484538078308, "learning_rate": 8.952228214589839e-05, "loss": 0.023020866513252258, "step": 36930 }, { "epoch": 10.485381776894693, "grad_norm": 9.42840576171875, "learning_rate": 8.951944365597502e-05, "loss": 0.03542722463607788, "step": 36940 }, { "epoch": 10.488220266818052, "grad_norm": 3.3144822120666504, "learning_rate": 8.951660516605167e-05, "loss": 0.026921114325523375, "step": 36950 }, { "epoch": 10.491058756741413, "grad_norm": 1.1103624105453491, "learning_rate": 8.951376667612831e-05, "loss": 0.03855997025966644, "step": 36960 }, { "epoch": 10.493897246664774, "grad_norm": 0.46380260586738586, "learning_rate": 8.951092818620494e-05, "loss": 0.021382735669612886, "step": 36970 }, { "epoch": 10.496735736588136, "grad_norm": 3.6674861907958984, "learning_rate": 8.950808969628158e-05, "loss": 0.0354071706533432, "step": 36980 }, { "epoch": 10.499574226511497, "grad_norm": 1.2760733366012573, "learning_rate": 8.950525120635822e-05, "loss": 0.057169800996780394, "step": 36990 }, { "epoch": 10.502412716434856, "grad_norm": 4.18191385269165, "learning_rate": 8.950241271643486e-05, "loss": 0.03613161742687225, "step": 37000 }, { "epoch": 10.502412716434856, "eval_accuracy": 0.9546003687925224, "eval_loss": 0.14071452617645264, "eval_runtime": 51.4092, "eval_samples_per_second": 305.918, "eval_steps_per_second": 4.785, "step": 37000 }, { "epoch": 10.505251206358217, "grad_norm": 5.251542568206787, "learning_rate": 8.949957422651149e-05, "loss": 0.0352025032043457, "step": 37010 }, { "epoch": 10.508089696281578, "grad_norm": 8.934554100036621, "learning_rate": 8.949673573658815e-05, "loss": 0.04203507900238037, "step": 37020 }, { "epoch": 10.51092818620494, "grad_norm": 4.07866907119751, "learning_rate": 8.949389724666478e-05, "loss": 0.06852113008499146, "step": 37030 }, { "epoch": 10.5137666761283, "grad_norm": 0.8728171586990356, "learning_rate": 8.949105875674142e-05, "loss": 0.0360987663269043, "step": 37040 }, { "epoch": 10.51660516605166, "grad_norm": 5.975709915161133, "learning_rate": 8.948822026681806e-05, "loss": 0.05259674787521362, "step": 37050 }, { "epoch": 10.51944365597502, "grad_norm": 6.39314603805542, "learning_rate": 8.94853817768947e-05, "loss": 0.07525975108146668, "step": 37060 }, { "epoch": 10.522282145898382, "grad_norm": 1.5322110652923584, "learning_rate": 8.948254328697133e-05, "loss": 0.04704233407974243, "step": 37070 }, { "epoch": 10.525120635821743, "grad_norm": 3.32806134223938, "learning_rate": 8.947970479704797e-05, "loss": 0.026919493079185487, "step": 37080 }, { "epoch": 10.527959125745104, "grad_norm": 11.569388389587402, "learning_rate": 8.947686630712462e-05, "loss": 0.049755418300628663, "step": 37090 }, { "epoch": 10.530797615668465, "grad_norm": 12.883689880371094, "learning_rate": 8.947402781720125e-05, "loss": 0.04373905658721924, "step": 37100 }, { "epoch": 10.533636105591825, "grad_norm": 0.8018831610679626, "learning_rate": 8.947118932727789e-05, "loss": 0.025817561149597167, "step": 37110 }, { "epoch": 10.536474595515186, "grad_norm": 2.7872400283813477, "learning_rate": 8.946835083735453e-05, "loss": 0.02614658176898956, "step": 37120 }, { "epoch": 10.539313085438547, "grad_norm": 7.6963090896606445, "learning_rate": 8.946551234743116e-05, "loss": 0.03521303832530975, "step": 37130 }, { "epoch": 10.542151575361908, "grad_norm": 7.840782165527344, "learning_rate": 8.94626738575078e-05, "loss": 0.021747377514839173, "step": 37140 }, { "epoch": 10.544990065285269, "grad_norm": 1.799548625946045, "learning_rate": 8.945983536758446e-05, "loss": 0.02789437770843506, "step": 37150 }, { "epoch": 10.547828555208628, "grad_norm": 7.602907657623291, "learning_rate": 8.945699687766109e-05, "loss": 0.030423381924629213, "step": 37160 }, { "epoch": 10.55066704513199, "grad_norm": 1.8789817094802856, "learning_rate": 8.945415838773773e-05, "loss": 0.023447376489639283, "step": 37170 }, { "epoch": 10.55350553505535, "grad_norm": 4.333127021789551, "learning_rate": 8.945131989781437e-05, "loss": 0.01709727793931961, "step": 37180 }, { "epoch": 10.556344024978712, "grad_norm": 2.2025396823883057, "learning_rate": 8.944848140789101e-05, "loss": 0.013913652300834656, "step": 37190 }, { "epoch": 10.559182514902073, "grad_norm": 4.77195930480957, "learning_rate": 8.944564291796764e-05, "loss": 0.0207273006439209, "step": 37200 }, { "epoch": 10.562021004825432, "grad_norm": 5.345693588256836, "learning_rate": 8.944280442804428e-05, "loss": 0.028708827495574952, "step": 37210 }, { "epoch": 10.564859494748793, "grad_norm": 6.64163875579834, "learning_rate": 8.943996593812093e-05, "loss": 0.05572202801704407, "step": 37220 }, { "epoch": 10.567697984672154, "grad_norm": 10.99537181854248, "learning_rate": 8.943712744819756e-05, "loss": 0.037175387144088745, "step": 37230 }, { "epoch": 10.570536474595515, "grad_norm": 4.54201078414917, "learning_rate": 8.94342889582742e-05, "loss": 0.04588257372379303, "step": 37240 }, { "epoch": 10.573374964518877, "grad_norm": 5.677923679351807, "learning_rate": 8.943145046835084e-05, "loss": 0.023086604475975037, "step": 37250 }, { "epoch": 10.576213454442236, "grad_norm": 8.178837776184082, "learning_rate": 8.942861197842747e-05, "loss": 0.03432207703590393, "step": 37260 }, { "epoch": 10.579051944365597, "grad_norm": 4.940983772277832, "learning_rate": 8.942577348850411e-05, "loss": 0.03260141611099243, "step": 37270 }, { "epoch": 10.581890434288958, "grad_norm": 6.754091739654541, "learning_rate": 8.942293499858076e-05, "loss": 0.025221428275108336, "step": 37280 }, { "epoch": 10.58472892421232, "grad_norm": 6.779361248016357, "learning_rate": 8.94200965086574e-05, "loss": 0.044029411673545835, "step": 37290 }, { "epoch": 10.58756741413568, "grad_norm": 8.544051170349121, "learning_rate": 8.941725801873404e-05, "loss": 0.038761427998542784, "step": 37300 }, { "epoch": 10.59040590405904, "grad_norm": 1.060537576675415, "learning_rate": 8.941441952881068e-05, "loss": 0.020170974731445312, "step": 37310 }, { "epoch": 10.5932443939824, "grad_norm": 1.999685287475586, "learning_rate": 8.941158103888732e-05, "loss": 0.03689191341400146, "step": 37320 }, { "epoch": 10.596082883905762, "grad_norm": 0.29641035199165344, "learning_rate": 8.940874254896395e-05, "loss": 0.038303548097610475, "step": 37330 }, { "epoch": 10.598921373829123, "grad_norm": 1.296662449836731, "learning_rate": 8.940590405904059e-05, "loss": 0.02575809061527252, "step": 37340 }, { "epoch": 10.601759863752484, "grad_norm": 6.5464935302734375, "learning_rate": 8.940306556911724e-05, "loss": 0.059257084131240846, "step": 37350 }, { "epoch": 10.604598353675845, "grad_norm": 10.929688453674316, "learning_rate": 8.940022707919387e-05, "loss": 0.04209223985671997, "step": 37360 }, { "epoch": 10.607436843599205, "grad_norm": 6.967700004577637, "learning_rate": 8.939738858927051e-05, "loss": 0.041620075702667236, "step": 37370 }, { "epoch": 10.610275333522566, "grad_norm": 7.264950752258301, "learning_rate": 8.939455009934716e-05, "loss": 0.052823996543884276, "step": 37380 }, { "epoch": 10.613113823445927, "grad_norm": 1.87778902053833, "learning_rate": 8.939171160942378e-05, "loss": 0.02123316526412964, "step": 37390 }, { "epoch": 10.615952313369288, "grad_norm": 1.6056469678878784, "learning_rate": 8.938887311950042e-05, "loss": 0.03137262165546417, "step": 37400 }, { "epoch": 10.618790803292649, "grad_norm": 1.0563147068023682, "learning_rate": 8.938603462957707e-05, "loss": 0.03821562826633453, "step": 37410 }, { "epoch": 10.621629293216008, "grad_norm": 8.246865272521973, "learning_rate": 8.938319613965371e-05, "loss": 0.04417011737823486, "step": 37420 }, { "epoch": 10.62446778313937, "grad_norm": 7.447282314300537, "learning_rate": 8.938035764973035e-05, "loss": 0.021993711590766907, "step": 37430 }, { "epoch": 10.62730627306273, "grad_norm": 1.5030627250671387, "learning_rate": 8.937751915980699e-05, "loss": 0.024685478210449217, "step": 37440 }, { "epoch": 10.630144762986092, "grad_norm": 8.781514167785645, "learning_rate": 8.937468066988363e-05, "loss": 0.05160062313079834, "step": 37450 }, { "epoch": 10.632983252909453, "grad_norm": 3.604510545730591, "learning_rate": 8.937184217996026e-05, "loss": 0.038846978545188905, "step": 37460 }, { "epoch": 10.635821742832812, "grad_norm": 7.8089165687561035, "learning_rate": 8.93690036900369e-05, "loss": 0.03664895296096802, "step": 37470 }, { "epoch": 10.638660232756173, "grad_norm": 15.019682884216309, "learning_rate": 8.936616520011354e-05, "loss": 0.03987056612968445, "step": 37480 }, { "epoch": 10.641498722679534, "grad_norm": 1.0049844980239868, "learning_rate": 8.936332671019018e-05, "loss": 0.016740013659000397, "step": 37490 }, { "epoch": 10.644337212602895, "grad_norm": 3.0634448528289795, "learning_rate": 8.936048822026682e-05, "loss": 0.018975165486335755, "step": 37500 }, { "epoch": 10.644337212602895, "eval_accuracy": 0.9649011254530425, "eval_loss": 0.1071079820394516, "eval_runtime": 32.2188, "eval_samples_per_second": 488.131, "eval_steps_per_second": 7.635, "step": 37500 }, { "epoch": 10.647175702526257, "grad_norm": 2.4725348949432373, "learning_rate": 8.935764973034347e-05, "loss": 0.023819857835769655, "step": 37510 }, { "epoch": 10.650014192449618, "grad_norm": 1.7682656049728394, "learning_rate": 8.93548112404201e-05, "loss": 0.034901416301727294, "step": 37520 }, { "epoch": 10.652852682372977, "grad_norm": 5.055690765380859, "learning_rate": 8.935197275049674e-05, "loss": 0.07113144397735596, "step": 37530 }, { "epoch": 10.655691172296338, "grad_norm": 8.540861129760742, "learning_rate": 8.934913426057338e-05, "loss": 0.05287214517593384, "step": 37540 }, { "epoch": 10.6585296622197, "grad_norm": 10.796398162841797, "learning_rate": 8.934629577065002e-05, "loss": 0.04504440426826477, "step": 37550 }, { "epoch": 10.66136815214306, "grad_norm": 7.684292316436768, "learning_rate": 8.934345728072666e-05, "loss": 0.05234728455543518, "step": 37560 }, { "epoch": 10.664206642066421, "grad_norm": 2.885939359664917, "learning_rate": 8.93406187908033e-05, "loss": 0.027982711791992188, "step": 37570 }, { "epoch": 10.66704513198978, "grad_norm": 3.7580056190490723, "learning_rate": 8.933778030087994e-05, "loss": 0.03282267153263092, "step": 37580 }, { "epoch": 10.669883621913142, "grad_norm": 2.2083277702331543, "learning_rate": 8.933494181095657e-05, "loss": 0.027709969878196718, "step": 37590 }, { "epoch": 10.672722111836503, "grad_norm": 10.420751571655273, "learning_rate": 8.933210332103321e-05, "loss": 0.035718250274658206, "step": 37600 }, { "epoch": 10.675560601759864, "grad_norm": 1.0509521961212158, "learning_rate": 8.932926483110985e-05, "loss": 0.02771328091621399, "step": 37610 }, { "epoch": 10.678399091683225, "grad_norm": 7.422127723693848, "learning_rate": 8.93264263411865e-05, "loss": 0.04015583097934723, "step": 37620 }, { "epoch": 10.681237581606585, "grad_norm": 2.4321682453155518, "learning_rate": 8.932358785126314e-05, "loss": 0.047475415468215945, "step": 37630 }, { "epoch": 10.684076071529946, "grad_norm": 5.741279602050781, "learning_rate": 8.932074936133978e-05, "loss": 0.03600336909294129, "step": 37640 }, { "epoch": 10.686914561453307, "grad_norm": 11.126138687133789, "learning_rate": 8.93179108714164e-05, "loss": 0.039527544379234315, "step": 37650 }, { "epoch": 10.689753051376668, "grad_norm": 1.6914265155792236, "learning_rate": 8.931507238149305e-05, "loss": 0.03682173192501068, "step": 37660 }, { "epoch": 10.692591541300029, "grad_norm": 4.5530290603637695, "learning_rate": 8.931223389156969e-05, "loss": 0.036605161428451535, "step": 37670 }, { "epoch": 10.695430031223388, "grad_norm": 0.8246129155158997, "learning_rate": 8.930939540164633e-05, "loss": 0.02399342507123947, "step": 37680 }, { "epoch": 10.69826852114675, "grad_norm": 9.791586875915527, "learning_rate": 8.930655691172297e-05, "loss": 0.03998745083808899, "step": 37690 }, { "epoch": 10.70110701107011, "grad_norm": 6.334713459014893, "learning_rate": 8.930371842179961e-05, "loss": 0.026400619745254518, "step": 37700 }, { "epoch": 10.703945500993472, "grad_norm": 5.627140045166016, "learning_rate": 8.930087993187625e-05, "loss": 0.03224782347679138, "step": 37710 }, { "epoch": 10.706783990916833, "grad_norm": 1.0647366046905518, "learning_rate": 8.929804144195288e-05, "loss": 0.03330181241035461, "step": 37720 }, { "epoch": 10.709622480840192, "grad_norm": 11.89478588104248, "learning_rate": 8.929520295202952e-05, "loss": 0.040495842695236206, "step": 37730 }, { "epoch": 10.712460970763553, "grad_norm": 9.617572784423828, "learning_rate": 8.929236446210616e-05, "loss": 0.042030069231987, "step": 37740 }, { "epoch": 10.715299460686914, "grad_norm": 6.855515003204346, "learning_rate": 8.92895259721828e-05, "loss": 0.03135378956794739, "step": 37750 }, { "epoch": 10.718137950610275, "grad_norm": 2.2968556880950928, "learning_rate": 8.928668748225945e-05, "loss": 0.03238527774810791, "step": 37760 }, { "epoch": 10.720976440533637, "grad_norm": 4.48383903503418, "learning_rate": 8.928384899233609e-05, "loss": 0.035945233702659604, "step": 37770 }, { "epoch": 10.723814930456998, "grad_norm": 12.412117958068848, "learning_rate": 8.928101050241272e-05, "loss": 0.05046968460083008, "step": 37780 }, { "epoch": 10.726653420380357, "grad_norm": 2.755648612976074, "learning_rate": 8.927817201248936e-05, "loss": 0.06066227555274963, "step": 37790 }, { "epoch": 10.729491910303718, "grad_norm": 10.982110977172852, "learning_rate": 8.9275333522566e-05, "loss": 0.04127570986747742, "step": 37800 }, { "epoch": 10.73233040022708, "grad_norm": 10.086679458618164, "learning_rate": 8.927249503264264e-05, "loss": 0.054268544912338255, "step": 37810 }, { "epoch": 10.73516889015044, "grad_norm": 6.908626079559326, "learning_rate": 8.926965654271928e-05, "loss": 0.028955334424972536, "step": 37820 }, { "epoch": 10.738007380073801, "grad_norm": 5.453639984130859, "learning_rate": 8.926681805279592e-05, "loss": 0.022694703936576844, "step": 37830 }, { "epoch": 10.74084586999716, "grad_norm": 4.088376045227051, "learning_rate": 8.926397956287256e-05, "loss": 0.029334139823913575, "step": 37840 }, { "epoch": 10.743684359920522, "grad_norm": 1.2237050533294678, "learning_rate": 8.926114107294919e-05, "loss": 0.03693574070930481, "step": 37850 }, { "epoch": 10.746522849843883, "grad_norm": 1.9880056381225586, "learning_rate": 8.925830258302583e-05, "loss": 0.02573996186256409, "step": 37860 }, { "epoch": 10.749361339767244, "grad_norm": 6.905924320220947, "learning_rate": 8.925546409310247e-05, "loss": 0.07864362597465516, "step": 37870 }, { "epoch": 10.752199829690605, "grad_norm": 2.781162738800049, "learning_rate": 8.92526256031791e-05, "loss": 0.030710291862487794, "step": 37880 }, { "epoch": 10.755038319613966, "grad_norm": 6.446109771728516, "learning_rate": 8.924978711325576e-05, "loss": 0.03187253475189209, "step": 37890 }, { "epoch": 10.757876809537326, "grad_norm": 7.017765998840332, "learning_rate": 8.92469486233324e-05, "loss": 0.042497122287750246, "step": 37900 }, { "epoch": 10.760715299460687, "grad_norm": 1.2335444688796997, "learning_rate": 8.924411013340903e-05, "loss": 0.028851160407066347, "step": 37910 }, { "epoch": 10.763553789384048, "grad_norm": 1.1124621629714966, "learning_rate": 8.924127164348567e-05, "loss": 0.020607858896255493, "step": 37920 }, { "epoch": 10.766392279307409, "grad_norm": 7.91394567489624, "learning_rate": 8.923843315356231e-05, "loss": 0.03323663473129272, "step": 37930 }, { "epoch": 10.76923076923077, "grad_norm": 5.575784206390381, "learning_rate": 8.923559466363895e-05, "loss": 0.03114174008369446, "step": 37940 }, { "epoch": 10.77206925915413, "grad_norm": 4.221701622009277, "learning_rate": 8.923275617371559e-05, "loss": 0.04173942804336548, "step": 37950 }, { "epoch": 10.77490774907749, "grad_norm": 0.8155259490013123, "learning_rate": 8.922991768379223e-05, "loss": 0.023526741564273833, "step": 37960 }, { "epoch": 10.777746239000852, "grad_norm": 2.1087076663970947, "learning_rate": 8.922707919386886e-05, "loss": 0.03755518198013306, "step": 37970 }, { "epoch": 10.780584728924213, "grad_norm": 4.077432155609131, "learning_rate": 8.92242407039455e-05, "loss": 0.04911110103130341, "step": 37980 }, { "epoch": 10.783423218847574, "grad_norm": 1.5936083793640137, "learning_rate": 8.922140221402214e-05, "loss": 0.03126734495162964, "step": 37990 }, { "epoch": 10.786261708770933, "grad_norm": 8.705074310302734, "learning_rate": 8.921856372409879e-05, "loss": 0.03999944925308228, "step": 38000 }, { "epoch": 10.786261708770933, "eval_accuracy": 0.9621033890761111, "eval_loss": 0.11464520543813705, "eval_runtime": 31.5044, "eval_samples_per_second": 499.2, "eval_steps_per_second": 7.808, "step": 38000 }, { "epoch": 10.789100198694294, "grad_norm": 1.2182165384292603, "learning_rate": 8.921572523417541e-05, "loss": 0.017525941133499146, "step": 38010 }, { "epoch": 10.791938688617655, "grad_norm": 6.986833095550537, "learning_rate": 8.921288674425207e-05, "loss": 0.04936402142047882, "step": 38020 }, { "epoch": 10.794777178541016, "grad_norm": 1.773641586303711, "learning_rate": 8.921004825432871e-05, "loss": 0.02297710031270981, "step": 38030 }, { "epoch": 10.797615668464378, "grad_norm": 12.100281715393066, "learning_rate": 8.920720976440534e-05, "loss": 0.05085898637771606, "step": 38040 }, { "epoch": 10.800454158387737, "grad_norm": 7.325253963470459, "learning_rate": 8.920437127448198e-05, "loss": 0.04772913753986359, "step": 38050 }, { "epoch": 10.803292648311098, "grad_norm": 4.612751483917236, "learning_rate": 8.920153278455862e-05, "loss": 0.04164789617061615, "step": 38060 }, { "epoch": 10.80613113823446, "grad_norm": 6.717906475067139, "learning_rate": 8.919869429463525e-05, "loss": 0.053933215141296384, "step": 38070 }, { "epoch": 10.80896962815782, "grad_norm": 0.48313406109809875, "learning_rate": 8.91958558047119e-05, "loss": 0.0362622082233429, "step": 38080 }, { "epoch": 10.811808118081181, "grad_norm": 1.9742509126663208, "learning_rate": 8.919301731478854e-05, "loss": 0.06081060767173767, "step": 38090 }, { "epoch": 10.81464660800454, "grad_norm": 8.570929527282715, "learning_rate": 8.919017882486517e-05, "loss": 0.03469367623329163, "step": 38100 }, { "epoch": 10.817485097927902, "grad_norm": 7.647538661956787, "learning_rate": 8.918734033494181e-05, "loss": 0.06106448173522949, "step": 38110 }, { "epoch": 10.820323587851263, "grad_norm": 0.6763517260551453, "learning_rate": 8.918450184501845e-05, "loss": 0.033539679646492, "step": 38120 }, { "epoch": 10.823162077774624, "grad_norm": 13.730766296386719, "learning_rate": 8.91816633550951e-05, "loss": 0.057130783796310425, "step": 38130 }, { "epoch": 10.826000567697985, "grad_norm": 3.916975498199463, "learning_rate": 8.917882486517172e-05, "loss": 0.037809810042381285, "step": 38140 }, { "epoch": 10.828839057621346, "grad_norm": 6.261385917663574, "learning_rate": 8.917598637524838e-05, "loss": 0.03282144963741303, "step": 38150 }, { "epoch": 10.831677547544706, "grad_norm": 1.9048666954040527, "learning_rate": 8.917314788532502e-05, "loss": 0.026766231656074523, "step": 38160 }, { "epoch": 10.834516037468067, "grad_norm": 5.762722492218018, "learning_rate": 8.917030939540165e-05, "loss": 0.032744666934013365, "step": 38170 }, { "epoch": 10.837354527391428, "grad_norm": 9.21733283996582, "learning_rate": 8.916747090547829e-05, "loss": 0.034161853790283206, "step": 38180 }, { "epoch": 10.840193017314789, "grad_norm": 11.061073303222656, "learning_rate": 8.916463241555493e-05, "loss": 0.06247431039810181, "step": 38190 }, { "epoch": 10.84303150723815, "grad_norm": 10.529635429382324, "learning_rate": 8.916179392563156e-05, "loss": 0.046101820468902585, "step": 38200 }, { "epoch": 10.84586999716151, "grad_norm": 3.0253312587738037, "learning_rate": 8.91589554357082e-05, "loss": 0.019727879762649538, "step": 38210 }, { "epoch": 10.84870848708487, "grad_norm": 1.196020483970642, "learning_rate": 8.915611694578485e-05, "loss": 0.018204784393310545, "step": 38220 }, { "epoch": 10.851546977008232, "grad_norm": 9.149394989013672, "learning_rate": 8.915327845586148e-05, "loss": 0.04209108054637909, "step": 38230 }, { "epoch": 10.854385466931593, "grad_norm": 9.837111473083496, "learning_rate": 8.915043996593812e-05, "loss": 0.05275066494941712, "step": 38240 }, { "epoch": 10.857223956854954, "grad_norm": 1.6451373100280762, "learning_rate": 8.914760147601477e-05, "loss": 0.044077742099761966, "step": 38250 }, { "epoch": 10.860062446778313, "grad_norm": 1.2419270277023315, "learning_rate": 8.91447629860914e-05, "loss": 0.042144501209259035, "step": 38260 }, { "epoch": 10.862900936701674, "grad_norm": 11.143692016601562, "learning_rate": 8.914192449616803e-05, "loss": 0.04765017628669739, "step": 38270 }, { "epoch": 10.865739426625035, "grad_norm": 3.6061408519744873, "learning_rate": 8.913908600624469e-05, "loss": 0.029816728830337525, "step": 38280 }, { "epoch": 10.868577916548396, "grad_norm": 13.095852851867676, "learning_rate": 8.913624751632133e-05, "loss": 0.04426215291023254, "step": 38290 }, { "epoch": 10.871416406471758, "grad_norm": 11.82460880279541, "learning_rate": 8.913340902639796e-05, "loss": 0.031658872961997986, "step": 38300 }, { "epoch": 10.874254896395119, "grad_norm": 2.053227424621582, "learning_rate": 8.91305705364746e-05, "loss": 0.033156946301460266, "step": 38310 }, { "epoch": 10.877093386318478, "grad_norm": 6.699620723724365, "learning_rate": 8.912773204655124e-05, "loss": 0.0250081866979599, "step": 38320 }, { "epoch": 10.879931876241839, "grad_norm": 0.4123322367668152, "learning_rate": 8.912489355662787e-05, "loss": 0.017115718126296996, "step": 38330 }, { "epoch": 10.8827703661652, "grad_norm": 8.199004173278809, "learning_rate": 8.912205506670451e-05, "loss": 0.044949352741241455, "step": 38340 }, { "epoch": 10.885608856088561, "grad_norm": 1.9813802242279053, "learning_rate": 8.911921657678117e-05, "loss": 0.019375362992286684, "step": 38350 }, { "epoch": 10.888447346011922, "grad_norm": 14.78364086151123, "learning_rate": 8.91163780868578e-05, "loss": 0.036697566509246826, "step": 38360 }, { "epoch": 10.891285835935282, "grad_norm": 7.931263446807861, "learning_rate": 8.911353959693443e-05, "loss": 0.04864569008350372, "step": 38370 }, { "epoch": 10.894124325858643, "grad_norm": 1.4149014949798584, "learning_rate": 8.911070110701108e-05, "loss": 0.02675849199295044, "step": 38380 }, { "epoch": 10.896962815782004, "grad_norm": 5.0884904861450195, "learning_rate": 8.910786261708772e-05, "loss": 0.050648736953735354, "step": 38390 }, { "epoch": 10.899801305705365, "grad_norm": 4.588504314422607, "learning_rate": 8.910502412716435e-05, "loss": 0.04709309935569763, "step": 38400 }, { "epoch": 10.902639795628726, "grad_norm": 8.526410102844238, "learning_rate": 8.910218563724099e-05, "loss": 0.05166694521903992, "step": 38410 }, { "epoch": 10.905478285552086, "grad_norm": 7.267217636108398, "learning_rate": 8.909934714731764e-05, "loss": 0.04555327296257019, "step": 38420 }, { "epoch": 10.908316775475447, "grad_norm": 14.773509979248047, "learning_rate": 8.909650865739427e-05, "loss": 0.03317382335662842, "step": 38430 }, { "epoch": 10.911155265398808, "grad_norm": 2.4505627155303955, "learning_rate": 8.909367016747091e-05, "loss": 0.02701529860496521, "step": 38440 }, { "epoch": 10.913993755322169, "grad_norm": 5.023645877838135, "learning_rate": 8.909083167754755e-05, "loss": 0.0648931622505188, "step": 38450 }, { "epoch": 10.91683224524553, "grad_norm": 7.942727565765381, "learning_rate": 8.908799318762418e-05, "loss": 0.041131964325904845, "step": 38460 }, { "epoch": 10.91967073516889, "grad_norm": 5.156609058380127, "learning_rate": 8.908515469770082e-05, "loss": 0.06224710941314697, "step": 38470 }, { "epoch": 10.92250922509225, "grad_norm": 1.7237799167633057, "learning_rate": 8.908231620777748e-05, "loss": 0.030417358875274657, "step": 38480 }, { "epoch": 10.925347715015612, "grad_norm": 4.250205993652344, "learning_rate": 8.90794777178541e-05, "loss": 0.024495308101177216, "step": 38490 }, { "epoch": 10.928186204938973, "grad_norm": 4.064820766448975, "learning_rate": 8.907663922793075e-05, "loss": 0.027593061327934265, "step": 38500 }, { "epoch": 10.928186204938973, "eval_accuracy": 0.9630571628409741, "eval_loss": 0.11405987292528152, "eval_runtime": 30.2843, "eval_samples_per_second": 519.312, "eval_steps_per_second": 8.123, "step": 38500 }, { "epoch": 10.931024694862334, "grad_norm": 18.101533889770508, "learning_rate": 8.907380073800739e-05, "loss": 0.04277865886688233, "step": 38510 }, { "epoch": 10.933863184785693, "grad_norm": 2.851407766342163, "learning_rate": 8.907096224808403e-05, "loss": 0.057536613941192624, "step": 38520 }, { "epoch": 10.936701674709054, "grad_norm": 14.606097221374512, "learning_rate": 8.906812375816066e-05, "loss": 0.0379413902759552, "step": 38530 }, { "epoch": 10.939540164632415, "grad_norm": 16.068464279174805, "learning_rate": 8.90652852682373e-05, "loss": 0.06725850105285644, "step": 38540 }, { "epoch": 10.942378654555776, "grad_norm": 0.4289030432701111, "learning_rate": 8.906244677831395e-05, "loss": 0.05079072117805481, "step": 38550 }, { "epoch": 10.945217144479138, "grad_norm": 2.516092300415039, "learning_rate": 8.905960828839058e-05, "loss": 0.032949498295783995, "step": 38560 }, { "epoch": 10.948055634402499, "grad_norm": 12.253993034362793, "learning_rate": 8.905676979846722e-05, "loss": 0.051097214221954346, "step": 38570 }, { "epoch": 10.950894124325858, "grad_norm": 3.66261625289917, "learning_rate": 8.905393130854386e-05, "loss": 0.026334962248802184, "step": 38580 }, { "epoch": 10.953732614249219, "grad_norm": 5.605380058288574, "learning_rate": 8.905109281862049e-05, "loss": 0.040577515959739685, "step": 38590 }, { "epoch": 10.95657110417258, "grad_norm": 8.400565147399902, "learning_rate": 8.904825432869713e-05, "loss": 0.04508386850357056, "step": 38600 }, { "epoch": 10.959409594095941, "grad_norm": 11.642035484313965, "learning_rate": 8.904541583877377e-05, "loss": 0.04478132128715515, "step": 38610 }, { "epoch": 10.962248084019302, "grad_norm": 2.424344301223755, "learning_rate": 8.904257734885041e-05, "loss": 0.05042240619659424, "step": 38620 }, { "epoch": 10.965086573942662, "grad_norm": 6.036945343017578, "learning_rate": 8.903973885892706e-05, "loss": 0.04562307894229889, "step": 38630 }, { "epoch": 10.967925063866023, "grad_norm": 7.987058639526367, "learning_rate": 8.90369003690037e-05, "loss": 0.0291719526052475, "step": 38640 }, { "epoch": 10.970763553789384, "grad_norm": 3.4314353466033936, "learning_rate": 8.903406187908034e-05, "loss": 0.02088502496480942, "step": 38650 }, { "epoch": 10.973602043712745, "grad_norm": 1.74433434009552, "learning_rate": 8.903122338915697e-05, "loss": 0.01907458305358887, "step": 38660 }, { "epoch": 10.976440533636106, "grad_norm": 7.244530200958252, "learning_rate": 8.902838489923361e-05, "loss": 0.025460636615753172, "step": 38670 }, { "epoch": 10.979279023559467, "grad_norm": 1.287689208984375, "learning_rate": 8.902554640931026e-05, "loss": 0.03360937535762787, "step": 38680 }, { "epoch": 10.982117513482827, "grad_norm": 2.462334632873535, "learning_rate": 8.902299176837923e-05, "loss": 0.05594839453697205, "step": 38690 }, { "epoch": 10.984956003406188, "grad_norm": 9.20850658416748, "learning_rate": 8.902015327845587e-05, "loss": 0.028849190473556517, "step": 38700 }, { "epoch": 10.987794493329549, "grad_norm": 11.062004089355469, "learning_rate": 8.90173147885325e-05, "loss": 0.04202141463756561, "step": 38710 }, { "epoch": 10.99063298325291, "grad_norm": 3.8623580932617188, "learning_rate": 8.901447629860914e-05, "loss": 0.03822709619998932, "step": 38720 }, { "epoch": 10.993471473176271, "grad_norm": 12.331330299377441, "learning_rate": 8.901163780868578e-05, "loss": 0.04451251327991486, "step": 38730 }, { "epoch": 10.99630996309963, "grad_norm": 7.228479862213135, "learning_rate": 8.900879931876242e-05, "loss": 0.02773474156856537, "step": 38740 }, { "epoch": 10.999148453022991, "grad_norm": 3.0428898334503174, "learning_rate": 8.900596082883906e-05, "loss": 0.0693784475326538, "step": 38750 }, { "epoch": 11.001986942946353, "grad_norm": 2.2097017765045166, "learning_rate": 8.90031223389157e-05, "loss": 0.03470912873744965, "step": 38760 }, { "epoch": 11.004825432869714, "grad_norm": 7.723085880279541, "learning_rate": 8.900028384899233e-05, "loss": 0.02200377732515335, "step": 38770 }, { "epoch": 11.007663922793075, "grad_norm": 0.4707808792591095, "learning_rate": 8.899744535906897e-05, "loss": 0.03043572008609772, "step": 38780 }, { "epoch": 11.010502412716434, "grad_norm": 7.572404384613037, "learning_rate": 8.899460686914562e-05, "loss": 0.03136305510997772, "step": 38790 }, { "epoch": 11.013340902639795, "grad_norm": 1.4077868461608887, "learning_rate": 8.899176837922226e-05, "loss": 0.028094103932380675, "step": 38800 }, { "epoch": 11.016179392563156, "grad_norm": 9.686713218688965, "learning_rate": 8.89889298892989e-05, "loss": 0.029424840211868288, "step": 38810 }, { "epoch": 11.019017882486517, "grad_norm": 8.799880027770996, "learning_rate": 8.898609139937554e-05, "loss": 0.028245812654495238, "step": 38820 }, { "epoch": 11.021856372409879, "grad_norm": 1.7623929977416992, "learning_rate": 8.898325290945218e-05, "loss": 0.025759488344192505, "step": 38830 }, { "epoch": 11.024694862333238, "grad_norm": 5.2739386558532715, "learning_rate": 8.898041441952881e-05, "loss": 0.0242251992225647, "step": 38840 }, { "epoch": 11.027533352256599, "grad_norm": 3.8827316761016846, "learning_rate": 8.897757592960545e-05, "loss": 0.016704072058200837, "step": 38850 }, { "epoch": 11.03037184217996, "grad_norm": 4.5929484367370605, "learning_rate": 8.897473743968209e-05, "loss": 0.027492165565490723, "step": 38860 }, { "epoch": 11.033210332103321, "grad_norm": 2.1540887355804443, "learning_rate": 8.897189894975873e-05, "loss": 0.018764948844909667, "step": 38870 }, { "epoch": 11.036048822026682, "grad_norm": 8.193089485168457, "learning_rate": 8.896906045983538e-05, "loss": 0.02850118577480316, "step": 38880 }, { "epoch": 11.038887311950042, "grad_norm": 1.5470677614212036, "learning_rate": 8.896622196991202e-05, "loss": 0.02278001755475998, "step": 38890 }, { "epoch": 11.041725801873403, "grad_norm": 1.3612987995147705, "learning_rate": 8.896338347998864e-05, "loss": 0.008407558500766753, "step": 38900 }, { "epoch": 11.044564291796764, "grad_norm": 0.38969746232032776, "learning_rate": 8.896054499006529e-05, "loss": 0.01471705138683319, "step": 38910 }, { "epoch": 11.047402781720125, "grad_norm": 7.996700763702393, "learning_rate": 8.895770650014193e-05, "loss": 0.026653307676315307, "step": 38920 }, { "epoch": 11.050241271643486, "grad_norm": 8.743193626403809, "learning_rate": 8.895486801021857e-05, "loss": 0.017049789428710938, "step": 38930 }, { "epoch": 11.053079761566847, "grad_norm": 1.2593278884887695, "learning_rate": 8.895202952029521e-05, "loss": 0.009352418780326843, "step": 38940 }, { "epoch": 11.055918251490207, "grad_norm": 0.42129644751548767, "learning_rate": 8.894919103037185e-05, "loss": 0.016130171716213226, "step": 38950 }, { "epoch": 11.058756741413568, "grad_norm": 4.462993144989014, "learning_rate": 8.894635254044848e-05, "loss": 0.02544133961200714, "step": 38960 }, { "epoch": 11.061595231336929, "grad_norm": 1.3098411560058594, "learning_rate": 8.894351405052512e-05, "loss": 0.03292212188243866, "step": 38970 }, { "epoch": 11.06443372126029, "grad_norm": 1.3841769695281982, "learning_rate": 8.894067556060176e-05, "loss": 0.040628674626350406, "step": 38980 }, { "epoch": 11.067272211183651, "grad_norm": 3.9778859615325928, "learning_rate": 8.89378370706784e-05, "loss": 0.022025658190250395, "step": 38990 }, { "epoch": 11.07011070110701, "grad_norm": 14.36907958984375, "learning_rate": 8.893499858075504e-05, "loss": 0.04690743088722229, "step": 39000 }, { "epoch": 11.07011070110701, "eval_accuracy": 0.9554269727220703, "eval_loss": 0.14753197133541107, "eval_runtime": 31.8448, "eval_samples_per_second": 493.864, "eval_steps_per_second": 7.725, "step": 39000 }, { "epoch": 11.072949191030371, "grad_norm": 5.067372798919678, "learning_rate": 8.893216009083169e-05, "loss": 0.0173447847366333, "step": 39010 }, { "epoch": 11.075787680953733, "grad_norm": 3.6549625396728516, "learning_rate": 8.892932160090833e-05, "loss": 0.030686336755752563, "step": 39020 }, { "epoch": 11.078626170877094, "grad_norm": 2.621394157409668, "learning_rate": 8.892648311098496e-05, "loss": 0.04843833744525909, "step": 39030 }, { "epoch": 11.081464660800455, "grad_norm": 2.774726390838623, "learning_rate": 8.89236446210616e-05, "loss": 0.0424127846956253, "step": 39040 }, { "epoch": 11.084303150723814, "grad_norm": 4.249625205993652, "learning_rate": 8.892080613113824e-05, "loss": 0.03879550099372864, "step": 39050 }, { "epoch": 11.087141640647175, "grad_norm": 9.766534805297852, "learning_rate": 8.891796764121488e-05, "loss": 0.03313254415988922, "step": 39060 }, { "epoch": 11.089980130570536, "grad_norm": 1.481711745262146, "learning_rate": 8.891512915129152e-05, "loss": 0.03681328892707825, "step": 39070 }, { "epoch": 11.092818620493897, "grad_norm": 1.492143988609314, "learning_rate": 8.891229066136816e-05, "loss": 0.028679880499839782, "step": 39080 }, { "epoch": 11.095657110417259, "grad_norm": 10.449912071228027, "learning_rate": 8.890945217144479e-05, "loss": 0.03651578426361084, "step": 39090 }, { "epoch": 11.09849560034062, "grad_norm": 4.380954742431641, "learning_rate": 8.890661368152143e-05, "loss": 0.03942598700523377, "step": 39100 }, { "epoch": 11.101334090263979, "grad_norm": 1.4623929262161255, "learning_rate": 8.890377519159807e-05, "loss": 0.03354589939117432, "step": 39110 }, { "epoch": 11.10417258018734, "grad_norm": 3.657809257507324, "learning_rate": 8.890093670167471e-05, "loss": 0.042666229605674746, "step": 39120 }, { "epoch": 11.107011070110701, "grad_norm": 2.546175241470337, "learning_rate": 8.889809821175136e-05, "loss": 0.006722083687782288, "step": 39130 }, { "epoch": 11.109849560034062, "grad_norm": 0.7179209589958191, "learning_rate": 8.8895259721828e-05, "loss": 0.018499201536178587, "step": 39140 }, { "epoch": 11.112688049957423, "grad_norm": 18.417972564697266, "learning_rate": 8.889242123190464e-05, "loss": 0.032778704166412355, "step": 39150 }, { "epoch": 11.115526539880783, "grad_norm": 1.8714704513549805, "learning_rate": 8.888958274198127e-05, "loss": 0.020731404423713684, "step": 39160 }, { "epoch": 11.118365029804144, "grad_norm": 1.6303812265396118, "learning_rate": 8.888674425205791e-05, "loss": 0.030582723021507264, "step": 39170 }, { "epoch": 11.121203519727505, "grad_norm": 4.312580108642578, "learning_rate": 8.888390576213455e-05, "loss": 0.02371092289686203, "step": 39180 }, { "epoch": 11.124042009650866, "grad_norm": 5.294402599334717, "learning_rate": 8.888106727221118e-05, "loss": 0.03949802219867706, "step": 39190 }, { "epoch": 11.126880499574227, "grad_norm": 7.942931652069092, "learning_rate": 8.887822878228783e-05, "loss": 0.020351789891719818, "step": 39200 }, { "epoch": 11.129718989497587, "grad_norm": 1.3961246013641357, "learning_rate": 8.887539029236447e-05, "loss": 0.0203403577208519, "step": 39210 }, { "epoch": 11.132557479420948, "grad_norm": 5.94133186340332, "learning_rate": 8.88725518024411e-05, "loss": 0.03165285587310791, "step": 39220 }, { "epoch": 11.135395969344309, "grad_norm": 2.1853883266448975, "learning_rate": 8.886971331251774e-05, "loss": 0.03554928600788117, "step": 39230 }, { "epoch": 11.13823445926767, "grad_norm": 4.435094356536865, "learning_rate": 8.886687482259438e-05, "loss": 0.038537368178367615, "step": 39240 }, { "epoch": 11.141072949191031, "grad_norm": 0.4409341812133789, "learning_rate": 8.886403633267102e-05, "loss": 0.03152329027652741, "step": 39250 }, { "epoch": 11.14391143911439, "grad_norm": 1.186608910560608, "learning_rate": 8.886119784274767e-05, "loss": 0.023421669006347658, "step": 39260 }, { "epoch": 11.146749929037751, "grad_norm": 2.2760772705078125, "learning_rate": 8.885835935282431e-05, "loss": 0.022679845988750457, "step": 39270 }, { "epoch": 11.149588418961113, "grad_norm": 1.5064448118209839, "learning_rate": 8.885552086290095e-05, "loss": 0.02623879909515381, "step": 39280 }, { "epoch": 11.152426908884474, "grad_norm": 7.033249378204346, "learning_rate": 8.885268237297758e-05, "loss": 0.018123775720596313, "step": 39290 }, { "epoch": 11.155265398807835, "grad_norm": 6.1274733543396, "learning_rate": 8.884984388305422e-05, "loss": 0.022872525453567504, "step": 39300 }, { "epoch": 11.158103888731196, "grad_norm": 5.119394779205322, "learning_rate": 8.884700539313086e-05, "loss": 0.03518642783164978, "step": 39310 }, { "epoch": 11.160942378654555, "grad_norm": 16.30815315246582, "learning_rate": 8.884416690320749e-05, "loss": 0.023786953091621398, "step": 39320 }, { "epoch": 11.163780868577916, "grad_norm": 5.943182945251465, "learning_rate": 8.884132841328414e-05, "loss": 0.035018494725227355, "step": 39330 }, { "epoch": 11.166619358501277, "grad_norm": 3.9170877933502197, "learning_rate": 8.883848992336078e-05, "loss": 0.019326207041740418, "step": 39340 }, { "epoch": 11.169457848424639, "grad_norm": 7.83811616897583, "learning_rate": 8.883565143343741e-05, "loss": 0.049465835094451904, "step": 39350 }, { "epoch": 11.172296338348, "grad_norm": 3.784555435180664, "learning_rate": 8.883281294351405e-05, "loss": 0.031791973114013675, "step": 39360 }, { "epoch": 11.175134828271359, "grad_norm": 0.9043042063713074, "learning_rate": 8.88299744535907e-05, "loss": 0.019685475528240202, "step": 39370 }, { "epoch": 11.17797331819472, "grad_norm": 11.525797843933105, "learning_rate": 8.882713596366734e-05, "loss": 0.03947213292121887, "step": 39380 }, { "epoch": 11.180811808118081, "grad_norm": 0.8580959439277649, "learning_rate": 8.882429747374396e-05, "loss": 0.017877738177776336, "step": 39390 }, { "epoch": 11.183650298041442, "grad_norm": 7.73206090927124, "learning_rate": 8.882145898382062e-05, "loss": 0.052462238073349, "step": 39400 }, { "epoch": 11.186488787964803, "grad_norm": 1.8366074562072754, "learning_rate": 8.881862049389726e-05, "loss": 0.023527280986309053, "step": 39410 }, { "epoch": 11.189327277888163, "grad_norm": 0.7135634422302246, "learning_rate": 8.881578200397389e-05, "loss": 0.02762320637702942, "step": 39420 }, { "epoch": 11.192165767811524, "grad_norm": 4.903450965881348, "learning_rate": 8.881294351405053e-05, "loss": 0.033694005012512206, "step": 39430 }, { "epoch": 11.195004257734885, "grad_norm": 10.481388092041016, "learning_rate": 8.881010502412717e-05, "loss": 0.03647883534431458, "step": 39440 }, { "epoch": 11.197842747658246, "grad_norm": 2.2009382247924805, "learning_rate": 8.88072665342038e-05, "loss": 0.024297285079956054, "step": 39450 }, { "epoch": 11.200681237581607, "grad_norm": 5.509559154510498, "learning_rate": 8.880442804428045e-05, "loss": 0.016848967969417573, "step": 39460 }, { "epoch": 11.203519727504966, "grad_norm": 6.155548572540283, "learning_rate": 8.88015895543571e-05, "loss": 0.0333940327167511, "step": 39470 }, { "epoch": 11.206358217428328, "grad_norm": 5.80022668838501, "learning_rate": 8.879875106443372e-05, "loss": 0.028766345977783204, "step": 39480 }, { "epoch": 11.209196707351689, "grad_norm": 6.008015155792236, "learning_rate": 8.879591257451036e-05, "loss": 0.035169941186904904, "step": 39490 }, { "epoch": 11.21203519727505, "grad_norm": 8.324665069580078, "learning_rate": 8.8793074084587e-05, "loss": 0.030526870489120485, "step": 39500 }, { "epoch": 11.21203519727505, "eval_accuracy": 0.9630571628409741, "eval_loss": 0.12279607355594635, "eval_runtime": 31.9936, "eval_samples_per_second": 491.567, "eval_steps_per_second": 7.689, "step": 39500 }, { "epoch": 11.214873687198411, "grad_norm": 9.116996765136719, "learning_rate": 8.879023559466365e-05, "loss": 0.027555736899375915, "step": 39510 }, { "epoch": 11.217712177121772, "grad_norm": 5.994480609893799, "learning_rate": 8.878739710474027e-05, "loss": 0.03822968006134033, "step": 39520 }, { "epoch": 11.220550667045131, "grad_norm": 15.645219802856445, "learning_rate": 8.878455861481693e-05, "loss": 0.03583841621875763, "step": 39530 }, { "epoch": 11.223389156968492, "grad_norm": 2.4872725009918213, "learning_rate": 8.878172012489357e-05, "loss": 0.012383271753787995, "step": 39540 }, { "epoch": 11.226227646891854, "grad_norm": 1.2645692825317383, "learning_rate": 8.87788816349702e-05, "loss": 0.012924863398075104, "step": 39550 }, { "epoch": 11.229066136815215, "grad_norm": 7.703936576843262, "learning_rate": 8.877604314504684e-05, "loss": 0.034849250316619874, "step": 39560 }, { "epoch": 11.231904626738576, "grad_norm": 0.49828335642814636, "learning_rate": 8.877320465512348e-05, "loss": 0.018112237751483917, "step": 39570 }, { "epoch": 11.234743116661935, "grad_norm": 1.1897289752960205, "learning_rate": 8.877036616520011e-05, "loss": 0.029366666078567506, "step": 39580 }, { "epoch": 11.237581606585296, "grad_norm": 9.8939208984375, "learning_rate": 8.876752767527675e-05, "loss": 0.036095821857452394, "step": 39590 }, { "epoch": 11.240420096508657, "grad_norm": 17.731651306152344, "learning_rate": 8.87646891853534e-05, "loss": 0.0261627197265625, "step": 39600 }, { "epoch": 11.243258586432018, "grad_norm": 5.095267295837402, "learning_rate": 8.876185069543003e-05, "loss": 0.030870786309242247, "step": 39610 }, { "epoch": 11.24609707635538, "grad_norm": 6.781708240509033, "learning_rate": 8.875901220550667e-05, "loss": 0.04247032999992371, "step": 39620 }, { "epoch": 11.248935566278739, "grad_norm": 0.8735564351081848, "learning_rate": 8.875617371558332e-05, "loss": 0.019803762435913086, "step": 39630 }, { "epoch": 11.2517740562021, "grad_norm": 6.214321613311768, "learning_rate": 8.875333522565996e-05, "loss": 0.030209606885910033, "step": 39640 }, { "epoch": 11.254612546125461, "grad_norm": 6.896655082702637, "learning_rate": 8.875049673573658e-05, "loss": 0.023516342043876648, "step": 39650 }, { "epoch": 11.257451036048822, "grad_norm": 3.5537405014038086, "learning_rate": 8.874765824581324e-05, "loss": 0.03575185537338257, "step": 39660 }, { "epoch": 11.260289525972183, "grad_norm": 5.25169563293457, "learning_rate": 8.874481975588987e-05, "loss": 0.03764635026454925, "step": 39670 }, { "epoch": 11.263128015895543, "grad_norm": 9.011347770690918, "learning_rate": 8.874198126596651e-05, "loss": 0.023697122931480408, "step": 39680 }, { "epoch": 11.265966505818904, "grad_norm": 2.5412769317626953, "learning_rate": 8.873914277604315e-05, "loss": 0.014484362304210662, "step": 39690 }, { "epoch": 11.268804995742265, "grad_norm": 4.986505508422852, "learning_rate": 8.873630428611979e-05, "loss": 0.036568295955657956, "step": 39700 }, { "epoch": 11.271643485665626, "grad_norm": 0.23767372965812683, "learning_rate": 8.873346579619642e-05, "loss": 0.05489225387573242, "step": 39710 }, { "epoch": 11.274481975588987, "grad_norm": 4.534111976623535, "learning_rate": 8.873062730627306e-05, "loss": 0.027939262986183166, "step": 39720 }, { "epoch": 11.277320465512348, "grad_norm": 1.6047158241271973, "learning_rate": 8.872778881634972e-05, "loss": 0.030003452301025392, "step": 39730 }, { "epoch": 11.280158955435708, "grad_norm": 0.5218567252159119, "learning_rate": 8.872495032642634e-05, "loss": 0.0216441735625267, "step": 39740 }, { "epoch": 11.282997445359069, "grad_norm": 0.3662576377391815, "learning_rate": 8.872211183650298e-05, "loss": 0.026713499426841737, "step": 39750 }, { "epoch": 11.28583593528243, "grad_norm": 1.2118333578109741, "learning_rate": 8.871927334657963e-05, "loss": 0.03197303712368012, "step": 39760 }, { "epoch": 11.288674425205791, "grad_norm": 8.141847610473633, "learning_rate": 8.871643485665625e-05, "loss": 0.014501930773258209, "step": 39770 }, { "epoch": 11.291512915129152, "grad_norm": 3.148850440979004, "learning_rate": 8.87135963667329e-05, "loss": 0.029431241750717162, "step": 39780 }, { "epoch": 11.294351405052511, "grad_norm": 2.0202181339263916, "learning_rate": 8.871075787680955e-05, "loss": 0.010896000266075134, "step": 39790 }, { "epoch": 11.297189894975872, "grad_norm": 3.4427218437194824, "learning_rate": 8.870791938688618e-05, "loss": 0.03131939768791199, "step": 39800 }, { "epoch": 11.300028384899234, "grad_norm": 6.210688591003418, "learning_rate": 8.870508089696282e-05, "loss": 0.0207840695977211, "step": 39810 }, { "epoch": 11.302866874822595, "grad_norm": 5.605317115783691, "learning_rate": 8.870224240703946e-05, "loss": 0.021994388103485106, "step": 39820 }, { "epoch": 11.305705364745956, "grad_norm": 0.8640890121459961, "learning_rate": 8.86994039171161e-05, "loss": 0.045507395267486574, "step": 39830 }, { "epoch": 11.308543854669315, "grad_norm": 2.032482862472534, "learning_rate": 8.869656542719273e-05, "loss": 0.01071685180068016, "step": 39840 }, { "epoch": 11.311382344592676, "grad_norm": 4.843313217163086, "learning_rate": 8.869372693726937e-05, "loss": 0.04811188280582428, "step": 39850 }, { "epoch": 11.314220834516037, "grad_norm": 6.128026485443115, "learning_rate": 8.869088844734603e-05, "loss": 0.021548596024513245, "step": 39860 }, { "epoch": 11.317059324439398, "grad_norm": 2.7255165576934814, "learning_rate": 8.868804995742265e-05, "loss": 0.029139980673789978, "step": 39870 }, { "epoch": 11.31989781436276, "grad_norm": 4.15390682220459, "learning_rate": 8.86852114674993e-05, "loss": 0.009365811944007874, "step": 39880 }, { "epoch": 11.32273630428612, "grad_norm": 2.1152701377868652, "learning_rate": 8.868237297757594e-05, "loss": 0.030615830421447755, "step": 39890 }, { "epoch": 11.32557479420948, "grad_norm": 8.597820281982422, "learning_rate": 8.867953448765256e-05, "loss": 0.05419871211051941, "step": 39900 }, { "epoch": 11.328413284132841, "grad_norm": 1.234606385231018, "learning_rate": 8.86766959977292e-05, "loss": 0.01996539533138275, "step": 39910 }, { "epoch": 11.331251774056202, "grad_norm": 8.688271522521973, "learning_rate": 8.867385750780585e-05, "loss": 0.02766798734664917, "step": 39920 }, { "epoch": 11.334090263979563, "grad_norm": 3.683116912841797, "learning_rate": 8.867101901788249e-05, "loss": 0.01774250268936157, "step": 39930 }, { "epoch": 11.336928753902924, "grad_norm": 0.33083269000053406, "learning_rate": 8.866818052795913e-05, "loss": 0.01831842362880707, "step": 39940 }, { "epoch": 11.339767243826284, "grad_norm": 1.493120789527893, "learning_rate": 8.866534203803577e-05, "loss": 0.03705315887928009, "step": 39950 }, { "epoch": 11.342605733749645, "grad_norm": 9.20624828338623, "learning_rate": 8.866250354811241e-05, "loss": 0.06696739196777343, "step": 39960 }, { "epoch": 11.345444223673006, "grad_norm": 7.348040580749512, "learning_rate": 8.865966505818904e-05, "loss": 0.03329605460166931, "step": 39970 }, { "epoch": 11.348282713596367, "grad_norm": 3.4091098308563232, "learning_rate": 8.865682656826568e-05, "loss": 0.02745537757873535, "step": 39980 }, { "epoch": 11.351121203519728, "grad_norm": 10.50974178314209, "learning_rate": 8.865398807834234e-05, "loss": 0.05802041292190552, "step": 39990 }, { "epoch": 11.353959693443088, "grad_norm": 5.95629358291626, "learning_rate": 8.865114958841897e-05, "loss": 0.06460375785827636, "step": 40000 }, { "epoch": 11.353959693443088, "eval_accuracy": 0.9622305589114262, "eval_loss": 0.11635582149028778, "eval_runtime": 31.8431, "eval_samples_per_second": 493.89, "eval_steps_per_second": 7.725, "step": 40000 }, { "epoch": 11.356798183366449, "grad_norm": 1.2505794763565063, "learning_rate": 8.86483110984956e-05, "loss": 0.041473639011383054, "step": 40010 }, { "epoch": 11.35963667328981, "grad_norm": 6.528449535369873, "learning_rate": 8.864547260857225e-05, "loss": 0.04505537748336792, "step": 40020 }, { "epoch": 11.36247516321317, "grad_norm": 0.951397180557251, "learning_rate": 8.864263411864888e-05, "loss": 0.028059163689613344, "step": 40030 }, { "epoch": 11.365313653136532, "grad_norm": 0.2549450397491455, "learning_rate": 8.863979562872552e-05, "loss": 0.01771523952484131, "step": 40040 }, { "epoch": 11.368152143059891, "grad_norm": 4.424861907958984, "learning_rate": 8.863695713880216e-05, "loss": 0.015327318012714386, "step": 40050 }, { "epoch": 11.370990632983252, "grad_norm": 0.29332679510116577, "learning_rate": 8.86341186488788e-05, "loss": 0.029218947887420653, "step": 40060 }, { "epoch": 11.373829122906614, "grad_norm": 9.845694541931152, "learning_rate": 8.863128015895544e-05, "loss": 0.044624742865562436, "step": 40070 }, { "epoch": 11.376667612829975, "grad_norm": 7.932278156280518, "learning_rate": 8.862844166903208e-05, "loss": 0.026274853944778444, "step": 40080 }, { "epoch": 11.379506102753336, "grad_norm": 3.4915387630462646, "learning_rate": 8.862560317910872e-05, "loss": 0.012902650237083434, "step": 40090 }, { "epoch": 11.382344592676697, "grad_norm": 4.919431686401367, "learning_rate": 8.862276468918535e-05, "loss": 0.03617072105407715, "step": 40100 }, { "epoch": 11.385183082600056, "grad_norm": 9.1959228515625, "learning_rate": 8.861992619926199e-05, "loss": 0.020394164323806762, "step": 40110 }, { "epoch": 11.388021572523417, "grad_norm": 1.5866360664367676, "learning_rate": 8.861708770933863e-05, "loss": 0.02173074632883072, "step": 40120 }, { "epoch": 11.390860062446778, "grad_norm": 2.541499614715576, "learning_rate": 8.861424921941528e-05, "loss": 0.02507142722606659, "step": 40130 }, { "epoch": 11.39369855237014, "grad_norm": 7.9580979347229, "learning_rate": 8.861141072949192e-05, "loss": 0.01562546193599701, "step": 40140 }, { "epoch": 11.3965370422935, "grad_norm": 3.581310749053955, "learning_rate": 8.860857223956856e-05, "loss": 0.03749869167804718, "step": 40150 }, { "epoch": 11.39937553221686, "grad_norm": 2.7866709232330322, "learning_rate": 8.860573374964519e-05, "loss": 0.024735856056213378, "step": 40160 }, { "epoch": 11.402214022140221, "grad_norm": 14.630095481872559, "learning_rate": 8.860289525972183e-05, "loss": 0.046184498071670535, "step": 40170 }, { "epoch": 11.405052512063582, "grad_norm": 2.3438704013824463, "learning_rate": 8.860005676979847e-05, "loss": 0.01235281229019165, "step": 40180 }, { "epoch": 11.407891001986943, "grad_norm": 4.9460673332214355, "learning_rate": 8.859721827987511e-05, "loss": 0.014024251699447631, "step": 40190 }, { "epoch": 11.410729491910304, "grad_norm": 5.611842632293701, "learning_rate": 8.859437978995175e-05, "loss": 0.03010309338569641, "step": 40200 }, { "epoch": 11.413567981833664, "grad_norm": 2.91290283203125, "learning_rate": 8.85915413000284e-05, "loss": 0.024345070123672485, "step": 40210 }, { "epoch": 11.416406471757025, "grad_norm": 5.192891597747803, "learning_rate": 8.858870281010503e-05, "loss": 0.036339738965034486, "step": 40220 }, { "epoch": 11.419244961680386, "grad_norm": 5.404321670532227, "learning_rate": 8.858586432018166e-05, "loss": 0.03595627844333649, "step": 40230 }, { "epoch": 11.422083451603747, "grad_norm": 11.217823028564453, "learning_rate": 8.85830258302583e-05, "loss": 0.056194382905960086, "step": 40240 }, { "epoch": 11.424921941527108, "grad_norm": 7.636361598968506, "learning_rate": 8.858018734033495e-05, "loss": 0.037133216857910156, "step": 40250 }, { "epoch": 11.427760431450467, "grad_norm": 2.1263089179992676, "learning_rate": 8.857734885041159e-05, "loss": 0.020869164168834685, "step": 40260 }, { "epoch": 11.430598921373829, "grad_norm": 5.278923988342285, "learning_rate": 8.857451036048823e-05, "loss": 0.07499092817306519, "step": 40270 }, { "epoch": 11.43343741129719, "grad_norm": 8.755975723266602, "learning_rate": 8.857167187056487e-05, "loss": 0.014681947231292725, "step": 40280 }, { "epoch": 11.43627590122055, "grad_norm": 10.842698097229004, "learning_rate": 8.85688333806415e-05, "loss": 0.028595563769340516, "step": 40290 }, { "epoch": 11.439114391143912, "grad_norm": 7.552204608917236, "learning_rate": 8.856599489071814e-05, "loss": 0.02669263184070587, "step": 40300 }, { "epoch": 11.441952881067273, "grad_norm": 10.478028297424316, "learning_rate": 8.856315640079478e-05, "loss": 0.028721576929092406, "step": 40310 }, { "epoch": 11.444791370990632, "grad_norm": 1.6675152778625488, "learning_rate": 8.856031791087142e-05, "loss": 0.03753227591514587, "step": 40320 }, { "epoch": 11.447629860913993, "grad_norm": 8.175237655639648, "learning_rate": 8.855747942094806e-05, "loss": 0.03433559834957123, "step": 40330 }, { "epoch": 11.450468350837355, "grad_norm": 8.04157829284668, "learning_rate": 8.85546409310247e-05, "loss": 0.018507812917232514, "step": 40340 }, { "epoch": 11.453306840760716, "grad_norm": 7.27994966506958, "learning_rate": 8.855180244110135e-05, "loss": 0.040708786249160765, "step": 40350 }, { "epoch": 11.456145330684077, "grad_norm": 6.82072114944458, "learning_rate": 8.854896395117797e-05, "loss": 0.027734047174453734, "step": 40360 }, { "epoch": 11.458983820607436, "grad_norm": 0.42188936471939087, "learning_rate": 8.854612546125461e-05, "loss": 0.023866862058639526, "step": 40370 }, { "epoch": 11.461822310530797, "grad_norm": 10.239867210388184, "learning_rate": 8.854328697133126e-05, "loss": 0.03642951846122742, "step": 40380 }, { "epoch": 11.464660800454158, "grad_norm": 2.486072540283203, "learning_rate": 8.85404484814079e-05, "loss": 0.022917644679546358, "step": 40390 }, { "epoch": 11.46749929037752, "grad_norm": 14.883289337158203, "learning_rate": 8.853760999148454e-05, "loss": 0.05133838653564453, "step": 40400 }, { "epoch": 11.47033778030088, "grad_norm": 8.009035110473633, "learning_rate": 8.853477150156118e-05, "loss": 0.024933964014053345, "step": 40410 }, { "epoch": 11.47317627022424, "grad_norm": 1.4410299062728882, "learning_rate": 8.853193301163781e-05, "loss": 0.01889401376247406, "step": 40420 }, { "epoch": 11.476014760147601, "grad_norm": 7.447512149810791, "learning_rate": 8.852909452171445e-05, "loss": 0.054185223579406736, "step": 40430 }, { "epoch": 11.478853250070962, "grad_norm": 5.287721157073975, "learning_rate": 8.852625603179109e-05, "loss": 0.022848661243915557, "step": 40440 }, { "epoch": 11.481691739994323, "grad_norm": 5.844450950622559, "learning_rate": 8.852341754186773e-05, "loss": 0.017083717882633208, "step": 40450 }, { "epoch": 11.484530229917684, "grad_norm": 6.168613910675049, "learning_rate": 8.852057905194437e-05, "loss": 0.014239861071109772, "step": 40460 }, { "epoch": 11.487368719841044, "grad_norm": 0.7186439037322998, "learning_rate": 8.851774056202101e-05, "loss": 0.019264154136180878, "step": 40470 }, { "epoch": 11.490207209764405, "grad_norm": 4.704965114593506, "learning_rate": 8.851490207209766e-05, "loss": 0.026592737436294554, "step": 40480 }, { "epoch": 11.493045699687766, "grad_norm": 6.312370300292969, "learning_rate": 8.851206358217428e-05, "loss": 0.037139898538589476, "step": 40490 }, { "epoch": 11.495884189611127, "grad_norm": 0.369100421667099, "learning_rate": 8.850922509225093e-05, "loss": 0.030301761627197266, "step": 40500 }, { "epoch": 11.495884189611127, "eval_accuracy": 0.9613403700642208, "eval_loss": 0.1241578683257103, "eval_runtime": 32.0451, "eval_samples_per_second": 490.777, "eval_steps_per_second": 7.677, "step": 40500 }, { "epoch": 11.498722679534488, "grad_norm": 10.135141372680664, "learning_rate": 8.850638660232757e-05, "loss": 0.03910406231880188, "step": 40510 }, { "epoch": 11.50156116945785, "grad_norm": 1.814608097076416, "learning_rate": 8.85035481124042e-05, "loss": 0.03906750082969666, "step": 40520 }, { "epoch": 11.504399659381209, "grad_norm": 4.299266815185547, "learning_rate": 8.850070962248085e-05, "loss": 0.04703432023525238, "step": 40530 }, { "epoch": 11.50723814930457, "grad_norm": 2.0551843643188477, "learning_rate": 8.849787113255749e-05, "loss": 0.03883887529373169, "step": 40540 }, { "epoch": 11.51007663922793, "grad_norm": 8.810362815856934, "learning_rate": 8.849503264263412e-05, "loss": 0.03246828317642212, "step": 40550 }, { "epoch": 11.512915129151292, "grad_norm": 1.4277547597885132, "learning_rate": 8.849219415271076e-05, "loss": 0.02113957703113556, "step": 40560 }, { "epoch": 11.515753619074653, "grad_norm": 0.6941397190093994, "learning_rate": 8.84893556627874e-05, "loss": 0.021539199352264404, "step": 40570 }, { "epoch": 11.518592108998012, "grad_norm": 0.8388046026229858, "learning_rate": 8.848651717286404e-05, "loss": 0.04475291669368744, "step": 40580 }, { "epoch": 11.521430598921373, "grad_norm": 1.1015982627868652, "learning_rate": 8.848367868294068e-05, "loss": 0.021100239455699922, "step": 40590 }, { "epoch": 11.524269088844735, "grad_norm": 9.749149322509766, "learning_rate": 8.848084019301733e-05, "loss": 0.046405309438705446, "step": 40600 }, { "epoch": 11.527107578768096, "grad_norm": 3.3514533042907715, "learning_rate": 8.847800170309395e-05, "loss": 0.025201088190078734, "step": 40610 }, { "epoch": 11.529946068691457, "grad_norm": 12.265706062316895, "learning_rate": 8.84751632131706e-05, "loss": 0.023030999302864074, "step": 40620 }, { "epoch": 11.532784558614816, "grad_norm": 2.6569418907165527, "learning_rate": 8.847232472324724e-05, "loss": 0.030603009462356567, "step": 40630 }, { "epoch": 11.535623048538177, "grad_norm": 9.103837966918945, "learning_rate": 8.846948623332388e-05, "loss": 0.02392662167549133, "step": 40640 }, { "epoch": 11.538461538461538, "grad_norm": 2.529771327972412, "learning_rate": 8.84666477434005e-05, "loss": 0.04204635620117188, "step": 40650 }, { "epoch": 11.5413000283849, "grad_norm": 8.974900245666504, "learning_rate": 8.846380925347716e-05, "loss": 0.048991036415100095, "step": 40660 }, { "epoch": 11.54413851830826, "grad_norm": 2.5045156478881836, "learning_rate": 8.84609707635538e-05, "loss": 0.031013768911361695, "step": 40670 }, { "epoch": 11.546977008231622, "grad_norm": 5.265456676483154, "learning_rate": 8.845813227363043e-05, "loss": 0.05899316668510437, "step": 40680 }, { "epoch": 11.549815498154981, "grad_norm": 1.0323060750961304, "learning_rate": 8.845529378370707e-05, "loss": 0.04292479455471039, "step": 40690 }, { "epoch": 11.552653988078342, "grad_norm": 11.450393676757812, "learning_rate": 8.845245529378371e-05, "loss": 0.04937570691108704, "step": 40700 }, { "epoch": 11.555492478001703, "grad_norm": 1.7715065479278564, "learning_rate": 8.844961680386034e-05, "loss": 0.03219987750053406, "step": 40710 }, { "epoch": 11.558330967925064, "grad_norm": 5.308170795440674, "learning_rate": 8.844677831393698e-05, "loss": 0.041692990064620974, "step": 40720 }, { "epoch": 11.561169457848425, "grad_norm": 4.233156204223633, "learning_rate": 8.844393982401364e-05, "loss": 0.01638435274362564, "step": 40730 }, { "epoch": 11.564007947771785, "grad_norm": 7.357607364654541, "learning_rate": 8.844110133409026e-05, "loss": 0.021266745030879976, "step": 40740 }, { "epoch": 11.566846437695146, "grad_norm": 2.186805248260498, "learning_rate": 8.84382628441669e-05, "loss": 0.016843950748443602, "step": 40750 }, { "epoch": 11.569684927618507, "grad_norm": 4.167898654937744, "learning_rate": 8.843570820323587e-05, "loss": 0.04635430872440338, "step": 40760 }, { "epoch": 11.572523417541868, "grad_norm": 4.194733142852783, "learning_rate": 8.843286971331253e-05, "loss": 0.02512229382991791, "step": 40770 }, { "epoch": 11.57536190746523, "grad_norm": 2.4925312995910645, "learning_rate": 8.843003122338917e-05, "loss": 0.02663496732711792, "step": 40780 }, { "epoch": 11.578200397388589, "grad_norm": 1.6101758480072021, "learning_rate": 8.84271927334658e-05, "loss": 0.019581899046897888, "step": 40790 }, { "epoch": 11.58103888731195, "grad_norm": 10.088071823120117, "learning_rate": 8.842435424354244e-05, "loss": 0.03111754059791565, "step": 40800 }, { "epoch": 11.58387737723531, "grad_norm": 3.0669164657592773, "learning_rate": 8.842151575361908e-05, "loss": 0.02595497965812683, "step": 40810 }, { "epoch": 11.586715867158672, "grad_norm": 3.979440450668335, "learning_rate": 8.841867726369572e-05, "loss": 0.05192110538482666, "step": 40820 }, { "epoch": 11.589554357082033, "grad_norm": 1.7708674669265747, "learning_rate": 8.841583877377235e-05, "loss": 0.03658840358257294, "step": 40830 }, { "epoch": 11.592392847005392, "grad_norm": 7.226225852966309, "learning_rate": 8.8413000283849e-05, "loss": 0.029032802581787108, "step": 40840 }, { "epoch": 11.595231336928753, "grad_norm": 7.911994457244873, "learning_rate": 8.841016179392564e-05, "loss": 0.059162676334381104, "step": 40850 }, { "epoch": 11.598069826852115, "grad_norm": 6.379648208618164, "learning_rate": 8.840732330400227e-05, "loss": 0.04988559484481812, "step": 40860 }, { "epoch": 11.600908316775476, "grad_norm": 3.632519483566284, "learning_rate": 8.840448481407891e-05, "loss": 0.036395153403282164, "step": 40870 }, { "epoch": 11.603746806698837, "grad_norm": 1.6199476718902588, "learning_rate": 8.840164632415555e-05, "loss": 0.02620728313922882, "step": 40880 }, { "epoch": 11.606585296622196, "grad_norm": 9.043339729309082, "learning_rate": 8.839880783423218e-05, "loss": 0.039179551601409915, "step": 40890 }, { "epoch": 11.609423786545557, "grad_norm": 11.807465553283691, "learning_rate": 8.839596934430882e-05, "loss": 0.030200332403182983, "step": 40900 }, { "epoch": 11.612262276468918, "grad_norm": 2.4300224781036377, "learning_rate": 8.839313085438548e-05, "loss": 0.05301461815834045, "step": 40910 }, { "epoch": 11.61510076639228, "grad_norm": 7.552546501159668, "learning_rate": 8.839029236446211e-05, "loss": 0.036712533235549925, "step": 40920 }, { "epoch": 11.61793925631564, "grad_norm": 7.367305278778076, "learning_rate": 8.838745387453875e-05, "loss": 0.0307466983795166, "step": 40930 }, { "epoch": 11.620777746239002, "grad_norm": 12.000767707824707, "learning_rate": 8.838461538461539e-05, "loss": 0.03546465635299682, "step": 40940 }, { "epoch": 11.623616236162361, "grad_norm": 1.0784496068954468, "learning_rate": 8.838177689469203e-05, "loss": 0.03375645577907562, "step": 40950 }, { "epoch": 11.626454726085722, "grad_norm": 9.805306434631348, "learning_rate": 8.837893840476866e-05, "loss": 0.032104334235191344, "step": 40960 }, { "epoch": 11.629293216009083, "grad_norm": 7.918580055236816, "learning_rate": 8.837609991484531e-05, "loss": 0.035383790731430054, "step": 40970 }, { "epoch": 11.632131705932444, "grad_norm": 5.261808395385742, "learning_rate": 8.837326142492196e-05, "loss": 0.034006571769714354, "step": 40980 }, { "epoch": 11.634970195855805, "grad_norm": 1.8661668300628662, "learning_rate": 8.837042293499858e-05, "loss": 0.03130645751953125, "step": 40990 }, { "epoch": 11.637808685779165, "grad_norm": 4.381826877593994, "learning_rate": 8.836758444507522e-05, "loss": 0.04224347472190857, "step": 41000 }, { "epoch": 11.637808685779165, "eval_accuracy": 0.9688433903478095, "eval_loss": 0.097162626683712, "eval_runtime": 37.0839, "eval_samples_per_second": 424.092, "eval_steps_per_second": 6.634, "step": 41000 }, { "epoch": 11.640647175702526, "grad_norm": 2.3687832355499268, "learning_rate": 8.836474595515187e-05, "loss": 0.027124693989753722, "step": 41010 }, { "epoch": 11.643485665625887, "grad_norm": 7.93141508102417, "learning_rate": 8.83619074652285e-05, "loss": 0.019278335571289062, "step": 41020 }, { "epoch": 11.646324155549248, "grad_norm": 2.5734801292419434, "learning_rate": 8.835906897530513e-05, "loss": 0.024307581782341003, "step": 41030 }, { "epoch": 11.64916264547261, "grad_norm": 8.955913543701172, "learning_rate": 8.835623048538179e-05, "loss": 0.01892831027507782, "step": 41040 }, { "epoch": 11.65200113539597, "grad_norm": 3.4015095233917236, "learning_rate": 8.835339199545842e-05, "loss": 0.041583961248397826, "step": 41050 }, { "epoch": 11.65483962531933, "grad_norm": 1.224227786064148, "learning_rate": 8.835055350553506e-05, "loss": 0.017339396476745605, "step": 41060 }, { "epoch": 11.65767811524269, "grad_norm": 1.6589232683181763, "learning_rate": 8.83477150156117e-05, "loss": 0.026095965504646303, "step": 41070 }, { "epoch": 11.660516605166052, "grad_norm": 0.8550671935081482, "learning_rate": 8.834487652568834e-05, "loss": 0.012421229481697082, "step": 41080 }, { "epoch": 11.663355095089413, "grad_norm": 2.6294307708740234, "learning_rate": 8.834203803576497e-05, "loss": 0.020475026965141297, "step": 41090 }, { "epoch": 11.666193585012774, "grad_norm": 1.9330569505691528, "learning_rate": 8.833919954584161e-05, "loss": 0.02867964208126068, "step": 41100 }, { "epoch": 11.669032074936133, "grad_norm": 5.60837984085083, "learning_rate": 8.833636105591827e-05, "loss": 0.024872277677059174, "step": 41110 }, { "epoch": 11.671870564859494, "grad_norm": 3.9636380672454834, "learning_rate": 8.83335225659949e-05, "loss": 0.02395997941493988, "step": 41120 }, { "epoch": 11.674709054782856, "grad_norm": 1.9494198560714722, "learning_rate": 8.833068407607154e-05, "loss": 0.03716174960136413, "step": 41130 }, { "epoch": 11.677547544706217, "grad_norm": 0.8108519315719604, "learning_rate": 8.832784558614818e-05, "loss": 0.026500940322875977, "step": 41140 }, { "epoch": 11.680386034629578, "grad_norm": 4.802393436431885, "learning_rate": 8.83250070962248e-05, "loss": 0.03278830647468567, "step": 41150 }, { "epoch": 11.683224524552937, "grad_norm": 6.819237232208252, "learning_rate": 8.832216860630145e-05, "loss": 0.08061438798904419, "step": 41160 }, { "epoch": 11.686063014476298, "grad_norm": 1.3939871788024902, "learning_rate": 8.83193301163781e-05, "loss": 0.043726733326911925, "step": 41170 }, { "epoch": 11.68890150439966, "grad_norm": 9.89509105682373, "learning_rate": 8.831649162645473e-05, "loss": 0.029691717028617857, "step": 41180 }, { "epoch": 11.69173999432302, "grad_norm": 6.857607841491699, "learning_rate": 8.831365313653137e-05, "loss": 0.026519042253494263, "step": 41190 }, { "epoch": 11.694578484246382, "grad_norm": 4.844376087188721, "learning_rate": 8.831081464660801e-05, "loss": 0.024811410903930665, "step": 41200 }, { "epoch": 11.697416974169741, "grad_norm": 2.606058120727539, "learning_rate": 8.830797615668465e-05, "loss": 0.022915032505989075, "step": 41210 }, { "epoch": 11.700255464093102, "grad_norm": 8.058161735534668, "learning_rate": 8.830513766676128e-05, "loss": 0.023417387902736665, "step": 41220 }, { "epoch": 11.703093954016463, "grad_norm": 2.498708486557007, "learning_rate": 8.830229917683792e-05, "loss": 0.0392946720123291, "step": 41230 }, { "epoch": 11.705932443939824, "grad_norm": 4.329317092895508, "learning_rate": 8.829946068691458e-05, "loss": 0.040831416845321655, "step": 41240 }, { "epoch": 11.708770933863185, "grad_norm": 10.397799491882324, "learning_rate": 8.82966221969912e-05, "loss": 0.029990768432617186, "step": 41250 }, { "epoch": 11.711609423786545, "grad_norm": 14.151484489440918, "learning_rate": 8.829378370706785e-05, "loss": 0.03947511911392212, "step": 41260 }, { "epoch": 11.714447913709906, "grad_norm": 0.8845330476760864, "learning_rate": 8.829094521714449e-05, "loss": 0.053212094306945804, "step": 41270 }, { "epoch": 11.717286403633267, "grad_norm": 9.0269193649292, "learning_rate": 8.828810672722112e-05, "loss": 0.047972026467323306, "step": 41280 }, { "epoch": 11.720124893556628, "grad_norm": 2.5496275424957275, "learning_rate": 8.828526823729776e-05, "loss": 0.026084449887275696, "step": 41290 }, { "epoch": 11.72296338347999, "grad_norm": 0.8083370327949524, "learning_rate": 8.82824297473744e-05, "loss": 0.019636918604373933, "step": 41300 }, { "epoch": 11.72580187340335, "grad_norm": 8.03819465637207, "learning_rate": 8.827959125745104e-05, "loss": 0.023698605597019196, "step": 41310 }, { "epoch": 11.72864036332671, "grad_norm": 6.46199893951416, "learning_rate": 8.827675276752768e-05, "loss": 0.029300504922866823, "step": 41320 }, { "epoch": 11.73147885325007, "grad_norm": 8.992026329040527, "learning_rate": 8.827391427760432e-05, "loss": 0.05887299180030823, "step": 41330 }, { "epoch": 11.734317343173432, "grad_norm": 5.539891242980957, "learning_rate": 8.827107578768096e-05, "loss": 0.023653163015842436, "step": 41340 }, { "epoch": 11.737155833096793, "grad_norm": 1.9565318822860718, "learning_rate": 8.826823729775759e-05, "loss": 0.030856144428253175, "step": 41350 }, { "epoch": 11.739994323020154, "grad_norm": 1.5161356925964355, "learning_rate": 8.826539880783423e-05, "loss": 0.02872598171234131, "step": 41360 }, { "epoch": 11.742832812943513, "grad_norm": 10.825072288513184, "learning_rate": 8.826256031791089e-05, "loss": 0.03338582217693329, "step": 41370 }, { "epoch": 11.745671302866874, "grad_norm": 4.893728256225586, "learning_rate": 8.825972182798752e-05, "loss": 0.030100810527801513, "step": 41380 }, { "epoch": 11.748509792790236, "grad_norm": 6.301290512084961, "learning_rate": 8.825688333806416e-05, "loss": 0.02457965612411499, "step": 41390 }, { "epoch": 11.751348282713597, "grad_norm": 10.12527084350586, "learning_rate": 8.82540448481408e-05, "loss": 0.04935266971588135, "step": 41400 }, { "epoch": 11.754186772636958, "grad_norm": 8.177797317504883, "learning_rate": 8.825120635821743e-05, "loss": 0.05039690732955933, "step": 41410 }, { "epoch": 11.757025262560317, "grad_norm": 10.492124557495117, "learning_rate": 8.824836786829407e-05, "loss": 0.038214632868766786, "step": 41420 }, { "epoch": 11.759863752483678, "grad_norm": 7.343987941741943, "learning_rate": 8.824552937837071e-05, "loss": 0.023773661255836485, "step": 41430 }, { "epoch": 11.76270224240704, "grad_norm": 3.834630012512207, "learning_rate": 8.824269088844735e-05, "loss": 0.02095180153846741, "step": 41440 }, { "epoch": 11.7655407323304, "grad_norm": 0.5895691514015198, "learning_rate": 8.823985239852399e-05, "loss": 0.042954367399215695, "step": 41450 }, { "epoch": 11.768379222253762, "grad_norm": 11.992205619812012, "learning_rate": 8.823701390860063e-05, "loss": 0.047915467619895936, "step": 41460 }, { "epoch": 11.771217712177123, "grad_norm": 6.431911945343018, "learning_rate": 8.823417541867727e-05, "loss": 0.036278370022773745, "step": 41470 }, { "epoch": 11.774056202100482, "grad_norm": 7.802657604217529, "learning_rate": 8.82313369287539e-05, "loss": 0.04262373447418213, "step": 41480 }, { "epoch": 11.776894692023843, "grad_norm": 3.186356544494629, "learning_rate": 8.822849843883054e-05, "loss": 0.024875055253505706, "step": 41490 }, { "epoch": 11.779733181947204, "grad_norm": 6.842459678649902, "learning_rate": 8.822565994890718e-05, "loss": 0.041757413744926454, "step": 41500 }, { "epoch": 11.779733181947204, "eval_accuracy": 0.9578431995930565, "eval_loss": 0.1347157061100006, "eval_runtime": 35.1482, "eval_samples_per_second": 447.449, "eval_steps_per_second": 6.999, "step": 41500 }, { "epoch": 11.782571671870565, "grad_norm": 3.708627700805664, "learning_rate": 8.822282145898383e-05, "loss": 0.052143990993499756, "step": 41510 }, { "epoch": 11.785410161793926, "grad_norm": 7.399838447570801, "learning_rate": 8.821998296906047e-05, "loss": 0.058158469200134275, "step": 41520 }, { "epoch": 11.788248651717286, "grad_norm": 7.937488555908203, "learning_rate": 8.821714447913711e-05, "loss": 0.022513017058372498, "step": 41530 }, { "epoch": 11.791087141640647, "grad_norm": 1.82308828830719, "learning_rate": 8.821430598921374e-05, "loss": 0.032624995708465575, "step": 41540 }, { "epoch": 11.793925631564008, "grad_norm": 8.620182037353516, "learning_rate": 8.821146749929038e-05, "loss": 0.03693493008613587, "step": 41550 }, { "epoch": 11.796764121487369, "grad_norm": 0.8000280261039734, "learning_rate": 8.820862900936702e-05, "loss": 0.03255566358566284, "step": 41560 }, { "epoch": 11.79960261141073, "grad_norm": 7.609930038452148, "learning_rate": 8.820579051944366e-05, "loss": 0.03445599973201752, "step": 41570 }, { "epoch": 11.80244110133409, "grad_norm": 3.979921579360962, "learning_rate": 8.82029520295203e-05, "loss": 0.024638848006725313, "step": 41580 }, { "epoch": 11.80527959125745, "grad_norm": 1.3498709201812744, "learning_rate": 8.820011353959694e-05, "loss": 0.031666037440299985, "step": 41590 }, { "epoch": 11.808118081180812, "grad_norm": 3.1144492626190186, "learning_rate": 8.819727504967357e-05, "loss": 0.01487901210784912, "step": 41600 }, { "epoch": 11.810956571104173, "grad_norm": 6.564595699310303, "learning_rate": 8.819443655975021e-05, "loss": 0.05321950316429138, "step": 41610 }, { "epoch": 11.813795061027534, "grad_norm": 7.904528617858887, "learning_rate": 8.819159806982685e-05, "loss": 0.03186890184879303, "step": 41620 }, { "epoch": 11.816633550950893, "grad_norm": 1.2771729230880737, "learning_rate": 8.81887595799035e-05, "loss": 0.03461967408657074, "step": 41630 }, { "epoch": 11.819472040874254, "grad_norm": 3.1777055263519287, "learning_rate": 8.818592108998014e-05, "loss": 0.07256621718406678, "step": 41640 }, { "epoch": 11.822310530797616, "grad_norm": 2.432345151901245, "learning_rate": 8.818308260005678e-05, "loss": 0.019209159910678862, "step": 41650 }, { "epoch": 11.825149020720977, "grad_norm": 0.7073733806610107, "learning_rate": 8.818024411013342e-05, "loss": 0.03271316289901734, "step": 41660 }, { "epoch": 11.827987510644338, "grad_norm": 10.385478019714355, "learning_rate": 8.817740562021005e-05, "loss": 0.05179072022438049, "step": 41670 }, { "epoch": 11.830826000567697, "grad_norm": 2.366898775100708, "learning_rate": 8.817456713028669e-05, "loss": 0.043417584896087644, "step": 41680 }, { "epoch": 11.833664490491058, "grad_norm": 2.9725353717803955, "learning_rate": 8.817172864036333e-05, "loss": 0.0275852233171463, "step": 41690 }, { "epoch": 11.83650298041442, "grad_norm": 9.290630340576172, "learning_rate": 8.816889015043997e-05, "loss": 0.02861097753047943, "step": 41700 }, { "epoch": 11.83934147033778, "grad_norm": 4.103811264038086, "learning_rate": 8.816605166051661e-05, "loss": 0.019336076080799104, "step": 41710 }, { "epoch": 11.842179960261142, "grad_norm": 4.568559646606445, "learning_rate": 8.816321317059325e-05, "loss": 0.02775556743144989, "step": 41720 }, { "epoch": 11.845018450184503, "grad_norm": 0.9745373725891113, "learning_rate": 8.816037468066988e-05, "loss": 0.02140621691942215, "step": 41730 }, { "epoch": 11.847856940107862, "grad_norm": 6.197048664093018, "learning_rate": 8.815753619074652e-05, "loss": 0.02854481339454651, "step": 41740 }, { "epoch": 11.850695430031223, "grad_norm": 7.217499256134033, "learning_rate": 8.815469770082316e-05, "loss": 0.02187860906124115, "step": 41750 }, { "epoch": 11.853533919954584, "grad_norm": 3.6465232372283936, "learning_rate": 8.81518592108998e-05, "loss": 0.030699360370635986, "step": 41760 }, { "epoch": 11.856372409877945, "grad_norm": 10.531685829162598, "learning_rate": 8.814902072097645e-05, "loss": 0.04450918734073639, "step": 41770 }, { "epoch": 11.859210899801306, "grad_norm": 5.017262935638428, "learning_rate": 8.814618223105309e-05, "loss": 0.050351059436798094, "step": 41780 }, { "epoch": 11.862049389724666, "grad_norm": 10.963714599609375, "learning_rate": 8.814334374112973e-05, "loss": 0.03181772232055664, "step": 41790 }, { "epoch": 11.864887879648027, "grad_norm": 3.1091811656951904, "learning_rate": 8.814050525120636e-05, "loss": 0.041989824175834654, "step": 41800 }, { "epoch": 11.867726369571388, "grad_norm": 5.644045352935791, "learning_rate": 8.8137666761283e-05, "loss": 0.049888473749160764, "step": 41810 }, { "epoch": 11.870564859494749, "grad_norm": 11.62216567993164, "learning_rate": 8.813482827135964e-05, "loss": 0.04146873652935028, "step": 41820 }, { "epoch": 11.87340334941811, "grad_norm": 6.151871681213379, "learning_rate": 8.813198978143627e-05, "loss": 0.053171205520629886, "step": 41830 }, { "epoch": 11.876241839341471, "grad_norm": 7.413686275482178, "learning_rate": 8.812915129151292e-05, "loss": 0.03103487491607666, "step": 41840 }, { "epoch": 11.87908032926483, "grad_norm": 1.174538254737854, "learning_rate": 8.812631280158956e-05, "loss": 0.023112374544143676, "step": 41850 }, { "epoch": 11.881918819188192, "grad_norm": 4.05333948135376, "learning_rate": 8.812347431166619e-05, "loss": 0.04017348289489746, "step": 41860 }, { "epoch": 11.884757309111553, "grad_norm": 5.522282123565674, "learning_rate": 8.812063582174283e-05, "loss": 0.04182519912719727, "step": 41870 }, { "epoch": 11.887595799034914, "grad_norm": 13.005685806274414, "learning_rate": 8.811779733181948e-05, "loss": 0.0512126088142395, "step": 41880 }, { "epoch": 11.890434288958275, "grad_norm": 2.329993963241577, "learning_rate": 8.811495884189612e-05, "loss": 0.027847102284431456, "step": 41890 }, { "epoch": 11.893272778881634, "grad_norm": 8.609347343444824, "learning_rate": 8.811212035197276e-05, "loss": 0.03611434698104858, "step": 41900 }, { "epoch": 11.896111268804995, "grad_norm": 13.378158569335938, "learning_rate": 8.81092818620494e-05, "loss": 0.0352544754743576, "step": 41910 }, { "epoch": 11.898949758728357, "grad_norm": 6.238336086273193, "learning_rate": 8.810644337212604e-05, "loss": 0.024858418107032775, "step": 41920 }, { "epoch": 11.901788248651718, "grad_norm": 3.0889861583709717, "learning_rate": 8.810360488220267e-05, "loss": 0.029022803902626036, "step": 41930 }, { "epoch": 11.904626738575079, "grad_norm": 0.8595874309539795, "learning_rate": 8.810076639227931e-05, "loss": 0.02722196578979492, "step": 41940 }, { "epoch": 11.907465228498438, "grad_norm": 0.4795660376548767, "learning_rate": 8.809792790235595e-05, "loss": 0.032339510321617124, "step": 41950 }, { "epoch": 11.9103037184218, "grad_norm": 3.503610610961914, "learning_rate": 8.809508941243258e-05, "loss": 0.03369966447353363, "step": 41960 }, { "epoch": 11.91314220834516, "grad_norm": 3.7947089672088623, "learning_rate": 8.809225092250923e-05, "loss": 0.03344893157482147, "step": 41970 }, { "epoch": 11.915980698268521, "grad_norm": 1.0733914375305176, "learning_rate": 8.808941243258588e-05, "loss": 0.028406840562820435, "step": 41980 }, { "epoch": 11.918819188191883, "grad_norm": 3.091614246368408, "learning_rate": 8.80865739426625e-05, "loss": 0.015290825068950653, "step": 41990 }, { "epoch": 11.921657678115242, "grad_norm": 3.575798749923706, "learning_rate": 8.808373545273914e-05, "loss": 0.01531381756067276, "step": 42000 }, { "epoch": 11.921657678115242, "eval_accuracy": 0.9625484834997139, "eval_loss": 0.11711904406547546, "eval_runtime": 31.3282, "eval_samples_per_second": 502.007, "eval_steps_per_second": 7.852, "step": 42000 }, { "epoch": 11.924496168038603, "grad_norm": 11.868136405944824, "learning_rate": 8.808089696281579e-05, "loss": 0.027386486530303955, "step": 42010 }, { "epoch": 11.927334657961964, "grad_norm": 9.438006401062012, "learning_rate": 8.807805847289243e-05, "loss": 0.04521586894989014, "step": 42020 }, { "epoch": 11.930173147885325, "grad_norm": 7.782376289367676, "learning_rate": 8.807521998296906e-05, "loss": 0.014146074652671814, "step": 42030 }, { "epoch": 11.933011637808686, "grad_norm": 7.315443515777588, "learning_rate": 8.807238149304571e-05, "loss": 0.024589455127716063, "step": 42040 }, { "epoch": 11.935850127732046, "grad_norm": 12.203489303588867, "learning_rate": 8.806954300312235e-05, "loss": 0.07821328043937684, "step": 42050 }, { "epoch": 11.938688617655407, "grad_norm": 7.611723899841309, "learning_rate": 8.806670451319898e-05, "loss": 0.033363401889801025, "step": 42060 }, { "epoch": 11.941527107578768, "grad_norm": 10.137516021728516, "learning_rate": 8.806386602327562e-05, "loss": 0.04039627611637116, "step": 42070 }, { "epoch": 11.944365597502129, "grad_norm": 1.6276030540466309, "learning_rate": 8.806102753335226e-05, "loss": 0.03264874219894409, "step": 42080 }, { "epoch": 11.94720408742549, "grad_norm": 13.510990142822266, "learning_rate": 8.805818904342889e-05, "loss": 0.06236225366592407, "step": 42090 }, { "epoch": 11.950042577348851, "grad_norm": 4.523811340332031, "learning_rate": 8.805535055350555e-05, "loss": 0.038355123996734616, "step": 42100 }, { "epoch": 11.95288106727221, "grad_norm": 2.311707019805908, "learning_rate": 8.805251206358219e-05, "loss": 0.04750436842441559, "step": 42110 }, { "epoch": 11.955719557195572, "grad_norm": 1.028144359588623, "learning_rate": 8.804967357365881e-05, "loss": 0.04814756214618683, "step": 42120 }, { "epoch": 11.958558047118933, "grad_norm": 4.658628940582275, "learning_rate": 8.804683508373546e-05, "loss": 0.01757112145423889, "step": 42130 }, { "epoch": 11.961396537042294, "grad_norm": 7.542932033538818, "learning_rate": 8.80439965938121e-05, "loss": 0.04845822155475617, "step": 42140 }, { "epoch": 11.964235026965655, "grad_norm": 11.860176086425781, "learning_rate": 8.804115810388874e-05, "loss": 0.05667648911476135, "step": 42150 }, { "epoch": 11.967073516889014, "grad_norm": 5.14389705657959, "learning_rate": 8.803831961396537e-05, "loss": 0.029348573088645934, "step": 42160 }, { "epoch": 11.969912006812375, "grad_norm": 10.139193534851074, "learning_rate": 8.803548112404202e-05, "loss": 0.02429099977016449, "step": 42170 }, { "epoch": 11.972750496735737, "grad_norm": 4.764553070068359, "learning_rate": 8.803264263411866e-05, "loss": 0.032026806473731996, "step": 42180 }, { "epoch": 11.975588986659098, "grad_norm": 0.8691178560256958, "learning_rate": 8.802980414419529e-05, "loss": 0.02664041519165039, "step": 42190 }, { "epoch": 11.978427476582459, "grad_norm": 13.843423843383789, "learning_rate": 8.802696565427193e-05, "loss": 0.024278315901756286, "step": 42200 }, { "epoch": 11.981265966505818, "grad_norm": 9.946852684020996, "learning_rate": 8.802412716434857e-05, "loss": 0.042263883352279666, "step": 42210 }, { "epoch": 11.98410445642918, "grad_norm": 3.2664496898651123, "learning_rate": 8.80212886744252e-05, "loss": 0.03517971038818359, "step": 42220 }, { "epoch": 11.98694294635254, "grad_norm": 2.312603712081909, "learning_rate": 8.801845018450184e-05, "loss": 0.02691034972667694, "step": 42230 }, { "epoch": 11.989781436275901, "grad_norm": 5.909262180328369, "learning_rate": 8.80156116945785e-05, "loss": 0.024188028275966646, "step": 42240 }, { "epoch": 11.992619926199263, "grad_norm": 8.095325469970703, "learning_rate": 8.801277320465513e-05, "loss": 0.028657203912734984, "step": 42250 }, { "epoch": 11.995458416122624, "grad_norm": 1.8169091939926147, "learning_rate": 8.800993471473177e-05, "loss": 0.05447516441345215, "step": 42260 }, { "epoch": 11.998296906045983, "grad_norm": 6.331886291503906, "learning_rate": 8.800709622480841e-05, "loss": 0.025347772240638732, "step": 42270 }, { "epoch": 12.001135395969344, "grad_norm": 1.671044945716858, "learning_rate": 8.800425773488505e-05, "loss": 0.019167476892471315, "step": 42280 }, { "epoch": 12.003973885892705, "grad_norm": 4.797255516052246, "learning_rate": 8.800141924496168e-05, "loss": 0.02038729339838028, "step": 42290 }, { "epoch": 12.006812375816066, "grad_norm": 13.393505096435547, "learning_rate": 8.799858075503833e-05, "loss": 0.02722536027431488, "step": 42300 }, { "epoch": 12.009650865739427, "grad_norm": 9.200382232666016, "learning_rate": 8.799574226511496e-05, "loss": 0.024648433923721312, "step": 42310 }, { "epoch": 12.012489355662787, "grad_norm": 0.8614206314086914, "learning_rate": 8.79929037751916e-05, "loss": 0.025042256712913512, "step": 42320 }, { "epoch": 12.015327845586148, "grad_norm": 2.03387713432312, "learning_rate": 8.799006528526824e-05, "loss": 0.017334885895252228, "step": 42330 }, { "epoch": 12.018166335509509, "grad_norm": 3.047280788421631, "learning_rate": 8.798722679534488e-05, "loss": 0.009717319160699844, "step": 42340 }, { "epoch": 12.02100482543287, "grad_norm": 1.9679374694824219, "learning_rate": 8.798438830542151e-05, "loss": 0.02029114365577698, "step": 42350 }, { "epoch": 12.023843315356231, "grad_norm": 1.5019649267196655, "learning_rate": 8.798154981549815e-05, "loss": 0.012284689396619797, "step": 42360 }, { "epoch": 12.02668180527959, "grad_norm": 0.6199237108230591, "learning_rate": 8.797871132557481e-05, "loss": 0.016621315479278566, "step": 42370 }, { "epoch": 12.029520295202952, "grad_norm": 2.3096799850463867, "learning_rate": 8.797587283565144e-05, "loss": 0.013561734557151794, "step": 42380 }, { "epoch": 12.032358785126313, "grad_norm": 4.444303512573242, "learning_rate": 8.797303434572808e-05, "loss": 0.018357035517692567, "step": 42390 }, { "epoch": 12.035197275049674, "grad_norm": 1.7929736375808716, "learning_rate": 8.797019585580472e-05, "loss": 0.01274249255657196, "step": 42400 }, { "epoch": 12.038035764973035, "grad_norm": 7.450484752655029, "learning_rate": 8.796735736588136e-05, "loss": 0.029894351959228516, "step": 42410 }, { "epoch": 12.040874254896394, "grad_norm": 11.788549423217773, "learning_rate": 8.796451887595799e-05, "loss": 0.03823383450508118, "step": 42420 }, { "epoch": 12.043712744819755, "grad_norm": 1.2661882638931274, "learning_rate": 8.796168038603463e-05, "loss": 0.01705172061920166, "step": 42430 }, { "epoch": 12.046551234743117, "grad_norm": 3.336590528488159, "learning_rate": 8.795884189611127e-05, "loss": 0.029735854268074034, "step": 42440 }, { "epoch": 12.049389724666478, "grad_norm": 3.402101755142212, "learning_rate": 8.795600340618791e-05, "loss": 0.015562047064304353, "step": 42450 }, { "epoch": 12.052228214589839, "grad_norm": 1.8076858520507812, "learning_rate": 8.795316491626455e-05, "loss": 0.02131883203983307, "step": 42460 }, { "epoch": 12.0550667045132, "grad_norm": 3.749241828918457, "learning_rate": 8.79503264263412e-05, "loss": 0.031740438938140866, "step": 42470 }, { "epoch": 12.05790519443656, "grad_norm": 0.1959408074617386, "learning_rate": 8.794748793641782e-05, "loss": 0.022341904044151307, "step": 42480 }, { "epoch": 12.06074368435992, "grad_norm": 3.5620250701904297, "learning_rate": 8.794464944649446e-05, "loss": 0.01478850543498993, "step": 42490 }, { "epoch": 12.063582174283281, "grad_norm": 2.7986679077148438, "learning_rate": 8.794181095657112e-05, "loss": 0.01601594090461731, "step": 42500 }, { "epoch": 12.063582174283281, "eval_accuracy": 0.9685254657595218, "eval_loss": 0.0986095741391182, "eval_runtime": 32.7986, "eval_samples_per_second": 479.503, "eval_steps_per_second": 7.5, "step": 42500 }, { "epoch": 12.066420664206642, "grad_norm": 24.232810974121094, "learning_rate": 8.793897246664775e-05, "loss": 0.043908709287643434, "step": 42510 }, { "epoch": 12.069259154130004, "grad_norm": 13.961590766906738, "learning_rate": 8.793613397672439e-05, "loss": 0.04054493904113769, "step": 42520 }, { "epoch": 12.072097644053363, "grad_norm": 9.082690238952637, "learning_rate": 8.793329548680103e-05, "loss": 0.018843132257461547, "step": 42530 }, { "epoch": 12.074936133976724, "grad_norm": 3.163827896118164, "learning_rate": 8.793045699687766e-05, "loss": 0.023791809380054475, "step": 42540 }, { "epoch": 12.077774623900085, "grad_norm": 5.875718593597412, "learning_rate": 8.79276185069543e-05, "loss": 0.012556624412536622, "step": 42550 }, { "epoch": 12.080613113823446, "grad_norm": 2.382676362991333, "learning_rate": 8.792478001703094e-05, "loss": 0.017244070768356323, "step": 42560 }, { "epoch": 12.083451603746807, "grad_norm": 8.559048652648926, "learning_rate": 8.792194152710758e-05, "loss": 0.04110903739929199, "step": 42570 }, { "epoch": 12.086290093670167, "grad_norm": 4.43048620223999, "learning_rate": 8.791910303718422e-05, "loss": 0.019242659211158752, "step": 42580 }, { "epoch": 12.089128583593528, "grad_norm": 6.391964435577393, "learning_rate": 8.791626454726086e-05, "loss": 0.02902469038963318, "step": 42590 }, { "epoch": 12.091967073516889, "grad_norm": 5.329524993896484, "learning_rate": 8.79134260573375e-05, "loss": 0.027243250608444215, "step": 42600 }, { "epoch": 12.09480556344025, "grad_norm": 4.836333274841309, "learning_rate": 8.791058756741413e-05, "loss": 0.01187005192041397, "step": 42610 }, { "epoch": 12.097644053363611, "grad_norm": 7.633523941040039, "learning_rate": 8.790774907749077e-05, "loss": 0.04170409142971039, "step": 42620 }, { "epoch": 12.10048254328697, "grad_norm": 2.798593044281006, "learning_rate": 8.790491058756742e-05, "loss": 0.017926451563835145, "step": 42630 }, { "epoch": 12.103321033210332, "grad_norm": 1.9739197492599487, "learning_rate": 8.790207209764406e-05, "loss": 0.021616062521934508, "step": 42640 }, { "epoch": 12.106159523133693, "grad_norm": 0.9676541686058044, "learning_rate": 8.78992336077207e-05, "loss": 0.024294459819793703, "step": 42650 }, { "epoch": 12.108998013057054, "grad_norm": 1.7724647521972656, "learning_rate": 8.789639511779734e-05, "loss": 0.03185601830482483, "step": 42660 }, { "epoch": 12.111836502980415, "grad_norm": 0.3611180782318115, "learning_rate": 8.789355662787397e-05, "loss": 0.014770568907260894, "step": 42670 }, { "epoch": 12.114674992903776, "grad_norm": 16.78565216064453, "learning_rate": 8.789071813795061e-05, "loss": 0.04302522540092468, "step": 42680 }, { "epoch": 12.117513482827135, "grad_norm": 9.895925521850586, "learning_rate": 8.788787964802725e-05, "loss": 0.03827439844608307, "step": 42690 }, { "epoch": 12.120351972750496, "grad_norm": 5.188383102416992, "learning_rate": 8.788504115810389e-05, "loss": 0.03208199441432953, "step": 42700 }, { "epoch": 12.123190462673858, "grad_norm": 2.0721516609191895, "learning_rate": 8.788220266818053e-05, "loss": 0.011897308379411697, "step": 42710 }, { "epoch": 12.126028952597219, "grad_norm": 10.269426345825195, "learning_rate": 8.787936417825717e-05, "loss": 0.03165861964225769, "step": 42720 }, { "epoch": 12.12886744252058, "grad_norm": 3.3225903511047363, "learning_rate": 8.787652568833382e-05, "loss": 0.02179349511861801, "step": 42730 }, { "epoch": 12.13170593244394, "grad_norm": 1.0045795440673828, "learning_rate": 8.787368719841044e-05, "loss": 0.017593967914581298, "step": 42740 }, { "epoch": 12.1345444223673, "grad_norm": 7.803922653198242, "learning_rate": 8.787084870848709e-05, "loss": 0.015620028972625733, "step": 42750 }, { "epoch": 12.137382912290661, "grad_norm": 0.9662190675735474, "learning_rate": 8.786801021856373e-05, "loss": 0.013929079473018646, "step": 42760 }, { "epoch": 12.140221402214022, "grad_norm": 1.820534348487854, "learning_rate": 8.786517172864037e-05, "loss": 0.02228236347436905, "step": 42770 }, { "epoch": 12.143059892137384, "grad_norm": 0.58070307970047, "learning_rate": 8.786233323871701e-05, "loss": 0.01153615117073059, "step": 42780 }, { "epoch": 12.145898382060743, "grad_norm": 8.931794166564941, "learning_rate": 8.785949474879365e-05, "loss": 0.019435842335224152, "step": 42790 }, { "epoch": 12.148736871984104, "grad_norm": 5.6357502937316895, "learning_rate": 8.785665625887028e-05, "loss": 0.02729610800743103, "step": 42800 }, { "epoch": 12.151575361907465, "grad_norm": 3.2536122798919678, "learning_rate": 8.785381776894692e-05, "loss": 0.02407788634300232, "step": 42810 }, { "epoch": 12.154413851830826, "grad_norm": 6.006178379058838, "learning_rate": 8.785097927902356e-05, "loss": 0.013536371290683746, "step": 42820 }, { "epoch": 12.157252341754187, "grad_norm": 1.8390198945999146, "learning_rate": 8.78481407891002e-05, "loss": 0.017469488084316254, "step": 42830 }, { "epoch": 12.160090831677547, "grad_norm": 10.504243850708008, "learning_rate": 8.784530229917684e-05, "loss": 0.03017207682132721, "step": 42840 }, { "epoch": 12.162929321600908, "grad_norm": 3.5493767261505127, "learning_rate": 8.784246380925349e-05, "loss": 0.032809320092201236, "step": 42850 }, { "epoch": 12.165767811524269, "grad_norm": 0.32161659002304077, "learning_rate": 8.783962531933013e-05, "loss": 0.017602016031742097, "step": 42860 }, { "epoch": 12.16860630144763, "grad_norm": 7.016176223754883, "learning_rate": 8.783678682940675e-05, "loss": 0.02854626178741455, "step": 42870 }, { "epoch": 12.171444791370991, "grad_norm": 5.038053512573242, "learning_rate": 8.78339483394834e-05, "loss": 0.014125818014144897, "step": 42880 }, { "epoch": 12.174283281294352, "grad_norm": 2.78157639503479, "learning_rate": 8.783110984956004e-05, "loss": 0.02057155817747116, "step": 42890 }, { "epoch": 12.177121771217712, "grad_norm": 1.0509940385818481, "learning_rate": 8.782827135963668e-05, "loss": 0.016553373634815217, "step": 42900 }, { "epoch": 12.179960261141073, "grad_norm": 5.599396228790283, "learning_rate": 8.782543286971332e-05, "loss": 0.015610629320144653, "step": 42910 }, { "epoch": 12.182798751064434, "grad_norm": 1.4669357538223267, "learning_rate": 8.782259437978996e-05, "loss": 0.02748657464981079, "step": 42920 }, { "epoch": 12.185637240987795, "grad_norm": 1.1055556535720825, "learning_rate": 8.781975588986659e-05, "loss": 0.02215808629989624, "step": 42930 }, { "epoch": 12.188475730911156, "grad_norm": 0.5451817512512207, "learning_rate": 8.781691739994323e-05, "loss": 0.014049682021141052, "step": 42940 }, { "epoch": 12.191314220834515, "grad_norm": 0.22471275925636292, "learning_rate": 8.781407891001987e-05, "loss": 0.03508964478969574, "step": 42950 }, { "epoch": 12.194152710757876, "grad_norm": 4.065980434417725, "learning_rate": 8.781124042009651e-05, "loss": 0.024037133157253265, "step": 42960 }, { "epoch": 12.196991200681238, "grad_norm": 1.8386139869689941, "learning_rate": 8.780840193017315e-05, "loss": 0.05329045653343201, "step": 42970 }, { "epoch": 12.199829690604599, "grad_norm": 0.8893312215805054, "learning_rate": 8.78055634402498e-05, "loss": 0.027882057428359985, "step": 42980 }, { "epoch": 12.20266818052796, "grad_norm": 0.9296700358390808, "learning_rate": 8.780272495032644e-05, "loss": 0.036284875869750974, "step": 42990 }, { "epoch": 12.205506670451319, "grad_norm": 0.9350181221961975, "learning_rate": 8.779988646040307e-05, "loss": 0.018728801608085634, "step": 43000 }, { "epoch": 12.205506670451319, "eval_accuracy": 0.9661092388885356, "eval_loss": 0.10720501095056534, "eval_runtime": 46.899, "eval_samples_per_second": 335.337, "eval_steps_per_second": 5.245, "step": 43000 }, { "epoch": 12.20834516037468, "grad_norm": 5.523910045623779, "learning_rate": 8.779704797047971e-05, "loss": 0.01569734811782837, "step": 43010 }, { "epoch": 12.211183650298041, "grad_norm": 5.390267848968506, "learning_rate": 8.779420948055635e-05, "loss": 0.025904983282089233, "step": 43020 }, { "epoch": 12.214022140221402, "grad_norm": 0.23697206377983093, "learning_rate": 8.779137099063299e-05, "loss": 0.011408884078264236, "step": 43030 }, { "epoch": 12.216860630144764, "grad_norm": 6.877984046936035, "learning_rate": 8.778853250070963e-05, "loss": 0.043124690651893616, "step": 43040 }, { "epoch": 12.219699120068125, "grad_norm": 4.867430686950684, "learning_rate": 8.778569401078627e-05, "loss": 0.012697473168373108, "step": 43050 }, { "epoch": 12.222537609991484, "grad_norm": 6.123249530792236, "learning_rate": 8.77828555208629e-05, "loss": 0.018052877485752107, "step": 43060 }, { "epoch": 12.225376099914845, "grad_norm": 1.2340748310089111, "learning_rate": 8.778001703093954e-05, "loss": 0.019849632680416108, "step": 43070 }, { "epoch": 12.228214589838206, "grad_norm": 2.965710401535034, "learning_rate": 8.777717854101618e-05, "loss": 0.021002456545829773, "step": 43080 }, { "epoch": 12.231053079761567, "grad_norm": 3.2319023609161377, "learning_rate": 8.777434005109282e-05, "loss": 0.02374749779701233, "step": 43090 }, { "epoch": 12.233891569684928, "grad_norm": 3.2262725830078125, "learning_rate": 8.777150156116947e-05, "loss": 0.01536458432674408, "step": 43100 }, { "epoch": 12.236730059608288, "grad_norm": 6.554632186889648, "learning_rate": 8.776866307124611e-05, "loss": 0.015860722959041597, "step": 43110 }, { "epoch": 12.239568549531649, "grad_norm": 9.228994369506836, "learning_rate": 8.776582458132275e-05, "loss": 0.04147699475288391, "step": 43120 }, { "epoch": 12.24240703945501, "grad_norm": 5.251175880432129, "learning_rate": 8.776298609139938e-05, "loss": 0.03621347546577454, "step": 43130 }, { "epoch": 12.245245529378371, "grad_norm": 2.225816011428833, "learning_rate": 8.776014760147602e-05, "loss": 0.016661910712718962, "step": 43140 }, { "epoch": 12.248084019301732, "grad_norm": 0.5887947678565979, "learning_rate": 8.775730911155266e-05, "loss": 0.018774904310703278, "step": 43150 }, { "epoch": 12.250922509225092, "grad_norm": 0.21424315869808197, "learning_rate": 8.775447062162929e-05, "loss": 0.016441477835178374, "step": 43160 }, { "epoch": 12.253760999148453, "grad_norm": 7.893508434295654, "learning_rate": 8.775163213170594e-05, "loss": 0.011776397377252579, "step": 43170 }, { "epoch": 12.256599489071814, "grad_norm": 2.3837947845458984, "learning_rate": 8.774879364178258e-05, "loss": 0.019902046024799346, "step": 43180 }, { "epoch": 12.259437978995175, "grad_norm": 8.576164245605469, "learning_rate": 8.774595515185921e-05, "loss": 0.029707503318786622, "step": 43190 }, { "epoch": 12.262276468918536, "grad_norm": 2.6249923706054688, "learning_rate": 8.774311666193585e-05, "loss": 0.006676526367664337, "step": 43200 }, { "epoch": 12.265114958841895, "grad_norm": 2.653848886489868, "learning_rate": 8.77402781720125e-05, "loss": 0.01283724457025528, "step": 43210 }, { "epoch": 12.267953448765256, "grad_norm": 5.54274845123291, "learning_rate": 8.773743968208914e-05, "loss": 0.021368955075740815, "step": 43220 }, { "epoch": 12.270791938688618, "grad_norm": 11.993958473205566, "learning_rate": 8.773460119216578e-05, "loss": 0.032994499802589415, "step": 43230 }, { "epoch": 12.273630428611979, "grad_norm": 1.0589174032211304, "learning_rate": 8.773176270224242e-05, "loss": 0.03995351195335388, "step": 43240 }, { "epoch": 12.27646891853534, "grad_norm": 1.066496729850769, "learning_rate": 8.772892421231905e-05, "loss": 0.03768300116062164, "step": 43250 }, { "epoch": 12.279307408458699, "grad_norm": 6.978654861450195, "learning_rate": 8.772608572239569e-05, "loss": 0.027469006180763245, "step": 43260 }, { "epoch": 12.28214589838206, "grad_norm": 3.2042458057403564, "learning_rate": 8.772324723247233e-05, "loss": 0.016944824159145354, "step": 43270 }, { "epoch": 12.284984388305421, "grad_norm": 6.283276557922363, "learning_rate": 8.772040874254897e-05, "loss": 0.017949827015399933, "step": 43280 }, { "epoch": 12.287822878228782, "grad_norm": 4.380410194396973, "learning_rate": 8.77175702526256e-05, "loss": 0.006551393121480942, "step": 43290 }, { "epoch": 12.290661368152143, "grad_norm": 0.5861814022064209, "learning_rate": 8.771473176270225e-05, "loss": 0.01800150126218796, "step": 43300 }, { "epoch": 12.293499858075505, "grad_norm": 3.2520382404327393, "learning_rate": 8.77118932727789e-05, "loss": 0.021789096295833588, "step": 43310 }, { "epoch": 12.296338347998864, "grad_norm": 1.3977535963058472, "learning_rate": 8.770905478285552e-05, "loss": 0.03398105800151825, "step": 43320 }, { "epoch": 12.299176837922225, "grad_norm": 11.068792343139648, "learning_rate": 8.770621629293216e-05, "loss": 0.03667581081390381, "step": 43330 }, { "epoch": 12.302015327845586, "grad_norm": 7.974401473999023, "learning_rate": 8.77033778030088e-05, "loss": 0.02832326591014862, "step": 43340 }, { "epoch": 12.304853817768947, "grad_norm": 2.552974224090576, "learning_rate": 8.770053931308545e-05, "loss": 0.023764997720718384, "step": 43350 }, { "epoch": 12.307692307692308, "grad_norm": 6.526494979858398, "learning_rate": 8.769770082316207e-05, "loss": 0.03599141538143158, "step": 43360 }, { "epoch": 12.310530797615668, "grad_norm": 1.729649305343628, "learning_rate": 8.769486233323873e-05, "loss": 0.02461608648300171, "step": 43370 }, { "epoch": 12.313369287539029, "grad_norm": 10.23698616027832, "learning_rate": 8.769202384331536e-05, "loss": 0.019095706939697265, "step": 43380 }, { "epoch": 12.31620777746239, "grad_norm": 0.3076995611190796, "learning_rate": 8.7689185353392e-05, "loss": 0.021746070683002473, "step": 43390 }, { "epoch": 12.319046267385751, "grad_norm": 0.5416623950004578, "learning_rate": 8.768634686346864e-05, "loss": 0.030848410725593568, "step": 43400 }, { "epoch": 12.321884757309112, "grad_norm": 0.924653947353363, "learning_rate": 8.768350837354528e-05, "loss": 0.029654711484909058, "step": 43410 }, { "epoch": 12.324723247232471, "grad_norm": 6.487839698791504, "learning_rate": 8.768066988362191e-05, "loss": 0.03157211542129516, "step": 43420 }, { "epoch": 12.327561737155833, "grad_norm": 4.4102067947387695, "learning_rate": 8.767783139369856e-05, "loss": 0.01055406928062439, "step": 43430 }, { "epoch": 12.330400227079194, "grad_norm": 3.9745326042175293, "learning_rate": 8.76749929037752e-05, "loss": 0.017667178809642792, "step": 43440 }, { "epoch": 12.333238717002555, "grad_norm": 0.6378137469291687, "learning_rate": 8.767215441385183e-05, "loss": 0.011971042305231095, "step": 43450 }, { "epoch": 12.336077206925916, "grad_norm": 11.151874542236328, "learning_rate": 8.766931592392847e-05, "loss": 0.06582743525505066, "step": 43460 }, { "epoch": 12.338915696849277, "grad_norm": 7.933891773223877, "learning_rate": 8.766647743400512e-05, "loss": 0.024850860238075256, "step": 43470 }, { "epoch": 12.341754186772636, "grad_norm": 10.337605476379395, "learning_rate": 8.766363894408174e-05, "loss": 0.026383063197135924, "step": 43480 }, { "epoch": 12.344592676695997, "grad_norm": 4.934293270111084, "learning_rate": 8.766080045415838e-05, "loss": 0.031963250041007994, "step": 43490 }, { "epoch": 12.347431166619359, "grad_norm": 3.7452046871185303, "learning_rate": 8.765796196423504e-05, "loss": 0.02048766762018204, "step": 43500 }, { "epoch": 12.347431166619359, "eval_accuracy": 0.9687162205124944, "eval_loss": 0.10149853676557541, "eval_runtime": 32.8308, "eval_samples_per_second": 479.032, "eval_steps_per_second": 7.493, "step": 43500 }, { "epoch": 12.35026965654272, "grad_norm": 11.900498390197754, "learning_rate": 8.765512347431167e-05, "loss": 0.02843741774559021, "step": 43510 }, { "epoch": 12.35310814646608, "grad_norm": 11.256841659545898, "learning_rate": 8.765228498438831e-05, "loss": 0.05030051469802856, "step": 43520 }, { "epoch": 12.35594663638944, "grad_norm": 8.614985466003418, "learning_rate": 8.764944649446495e-05, "loss": 0.056306517124176024, "step": 43530 }, { "epoch": 12.358785126312801, "grad_norm": 6.389334678649902, "learning_rate": 8.764660800454159e-05, "loss": 0.03555997610092163, "step": 43540 }, { "epoch": 12.361623616236162, "grad_norm": 9.95920467376709, "learning_rate": 8.764376951461822e-05, "loss": 0.03143468499183655, "step": 43550 }, { "epoch": 12.364462106159523, "grad_norm": 4.311703681945801, "learning_rate": 8.764093102469486e-05, "loss": 0.053225082159042356, "step": 43560 }, { "epoch": 12.367300596082885, "grad_norm": 6.976881504058838, "learning_rate": 8.763809253477152e-05, "loss": 0.023119038343429564, "step": 43570 }, { "epoch": 12.370139086006244, "grad_norm": 3.761945962905884, "learning_rate": 8.763525404484814e-05, "loss": 0.015905283391475677, "step": 43580 }, { "epoch": 12.372977575929605, "grad_norm": 8.988996505737305, "learning_rate": 8.763241555492478e-05, "loss": 0.04903358221054077, "step": 43590 }, { "epoch": 12.375816065852966, "grad_norm": 8.340705871582031, "learning_rate": 8.762957706500143e-05, "loss": 0.021581581234931944, "step": 43600 }, { "epoch": 12.378654555776327, "grad_norm": 6.485612392425537, "learning_rate": 8.762673857507805e-05, "loss": 0.023418095707893372, "step": 43610 }, { "epoch": 12.381493045699688, "grad_norm": 4.081179618835449, "learning_rate": 8.76239000851547e-05, "loss": 0.025992101430892943, "step": 43620 }, { "epoch": 12.384331535623048, "grad_norm": 5.731960296630859, "learning_rate": 8.762106159523135e-05, "loss": 0.04298951923847198, "step": 43630 }, { "epoch": 12.387170025546409, "grad_norm": 8.466975212097168, "learning_rate": 8.761822310530798e-05, "loss": 0.0223959743976593, "step": 43640 }, { "epoch": 12.39000851546977, "grad_norm": 2.75673246383667, "learning_rate": 8.761538461538462e-05, "loss": 0.014331893622875213, "step": 43650 }, { "epoch": 12.392847005393131, "grad_norm": 0.28026068210601807, "learning_rate": 8.761254612546126e-05, "loss": 0.03302421569824219, "step": 43660 }, { "epoch": 12.395685495316492, "grad_norm": 8.993667602539062, "learning_rate": 8.760999148453023e-05, "loss": 0.03864609897136688, "step": 43670 }, { "epoch": 12.398523985239853, "grad_norm": 2.6343748569488525, "learning_rate": 8.760715299460688e-05, "loss": 0.020321482419967653, "step": 43680 }, { "epoch": 12.401362475163213, "grad_norm": 2.3854806423187256, "learning_rate": 8.760431450468351e-05, "loss": 0.009187107533216476, "step": 43690 }, { "epoch": 12.404200965086574, "grad_norm": 8.649785041809082, "learning_rate": 8.760147601476015e-05, "loss": 0.02888520359992981, "step": 43700 }, { "epoch": 12.407039455009935, "grad_norm": 1.1763685941696167, "learning_rate": 8.759863752483679e-05, "loss": 0.012761574983596802, "step": 43710 }, { "epoch": 12.409877944933296, "grad_norm": 10.315303802490234, "learning_rate": 8.759579903491343e-05, "loss": 0.015500989556312562, "step": 43720 }, { "epoch": 12.412716434856657, "grad_norm": 2.352046489715576, "learning_rate": 8.759296054499006e-05, "loss": 0.017738236486911772, "step": 43730 }, { "epoch": 12.415554924780016, "grad_norm": 6.597049236297607, "learning_rate": 8.75901220550667e-05, "loss": 0.03053748607635498, "step": 43740 }, { "epoch": 12.418393414703377, "grad_norm": 1.9910120964050293, "learning_rate": 8.758728356514336e-05, "loss": 0.034646409749984744, "step": 43750 }, { "epoch": 12.421231904626739, "grad_norm": 5.998736381530762, "learning_rate": 8.758444507521999e-05, "loss": 0.03493017852306366, "step": 43760 }, { "epoch": 12.4240703945501, "grad_norm": 9.419123649597168, "learning_rate": 8.758160658529663e-05, "loss": 0.025234299898147582, "step": 43770 }, { "epoch": 12.42690888447346, "grad_norm": 0.5751453638076782, "learning_rate": 8.757876809537327e-05, "loss": 0.015322671830654144, "step": 43780 }, { "epoch": 12.42974737439682, "grad_norm": 3.589245319366455, "learning_rate": 8.75759296054499e-05, "loss": 0.01881980299949646, "step": 43790 }, { "epoch": 12.432585864320181, "grad_norm": 6.581637859344482, "learning_rate": 8.757309111552654e-05, "loss": 0.02629859447479248, "step": 43800 }, { "epoch": 12.435424354243542, "grad_norm": 8.477561950683594, "learning_rate": 8.757025262560319e-05, "loss": 0.03205634653568268, "step": 43810 }, { "epoch": 12.438262844166903, "grad_norm": 2.0551233291625977, "learning_rate": 8.756741413567982e-05, "loss": 0.0353986918926239, "step": 43820 }, { "epoch": 12.441101334090265, "grad_norm": 13.160858154296875, "learning_rate": 8.756457564575646e-05, "loss": 0.024612995982170104, "step": 43830 }, { "epoch": 12.443939824013626, "grad_norm": 8.135246276855469, "learning_rate": 8.75617371558331e-05, "loss": 0.015001949667930604, "step": 43840 }, { "epoch": 12.446778313936985, "grad_norm": 0.6739336252212524, "learning_rate": 8.755889866590974e-05, "loss": 0.02338627427816391, "step": 43850 }, { "epoch": 12.449616803860346, "grad_norm": 5.415078163146973, "learning_rate": 8.755606017598637e-05, "loss": 0.026547563076019288, "step": 43860 }, { "epoch": 12.452455293783707, "grad_norm": 1.091626763343811, "learning_rate": 8.755322168606301e-05, "loss": 0.017348739504814147, "step": 43870 }, { "epoch": 12.455293783707068, "grad_norm": 11.263443946838379, "learning_rate": 8.755038319613967e-05, "loss": 0.029550474882125855, "step": 43880 }, { "epoch": 12.45813227363043, "grad_norm": 1.821685791015625, "learning_rate": 8.75475447062163e-05, "loss": 0.01811111271381378, "step": 43890 }, { "epoch": 12.460970763553789, "grad_norm": 6.1828813552856445, "learning_rate": 8.754470621629294e-05, "loss": 0.027774900197982788, "step": 43900 }, { "epoch": 12.46380925347715, "grad_norm": 3.033991575241089, "learning_rate": 8.754186772636958e-05, "loss": 0.038434916734695436, "step": 43910 }, { "epoch": 12.466647743400511, "grad_norm": 6.3611321449279785, "learning_rate": 8.753902923644621e-05, "loss": 0.014765633642673493, "step": 43920 }, { "epoch": 12.469486233323872, "grad_norm": 8.681385040283203, "learning_rate": 8.753619074652285e-05, "loss": 0.02449558675289154, "step": 43930 }, { "epoch": 12.472324723247233, "grad_norm": 5.18971061706543, "learning_rate": 8.753335225659949e-05, "loss": 0.02685062289237976, "step": 43940 }, { "epoch": 12.475163213170593, "grad_norm": 10.31009578704834, "learning_rate": 8.753051376667613e-05, "loss": 0.021674241125583648, "step": 43950 }, { "epoch": 12.478001703093954, "grad_norm": 0.7698858976364136, "learning_rate": 8.752767527675277e-05, "loss": 0.010746216773986817, "step": 43960 }, { "epoch": 12.480840193017315, "grad_norm": 7.455374240875244, "learning_rate": 8.752483678682941e-05, "loss": 0.024863488972187042, "step": 43970 }, { "epoch": 12.483678682940676, "grad_norm": 2.553424119949341, "learning_rate": 8.752199829690606e-05, "loss": 0.041084641218185426, "step": 43980 }, { "epoch": 12.486517172864037, "grad_norm": 1.3121610879898071, "learning_rate": 8.751915980698268e-05, "loss": 0.015468144416809082, "step": 43990 }, { "epoch": 12.489355662787396, "grad_norm": 0.7120893597602844, "learning_rate": 8.751632131705932e-05, "loss": 0.013517634570598602, "step": 44000 }, { "epoch": 12.489355662787396, "eval_accuracy": 0.9703058434539328, "eval_loss": 0.0920611023902893, "eval_runtime": 35.8401, "eval_samples_per_second": 438.81, "eval_steps_per_second": 6.864, "step": 44000 }, { "epoch": 12.492194152710757, "grad_norm": 3.946415901184082, "learning_rate": 8.751348282713598e-05, "loss": 0.013155283033847808, "step": 44010 }, { "epoch": 12.495032642634119, "grad_norm": 10.607535362243652, "learning_rate": 8.751064433721261e-05, "loss": 0.04540489315986633, "step": 44020 }, { "epoch": 12.49787113255748, "grad_norm": 4.514583587646484, "learning_rate": 8.750780584728925e-05, "loss": 0.030425819754600524, "step": 44030 }, { "epoch": 12.50070962248084, "grad_norm": 4.463616371154785, "learning_rate": 8.750496735736589e-05, "loss": 0.025489100813865663, "step": 44040 }, { "epoch": 12.5035481124042, "grad_norm": 9.568166732788086, "learning_rate": 8.750212886744252e-05, "loss": 0.04505818486213684, "step": 44050 }, { "epoch": 12.506386602327561, "grad_norm": 6.021180629730225, "learning_rate": 8.749929037751916e-05, "loss": 0.018857477605342864, "step": 44060 }, { "epoch": 12.509225092250922, "grad_norm": 4.7424774169921875, "learning_rate": 8.74964518875958e-05, "loss": 0.03243306279182434, "step": 44070 }, { "epoch": 12.512063582174283, "grad_norm": 9.922897338867188, "learning_rate": 8.749361339767244e-05, "loss": 0.037359943985939024, "step": 44080 }, { "epoch": 12.514902072097644, "grad_norm": 12.399426460266113, "learning_rate": 8.749077490774908e-05, "loss": 0.038216504454612735, "step": 44090 }, { "epoch": 12.517740562021006, "grad_norm": 3.647433042526245, "learning_rate": 8.748793641782572e-05, "loss": 0.019692376255989075, "step": 44100 }, { "epoch": 12.520579051944365, "grad_norm": 8.57077693939209, "learning_rate": 8.748509792790237e-05, "loss": 0.04609246253967285, "step": 44110 }, { "epoch": 12.523417541867726, "grad_norm": 9.2264404296875, "learning_rate": 8.7482259437979e-05, "loss": 0.05189837217330932, "step": 44120 }, { "epoch": 12.526256031791087, "grad_norm": 4.035073280334473, "learning_rate": 8.747942094805564e-05, "loss": 0.018150539696216585, "step": 44130 }, { "epoch": 12.529094521714448, "grad_norm": 2.2258639335632324, "learning_rate": 8.747658245813228e-05, "loss": 0.016733536124229433, "step": 44140 }, { "epoch": 12.53193301163781, "grad_norm": 12.473857879638672, "learning_rate": 8.747374396820892e-05, "loss": 0.03098931610584259, "step": 44150 }, { "epoch": 12.534771501561169, "grad_norm": 13.116628646850586, "learning_rate": 8.747090547828556e-05, "loss": 0.025827193260192872, "step": 44160 }, { "epoch": 12.53760999148453, "grad_norm": 8.575265884399414, "learning_rate": 8.74680669883622e-05, "loss": 0.05520942211151123, "step": 44170 }, { "epoch": 12.540448481407891, "grad_norm": 3.797060966491699, "learning_rate": 8.746522849843883e-05, "loss": 0.027095335721969604, "step": 44180 }, { "epoch": 12.543286971331252, "grad_norm": 12.218207359313965, "learning_rate": 8.746239000851547e-05, "loss": 0.02846195697784424, "step": 44190 }, { "epoch": 12.546125461254613, "grad_norm": 5.422717571258545, "learning_rate": 8.745955151859211e-05, "loss": 0.04512856602668762, "step": 44200 }, { "epoch": 12.548963951177974, "grad_norm": 1.153975486755371, "learning_rate": 8.745671302866875e-05, "loss": 0.021230365335941314, "step": 44210 }, { "epoch": 12.551802441101334, "grad_norm": 0.3893817067146301, "learning_rate": 8.74538745387454e-05, "loss": 0.02771477699279785, "step": 44220 }, { "epoch": 12.554640931024695, "grad_norm": 3.755164384841919, "learning_rate": 8.745103604882204e-05, "loss": 0.039801394939422606, "step": 44230 }, { "epoch": 12.557479420948056, "grad_norm": 3.329977035522461, "learning_rate": 8.744819755889866e-05, "loss": 0.02695300579071045, "step": 44240 }, { "epoch": 12.560317910871417, "grad_norm": 10.435100555419922, "learning_rate": 8.74453590689753e-05, "loss": 0.028113651275634765, "step": 44250 }, { "epoch": 12.563156400794778, "grad_norm": 7.1815409660339355, "learning_rate": 8.744252057905195e-05, "loss": 0.023507246375083925, "step": 44260 }, { "epoch": 12.565994890718137, "grad_norm": 2.1366536617279053, "learning_rate": 8.743968208912859e-05, "loss": 0.024337007105350493, "step": 44270 }, { "epoch": 12.568833380641498, "grad_norm": 6.133735656738281, "learning_rate": 8.743684359920523e-05, "loss": 0.030369016528129577, "step": 44280 }, { "epoch": 12.57167187056486, "grad_norm": 5.222670078277588, "learning_rate": 8.743400510928187e-05, "loss": 0.03186432421207428, "step": 44290 }, { "epoch": 12.57451036048822, "grad_norm": 1.179478645324707, "learning_rate": 8.743116661935851e-05, "loss": 0.01421954482793808, "step": 44300 }, { "epoch": 12.577348850411582, "grad_norm": 3.6404569149017334, "learning_rate": 8.742832812943514e-05, "loss": 0.03080369234085083, "step": 44310 }, { "epoch": 12.580187340334941, "grad_norm": 0.6826581358909607, "learning_rate": 8.742548963951178e-05, "loss": 0.025148138403892517, "step": 44320 }, { "epoch": 12.583025830258302, "grad_norm": 6.168979644775391, "learning_rate": 8.742265114958842e-05, "loss": 0.017731374502182005, "step": 44330 }, { "epoch": 12.585864320181663, "grad_norm": 8.060129165649414, "learning_rate": 8.741981265966505e-05, "loss": 0.017711402475833894, "step": 44340 }, { "epoch": 12.588702810105024, "grad_norm": 2.6680445671081543, "learning_rate": 8.74169741697417e-05, "loss": 0.016787290573120117, "step": 44350 }, { "epoch": 12.591541300028386, "grad_norm": 6.966230869293213, "learning_rate": 8.741413567981835e-05, "loss": 0.03349200785160065, "step": 44360 }, { "epoch": 12.594379789951745, "grad_norm": 3.2079851627349854, "learning_rate": 8.741129718989497e-05, "loss": 0.02147513031959534, "step": 44370 }, { "epoch": 12.597218279875106, "grad_norm": 0.405815988779068, "learning_rate": 8.740845869997162e-05, "loss": 0.019382601976394652, "step": 44380 }, { "epoch": 12.600056769798467, "grad_norm": 6.806717395782471, "learning_rate": 8.740562021004826e-05, "loss": 0.018505136668682098, "step": 44390 }, { "epoch": 12.602895259721828, "grad_norm": 3.2220189571380615, "learning_rate": 8.74027817201249e-05, "loss": 0.023961788415908812, "step": 44400 }, { "epoch": 12.60573374964519, "grad_norm": 1.0476040840148926, "learning_rate": 8.739994323020154e-05, "loss": 0.012808771431446075, "step": 44410 }, { "epoch": 12.608572239568549, "grad_norm": 3.5662219524383545, "learning_rate": 8.739710474027818e-05, "loss": 0.03692491352558136, "step": 44420 }, { "epoch": 12.61141072949191, "grad_norm": 3.016277313232422, "learning_rate": 8.739426625035482e-05, "loss": 0.013001738488674164, "step": 44430 }, { "epoch": 12.61424921941527, "grad_norm": 8.693224906921387, "learning_rate": 8.739142776043145e-05, "loss": 0.05319857597351074, "step": 44440 }, { "epoch": 12.617087709338632, "grad_norm": 5.513697624206543, "learning_rate": 8.738858927050809e-05, "loss": 0.02645609378814697, "step": 44450 }, { "epoch": 12.619926199261993, "grad_norm": 8.53377914428711, "learning_rate": 8.738575078058473e-05, "loss": 0.014668360352516174, "step": 44460 }, { "epoch": 12.622764689185354, "grad_norm": 9.301794052124023, "learning_rate": 8.738291229066136e-05, "loss": 0.014918071031570435, "step": 44470 }, { "epoch": 12.625603179108714, "grad_norm": 11.21097469329834, "learning_rate": 8.738007380073802e-05, "loss": 0.028910237550735473, "step": 44480 }, { "epoch": 12.628441669032075, "grad_norm": 7.921929359436035, "learning_rate": 8.737723531081466e-05, "loss": 0.030108627676963807, "step": 44490 }, { "epoch": 12.631280158955436, "grad_norm": 8.774811744689941, "learning_rate": 8.737439682089129e-05, "loss": 0.02356390357017517, "step": 44500 }, { "epoch": 12.631280158955436, "eval_accuracy": 0.9555541425573854, "eval_loss": 0.1534491330385208, "eval_runtime": 33.133, "eval_samples_per_second": 474.663, "eval_steps_per_second": 7.425, "step": 44500 }, { "epoch": 12.634118648878797, "grad_norm": 2.8944530487060547, "learning_rate": 8.737155833096793e-05, "loss": 0.031967395544052125, "step": 44510 }, { "epoch": 12.636957138802158, "grad_norm": 3.9340219497680664, "learning_rate": 8.736871984104457e-05, "loss": 0.02710425853729248, "step": 44520 }, { "epoch": 12.639795628725517, "grad_norm": 3.4901273250579834, "learning_rate": 8.736588135112121e-05, "loss": 0.015069495141506194, "step": 44530 }, { "epoch": 12.642634118648878, "grad_norm": 7.989969253540039, "learning_rate": 8.736304286119785e-05, "loss": 0.04205063879489899, "step": 44540 }, { "epoch": 12.64547260857224, "grad_norm": 14.133074760437012, "learning_rate": 8.736020437127449e-05, "loss": 0.023335877060890197, "step": 44550 }, { "epoch": 12.6483110984956, "grad_norm": 3.238351345062256, "learning_rate": 8.735736588135113e-05, "loss": 0.036182868480682376, "step": 44560 }, { "epoch": 12.651149588418962, "grad_norm": 12.600605964660645, "learning_rate": 8.735452739142776e-05, "loss": 0.03279470205307007, "step": 44570 }, { "epoch": 12.653988078342321, "grad_norm": 3.9097065925598145, "learning_rate": 8.73516889015044e-05, "loss": 0.016165539622306824, "step": 44580 }, { "epoch": 12.656826568265682, "grad_norm": 1.3065696954727173, "learning_rate": 8.734885041158104e-05, "loss": 0.031934693455696106, "step": 44590 }, { "epoch": 12.659665058189043, "grad_norm": 1.120517611503601, "learning_rate": 8.734601192165767e-05, "loss": 0.04707658588886261, "step": 44600 }, { "epoch": 12.662503548112404, "grad_norm": 4.768412113189697, "learning_rate": 8.734317343173433e-05, "loss": 0.05359538197517395, "step": 44610 }, { "epoch": 12.665342038035766, "grad_norm": 1.0517644882202148, "learning_rate": 8.734033494181097e-05, "loss": 0.026544001698493958, "step": 44620 }, { "epoch": 12.668180527959127, "grad_norm": 4.414589881896973, "learning_rate": 8.73374964518876e-05, "loss": 0.04027869701385498, "step": 44630 }, { "epoch": 12.671019017882486, "grad_norm": 3.095834970474243, "learning_rate": 8.733465796196424e-05, "loss": 0.033663544058799746, "step": 44640 }, { "epoch": 12.673857507805847, "grad_norm": 10.149762153625488, "learning_rate": 8.733181947204088e-05, "loss": 0.04592658877372742, "step": 44650 }, { "epoch": 12.676695997729208, "grad_norm": 6.134585857391357, "learning_rate": 8.732898098211752e-05, "loss": 0.014015226066112519, "step": 44660 }, { "epoch": 12.67953448765257, "grad_norm": 9.72177505493164, "learning_rate": 8.732614249219415e-05, "loss": 0.04023568630218506, "step": 44670 }, { "epoch": 12.68237297757593, "grad_norm": 9.719181060791016, "learning_rate": 8.73233040022708e-05, "loss": 0.020383240282535554, "step": 44680 }, { "epoch": 12.68521146749929, "grad_norm": 2.6362202167510986, "learning_rate": 8.732046551234744e-05, "loss": 0.01868530809879303, "step": 44690 }, { "epoch": 12.68804995742265, "grad_norm": 5.3783369064331055, "learning_rate": 8.731762702242407e-05, "loss": 0.0366559237241745, "step": 44700 }, { "epoch": 12.690888447346012, "grad_norm": 1.1001715660095215, "learning_rate": 8.731478853250071e-05, "loss": 0.01246526837348938, "step": 44710 }, { "epoch": 12.693726937269373, "grad_norm": 0.4098418354988098, "learning_rate": 8.731195004257735e-05, "loss": 0.013297230005264282, "step": 44720 }, { "epoch": 12.696565427192734, "grad_norm": 6.370312213897705, "learning_rate": 8.730911155265398e-05, "loss": 0.016671082377433775, "step": 44730 }, { "epoch": 12.699403917116094, "grad_norm": 7.109945774078369, "learning_rate": 8.730627306273064e-05, "loss": 0.01295877993106842, "step": 44740 }, { "epoch": 12.702242407039455, "grad_norm": 17.76192855834961, "learning_rate": 8.730343457280728e-05, "loss": 0.06499407291412354, "step": 44750 }, { "epoch": 12.705080896962816, "grad_norm": 10.26417350769043, "learning_rate": 8.73005960828839e-05, "loss": 0.02664111852645874, "step": 44760 }, { "epoch": 12.707919386886177, "grad_norm": 0.8697184920310974, "learning_rate": 8.729775759296055e-05, "loss": 0.06774661540985108, "step": 44770 }, { "epoch": 12.710757876809538, "grad_norm": 14.105785369873047, "learning_rate": 8.729491910303719e-05, "loss": 0.036456751823425296, "step": 44780 }, { "epoch": 12.713596366732897, "grad_norm": 0.4543580412864685, "learning_rate": 8.729208061311383e-05, "loss": 0.029792848229408263, "step": 44790 }, { "epoch": 12.716434856656258, "grad_norm": 0.7040238380432129, "learning_rate": 8.728924212319046e-05, "loss": 0.018314018845558167, "step": 44800 }, { "epoch": 12.71927334657962, "grad_norm": 4.714770793914795, "learning_rate": 8.728640363326711e-05, "loss": 0.021276414394378662, "step": 44810 }, { "epoch": 12.72211183650298, "grad_norm": 4.568909645080566, "learning_rate": 8.728356514334375e-05, "loss": 0.02909187078475952, "step": 44820 }, { "epoch": 12.724950326426342, "grad_norm": 6.924832344055176, "learning_rate": 8.728072665342038e-05, "loss": 0.03137224018573761, "step": 44830 }, { "epoch": 12.727788816349701, "grad_norm": 0.22362513840198517, "learning_rate": 8.727788816349702e-05, "loss": 0.03695736825466156, "step": 44840 }, { "epoch": 12.730627306273062, "grad_norm": 9.17486572265625, "learning_rate": 8.727504967357367e-05, "loss": 0.03078659176826477, "step": 44850 }, { "epoch": 12.733465796196423, "grad_norm": 4.678513050079346, "learning_rate": 8.72722111836503e-05, "loss": 0.019919341802597045, "step": 44860 }, { "epoch": 12.736304286119784, "grad_norm": 1.829468846321106, "learning_rate": 8.726937269372693e-05, "loss": 0.02785727083683014, "step": 44870 }, { "epoch": 12.739142776043145, "grad_norm": 20.356794357299805, "learning_rate": 8.726653420380359e-05, "loss": 0.05077862143516541, "step": 44880 }, { "epoch": 12.741981265966507, "grad_norm": 7.690953731536865, "learning_rate": 8.726369571388022e-05, "loss": 0.03529904782772064, "step": 44890 }, { "epoch": 12.744819755889866, "grad_norm": 3.5145695209503174, "learning_rate": 8.726085722395686e-05, "loss": 0.02233564555644989, "step": 44900 }, { "epoch": 12.747658245813227, "grad_norm": 5.415108680725098, "learning_rate": 8.72580187340335e-05, "loss": 0.015750613808631898, "step": 44910 }, { "epoch": 12.750496735736588, "grad_norm": 2.760850667953491, "learning_rate": 8.725518024411014e-05, "loss": 0.035274913907051085, "step": 44920 }, { "epoch": 12.75333522565995, "grad_norm": 1.3843302726745605, "learning_rate": 8.725234175418677e-05, "loss": 0.03669232428073883, "step": 44930 }, { "epoch": 12.75617371558331, "grad_norm": 9.84599781036377, "learning_rate": 8.724950326426342e-05, "loss": 0.024896812438964844, "step": 44940 }, { "epoch": 12.75901220550667, "grad_norm": 5.417962551116943, "learning_rate": 8.724666477434007e-05, "loss": 0.02024376839399338, "step": 44950 }, { "epoch": 12.76185069543003, "grad_norm": 5.94030237197876, "learning_rate": 8.72438262844167e-05, "loss": 0.03781768083572388, "step": 44960 }, { "epoch": 12.764689185353392, "grad_norm": 10.75522232055664, "learning_rate": 8.724098779449333e-05, "loss": 0.04019149839878082, "step": 44970 }, { "epoch": 12.767527675276753, "grad_norm": 1.6134066581726074, "learning_rate": 8.723814930456998e-05, "loss": 0.02440655082464218, "step": 44980 }, { "epoch": 12.770366165200114, "grad_norm": 11.419049263000488, "learning_rate": 8.72353108146466e-05, "loss": 0.029668760299682618, "step": 44990 }, { "epoch": 12.773204655123475, "grad_norm": 7.366904258728027, "learning_rate": 8.723247232472325e-05, "loss": 0.03961425125598907, "step": 45000 }, { "epoch": 12.773204655123475, "eval_accuracy": 0.9665543333121384, "eval_loss": 0.1135474145412445, "eval_runtime": 34.1369, "eval_samples_per_second": 460.704, "eval_steps_per_second": 7.206, "step": 45000 }, { "epoch": 12.776043145046835, "grad_norm": 9.312529563903809, "learning_rate": 8.72296338347999e-05, "loss": 0.03776895701885223, "step": 45010 }, { "epoch": 12.778881634970196, "grad_norm": 0.8636845946311951, "learning_rate": 8.722679534487653e-05, "loss": 0.030574434995651247, "step": 45020 }, { "epoch": 12.781720124893557, "grad_norm": 8.76984691619873, "learning_rate": 8.722395685495317e-05, "loss": 0.044777479767799375, "step": 45030 }, { "epoch": 12.784558614816918, "grad_norm": 9.44296932220459, "learning_rate": 8.722111836502981e-05, "loss": 0.03114142417907715, "step": 45040 }, { "epoch": 12.787397104740279, "grad_norm": 10.793941497802734, "learning_rate": 8.721827987510645e-05, "loss": 0.0454921543598175, "step": 45050 }, { "epoch": 12.790235594663638, "grad_norm": 12.8560791015625, "learning_rate": 8.721544138518308e-05, "loss": 0.0469744861125946, "step": 45060 }, { "epoch": 12.793074084587, "grad_norm": 3.2350800037384033, "learning_rate": 8.721260289525972e-05, "loss": 0.04942187070846558, "step": 45070 }, { "epoch": 12.79591257451036, "grad_norm": 8.168445587158203, "learning_rate": 8.720976440533636e-05, "loss": 0.028937822580337523, "step": 45080 }, { "epoch": 12.798751064433722, "grad_norm": 3.1044740676879883, "learning_rate": 8.7206925915413e-05, "loss": 0.042554280161857604, "step": 45090 }, { "epoch": 12.801589554357083, "grad_norm": 2.998159885406494, "learning_rate": 8.720408742548965e-05, "loss": 0.020089754462242128, "step": 45100 }, { "epoch": 12.804428044280442, "grad_norm": 8.909703254699707, "learning_rate": 8.720124893556629e-05, "loss": 0.029118481278419494, "step": 45110 }, { "epoch": 12.807266534203803, "grad_norm": 2.287930965423584, "learning_rate": 8.719841044564291e-05, "loss": 0.011904027312994003, "step": 45120 }, { "epoch": 12.810105024127164, "grad_norm": 4.826805591583252, "learning_rate": 8.719557195571956e-05, "loss": 0.02993302047252655, "step": 45130 }, { "epoch": 12.812943514050525, "grad_norm": 8.264431953430176, "learning_rate": 8.719273346579621e-05, "loss": 0.04315145611763001, "step": 45140 }, { "epoch": 12.815782003973887, "grad_norm": 1.3884044885635376, "learning_rate": 8.718989497587284e-05, "loss": 0.024026541411876677, "step": 45150 }, { "epoch": 12.818620493897246, "grad_norm": 4.608576774597168, "learning_rate": 8.718705648594948e-05, "loss": 0.025866815447807313, "step": 45160 }, { "epoch": 12.821458983820607, "grad_norm": 2.163923501968384, "learning_rate": 8.718421799602612e-05, "loss": 0.016294148564338685, "step": 45170 }, { "epoch": 12.824297473743968, "grad_norm": 5.165535926818848, "learning_rate": 8.718137950610275e-05, "loss": 0.027503693103790285, "step": 45180 }, { "epoch": 12.82713596366733, "grad_norm": 1.068270206451416, "learning_rate": 8.717854101617939e-05, "loss": 0.028598573803901673, "step": 45190 }, { "epoch": 12.82997445359069, "grad_norm": 4.028926849365234, "learning_rate": 8.717570252625603e-05, "loss": 0.022778716683387757, "step": 45200 }, { "epoch": 12.83281294351405, "grad_norm": 7.411033630371094, "learning_rate": 8.717286403633267e-05, "loss": 0.018837854266166687, "step": 45210 }, { "epoch": 12.83565143343741, "grad_norm": 7.185585021972656, "learning_rate": 8.717002554640931e-05, "loss": 0.03793368935585022, "step": 45220 }, { "epoch": 12.838489923360772, "grad_norm": 4.2972612380981445, "learning_rate": 8.716718705648596e-05, "loss": 0.02712184190750122, "step": 45230 }, { "epoch": 12.841328413284133, "grad_norm": 1.7040601968765259, "learning_rate": 8.71643485665626e-05, "loss": 0.014215601980686188, "step": 45240 }, { "epoch": 12.844166903207494, "grad_norm": 3.5631306171417236, "learning_rate": 8.716151007663923e-05, "loss": 0.05199117064476013, "step": 45250 }, { "epoch": 12.847005393130855, "grad_norm": 4.48429536819458, "learning_rate": 8.715867158671587e-05, "loss": 0.02365228533744812, "step": 45260 }, { "epoch": 12.849843883054215, "grad_norm": 2.744434356689453, "learning_rate": 8.715583309679251e-05, "loss": 0.014565663039684295, "step": 45270 }, { "epoch": 12.852682372977576, "grad_norm": 9.238537788391113, "learning_rate": 8.715299460686915e-05, "loss": 0.02844405174255371, "step": 45280 }, { "epoch": 12.855520862900937, "grad_norm": 3.7353341579437256, "learning_rate": 8.715015611694579e-05, "loss": 0.01850106567144394, "step": 45290 }, { "epoch": 12.858359352824298, "grad_norm": 0.4830169081687927, "learning_rate": 8.714731762702243e-05, "loss": 0.024861091375350954, "step": 45300 }, { "epoch": 12.861197842747659, "grad_norm": 3.302513837814331, "learning_rate": 8.714447913709906e-05, "loss": 0.03495500981807709, "step": 45310 }, { "epoch": 12.864036332671018, "grad_norm": 11.923235893249512, "learning_rate": 8.71416406471757e-05, "loss": 0.0254122793674469, "step": 45320 }, { "epoch": 12.86687482259438, "grad_norm": 9.759303092956543, "learning_rate": 8.713880215725234e-05, "loss": 0.03058304488658905, "step": 45330 }, { "epoch": 12.86971331251774, "grad_norm": 9.431136131286621, "learning_rate": 8.713596366732898e-05, "loss": 0.04605764448642731, "step": 45340 }, { "epoch": 12.872551802441102, "grad_norm": 5.870636463165283, "learning_rate": 8.713312517740563e-05, "loss": 0.03918139338493347, "step": 45350 }, { "epoch": 12.875390292364463, "grad_norm": 3.5990214347839355, "learning_rate": 8.713028668748227e-05, "loss": 0.03392835557460785, "step": 45360 }, { "epoch": 12.878228782287822, "grad_norm": 4.22739839553833, "learning_rate": 8.712744819755891e-05, "loss": 0.013621602952480317, "step": 45370 }, { "epoch": 12.881067272211183, "grad_norm": 6.4346137046813965, "learning_rate": 8.712460970763554e-05, "loss": 0.010967979580163956, "step": 45380 }, { "epoch": 12.883905762134544, "grad_norm": 11.052820205688477, "learning_rate": 8.712177121771218e-05, "loss": 0.03201175332069397, "step": 45390 }, { "epoch": 12.886744252057905, "grad_norm": 4.456981182098389, "learning_rate": 8.711893272778882e-05, "loss": 0.031309503316879275, "step": 45400 }, { "epoch": 12.889582741981267, "grad_norm": 1.6732662916183472, "learning_rate": 8.711609423786546e-05, "loss": 0.01596894860267639, "step": 45410 }, { "epoch": 12.892421231904628, "grad_norm": 4.935103893280029, "learning_rate": 8.71132557479421e-05, "loss": 0.012976595759391784, "step": 45420 }, { "epoch": 12.895259721827987, "grad_norm": 1.0597113370895386, "learning_rate": 8.711041725801874e-05, "loss": 0.018221409618854524, "step": 45430 }, { "epoch": 12.898098211751348, "grad_norm": 4.9515862464904785, "learning_rate": 8.710757876809537e-05, "loss": 0.019181770086288453, "step": 45440 }, { "epoch": 12.90093670167471, "grad_norm": 8.953062057495117, "learning_rate": 8.710474027817201e-05, "loss": 0.029577580094337464, "step": 45450 }, { "epoch": 12.90377519159807, "grad_norm": 1.708452820777893, "learning_rate": 8.710190178824865e-05, "loss": 0.023123916983604432, "step": 45460 }, { "epoch": 12.906613681521431, "grad_norm": 5.826571941375732, "learning_rate": 8.70990632983253e-05, "loss": 0.03824976086616516, "step": 45470 }, { "epoch": 12.90945217144479, "grad_norm": 8.295825958251953, "learning_rate": 8.709622480840194e-05, "loss": 0.03666889667510986, "step": 45480 }, { "epoch": 12.912290661368152, "grad_norm": 5.659279823303223, "learning_rate": 8.709338631847858e-05, "loss": 0.026533883810043336, "step": 45490 }, { "epoch": 12.915129151291513, "grad_norm": 7.0818634033203125, "learning_rate": 8.709054782855522e-05, "loss": 0.04185883104801178, "step": 45500 }, { "epoch": 12.915129151291513, "eval_accuracy": 0.9666815031474534, "eval_loss": 0.10811438411474228, "eval_runtime": 34.0318, "eval_samples_per_second": 462.127, "eval_steps_per_second": 7.229, "step": 45500 }, { "epoch": 12.917967641214874, "grad_norm": 0.8377231359481812, "learning_rate": 8.708770933863185e-05, "loss": 0.03182644248008728, "step": 45510 }, { "epoch": 12.920806131138235, "grad_norm": 1.6216492652893066, "learning_rate": 8.708487084870849e-05, "loss": 0.013430562615394593, "step": 45520 }, { "epoch": 12.923644621061595, "grad_norm": 4.567704677581787, "learning_rate": 8.708203235878513e-05, "loss": 0.020667389035224915, "step": 45530 }, { "epoch": 12.926483110984956, "grad_norm": 15.37270450592041, "learning_rate": 8.707919386886177e-05, "loss": 0.03443028330802918, "step": 45540 }, { "epoch": 12.929321600908317, "grad_norm": 3.2094223499298096, "learning_rate": 8.707635537893841e-05, "loss": 0.013166771829128265, "step": 45550 }, { "epoch": 12.932160090831678, "grad_norm": 0.543304443359375, "learning_rate": 8.707351688901505e-05, "loss": 0.03329185545444489, "step": 45560 }, { "epoch": 12.934998580755039, "grad_norm": 8.168122291564941, "learning_rate": 8.707067839909168e-05, "loss": 0.024960605800151824, "step": 45570 }, { "epoch": 12.937837070678398, "grad_norm": 12.195977210998535, "learning_rate": 8.706783990916832e-05, "loss": 0.02148410677909851, "step": 45580 }, { "epoch": 12.94067556060176, "grad_norm": 2.0982167720794678, "learning_rate": 8.706500141924496e-05, "loss": 0.021229368448257447, "step": 45590 }, { "epoch": 12.94351405052512, "grad_norm": 5.465136528015137, "learning_rate": 8.70621629293216e-05, "loss": 0.05021718144416809, "step": 45600 }, { "epoch": 12.946352540448482, "grad_norm": 7.872952938079834, "learning_rate": 8.705932443939825e-05, "loss": 0.016040097177028655, "step": 45610 }, { "epoch": 12.949191030371843, "grad_norm": 4.226383686065674, "learning_rate": 8.705648594947489e-05, "loss": 0.02944131791591644, "step": 45620 }, { "epoch": 12.952029520295202, "grad_norm": 14.999098777770996, "learning_rate": 8.705364745955153e-05, "loss": 0.04277513027191162, "step": 45630 }, { "epoch": 12.954868010218563, "grad_norm": 4.333532810211182, "learning_rate": 8.705080896962816e-05, "loss": 0.0547299861907959, "step": 45640 }, { "epoch": 12.957706500141924, "grad_norm": 2.023711919784546, "learning_rate": 8.70479704797048e-05, "loss": 0.029025545716285704, "step": 45650 }, { "epoch": 12.960544990065285, "grad_norm": 7.88656759262085, "learning_rate": 8.704513198978144e-05, "loss": 0.02532712519168854, "step": 45660 }, { "epoch": 12.963383479988646, "grad_norm": 7.859594345092773, "learning_rate": 8.704229349985807e-05, "loss": 0.027976369857788085, "step": 45670 }, { "epoch": 12.966221969912008, "grad_norm": 4.100898265838623, "learning_rate": 8.703945500993472e-05, "loss": 0.031126242876052857, "step": 45680 }, { "epoch": 12.969060459835367, "grad_norm": 15.386602401733398, "learning_rate": 8.703661652001136e-05, "loss": 0.03531977236270904, "step": 45690 }, { "epoch": 12.971898949758728, "grad_norm": 2.789466619491577, "learning_rate": 8.703377803008799e-05, "loss": 0.02565884292125702, "step": 45700 }, { "epoch": 12.97473743968209, "grad_norm": 2.2760426998138428, "learning_rate": 8.703093954016463e-05, "loss": 0.03219241499900818, "step": 45710 }, { "epoch": 12.97757592960545, "grad_norm": 6.189667701721191, "learning_rate": 8.702810105024128e-05, "loss": 0.030920299887657165, "step": 45720 }, { "epoch": 12.980414419528811, "grad_norm": 8.360000610351562, "learning_rate": 8.702526256031792e-05, "loss": 0.03859918713569641, "step": 45730 }, { "epoch": 12.98325290945217, "grad_norm": 7.013788223266602, "learning_rate": 8.702242407039456e-05, "loss": 0.03496972322463989, "step": 45740 }, { "epoch": 12.986091399375532, "grad_norm": 8.685243606567383, "learning_rate": 8.70195855804712e-05, "loss": 0.020992010831832886, "step": 45750 }, { "epoch": 12.988929889298893, "grad_norm": 5.8900861740112305, "learning_rate": 8.701674709054784e-05, "loss": 0.03140153586864471, "step": 45760 }, { "epoch": 12.991768379222254, "grad_norm": 10.04802131652832, "learning_rate": 8.701390860062447e-05, "loss": 0.025479769706726073, "step": 45770 }, { "epoch": 12.994606869145615, "grad_norm": 1.3360199928283691, "learning_rate": 8.701107011070111e-05, "loss": 0.024269139766693114, "step": 45780 }, { "epoch": 12.997445359068976, "grad_norm": 0.29020339250564575, "learning_rate": 8.700823162077775e-05, "loss": 0.032425913214683535, "step": 45790 }, { "epoch": 13.000283848992336, "grad_norm": 0.8468596339225769, "learning_rate": 8.700539313085438e-05, "loss": 0.011165130138397216, "step": 45800 }, { "epoch": 13.003122338915697, "grad_norm": 5.3550639152526855, "learning_rate": 8.700255464093103e-05, "loss": 0.01390940397977829, "step": 45810 }, { "epoch": 13.005960828839058, "grad_norm": 1.29914128780365, "learning_rate": 8.699971615100768e-05, "loss": 0.025738367438316347, "step": 45820 }, { "epoch": 13.008799318762419, "grad_norm": 3.0772016048431396, "learning_rate": 8.69968776610843e-05, "loss": 0.01688252091407776, "step": 45830 }, { "epoch": 13.01163780868578, "grad_norm": 5.174754619598389, "learning_rate": 8.699403917116094e-05, "loss": 0.02611488401889801, "step": 45840 }, { "epoch": 13.01447629860914, "grad_norm": 0.8088960647583008, "learning_rate": 8.699120068123759e-05, "loss": 0.021964259445667267, "step": 45850 }, { "epoch": 13.0173147885325, "grad_norm": 5.71260404586792, "learning_rate": 8.698836219131423e-05, "loss": 0.03132508993148804, "step": 45860 }, { "epoch": 13.020153278455862, "grad_norm": 10.256661415100098, "learning_rate": 8.698552370139087e-05, "loss": 0.019023846089839935, "step": 45870 }, { "epoch": 13.022991768379223, "grad_norm": 14.218442916870117, "learning_rate": 8.698268521146751e-05, "loss": 0.0262191504240036, "step": 45880 }, { "epoch": 13.025830258302584, "grad_norm": 10.634258270263672, "learning_rate": 8.697984672154415e-05, "loss": 0.028780031204223632, "step": 45890 }, { "epoch": 13.028668748225943, "grad_norm": 0.8869010210037231, "learning_rate": 8.697700823162078e-05, "loss": 0.02717844247817993, "step": 45900 }, { "epoch": 13.031507238149304, "grad_norm": 9.770968437194824, "learning_rate": 8.697416974169742e-05, "loss": 0.03808094263076782, "step": 45910 }, { "epoch": 13.034345728072665, "grad_norm": 16.093276977539062, "learning_rate": 8.697133125177406e-05, "loss": 0.0553678035736084, "step": 45920 }, { "epoch": 13.037184217996026, "grad_norm": 1.5574556589126587, "learning_rate": 8.696849276185069e-05, "loss": 0.01676485687494278, "step": 45930 }, { "epoch": 13.040022707919388, "grad_norm": 6.204573154449463, "learning_rate": 8.696565427192734e-05, "loss": 0.01770203411579132, "step": 45940 }, { "epoch": 13.042861197842747, "grad_norm": 0.6498982310295105, "learning_rate": 8.696281578200399e-05, "loss": 0.03689175248146057, "step": 45950 }, { "epoch": 13.045699687766108, "grad_norm": 3.9123308658599854, "learning_rate": 8.695997729208061e-05, "loss": 0.035238263010978696, "step": 45960 }, { "epoch": 13.048538177689469, "grad_norm": 1.4002550840377808, "learning_rate": 8.695713880215726e-05, "loss": 0.037841975688934326, "step": 45970 }, { "epoch": 13.05137666761283, "grad_norm": 7.652867317199707, "learning_rate": 8.69543003122339e-05, "loss": 0.01835220605134964, "step": 45980 }, { "epoch": 13.054215157536191, "grad_norm": 1.717905044555664, "learning_rate": 8.695146182231054e-05, "loss": 0.024962571263313294, "step": 45990 }, { "epoch": 13.05705364745955, "grad_norm": 3.0397019386291504, "learning_rate": 8.694862333238717e-05, "loss": 0.008857042342424393, "step": 46000 }, { "epoch": 13.05705364745955, "eval_accuracy": 0.9694156546067273, "eval_loss": 0.09774383902549744, "eval_runtime": 30.5633, "eval_samples_per_second": 514.571, "eval_steps_per_second": 8.049, "step": 46000 }, { "epoch": 13.059892137382912, "grad_norm": 0.8809338212013245, "learning_rate": 8.694578484246382e-05, "loss": 0.018774758279323577, "step": 46010 }, { "epoch": 13.062730627306273, "grad_norm": 7.947821617126465, "learning_rate": 8.694294635254045e-05, "loss": 0.024834795296192168, "step": 46020 }, { "epoch": 13.065569117229634, "grad_norm": 5.187758922576904, "learning_rate": 8.694010786261709e-05, "loss": 0.02428383529186249, "step": 46030 }, { "epoch": 13.068407607152995, "grad_norm": 1.7725969552993774, "learning_rate": 8.693726937269373e-05, "loss": 0.013517875969409943, "step": 46040 }, { "epoch": 13.071246097076356, "grad_norm": 3.0264618396759033, "learning_rate": 8.693443088277037e-05, "loss": 0.00847213938832283, "step": 46050 }, { "epoch": 13.074084586999716, "grad_norm": 6.936425685882568, "learning_rate": 8.6931592392847e-05, "loss": 0.024328960478305815, "step": 46060 }, { "epoch": 13.076923076923077, "grad_norm": 9.309427261352539, "learning_rate": 8.692875390292366e-05, "loss": 0.032242301106452945, "step": 46070 }, { "epoch": 13.079761566846438, "grad_norm": 0.4389423131942749, "learning_rate": 8.69259154130003e-05, "loss": 0.02448975443840027, "step": 46080 }, { "epoch": 13.082600056769799, "grad_norm": 2.0628151893615723, "learning_rate": 8.692307692307692e-05, "loss": 0.015783639252185823, "step": 46090 }, { "epoch": 13.08543854669316, "grad_norm": 0.985788106918335, "learning_rate": 8.692023843315357e-05, "loss": 0.024824810028076173, "step": 46100 }, { "epoch": 13.08827703661652, "grad_norm": 0.7634227275848389, "learning_rate": 8.691739994323021e-05, "loss": 0.04327364861965179, "step": 46110 }, { "epoch": 13.09111552653988, "grad_norm": 4.989902019500732, "learning_rate": 8.691456145330684e-05, "loss": 0.0137623131275177, "step": 46120 }, { "epoch": 13.093954016463242, "grad_norm": 1.4201945066452026, "learning_rate": 8.691172296338348e-05, "loss": 0.042266052961349485, "step": 46130 }, { "epoch": 13.096792506386603, "grad_norm": 1.0042543411254883, "learning_rate": 8.690888447346013e-05, "loss": 0.03759114444255829, "step": 46140 }, { "epoch": 13.099630996309964, "grad_norm": 0.4019149839878082, "learning_rate": 8.690604598353676e-05, "loss": 0.017850741744041443, "step": 46150 }, { "epoch": 13.102469486233323, "grad_norm": 8.383048057556152, "learning_rate": 8.69032074936134e-05, "loss": 0.030902191996574402, "step": 46160 }, { "epoch": 13.105307976156684, "grad_norm": 12.091774940490723, "learning_rate": 8.690036900369004e-05, "loss": 0.02828618586063385, "step": 46170 }, { "epoch": 13.108146466080045, "grad_norm": 12.4781494140625, "learning_rate": 8.689753051376668e-05, "loss": 0.027727341651916503, "step": 46180 }, { "epoch": 13.110984956003406, "grad_norm": 1.9219701290130615, "learning_rate": 8.689469202384331e-05, "loss": 0.04359899461269379, "step": 46190 }, { "epoch": 13.113823445926768, "grad_norm": 0.370877206325531, "learning_rate": 8.689185353391995e-05, "loss": 0.023471134901046752, "step": 46200 }, { "epoch": 13.116661935850127, "grad_norm": 14.903826713562012, "learning_rate": 8.688901504399661e-05, "loss": 0.017995895445346834, "step": 46210 }, { "epoch": 13.119500425773488, "grad_norm": 5.52783203125, "learning_rate": 8.688617655407324e-05, "loss": 0.029652139544486998, "step": 46220 }, { "epoch": 13.122338915696849, "grad_norm": 6.671703338623047, "learning_rate": 8.688333806414988e-05, "loss": 0.022060494124889373, "step": 46230 }, { "epoch": 13.12517740562021, "grad_norm": 0.3557266592979431, "learning_rate": 8.688049957422652e-05, "loss": 0.007563282549381256, "step": 46240 }, { "epoch": 13.128015895543571, "grad_norm": 2.8564326763153076, "learning_rate": 8.687766108430315e-05, "loss": 0.02291785627603531, "step": 46250 }, { "epoch": 13.130854385466932, "grad_norm": 0.9033476710319519, "learning_rate": 8.687482259437979e-05, "loss": 0.021770961582660675, "step": 46260 }, { "epoch": 13.133692875390292, "grad_norm": 7.0876784324646, "learning_rate": 8.687198410445644e-05, "loss": 0.02171332687139511, "step": 46270 }, { "epoch": 13.136531365313653, "grad_norm": 0.9844828844070435, "learning_rate": 8.686914561453307e-05, "loss": 0.020659266412258147, "step": 46280 }, { "epoch": 13.139369855237014, "grad_norm": 3.1477060317993164, "learning_rate": 8.686630712460971e-05, "loss": 0.019299033284187316, "step": 46290 }, { "epoch": 13.142208345160375, "grad_norm": 4.779799938201904, "learning_rate": 8.686346863468635e-05, "loss": 0.019544757902622223, "step": 46300 }, { "epoch": 13.145046835083736, "grad_norm": 0.34035179018974304, "learning_rate": 8.6860630144763e-05, "loss": 0.020966506004333495, "step": 46310 }, { "epoch": 13.147885325007096, "grad_norm": 4.520207405090332, "learning_rate": 8.685779165483962e-05, "loss": 0.01319039911031723, "step": 46320 }, { "epoch": 13.150723814930457, "grad_norm": 11.53542709350586, "learning_rate": 8.685495316491626e-05, "loss": 0.033350896835327146, "step": 46330 }, { "epoch": 13.153562304853818, "grad_norm": 4.927389144897461, "learning_rate": 8.685211467499292e-05, "loss": 0.026303091645240785, "step": 46340 }, { "epoch": 13.156400794777179, "grad_norm": 10.152743339538574, "learning_rate": 8.684927618506955e-05, "loss": 0.042313724756240845, "step": 46350 }, { "epoch": 13.15923928470054, "grad_norm": 7.355298042297363, "learning_rate": 8.684643769514619e-05, "loss": 0.029281917214393615, "step": 46360 }, { "epoch": 13.1620777746239, "grad_norm": 2.820167064666748, "learning_rate": 8.684359920522283e-05, "loss": 0.026639777421951293, "step": 46370 }, { "epoch": 13.16491626454726, "grad_norm": 0.5208292007446289, "learning_rate": 8.684076071529946e-05, "loss": 0.03929750621318817, "step": 46380 }, { "epoch": 13.167754754470621, "grad_norm": 5.474911689758301, "learning_rate": 8.68379222253761e-05, "loss": 0.0251981258392334, "step": 46390 }, { "epoch": 13.170593244393983, "grad_norm": 2.82006573677063, "learning_rate": 8.683508373545274e-05, "loss": 0.015839938819408417, "step": 46400 }, { "epoch": 13.173431734317344, "grad_norm": 7.873338222503662, "learning_rate": 8.683224524552938e-05, "loss": 0.021785670518875123, "step": 46410 }, { "epoch": 13.176270224240703, "grad_norm": 0.6101623773574829, "learning_rate": 8.682940675560602e-05, "loss": 0.03835864961147308, "step": 46420 }, { "epoch": 13.179108714164064, "grad_norm": 7.483822822570801, "learning_rate": 8.682656826568266e-05, "loss": 0.03503775298595428, "step": 46430 }, { "epoch": 13.181947204087425, "grad_norm": 1.123288631439209, "learning_rate": 8.68237297757593e-05, "loss": 0.016872091591358183, "step": 46440 }, { "epoch": 13.184785694010786, "grad_norm": 0.5324339866638184, "learning_rate": 8.682089128583593e-05, "loss": 0.010298204421997071, "step": 46450 }, { "epoch": 13.187624183934147, "grad_norm": 9.217328071594238, "learning_rate": 8.681805279591257e-05, "loss": 0.013210004568099976, "step": 46460 }, { "epoch": 13.190462673857509, "grad_norm": 6.665503978729248, "learning_rate": 8.681521430598923e-05, "loss": 0.01951979398727417, "step": 46470 }, { "epoch": 13.193301163780868, "grad_norm": 0.4675406217575073, "learning_rate": 8.681237581606586e-05, "loss": 0.018307624757289885, "step": 46480 }, { "epoch": 13.196139653704229, "grad_norm": 10.556629180908203, "learning_rate": 8.68095373261425e-05, "loss": 0.06227397918701172, "step": 46490 }, { "epoch": 13.19897814362759, "grad_norm": 0.6216328740119934, "learning_rate": 8.680669883621914e-05, "loss": 0.009867732971906662, "step": 46500 }, { "epoch": 13.19897814362759, "eval_accuracy": 0.9691613149360971, "eval_loss": 0.09594158083200455, "eval_runtime": 33.1784, "eval_samples_per_second": 474.013, "eval_steps_per_second": 7.414, "step": 46500 }, { "epoch": 13.201816633550951, "grad_norm": 5.172719478607178, "learning_rate": 8.680386034629577e-05, "loss": 0.03604285717010498, "step": 46510 }, { "epoch": 13.204655123474312, "grad_norm": 2.700817108154297, "learning_rate": 8.680102185637241e-05, "loss": 0.009263037145137787, "step": 46520 }, { "epoch": 13.207493613397672, "grad_norm": 9.49831485748291, "learning_rate": 8.679818336644905e-05, "loss": 0.026824793219566344, "step": 46530 }, { "epoch": 13.210332103321033, "grad_norm": 7.688259601593018, "learning_rate": 8.679534487652569e-05, "loss": 0.029593750834465027, "step": 46540 }, { "epoch": 13.213170593244394, "grad_norm": 11.606961250305176, "learning_rate": 8.679250638660233e-05, "loss": 0.02574879229068756, "step": 46550 }, { "epoch": 13.216009083167755, "grad_norm": 2.1865060329437256, "learning_rate": 8.678966789667897e-05, "loss": 0.02668873965740204, "step": 46560 }, { "epoch": 13.218847573091116, "grad_norm": 0.22720476984977722, "learning_rate": 8.678682940675562e-05, "loss": 0.020509344339370728, "step": 46570 }, { "epoch": 13.221686063014475, "grad_norm": 3.3293347358703613, "learning_rate": 8.678399091683224e-05, "loss": 0.02336885929107666, "step": 46580 }, { "epoch": 13.224524552937837, "grad_norm": 3.2662594318389893, "learning_rate": 8.678115242690889e-05, "loss": 0.013210025429725648, "step": 46590 }, { "epoch": 13.227363042861198, "grad_norm": 6.3399810791015625, "learning_rate": 8.677831393698553e-05, "loss": 0.021892534196376802, "step": 46600 }, { "epoch": 13.230201532784559, "grad_norm": 1.6409099102020264, "learning_rate": 8.677547544706217e-05, "loss": 0.015655624866485595, "step": 46610 }, { "epoch": 13.23304002270792, "grad_norm": 19.752504348754883, "learning_rate": 8.677263695713881e-05, "loss": 0.04362444877624512, "step": 46620 }, { "epoch": 13.235878512631281, "grad_norm": 17.53272819519043, "learning_rate": 8.676979846721545e-05, "loss": 0.03776074647903442, "step": 46630 }, { "epoch": 13.23871700255464, "grad_norm": 1.2138069868087769, "learning_rate": 8.676695997729208e-05, "loss": 0.023801498115062714, "step": 46640 }, { "epoch": 13.241555492478001, "grad_norm": 3.577829360961914, "learning_rate": 8.676412148736872e-05, "loss": 0.026755911111831666, "step": 46650 }, { "epoch": 13.244393982401363, "grad_norm": 1.3513551950454712, "learning_rate": 8.676128299744536e-05, "loss": 0.014284244179725647, "step": 46660 }, { "epoch": 13.247232472324724, "grad_norm": 0.17261090874671936, "learning_rate": 8.6758444507522e-05, "loss": 0.024923285841941832, "step": 46670 }, { "epoch": 13.250070962248085, "grad_norm": 3.794448137283325, "learning_rate": 8.675560601759864e-05, "loss": 0.02768075168132782, "step": 46680 }, { "epoch": 13.252909452171444, "grad_norm": 11.371748924255371, "learning_rate": 8.675276752767529e-05, "loss": 0.03560120463371277, "step": 46690 }, { "epoch": 13.255747942094805, "grad_norm": 3.7111172676086426, "learning_rate": 8.674992903775193e-05, "loss": 0.019095924496650696, "step": 46700 }, { "epoch": 13.258586432018166, "grad_norm": 2.890449047088623, "learning_rate": 8.674709054782855e-05, "loss": 0.023369647562503815, "step": 46710 }, { "epoch": 13.261424921941527, "grad_norm": 1.4384812116622925, "learning_rate": 8.67442520579052e-05, "loss": 0.022906868159770964, "step": 46720 }, { "epoch": 13.264263411864889, "grad_norm": 1.5624512434005737, "learning_rate": 8.674141356798184e-05, "loss": 0.00650181919336319, "step": 46730 }, { "epoch": 13.267101901788248, "grad_norm": 5.287961483001709, "learning_rate": 8.673857507805848e-05, "loss": 0.030586561560630797, "step": 46740 }, { "epoch": 13.269940391711609, "grad_norm": 7.865225315093994, "learning_rate": 8.673573658813512e-05, "loss": 0.024007827043533325, "step": 46750 }, { "epoch": 13.27277888163497, "grad_norm": 10.022920608520508, "learning_rate": 8.673289809821176e-05, "loss": 0.014459820091724395, "step": 46760 }, { "epoch": 13.275617371558331, "grad_norm": 7.027963161468506, "learning_rate": 8.673005960828839e-05, "loss": 0.021609416604042052, "step": 46770 }, { "epoch": 13.278455861481692, "grad_norm": 6.2207536697387695, "learning_rate": 8.672722111836503e-05, "loss": 0.030641442537307738, "step": 46780 }, { "epoch": 13.281294351405052, "grad_norm": 7.309433937072754, "learning_rate": 8.672438262844167e-05, "loss": 0.02569984793663025, "step": 46790 }, { "epoch": 13.284132841328413, "grad_norm": 8.000356674194336, "learning_rate": 8.672154413851831e-05, "loss": 0.0304152250289917, "step": 46800 }, { "epoch": 13.286971331251774, "grad_norm": 17.00655746459961, "learning_rate": 8.671870564859495e-05, "loss": 0.06754026412963868, "step": 46810 }, { "epoch": 13.289809821175135, "grad_norm": 6.915947914123535, "learning_rate": 8.67158671586716e-05, "loss": 0.031039905548095704, "step": 46820 }, { "epoch": 13.292648311098496, "grad_norm": 0.7999103665351868, "learning_rate": 8.671302866874824e-05, "loss": 0.014918908476829529, "step": 46830 }, { "epoch": 13.295486801021857, "grad_norm": 6.287054538726807, "learning_rate": 8.671019017882487e-05, "loss": 0.026089170575141908, "step": 46840 }, { "epoch": 13.298325290945217, "grad_norm": 7.776766777038574, "learning_rate": 8.67073516889015e-05, "loss": 0.046865451335906985, "step": 46850 }, { "epoch": 13.301163780868578, "grad_norm": 5.495208263397217, "learning_rate": 8.670451319897815e-05, "loss": 0.026737740635871886, "step": 46860 }, { "epoch": 13.304002270791939, "grad_norm": 4.5172247886657715, "learning_rate": 8.670167470905479e-05, "loss": 0.027765950560569762, "step": 46870 }, { "epoch": 13.3068407607153, "grad_norm": 7.357059001922607, "learning_rate": 8.669883621913143e-05, "loss": 0.01984446495771408, "step": 46880 }, { "epoch": 13.309679250638661, "grad_norm": 1.8796169757843018, "learning_rate": 8.669599772920807e-05, "loss": 0.02194624990224838, "step": 46890 }, { "epoch": 13.31251774056202, "grad_norm": 0.8706200122833252, "learning_rate": 8.66931592392847e-05, "loss": 0.03109003007411957, "step": 46900 }, { "epoch": 13.315356230485381, "grad_norm": 1.3757108449935913, "learning_rate": 8.669032074936134e-05, "loss": 0.0249830961227417, "step": 46910 }, { "epoch": 13.318194720408743, "grad_norm": 6.217925548553467, "learning_rate": 8.668748225943798e-05, "loss": 0.03471742272377014, "step": 46920 }, { "epoch": 13.321033210332104, "grad_norm": 12.91304874420166, "learning_rate": 8.668464376951462e-05, "loss": 0.028842982649803162, "step": 46930 }, { "epoch": 13.323871700255465, "grad_norm": 0.6600529551506042, "learning_rate": 8.668180527959127e-05, "loss": 0.03199099004268646, "step": 46940 }, { "epoch": 13.326710190178824, "grad_norm": 2.5741727352142334, "learning_rate": 8.667896678966791e-05, "loss": 0.016978855431079864, "step": 46950 }, { "epoch": 13.329548680102185, "grad_norm": 2.956181287765503, "learning_rate": 8.667612829974453e-05, "loss": 0.029133895039558412, "step": 46960 }, { "epoch": 13.332387170025546, "grad_norm": 3.090773820877075, "learning_rate": 8.667328980982118e-05, "loss": 0.01630791127681732, "step": 46970 }, { "epoch": 13.335225659948907, "grad_norm": 1.6272917985916138, "learning_rate": 8.667045131989782e-05, "loss": 0.02595115602016449, "step": 46980 }, { "epoch": 13.338064149872269, "grad_norm": 3.587589979171753, "learning_rate": 8.666761282997446e-05, "loss": 0.027806657552719116, "step": 46990 }, { "epoch": 13.34090263979563, "grad_norm": 1.1705080270767212, "learning_rate": 8.66647743400511e-05, "loss": 0.024575190246105195, "step": 47000 }, { "epoch": 13.34090263979563, "eval_accuracy": 0.9664907483944808, "eval_loss": 0.1043027862906456, "eval_runtime": 32.5265, "eval_samples_per_second": 483.514, "eval_steps_per_second": 7.563, "step": 47000 }, { "epoch": 13.343741129718989, "grad_norm": 4.939589023590088, "learning_rate": 8.666193585012774e-05, "loss": 0.035962808132171634, "step": 47010 }, { "epoch": 13.34657961964235, "grad_norm": 0.5555272698402405, "learning_rate": 8.665909736020438e-05, "loss": 0.022133150696754457, "step": 47020 }, { "epoch": 13.349418109565711, "grad_norm": 1.56986403465271, "learning_rate": 8.665625887028101e-05, "loss": 0.02483938932418823, "step": 47030 }, { "epoch": 13.352256599489072, "grad_norm": 20.057727813720703, "learning_rate": 8.665342038035765e-05, "loss": 0.04733603596687317, "step": 47040 }, { "epoch": 13.355095089412433, "grad_norm": 2.1678664684295654, "learning_rate": 8.66505818904343e-05, "loss": 0.016364413499832153, "step": 47050 }, { "epoch": 13.357933579335793, "grad_norm": 3.908045768737793, "learning_rate": 8.664774340051092e-05, "loss": 0.01119096800684929, "step": 47060 }, { "epoch": 13.360772069259154, "grad_norm": 14.604975700378418, "learning_rate": 8.664490491058758e-05, "loss": 0.03418777585029602, "step": 47070 }, { "epoch": 13.363610559182515, "grad_norm": 1.7402962446212769, "learning_rate": 8.664206642066422e-05, "loss": 0.01812865287065506, "step": 47080 }, { "epoch": 13.366449049105876, "grad_norm": 4.165730953216553, "learning_rate": 8.663922793074085e-05, "loss": 0.019417184591293334, "step": 47090 }, { "epoch": 13.369287539029237, "grad_norm": 1.62198007106781, "learning_rate": 8.663638944081749e-05, "loss": 0.020813101530075075, "step": 47100 }, { "epoch": 13.372126028952596, "grad_norm": 2.202786684036255, "learning_rate": 8.663355095089413e-05, "loss": 0.019875338673591612, "step": 47110 }, { "epoch": 13.374964518875958, "grad_norm": 3.1545629501342773, "learning_rate": 8.663071246097077e-05, "loss": 0.022807437181472778, "step": 47120 }, { "epoch": 13.377803008799319, "grad_norm": 1.0516228675842285, "learning_rate": 8.66278739710474e-05, "loss": 0.02062843441963196, "step": 47130 }, { "epoch": 13.38064149872268, "grad_norm": 1.7098437547683716, "learning_rate": 8.662503548112405e-05, "loss": 0.011880651861429215, "step": 47140 }, { "epoch": 13.383479988646041, "grad_norm": 2.7516393661499023, "learning_rate": 8.66221969912007e-05, "loss": 0.02101517915725708, "step": 47150 }, { "epoch": 13.3863184785694, "grad_norm": 6.58265495300293, "learning_rate": 8.661935850127732e-05, "loss": 0.018155384063720702, "step": 47160 }, { "epoch": 13.389156968492761, "grad_norm": 3.709552764892578, "learning_rate": 8.661652001135396e-05, "loss": 0.018187116086483, "step": 47170 }, { "epoch": 13.391995458416122, "grad_norm": 2.187462329864502, "learning_rate": 8.66136815214306e-05, "loss": 0.010234543681144714, "step": 47180 }, { "epoch": 13.394833948339484, "grad_norm": 1.8743001222610474, "learning_rate": 8.661084303150723e-05, "loss": 0.009230397641658783, "step": 47190 }, { "epoch": 13.397672438262845, "grad_norm": 7.063864231109619, "learning_rate": 8.660800454158389e-05, "loss": 0.009630545973777771, "step": 47200 }, { "epoch": 13.400510928186204, "grad_norm": 1.0792016983032227, "learning_rate": 8.660516605166053e-05, "loss": 0.03109282851219177, "step": 47210 }, { "epoch": 13.403349418109565, "grad_norm": 6.69997501373291, "learning_rate": 8.660232756173716e-05, "loss": 0.02131045013666153, "step": 47220 }, { "epoch": 13.406187908032926, "grad_norm": 5.293020725250244, "learning_rate": 8.65994890718138e-05, "loss": 0.02553023397922516, "step": 47230 }, { "epoch": 13.409026397956287, "grad_norm": 4.03358793258667, "learning_rate": 8.659665058189044e-05, "loss": 0.02158362716436386, "step": 47240 }, { "epoch": 13.411864887879648, "grad_norm": 9.096536636352539, "learning_rate": 8.659381209196708e-05, "loss": 0.020533348619937896, "step": 47250 }, { "epoch": 13.41470337780301, "grad_norm": 0.3263905942440033, "learning_rate": 8.659097360204371e-05, "loss": 0.018322643637657166, "step": 47260 }, { "epoch": 13.417541867726369, "grad_norm": 7.760745525360107, "learning_rate": 8.658813511212036e-05, "loss": 0.016046832501888274, "step": 47270 }, { "epoch": 13.42038035764973, "grad_norm": 3.04608416557312, "learning_rate": 8.6585296622197e-05, "loss": 0.02019485831260681, "step": 47280 }, { "epoch": 13.423218847573091, "grad_norm": 2.2512362003326416, "learning_rate": 8.658245813227363e-05, "loss": 0.026661804318428038, "step": 47290 }, { "epoch": 13.426057337496452, "grad_norm": 4.882451057434082, "learning_rate": 8.657961964235027e-05, "loss": 0.0380485862493515, "step": 47300 }, { "epoch": 13.428895827419813, "grad_norm": 1.531742811203003, "learning_rate": 8.657678115242692e-05, "loss": 0.030367621779441835, "step": 47310 }, { "epoch": 13.431734317343173, "grad_norm": 6.764379024505615, "learning_rate": 8.657394266250354e-05, "loss": 0.013960418105125428, "step": 47320 }, { "epoch": 13.434572807266534, "grad_norm": 1.6954492330551147, "learning_rate": 8.657110417258018e-05, "loss": 0.034249627590179445, "step": 47330 }, { "epoch": 13.437411297189895, "grad_norm": 6.12174654006958, "learning_rate": 8.656826568265684e-05, "loss": 0.032602164149284366, "step": 47340 }, { "epoch": 13.440249787113256, "grad_norm": 0.5451658964157104, "learning_rate": 8.656542719273347e-05, "loss": 0.010478059947490691, "step": 47350 }, { "epoch": 13.443088277036617, "grad_norm": 3.2245700359344482, "learning_rate": 8.656258870281011e-05, "loss": 0.03209671080112457, "step": 47360 }, { "epoch": 13.445926766959976, "grad_norm": 1.8043018579483032, "learning_rate": 8.655975021288675e-05, "loss": 0.015927663445472716, "step": 47370 }, { "epoch": 13.448765256883338, "grad_norm": 0.71964430809021, "learning_rate": 8.655691172296339e-05, "loss": 0.02185303866863251, "step": 47380 }, { "epoch": 13.451603746806699, "grad_norm": 6.081954002380371, "learning_rate": 8.655407323304002e-05, "loss": 0.02372327744960785, "step": 47390 }, { "epoch": 13.45444223673006, "grad_norm": 3.124675989151001, "learning_rate": 8.655123474311667e-05, "loss": 0.04329641461372376, "step": 47400 }, { "epoch": 13.457280726653421, "grad_norm": 2.069188356399536, "learning_rate": 8.654839625319332e-05, "loss": 0.03174784481525421, "step": 47410 }, { "epoch": 13.460119216576782, "grad_norm": 4.075662612915039, "learning_rate": 8.654555776326994e-05, "loss": 0.02883610725402832, "step": 47420 }, { "epoch": 13.462957706500141, "grad_norm": 3.180210828781128, "learning_rate": 8.654271927334658e-05, "loss": 0.02642684280872345, "step": 47430 }, { "epoch": 13.465796196423502, "grad_norm": 3.5276811122894287, "learning_rate": 8.653988078342323e-05, "loss": 0.05148385763168335, "step": 47440 }, { "epoch": 13.468634686346864, "grad_norm": 1.949007511138916, "learning_rate": 8.653704229349985e-05, "loss": 0.0464756041765213, "step": 47450 }, { "epoch": 13.471473176270225, "grad_norm": 2.4008209705352783, "learning_rate": 8.65342038035765e-05, "loss": 0.026517847180366518, "step": 47460 }, { "epoch": 13.474311666193586, "grad_norm": 6.498411655426025, "learning_rate": 8.653136531365315e-05, "loss": 0.03406121432781219, "step": 47470 }, { "epoch": 13.477150156116945, "grad_norm": 6.701434135437012, "learning_rate": 8.652852682372978e-05, "loss": 0.02322683483362198, "step": 47480 }, { "epoch": 13.479988646040306, "grad_norm": 5.868340015411377, "learning_rate": 8.652568833380642e-05, "loss": 0.016699685156345366, "step": 47490 }, { "epoch": 13.482827135963667, "grad_norm": 1.093472957611084, "learning_rate": 8.652284984388306e-05, "loss": 0.01843235194683075, "step": 47500 }, { "epoch": 13.482827135963667, "eval_accuracy": 0.967953201500604, "eval_loss": 0.10251788049936295, "eval_runtime": 34.1698, "eval_samples_per_second": 460.26, "eval_steps_per_second": 7.199, "step": 47500 }, { "epoch": 13.485665625887028, "grad_norm": 0.6539229154586792, "learning_rate": 8.65200113539597e-05, "loss": 0.02114432454109192, "step": 47510 }, { "epoch": 13.48850411581039, "grad_norm": 6.871177673339844, "learning_rate": 8.651717286403633e-05, "loss": 0.017457526922225953, "step": 47520 }, { "epoch": 13.491342605733749, "grad_norm": 2.37732195854187, "learning_rate": 8.651433437411297e-05, "loss": 0.027872276306152344, "step": 47530 }, { "epoch": 13.49418109565711, "grad_norm": 8.198321342468262, "learning_rate": 8.651149588418963e-05, "loss": 0.04992941617965698, "step": 47540 }, { "epoch": 13.497019585580471, "grad_norm": 3.0826125144958496, "learning_rate": 8.650865739426625e-05, "loss": 0.013266922533512115, "step": 47550 }, { "epoch": 13.499858075503832, "grad_norm": 7.205833911895752, "learning_rate": 8.65058189043429e-05, "loss": 0.030354610085487364, "step": 47560 }, { "epoch": 13.502696565427193, "grad_norm": 1.3375602960586548, "learning_rate": 8.650298041441954e-05, "loss": 0.026457780599594118, "step": 47570 }, { "epoch": 13.505535055350553, "grad_norm": 10.391011238098145, "learning_rate": 8.650014192449616e-05, "loss": 0.030627185106277467, "step": 47580 }, { "epoch": 13.508373545273914, "grad_norm": 0.6106948256492615, "learning_rate": 8.64973034345728e-05, "loss": 0.006647716462612152, "step": 47590 }, { "epoch": 13.511212035197275, "grad_norm": 2.5376551151275635, "learning_rate": 8.649446494464946e-05, "loss": 0.02009516805410385, "step": 47600 }, { "epoch": 13.514050525120636, "grad_norm": 10.857511520385742, "learning_rate": 8.649162645472609e-05, "loss": 0.03164174556732178, "step": 47610 }, { "epoch": 13.516889015043997, "grad_norm": 2.5206315517425537, "learning_rate": 8.648878796480273e-05, "loss": 0.010853007435798645, "step": 47620 }, { "epoch": 13.519727504967356, "grad_norm": 8.349550247192383, "learning_rate": 8.648594947487937e-05, "loss": 0.020632877945899963, "step": 47630 }, { "epoch": 13.522565994890718, "grad_norm": 1.2902880907058716, "learning_rate": 8.648311098495601e-05, "loss": 0.015938937664031982, "step": 47640 }, { "epoch": 13.525404484814079, "grad_norm": 8.415597915649414, "learning_rate": 8.648027249503264e-05, "loss": 0.018781617283821106, "step": 47650 }, { "epoch": 13.52824297473744, "grad_norm": 8.981241226196289, "learning_rate": 8.647743400510928e-05, "loss": 0.03306151330471039, "step": 47660 }, { "epoch": 13.5310814646608, "grad_norm": 7.5147504806518555, "learning_rate": 8.647459551518592e-05, "loss": 0.024411435425281524, "step": 47670 }, { "epoch": 13.533919954584162, "grad_norm": 0.636410117149353, "learning_rate": 8.647175702526256e-05, "loss": 0.019689589738845825, "step": 47680 }, { "epoch": 13.536758444507521, "grad_norm": 1.0189025402069092, "learning_rate": 8.64689185353392e-05, "loss": 0.025804674625396727, "step": 47690 }, { "epoch": 13.539596934430882, "grad_norm": 4.799825668334961, "learning_rate": 8.646608004541585e-05, "loss": 0.026017439365386964, "step": 47700 }, { "epoch": 13.542435424354244, "grad_norm": 5.060815811157227, "learning_rate": 8.646324155549248e-05, "loss": 0.021019329130649567, "step": 47710 }, { "epoch": 13.545273914277605, "grad_norm": 9.710461616516113, "learning_rate": 8.646040306556912e-05, "loss": 0.029526388645172118, "step": 47720 }, { "epoch": 13.548112404200966, "grad_norm": Infinity, "learning_rate": 8.645756457564576e-05, "loss": 0.0339257538318634, "step": 47730 }, { "epoch": 13.550950894124325, "grad_norm": 7.818232536315918, "learning_rate": 8.645500993471474e-05, "loss": 0.0259406179189682, "step": 47740 }, { "epoch": 13.553789384047686, "grad_norm": 0.6400870680809021, "learning_rate": 8.645217144479138e-05, "loss": 0.025963541865348817, "step": 47750 }, { "epoch": 13.556627873971047, "grad_norm": 5.553420066833496, "learning_rate": 8.644933295486801e-05, "loss": 0.01092444807291031, "step": 47760 }, { "epoch": 13.559466363894408, "grad_norm": 9.039496421813965, "learning_rate": 8.644649446494465e-05, "loss": 0.024571284651756287, "step": 47770 }, { "epoch": 13.56230485381777, "grad_norm": 3.236079216003418, "learning_rate": 8.64436559750213e-05, "loss": 0.030822673439979555, "step": 47780 }, { "epoch": 13.56514334374113, "grad_norm": 5.352657794952393, "learning_rate": 8.644081748509793e-05, "loss": 0.014925624430179595, "step": 47790 }, { "epoch": 13.56798183366449, "grad_norm": 7.3939900398254395, "learning_rate": 8.643797899517457e-05, "loss": 0.023567020893096924, "step": 47800 }, { "epoch": 13.570820323587851, "grad_norm": 9.706829071044922, "learning_rate": 8.643514050525121e-05, "loss": 0.019483651220798492, "step": 47810 }, { "epoch": 13.573658813511212, "grad_norm": 9.20900821685791, "learning_rate": 8.643230201532784e-05, "loss": 0.016876599192619322, "step": 47820 }, { "epoch": 13.576497303434573, "grad_norm": 1.109816074371338, "learning_rate": 8.642946352540448e-05, "loss": 0.01915617436170578, "step": 47830 }, { "epoch": 13.579335793357934, "grad_norm": 1.233250617980957, "learning_rate": 8.642662503548112e-05, "loss": 0.042709937691688536, "step": 47840 }, { "epoch": 13.582174283281294, "grad_norm": 2.221790075302124, "learning_rate": 8.642378654555777e-05, "loss": 0.020540574193000795, "step": 47850 }, { "epoch": 13.585012773204655, "grad_norm": 1.1863168478012085, "learning_rate": 8.642094805563441e-05, "loss": 0.008990667760372162, "step": 47860 }, { "epoch": 13.587851263128016, "grad_norm": 4.385209560394287, "learning_rate": 8.641810956571105e-05, "loss": 0.020012137293815613, "step": 47870 }, { "epoch": 13.590689753051377, "grad_norm": 3.1772279739379883, "learning_rate": 8.641527107578769e-05, "loss": 0.027701926231384278, "step": 47880 }, { "epoch": 13.593528242974738, "grad_norm": 1.3477269411087036, "learning_rate": 8.641243258586432e-05, "loss": 0.04219585657119751, "step": 47890 }, { "epoch": 13.596366732898097, "grad_norm": 1.410908579826355, "learning_rate": 8.640959409594096e-05, "loss": 0.026669120788574217, "step": 47900 }, { "epoch": 13.599205222821459, "grad_norm": 0.3235848844051361, "learning_rate": 8.64067556060176e-05, "loss": 0.012536750733852386, "step": 47910 }, { "epoch": 13.60204371274482, "grad_norm": 2.4514453411102295, "learning_rate": 8.640391711609424e-05, "loss": 0.04670516848564148, "step": 47920 }, { "epoch": 13.60488220266818, "grad_norm": 12.585572242736816, "learning_rate": 8.640107862617088e-05, "loss": 0.057659882307052615, "step": 47930 }, { "epoch": 13.607720692591542, "grad_norm": 0.4097186028957367, "learning_rate": 8.639824013624752e-05, "loss": 0.01748077869415283, "step": 47940 }, { "epoch": 13.610559182514901, "grad_norm": 5.478631496429443, "learning_rate": 8.639540164632415e-05, "loss": 0.028392139077186584, "step": 47950 }, { "epoch": 13.613397672438262, "grad_norm": 1.9991852045059204, "learning_rate": 8.63925631564008e-05, "loss": 0.02301488220691681, "step": 47960 }, { "epoch": 13.616236162361623, "grad_norm": 2.6684985160827637, "learning_rate": 8.638972466647744e-05, "loss": 0.0175906702876091, "step": 47970 }, { "epoch": 13.619074652284985, "grad_norm": 7.154378890991211, "learning_rate": 8.638688617655408e-05, "loss": 0.0401390939950943, "step": 47980 }, { "epoch": 13.621913142208346, "grad_norm": 10.716761589050293, "learning_rate": 8.638404768663072e-05, "loss": 0.031944882869720456, "step": 47990 }, { "epoch": 13.624751632131705, "grad_norm": 14.801020622253418, "learning_rate": 8.638120919670736e-05, "loss": 0.026560544967651367, "step": 48000 }, { "epoch": 13.624751632131705, "eval_accuracy": 0.9646467857824124, "eval_loss": 0.11300493031740189, "eval_runtime": 33.3251, "eval_samples_per_second": 471.927, "eval_steps_per_second": 7.382, "step": 48000 }, { "epoch": 13.627590122055066, "grad_norm": 6.968453407287598, "learning_rate": 8.6378370706784e-05, "loss": 0.024592173099517823, "step": 48010 }, { "epoch": 13.630428611978427, "grad_norm": 3.602597236633301, "learning_rate": 8.637553221686063e-05, "loss": 0.020995135605335235, "step": 48020 }, { "epoch": 13.633267101901788, "grad_norm": 3.6596431732177734, "learning_rate": 8.637269372693727e-05, "loss": 0.016224104166030883, "step": 48030 }, { "epoch": 13.63610559182515, "grad_norm": 2.0210461616516113, "learning_rate": 8.636985523701391e-05, "loss": 0.015348982810974122, "step": 48040 }, { "epoch": 13.63894408174851, "grad_norm": 6.4830851554870605, "learning_rate": 8.636701674709055e-05, "loss": 0.027864986658096315, "step": 48050 }, { "epoch": 13.64178257167187, "grad_norm": 4.800462245941162, "learning_rate": 8.63641782571672e-05, "loss": 0.014644613862037659, "step": 48060 }, { "epoch": 13.644621061595231, "grad_norm": 8.258902549743652, "learning_rate": 8.636133976724384e-05, "loss": 0.015869662165641785, "step": 48070 }, { "epoch": 13.647459551518592, "grad_norm": 1.3802499771118164, "learning_rate": 8.635850127732046e-05, "loss": 0.02945188581943512, "step": 48080 }, { "epoch": 13.650298041441953, "grad_norm": 7.097314357757568, "learning_rate": 8.63556627873971e-05, "loss": 0.027517828345298766, "step": 48090 }, { "epoch": 13.653136531365314, "grad_norm": 2.949167251586914, "learning_rate": 8.635282429747375e-05, "loss": 0.020987313985824586, "step": 48100 }, { "epoch": 13.655975021288674, "grad_norm": 1.3109409809112549, "learning_rate": 8.634998580755039e-05, "loss": 0.02581738233566284, "step": 48110 }, { "epoch": 13.658813511212035, "grad_norm": 9.708320617675781, "learning_rate": 8.634714731762703e-05, "loss": 0.022814098000526428, "step": 48120 }, { "epoch": 13.661652001135396, "grad_norm": 1.6338846683502197, "learning_rate": 8.634430882770367e-05, "loss": 0.027744984626770018, "step": 48130 }, { "epoch": 13.664490491058757, "grad_norm": 1.4892847537994385, "learning_rate": 8.634147033778031e-05, "loss": 0.03233826756477356, "step": 48140 }, { "epoch": 13.667328980982118, "grad_norm": 1.2755331993103027, "learning_rate": 8.633863184785694e-05, "loss": 0.02453583776950836, "step": 48150 }, { "epoch": 13.670167470905477, "grad_norm": 12.932465553283691, "learning_rate": 8.633579335793358e-05, "loss": 0.03524321913719177, "step": 48160 }, { "epoch": 13.673005960828839, "grad_norm": 3.4151675701141357, "learning_rate": 8.633295486801022e-05, "loss": 0.019614043831825256, "step": 48170 }, { "epoch": 13.6758444507522, "grad_norm": 1.3647785186767578, "learning_rate": 8.633011637808686e-05, "loss": 0.009524647891521455, "step": 48180 }, { "epoch": 13.67868294067556, "grad_norm": 1.0198265314102173, "learning_rate": 8.63272778881635e-05, "loss": 0.031081375479698182, "step": 48190 }, { "epoch": 13.681521430598922, "grad_norm": 0.6324388384819031, "learning_rate": 8.632443939824015e-05, "loss": 0.014155104756355286, "step": 48200 }, { "epoch": 13.684359920522283, "grad_norm": 0.47916683554649353, "learning_rate": 8.632160090831677e-05, "loss": 0.018383853137493134, "step": 48210 }, { "epoch": 13.687198410445642, "grad_norm": 1.933698058128357, "learning_rate": 8.631876241839342e-05, "loss": 0.03241346478462219, "step": 48220 }, { "epoch": 13.690036900369003, "grad_norm": 2.808408737182617, "learning_rate": 8.631592392847006e-05, "loss": 0.03146733343601227, "step": 48230 }, { "epoch": 13.692875390292365, "grad_norm": 1.8185762166976929, "learning_rate": 8.63130854385467e-05, "loss": 0.016577060520648956, "step": 48240 }, { "epoch": 13.695713880215726, "grad_norm": 0.731987476348877, "learning_rate": 8.631024694862334e-05, "loss": 0.008938100188970566, "step": 48250 }, { "epoch": 13.698552370139087, "grad_norm": 2.7368903160095215, "learning_rate": 8.630740845869998e-05, "loss": 0.01833193451166153, "step": 48260 }, { "epoch": 13.701390860062446, "grad_norm": 5.7124834060668945, "learning_rate": 8.630456996877662e-05, "loss": 0.02473498284816742, "step": 48270 }, { "epoch": 13.704229349985807, "grad_norm": 0.44811877608299255, "learning_rate": 8.630173147885325e-05, "loss": 0.01971999853849411, "step": 48280 }, { "epoch": 13.707067839909168, "grad_norm": 14.069496154785156, "learning_rate": 8.629889298892989e-05, "loss": 0.03217013478279114, "step": 48290 }, { "epoch": 13.70990632983253, "grad_norm": 11.276670455932617, "learning_rate": 8.629605449900653e-05, "loss": 0.024807244539260864, "step": 48300 }, { "epoch": 13.71274481975589, "grad_norm": 3.0336074829101562, "learning_rate": 8.629321600908316e-05, "loss": 0.023300619423389436, "step": 48310 }, { "epoch": 13.71558330967925, "grad_norm": 2.6246554851531982, "learning_rate": 8.629037751915982e-05, "loss": 0.019248396158218384, "step": 48320 }, { "epoch": 13.718421799602611, "grad_norm": 4.951300144195557, "learning_rate": 8.628753902923646e-05, "loss": 0.03755561113357544, "step": 48330 }, { "epoch": 13.721260289525972, "grad_norm": 1.0053681135177612, "learning_rate": 8.628470053931308e-05, "loss": 0.02803715467453003, "step": 48340 }, { "epoch": 13.724098779449333, "grad_norm": 4.239955902099609, "learning_rate": 8.628186204938973e-05, "loss": 0.03295764029026031, "step": 48350 }, { "epoch": 13.726937269372694, "grad_norm": 6.746825218200684, "learning_rate": 8.627902355946637e-05, "loss": 0.017151995003223418, "step": 48360 }, { "epoch": 13.729775759296054, "grad_norm": 5.400650978088379, "learning_rate": 8.627618506954301e-05, "loss": 0.023512905836105345, "step": 48370 }, { "epoch": 13.732614249219415, "grad_norm": 6.382096290588379, "learning_rate": 8.627334657961965e-05, "loss": 0.02544708251953125, "step": 48380 }, { "epoch": 13.735452739142776, "grad_norm": 4.756626129150391, "learning_rate": 8.627050808969629e-05, "loss": 0.032193467020988464, "step": 48390 }, { "epoch": 13.738291229066137, "grad_norm": 2.7178049087524414, "learning_rate": 8.626766959977293e-05, "loss": 0.023700739443302154, "step": 48400 }, { "epoch": 13.741129718989498, "grad_norm": 10.330757141113281, "learning_rate": 8.626483110984956e-05, "loss": 0.019866698980331422, "step": 48410 }, { "epoch": 13.743968208912857, "grad_norm": 7.4739556312561035, "learning_rate": 8.62619926199262e-05, "loss": 0.021861080825328828, "step": 48420 }, { "epoch": 13.746806698836219, "grad_norm": 4.061319351196289, "learning_rate": 8.625915413000284e-05, "loss": 0.029895338416099548, "step": 48430 }, { "epoch": 13.74964518875958, "grad_norm": 6.519285202026367, "learning_rate": 8.625659948907182e-05, "loss": 0.039746826887130736, "step": 48440 }, { "epoch": 13.75248367868294, "grad_norm": 14.24404239654541, "learning_rate": 8.625376099914846e-05, "loss": 0.027602922916412354, "step": 48450 }, { "epoch": 13.755322168606302, "grad_norm": 0.8481875061988831, "learning_rate": 8.625092250922509e-05, "loss": 0.020781995356082918, "step": 48460 }, { "epoch": 13.758160658529663, "grad_norm": 8.712288856506348, "learning_rate": 8.624808401930173e-05, "loss": 0.019238975644111634, "step": 48470 }, { "epoch": 13.760999148453022, "grad_norm": 4.400480270385742, "learning_rate": 8.624524552937838e-05, "loss": 0.017897866666316986, "step": 48480 }, { "epoch": 13.763837638376383, "grad_norm": 1.7781211137771606, "learning_rate": 8.6242407039455e-05, "loss": 0.01788599044084549, "step": 48490 }, { "epoch": 13.766676128299745, "grad_norm": 16.914600372314453, "learning_rate": 8.623956854953166e-05, "loss": 0.02708727717399597, "step": 48500 }, { "epoch": 13.766676128299745, "eval_accuracy": 0.9682711260888918, "eval_loss": 0.10824648290872574, "eval_runtime": 32.8015, "eval_samples_per_second": 479.46, "eval_steps_per_second": 7.5, "step": 48500 }, { "epoch": 13.769514618223106, "grad_norm": 3.478508710861206, "learning_rate": 8.62367300596083e-05, "loss": 0.014303536713123321, "step": 48510 }, { "epoch": 13.772353108146467, "grad_norm": 2.853921890258789, "learning_rate": 8.623389156968493e-05, "loss": 0.02035238444805145, "step": 48520 }, { "epoch": 13.775191598069826, "grad_norm": 5.074586868286133, "learning_rate": 8.623105307976157e-05, "loss": 0.034863239526748656, "step": 48530 }, { "epoch": 13.778030087993187, "grad_norm": 2.3250014781951904, "learning_rate": 8.622821458983821e-05, "loss": 0.025902813673019408, "step": 48540 }, { "epoch": 13.780868577916548, "grad_norm": 2.281215190887451, "learning_rate": 8.622537609991485e-05, "loss": 0.02156141698360443, "step": 48550 }, { "epoch": 13.78370706783991, "grad_norm": 10.517971992492676, "learning_rate": 8.622253760999149e-05, "loss": 0.0365444153547287, "step": 48560 }, { "epoch": 13.78654555776327, "grad_norm": 16.390094757080078, "learning_rate": 8.621969912006813e-05, "loss": 0.047499483823776244, "step": 48570 }, { "epoch": 13.789384047686632, "grad_norm": 0.7197175621986389, "learning_rate": 8.621686063014478e-05, "loss": 0.03773541748523712, "step": 48580 }, { "epoch": 13.792222537609991, "grad_norm": 3.7946207523345947, "learning_rate": 8.62140221402214e-05, "loss": 0.025632953643798827, "step": 48590 }, { "epoch": 13.795061027533352, "grad_norm": 0.1053265780210495, "learning_rate": 8.621118365029804e-05, "loss": 0.03307561874389649, "step": 48600 }, { "epoch": 13.797899517456713, "grad_norm": 5.817500114440918, "learning_rate": 8.620834516037469e-05, "loss": 0.017907014489173888, "step": 48610 }, { "epoch": 13.800738007380074, "grad_norm": 11.885712623596191, "learning_rate": 8.620550667045131e-05, "loss": 0.02189689576625824, "step": 48620 }, { "epoch": 13.803576497303435, "grad_norm": 6.571600914001465, "learning_rate": 8.620266818052797e-05, "loss": 0.03205992877483368, "step": 48630 }, { "epoch": 13.806414987226795, "grad_norm": 1.2123428583145142, "learning_rate": 8.619982969060461e-05, "loss": 0.028226491808891297, "step": 48640 }, { "epoch": 13.809253477150156, "grad_norm": 3.7323496341705322, "learning_rate": 8.619699120068124e-05, "loss": 0.03340776264667511, "step": 48650 }, { "epoch": 13.812091967073517, "grad_norm": 0.6420960426330566, "learning_rate": 8.619415271075788e-05, "loss": 0.01512489914894104, "step": 48660 }, { "epoch": 13.814930456996878, "grad_norm": 0.1627539098262787, "learning_rate": 8.619131422083452e-05, "loss": 0.015572097897529603, "step": 48670 }, { "epoch": 13.81776894692024, "grad_norm": 1.6404610872268677, "learning_rate": 8.618847573091116e-05, "loss": 0.03312342166900635, "step": 48680 }, { "epoch": 13.820607436843598, "grad_norm": 0.3160083293914795, "learning_rate": 8.618563724098779e-05, "loss": 0.026300227642059325, "step": 48690 }, { "epoch": 13.82344592676696, "grad_norm": 1.5496554374694824, "learning_rate": 8.618279875106445e-05, "loss": 0.037221759557724, "step": 48700 }, { "epoch": 13.82628441669032, "grad_norm": 2.0932633876800537, "learning_rate": 8.617996026114107e-05, "loss": 0.029195183515548707, "step": 48710 }, { "epoch": 13.829122906613682, "grad_norm": 8.272494316101074, "learning_rate": 8.617712177121771e-05, "loss": 0.020417775213718414, "step": 48720 }, { "epoch": 13.831961396537043, "grad_norm": 2.7476766109466553, "learning_rate": 8.617428328129436e-05, "loss": 0.024122440814971925, "step": 48730 }, { "epoch": 13.834799886460402, "grad_norm": 0.33991023898124695, "learning_rate": 8.6171444791371e-05, "loss": 0.04089182317256927, "step": 48740 }, { "epoch": 13.837638376383763, "grad_norm": 2.050687313079834, "learning_rate": 8.616860630144762e-05, "loss": 0.03679124414920807, "step": 48750 }, { "epoch": 13.840476866307124, "grad_norm": 6.133551120758057, "learning_rate": 8.616576781152428e-05, "loss": 0.031645917892456056, "step": 48760 }, { "epoch": 13.843315356230486, "grad_norm": 2.817040205001831, "learning_rate": 8.616292932160092e-05, "loss": 0.03724794387817383, "step": 48770 }, { "epoch": 13.846153846153847, "grad_norm": 4.1634297370910645, "learning_rate": 8.616009083167755e-05, "loss": 0.02581959366798401, "step": 48780 }, { "epoch": 13.848992336077206, "grad_norm": 4.093806743621826, "learning_rate": 8.615725234175419e-05, "loss": 0.027206286787986755, "step": 48790 }, { "epoch": 13.851830826000567, "grad_norm": 0.5722963213920593, "learning_rate": 8.615441385183083e-05, "loss": 0.02543571889400482, "step": 48800 }, { "epoch": 13.854669315923928, "grad_norm": 1.8233895301818848, "learning_rate": 8.615157536190746e-05, "loss": 0.021251462399959564, "step": 48810 }, { "epoch": 13.85750780584729, "grad_norm": 15.181877136230469, "learning_rate": 8.61487368719841e-05, "loss": 0.031837749481201175, "step": 48820 }, { "epoch": 13.86034629577065, "grad_norm": 6.918737411499023, "learning_rate": 8.614589838206076e-05, "loss": 0.013761065900325775, "step": 48830 }, { "epoch": 13.863184785694012, "grad_norm": 0.25852343440055847, "learning_rate": 8.614305989213738e-05, "loss": 0.04751186668872833, "step": 48840 }, { "epoch": 13.866023275617371, "grad_norm": 2.2896082401275635, "learning_rate": 8.614022140221403e-05, "loss": 0.0490240752696991, "step": 48850 }, { "epoch": 13.868861765540732, "grad_norm": 7.65220832824707, "learning_rate": 8.613738291229067e-05, "loss": 0.0441154420375824, "step": 48860 }, { "epoch": 13.871700255464093, "grad_norm": 4.703787803649902, "learning_rate": 8.613454442236731e-05, "loss": 0.03222574293613434, "step": 48870 }, { "epoch": 13.874538745387454, "grad_norm": 12.205216407775879, "learning_rate": 8.613170593244394e-05, "loss": 0.023323863744735718, "step": 48880 }, { "epoch": 13.877377235310815, "grad_norm": 2.119316577911377, "learning_rate": 8.612886744252058e-05, "loss": 0.016794483363628387, "step": 48890 }, { "epoch": 13.880215725234175, "grad_norm": 5.400528907775879, "learning_rate": 8.612602895259723e-05, "loss": 0.018571698665618898, "step": 48900 }, { "epoch": 13.883054215157536, "grad_norm": 2.876016855239868, "learning_rate": 8.612319046267386e-05, "loss": 0.012763114273548126, "step": 48910 }, { "epoch": 13.885892705080897, "grad_norm": 3.771700620651245, "learning_rate": 8.61203519727505e-05, "loss": 0.034539854526519774, "step": 48920 }, { "epoch": 13.888731195004258, "grad_norm": 5.722779273986816, "learning_rate": 8.611751348282714e-05, "loss": 0.023427408933639527, "step": 48930 }, { "epoch": 13.89156968492762, "grad_norm": 0.584364652633667, "learning_rate": 8.611467499290377e-05, "loss": 0.03621072471141815, "step": 48940 }, { "epoch": 13.894408174850978, "grad_norm": 4.3711652755737305, "learning_rate": 8.611183650298041e-05, "loss": 0.02440662533044815, "step": 48950 }, { "epoch": 13.89724666477434, "grad_norm": 1.0986334085464478, "learning_rate": 8.610899801305707e-05, "loss": 0.019401583075523376, "step": 48960 }, { "epoch": 13.9000851546977, "grad_norm": 1.4783105850219727, "learning_rate": 8.61061595231337e-05, "loss": 0.012181966751813888, "step": 48970 }, { "epoch": 13.902923644621062, "grad_norm": 6.468756198883057, "learning_rate": 8.610332103321034e-05, "loss": 0.050600582361221315, "step": 48980 }, { "epoch": 13.905762134544423, "grad_norm": 13.761086463928223, "learning_rate": 8.610048254328698e-05, "loss": 0.04037654995918274, "step": 48990 }, { "epoch": 13.908600624467784, "grad_norm": 2.0909078121185303, "learning_rate": 8.609764405336362e-05, "loss": 0.021344101428985594, "step": 49000 }, { "epoch": 13.908600624467784, "eval_accuracy": 0.9561264068163031, "eval_loss": 0.15454497933387756, "eval_runtime": 34.3717, "eval_samples_per_second": 457.556, "eval_steps_per_second": 7.157, "step": 49000 }, { "epoch": 13.911439114391143, "grad_norm": 1.771511435508728, "learning_rate": 8.609480556344025e-05, "loss": 0.044421225786209106, "step": 49010 }, { "epoch": 13.914277604314504, "grad_norm": 11.516900062561035, "learning_rate": 8.609196707351689e-05, "loss": 0.04213305413722992, "step": 49020 }, { "epoch": 13.917116094237866, "grad_norm": 4.602402210235596, "learning_rate": 8.608912858359354e-05, "loss": 0.048821157217025755, "step": 49030 }, { "epoch": 13.919954584161227, "grad_norm": 1.8233810663223267, "learning_rate": 8.608629009367017e-05, "loss": 0.01742495447397232, "step": 49040 }, { "epoch": 13.922793074084588, "grad_norm": 6.512685775756836, "learning_rate": 8.608345160374681e-05, "loss": 0.019314379990100862, "step": 49050 }, { "epoch": 13.925631564007947, "grad_norm": 3.8957431316375732, "learning_rate": 8.608061311382345e-05, "loss": 0.02148987054824829, "step": 49060 }, { "epoch": 13.928470053931308, "grad_norm": 6.465183734893799, "learning_rate": 8.607777462390008e-05, "loss": 0.03261690735816956, "step": 49070 }, { "epoch": 13.93130854385467, "grad_norm": 0.6794322729110718, "learning_rate": 8.607493613397672e-05, "loss": 0.03832847476005554, "step": 49080 }, { "epoch": 13.93414703377803, "grad_norm": 17.88524055480957, "learning_rate": 8.607209764405338e-05, "loss": 0.027626392245292664, "step": 49090 }, { "epoch": 13.936985523701392, "grad_norm": 2.984049081802368, "learning_rate": 8.606925915413e-05, "loss": 0.035411950945854184, "step": 49100 }, { "epoch": 13.93982401362475, "grad_norm": 0.32323601841926575, "learning_rate": 8.606642066420665e-05, "loss": 0.03763496577739715, "step": 49110 }, { "epoch": 13.942662503548112, "grad_norm": 9.445712089538574, "learning_rate": 8.606358217428329e-05, "loss": 0.05156281590461731, "step": 49120 }, { "epoch": 13.945500993471473, "grad_norm": 1.5384899377822876, "learning_rate": 8.606074368435993e-05, "loss": 0.01572541892528534, "step": 49130 }, { "epoch": 13.948339483394834, "grad_norm": 8.956418991088867, "learning_rate": 8.605790519443656e-05, "loss": 0.010134793072938918, "step": 49140 }, { "epoch": 13.951177973318195, "grad_norm": 0.5678937435150146, "learning_rate": 8.60550667045132e-05, "loss": 0.01265876591205597, "step": 49150 }, { "epoch": 13.954016463241555, "grad_norm": 0.4399871826171875, "learning_rate": 8.605222821458985e-05, "loss": 0.01950177550315857, "step": 49160 }, { "epoch": 13.956854953164916, "grad_norm": 3.230369806289673, "learning_rate": 8.604938972466648e-05, "loss": 0.005968512594699859, "step": 49170 }, { "epoch": 13.959693443088277, "grad_norm": 4.872127532958984, "learning_rate": 8.604655123474312e-05, "loss": 0.031170374155044554, "step": 49180 }, { "epoch": 13.962531933011638, "grad_norm": 0.35129183530807495, "learning_rate": 8.604371274481976e-05, "loss": 0.032274875044822696, "step": 49190 }, { "epoch": 13.965370422934999, "grad_norm": 9.412309646606445, "learning_rate": 8.604087425489639e-05, "loss": 0.040777587890625, "step": 49200 }, { "epoch": 13.968208912858358, "grad_norm": 10.750581741333008, "learning_rate": 8.603803576497303e-05, "loss": 0.021062809228897094, "step": 49210 }, { "epoch": 13.97104740278172, "grad_norm": 9.70480728149414, "learning_rate": 8.603519727504967e-05, "loss": 0.0331195592880249, "step": 49220 }, { "epoch": 13.97388589270508, "grad_norm": 15.119086265563965, "learning_rate": 8.603235878512632e-05, "loss": 0.03676826953887939, "step": 49230 }, { "epoch": 13.976724382628442, "grad_norm": 12.854155540466309, "learning_rate": 8.602952029520296e-05, "loss": 0.03499214947223663, "step": 49240 }, { "epoch": 13.979562872551803, "grad_norm": 3.158644914627075, "learning_rate": 8.60266818052796e-05, "loss": 0.02712774872779846, "step": 49250 }, { "epoch": 13.982401362475164, "grad_norm": 16.846097946166992, "learning_rate": 8.602384331535624e-05, "loss": 0.05120633244514465, "step": 49260 }, { "epoch": 13.985239852398523, "grad_norm": 5.883443832397461, "learning_rate": 8.602100482543287e-05, "loss": 0.04868968725204468, "step": 49270 }, { "epoch": 13.988078342321884, "grad_norm": 6.090192794799805, "learning_rate": 8.601816633550951e-05, "loss": 0.028820422291755677, "step": 49280 }, { "epoch": 13.990916832245246, "grad_norm": 2.8770573139190674, "learning_rate": 8.601532784558616e-05, "loss": 0.028646177053451537, "step": 49290 }, { "epoch": 13.993755322168607, "grad_norm": 2.207439661026001, "learning_rate": 8.601248935566279e-05, "loss": 0.018385207653045653, "step": 49300 }, { "epoch": 13.996593812091968, "grad_norm": 0.3574511408805847, "learning_rate": 8.600965086573943e-05, "loss": 0.03864841461181641, "step": 49310 }, { "epoch": 13.999432302015327, "grad_norm": 2.202913522720337, "learning_rate": 8.600681237581607e-05, "loss": 0.03278489708900452, "step": 49320 }, { "epoch": 14.002270791938688, "grad_norm": 10.33278751373291, "learning_rate": 8.60039738858927e-05, "loss": 0.012734216451644898, "step": 49330 }, { "epoch": 14.00510928186205, "grad_norm": 7.670183181762695, "learning_rate": 8.600113539596934e-05, "loss": 0.02328726351261139, "step": 49340 }, { "epoch": 14.00794777178541, "grad_norm": 1.3484965562820435, "learning_rate": 8.599829690604599e-05, "loss": 0.021844002604484557, "step": 49350 }, { "epoch": 14.010786261708772, "grad_norm": 0.9118590354919434, "learning_rate": 8.599545841612263e-05, "loss": 0.03415544033050537, "step": 49360 }, { "epoch": 14.01362475163213, "grad_norm": 6.9193243980407715, "learning_rate": 8.599261992619927e-05, "loss": 0.02200005352497101, "step": 49370 }, { "epoch": 14.016463241555492, "grad_norm": 0.4836635887622833, "learning_rate": 8.598978143627591e-05, "loss": 0.024602805078029633, "step": 49380 }, { "epoch": 14.019301731478853, "grad_norm": 2.593250036239624, "learning_rate": 8.598694294635255e-05, "loss": 0.016430683434009552, "step": 49390 }, { "epoch": 14.022140221402214, "grad_norm": 2.0901286602020264, "learning_rate": 8.598410445642918e-05, "loss": 0.017284014821052553, "step": 49400 }, { "epoch": 14.024978711325575, "grad_norm": 4.4745306968688965, "learning_rate": 8.598126596650582e-05, "loss": 0.022242334485054017, "step": 49410 }, { "epoch": 14.027817201248936, "grad_norm": 1.6471130847930908, "learning_rate": 8.597842747658246e-05, "loss": 0.022034397721290587, "step": 49420 }, { "epoch": 14.030655691172296, "grad_norm": 13.419504165649414, "learning_rate": 8.59755889866591e-05, "loss": 0.060460644960403445, "step": 49430 }, { "epoch": 14.033494181095657, "grad_norm": 6.310109615325928, "learning_rate": 8.597275049673574e-05, "loss": 0.01768212467432022, "step": 49440 }, { "epoch": 14.036332671019018, "grad_norm": 17.660402297973633, "learning_rate": 8.596991200681239e-05, "loss": 0.021090546250343324, "step": 49450 }, { "epoch": 14.039171160942379, "grad_norm": 2.987610101699829, "learning_rate": 8.596707351688901e-05, "loss": 0.02145720422267914, "step": 49460 }, { "epoch": 14.04200965086574, "grad_norm": 6.772080421447754, "learning_rate": 8.596423502696565e-05, "loss": 0.024064627289772034, "step": 49470 }, { "epoch": 14.0448481407891, "grad_norm": 7.433216571807861, "learning_rate": 8.59613965370423e-05, "loss": 0.017082253098487855, "step": 49480 }, { "epoch": 14.04768663071246, "grad_norm": 4.992000102996826, "learning_rate": 8.595855804711894e-05, "loss": 0.030871933698654173, "step": 49490 }, { "epoch": 14.050525120635822, "grad_norm": 6.8448166847229, "learning_rate": 8.595571955719558e-05, "loss": 0.010308530181646347, "step": 49500 }, { "epoch": 14.050525120635822, "eval_accuracy": 0.963883766770522, "eval_loss": 0.12005388736724854, "eval_runtime": 32.9299, "eval_samples_per_second": 477.59, "eval_steps_per_second": 7.47, "step": 49500 }, { "epoch": 14.053363610559183, "grad_norm": 7.566470146179199, "learning_rate": 8.595288106727222e-05, "loss": 0.04292806088924408, "step": 49510 }, { "epoch": 14.056202100482544, "grad_norm": 2.0903117656707764, "learning_rate": 8.595004257734885e-05, "loss": 0.015984191000461577, "step": 49520 }, { "epoch": 14.059040590405903, "grad_norm": 0.7936832904815674, "learning_rate": 8.594720408742549e-05, "loss": 0.014576594531536102, "step": 49530 }, { "epoch": 14.061879080329264, "grad_norm": 5.575385570526123, "learning_rate": 8.594436559750213e-05, "loss": 0.01626885384321213, "step": 49540 }, { "epoch": 14.064717570252625, "grad_norm": 1.7631714344024658, "learning_rate": 8.594152710757877e-05, "loss": 0.02367054224014282, "step": 49550 }, { "epoch": 14.067556060175987, "grad_norm": 3.3597817420959473, "learning_rate": 8.593868861765541e-05, "loss": 0.03148854374885559, "step": 49560 }, { "epoch": 14.070394550099348, "grad_norm": 3.0485341548919678, "learning_rate": 8.593585012773206e-05, "loss": 0.014528489112854004, "step": 49570 }, { "epoch": 14.073233040022707, "grad_norm": 1.0798425674438477, "learning_rate": 8.59330116378087e-05, "loss": 0.0077442660927772525, "step": 49580 }, { "epoch": 14.076071529946068, "grad_norm": 3.1752614974975586, "learning_rate": 8.593017314788532e-05, "loss": 0.014399024844169616, "step": 49590 }, { "epoch": 14.07891001986943, "grad_norm": 0.8773477077484131, "learning_rate": 8.592733465796197e-05, "loss": 0.018811266124248504, "step": 49600 }, { "epoch": 14.08174850979279, "grad_norm": 0.5419068932533264, "learning_rate": 8.592449616803861e-05, "loss": 0.0419379860162735, "step": 49610 }, { "epoch": 14.084586999716151, "grad_norm": 0.6644906401634216, "learning_rate": 8.592165767811525e-05, "loss": 0.010282053798437118, "step": 49620 }, { "epoch": 14.087425489639513, "grad_norm": 6.219869613647461, "learning_rate": 8.591881918819189e-05, "loss": 0.023979613184928895, "step": 49630 }, { "epoch": 14.090263979562872, "grad_norm": 0.5391266345977783, "learning_rate": 8.591598069826853e-05, "loss": 0.015533825755119324, "step": 49640 }, { "epoch": 14.093102469486233, "grad_norm": 0.4781807065010071, "learning_rate": 8.591314220834516e-05, "loss": 0.021334201097488403, "step": 49650 }, { "epoch": 14.095940959409594, "grad_norm": 0.4529660940170288, "learning_rate": 8.59103037184218e-05, "loss": 0.0171050101518631, "step": 49660 }, { "epoch": 14.098779449332955, "grad_norm": 3.881350040435791, "learning_rate": 8.590746522849844e-05, "loss": 0.022814106941223145, "step": 49670 }, { "epoch": 14.101617939256316, "grad_norm": 4.69289493560791, "learning_rate": 8.590462673857508e-05, "loss": 0.013531124591827393, "step": 49680 }, { "epoch": 14.104456429179676, "grad_norm": 2.459179162979126, "learning_rate": 8.590178824865172e-05, "loss": 0.013442096114158631, "step": 49690 }, { "epoch": 14.107294919103037, "grad_norm": 6.164515018463135, "learning_rate": 8.589894975872837e-05, "loss": 0.026451772451400755, "step": 49700 }, { "epoch": 14.110133409026398, "grad_norm": 0.4458458125591278, "learning_rate": 8.589611126880501e-05, "loss": 0.013282634317874908, "step": 49710 }, { "epoch": 14.112971898949759, "grad_norm": 0.5706819295883179, "learning_rate": 8.589327277888163e-05, "loss": 0.009839420020580292, "step": 49720 }, { "epoch": 14.11581038887312, "grad_norm": 3.5447611808776855, "learning_rate": 8.589043428895828e-05, "loss": 0.016877616941928863, "step": 49730 }, { "epoch": 14.11864887879648, "grad_norm": 0.5895349383354187, "learning_rate": 8.588759579903492e-05, "loss": 0.008669497817754746, "step": 49740 }, { "epoch": 14.12148736871984, "grad_norm": 10.835302352905273, "learning_rate": 8.588475730911155e-05, "loss": 0.036053124070167544, "step": 49750 }, { "epoch": 14.124325858643202, "grad_norm": 1.173956036567688, "learning_rate": 8.58819188191882e-05, "loss": 0.016149699687957764, "step": 49760 }, { "epoch": 14.127164348566563, "grad_norm": 4.640038013458252, "learning_rate": 8.587908032926484e-05, "loss": 0.010629125684499741, "step": 49770 }, { "epoch": 14.130002838489924, "grad_norm": 6.175779342651367, "learning_rate": 8.587624183934147e-05, "loss": 0.013221022486686707, "step": 49780 }, { "epoch": 14.132841328413285, "grad_norm": 0.688448965549469, "learning_rate": 8.587340334941811e-05, "loss": 0.019088608026504517, "step": 49790 }, { "epoch": 14.135679818336644, "grad_norm": 4.36879301071167, "learning_rate": 8.587056485949475e-05, "loss": 0.022852061688899993, "step": 49800 }, { "epoch": 14.138518308260005, "grad_norm": 0.9600671529769897, "learning_rate": 8.58677263695714e-05, "loss": 0.018824024498462676, "step": 49810 }, { "epoch": 14.141356798183367, "grad_norm": 3.3633852005004883, "learning_rate": 8.586488787964802e-05, "loss": 0.010696721076965333, "step": 49820 }, { "epoch": 14.144195288106728, "grad_norm": 3.8859894275665283, "learning_rate": 8.586204938972468e-05, "loss": 0.03061743676662445, "step": 49830 }, { "epoch": 14.147033778030089, "grad_norm": 1.420177698135376, "learning_rate": 8.585921089980132e-05, "loss": 0.008081095665693283, "step": 49840 }, { "epoch": 14.149872267953448, "grad_norm": 1.8073889017105103, "learning_rate": 8.585637240987795e-05, "loss": 0.0333761990070343, "step": 49850 }, { "epoch": 14.15271075787681, "grad_norm": 9.983675003051758, "learning_rate": 8.585353391995459e-05, "loss": 0.049060314893722534, "step": 49860 }, { "epoch": 14.15554924780017, "grad_norm": 2.8575685024261475, "learning_rate": 8.585069543003123e-05, "loss": 0.011536929011344909, "step": 49870 }, { "epoch": 14.158387737723531, "grad_norm": 0.0810675173997879, "learning_rate": 8.584785694010786e-05, "loss": 0.016037535667419434, "step": 49880 }, { "epoch": 14.161226227646893, "grad_norm": 4.7501444816589355, "learning_rate": 8.584501845018451e-05, "loss": 0.0312174916267395, "step": 49890 }, { "epoch": 14.164064717570252, "grad_norm": 13.630995750427246, "learning_rate": 8.584217996026115e-05, "loss": 0.05690450668334961, "step": 49900 }, { "epoch": 14.166903207493613, "grad_norm": 6.432366847991943, "learning_rate": 8.583934147033778e-05, "loss": 0.021261684596538544, "step": 49910 }, { "epoch": 14.169741697416974, "grad_norm": 2.7468631267547607, "learning_rate": 8.583650298041442e-05, "loss": 0.019532190263271333, "step": 49920 }, { "epoch": 14.172580187340335, "grad_norm": 12.114395141601562, "learning_rate": 8.583366449049106e-05, "loss": 0.024282027781009675, "step": 49930 }, { "epoch": 14.175418677263696, "grad_norm": 0.3635217547416687, "learning_rate": 8.58308260005677e-05, "loss": 0.009700104594230652, "step": 49940 }, { "epoch": 14.178257167187056, "grad_norm": 6.112333297729492, "learning_rate": 8.582798751064433e-05, "loss": 0.034607109427452085, "step": 49950 }, { "epoch": 14.181095657110417, "grad_norm": 1.0207067728042603, "learning_rate": 8.582514902072099e-05, "loss": 0.010723426192998885, "step": 49960 }, { "epoch": 14.183934147033778, "grad_norm": 5.8464460372924805, "learning_rate": 8.582231053079763e-05, "loss": 0.012879680097103118, "step": 49970 }, { "epoch": 14.186772636957139, "grad_norm": 8.653544425964355, "learning_rate": 8.581947204087426e-05, "loss": 0.0279911607503891, "step": 49980 }, { "epoch": 14.1896111268805, "grad_norm": 3.8843448162078857, "learning_rate": 8.58166335509509e-05, "loss": 0.01831492632627487, "step": 49990 }, { "epoch": 14.192449616803861, "grad_norm": 6.607227325439453, "learning_rate": 8.581379506102754e-05, "loss": 0.016123968362808227, "step": 50000 }, { "epoch": 14.192449616803861, "eval_accuracy": 0.9661092388885356, "eval_loss": 0.10962720960378647, "eval_runtime": 32.4711, "eval_samples_per_second": 484.339, "eval_steps_per_second": 7.576, "step": 50000 }, { "epoch": 14.19528810672722, "grad_norm": 1.987405776977539, "learning_rate": 8.581095657110417e-05, "loss": 0.011334332823753356, "step": 50010 }, { "epoch": 14.198126596650582, "grad_norm": 4.920589447021484, "learning_rate": 8.580811808118081e-05, "loss": 0.03952991962432861, "step": 50020 }, { "epoch": 14.200965086573943, "grad_norm": 1.961927890777588, "learning_rate": 8.580527959125746e-05, "loss": 0.011208149045705796, "step": 50030 }, { "epoch": 14.203803576497304, "grad_norm": 0.46156230568885803, "learning_rate": 8.580244110133409e-05, "loss": 0.008193625509738922, "step": 50040 }, { "epoch": 14.206642066420665, "grad_norm": 11.23859691619873, "learning_rate": 8.579960261141073e-05, "loss": 0.024562504887580872, "step": 50050 }, { "epoch": 14.209480556344024, "grad_norm": 1.9143143892288208, "learning_rate": 8.579676412148737e-05, "loss": 0.02574438750743866, "step": 50060 }, { "epoch": 14.212319046267385, "grad_norm": 3.0921170711517334, "learning_rate": 8.579392563156402e-05, "loss": 0.02182871848344803, "step": 50070 }, { "epoch": 14.215157536190747, "grad_norm": 3.3425211906433105, "learning_rate": 8.579108714164064e-05, "loss": 0.017059232294559478, "step": 50080 }, { "epoch": 14.217996026114108, "grad_norm": 7.3602752685546875, "learning_rate": 8.57882486517173e-05, "loss": 0.01754806935787201, "step": 50090 }, { "epoch": 14.220834516037469, "grad_norm": 0.5365791320800781, "learning_rate": 8.578541016179394e-05, "loss": 0.0136275976896286, "step": 50100 }, { "epoch": 14.223673005960828, "grad_norm": 8.257798194885254, "learning_rate": 8.578257167187057e-05, "loss": 0.02741885781288147, "step": 50110 }, { "epoch": 14.22651149588419, "grad_norm": 4.609764575958252, "learning_rate": 8.577973318194721e-05, "loss": 0.02111305296421051, "step": 50120 }, { "epoch": 14.22934998580755, "grad_norm": 2.6395609378814697, "learning_rate": 8.577689469202385e-05, "loss": 0.01083149090409279, "step": 50130 }, { "epoch": 14.232188475730911, "grad_norm": 0.888701856136322, "learning_rate": 8.577405620210048e-05, "loss": 0.028029578924179076, "step": 50140 }, { "epoch": 14.235026965654273, "grad_norm": 8.845074653625488, "learning_rate": 8.577121771217712e-05, "loss": 0.020998534560203553, "step": 50150 }, { "epoch": 14.237865455577632, "grad_norm": 9.160751342773438, "learning_rate": 8.576837922225377e-05, "loss": 0.02628929615020752, "step": 50160 }, { "epoch": 14.240703945500993, "grad_norm": 3.1616976261138916, "learning_rate": 8.57655407323304e-05, "loss": 0.019777077436447143, "step": 50170 }, { "epoch": 14.243542435424354, "grad_norm": 0.41923844814300537, "learning_rate": 8.576270224240704e-05, "loss": 0.010723181068897247, "step": 50180 }, { "epoch": 14.246380925347715, "grad_norm": 1.4563844203948975, "learning_rate": 8.575986375248368e-05, "loss": 0.014038048684597015, "step": 50190 }, { "epoch": 14.249219415271076, "grad_norm": 0.10171817243099213, "learning_rate": 8.575702526256033e-05, "loss": 0.007849007844924927, "step": 50200 }, { "epoch": 14.252057905194437, "grad_norm": 3.6726996898651123, "learning_rate": 8.575418677263695e-05, "loss": 0.016139046847820283, "step": 50210 }, { "epoch": 14.254896395117797, "grad_norm": 3.654780626296997, "learning_rate": 8.57513482827136e-05, "loss": 0.00554860457777977, "step": 50220 }, { "epoch": 14.257734885041158, "grad_norm": 0.13767306506633759, "learning_rate": 8.574850979279025e-05, "loss": 0.021599997580051423, "step": 50230 }, { "epoch": 14.260573374964519, "grad_norm": 6.021449089050293, "learning_rate": 8.574567130286688e-05, "loss": 0.01374569833278656, "step": 50240 }, { "epoch": 14.26341186488788, "grad_norm": 2.5460946559906006, "learning_rate": 8.574283281294352e-05, "loss": 0.01127743422985077, "step": 50250 }, { "epoch": 14.266250354811241, "grad_norm": 1.763565182685852, "learning_rate": 8.573999432302016e-05, "loss": 0.020109176635742188, "step": 50260 }, { "epoch": 14.2690888447346, "grad_norm": 6.259120464324951, "learning_rate": 8.573715583309679e-05, "loss": 0.023675131797790527, "step": 50270 }, { "epoch": 14.271927334657962, "grad_norm": 2.7127277851104736, "learning_rate": 8.573431734317343e-05, "loss": 0.018241071701049806, "step": 50280 }, { "epoch": 14.274765824581323, "grad_norm": 3.053025722503662, "learning_rate": 8.573147885325008e-05, "loss": 0.025934892892837524, "step": 50290 }, { "epoch": 14.277604314504684, "grad_norm": 4.059330940246582, "learning_rate": 8.572864036332671e-05, "loss": 0.039486566185951234, "step": 50300 }, { "epoch": 14.280442804428045, "grad_norm": 0.32963359355926514, "learning_rate": 8.572580187340335e-05, "loss": 0.05355376601219177, "step": 50310 }, { "epoch": 14.283281294351404, "grad_norm": 7.1962761878967285, "learning_rate": 8.572296338348e-05, "loss": 0.019762958586215972, "step": 50320 }, { "epoch": 14.286119784274765, "grad_norm": 1.080601692199707, "learning_rate": 8.572012489355664e-05, "loss": 0.033148261904716494, "step": 50330 }, { "epoch": 14.288958274198126, "grad_norm": 6.958176612854004, "learning_rate": 8.571728640363326e-05, "loss": 0.029014316201210023, "step": 50340 }, { "epoch": 14.291796764121488, "grad_norm": 0.975967526435852, "learning_rate": 8.57144479137099e-05, "loss": 0.018815536797046662, "step": 50350 }, { "epoch": 14.294635254044849, "grad_norm": 0.16733328998088837, "learning_rate": 8.571160942378655e-05, "loss": 0.027387183904647828, "step": 50360 }, { "epoch": 14.297473743968208, "grad_norm": 3.1650569438934326, "learning_rate": 8.570877093386319e-05, "loss": 0.017850051820278167, "step": 50370 }, { "epoch": 14.30031223389157, "grad_norm": 0.35174816846847534, "learning_rate": 8.570593244393983e-05, "loss": 0.018357117474079133, "step": 50380 }, { "epoch": 14.30315072381493, "grad_norm": 1.2151007652282715, "learning_rate": 8.570309395401647e-05, "loss": 0.021140156686306, "step": 50390 }, { "epoch": 14.305989213738291, "grad_norm": 7.453995227813721, "learning_rate": 8.57002554640931e-05, "loss": 0.012352051585912705, "step": 50400 }, { "epoch": 14.308827703661652, "grad_norm": 0.3007005751132965, "learning_rate": 8.569741697416974e-05, "loss": 0.01979750543832779, "step": 50410 }, { "epoch": 14.311666193585014, "grad_norm": 0.17593219876289368, "learning_rate": 8.56945784842464e-05, "loss": 0.024007920920848847, "step": 50420 }, { "epoch": 14.314504683508373, "grad_norm": 1.2617086172103882, "learning_rate": 8.569173999432302e-05, "loss": 0.02312789261341095, "step": 50430 }, { "epoch": 14.317343173431734, "grad_norm": 3.8050050735473633, "learning_rate": 8.568890150439966e-05, "loss": 0.03027205467224121, "step": 50440 }, { "epoch": 14.320181663355095, "grad_norm": 1.1075226068496704, "learning_rate": 8.56860630144763e-05, "loss": 0.007636501640081406, "step": 50450 }, { "epoch": 14.323020153278456, "grad_norm": 0.5070046186447144, "learning_rate": 8.568322452455293e-05, "loss": 0.009436709433794021, "step": 50460 }, { "epoch": 14.325858643201817, "grad_norm": 1.8871651887893677, "learning_rate": 8.568038603462958e-05, "loss": 0.016425320506095888, "step": 50470 }, { "epoch": 14.328697133125177, "grad_norm": 1.5936084985733032, "learning_rate": 8.567754754470622e-05, "loss": 0.013716436922550201, "step": 50480 }, { "epoch": 14.331535623048538, "grad_norm": 0.6831617951393127, "learning_rate": 8.567470905478286e-05, "loss": 0.02185671925544739, "step": 50490 }, { "epoch": 14.334374112971899, "grad_norm": 11.20750904083252, "learning_rate": 8.56718705648595e-05, "loss": 0.03352311849594116, "step": 50500 }, { "epoch": 14.334374112971899, "eval_accuracy": 0.9670630126533987, "eval_loss": 0.10571348667144775, "eval_runtime": 33.3315, "eval_samples_per_second": 471.836, "eval_steps_per_second": 7.38, "step": 50500 }, { "epoch": 14.33721260289526, "grad_norm": 7.211630821228027, "learning_rate": 8.566903207493614e-05, "loss": 0.02936425805091858, "step": 50510 }, { "epoch": 14.340051092818621, "grad_norm": 0.41711366176605225, "learning_rate": 8.566619358501278e-05, "loss": 0.021552859246730803, "step": 50520 }, { "epoch": 14.34288958274198, "grad_norm": 1.3852055072784424, "learning_rate": 8.566335509508941e-05, "loss": 0.02809354364871979, "step": 50530 }, { "epoch": 14.345728072665342, "grad_norm": 1.3221259117126465, "learning_rate": 8.566051660516605e-05, "loss": 0.014930884540081023, "step": 50540 }, { "epoch": 14.348566562588703, "grad_norm": 11.980422019958496, "learning_rate": 8.565767811524269e-05, "loss": 0.030402052402496337, "step": 50550 }, { "epoch": 14.351405052512064, "grad_norm": 1.9871940612792969, "learning_rate": 8.565483962531933e-05, "loss": 0.024224680662155152, "step": 50560 }, { "epoch": 14.354243542435425, "grad_norm": 1.720460057258606, "learning_rate": 8.565200113539598e-05, "loss": 0.025143852829933165, "step": 50570 }, { "epoch": 14.357082032358786, "grad_norm": 8.488961219787598, "learning_rate": 8.564916264547262e-05, "loss": 0.014349393546581268, "step": 50580 }, { "epoch": 14.359920522282145, "grad_norm": 1.6555289030075073, "learning_rate": 8.564632415554924e-05, "loss": 0.0280839204788208, "step": 50590 }, { "epoch": 14.362759012205506, "grad_norm": 1.9464515447616577, "learning_rate": 8.564348566562589e-05, "loss": 0.028211632370948793, "step": 50600 }, { "epoch": 14.365597502128868, "grad_norm": 0.09607020020484924, "learning_rate": 8.564064717570253e-05, "loss": 0.00787656456232071, "step": 50610 }, { "epoch": 14.368435992052229, "grad_norm": 2.0422425270080566, "learning_rate": 8.563780868577917e-05, "loss": 0.02645597457885742, "step": 50620 }, { "epoch": 14.37127448197559, "grad_norm": 3.4026432037353516, "learning_rate": 8.563497019585581e-05, "loss": 0.016307081282138824, "step": 50630 }, { "epoch": 14.374112971898949, "grad_norm": 9.48169231414795, "learning_rate": 8.563213170593245e-05, "loss": 0.01773492693901062, "step": 50640 }, { "epoch": 14.37695146182231, "grad_norm": 1.983173131942749, "learning_rate": 8.562929321600909e-05, "loss": 0.022882972657680512, "step": 50650 }, { "epoch": 14.379789951745671, "grad_norm": 9.702798843383789, "learning_rate": 8.562645472608572e-05, "loss": 0.03225910663604736, "step": 50660 }, { "epoch": 14.382628441669032, "grad_norm": 1.2167093753814697, "learning_rate": 8.562361623616236e-05, "loss": 0.018802011013031007, "step": 50670 }, { "epoch": 14.385466931592394, "grad_norm": 0.746484100818634, "learning_rate": 8.5620777746239e-05, "loss": 0.01246480569243431, "step": 50680 }, { "epoch": 14.388305421515753, "grad_norm": 8.777046203613281, "learning_rate": 8.561793925631565e-05, "loss": 0.018167215585708617, "step": 50690 }, { "epoch": 14.391143911439114, "grad_norm": 0.9879831075668335, "learning_rate": 8.561510076639229e-05, "loss": 0.006514652073383332, "step": 50700 }, { "epoch": 14.393982401362475, "grad_norm": 2.686424493789673, "learning_rate": 8.561226227646893e-05, "loss": 0.010459230840206146, "step": 50710 }, { "epoch": 14.396820891285836, "grad_norm": 9.012724876403809, "learning_rate": 8.560942378654556e-05, "loss": 0.026171407103538512, "step": 50720 }, { "epoch": 14.399659381209197, "grad_norm": 14.51537036895752, "learning_rate": 8.56065852966222e-05, "loss": 0.040876299142837524, "step": 50730 }, { "epoch": 14.402497871132557, "grad_norm": 0.9446371793746948, "learning_rate": 8.560374680669884e-05, "loss": 0.015081483125686645, "step": 50740 }, { "epoch": 14.405336361055918, "grad_norm": 13.199871063232422, "learning_rate": 8.560090831677548e-05, "loss": 0.018988591432571412, "step": 50750 }, { "epoch": 14.408174850979279, "grad_norm": 2.977435350418091, "learning_rate": 8.559806982685212e-05, "loss": 0.018152783811092376, "step": 50760 }, { "epoch": 14.41101334090264, "grad_norm": 5.507685661315918, "learning_rate": 8.559523133692876e-05, "loss": 0.019890107214450836, "step": 50770 }, { "epoch": 14.413851830826001, "grad_norm": 5.907930850982666, "learning_rate": 8.55923928470054e-05, "loss": 0.01816173791885376, "step": 50780 }, { "epoch": 14.416690320749362, "grad_norm": 10.446027755737305, "learning_rate": 8.558955435708203e-05, "loss": 0.01919698566198349, "step": 50790 }, { "epoch": 14.419528810672722, "grad_norm": 5.5220746994018555, "learning_rate": 8.558671586715867e-05, "loss": 0.010036560893058776, "step": 50800 }, { "epoch": 14.422367300596083, "grad_norm": 0.4808517396450043, "learning_rate": 8.558387737723531e-05, "loss": 0.015087707340717316, "step": 50810 }, { "epoch": 14.425205790519444, "grad_norm": 1.2172044515609741, "learning_rate": 8.558103888731196e-05, "loss": 0.024086163938045503, "step": 50820 }, { "epoch": 14.428044280442805, "grad_norm": 1.481182336807251, "learning_rate": 8.55782003973886e-05, "loss": 0.04571487307548523, "step": 50830 }, { "epoch": 14.430882770366166, "grad_norm": 7.657914161682129, "learning_rate": 8.557536190746524e-05, "loss": 0.0393263190984726, "step": 50840 }, { "epoch": 14.433721260289525, "grad_norm": 1.6742866039276123, "learning_rate": 8.557252341754187e-05, "loss": 0.020001593232154845, "step": 50850 }, { "epoch": 14.436559750212886, "grad_norm": 2.672473669052124, "learning_rate": 8.556968492761851e-05, "loss": 0.017517706751823424, "step": 50860 }, { "epoch": 14.439398240136248, "grad_norm": 0.09073390066623688, "learning_rate": 8.556684643769515e-05, "loss": 0.026901447772979738, "step": 50870 }, { "epoch": 14.442236730059609, "grad_norm": 1.156864047050476, "learning_rate": 8.556400794777179e-05, "loss": 0.021197380125522615, "step": 50880 }, { "epoch": 14.44507521998297, "grad_norm": 4.356119155883789, "learning_rate": 8.556116945784843e-05, "loss": 0.015231791138648986, "step": 50890 }, { "epoch": 14.447913709906329, "grad_norm": 1.1421833038330078, "learning_rate": 8.555833096792507e-05, "loss": 0.021992655098438264, "step": 50900 }, { "epoch": 14.45075219982969, "grad_norm": 0.23002612590789795, "learning_rate": 8.555549247800171e-05, "loss": 0.021518231928348543, "step": 50910 }, { "epoch": 14.453590689753051, "grad_norm": 4.457428932189941, "learning_rate": 8.555265398807834e-05, "loss": 0.026368576288223266, "step": 50920 }, { "epoch": 14.456429179676412, "grad_norm": 4.154763698577881, "learning_rate": 8.554981549815498e-05, "loss": 0.021227678656578063, "step": 50930 }, { "epoch": 14.459267669599773, "grad_norm": 2.0543501377105713, "learning_rate": 8.554697700823163e-05, "loss": 0.02073853015899658, "step": 50940 }, { "epoch": 14.462106159523135, "grad_norm": 4.015584468841553, "learning_rate": 8.554413851830825e-05, "loss": 0.027895450592041016, "step": 50950 }, { "epoch": 14.464944649446494, "grad_norm": 7.388682842254639, "learning_rate": 8.554130002838491e-05, "loss": 0.02869313955307007, "step": 50960 }, { "epoch": 14.467783139369855, "grad_norm": 4.977144718170166, "learning_rate": 8.553846153846155e-05, "loss": 0.01326272338628769, "step": 50970 }, { "epoch": 14.470621629293216, "grad_norm": 1.317305564880371, "learning_rate": 8.553562304853818e-05, "loss": 0.01400577872991562, "step": 50980 }, { "epoch": 14.473460119216577, "grad_norm": 10.458863258361816, "learning_rate": 8.553278455861482e-05, "loss": 0.030185303092002867, "step": 50990 }, { "epoch": 14.476298609139938, "grad_norm": 8.1785888671875, "learning_rate": 8.552994606869146e-05, "loss": 0.0426828145980835, "step": 51000 }, { "epoch": 14.476298609139938, "eval_accuracy": 0.9623577287467413, "eval_loss": 0.12816429138183594, "eval_runtime": 32.5841, "eval_samples_per_second": 482.659, "eval_steps_per_second": 7.55, "step": 51000 }, { "epoch": 14.479137099063298, "grad_norm": 3.057300329208374, "learning_rate": 8.55271075787681e-05, "loss": 0.030318525433540345, "step": 51010 }, { "epoch": 14.481975588986659, "grad_norm": 5.360424995422363, "learning_rate": 8.552426908884474e-05, "loss": 0.041078022122383116, "step": 51020 }, { "epoch": 14.48481407891002, "grad_norm": 0.36170557141304016, "learning_rate": 8.552143059892138e-05, "loss": 0.012086291611194611, "step": 51030 }, { "epoch": 14.487652568833381, "grad_norm": 1.2120792865753174, "learning_rate": 8.551859210899803e-05, "loss": 0.024634677171707153, "step": 51040 }, { "epoch": 14.490491058756742, "grad_norm": 14.350738525390625, "learning_rate": 8.551575361907465e-05, "loss": 0.05213993787765503, "step": 51050 }, { "epoch": 14.493329548680101, "grad_norm": 0.36761918663978577, "learning_rate": 8.55129151291513e-05, "loss": 0.011964429169893265, "step": 51060 }, { "epoch": 14.496168038603463, "grad_norm": 3.0218446254730225, "learning_rate": 8.551007663922794e-05, "loss": 0.024747122824192048, "step": 51070 }, { "epoch": 14.499006528526824, "grad_norm": 0.8341790437698364, "learning_rate": 8.550723814930456e-05, "loss": 0.044057315587997435, "step": 51080 }, { "epoch": 14.501845018450185, "grad_norm": 8.0025053024292, "learning_rate": 8.550439965938122e-05, "loss": 0.035060223937034604, "step": 51090 }, { "epoch": 14.504683508373546, "grad_norm": 10.34943962097168, "learning_rate": 8.550156116945786e-05, "loss": 0.016242331266403197, "step": 51100 }, { "epoch": 14.507521998296905, "grad_norm": 8.305143356323242, "learning_rate": 8.549872267953449e-05, "loss": 0.027518245577812194, "step": 51110 }, { "epoch": 14.510360488220266, "grad_norm": 5.813835144042969, "learning_rate": 8.549588418961113e-05, "loss": 0.037949678301811215, "step": 51120 }, { "epoch": 14.513198978143627, "grad_norm": 0.4022533893585205, "learning_rate": 8.549304569968777e-05, "loss": 0.023515960574150084, "step": 51130 }, { "epoch": 14.516037468066989, "grad_norm": 5.517359733581543, "learning_rate": 8.549020720976441e-05, "loss": 0.035278400778770445, "step": 51140 }, { "epoch": 14.51887595799035, "grad_norm": 1.7435822486877441, "learning_rate": 8.548736871984104e-05, "loss": 0.012398506700992584, "step": 51150 }, { "epoch": 14.521714447913709, "grad_norm": 2.0074353218078613, "learning_rate": 8.54845302299177e-05, "loss": 0.010980060696601868, "step": 51160 }, { "epoch": 14.52455293783707, "grad_norm": 1.2162983417510986, "learning_rate": 8.548169173999434e-05, "loss": 0.032818195223808286, "step": 51170 }, { "epoch": 14.527391427760431, "grad_norm": 2.7796096801757812, "learning_rate": 8.547885325007096e-05, "loss": 0.01596241444349289, "step": 51180 }, { "epoch": 14.530229917683792, "grad_norm": 3.4301745891571045, "learning_rate": 8.54760147601476e-05, "loss": 0.01066935658454895, "step": 51190 }, { "epoch": 14.533068407607153, "grad_norm": 0.8435087203979492, "learning_rate": 8.547317627022425e-05, "loss": 0.012991021573543548, "step": 51200 }, { "epoch": 14.535906897530515, "grad_norm": 7.661426067352295, "learning_rate": 8.547033778030087e-05, "loss": 0.018514323234558105, "step": 51210 }, { "epoch": 14.538745387453874, "grad_norm": 9.06062126159668, "learning_rate": 8.546749929037753e-05, "loss": 0.015793195366859435, "step": 51220 }, { "epoch": 14.541583877377235, "grad_norm": 8.07730484008789, "learning_rate": 8.546466080045417e-05, "loss": 0.03910217583179474, "step": 51230 }, { "epoch": 14.544422367300596, "grad_norm": 9.882948875427246, "learning_rate": 8.54618223105308e-05, "loss": 0.05026043653488159, "step": 51240 }, { "epoch": 14.547260857223957, "grad_norm": 7.14012336730957, "learning_rate": 8.545898382060744e-05, "loss": 0.022075828909873963, "step": 51250 }, { "epoch": 14.550099347147318, "grad_norm": 11.129467010498047, "learning_rate": 8.545614533068408e-05, "loss": 0.09413227438926697, "step": 51260 }, { "epoch": 14.552937837070678, "grad_norm": 8.14638900756836, "learning_rate": 8.545330684076072e-05, "loss": 0.023393148183822633, "step": 51270 }, { "epoch": 14.555776326994039, "grad_norm": 8.789048194885254, "learning_rate": 8.545046835083735e-05, "loss": 0.02766866683959961, "step": 51280 }, { "epoch": 14.5586148169174, "grad_norm": 0.38551589846611023, "learning_rate": 8.5447629860914e-05, "loss": 0.009357611835002898, "step": 51290 }, { "epoch": 14.561453306840761, "grad_norm": 13.2433443069458, "learning_rate": 8.544479137099063e-05, "loss": 0.03290225565433502, "step": 51300 }, { "epoch": 14.564291796764122, "grad_norm": 5.133528709411621, "learning_rate": 8.544195288106727e-05, "loss": 0.02740500569343567, "step": 51310 }, { "epoch": 14.567130286687481, "grad_norm": 6.7834930419921875, "learning_rate": 8.543911439114392e-05, "loss": 0.019574275612831114, "step": 51320 }, { "epoch": 14.569968776610843, "grad_norm": 5.09575891494751, "learning_rate": 8.543627590122056e-05, "loss": 0.04770260751247406, "step": 51330 }, { "epoch": 14.572807266534204, "grad_norm": 2.4561169147491455, "learning_rate": 8.543343741129719e-05, "loss": 0.012683361768722534, "step": 51340 }, { "epoch": 14.575645756457565, "grad_norm": 10.755552291870117, "learning_rate": 8.543059892137383e-05, "loss": 0.01899314671754837, "step": 51350 }, { "epoch": 14.578484246380926, "grad_norm": 3.41241192817688, "learning_rate": 8.542776043145048e-05, "loss": 0.03181129097938538, "step": 51360 }, { "epoch": 14.581322736304287, "grad_norm": 0.683917224407196, "learning_rate": 8.542492194152711e-05, "loss": 0.030765289068222047, "step": 51370 }, { "epoch": 14.584161226227646, "grad_norm": 6.973902225494385, "learning_rate": 8.542208345160375e-05, "loss": 0.0119208425283432, "step": 51380 }, { "epoch": 14.586999716151007, "grad_norm": 0.1274181306362152, "learning_rate": 8.541924496168039e-05, "loss": 0.020972032845020295, "step": 51390 }, { "epoch": 14.589838206074369, "grad_norm": 5.274182319641113, "learning_rate": 8.541640647175702e-05, "loss": 0.01074971780180931, "step": 51400 }, { "epoch": 14.59267669599773, "grad_norm": 1.8795791864395142, "learning_rate": 8.541356798183366e-05, "loss": 0.014455355703830719, "step": 51410 }, { "epoch": 14.59551518592109, "grad_norm": 10.029135704040527, "learning_rate": 8.541072949191032e-05, "loss": 0.03340495228767395, "step": 51420 }, { "epoch": 14.59835367584445, "grad_norm": 2.7344813346862793, "learning_rate": 8.540789100198694e-05, "loss": 0.019578197598457338, "step": 51430 }, { "epoch": 14.601192165767811, "grad_norm": 12.516303062438965, "learning_rate": 8.540505251206359e-05, "loss": 0.035264629125595096, "step": 51440 }, { "epoch": 14.604030655691172, "grad_norm": 6.471243858337402, "learning_rate": 8.540221402214023e-05, "loss": 0.011412911862134934, "step": 51450 }, { "epoch": 14.606869145614533, "grad_norm": 11.616470336914062, "learning_rate": 8.539937553221687e-05, "loss": 0.022739648818969727, "step": 51460 }, { "epoch": 14.609707635537895, "grad_norm": 1.0880532264709473, "learning_rate": 8.53965370422935e-05, "loss": 0.012962998449802398, "step": 51470 }, { "epoch": 14.612546125461254, "grad_norm": 0.6036632061004639, "learning_rate": 8.539369855237014e-05, "loss": 0.03275137841701507, "step": 51480 }, { "epoch": 14.615384615384615, "grad_norm": 14.601836204528809, "learning_rate": 8.539086006244679e-05, "loss": 0.02714729607105255, "step": 51490 }, { "epoch": 14.618223105307976, "grad_norm": 3.207322359085083, "learning_rate": 8.538802157252342e-05, "loss": 0.008162341266870498, "step": 51500 }, { "epoch": 14.618223105307976, "eval_accuracy": 0.9724041457366313, "eval_loss": 0.09189394116401672, "eval_runtime": 33.9481, "eval_samples_per_second": 463.266, "eval_steps_per_second": 7.246, "step": 51500 }, { "epoch": 14.621061595231337, "grad_norm": 2.1118788719177246, "learning_rate": 8.538518308260006e-05, "loss": 0.023295287787914277, "step": 51510 }, { "epoch": 14.623900085154698, "grad_norm": 2.8307955265045166, "learning_rate": 8.53823445926767e-05, "loss": 0.022620254755020143, "step": 51520 }, { "epoch": 14.626738575078058, "grad_norm": 10.486608505249023, "learning_rate": 8.537950610275333e-05, "loss": 0.025966167449951172, "step": 51530 }, { "epoch": 14.629577065001419, "grad_norm": 3.632990598678589, "learning_rate": 8.537666761282997e-05, "loss": 0.03730628788471222, "step": 51540 }, { "epoch": 14.63241555492478, "grad_norm": 3.2476694583892822, "learning_rate": 8.537382912290661e-05, "loss": 0.03382015824317932, "step": 51550 }, { "epoch": 14.635254044848141, "grad_norm": 13.662802696228027, "learning_rate": 8.537099063298325e-05, "loss": 0.03342938423156738, "step": 51560 }, { "epoch": 14.638092534771502, "grad_norm": 0.5195565223693848, "learning_rate": 8.53681521430599e-05, "loss": 0.022518911957740785, "step": 51570 }, { "epoch": 14.640931024694861, "grad_norm": 19.15862274169922, "learning_rate": 8.536531365313654e-05, "loss": 0.01407323032617569, "step": 51580 }, { "epoch": 14.643769514618223, "grad_norm": 0.4843665361404419, "learning_rate": 8.536247516321318e-05, "loss": 0.021293465793132783, "step": 51590 }, { "epoch": 14.646608004541584, "grad_norm": 3.0979204177856445, "learning_rate": 8.535963667328981e-05, "loss": 0.0137114480137825, "step": 51600 }, { "epoch": 14.649446494464945, "grad_norm": 1.924691081047058, "learning_rate": 8.535679818336645e-05, "loss": 0.01995665729045868, "step": 51610 }, { "epoch": 14.652284984388306, "grad_norm": 5.831019401550293, "learning_rate": 8.53539596934431e-05, "loss": 0.020045560598373414, "step": 51620 }, { "epoch": 14.655123474311667, "grad_norm": 5.01419734954834, "learning_rate": 8.535112120351973e-05, "loss": 0.008567073941230774, "step": 51630 }, { "epoch": 14.657961964235026, "grad_norm": 3.298065662384033, "learning_rate": 8.534828271359637e-05, "loss": 0.011941273510456086, "step": 51640 }, { "epoch": 14.660800454158387, "grad_norm": 1.5483444929122925, "learning_rate": 8.534544422367301e-05, "loss": 0.017076532542705535, "step": 51650 }, { "epoch": 14.663638944081749, "grad_norm": 0.47979605197906494, "learning_rate": 8.534260573374964e-05, "loss": 0.016923788189888, "step": 51660 }, { "epoch": 14.66647743400511, "grad_norm": 1.9927647113800049, "learning_rate": 8.533976724382628e-05, "loss": 0.022354704141616822, "step": 51670 }, { "epoch": 14.66931592392847, "grad_norm": 0.14934992790222168, "learning_rate": 8.533692875390292e-05, "loss": 0.024277690052986144, "step": 51680 }, { "epoch": 14.67215441385183, "grad_norm": 2.2958178520202637, "learning_rate": 8.533409026397957e-05, "loss": 0.013929572701454163, "step": 51690 }, { "epoch": 14.674992903775191, "grad_norm": 0.7345337271690369, "learning_rate": 8.533125177405621e-05, "loss": 0.026207783818244935, "step": 51700 }, { "epoch": 14.677831393698552, "grad_norm": 6.062478065490723, "learning_rate": 8.532841328413285e-05, "loss": 0.009923833608627319, "step": 51710 }, { "epoch": 14.680669883621913, "grad_norm": 7.150387287139893, "learning_rate": 8.532557479420949e-05, "loss": 0.02468761056661606, "step": 51720 }, { "epoch": 14.683508373545274, "grad_norm": 3.230073928833008, "learning_rate": 8.532273630428612e-05, "loss": 0.06996071934700013, "step": 51730 }, { "epoch": 14.686346863468636, "grad_norm": 10.770792961120605, "learning_rate": 8.531989781436276e-05, "loss": 0.025319790840148924, "step": 51740 }, { "epoch": 14.689185353391995, "grad_norm": 7.068141937255859, "learning_rate": 8.531705932443941e-05, "loss": 0.020250698924064635, "step": 51750 }, { "epoch": 14.692023843315356, "grad_norm": 11.491432189941406, "learning_rate": 8.531422083451604e-05, "loss": 0.022601500153541565, "step": 51760 }, { "epoch": 14.694862333238717, "grad_norm": 11.916325569152832, "learning_rate": 8.531138234459268e-05, "loss": 0.04248327612876892, "step": 51770 }, { "epoch": 14.697700823162078, "grad_norm": 5.588193893432617, "learning_rate": 8.530854385466932e-05, "loss": 0.014440391957759858, "step": 51780 }, { "epoch": 14.70053931308544, "grad_norm": 10.039924621582031, "learning_rate": 8.530570536474595e-05, "loss": 0.03509699106216431, "step": 51790 }, { "epoch": 14.703377803008799, "grad_norm": 1.1222052574157715, "learning_rate": 8.53028668748226e-05, "loss": 0.026770201325416566, "step": 51800 }, { "epoch": 14.70621629293216, "grad_norm": 0.8904647827148438, "learning_rate": 8.530002838489924e-05, "loss": 0.041410434246063235, "step": 51810 }, { "epoch": 14.709054782855521, "grad_norm": 7.355559825897217, "learning_rate": 8.529718989497588e-05, "loss": 0.014018508791923522, "step": 51820 }, { "epoch": 14.711893272778882, "grad_norm": 6.631690502166748, "learning_rate": 8.529435140505252e-05, "loss": 0.02693697214126587, "step": 51830 }, { "epoch": 14.714731762702243, "grad_norm": 15.316610336303711, "learning_rate": 8.529151291512916e-05, "loss": 0.03750421702861786, "step": 51840 }, { "epoch": 14.717570252625602, "grad_norm": 5.242663383483887, "learning_rate": 8.52886744252058e-05, "loss": 0.03107273280620575, "step": 51850 }, { "epoch": 14.720408742548964, "grad_norm": 6.7756171226501465, "learning_rate": 8.528583593528243e-05, "loss": 0.025378978252410887, "step": 51860 }, { "epoch": 14.723247232472325, "grad_norm": 6.73726749420166, "learning_rate": 8.528299744535907e-05, "loss": 0.02238323539495468, "step": 51870 }, { "epoch": 14.726085722395686, "grad_norm": 11.926229476928711, "learning_rate": 8.528015895543571e-05, "loss": 0.021154618263244628, "step": 51880 }, { "epoch": 14.728924212319047, "grad_norm": 0.5368697047233582, "learning_rate": 8.527732046551235e-05, "loss": 0.01885899603366852, "step": 51890 }, { "epoch": 14.731762702242406, "grad_norm": 2.1340489387512207, "learning_rate": 8.5274481975589e-05, "loss": 0.010505671799182891, "step": 51900 }, { "epoch": 14.734601192165767, "grad_norm": 7.346948146820068, "learning_rate": 8.527164348566564e-05, "loss": 0.02962099015712738, "step": 51910 }, { "epoch": 14.737439682089128, "grad_norm": 3.6658847332000732, "learning_rate": 8.526880499574226e-05, "loss": 0.015588580071926117, "step": 51920 }, { "epoch": 14.74027817201249, "grad_norm": 4.146515369415283, "learning_rate": 8.52659665058189e-05, "loss": 0.017428135871887206, "step": 51930 }, { "epoch": 14.74311666193585, "grad_norm": 0.9309405088424683, "learning_rate": 8.526312801589555e-05, "loss": 0.012911485135555267, "step": 51940 }, { "epoch": 14.74595515185921, "grad_norm": 5.780013084411621, "learning_rate": 8.526028952597219e-05, "loss": 0.03897911012172699, "step": 51950 }, { "epoch": 14.748793641782571, "grad_norm": 7.592972755432129, "learning_rate": 8.525745103604883e-05, "loss": 0.02850548326969147, "step": 51960 }, { "epoch": 14.751632131705932, "grad_norm": 7.459175109863281, "learning_rate": 8.525461254612547e-05, "loss": 0.018530362844467164, "step": 51970 }, { "epoch": 14.754470621629293, "grad_norm": 0.4418818950653076, "learning_rate": 8.525177405620211e-05, "loss": 0.028995144367218017, "step": 51980 }, { "epoch": 14.757309111552654, "grad_norm": 1.8403306007385254, "learning_rate": 8.524893556627874e-05, "loss": 0.019927310943603515, "step": 51990 }, { "epoch": 14.760147601476016, "grad_norm": 1.693477749824524, "learning_rate": 8.524609707635538e-05, "loss": 0.0135043665766716, "step": 52000 }, { "epoch": 14.760147601476016, "eval_accuracy": 0.9585426336872894, "eval_loss": 0.1314307600259781, "eval_runtime": 33.8394, "eval_samples_per_second": 464.754, "eval_steps_per_second": 7.27, "step": 52000 }, { "epoch": 14.762986091399375, "grad_norm": 7.1476359367370605, "learning_rate": 8.524325858643202e-05, "loss": 0.02891538143157959, "step": 52010 }, { "epoch": 14.765824581322736, "grad_norm": 8.367448806762695, "learning_rate": 8.524042009650866e-05, "loss": 0.022006432712078094, "step": 52020 }, { "epoch": 14.768663071246097, "grad_norm": 0.5517020225524902, "learning_rate": 8.52375816065853e-05, "loss": 0.01472790539264679, "step": 52030 }, { "epoch": 14.771501561169458, "grad_norm": 1.094295859336853, "learning_rate": 8.523474311666195e-05, "loss": 0.02625456154346466, "step": 52040 }, { "epoch": 14.77434005109282, "grad_norm": 7.452306270599365, "learning_rate": 8.523190462673857e-05, "loss": 0.016284295916557313, "step": 52050 }, { "epoch": 14.777178541016179, "grad_norm": 1.1724226474761963, "learning_rate": 8.522906613681522e-05, "loss": 0.004658450931310653, "step": 52060 }, { "epoch": 14.78001703093954, "grad_norm": 15.71119213104248, "learning_rate": 8.522622764689186e-05, "loss": 0.03971544504165649, "step": 52070 }, { "epoch": 14.782855520862901, "grad_norm": 0.528618335723877, "learning_rate": 8.52233891569685e-05, "loss": 0.022866708040237427, "step": 52080 }, { "epoch": 14.785694010786262, "grad_norm": 7.508442401885986, "learning_rate": 8.522055066704514e-05, "loss": 0.03367758989334106, "step": 52090 }, { "epoch": 14.788532500709623, "grad_norm": 2.8866114616394043, "learning_rate": 8.521771217712178e-05, "loss": 0.022376427054405214, "step": 52100 }, { "epoch": 14.791370990632982, "grad_norm": 1.9354732036590576, "learning_rate": 8.521487368719842e-05, "loss": 0.02133880853652954, "step": 52110 }, { "epoch": 14.794209480556344, "grad_norm": 0.2909198999404907, "learning_rate": 8.521203519727505e-05, "loss": 0.026404944062232972, "step": 52120 }, { "epoch": 14.797047970479705, "grad_norm": 0.1525513082742691, "learning_rate": 8.520919670735169e-05, "loss": 0.017587725818157197, "step": 52130 }, { "epoch": 14.799886460403066, "grad_norm": 2.113109588623047, "learning_rate": 8.520635821742833e-05, "loss": 0.01730184555053711, "step": 52140 }, { "epoch": 14.802724950326427, "grad_norm": 6.107914924621582, "learning_rate": 8.520351972750497e-05, "loss": 0.022978690266609193, "step": 52150 }, { "epoch": 14.805563440249788, "grad_norm": 0.9846431016921997, "learning_rate": 8.520068123758162e-05, "loss": 0.014191566407680512, "step": 52160 }, { "epoch": 14.808401930173147, "grad_norm": 7.876701831817627, "learning_rate": 8.519784274765826e-05, "loss": 0.02177199423313141, "step": 52170 }, { "epoch": 14.811240420096508, "grad_norm": 2.624267578125, "learning_rate": 8.519500425773488e-05, "loss": 0.008835273236036301, "step": 52180 }, { "epoch": 14.81407891001987, "grad_norm": 2.2193617820739746, "learning_rate": 8.519216576781153e-05, "loss": 0.01976000964641571, "step": 52190 }, { "epoch": 14.81691739994323, "grad_norm": 4.99762487411499, "learning_rate": 8.518932727788817e-05, "loss": 0.024918155372142793, "step": 52200 }, { "epoch": 14.819755889866592, "grad_norm": 1.8133982419967651, "learning_rate": 8.518648878796481e-05, "loss": 0.024818506836891175, "step": 52210 }, { "epoch": 14.822594379789951, "grad_norm": 8.869595527648926, "learning_rate": 8.518365029804145e-05, "loss": 0.03533506393432617, "step": 52220 }, { "epoch": 14.825432869713312, "grad_norm": 2.6566243171691895, "learning_rate": 8.518081180811809e-05, "loss": 0.014932847023010254, "step": 52230 }, { "epoch": 14.828271359636673, "grad_norm": 3.306131601333618, "learning_rate": 8.517797331819472e-05, "loss": 0.02189037501811981, "step": 52240 }, { "epoch": 14.831109849560034, "grad_norm": 5.300661087036133, "learning_rate": 8.517513482827136e-05, "loss": 0.02594158947467804, "step": 52250 }, { "epoch": 14.833948339483396, "grad_norm": 11.294709205627441, "learning_rate": 8.5172296338348e-05, "loss": 0.03136888146400452, "step": 52260 }, { "epoch": 14.836786829406755, "grad_norm": 12.100509643554688, "learning_rate": 8.516945784842464e-05, "loss": 0.03536904454231262, "step": 52270 }, { "epoch": 14.839625319330116, "grad_norm": 7.188144683837891, "learning_rate": 8.516661935850127e-05, "loss": 0.026781511306762696, "step": 52280 }, { "epoch": 14.842463809253477, "grad_norm": 0.3136914372444153, "learning_rate": 8.516378086857793e-05, "loss": 0.02372313290834427, "step": 52290 }, { "epoch": 14.845302299176838, "grad_norm": 1.3867112398147583, "learning_rate": 8.516094237865457e-05, "loss": 0.03486723005771637, "step": 52300 }, { "epoch": 14.8481407891002, "grad_norm": 1.8975248336791992, "learning_rate": 8.51581038887312e-05, "loss": 0.010269670188426972, "step": 52310 }, { "epoch": 14.850979279023559, "grad_norm": 0.673961341381073, "learning_rate": 8.515526539880784e-05, "loss": 0.024346697330474853, "step": 52320 }, { "epoch": 14.85381776894692, "grad_norm": 0.31187090277671814, "learning_rate": 8.515242690888448e-05, "loss": 0.024716326594352724, "step": 52330 }, { "epoch": 14.85665625887028, "grad_norm": 3.7828269004821777, "learning_rate": 8.51495884189611e-05, "loss": 0.0292150616645813, "step": 52340 }, { "epoch": 14.859494748793642, "grad_norm": 2.7360892295837402, "learning_rate": 8.514674992903776e-05, "loss": 0.017007063329219817, "step": 52350 }, { "epoch": 14.862333238717003, "grad_norm": 2.571808099746704, "learning_rate": 8.51439114391144e-05, "loss": 0.01464223712682724, "step": 52360 }, { "epoch": 14.865171728640362, "grad_norm": 0.8534035086631775, "learning_rate": 8.514107294919103e-05, "loss": 0.024817045032978057, "step": 52370 }, { "epoch": 14.868010218563724, "grad_norm": 0.0576590821146965, "learning_rate": 8.513823445926767e-05, "loss": 0.00861578956246376, "step": 52380 }, { "epoch": 14.870848708487085, "grad_norm": 12.54513168334961, "learning_rate": 8.513539596934431e-05, "loss": 0.02193271666765213, "step": 52390 }, { "epoch": 14.873687198410446, "grad_norm": 8.871712684631348, "learning_rate": 8.513255747942095e-05, "loss": 0.03997030854225159, "step": 52400 }, { "epoch": 14.876525688333807, "grad_norm": 0.3206276297569275, "learning_rate": 8.512971898949758e-05, "loss": 0.015971074998378753, "step": 52410 }, { "epoch": 14.879364178257168, "grad_norm": 5.1337480545043945, "learning_rate": 8.512688049957424e-05, "loss": 0.03551251888275146, "step": 52420 }, { "epoch": 14.882202668180527, "grad_norm": 3.859198570251465, "learning_rate": 8.512404200965088e-05, "loss": 0.01616571396589279, "step": 52430 }, { "epoch": 14.885041158103888, "grad_norm": 1.7815632820129395, "learning_rate": 8.51212035197275e-05, "loss": 0.029545468091964722, "step": 52440 }, { "epoch": 14.88787964802725, "grad_norm": 0.4810698628425598, "learning_rate": 8.511836502980415e-05, "loss": 0.014272180199623109, "step": 52450 }, { "epoch": 14.89071813795061, "grad_norm": 4.659372806549072, "learning_rate": 8.511552653988079e-05, "loss": 0.030330762267112732, "step": 52460 }, { "epoch": 14.893556627873972, "grad_norm": 8.235896110534668, "learning_rate": 8.511268804995742e-05, "loss": 0.022867147624492646, "step": 52470 }, { "epoch": 14.896395117797331, "grad_norm": 0.32104936242103577, "learning_rate": 8.510984956003406e-05, "loss": 0.01020715981721878, "step": 52480 }, { "epoch": 14.899233607720692, "grad_norm": 3.9835357666015625, "learning_rate": 8.510701107011071e-05, "loss": 0.020234313607215882, "step": 52490 }, { "epoch": 14.902072097644053, "grad_norm": 6.3451433181762695, "learning_rate": 8.510417258018734e-05, "loss": 0.013253730535507203, "step": 52500 }, { "epoch": 14.902072097644053, "eval_accuracy": 0.9651554651236727, "eval_loss": 0.12022577226161957, "eval_runtime": 40.1247, "eval_samples_per_second": 391.953, "eval_steps_per_second": 6.131, "step": 52500 }, { "epoch": 14.904910587567414, "grad_norm": 2.879020929336548, "learning_rate": 8.510161793925632e-05, "loss": 0.02291252017021179, "step": 52510 }, { "epoch": 14.907749077490775, "grad_norm": 0.8107879161834717, "learning_rate": 8.509877944933295e-05, "loss": 0.028547069430351256, "step": 52520 }, { "epoch": 14.910587567414137, "grad_norm": 4.464735984802246, "learning_rate": 8.50959409594096e-05, "loss": 0.021744588017463685, "step": 52530 }, { "epoch": 14.913426057337496, "grad_norm": 5.58769416809082, "learning_rate": 8.509310246948624e-05, "loss": 0.025717079639434814, "step": 52540 }, { "epoch": 14.916264547260857, "grad_norm": 5.165313243865967, "learning_rate": 8.509026397956287e-05, "loss": 0.01538359820842743, "step": 52550 }, { "epoch": 14.919103037184218, "grad_norm": 6.917158126831055, "learning_rate": 8.508742548963951e-05, "loss": 0.025838476419448853, "step": 52560 }, { "epoch": 14.92194152710758, "grad_norm": 5.761619567871094, "learning_rate": 8.508458699971616e-05, "loss": 0.020699064433574676, "step": 52570 }, { "epoch": 14.92478001703094, "grad_norm": 10.004955291748047, "learning_rate": 8.50817485097928e-05, "loss": 0.028815776109695435, "step": 52580 }, { "epoch": 14.9276185069543, "grad_norm": 4.944051742553711, "learning_rate": 8.507891001986942e-05, "loss": 0.07911953330039978, "step": 52590 }, { "epoch": 14.93045699687766, "grad_norm": 3.6758663654327393, "learning_rate": 8.507607152994608e-05, "loss": 0.031370213627815245, "step": 52600 }, { "epoch": 14.933295486801022, "grad_norm": 13.978338241577148, "learning_rate": 8.507323304002272e-05, "loss": 0.04268523454666138, "step": 52610 }, { "epoch": 14.936133976724383, "grad_norm": 4.129737377166748, "learning_rate": 8.507039455009935e-05, "loss": 0.03321532607078552, "step": 52620 }, { "epoch": 14.938972466647744, "grad_norm": 6.552499294281006, "learning_rate": 8.506755606017599e-05, "loss": 0.05168236494064331, "step": 52630 }, { "epoch": 14.941810956571103, "grad_norm": 0.46217775344848633, "learning_rate": 8.506471757025263e-05, "loss": 0.019694916903972626, "step": 52640 }, { "epoch": 14.944649446494465, "grad_norm": 0.5269156098365784, "learning_rate": 8.506187908032926e-05, "loss": 0.013715632259845734, "step": 52650 }, { "epoch": 14.947487936417826, "grad_norm": 0.8784176707267761, "learning_rate": 8.50590405904059e-05, "loss": 0.01036376804113388, "step": 52660 }, { "epoch": 14.950326426341187, "grad_norm": 0.7349655628204346, "learning_rate": 8.505620210048256e-05, "loss": 0.047914054989814756, "step": 52670 }, { "epoch": 14.953164916264548, "grad_norm": 8.401954650878906, "learning_rate": 8.505336361055918e-05, "loss": 0.018428848683834077, "step": 52680 }, { "epoch": 14.956003406187907, "grad_norm": 6.361067771911621, "learning_rate": 8.505052512063582e-05, "loss": 0.025895825028419493, "step": 52690 }, { "epoch": 14.958841896111268, "grad_norm": 2.89858341217041, "learning_rate": 8.504768663071247e-05, "loss": 0.018697208166122435, "step": 52700 }, { "epoch": 14.96168038603463, "grad_norm": 6.953074932098389, "learning_rate": 8.504484814078911e-05, "loss": 0.0270031213760376, "step": 52710 }, { "epoch": 14.96451887595799, "grad_norm": 0.5708863139152527, "learning_rate": 8.504200965086574e-05, "loss": 0.013958823680877686, "step": 52720 }, { "epoch": 14.967357365881352, "grad_norm": 5.2383012771606445, "learning_rate": 8.503917116094239e-05, "loss": 0.016605810821056367, "step": 52730 }, { "epoch": 14.970195855804711, "grad_norm": 6.382978439331055, "learning_rate": 8.503633267101903e-05, "loss": 0.02063416689634323, "step": 52740 }, { "epoch": 14.973034345728072, "grad_norm": 0.9574136734008789, "learning_rate": 8.503349418109566e-05, "loss": 0.01905331462621689, "step": 52750 }, { "epoch": 14.975872835651433, "grad_norm": 0.6861257553100586, "learning_rate": 8.50306556911723e-05, "loss": 0.021551936864852905, "step": 52760 }, { "epoch": 14.978711325574794, "grad_norm": 2.4526147842407227, "learning_rate": 8.502781720124894e-05, "loss": 0.02429864853620529, "step": 52770 }, { "epoch": 14.981549815498155, "grad_norm": 5.023222923278809, "learning_rate": 8.502497871132557e-05, "loss": 0.007180493324995041, "step": 52780 }, { "epoch": 14.984388305421517, "grad_norm": 3.5233566761016846, "learning_rate": 8.502214022140221e-05, "loss": 0.038328361511230466, "step": 52790 }, { "epoch": 14.987226795344876, "grad_norm": 11.291764259338379, "learning_rate": 8.501930173147887e-05, "loss": 0.018652980029582978, "step": 52800 }, { "epoch": 14.990065285268237, "grad_norm": 10.471677780151367, "learning_rate": 8.50164632415555e-05, "loss": 0.03265992999076843, "step": 52810 }, { "epoch": 14.992903775191598, "grad_norm": 0.3048339784145355, "learning_rate": 8.501362475163214e-05, "loss": 0.01954265534877777, "step": 52820 }, { "epoch": 14.99574226511496, "grad_norm": 4.233055591583252, "learning_rate": 8.501078626170878e-05, "loss": 0.019432641565799713, "step": 52830 }, { "epoch": 14.99858075503832, "grad_norm": 7.6398515701293945, "learning_rate": 8.500794777178542e-05, "loss": 0.01943272054195404, "step": 52840 }, { "epoch": 15.00141924496168, "grad_norm": 0.6770120859146118, "learning_rate": 8.500510928186205e-05, "loss": 0.012823785841464996, "step": 52850 }, { "epoch": 15.00425773488504, "grad_norm": 3.7106339931488037, "learning_rate": 8.500227079193869e-05, "loss": 0.011762087047100068, "step": 52860 }, { "epoch": 15.007096224808402, "grad_norm": 0.07363992184400558, "learning_rate": 8.499943230201534e-05, "loss": 0.014990490674972535, "step": 52870 }, { "epoch": 15.009934714731763, "grad_norm": 7.151165008544922, "learning_rate": 8.499659381209197e-05, "loss": 0.012440939247608186, "step": 52880 }, { "epoch": 15.012773204655124, "grad_norm": 2.890357255935669, "learning_rate": 8.499375532216861e-05, "loss": 0.00816085934638977, "step": 52890 }, { "epoch": 15.015611694578483, "grad_norm": 9.872705459594727, "learning_rate": 8.499091683224525e-05, "loss": 0.02082137018442154, "step": 52900 }, { "epoch": 15.018450184501845, "grad_norm": 3.0889999866485596, "learning_rate": 8.498807834232188e-05, "loss": 0.020700114965438842, "step": 52910 }, { "epoch": 15.021288674425206, "grad_norm": 3.4558985233306885, "learning_rate": 8.498523985239852e-05, "loss": 0.01841922998428345, "step": 52920 }, { "epoch": 15.024127164348567, "grad_norm": 2.223099946975708, "learning_rate": 8.498240136247518e-05, "loss": 0.018562173843383788, "step": 52930 }, { "epoch": 15.026965654271928, "grad_norm": 0.842693030834198, "learning_rate": 8.49795628725518e-05, "loss": 0.031805381178855896, "step": 52940 }, { "epoch": 15.029804144195289, "grad_norm": 3.466468334197998, "learning_rate": 8.497672438262845e-05, "loss": 0.04645878672599792, "step": 52950 }, { "epoch": 15.032642634118648, "grad_norm": 13.060648918151855, "learning_rate": 8.497388589270509e-05, "loss": 0.028109064698219298, "step": 52960 }, { "epoch": 15.03548112404201, "grad_norm": 8.88415241241455, "learning_rate": 8.497104740278173e-05, "loss": 0.015154854953289032, "step": 52970 }, { "epoch": 15.03831961396537, "grad_norm": 4.195424556732178, "learning_rate": 8.496820891285836e-05, "loss": 0.025537869334220885, "step": 52980 }, { "epoch": 15.041158103888732, "grad_norm": 2.0412662029266357, "learning_rate": 8.4965370422935e-05, "loss": 0.023753872513771056, "step": 52990 }, { "epoch": 15.043996593812093, "grad_norm": 2.9762237071990967, "learning_rate": 8.496253193301164e-05, "loss": 0.019068248569965363, "step": 53000 }, { "epoch": 15.043996593812093, "eval_accuracy": 0.9599415018757551, "eval_loss": 0.13849587738513947, "eval_runtime": 31.4473, "eval_samples_per_second": 500.107, "eval_steps_per_second": 7.823, "step": 53000 }, { "epoch": 15.046835083735452, "grad_norm": 9.622157096862793, "learning_rate": 8.495969344308828e-05, "loss": 0.03258668482303619, "step": 53010 }, { "epoch": 15.049673573658813, "grad_norm": 0.17980681359767914, "learning_rate": 8.495685495316492e-05, "loss": 0.017529304325580596, "step": 53020 }, { "epoch": 15.052512063582174, "grad_norm": 0.701528787612915, "learning_rate": 8.495401646324156e-05, "loss": 0.017827028036117555, "step": 53030 }, { "epoch": 15.055350553505535, "grad_norm": 1.3587353229522705, "learning_rate": 8.495117797331819e-05, "loss": 0.020718424022197722, "step": 53040 }, { "epoch": 15.058189043428897, "grad_norm": 10.213395118713379, "learning_rate": 8.494833948339483e-05, "loss": 0.03410811424255371, "step": 53050 }, { "epoch": 15.061027533352256, "grad_norm": 7.811426162719727, "learning_rate": 8.494550099347147e-05, "loss": 0.02746586799621582, "step": 53060 }, { "epoch": 15.063866023275617, "grad_norm": 5.778977870941162, "learning_rate": 8.494266250354812e-05, "loss": 0.013337841629981995, "step": 53070 }, { "epoch": 15.066704513198978, "grad_norm": 7.3025736808776855, "learning_rate": 8.49401078626171e-05, "loss": 0.0374883770942688, "step": 53080 }, { "epoch": 15.06954300312234, "grad_norm": 10.929003715515137, "learning_rate": 8.493726937269372e-05, "loss": 0.013419197499752044, "step": 53090 }, { "epoch": 15.0723814930457, "grad_norm": 0.1001855805516243, "learning_rate": 8.493443088277036e-05, "loss": 0.018354228138923644, "step": 53100 }, { "epoch": 15.07521998296906, "grad_norm": 1.9427123069763184, "learning_rate": 8.493159239284702e-05, "loss": 0.015902741253376006, "step": 53110 }, { "epoch": 15.07805847289242, "grad_norm": 6.767613410949707, "learning_rate": 8.492875390292365e-05, "loss": 0.039679181575775144, "step": 53120 }, { "epoch": 15.080896962815782, "grad_norm": 12.28404712677002, "learning_rate": 8.492591541300029e-05, "loss": 0.03286689817905426, "step": 53130 }, { "epoch": 15.083735452739143, "grad_norm": 4.032358646392822, "learning_rate": 8.492307692307693e-05, "loss": 0.021112531423568726, "step": 53140 }, { "epoch": 15.086573942662504, "grad_norm": 16.2640438079834, "learning_rate": 8.492023843315356e-05, "loss": 0.04106813669204712, "step": 53150 }, { "epoch": 15.089412432585865, "grad_norm": 7.31362771987915, "learning_rate": 8.49173999432302e-05, "loss": 0.024192117154598236, "step": 53160 }, { "epoch": 15.092250922509225, "grad_norm": 15.603915214538574, "learning_rate": 8.491456145330684e-05, "loss": 0.024449622631072997, "step": 53170 }, { "epoch": 15.095089412432586, "grad_norm": 1.1033289432525635, "learning_rate": 8.491172296338348e-05, "loss": 0.042060890793800355, "step": 53180 }, { "epoch": 15.097927902355947, "grad_norm": 0.8220723867416382, "learning_rate": 8.490888447346012e-05, "loss": 0.013409414887428283, "step": 53190 }, { "epoch": 15.100766392279308, "grad_norm": 0.8389987945556641, "learning_rate": 8.490604598353677e-05, "loss": 0.00987701416015625, "step": 53200 }, { "epoch": 15.103604882202669, "grad_norm": 2.8070530891418457, "learning_rate": 8.49032074936134e-05, "loss": 0.024442930519580842, "step": 53210 }, { "epoch": 15.106443372126028, "grad_norm": 0.7073200941085815, "learning_rate": 8.490036900369003e-05, "loss": 0.015232500433921815, "step": 53220 }, { "epoch": 15.10928186204939, "grad_norm": 5.243606090545654, "learning_rate": 8.489753051376668e-05, "loss": 0.015302164852619171, "step": 53230 }, { "epoch": 15.11212035197275, "grad_norm": 0.90240478515625, "learning_rate": 8.489469202384332e-05, "loss": 0.008487526327371597, "step": 53240 }, { "epoch": 15.114958841896112, "grad_norm": 1.1609364748001099, "learning_rate": 8.489185353391996e-05, "loss": 0.01488972008228302, "step": 53250 }, { "epoch": 15.117797331819473, "grad_norm": 5.5826873779296875, "learning_rate": 8.48890150439966e-05, "loss": 0.01556566208600998, "step": 53260 }, { "epoch": 15.120635821742832, "grad_norm": 3.1084468364715576, "learning_rate": 8.488617655407324e-05, "loss": 0.017637349665164948, "step": 53270 }, { "epoch": 15.123474311666193, "grad_norm": 1.0347892045974731, "learning_rate": 8.488333806414987e-05, "loss": 0.0169993132352829, "step": 53280 }, { "epoch": 15.126312801589554, "grad_norm": 2.6957931518554688, "learning_rate": 8.488049957422651e-05, "loss": 0.017174214124679565, "step": 53290 }, { "epoch": 15.129151291512915, "grad_norm": 17.910322189331055, "learning_rate": 8.487766108430315e-05, "loss": 0.02446994185447693, "step": 53300 }, { "epoch": 15.131989781436276, "grad_norm": 8.844029426574707, "learning_rate": 8.487482259437979e-05, "loss": 0.02642461657524109, "step": 53310 }, { "epoch": 15.134828271359636, "grad_norm": 3.4602267742156982, "learning_rate": 8.487198410445643e-05, "loss": 0.01319008320569992, "step": 53320 }, { "epoch": 15.137666761282997, "grad_norm": 1.6094123125076294, "learning_rate": 8.486914561453308e-05, "loss": 0.0057146593928337095, "step": 53330 }, { "epoch": 15.140505251206358, "grad_norm": 7.151821136474609, "learning_rate": 8.486630712460972e-05, "loss": 0.018627190589904787, "step": 53340 }, { "epoch": 15.14334374112972, "grad_norm": 2.2666633129119873, "learning_rate": 8.486346863468635e-05, "loss": 0.013075819611549378, "step": 53350 }, { "epoch": 15.14618223105308, "grad_norm": 2.913010358810425, "learning_rate": 8.486063014476299e-05, "loss": 0.01353330910205841, "step": 53360 }, { "epoch": 15.149020720976441, "grad_norm": 7.060380935668945, "learning_rate": 8.485779165483963e-05, "loss": 0.01410171240568161, "step": 53370 }, { "epoch": 15.1518592108998, "grad_norm": 16.999319076538086, "learning_rate": 8.485495316491627e-05, "loss": 0.04499986171722412, "step": 53380 }, { "epoch": 15.154697700823162, "grad_norm": 2.1588783264160156, "learning_rate": 8.485211467499291e-05, "loss": 0.010595713555812836, "step": 53390 }, { "epoch": 15.157536190746523, "grad_norm": 2.689300537109375, "learning_rate": 8.484927618506955e-05, "loss": 0.01214447319507599, "step": 53400 }, { "epoch": 15.160374680669884, "grad_norm": 0.5397258400917053, "learning_rate": 8.484643769514618e-05, "loss": 0.02905488610267639, "step": 53410 }, { "epoch": 15.163213170593245, "grad_norm": 4.432367324829102, "learning_rate": 8.484359920522282e-05, "loss": 0.0191912904381752, "step": 53420 }, { "epoch": 15.166051660516604, "grad_norm": 0.10838423669338226, "learning_rate": 8.484076071529946e-05, "loss": 0.014886307716369628, "step": 53430 }, { "epoch": 15.168890150439966, "grad_norm": 12.753387451171875, "learning_rate": 8.48379222253761e-05, "loss": 0.053890174627304076, "step": 53440 }, { "epoch": 15.171728640363327, "grad_norm": 10.219287872314453, "learning_rate": 8.483508373545275e-05, "loss": 0.01569346636533737, "step": 53450 }, { "epoch": 15.174567130286688, "grad_norm": 1.3385541439056396, "learning_rate": 8.483224524552939e-05, "loss": 0.007637448608875275, "step": 53460 }, { "epoch": 15.177405620210049, "grad_norm": 2.4267659187316895, "learning_rate": 8.482940675560603e-05, "loss": 0.013403034210205078, "step": 53470 }, { "epoch": 15.180244110133408, "grad_norm": 2.2850193977355957, "learning_rate": 8.482656826568266e-05, "loss": 0.0085382342338562, "step": 53480 }, { "epoch": 15.18308260005677, "grad_norm": 3.6434497833251953, "learning_rate": 8.48237297757593e-05, "loss": 0.018224382400512697, "step": 53490 }, { "epoch": 15.18592108998013, "grad_norm": 2.1378140449523926, "learning_rate": 8.482089128583594e-05, "loss": 0.007591058313846588, "step": 53500 }, { "epoch": 15.18592108998013, "eval_accuracy": 0.964837540535385, "eval_loss": 0.12011808902025223, "eval_runtime": 31.1651, "eval_samples_per_second": 504.634, "eval_steps_per_second": 7.893, "step": 53500 }, { "epoch": 15.188759579903492, "grad_norm": 0.4182736277580261, "learning_rate": 8.481805279591258e-05, "loss": 0.02281838804483414, "step": 53510 }, { "epoch": 15.191598069826853, "grad_norm": 4.538150787353516, "learning_rate": 8.481521430598922e-05, "loss": 0.02725878357887268, "step": 53520 }, { "epoch": 15.194436559750212, "grad_norm": 5.211309909820557, "learning_rate": 8.481237581606586e-05, "loss": 0.016534671187400818, "step": 53530 }, { "epoch": 15.197275049673573, "grad_norm": 0.043485917150974274, "learning_rate": 8.480953732614249e-05, "loss": 0.024470829963684083, "step": 53540 }, { "epoch": 15.200113539596934, "grad_norm": 1.0854041576385498, "learning_rate": 8.480669883621913e-05, "loss": 0.016517093777656554, "step": 53550 }, { "epoch": 15.202952029520295, "grad_norm": 0.23942150175571442, "learning_rate": 8.480386034629577e-05, "loss": 0.012358500063419342, "step": 53560 }, { "epoch": 15.205790519443656, "grad_norm": 2.5928940773010254, "learning_rate": 8.480102185637241e-05, "loss": 0.022774353623390198, "step": 53570 }, { "epoch": 15.208629009367018, "grad_norm": 0.5961080193519592, "learning_rate": 8.479818336644906e-05, "loss": 0.014884893596172333, "step": 53580 }, { "epoch": 15.211467499290377, "grad_norm": 12.122029304504395, "learning_rate": 8.47953448765257e-05, "loss": 0.03497117757797241, "step": 53590 }, { "epoch": 15.214305989213738, "grad_norm": 1.8768930435180664, "learning_rate": 8.479250638660234e-05, "loss": 0.02361843138933182, "step": 53600 }, { "epoch": 15.217144479137099, "grad_norm": 3.6838762760162354, "learning_rate": 8.478966789667897e-05, "loss": 0.010149551928043366, "step": 53610 }, { "epoch": 15.21998296906046, "grad_norm": 5.021037578582764, "learning_rate": 8.478682940675561e-05, "loss": 0.017436116933822632, "step": 53620 }, { "epoch": 15.222821458983821, "grad_norm": 10.80469036102295, "learning_rate": 8.478399091683225e-05, "loss": 0.02490849643945694, "step": 53630 }, { "epoch": 15.22565994890718, "grad_norm": 4.640071868896484, "learning_rate": 8.478115242690888e-05, "loss": 0.02584679126739502, "step": 53640 }, { "epoch": 15.228498438830542, "grad_norm": 1.4069832563400269, "learning_rate": 8.477831393698553e-05, "loss": 0.015845449268817903, "step": 53650 }, { "epoch": 15.231336928753903, "grad_norm": 1.6332918405532837, "learning_rate": 8.477547544706217e-05, "loss": 0.01912822276353836, "step": 53660 }, { "epoch": 15.234175418677264, "grad_norm": 1.3584104776382446, "learning_rate": 8.47726369571388e-05, "loss": 0.014244657754898072, "step": 53670 }, { "epoch": 15.237013908600625, "grad_norm": 6.506918430328369, "learning_rate": 8.476979846721544e-05, "loss": 0.017907139658927918, "step": 53680 }, { "epoch": 15.239852398523984, "grad_norm": 0.794291079044342, "learning_rate": 8.476695997729208e-05, "loss": 0.0067078597843647, "step": 53690 }, { "epoch": 15.242690888447346, "grad_norm": 10.134770393371582, "learning_rate": 8.476412148736873e-05, "loss": 0.03547353744506836, "step": 53700 }, { "epoch": 15.245529378370707, "grad_norm": 0.5092810392379761, "learning_rate": 8.476128299744537e-05, "loss": 0.014330226182937621, "step": 53710 }, { "epoch": 15.248367868294068, "grad_norm": 0.045104753226041794, "learning_rate": 8.475844450752201e-05, "loss": 0.01699364185333252, "step": 53720 }, { "epoch": 15.251206358217429, "grad_norm": 1.7504781484603882, "learning_rate": 8.475560601759865e-05, "loss": 0.017351746559143066, "step": 53730 }, { "epoch": 15.25404484814079, "grad_norm": 0.279889851808548, "learning_rate": 8.475276752767528e-05, "loss": 0.02243504822254181, "step": 53740 }, { "epoch": 15.25688333806415, "grad_norm": 4.001912593841553, "learning_rate": 8.474992903775192e-05, "loss": 0.01619107723236084, "step": 53750 }, { "epoch": 15.25972182798751, "grad_norm": 2.1504933834075928, "learning_rate": 8.474709054782856e-05, "loss": 0.03702492117881775, "step": 53760 }, { "epoch": 15.262560317910872, "grad_norm": 0.11518845707178116, "learning_rate": 8.474425205790519e-05, "loss": 0.021258176863193513, "step": 53770 }, { "epoch": 15.265398807834233, "grad_norm": 8.20329475402832, "learning_rate": 8.474141356798184e-05, "loss": 0.017360417544841765, "step": 53780 }, { "epoch": 15.268237297757594, "grad_norm": 4.547636032104492, "learning_rate": 8.473857507805848e-05, "loss": 0.01965424418449402, "step": 53790 }, { "epoch": 15.271075787680953, "grad_norm": 7.402268409729004, "learning_rate": 8.473573658813511e-05, "loss": 0.020911991596221924, "step": 53800 }, { "epoch": 15.273914277604314, "grad_norm": 0.6051502227783203, "learning_rate": 8.473289809821175e-05, "loss": 0.011221100389957429, "step": 53810 }, { "epoch": 15.276752767527675, "grad_norm": 1.2832692861557007, "learning_rate": 8.47300596082884e-05, "loss": 0.012883828580379486, "step": 53820 }, { "epoch": 15.279591257451036, "grad_norm": 8.215574264526367, "learning_rate": 8.472722111836504e-05, "loss": 0.013195377588272095, "step": 53830 }, { "epoch": 15.282429747374398, "grad_norm": 5.2063679695129395, "learning_rate": 8.472438262844168e-05, "loss": 0.009787338972091674, "step": 53840 }, { "epoch": 15.285268237297757, "grad_norm": 5.0826191902160645, "learning_rate": 8.472154413851832e-05, "loss": 0.01944967806339264, "step": 53850 }, { "epoch": 15.288106727221118, "grad_norm": 2.9122211933135986, "learning_rate": 8.471870564859496e-05, "loss": 0.007317180186510086, "step": 53860 }, { "epoch": 15.290945217144479, "grad_norm": 0.4983808696269989, "learning_rate": 8.471586715867159e-05, "loss": 0.0059824176132678986, "step": 53870 }, { "epoch": 15.29378370706784, "grad_norm": 0.22940567135810852, "learning_rate": 8.471302866874823e-05, "loss": 0.02136892229318619, "step": 53880 }, { "epoch": 15.296622196991201, "grad_norm": 0.6941245794296265, "learning_rate": 8.471019017882487e-05, "loss": 0.0071679621934890745, "step": 53890 }, { "epoch": 15.29946068691456, "grad_norm": 5.085391521453857, "learning_rate": 8.47073516889015e-05, "loss": 0.011693920195102691, "step": 53900 }, { "epoch": 15.302299176837922, "grad_norm": 3.824125289916992, "learning_rate": 8.470451319897815e-05, "loss": 0.010328211635351182, "step": 53910 }, { "epoch": 15.305137666761283, "grad_norm": 1.7529088258743286, "learning_rate": 8.47016747090548e-05, "loss": 0.012497632205486298, "step": 53920 }, { "epoch": 15.307976156684644, "grad_norm": 5.898714065551758, "learning_rate": 8.469883621913142e-05, "loss": 0.01783210188150406, "step": 53930 }, { "epoch": 15.310814646608005, "grad_norm": 2.3011796474456787, "learning_rate": 8.469599772920806e-05, "loss": 0.0161525160074234, "step": 53940 }, { "epoch": 15.313653136531366, "grad_norm": 1.318329930305481, "learning_rate": 8.46931592392847e-05, "loss": 0.012332364916801453, "step": 53950 }, { "epoch": 15.316491626454726, "grad_norm": 7.2985639572143555, "learning_rate": 8.469032074936135e-05, "loss": 0.0169648677110672, "step": 53960 }, { "epoch": 15.319330116378087, "grad_norm": 8.138712882995605, "learning_rate": 8.468748225943797e-05, "loss": 0.014516988396644592, "step": 53970 }, { "epoch": 15.322168606301448, "grad_norm": 0.05462360382080078, "learning_rate": 8.468464376951463e-05, "loss": 0.02320452928543091, "step": 53980 }, { "epoch": 15.325007096224809, "grad_norm": 1.5065282583236694, "learning_rate": 8.468180527959126e-05, "loss": 0.010577807575464249, "step": 53990 }, { "epoch": 15.32784558614817, "grad_norm": 1.25725519657135, "learning_rate": 8.46789667896679e-05, "loss": 0.013794022798538207, "step": 54000 }, { "epoch": 15.32784558614817, "eval_accuracy": 0.9696064093596999, "eval_loss": 0.09827471524477005, "eval_runtime": 32.9385, "eval_samples_per_second": 477.465, "eval_steps_per_second": 7.468, "step": 54000 }, { "epoch": 15.33068407607153, "grad_norm": 0.08128181844949722, "learning_rate": 8.467612829974454e-05, "loss": 0.02083265334367752, "step": 54010 }, { "epoch": 15.33352256599489, "grad_norm": 6.743473529815674, "learning_rate": 8.467328980982118e-05, "loss": 0.03586653769016266, "step": 54020 }, { "epoch": 15.336361055918251, "grad_norm": 5.884774684906006, "learning_rate": 8.467045131989781e-05, "loss": 0.01470150500535965, "step": 54030 }, { "epoch": 15.339199545841613, "grad_norm": 1.3585487604141235, "learning_rate": 8.466761282997446e-05, "loss": 0.025665214657783507, "step": 54040 }, { "epoch": 15.342038035764974, "grad_norm": 0.6415427923202515, "learning_rate": 8.46647743400511e-05, "loss": 0.02118144929409027, "step": 54050 }, { "epoch": 15.344876525688333, "grad_norm": 0.19256463646888733, "learning_rate": 8.466193585012773e-05, "loss": 0.009990981221199036, "step": 54060 }, { "epoch": 15.347715015611694, "grad_norm": 2.128110408782959, "learning_rate": 8.465909736020438e-05, "loss": 0.015323324501514435, "step": 54070 }, { "epoch": 15.350553505535055, "grad_norm": 9.324719429016113, "learning_rate": 8.465625887028102e-05, "loss": 0.015974496304988862, "step": 54080 }, { "epoch": 15.353391995458416, "grad_norm": 1.2316455841064453, "learning_rate": 8.465342038035764e-05, "loss": 0.016887007653713225, "step": 54090 }, { "epoch": 15.356230485381777, "grad_norm": 2.296586513519287, "learning_rate": 8.465058189043429e-05, "loss": 0.03277733325958252, "step": 54100 }, { "epoch": 15.359068975305137, "grad_norm": 0.3607354462146759, "learning_rate": 8.464774340051094e-05, "loss": 0.011593474447727204, "step": 54110 }, { "epoch": 15.361907465228498, "grad_norm": 5.1546630859375, "learning_rate": 8.464490491058757e-05, "loss": 0.032687053084373474, "step": 54120 }, { "epoch": 15.364745955151859, "grad_norm": 10.764565467834473, "learning_rate": 8.464206642066421e-05, "loss": 0.02396039813756943, "step": 54130 }, { "epoch": 15.36758444507522, "grad_norm": 4.6813178062438965, "learning_rate": 8.463922793074085e-05, "loss": 0.012174031883478164, "step": 54140 }, { "epoch": 15.370422934998581, "grad_norm": 0.5894547700881958, "learning_rate": 8.463638944081749e-05, "loss": 0.0174388587474823, "step": 54150 }, { "epoch": 15.373261424921942, "grad_norm": 1.0186477899551392, "learning_rate": 8.463355095089412e-05, "loss": 0.02093217521905899, "step": 54160 }, { "epoch": 15.376099914845302, "grad_norm": 3.2941277027130127, "learning_rate": 8.463071246097076e-05, "loss": 0.009626073390245437, "step": 54170 }, { "epoch": 15.378938404768663, "grad_norm": 7.4737629890441895, "learning_rate": 8.462787397104742e-05, "loss": 0.024148929119110107, "step": 54180 }, { "epoch": 15.381776894692024, "grad_norm": 6.647209644317627, "learning_rate": 8.462503548112404e-05, "loss": 0.018761274218559266, "step": 54190 }, { "epoch": 15.384615384615385, "grad_norm": 2.7111053466796875, "learning_rate": 8.462219699120069e-05, "loss": 0.024197030067443847, "step": 54200 }, { "epoch": 15.387453874538746, "grad_norm": 6.672558307647705, "learning_rate": 8.461935850127733e-05, "loss": 0.02863236665725708, "step": 54210 }, { "epoch": 15.390292364462105, "grad_norm": 2.813732862472534, "learning_rate": 8.461652001135395e-05, "loss": 0.0062744438648223875, "step": 54220 }, { "epoch": 15.393130854385467, "grad_norm": 2.758772134780884, "learning_rate": 8.46136815214306e-05, "loss": 0.020545566082000734, "step": 54230 }, { "epoch": 15.395969344308828, "grad_norm": 1.0899931192398071, "learning_rate": 8.461084303150725e-05, "loss": 0.01940789967775345, "step": 54240 }, { "epoch": 15.398807834232189, "grad_norm": 7.135702610015869, "learning_rate": 8.460800454158388e-05, "loss": 0.027333348989486694, "step": 54250 }, { "epoch": 15.40164632415555, "grad_norm": 6.65622091293335, "learning_rate": 8.460516605166052e-05, "loss": 0.023585012555122374, "step": 54260 }, { "epoch": 15.40448481407891, "grad_norm": 1.354555368423462, "learning_rate": 8.460232756173716e-05, "loss": 0.015648871660232544, "step": 54270 }, { "epoch": 15.40732330400227, "grad_norm": 1.2798298597335815, "learning_rate": 8.45994890718138e-05, "loss": 0.02153926491737366, "step": 54280 }, { "epoch": 15.410161793925631, "grad_norm": 3.8521997928619385, "learning_rate": 8.459665058189043e-05, "loss": 0.0171769380569458, "step": 54290 }, { "epoch": 15.413000283848993, "grad_norm": 4.540936470031738, "learning_rate": 8.459381209196707e-05, "loss": 0.01829089820384979, "step": 54300 }, { "epoch": 15.415838773772354, "grad_norm": 14.41648006439209, "learning_rate": 8.459097360204373e-05, "loss": 0.03243809938430786, "step": 54310 }, { "epoch": 15.418677263695713, "grad_norm": 6.365724086761475, "learning_rate": 8.458813511212036e-05, "loss": 0.031610727310180664, "step": 54320 }, { "epoch": 15.421515753619074, "grad_norm": 3.3882853984832764, "learning_rate": 8.4585296622197e-05, "loss": 0.027781227231025697, "step": 54330 }, { "epoch": 15.424354243542435, "grad_norm": 9.242194175720215, "learning_rate": 8.458245813227364e-05, "loss": 0.02314056158065796, "step": 54340 }, { "epoch": 15.427192733465796, "grad_norm": 2.5230467319488525, "learning_rate": 8.457961964235027e-05, "loss": 0.03593643009662628, "step": 54350 }, { "epoch": 15.430031223389157, "grad_norm": 0.035379521548748016, "learning_rate": 8.457678115242691e-05, "loss": 0.007502050697803497, "step": 54360 }, { "epoch": 15.432869713312519, "grad_norm": 2.178201913833618, "learning_rate": 8.457394266250355e-05, "loss": 0.03684559762477875, "step": 54370 }, { "epoch": 15.435708203235878, "grad_norm": 1.0753954648971558, "learning_rate": 8.457110417258019e-05, "loss": 0.031064310669898988, "step": 54380 }, { "epoch": 15.438546693159239, "grad_norm": 7.413527965545654, "learning_rate": 8.456826568265683e-05, "loss": 0.02332882285118103, "step": 54390 }, { "epoch": 15.4413851830826, "grad_norm": 4.157045364379883, "learning_rate": 8.456542719273347e-05, "loss": 0.02034565955400467, "step": 54400 }, { "epoch": 15.444223673005961, "grad_norm": 9.994023323059082, "learning_rate": 8.456258870281011e-05, "loss": 0.022838902473449708, "step": 54410 }, { "epoch": 15.447062162929322, "grad_norm": 0.1484009027481079, "learning_rate": 8.455975021288674e-05, "loss": 0.016698105633258818, "step": 54420 }, { "epoch": 15.449900652852682, "grad_norm": 3.445401430130005, "learning_rate": 8.455691172296338e-05, "loss": 0.019228267669677734, "step": 54430 }, { "epoch": 15.452739142776043, "grad_norm": 3.9224870204925537, "learning_rate": 8.455407323304004e-05, "loss": 0.009100621938705445, "step": 54440 }, { "epoch": 15.455577632699404, "grad_norm": 6.442835807800293, "learning_rate": 8.455123474311667e-05, "loss": 0.023924387991428375, "step": 54450 }, { "epoch": 15.458416122622765, "grad_norm": 3.4737038612365723, "learning_rate": 8.454839625319331e-05, "loss": 0.016269902884960174, "step": 54460 }, { "epoch": 15.461254612546126, "grad_norm": 2.8867506980895996, "learning_rate": 8.454555776326995e-05, "loss": 0.03609227240085602, "step": 54470 }, { "epoch": 15.464093102469485, "grad_norm": 11.527236938476562, "learning_rate": 8.454271927334658e-05, "loss": 0.06099653840065002, "step": 54480 }, { "epoch": 15.466931592392847, "grad_norm": 0.09683560580015182, "learning_rate": 8.453988078342322e-05, "loss": 0.01893717348575592, "step": 54490 }, { "epoch": 15.469770082316208, "grad_norm": 9.327432632446289, "learning_rate": 8.453704229349986e-05, "loss": 0.020831385254859926, "step": 54500 }, { "epoch": 15.469770082316208, "eval_accuracy": 0.9697971641126725, "eval_loss": 0.09414870291948318, "eval_runtime": 31.1835, "eval_samples_per_second": 504.338, "eval_steps_per_second": 7.889, "step": 54500 }, { "epoch": 15.472608572239569, "grad_norm": 0.38578692078590393, "learning_rate": 8.45342038035765e-05, "loss": 0.01565225124359131, "step": 54510 }, { "epoch": 15.47544706216293, "grad_norm": 0.48425784707069397, "learning_rate": 8.453136531365314e-05, "loss": 0.031522467732429504, "step": 54520 }, { "epoch": 15.478285552086291, "grad_norm": 0.8087624907493591, "learning_rate": 8.452852682372978e-05, "loss": 0.009662441909313202, "step": 54530 }, { "epoch": 15.48112404200965, "grad_norm": 3.5353991985321045, "learning_rate": 8.452568833380642e-05, "loss": 0.03443646132946014, "step": 54540 }, { "epoch": 15.483962531933011, "grad_norm": 2.3208131790161133, "learning_rate": 8.452284984388305e-05, "loss": 0.01778501570224762, "step": 54550 }, { "epoch": 15.486801021856373, "grad_norm": 0.7586269378662109, "learning_rate": 8.45200113539597e-05, "loss": 0.008875812590122222, "step": 54560 }, { "epoch": 15.489639511779734, "grad_norm": 0.3832740783691406, "learning_rate": 8.451717286403634e-05, "loss": 0.028171446919441224, "step": 54570 }, { "epoch": 15.492478001703095, "grad_norm": 21.961044311523438, "learning_rate": 8.451433437411298e-05, "loss": 0.0288849413394928, "step": 54580 }, { "epoch": 15.495316491626454, "grad_norm": 1.593316674232483, "learning_rate": 8.451149588418962e-05, "loss": 0.011887896806001663, "step": 54590 }, { "epoch": 15.498154981549815, "grad_norm": 3.871462345123291, "learning_rate": 8.450865739426626e-05, "loss": 0.011851196736097335, "step": 54600 }, { "epoch": 15.500993471473176, "grad_norm": 4.023277759552002, "learning_rate": 8.450581890434289e-05, "loss": 0.04517466127872467, "step": 54610 }, { "epoch": 15.503831961396537, "grad_norm": 4.739530563354492, "learning_rate": 8.450298041441953e-05, "loss": 0.050390034914016724, "step": 54620 }, { "epoch": 15.506670451319899, "grad_norm": 9.761510848999023, "learning_rate": 8.450014192449617e-05, "loss": 0.022122256457805634, "step": 54630 }, { "epoch": 15.509508941243258, "grad_norm": 1.1850578784942627, "learning_rate": 8.449730343457281e-05, "loss": 0.02441563755273819, "step": 54640 }, { "epoch": 15.512347431166619, "grad_norm": 1.3491291999816895, "learning_rate": 8.449446494464945e-05, "loss": 0.017559824883937834, "step": 54650 }, { "epoch": 15.51518592108998, "grad_norm": 10.874698638916016, "learning_rate": 8.44916264547261e-05, "loss": 0.0187638059258461, "step": 54660 }, { "epoch": 15.518024411013341, "grad_norm": 3.09354305267334, "learning_rate": 8.448878796480274e-05, "loss": 0.02499171644449234, "step": 54670 }, { "epoch": 15.520862900936702, "grad_norm": 1.3361862897872925, "learning_rate": 8.448594947487936e-05, "loss": 0.04024271965026856, "step": 54680 }, { "epoch": 15.523701390860062, "grad_norm": 2.5038771629333496, "learning_rate": 8.4483110984956e-05, "loss": 0.01340140700340271, "step": 54690 }, { "epoch": 15.526539880783423, "grad_norm": 1.9345667362213135, "learning_rate": 8.448027249503265e-05, "loss": 0.022599247097969056, "step": 54700 }, { "epoch": 15.529378370706784, "grad_norm": 0.2181825488805771, "learning_rate": 8.447743400510929e-05, "loss": 0.011894188821315765, "step": 54710 }, { "epoch": 15.532216860630145, "grad_norm": 7.642570972442627, "learning_rate": 8.447459551518593e-05, "loss": 0.019374457001686097, "step": 54720 }, { "epoch": 15.535055350553506, "grad_norm": 13.532971382141113, "learning_rate": 8.447175702526257e-05, "loss": 0.03413023352622986, "step": 54730 }, { "epoch": 15.537893840476865, "grad_norm": 8.226264953613281, "learning_rate": 8.44689185353392e-05, "loss": 0.024491581320762634, "step": 54740 }, { "epoch": 15.540732330400227, "grad_norm": 6.073602199554443, "learning_rate": 8.446608004541584e-05, "loss": 0.01736122965812683, "step": 54750 }, { "epoch": 15.543570820323588, "grad_norm": 7.172207832336426, "learning_rate": 8.446324155549248e-05, "loss": 0.032396191358566286, "step": 54760 }, { "epoch": 15.546409310246949, "grad_norm": 1.8932770490646362, "learning_rate": 8.446040306556912e-05, "loss": 0.015120121836662292, "step": 54770 }, { "epoch": 15.54924780017031, "grad_norm": 0.9920343160629272, "learning_rate": 8.445756457564576e-05, "loss": 0.012306859344244003, "step": 54780 }, { "epoch": 15.552086290093671, "grad_norm": 3.0767102241516113, "learning_rate": 8.44547260857224e-05, "loss": 0.017732445895671845, "step": 54790 }, { "epoch": 15.55492478001703, "grad_norm": 9.353545188903809, "learning_rate": 8.445188759579905e-05, "loss": 0.018118011951446533, "step": 54800 }, { "epoch": 15.557763269940391, "grad_norm": 1.0773159265518188, "learning_rate": 8.444904910587567e-05, "loss": 0.020585104823112488, "step": 54810 }, { "epoch": 15.560601759863752, "grad_norm": 0.36645641922950745, "learning_rate": 8.444621061595232e-05, "loss": 0.015449120104312897, "step": 54820 }, { "epoch": 15.563440249787114, "grad_norm": 11.863656997680664, "learning_rate": 8.444337212602896e-05, "loss": 0.018311242759227752, "step": 54830 }, { "epoch": 15.566278739710475, "grad_norm": 7.413156032562256, "learning_rate": 8.44405336361056e-05, "loss": 0.014626303315162658, "step": 54840 }, { "epoch": 15.569117229633834, "grad_norm": 5.375380516052246, "learning_rate": 8.443769514618224e-05, "loss": 0.016820241510868073, "step": 54850 }, { "epoch": 15.571955719557195, "grad_norm": 1.1683075428009033, "learning_rate": 8.443485665625888e-05, "loss": 0.019022004306316377, "step": 54860 }, { "epoch": 15.574794209480556, "grad_norm": 2.8779587745666504, "learning_rate": 8.443201816633551e-05, "loss": 0.023167823255062104, "step": 54870 }, { "epoch": 15.577632699403917, "grad_norm": 2.0287134647369385, "learning_rate": 8.442917967641215e-05, "loss": 0.011822560429573059, "step": 54880 }, { "epoch": 15.580471189327278, "grad_norm": 9.444206237792969, "learning_rate": 8.442634118648879e-05, "loss": 0.021786497533321382, "step": 54890 }, { "epoch": 15.58330967925064, "grad_norm": 8.390957832336426, "learning_rate": 8.442350269656543e-05, "loss": 0.02200324833393097, "step": 54900 }, { "epoch": 15.586148169173999, "grad_norm": 3.732095718383789, "learning_rate": 8.442066420664207e-05, "loss": 0.023503723740577697, "step": 54910 }, { "epoch": 15.58898665909736, "grad_norm": 3.187906503677368, "learning_rate": 8.441782571671872e-05, "loss": 0.029564535617828368, "step": 54920 }, { "epoch": 15.591825149020721, "grad_norm": 7.299498081207275, "learning_rate": 8.441498722679534e-05, "loss": 0.01790069341659546, "step": 54930 }, { "epoch": 15.594663638944082, "grad_norm": 6.8374857902526855, "learning_rate": 8.441214873687198e-05, "loss": 0.047354862093925476, "step": 54940 }, { "epoch": 15.597502128867443, "grad_norm": 9.714761734008789, "learning_rate": 8.440931024694863e-05, "loss": 0.017105314135551452, "step": 54950 }, { "epoch": 15.600340618790803, "grad_norm": 11.35418701171875, "learning_rate": 8.440647175702527e-05, "loss": 0.019198209047317505, "step": 54960 }, { "epoch": 15.603179108714164, "grad_norm": 6.233243465423584, "learning_rate": 8.44036332671019e-05, "loss": 0.025196731090545654, "step": 54970 }, { "epoch": 15.606017598637525, "grad_norm": 14.054466247558594, "learning_rate": 8.440079477717855e-05, "loss": 0.03125257194042206, "step": 54980 }, { "epoch": 15.608856088560886, "grad_norm": 1.4121173620224, "learning_rate": 8.439795628725519e-05, "loss": 0.024663817882537842, "step": 54990 }, { "epoch": 15.611694578484247, "grad_norm": 0.5750805735588074, "learning_rate": 8.439511779733182e-05, "loss": 0.030231815576553345, "step": 55000 }, { "epoch": 15.611694578484247, "eval_accuracy": 0.9700515037833026, "eval_loss": 0.10544576495885849, "eval_runtime": 32.4378, "eval_samples_per_second": 484.836, "eval_steps_per_second": 7.584, "step": 55000 }, { "epoch": 15.614533068407606, "grad_norm": 1.4233965873718262, "learning_rate": 8.439227930740846e-05, "loss": 0.00797814130783081, "step": 55010 }, { "epoch": 15.617371558330968, "grad_norm": 1.988924503326416, "learning_rate": 8.43894408174851e-05, "loss": 0.025711697340011597, "step": 55020 }, { "epoch": 15.620210048254329, "grad_norm": 0.9970003962516785, "learning_rate": 8.438660232756173e-05, "loss": 0.007479928433895111, "step": 55030 }, { "epoch": 15.62304853817769, "grad_norm": 0.9416190385818481, "learning_rate": 8.438376383763839e-05, "loss": 0.0127224862575531, "step": 55040 }, { "epoch": 15.625887028101051, "grad_norm": 3.296367645263672, "learning_rate": 8.438092534771503e-05, "loss": 0.02494272291660309, "step": 55050 }, { "epoch": 15.62872551802441, "grad_norm": 7.384988784790039, "learning_rate": 8.437808685779165e-05, "loss": 0.019091954827308653, "step": 55060 }, { "epoch": 15.631564007947771, "grad_norm": 4.1380696296691895, "learning_rate": 8.43752483678683e-05, "loss": 0.02162626087665558, "step": 55070 }, { "epoch": 15.634402497871132, "grad_norm": 1.1643669605255127, "learning_rate": 8.437240987794494e-05, "loss": 0.024815459549427033, "step": 55080 }, { "epoch": 15.637240987794494, "grad_norm": 1.2944620847702026, "learning_rate": 8.436957138802158e-05, "loss": 0.010534656047821046, "step": 55090 }, { "epoch": 15.640079477717855, "grad_norm": 7.2371907234191895, "learning_rate": 8.43667328980982e-05, "loss": 0.01968838572502136, "step": 55100 }, { "epoch": 15.642917967641214, "grad_norm": 6.594340801239014, "learning_rate": 8.436389440817486e-05, "loss": 0.02184571772813797, "step": 55110 }, { "epoch": 15.645756457564575, "grad_norm": 5.328191757202148, "learning_rate": 8.43610559182515e-05, "loss": 0.008599069714546204, "step": 55120 }, { "epoch": 15.648594947487936, "grad_norm": 0.5576837658882141, "learning_rate": 8.435821742832813e-05, "loss": 0.01337897777557373, "step": 55130 }, { "epoch": 15.651433437411297, "grad_norm": 1.9876502752304077, "learning_rate": 8.435537893840477e-05, "loss": 0.009939737617969513, "step": 55140 }, { "epoch": 15.654271927334658, "grad_norm": 0.6790979504585266, "learning_rate": 8.435254044848141e-05, "loss": 0.00718628317117691, "step": 55150 }, { "epoch": 15.65711041725802, "grad_norm": 8.203849792480469, "learning_rate": 8.434970195855804e-05, "loss": 0.02272047698497772, "step": 55160 }, { "epoch": 15.659948907181379, "grad_norm": 3.855579376220703, "learning_rate": 8.43468634686347e-05, "loss": 0.021958163380622862, "step": 55170 }, { "epoch": 15.66278739710474, "grad_norm": 1.0284315347671509, "learning_rate": 8.434402497871134e-05, "loss": 0.02121942937374115, "step": 55180 }, { "epoch": 15.665625887028101, "grad_norm": 1.9154720306396484, "learning_rate": 8.434118648878797e-05, "loss": 0.004219920933246612, "step": 55190 }, { "epoch": 15.668464376951462, "grad_norm": 8.974950790405273, "learning_rate": 8.43383479988646e-05, "loss": 0.014095652103424072, "step": 55200 }, { "epoch": 15.671302866874823, "grad_norm": 5.038712501525879, "learning_rate": 8.433550950894125e-05, "loss": 0.03908676505088806, "step": 55210 }, { "epoch": 15.674141356798183, "grad_norm": 0.1898447722196579, "learning_rate": 8.433267101901789e-05, "loss": 0.0050050333142280575, "step": 55220 }, { "epoch": 15.676979846721544, "grad_norm": 0.24637170135974884, "learning_rate": 8.432983252909452e-05, "loss": 0.02360936105251312, "step": 55230 }, { "epoch": 15.679818336644905, "grad_norm": 1.6840083599090576, "learning_rate": 8.432699403917117e-05, "loss": 0.007495050877332687, "step": 55240 }, { "epoch": 15.682656826568266, "grad_norm": 1.9523612260818481, "learning_rate": 8.432415554924781e-05, "loss": 0.01523342877626419, "step": 55250 }, { "epoch": 15.685495316491627, "grad_norm": 6.884871959686279, "learning_rate": 8.432131705932444e-05, "loss": 0.012205477058887481, "step": 55260 }, { "epoch": 15.688333806414986, "grad_norm": 1.013818621635437, "learning_rate": 8.431847856940108e-05, "loss": 0.010158887505531311, "step": 55270 }, { "epoch": 15.691172296338348, "grad_norm": 2.2827157974243164, "learning_rate": 8.431564007947772e-05, "loss": 0.019506782293319702, "step": 55280 }, { "epoch": 15.694010786261709, "grad_norm": 11.589195251464844, "learning_rate": 8.431280158955435e-05, "loss": 0.035142982006073, "step": 55290 }, { "epoch": 15.69684927618507, "grad_norm": 0.6367776393890381, "learning_rate": 8.430996309963099e-05, "loss": 0.02003236413002014, "step": 55300 }, { "epoch": 15.69968776610843, "grad_norm": 5.198462963104248, "learning_rate": 8.430712460970765e-05, "loss": 0.009380662441253662, "step": 55310 }, { "epoch": 15.702526256031792, "grad_norm": 8.88513469696045, "learning_rate": 8.430428611978428e-05, "loss": 0.016110339760780336, "step": 55320 }, { "epoch": 15.705364745955151, "grad_norm": 0.13698969781398773, "learning_rate": 8.430144762986092e-05, "loss": 0.014398828148841858, "step": 55330 }, { "epoch": 15.708203235878512, "grad_norm": 0.2826803922653198, "learning_rate": 8.429860913993756e-05, "loss": 0.02325546443462372, "step": 55340 }, { "epoch": 15.711041725801874, "grad_norm": 0.8159944415092468, "learning_rate": 8.42957706500142e-05, "loss": 0.020225876569747926, "step": 55350 }, { "epoch": 15.713880215725235, "grad_norm": 2.624211072921753, "learning_rate": 8.429293216009083e-05, "loss": 0.015441977977752685, "step": 55360 }, { "epoch": 15.716718705648596, "grad_norm": 6.881081581115723, "learning_rate": 8.429009367016748e-05, "loss": 0.02520582973957062, "step": 55370 }, { "epoch": 15.719557195571955, "grad_norm": 0.5079571604728699, "learning_rate": 8.428725518024412e-05, "loss": 0.01600262224674225, "step": 55380 }, { "epoch": 15.722395685495316, "grad_norm": 1.5323145389556885, "learning_rate": 8.428441669032075e-05, "loss": 0.02864490747451782, "step": 55390 }, { "epoch": 15.725234175418677, "grad_norm": 13.976609230041504, "learning_rate": 8.42815782003974e-05, "loss": 0.03871245086193085, "step": 55400 }, { "epoch": 15.728072665342038, "grad_norm": 11.269880294799805, "learning_rate": 8.427873971047403e-05, "loss": 0.02676869332790375, "step": 55410 }, { "epoch": 15.7309111552654, "grad_norm": 6.091939926147461, "learning_rate": 8.427590122055066e-05, "loss": 0.04925175607204437, "step": 55420 }, { "epoch": 15.733749645188759, "grad_norm": 0.9609665870666504, "learning_rate": 8.42730627306273e-05, "loss": 0.016929684579372405, "step": 55430 }, { "epoch": 15.73658813511212, "grad_norm": 0.9189953804016113, "learning_rate": 8.427022424070396e-05, "loss": 0.02172975242137909, "step": 55440 }, { "epoch": 15.739426625035481, "grad_norm": 0.7563531994819641, "learning_rate": 8.426738575078059e-05, "loss": 0.011702787876129151, "step": 55450 }, { "epoch": 15.742265114958842, "grad_norm": 7.579868793487549, "learning_rate": 8.426454726085723e-05, "loss": 0.013877998292446136, "step": 55460 }, { "epoch": 15.745103604882203, "grad_norm": 1.173156976699829, "learning_rate": 8.426170877093387e-05, "loss": 0.015098394453525543, "step": 55470 }, { "epoch": 15.747942094805563, "grad_norm": 1.419605016708374, "learning_rate": 8.425887028101051e-05, "loss": 0.011031854897737503, "step": 55480 }, { "epoch": 15.750780584728924, "grad_norm": 7.805703639984131, "learning_rate": 8.425603179108714e-05, "loss": 0.024576972424983978, "step": 55490 }, { "epoch": 15.753619074652285, "grad_norm": 0.7667238712310791, "learning_rate": 8.425319330116378e-05, "loss": 0.015779134631156922, "step": 55500 }, { "epoch": 15.753619074652285, "eval_accuracy": 0.972722070324919, "eval_loss": 0.08809272944927216, "eval_runtime": 34.0149, "eval_samples_per_second": 462.357, "eval_steps_per_second": 7.232, "step": 55500 }, { "epoch": 15.756457564575646, "grad_norm": 1.7011533975601196, "learning_rate": 8.425035481124043e-05, "loss": 0.016909709572792052, "step": 55510 }, { "epoch": 15.759296054499007, "grad_norm": 1.6607505083084106, "learning_rate": 8.424751632131706e-05, "loss": 0.03629730343818664, "step": 55520 }, { "epoch": 15.762134544422366, "grad_norm": 0.37752634286880493, "learning_rate": 8.42446778313937e-05, "loss": 0.02022552043199539, "step": 55530 }, { "epoch": 15.764973034345727, "grad_norm": 0.7922051548957825, "learning_rate": 8.424183934147035e-05, "loss": 0.008992015570402145, "step": 55540 }, { "epoch": 15.767811524269089, "grad_norm": 5.357341289520264, "learning_rate": 8.423900085154697e-05, "loss": 0.014293225109577179, "step": 55550 }, { "epoch": 15.77065001419245, "grad_norm": 2.102315664291382, "learning_rate": 8.423616236162361e-05, "loss": 0.017306053638458253, "step": 55560 }, { "epoch": 15.77348850411581, "grad_norm": 8.418023109436035, "learning_rate": 8.423332387170027e-05, "loss": 0.012293355911970139, "step": 55570 }, { "epoch": 15.776326994039172, "grad_norm": 9.959659576416016, "learning_rate": 8.42304853817769e-05, "loss": 0.01866167187690735, "step": 55580 }, { "epoch": 15.779165483962531, "grad_norm": 1.7523664236068726, "learning_rate": 8.422764689185354e-05, "loss": 0.02061079889535904, "step": 55590 }, { "epoch": 15.782003973885892, "grad_norm": 7.5381574630737305, "learning_rate": 8.422480840193018e-05, "loss": 0.01902843117713928, "step": 55600 }, { "epoch": 15.784842463809253, "grad_norm": 3.277225971221924, "learning_rate": 8.422196991200682e-05, "loss": 0.02464725524187088, "step": 55610 }, { "epoch": 15.787680953732615, "grad_norm": 5.123229503631592, "learning_rate": 8.421913142208345e-05, "loss": 0.015399637818336486, "step": 55620 }, { "epoch": 15.790519443655976, "grad_norm": 6.134268283843994, "learning_rate": 8.421629293216009e-05, "loss": 0.012647409737110139, "step": 55630 }, { "epoch": 15.793357933579335, "grad_norm": 3.2676146030426025, "learning_rate": 8.421345444223675e-05, "loss": 0.009018808603286743, "step": 55640 }, { "epoch": 15.796196423502696, "grad_norm": 0.753380537033081, "learning_rate": 8.421061595231337e-05, "loss": 0.02446572333574295, "step": 55650 }, { "epoch": 15.799034913426057, "grad_norm": 3.158764362335205, "learning_rate": 8.420777746239001e-05, "loss": 0.011283954232931137, "step": 55660 }, { "epoch": 15.801873403349418, "grad_norm": 5.813663482666016, "learning_rate": 8.420493897246666e-05, "loss": 0.012087101489305497, "step": 55670 }, { "epoch": 15.80471189327278, "grad_norm": 2.0024898052215576, "learning_rate": 8.420210048254328e-05, "loss": 0.009876446425914764, "step": 55680 }, { "epoch": 15.80755038319614, "grad_norm": 9.684225082397461, "learning_rate": 8.419926199261993e-05, "loss": 0.020591318607330322, "step": 55690 }, { "epoch": 15.8103888731195, "grad_norm": 2.91853666305542, "learning_rate": 8.419642350269657e-05, "loss": 0.025776073336601257, "step": 55700 }, { "epoch": 15.813227363042861, "grad_norm": 1.1405210494995117, "learning_rate": 8.419358501277321e-05, "loss": 0.01633152812719345, "step": 55710 }, { "epoch": 15.816065852966222, "grad_norm": 3.071894407272339, "learning_rate": 8.419074652284985e-05, "loss": 0.011122031509876252, "step": 55720 }, { "epoch": 15.818904342889583, "grad_norm": 0.8268190622329712, "learning_rate": 8.418790803292649e-05, "loss": 0.017262043058872224, "step": 55730 }, { "epoch": 15.821742832812944, "grad_norm": 6.498647212982178, "learning_rate": 8.418506954300313e-05, "loss": 0.03717512488365173, "step": 55740 }, { "epoch": 15.824581322736304, "grad_norm": 0.8219181299209595, "learning_rate": 8.418223105307976e-05, "loss": 0.023094086349010466, "step": 55750 }, { "epoch": 15.827419812659665, "grad_norm": 0.5661934614181519, "learning_rate": 8.41793925631564e-05, "loss": 0.014275072515010834, "step": 55760 }, { "epoch": 15.830258302583026, "grad_norm": 2.2975223064422607, "learning_rate": 8.417655407323304e-05, "loss": 0.02451215833425522, "step": 55770 }, { "epoch": 15.833096792506387, "grad_norm": 5.216251850128174, "learning_rate": 8.417371558330968e-05, "loss": 0.017093530297279357, "step": 55780 }, { "epoch": 15.835935282429748, "grad_norm": 2.4824378490448, "learning_rate": 8.417087709338633e-05, "loss": 0.019049453735351562, "step": 55790 }, { "epoch": 15.838773772353107, "grad_norm": 0.4735737144947052, "learning_rate": 8.416803860346297e-05, "loss": 0.007635672390460968, "step": 55800 }, { "epoch": 15.841612262276469, "grad_norm": 2.07071852684021, "learning_rate": 8.41652001135396e-05, "loss": 0.020655842125415803, "step": 55810 }, { "epoch": 15.84445075219983, "grad_norm": 2.036442756652832, "learning_rate": 8.416236162361624e-05, "loss": 0.010478053987026215, "step": 55820 }, { "epoch": 15.84728924212319, "grad_norm": 4.677788734436035, "learning_rate": 8.415952313369288e-05, "loss": 0.029353156685829163, "step": 55830 }, { "epoch": 15.850127732046552, "grad_norm": 0.871347963809967, "learning_rate": 8.415668464376952e-05, "loss": 0.013115771114826202, "step": 55840 }, { "epoch": 15.852966221969911, "grad_norm": 12.381253242492676, "learning_rate": 8.415384615384616e-05, "loss": 0.021936774253845215, "step": 55850 }, { "epoch": 15.855804711893272, "grad_norm": 0.4421146512031555, "learning_rate": 8.41510076639228e-05, "loss": 0.01272898018360138, "step": 55860 }, { "epoch": 15.858643201816633, "grad_norm": 5.484533786773682, "learning_rate": 8.414816917399943e-05, "loss": 0.018179978430271148, "step": 55870 }, { "epoch": 15.861481691739995, "grad_norm": 4.60025691986084, "learning_rate": 8.414533068407607e-05, "loss": 0.004126018285751343, "step": 55880 }, { "epoch": 15.864320181663356, "grad_norm": 7.5523176193237305, "learning_rate": 8.414249219415271e-05, "loss": 0.008252470195293427, "step": 55890 }, { "epoch": 15.867158671586715, "grad_norm": 0.4455030858516693, "learning_rate": 8.413965370422935e-05, "loss": 0.02825177609920502, "step": 55900 }, { "epoch": 15.869997161510076, "grad_norm": 0.8027476072311401, "learning_rate": 8.4136815214306e-05, "loss": 0.022722099721431733, "step": 55910 }, { "epoch": 15.872835651433437, "grad_norm": 8.973291397094727, "learning_rate": 8.413397672438264e-05, "loss": 0.03177742660045624, "step": 55920 }, { "epoch": 15.875674141356798, "grad_norm": 3.293757915496826, "learning_rate": 8.413113823445928e-05, "loss": 0.024587744474411012, "step": 55930 }, { "epoch": 15.87851263128016, "grad_norm": 0.29877614974975586, "learning_rate": 8.41282997445359e-05, "loss": 0.012386366724967957, "step": 55940 }, { "epoch": 15.88135112120352, "grad_norm": 3.581861734390259, "learning_rate": 8.412546125461255e-05, "loss": 0.014319147169589996, "step": 55950 }, { "epoch": 15.88418961112688, "grad_norm": 8.76008129119873, "learning_rate": 8.412262276468919e-05, "loss": 0.021250660717487335, "step": 55960 }, { "epoch": 15.887028101050241, "grad_norm": 0.25301527976989746, "learning_rate": 8.411978427476583e-05, "loss": 0.029129084944725037, "step": 55970 }, { "epoch": 15.889866590973602, "grad_norm": 2.3160195350646973, "learning_rate": 8.411694578484247e-05, "loss": 0.020576286315917968, "step": 55980 }, { "epoch": 15.892705080896963, "grad_norm": 3.26794171333313, "learning_rate": 8.411410729491911e-05, "loss": 0.038111436367034915, "step": 55990 }, { "epoch": 15.895543570820324, "grad_norm": 0.4757935404777527, "learning_rate": 8.411126880499574e-05, "loss": 0.01833450198173523, "step": 56000 }, { "epoch": 15.895543570820324, "eval_accuracy": 0.9717047116423985, "eval_loss": 0.0951467975974083, "eval_runtime": 34.3056, "eval_samples_per_second": 458.439, "eval_steps_per_second": 7.171, "step": 56000 }, { "epoch": 15.898382060743684, "grad_norm": 6.897797584533691, "learning_rate": 8.410843031507238e-05, "loss": 0.019877709448337555, "step": 56010 }, { "epoch": 15.901220550667045, "grad_norm": 8.808621406555176, "learning_rate": 8.410559182514902e-05, "loss": 0.029711833596229552, "step": 56020 }, { "epoch": 15.904059040590406, "grad_norm": 7.439000129699707, "learning_rate": 8.410275333522566e-05, "loss": 0.03692132830619812, "step": 56030 }, { "epoch": 15.906897530513767, "grad_norm": 1.089818000793457, "learning_rate": 8.40999148453023e-05, "loss": 0.018920217454433442, "step": 56040 }, { "epoch": 15.909736020437128, "grad_norm": 0.8006610870361328, "learning_rate": 8.409707635537895e-05, "loss": 0.012119705975055694, "step": 56050 }, { "epoch": 15.912574510360487, "grad_norm": 3.096811294555664, "learning_rate": 8.409423786545559e-05, "loss": 0.015306989848613738, "step": 56060 }, { "epoch": 15.915413000283849, "grad_norm": 1.861289620399475, "learning_rate": 8.409139937553222e-05, "loss": 0.031222766637802123, "step": 56070 }, { "epoch": 15.91825149020721, "grad_norm": 1.7287468910217285, "learning_rate": 8.408856088560886e-05, "loss": 0.02330026924610138, "step": 56080 }, { "epoch": 15.92108998013057, "grad_norm": 0.387186199426651, "learning_rate": 8.40857223956855e-05, "loss": 0.030041831731796264, "step": 56090 }, { "epoch": 15.923928470053932, "grad_norm": 0.9799952507019043, "learning_rate": 8.408288390576213e-05, "loss": 0.03200510442256928, "step": 56100 }, { "epoch": 15.926766959977293, "grad_norm": 1.9243007898330688, "learning_rate": 8.408004541583878e-05, "loss": 0.0373708188533783, "step": 56110 }, { "epoch": 15.929605449900652, "grad_norm": 8.865007400512695, "learning_rate": 8.407720692591542e-05, "loss": 0.024199195206165314, "step": 56120 }, { "epoch": 15.932443939824013, "grad_norm": 4.830738544464111, "learning_rate": 8.407436843599205e-05, "loss": 0.021696978807449342, "step": 56130 }, { "epoch": 15.935282429747375, "grad_norm": 2.1903741359710693, "learning_rate": 8.407152994606869e-05, "loss": 0.01798231303691864, "step": 56140 }, { "epoch": 15.938120919670736, "grad_norm": 4.030086517333984, "learning_rate": 8.406869145614533e-05, "loss": 0.024228760600090028, "step": 56150 }, { "epoch": 15.940959409594097, "grad_norm": 0.516659140586853, "learning_rate": 8.406585296622198e-05, "loss": 0.010312351584434509, "step": 56160 }, { "epoch": 15.943797899517456, "grad_norm": 4.089479923248291, "learning_rate": 8.406301447629862e-05, "loss": 0.024681949615478517, "step": 56170 }, { "epoch": 15.946636389440817, "grad_norm": 13.480799674987793, "learning_rate": 8.406017598637526e-05, "loss": 0.03466725945472717, "step": 56180 }, { "epoch": 15.949474879364178, "grad_norm": 4.390480995178223, "learning_rate": 8.40573374964519e-05, "loss": 0.04067705571651459, "step": 56190 }, { "epoch": 15.95231336928754, "grad_norm": 7.408836364746094, "learning_rate": 8.405449900652853e-05, "loss": 0.03349973261356354, "step": 56200 }, { "epoch": 15.9551518592109, "grad_norm": 9.587178230285645, "learning_rate": 8.405166051660517e-05, "loss": 0.025940483808517455, "step": 56210 }, { "epoch": 15.95799034913426, "grad_norm": 2.514967441558838, "learning_rate": 8.404882202668181e-05, "loss": 0.006768158078193665, "step": 56220 }, { "epoch": 15.960828839057621, "grad_norm": 12.849459648132324, "learning_rate": 8.404598353675844e-05, "loss": 0.02530829608440399, "step": 56230 }, { "epoch": 15.963667328980982, "grad_norm": 8.281516075134277, "learning_rate": 8.404314504683509e-05, "loss": 0.016662207245826722, "step": 56240 }, { "epoch": 15.966505818904343, "grad_norm": 0.2478809654712677, "learning_rate": 8.404030655691173e-05, "loss": 0.015737594664096834, "step": 56250 }, { "epoch": 15.969344308827704, "grad_norm": 13.001413345336914, "learning_rate": 8.403746806698836e-05, "loss": 0.03403189778327942, "step": 56260 }, { "epoch": 15.972182798751064, "grad_norm": 0.37501904368400574, "learning_rate": 8.4034629577065e-05, "loss": 0.012385854870080948, "step": 56270 }, { "epoch": 15.975021288674425, "grad_norm": 2.4331798553466797, "learning_rate": 8.403179108714164e-05, "loss": 0.03984523713588715, "step": 56280 }, { "epoch": 15.977859778597786, "grad_norm": 4.730863571166992, "learning_rate": 8.402895259721829e-05, "loss": 0.028044188022613527, "step": 56290 }, { "epoch": 15.980698268521147, "grad_norm": 12.803302764892578, "learning_rate": 8.402611410729491e-05, "loss": 0.021888057887554168, "step": 56300 }, { "epoch": 15.983536758444508, "grad_norm": 3.0311341285705566, "learning_rate": 8.402327561737157e-05, "loss": 0.0495830774307251, "step": 56310 }, { "epoch": 15.986375248367867, "grad_norm": 9.925941467285156, "learning_rate": 8.402043712744821e-05, "loss": 0.05141599178314209, "step": 56320 }, { "epoch": 15.989213738291228, "grad_norm": 1.1655880212783813, "learning_rate": 8.401759863752484e-05, "loss": 0.018808133900165558, "step": 56330 }, { "epoch": 15.99205222821459, "grad_norm": 2.2175683975219727, "learning_rate": 8.401476014760148e-05, "loss": 0.009132824838161469, "step": 56340 }, { "epoch": 15.99489071813795, "grad_norm": 3.056271553039551, "learning_rate": 8.401192165767812e-05, "loss": 0.006186390295624733, "step": 56350 }, { "epoch": 15.997729208061312, "grad_norm": 2.4351301193237305, "learning_rate": 8.400908316775475e-05, "loss": 0.018547897040843964, "step": 56360 }, { "epoch": 16.00056769798467, "grad_norm": 0.8901607394218445, "learning_rate": 8.40062446778314e-05, "loss": 0.021423055231571196, "step": 56370 }, { "epoch": 16.003406187908034, "grad_norm": 2.985861301422119, "learning_rate": 8.400340618790804e-05, "loss": 0.008821921050548553, "step": 56380 }, { "epoch": 16.006244677831393, "grad_norm": 1.968876838684082, "learning_rate": 8.400056769798467e-05, "loss": 0.01445077806711197, "step": 56390 }, { "epoch": 16.009083167754753, "grad_norm": 0.5305187106132507, "learning_rate": 8.399772920806131e-05, "loss": 0.01731486916542053, "step": 56400 }, { "epoch": 16.011921657678116, "grad_norm": 9.112597465515137, "learning_rate": 8.399489071813796e-05, "loss": 0.011555841565132141, "step": 56410 }, { "epoch": 16.014760147601475, "grad_norm": 0.9991663694381714, "learning_rate": 8.39920522282146e-05, "loss": 0.019527646899223327, "step": 56420 }, { "epoch": 16.017598637524838, "grad_norm": 9.205231666564941, "learning_rate": 8.398921373829122e-05, "loss": 0.01935082972049713, "step": 56430 }, { "epoch": 16.020437127448197, "grad_norm": 8.980486869812012, "learning_rate": 8.398637524836788e-05, "loss": 0.03393546938896179, "step": 56440 }, { "epoch": 16.02327561737156, "grad_norm": 13.303410530090332, "learning_rate": 8.398353675844452e-05, "loss": 0.03439453542232514, "step": 56450 }, { "epoch": 16.02611410729492, "grad_norm": 1.3741915225982666, "learning_rate": 8.398069826852115e-05, "loss": 0.013812722265720367, "step": 56460 }, { "epoch": 16.02895259721828, "grad_norm": 1.7623605728149414, "learning_rate": 8.397785977859779e-05, "loss": 0.0031712956726551054, "step": 56470 }, { "epoch": 16.03179108714164, "grad_norm": 0.4614761769771576, "learning_rate": 8.397502128867443e-05, "loss": 0.011800993978977204, "step": 56480 }, { "epoch": 16.034629577065, "grad_norm": 1.569084882736206, "learning_rate": 8.397218279875106e-05, "loss": 0.0159055694937706, "step": 56490 }, { "epoch": 16.037468066988364, "grad_norm": 8.473098754882812, "learning_rate": 8.396934430882771e-05, "loss": 0.020232082903385164, "step": 56500 }, { "epoch": 16.037468066988364, "eval_accuracy": 0.967953201500604, "eval_loss": 0.10747475922107697, "eval_runtime": 32.1639, "eval_samples_per_second": 488.964, "eval_steps_per_second": 7.648, "step": 56500 }, { "epoch": 16.040306556911723, "grad_norm": 10.859273910522461, "learning_rate": 8.396650581890436e-05, "loss": 0.011334293335676194, "step": 56510 }, { "epoch": 16.043145046835082, "grad_norm": 0.1807185560464859, "learning_rate": 8.396366732898098e-05, "loss": 0.022573040425777437, "step": 56520 }, { "epoch": 16.045983536758445, "grad_norm": 0.4650689363479614, "learning_rate": 8.396082883905762e-05, "loss": 0.009868885576725005, "step": 56530 }, { "epoch": 16.048822026681805, "grad_norm": 6.620707035064697, "learning_rate": 8.395799034913427e-05, "loss": 0.006265765428543091, "step": 56540 }, { "epoch": 16.051660516605168, "grad_norm": 2.824350595474243, "learning_rate": 8.395515185921091e-05, "loss": 0.014321784675121307, "step": 56550 }, { "epoch": 16.054499006528527, "grad_norm": 1.1856701374053955, "learning_rate": 8.395231336928754e-05, "loss": 0.026601141691207884, "step": 56560 }, { "epoch": 16.057337496451886, "grad_norm": 7.4138569831848145, "learning_rate": 8.394947487936419e-05, "loss": 0.010401749610900879, "step": 56570 }, { "epoch": 16.06017598637525, "grad_norm": 0.8966770768165588, "learning_rate": 8.394663638944083e-05, "loss": 0.01199834793806076, "step": 56580 }, { "epoch": 16.06301447629861, "grad_norm": 7.913590431213379, "learning_rate": 8.394379789951746e-05, "loss": 0.02560151517391205, "step": 56590 }, { "epoch": 16.06585296622197, "grad_norm": 6.25996208190918, "learning_rate": 8.39409594095941e-05, "loss": 0.010432422161102295, "step": 56600 }, { "epoch": 16.06869145614533, "grad_norm": 5.76288366317749, "learning_rate": 8.393812091967074e-05, "loss": 0.022164277732372284, "step": 56610 }, { "epoch": 16.07152994606869, "grad_norm": 1.8224483728408813, "learning_rate": 8.393528242974737e-05, "loss": 0.0068722285330295564, "step": 56620 }, { "epoch": 16.074368435992053, "grad_norm": 2.7723124027252197, "learning_rate": 8.393244393982401e-05, "loss": 0.01712132841348648, "step": 56630 }, { "epoch": 16.077206925915412, "grad_norm": 0.07034726440906525, "learning_rate": 8.392960544990067e-05, "loss": 0.02281448096036911, "step": 56640 }, { "epoch": 16.080045415838775, "grad_norm": 0.4675564765930176, "learning_rate": 8.39267669599773e-05, "loss": 0.002896893210709095, "step": 56650 }, { "epoch": 16.082883905762134, "grad_norm": 4.230059623718262, "learning_rate": 8.392392847005394e-05, "loss": 0.02167966663837433, "step": 56660 }, { "epoch": 16.085722395685494, "grad_norm": 0.5504025816917419, "learning_rate": 8.392108998013058e-05, "loss": 0.012623390555381775, "step": 56670 }, { "epoch": 16.088560885608857, "grad_norm": 0.33202457427978516, "learning_rate": 8.391825149020722e-05, "loss": 0.00394495390355587, "step": 56680 }, { "epoch": 16.091399375532216, "grad_norm": 3.570882558822632, "learning_rate": 8.391541300028385e-05, "loss": 0.008053090423345566, "step": 56690 }, { "epoch": 16.09423786545558, "grad_norm": 0.6065452098846436, "learning_rate": 8.39125745103605e-05, "loss": 0.01355886459350586, "step": 56700 }, { "epoch": 16.097076355378938, "grad_norm": 3.7696828842163086, "learning_rate": 8.390973602043713e-05, "loss": 0.011086444556713104, "step": 56710 }, { "epoch": 16.099914845302298, "grad_norm": 4.3725104331970215, "learning_rate": 8.390689753051377e-05, "loss": 0.01031377837061882, "step": 56720 }, { "epoch": 16.10275333522566, "grad_norm": 1.8314452171325684, "learning_rate": 8.390405904059041e-05, "loss": 0.017850810289382936, "step": 56730 }, { "epoch": 16.10559182514902, "grad_norm": 0.5787264704704285, "learning_rate": 8.390122055066705e-05, "loss": 0.02042980194091797, "step": 56740 }, { "epoch": 16.108430315072383, "grad_norm": 5.685617446899414, "learning_rate": 8.389838206074368e-05, "loss": 0.016629022359848023, "step": 56750 }, { "epoch": 16.111268804995742, "grad_norm": 6.529917240142822, "learning_rate": 8.389554357082032e-05, "loss": 0.015001729130744934, "step": 56760 }, { "epoch": 16.1141072949191, "grad_norm": 0.27390769124031067, "learning_rate": 8.389270508089698e-05, "loss": 0.013690505921840668, "step": 56770 }, { "epoch": 16.116945784842464, "grad_norm": 4.43182897567749, "learning_rate": 8.38898665909736e-05, "loss": 0.015754292905330657, "step": 56780 }, { "epoch": 16.119784274765824, "grad_norm": 0.6521686315536499, "learning_rate": 8.388702810105025e-05, "loss": 0.01274714469909668, "step": 56790 }, { "epoch": 16.122622764689186, "grad_norm": 11.09865665435791, "learning_rate": 8.388418961112689e-05, "loss": 0.025448402762413024, "step": 56800 }, { "epoch": 16.125461254612546, "grad_norm": 6.098222255706787, "learning_rate": 8.388135112120352e-05, "loss": 0.012801066040992737, "step": 56810 }, { "epoch": 16.12829974453591, "grad_norm": 6.685745716094971, "learning_rate": 8.387851263128016e-05, "loss": 0.015591798722743988, "step": 56820 }, { "epoch": 16.131138234459268, "grad_norm": 1.9449549913406372, "learning_rate": 8.38756741413568e-05, "loss": 0.010579733550548554, "step": 56830 }, { "epoch": 16.133976724382627, "grad_norm": 3.4649441242218018, "learning_rate": 8.387283565143344e-05, "loss": 0.007915187627077103, "step": 56840 }, { "epoch": 16.13681521430599, "grad_norm": 2.9308435916900635, "learning_rate": 8.386999716151008e-05, "loss": 0.01589634269475937, "step": 56850 }, { "epoch": 16.13965370422935, "grad_norm": 0.8005471229553223, "learning_rate": 8.386715867158672e-05, "loss": 0.023281852900981902, "step": 56860 }, { "epoch": 16.142492194152712, "grad_norm": 0.29310187697410583, "learning_rate": 8.386432018166336e-05, "loss": 0.011479278653860092, "step": 56870 }, { "epoch": 16.14533068407607, "grad_norm": 0.052964385598897934, "learning_rate": 8.386148169173999e-05, "loss": 0.0024305464699864387, "step": 56880 }, { "epoch": 16.14816917399943, "grad_norm": 0.8092339038848877, "learning_rate": 8.385864320181663e-05, "loss": 0.0064267762005329136, "step": 56890 }, { "epoch": 16.151007663922794, "grad_norm": 0.38532713055610657, "learning_rate": 8.385580471189329e-05, "loss": 0.020493978261947633, "step": 56900 }, { "epoch": 16.153846153846153, "grad_norm": 1.4107842445373535, "learning_rate": 8.385296622196992e-05, "loss": 0.014097584784030915, "step": 56910 }, { "epoch": 16.156684643769516, "grad_norm": 1.278185486793518, "learning_rate": 8.385012773204656e-05, "loss": 0.010156059265136718, "step": 56920 }, { "epoch": 16.159523133692876, "grad_norm": 15.548835754394531, "learning_rate": 8.38472892421232e-05, "loss": 0.03586422204971314, "step": 56930 }, { "epoch": 16.162361623616235, "grad_norm": 1.8588777780532837, "learning_rate": 8.384445075219983e-05, "loss": 0.010122347623109818, "step": 56940 }, { "epoch": 16.165200113539598, "grad_norm": 13.782515525817871, "learning_rate": 8.384161226227647e-05, "loss": 0.026955407857894898, "step": 56950 }, { "epoch": 16.168038603462957, "grad_norm": 4.238024711608887, "learning_rate": 8.383877377235311e-05, "loss": 0.022244331240653992, "step": 56960 }, { "epoch": 16.17087709338632, "grad_norm": 1.2032158374786377, "learning_rate": 8.383593528242975e-05, "loss": 0.02435516119003296, "step": 56970 }, { "epoch": 16.17371558330968, "grad_norm": 10.610177040100098, "learning_rate": 8.383309679250639e-05, "loss": 0.025236564874649047, "step": 56980 }, { "epoch": 16.17655407323304, "grad_norm": 0.7117663621902466, "learning_rate": 8.383025830258303e-05, "loss": 0.019537344574928284, "step": 56990 }, { "epoch": 16.1793925631564, "grad_norm": 2.3215038776397705, "learning_rate": 8.382741981265967e-05, "loss": 0.015648584067821502, "step": 57000 }, { "epoch": 16.1793925631564, "eval_accuracy": 0.9684618808418644, "eval_loss": 0.10522115975618362, "eval_runtime": 32.5765, "eval_samples_per_second": 482.771, "eval_steps_per_second": 7.551, "step": 57000 }, { "epoch": 16.18223105307976, "grad_norm": 3.1272974014282227, "learning_rate": 8.38245813227363e-05, "loss": 0.025592753291130067, "step": 57010 }, { "epoch": 16.185069543003124, "grad_norm": 0.5658525228500366, "learning_rate": 8.382174283281294e-05, "loss": 0.009431424736976623, "step": 57020 }, { "epoch": 16.187908032926483, "grad_norm": 7.017989158630371, "learning_rate": 8.381890434288958e-05, "loss": 0.016604286432266236, "step": 57030 }, { "epoch": 16.190746522849842, "grad_norm": 1.4445891380310059, "learning_rate": 8.381606585296623e-05, "loss": 0.020924516022205353, "step": 57040 }, { "epoch": 16.193585012773205, "grad_norm": 0.44732800126075745, "learning_rate": 8.381322736304287e-05, "loss": 0.009089373052120209, "step": 57050 }, { "epoch": 16.196423502696565, "grad_norm": 2.191286563873291, "learning_rate": 8.381038887311951e-05, "loss": 0.007824166864156722, "step": 57060 }, { "epoch": 16.199261992619927, "grad_norm": 2.083449125289917, "learning_rate": 8.380755038319614e-05, "loss": 0.024605640769004823, "step": 57070 }, { "epoch": 16.202100482543287, "grad_norm": 6.767728805541992, "learning_rate": 8.380471189327278e-05, "loss": 0.02410988658666611, "step": 57080 }, { "epoch": 16.204938972466646, "grad_norm": 0.8593205213546753, "learning_rate": 8.380215725234176e-05, "loss": 0.017685487866401672, "step": 57090 }, { "epoch": 16.20777746239001, "grad_norm": 0.786054253578186, "learning_rate": 8.37993187624184e-05, "loss": 0.013179653882980346, "step": 57100 }, { "epoch": 16.21061595231337, "grad_norm": 5.848886966705322, "learning_rate": 8.379648027249504e-05, "loss": 0.011715566366910934, "step": 57110 }, { "epoch": 16.21345444223673, "grad_norm": 1.0502969026565552, "learning_rate": 8.379364178257167e-05, "loss": 0.012504464387893677, "step": 57120 }, { "epoch": 16.21629293216009, "grad_norm": 3.7211461067199707, "learning_rate": 8.379080329264831e-05, "loss": 0.005830828845500946, "step": 57130 }, { "epoch": 16.21913142208345, "grad_norm": 8.890707969665527, "learning_rate": 8.378796480272495e-05, "loss": 0.01786009669303894, "step": 57140 }, { "epoch": 16.221969912006813, "grad_norm": 2.393730878829956, "learning_rate": 8.378512631280159e-05, "loss": 0.021514545381069183, "step": 57150 }, { "epoch": 16.224808401930172, "grad_norm": 2.6597983837127686, "learning_rate": 8.378228782287823e-05, "loss": 0.020632481575012206, "step": 57160 }, { "epoch": 16.227646891853535, "grad_norm": 0.9049206972122192, "learning_rate": 8.377944933295488e-05, "loss": 0.008988716453313828, "step": 57170 }, { "epoch": 16.230485381776894, "grad_norm": 1.4707049131393433, "learning_rate": 8.377661084303152e-05, "loss": 0.013538409769535065, "step": 57180 }, { "epoch": 16.233323871700254, "grad_norm": 0.7155722379684448, "learning_rate": 8.377377235310814e-05, "loss": 0.011087857931852341, "step": 57190 }, { "epoch": 16.236162361623617, "grad_norm": 0.4963923394680023, "learning_rate": 8.377093386318479e-05, "loss": 0.012855373322963715, "step": 57200 }, { "epoch": 16.239000851546976, "grad_norm": 3.4948840141296387, "learning_rate": 8.376809537326143e-05, "loss": 0.026183274388313294, "step": 57210 }, { "epoch": 16.24183934147034, "grad_norm": 1.185808539390564, "learning_rate": 8.376525688333807e-05, "loss": 0.017835700511932374, "step": 57220 }, { "epoch": 16.244677831393698, "grad_norm": 0.7829822301864624, "learning_rate": 8.376241839341471e-05, "loss": 0.00820973664522171, "step": 57230 }, { "epoch": 16.24751632131706, "grad_norm": 0.20171129703521729, "learning_rate": 8.375957990349135e-05, "loss": 0.0032123152166604997, "step": 57240 }, { "epoch": 16.25035481124042, "grad_norm": 11.950754165649414, "learning_rate": 8.375674141356798e-05, "loss": 0.019322913885116578, "step": 57250 }, { "epoch": 16.25319330116378, "grad_norm": 0.5684473514556885, "learning_rate": 8.375418677263697e-05, "loss": 0.008164143562316895, "step": 57260 }, { "epoch": 16.256031791087143, "grad_norm": 0.08749204128980637, "learning_rate": 8.37513482827136e-05, "loss": 0.010122041404247283, "step": 57270 }, { "epoch": 16.258870281010502, "grad_norm": 1.1085296869277954, "learning_rate": 8.374850979279024e-05, "loss": 0.028220856189727785, "step": 57280 }, { "epoch": 16.261708770933865, "grad_norm": 0.19881758093833923, "learning_rate": 8.374567130286688e-05, "loss": 0.009944482147693634, "step": 57290 }, { "epoch": 16.264547260857224, "grad_norm": 0.7458714842796326, "learning_rate": 8.374283281294351e-05, "loss": 0.020114386081695558, "step": 57300 }, { "epoch": 16.267385750780583, "grad_norm": 6.512213230133057, "learning_rate": 8.373999432302015e-05, "loss": 0.005816457420587539, "step": 57310 }, { "epoch": 16.270224240703946, "grad_norm": 3.4642205238342285, "learning_rate": 8.37371558330968e-05, "loss": 0.020034995675086976, "step": 57320 }, { "epoch": 16.273062730627306, "grad_norm": 10.368402481079102, "learning_rate": 8.373431734317344e-05, "loss": 0.02126258909702301, "step": 57330 }, { "epoch": 16.27590122055067, "grad_norm": 0.23344653844833374, "learning_rate": 8.373147885325008e-05, "loss": 0.015001994371414185, "step": 57340 }, { "epoch": 16.278739710474028, "grad_norm": 0.16870109736919403, "learning_rate": 8.372864036332672e-05, "loss": 0.012304368615150451, "step": 57350 }, { "epoch": 16.281578200397387, "grad_norm": 3.296494722366333, "learning_rate": 8.372580187340336e-05, "loss": 0.013950826227664947, "step": 57360 }, { "epoch": 16.28441669032075, "grad_norm": 3.906456232070923, "learning_rate": 8.372296338347999e-05, "loss": 0.009877628087997437, "step": 57370 }, { "epoch": 16.28725518024411, "grad_norm": 1.124372124671936, "learning_rate": 8.372012489355663e-05, "loss": 0.013397316634654998, "step": 57380 }, { "epoch": 16.290093670167472, "grad_norm": 12.569952964782715, "learning_rate": 8.371728640363327e-05, "loss": 0.01215212345123291, "step": 57390 }, { "epoch": 16.29293216009083, "grad_norm": 0.30521121621131897, "learning_rate": 8.371444791370991e-05, "loss": 0.013420632481575013, "step": 57400 }, { "epoch": 16.29577065001419, "grad_norm": 0.14397472143173218, "learning_rate": 8.371160942378655e-05, "loss": 0.016260851919651032, "step": 57410 }, { "epoch": 16.298609139937554, "grad_norm": 2.4212818145751953, "learning_rate": 8.37087709338632e-05, "loss": 0.00896652191877365, "step": 57420 }, { "epoch": 16.301447629860913, "grad_norm": 1.0097072124481201, "learning_rate": 8.370593244393982e-05, "loss": 0.006537635624408722, "step": 57430 }, { "epoch": 16.304286119784276, "grad_norm": 3.479297161102295, "learning_rate": 8.370309395401646e-05, "loss": 0.02700788676738739, "step": 57440 }, { "epoch": 16.307124609707635, "grad_norm": 2.2049782276153564, "learning_rate": 8.37002554640931e-05, "loss": 0.025444400310516358, "step": 57450 }, { "epoch": 16.309963099630995, "grad_norm": 8.471036911010742, "learning_rate": 8.369741697416975e-05, "loss": 0.024864199757575988, "step": 57460 }, { "epoch": 16.312801589554358, "grad_norm": 0.7041513323783875, "learning_rate": 8.369457848424639e-05, "loss": 0.018921288847923278, "step": 57470 }, { "epoch": 16.315640079477717, "grad_norm": 0.6235687732696533, "learning_rate": 8.369173999432303e-05, "loss": 0.008127862215042114, "step": 57480 }, { "epoch": 16.31847856940108, "grad_norm": 8.633461952209473, "learning_rate": 8.368890150439967e-05, "loss": 0.008297201991081238, "step": 57490 }, { "epoch": 16.32131705932444, "grad_norm": 8.218915939331055, "learning_rate": 8.36860630144763e-05, "loss": 0.007035072147846222, "step": 57500 }, { "epoch": 16.32131705932444, "eval_accuracy": 0.9740573535957271, "eval_loss": 0.09236717224121094, "eval_runtime": 33.1442, "eval_samples_per_second": 474.502, "eval_steps_per_second": 7.422, "step": 57500 }, { "epoch": 16.3241555492478, "grad_norm": 1.319595456123352, "learning_rate": 8.368322452455294e-05, "loss": 0.017992162704467775, "step": 57510 }, { "epoch": 16.32699403917116, "grad_norm": 1.8212885856628418, "learning_rate": 8.368038603462958e-05, "loss": 0.014280998706817627, "step": 57520 }, { "epoch": 16.32983252909452, "grad_norm": 10.973666191101074, "learning_rate": 8.367754754470622e-05, "loss": 0.02207457274198532, "step": 57530 }, { "epoch": 16.332671019017884, "grad_norm": 5.747744560241699, "learning_rate": 8.367470905478286e-05, "loss": 0.013561590015888214, "step": 57540 }, { "epoch": 16.335509508941243, "grad_norm": 2.923334836959839, "learning_rate": 8.36718705648595e-05, "loss": 0.0382994145154953, "step": 57550 }, { "epoch": 16.338347998864602, "grad_norm": 0.3345324993133545, "learning_rate": 8.366903207493613e-05, "loss": 0.008632700145244598, "step": 57560 }, { "epoch": 16.341186488787965, "grad_norm": 0.4331224262714386, "learning_rate": 8.366619358501277e-05, "loss": 0.01898273974657059, "step": 57570 }, { "epoch": 16.344024978711325, "grad_norm": 0.03429948538541794, "learning_rate": 8.366335509508942e-05, "loss": 0.01038203239440918, "step": 57580 }, { "epoch": 16.346863468634687, "grad_norm": 4.028921604156494, "learning_rate": 8.366051660516606e-05, "loss": 0.015377968549728394, "step": 57590 }, { "epoch": 16.349701958558047, "grad_norm": 1.4573915004730225, "learning_rate": 8.36576781152427e-05, "loss": 0.007619843631982803, "step": 57600 }, { "epoch": 16.352540448481406, "grad_norm": 0.10547356307506561, "learning_rate": 8.365483962531934e-05, "loss": 0.009423045068979263, "step": 57610 }, { "epoch": 16.35537893840477, "grad_norm": 5.91762113571167, "learning_rate": 8.365200113539597e-05, "loss": 0.016881753504276276, "step": 57620 }, { "epoch": 16.35821742832813, "grad_norm": 5.484067916870117, "learning_rate": 8.364916264547261e-05, "loss": 0.024827812612056733, "step": 57630 }, { "epoch": 16.36105591825149, "grad_norm": 10.583457946777344, "learning_rate": 8.364632415554925e-05, "loss": 0.014942434430122376, "step": 57640 }, { "epoch": 16.36389440817485, "grad_norm": 0.7182465195655823, "learning_rate": 8.364348566562589e-05, "loss": 0.01801667958498001, "step": 57650 }, { "epoch": 16.366732898098213, "grad_norm": 0.5132886171340942, "learning_rate": 8.364064717570253e-05, "loss": 0.027531760931015014, "step": 57660 }, { "epoch": 16.369571388021573, "grad_norm": 1.311134696006775, "learning_rate": 8.363780868577917e-05, "loss": 0.007631507515907287, "step": 57670 }, { "epoch": 16.372409877944932, "grad_norm": 8.569693565368652, "learning_rate": 8.363497019585582e-05, "loss": 0.01950222998857498, "step": 57680 }, { "epoch": 16.375248367868295, "grad_norm": 0.3966015875339508, "learning_rate": 8.363213170593244e-05, "loss": 0.008289892226457596, "step": 57690 }, { "epoch": 16.378086857791654, "grad_norm": 2.65739369392395, "learning_rate": 8.362929321600909e-05, "loss": 0.01290401965379715, "step": 57700 }, { "epoch": 16.380925347715017, "grad_norm": 2.674476385116577, "learning_rate": 8.362645472608573e-05, "loss": 0.012888827919960022, "step": 57710 }, { "epoch": 16.383763837638377, "grad_norm": 7.033500671386719, "learning_rate": 8.362361623616235e-05, "loss": 0.020376904308795928, "step": 57720 }, { "epoch": 16.386602327561736, "grad_norm": 2.621504068374634, "learning_rate": 8.362077774623901e-05, "loss": 0.02202935814857483, "step": 57730 }, { "epoch": 16.3894408174851, "grad_norm": 12.287653923034668, "learning_rate": 8.361793925631565e-05, "loss": 0.024351033568382262, "step": 57740 }, { "epoch": 16.392279307408458, "grad_norm": 1.6419258117675781, "learning_rate": 8.361510076639228e-05, "loss": 0.02276267409324646, "step": 57750 }, { "epoch": 16.39511779733182, "grad_norm": 16.25054931640625, "learning_rate": 8.361226227646892e-05, "loss": 0.016591793298721312, "step": 57760 }, { "epoch": 16.39795628725518, "grad_norm": 3.719273567199707, "learning_rate": 8.360942378654556e-05, "loss": 0.025364640355110168, "step": 57770 }, { "epoch": 16.40079477717854, "grad_norm": 10.759260177612305, "learning_rate": 8.36065852966222e-05, "loss": 0.016831652820110322, "step": 57780 }, { "epoch": 16.403633267101903, "grad_norm": 1.9268169403076172, "learning_rate": 8.360374680669883e-05, "loss": 0.015395458042621612, "step": 57790 }, { "epoch": 16.406471757025262, "grad_norm": 7.143798828125, "learning_rate": 8.360090831677549e-05, "loss": 0.03019135892391205, "step": 57800 }, { "epoch": 16.409310246948625, "grad_norm": 9.161967277526855, "learning_rate": 8.359806982685213e-05, "loss": 0.03650258183479309, "step": 57810 }, { "epoch": 16.412148736871984, "grad_norm": 0.663998007774353, "learning_rate": 8.359523133692875e-05, "loss": 0.02046005129814148, "step": 57820 }, { "epoch": 16.414987226795343, "grad_norm": 2.450256586074829, "learning_rate": 8.35923928470054e-05, "loss": 0.009145064651966095, "step": 57830 }, { "epoch": 16.417825716718706, "grad_norm": 0.3849567174911499, "learning_rate": 8.358955435708204e-05, "loss": 0.010958493500947953, "step": 57840 }, { "epoch": 16.420664206642066, "grad_norm": 2.9760806560516357, "learning_rate": 8.358671586715867e-05, "loss": 0.008709165453910827, "step": 57850 }, { "epoch": 16.42350269656543, "grad_norm": 4.011159420013428, "learning_rate": 8.358387737723532e-05, "loss": 0.03418850898742676, "step": 57860 }, { "epoch": 16.426341186488788, "grad_norm": 0.08816280215978622, "learning_rate": 8.358103888731196e-05, "loss": 0.02985650599002838, "step": 57870 }, { "epoch": 16.429179676412147, "grad_norm": 0.2714015543460846, "learning_rate": 8.357820039738859e-05, "loss": 0.01040676087141037, "step": 57880 }, { "epoch": 16.43201816633551, "grad_norm": 14.26090145111084, "learning_rate": 8.357536190746523e-05, "loss": 0.042628079652786255, "step": 57890 }, { "epoch": 16.43485665625887, "grad_norm": 1.0638296604156494, "learning_rate": 8.357252341754187e-05, "loss": 0.02021416276693344, "step": 57900 }, { "epoch": 16.437695146182232, "grad_norm": 1.9394540786743164, "learning_rate": 8.356968492761851e-05, "loss": 0.011032968759536743, "step": 57910 }, { "epoch": 16.44053363610559, "grad_norm": 0.7162463068962097, "learning_rate": 8.356684643769514e-05, "loss": 0.01696653962135315, "step": 57920 }, { "epoch": 16.44337212602895, "grad_norm": 5.895564079284668, "learning_rate": 8.35640079477718e-05, "loss": 0.018338477611541747, "step": 57930 }, { "epoch": 16.446210615952314, "grad_norm": 1.3791728019714355, "learning_rate": 8.356116945784844e-05, "loss": 0.010515202581882478, "step": 57940 }, { "epoch": 16.449049105875673, "grad_norm": 0.5799000263214111, "learning_rate": 8.355833096792507e-05, "loss": 0.012176022678613663, "step": 57950 }, { "epoch": 16.451887595799036, "grad_norm": 0.22276094555854797, "learning_rate": 8.35554924780017e-05, "loss": 0.015263059735298156, "step": 57960 }, { "epoch": 16.454726085722395, "grad_norm": 1.5283480882644653, "learning_rate": 8.355265398807835e-05, "loss": 0.008918225765228271, "step": 57970 }, { "epoch": 16.457564575645755, "grad_norm": 2.0964720249176025, "learning_rate": 8.354981549815498e-05, "loss": 0.011067420244216919, "step": 57980 }, { "epoch": 16.460403065569118, "grad_norm": 13.94503116607666, "learning_rate": 8.354697700823162e-05, "loss": 0.017902770638465883, "step": 57990 }, { "epoch": 16.463241555492477, "grad_norm": 1.2242968082427979, "learning_rate": 8.354413851830827e-05, "loss": 0.026270201802253722, "step": 58000 }, { "epoch": 16.463241555492477, "eval_accuracy": 0.9682711260888918, "eval_loss": 0.10908498615026474, "eval_runtime": 31.5421, "eval_samples_per_second": 498.604, "eval_steps_per_second": 7.799, "step": 58000 }, { "epoch": 16.46608004541584, "grad_norm": 3.2993037700653076, "learning_rate": 8.35413000283849e-05, "loss": 0.01482086330652237, "step": 58010 }, { "epoch": 16.4689185353392, "grad_norm": 0.12810245156288147, "learning_rate": 8.353846153846154e-05, "loss": 0.025041675567626952, "step": 58020 }, { "epoch": 16.471757025262562, "grad_norm": 2.4282965660095215, "learning_rate": 8.353562304853818e-05, "loss": 0.02343158274888992, "step": 58030 }, { "epoch": 16.47459551518592, "grad_norm": 5.451030254364014, "learning_rate": 8.353278455861482e-05, "loss": 0.01792638599872589, "step": 58040 }, { "epoch": 16.47743400510928, "grad_norm": 0.08058411628007889, "learning_rate": 8.352994606869145e-05, "loss": 0.02152462899684906, "step": 58050 }, { "epoch": 16.480272495032644, "grad_norm": 0.46004438400268555, "learning_rate": 8.352710757876811e-05, "loss": 0.013772794604301452, "step": 58060 }, { "epoch": 16.483110984956003, "grad_norm": 0.5002625584602356, "learning_rate": 8.352426908884475e-05, "loss": 0.04588167071342468, "step": 58070 }, { "epoch": 16.485949474879366, "grad_norm": 4.392847537994385, "learning_rate": 8.352143059892138e-05, "loss": 0.03246031701564789, "step": 58080 }, { "epoch": 16.488787964802725, "grad_norm": 2.2703194618225098, "learning_rate": 8.351859210899802e-05, "loss": 0.0058725252747535706, "step": 58090 }, { "epoch": 16.491626454726084, "grad_norm": 5.792627334594727, "learning_rate": 8.351575361907466e-05, "loss": 0.0167933851480484, "step": 58100 }, { "epoch": 16.494464944649447, "grad_norm": 7.451622486114502, "learning_rate": 8.351291512915129e-05, "loss": 0.007043012976646423, "step": 58110 }, { "epoch": 16.497303434572807, "grad_norm": 7.536740303039551, "learning_rate": 8.351007663922793e-05, "loss": 0.01883871704339981, "step": 58120 }, { "epoch": 16.50014192449617, "grad_norm": 5.395069599151611, "learning_rate": 8.350723814930458e-05, "loss": 0.01115759164094925, "step": 58130 }, { "epoch": 16.50298041441953, "grad_norm": 8.329933166503906, "learning_rate": 8.350439965938121e-05, "loss": 0.01635705530643463, "step": 58140 }, { "epoch": 16.50581890434289, "grad_norm": 1.2351986169815063, "learning_rate": 8.350156116945785e-05, "loss": 0.007817569375038146, "step": 58150 }, { "epoch": 16.50865739426625, "grad_norm": 8.746870040893555, "learning_rate": 8.34987226795345e-05, "loss": 0.026566678285598756, "step": 58160 }, { "epoch": 16.51149588418961, "grad_norm": 11.279014587402344, "learning_rate": 8.349588418961113e-05, "loss": 0.010588343441486358, "step": 58170 }, { "epoch": 16.514334374112973, "grad_norm": 6.852504253387451, "learning_rate": 8.349304569968776e-05, "loss": 0.01996084749698639, "step": 58180 }, { "epoch": 16.517172864036333, "grad_norm": 8.204095840454102, "learning_rate": 8.34902072097644e-05, "loss": 0.027003538608551026, "step": 58190 }, { "epoch": 16.520011353959692, "grad_norm": 1.7556825876235962, "learning_rate": 8.348736871984106e-05, "loss": 0.02089347392320633, "step": 58200 }, { "epoch": 16.522849843883055, "grad_norm": 0.21101777255535126, "learning_rate": 8.348453022991769e-05, "loss": 0.015630720555782317, "step": 58210 }, { "epoch": 16.525688333806414, "grad_norm": 5.011896133422852, "learning_rate": 8.348169173999433e-05, "loss": 0.007458011060953141, "step": 58220 }, { "epoch": 16.528526823729777, "grad_norm": 4.888058662414551, "learning_rate": 8.347885325007097e-05, "loss": 0.006140603125095368, "step": 58230 }, { "epoch": 16.531365313653136, "grad_norm": 1.7715506553649902, "learning_rate": 8.34760147601476e-05, "loss": 0.017218659818172454, "step": 58240 }, { "epoch": 16.534203803576496, "grad_norm": 2.5855140686035156, "learning_rate": 8.347317627022424e-05, "loss": 0.017837607860565187, "step": 58250 }, { "epoch": 16.53704229349986, "grad_norm": 0.5661490559577942, "learning_rate": 8.34703377803009e-05, "loss": 0.01270155906677246, "step": 58260 }, { "epoch": 16.539880783423218, "grad_norm": 4.726703643798828, "learning_rate": 8.346749929037752e-05, "loss": 0.01863550990819931, "step": 58270 }, { "epoch": 16.54271927334658, "grad_norm": 1.3726119995117188, "learning_rate": 8.346466080045416e-05, "loss": 0.009706395864486694, "step": 58280 }, { "epoch": 16.54555776326994, "grad_norm": 0.37909504771232605, "learning_rate": 8.34618223105308e-05, "loss": 0.03129624724388123, "step": 58290 }, { "epoch": 16.5483962531933, "grad_norm": 0.49579834938049316, "learning_rate": 8.345898382060745e-05, "loss": 0.008506166189908982, "step": 58300 }, { "epoch": 16.551234743116662, "grad_norm": 1.405500888824463, "learning_rate": 8.345614533068407e-05, "loss": 0.024441471695899962, "step": 58310 }, { "epoch": 16.55407323304002, "grad_norm": 0.6769503951072693, "learning_rate": 8.345330684076071e-05, "loss": 0.013408051431179046, "step": 58320 }, { "epoch": 16.556911722963385, "grad_norm": 0.36084574460983276, "learning_rate": 8.345046835083737e-05, "loss": 0.00796249657869339, "step": 58330 }, { "epoch": 16.559750212886744, "grad_norm": 7.54517936706543, "learning_rate": 8.3447629860914e-05, "loss": 0.018345263600349427, "step": 58340 }, { "epoch": 16.562588702810103, "grad_norm": 5.916313648223877, "learning_rate": 8.344479137099064e-05, "loss": 0.02523200213909149, "step": 58350 }, { "epoch": 16.565427192733466, "grad_norm": 2.9937806129455566, "learning_rate": 8.344195288106728e-05, "loss": 0.019701348245143892, "step": 58360 }, { "epoch": 16.568265682656826, "grad_norm": 3.081040859222412, "learning_rate": 8.343911439114391e-05, "loss": 0.0076291501522064206, "step": 58370 }, { "epoch": 16.57110417258019, "grad_norm": 0.8946557641029358, "learning_rate": 8.343627590122055e-05, "loss": 0.01645214855670929, "step": 58380 }, { "epoch": 16.573942662503548, "grad_norm": 8.519558906555176, "learning_rate": 8.343343741129719e-05, "loss": 0.01650015115737915, "step": 58390 }, { "epoch": 16.57678115242691, "grad_norm": 2.107929229736328, "learning_rate": 8.343059892137383e-05, "loss": 0.021172210574150085, "step": 58400 }, { "epoch": 16.57961964235027, "grad_norm": 7.89661169052124, "learning_rate": 8.342776043145047e-05, "loss": 0.007535956799983978, "step": 58410 }, { "epoch": 16.58245813227363, "grad_norm": 0.6490938663482666, "learning_rate": 8.342492194152712e-05, "loss": 0.01022508442401886, "step": 58420 }, { "epoch": 16.585296622196992, "grad_norm": 5.476421356201172, "learning_rate": 8.342208345160376e-05, "loss": 0.016288921236991882, "step": 58430 }, { "epoch": 16.58813511212035, "grad_norm": 0.0703764334321022, "learning_rate": 8.341924496168038e-05, "loss": 0.018961134552955627, "step": 58440 }, { "epoch": 16.590973602043714, "grad_norm": 2.362675666809082, "learning_rate": 8.341640647175703e-05, "loss": 0.009367284178733826, "step": 58450 }, { "epoch": 16.593812091967074, "grad_norm": 0.19794169068336487, "learning_rate": 8.341356798183367e-05, "loss": 0.012348584085702895, "step": 58460 }, { "epoch": 16.596650581890433, "grad_norm": 2.0445363521575928, "learning_rate": 8.341072949191031e-05, "loss": 0.04081717133522034, "step": 58470 }, { "epoch": 16.599489071813796, "grad_norm": 5.547548294067383, "learning_rate": 8.340789100198695e-05, "loss": 0.01884424239397049, "step": 58480 }, { "epoch": 16.602327561737155, "grad_norm": 11.534749984741211, "learning_rate": 8.340505251206359e-05, "loss": 0.023290008306503296, "step": 58490 }, { "epoch": 16.605166051660518, "grad_norm": 5.996431350708008, "learning_rate": 8.340221402214022e-05, "loss": 0.009347515553236008, "step": 58500 }, { "epoch": 16.605166051660518, "eval_accuracy": 0.9701786736186176, "eval_loss": 0.10197910666465759, "eval_runtime": 33.353, "eval_samples_per_second": 471.532, "eval_steps_per_second": 7.376, "step": 58500 }, { "epoch": 16.608004541583878, "grad_norm": 1.6715576648712158, "learning_rate": 8.339937553221686e-05, "loss": 0.01492326259613037, "step": 58510 }, { "epoch": 16.610843031507237, "grad_norm": 9.562216758728027, "learning_rate": 8.33965370422935e-05, "loss": 0.017912183701992036, "step": 58520 }, { "epoch": 16.6136815214306, "grad_norm": 8.274002075195312, "learning_rate": 8.339369855237014e-05, "loss": 0.022522298991680144, "step": 58530 }, { "epoch": 16.61652001135396, "grad_norm": 9.184091567993164, "learning_rate": 8.339086006244678e-05, "loss": 0.013378302752971648, "step": 58540 }, { "epoch": 16.619358501277322, "grad_norm": 1.7948696613311768, "learning_rate": 8.338802157252343e-05, "loss": 0.012252527475357055, "step": 58550 }, { "epoch": 16.62219699120068, "grad_norm": 9.252473831176758, "learning_rate": 8.338518308260005e-05, "loss": 0.014008289575576783, "step": 58560 }, { "epoch": 16.62503548112404, "grad_norm": 14.353342056274414, "learning_rate": 8.33823445926767e-05, "loss": 0.01632632315158844, "step": 58570 }, { "epoch": 16.627873971047404, "grad_norm": 0.4026396572589874, "learning_rate": 8.337950610275334e-05, "loss": 0.016963547468185423, "step": 58580 }, { "epoch": 16.630712460970763, "grad_norm": 0.5738431811332703, "learning_rate": 8.337666761282998e-05, "loss": 0.015992391109466552, "step": 58590 }, { "epoch": 16.633550950894126, "grad_norm": 2.4417455196380615, "learning_rate": 8.337382912290662e-05, "loss": 0.012426336109638215, "step": 58600 }, { "epoch": 16.636389440817485, "grad_norm": 8.04161548614502, "learning_rate": 8.337099063298326e-05, "loss": 0.010476543009281159, "step": 58610 }, { "epoch": 16.639227930740844, "grad_norm": 0.5494700074195862, "learning_rate": 8.33681521430599e-05, "loss": 0.012957891821861267, "step": 58620 }, { "epoch": 16.642066420664207, "grad_norm": 9.546879768371582, "learning_rate": 8.336531365313653e-05, "loss": 0.027926135063171386, "step": 58630 }, { "epoch": 16.644904910587567, "grad_norm": 11.368267059326172, "learning_rate": 8.336247516321317e-05, "loss": 0.01914326548576355, "step": 58640 }, { "epoch": 16.64774340051093, "grad_norm": 7.612231731414795, "learning_rate": 8.335963667328981e-05, "loss": 0.017746613919734956, "step": 58650 }, { "epoch": 16.65058189043429, "grad_norm": 8.074963569641113, "learning_rate": 8.335679818336645e-05, "loss": 0.02094816565513611, "step": 58660 }, { "epoch": 16.653420380357648, "grad_norm": 0.15181156992912292, "learning_rate": 8.33539596934431e-05, "loss": 0.0071916863322258, "step": 58670 }, { "epoch": 16.65625887028101, "grad_norm": 0.20666532218456268, "learning_rate": 8.335112120351974e-05, "loss": 0.01815638542175293, "step": 58680 }, { "epoch": 16.65909736020437, "grad_norm": 0.4218113124370575, "learning_rate": 8.334828271359636e-05, "loss": 0.01777077168226242, "step": 58690 }, { "epoch": 16.661935850127733, "grad_norm": 1.3433518409729004, "learning_rate": 8.3345444223673e-05, "loss": 0.007694992423057556, "step": 58700 }, { "epoch": 16.664774340051093, "grad_norm": 0.7735517024993896, "learning_rate": 8.334260573374965e-05, "loss": 0.016449853777885437, "step": 58710 }, { "epoch": 16.667612829974452, "grad_norm": 8.599223136901855, "learning_rate": 8.333976724382629e-05, "loss": 0.04052632451057434, "step": 58720 }, { "epoch": 16.670451319897815, "grad_norm": 0.5352591276168823, "learning_rate": 8.333692875390293e-05, "loss": 0.03280578255653381, "step": 58730 }, { "epoch": 16.673289809821174, "grad_norm": 8.601404190063477, "learning_rate": 8.333409026397957e-05, "loss": 0.06592566967010498, "step": 58740 }, { "epoch": 16.676128299744537, "grad_norm": 2.5928406715393066, "learning_rate": 8.333125177405621e-05, "loss": 0.005962970107793808, "step": 58750 }, { "epoch": 16.678966789667896, "grad_norm": 0.39038896560668945, "learning_rate": 8.332841328413284e-05, "loss": 0.03435937762260437, "step": 58760 }, { "epoch": 16.68180527959126, "grad_norm": 0.6997430920600891, "learning_rate": 8.332557479420948e-05, "loss": 0.02462855875492096, "step": 58770 }, { "epoch": 16.68464376951462, "grad_norm": 0.5729056000709534, "learning_rate": 8.332273630428612e-05, "loss": 0.015364103019237518, "step": 58780 }, { "epoch": 16.687482259437978, "grad_norm": 6.088399887084961, "learning_rate": 8.331989781436276e-05, "loss": 0.010400149226188659, "step": 58790 }, { "epoch": 16.69032074936134, "grad_norm": 7.7270402908325195, "learning_rate": 8.33170593244394e-05, "loss": 0.011964193731546401, "step": 58800 }, { "epoch": 16.6931592392847, "grad_norm": 7.348968982696533, "learning_rate": 8.331422083451605e-05, "loss": 0.02733967900276184, "step": 58810 }, { "epoch": 16.695997729208063, "grad_norm": 0.5131927132606506, "learning_rate": 8.331138234459268e-05, "loss": 0.00753500908613205, "step": 58820 }, { "epoch": 16.698836219131422, "grad_norm": 2.607360363006592, "learning_rate": 8.330854385466932e-05, "loss": 0.017744763195514678, "step": 58830 }, { "epoch": 16.70167470905478, "grad_norm": 1.769675374031067, "learning_rate": 8.330570536474596e-05, "loss": 0.02353108525276184, "step": 58840 }, { "epoch": 16.704513198978145, "grad_norm": 1.2015560865402222, "learning_rate": 8.33028668748226e-05, "loss": 0.030354398488998412, "step": 58850 }, { "epoch": 16.707351688901504, "grad_norm": 0.2548241913318634, "learning_rate": 8.330002838489924e-05, "loss": 0.018169844150543214, "step": 58860 }, { "epoch": 16.710190178824867, "grad_norm": 1.0467280149459839, "learning_rate": 8.329718989497588e-05, "loss": 0.022896963357925414, "step": 58870 }, { "epoch": 16.713028668748226, "grad_norm": 5.994368553161621, "learning_rate": 8.329435140505252e-05, "loss": 0.014371447265148163, "step": 58880 }, { "epoch": 16.715867158671585, "grad_norm": 2.3301689624786377, "learning_rate": 8.329151291512915e-05, "loss": 0.025104135274887085, "step": 58890 }, { "epoch": 16.71870564859495, "grad_norm": 4.1884684562683105, "learning_rate": 8.328867442520579e-05, "loss": 0.023795315623283388, "step": 58900 }, { "epoch": 16.721544138518308, "grad_norm": 0.2607637941837311, "learning_rate": 8.328583593528243e-05, "loss": 0.014869174361228943, "step": 58910 }, { "epoch": 16.72438262844167, "grad_norm": 1.5670897960662842, "learning_rate": 8.328299744535906e-05, "loss": 0.01373508870601654, "step": 58920 }, { "epoch": 16.72722111836503, "grad_norm": 0.44055473804473877, "learning_rate": 8.328015895543572e-05, "loss": 0.010856592655181884, "step": 58930 }, { "epoch": 16.73005960828839, "grad_norm": 2.201734781265259, "learning_rate": 8.327732046551236e-05, "loss": 0.02318454682826996, "step": 58940 }, { "epoch": 16.732898098211752, "grad_norm": 11.061216354370117, "learning_rate": 8.327448197558899e-05, "loss": 0.021474100649356842, "step": 58950 }, { "epoch": 16.73573658813511, "grad_norm": 0.17531704902648926, "learning_rate": 8.327164348566563e-05, "loss": 0.007789625972509384, "step": 58960 }, { "epoch": 16.738575078058474, "grad_norm": 10.564139366149902, "learning_rate": 8.326880499574227e-05, "loss": 0.02943071722984314, "step": 58970 }, { "epoch": 16.741413567981834, "grad_norm": 4.880338191986084, "learning_rate": 8.326596650581891e-05, "loss": 0.028732332587242126, "step": 58980 }, { "epoch": 16.744252057905193, "grad_norm": 1.6891965866088867, "learning_rate": 8.326312801589555e-05, "loss": 0.022667229175567627, "step": 58990 }, { "epoch": 16.747090547828556, "grad_norm": 0.47848132252693176, "learning_rate": 8.326028952597219e-05, "loss": 0.016487307846546173, "step": 59000 }, { "epoch": 16.747090547828556, "eval_accuracy": 0.9696064093596999, "eval_loss": 0.10117613524198532, "eval_runtime": 32.5235, "eval_samples_per_second": 483.558, "eval_steps_per_second": 7.564, "step": 59000 }, { "epoch": 16.749929037751915, "grad_norm": 6.330474376678467, "learning_rate": 8.325745103604883e-05, "loss": 0.018073922395706175, "step": 59010 }, { "epoch": 16.752767527675278, "grad_norm": 0.22673740983009338, "learning_rate": 8.325461254612546e-05, "loss": 0.0038531355559825895, "step": 59020 }, { "epoch": 16.755606017598637, "grad_norm": 1.25984525680542, "learning_rate": 8.32517740562021e-05, "loss": 0.0270535945892334, "step": 59030 }, { "epoch": 16.758444507521997, "grad_norm": 4.507450580596924, "learning_rate": 8.324893556627874e-05, "loss": 0.020542250573635103, "step": 59040 }, { "epoch": 16.76128299744536, "grad_norm": 1.800696611404419, "learning_rate": 8.324609707635537e-05, "loss": 0.01224243938922882, "step": 59050 }, { "epoch": 16.76412148736872, "grad_norm": 3.9831464290618896, "learning_rate": 8.324325858643203e-05, "loss": 0.011683669686317445, "step": 59060 }, { "epoch": 16.766959977292082, "grad_norm": 17.34760856628418, "learning_rate": 8.324042009650867e-05, "loss": 0.0342373251914978, "step": 59070 }, { "epoch": 16.76979846721544, "grad_norm": 0.45396551489830017, "learning_rate": 8.32375816065853e-05, "loss": 0.03917011618614197, "step": 59080 }, { "epoch": 16.7726369571388, "grad_norm": 8.4290189743042, "learning_rate": 8.323474311666194e-05, "loss": 0.03323173820972443, "step": 59090 }, { "epoch": 16.775475447062163, "grad_norm": 0.9577686786651611, "learning_rate": 8.323190462673858e-05, "loss": 0.006318319588899612, "step": 59100 }, { "epoch": 16.778313936985523, "grad_norm": 0.5069995522499084, "learning_rate": 8.322906613681522e-05, "loss": 0.006062549352645874, "step": 59110 }, { "epoch": 16.781152426908886, "grad_norm": 1.1846550703048706, "learning_rate": 8.322622764689185e-05, "loss": 0.010842230916023255, "step": 59120 }, { "epoch": 16.783990916832245, "grad_norm": 7.583518981933594, "learning_rate": 8.32233891569685e-05, "loss": 0.0242738276720047, "step": 59130 }, { "epoch": 16.786829406755604, "grad_norm": 0.22325898706912994, "learning_rate": 8.322055066704514e-05, "loss": 0.010330835729837418, "step": 59140 }, { "epoch": 16.789667896678967, "grad_norm": 14.182655334472656, "learning_rate": 8.321771217712177e-05, "loss": 0.024131046235561372, "step": 59150 }, { "epoch": 16.792506386602327, "grad_norm": 4.624218940734863, "learning_rate": 8.321487368719841e-05, "loss": 0.030269244313240053, "step": 59160 }, { "epoch": 16.79534487652569, "grad_norm": 0.13222636282444, "learning_rate": 8.321203519727506e-05, "loss": 0.005456266552209854, "step": 59170 }, { "epoch": 16.79818336644905, "grad_norm": 4.031569480895996, "learning_rate": 8.320919670735168e-05, "loss": 0.0065409883856773375, "step": 59180 }, { "epoch": 16.801021856372408, "grad_norm": 0.3416095972061157, "learning_rate": 8.320635821742834e-05, "loss": 0.015418782830238342, "step": 59190 }, { "epoch": 16.80386034629577, "grad_norm": 0.7199006080627441, "learning_rate": 8.320351972750498e-05, "loss": 0.034425219893455504, "step": 59200 }, { "epoch": 16.80669883621913, "grad_norm": 8.920957565307617, "learning_rate": 8.320068123758161e-05, "loss": 0.04098060727119446, "step": 59210 }, { "epoch": 16.809537326142493, "grad_norm": 9.328518867492676, "learning_rate": 8.319784274765825e-05, "loss": 0.02089225947856903, "step": 59220 }, { "epoch": 16.812375816065853, "grad_norm": 4.088455677032471, "learning_rate": 8.319500425773489e-05, "loss": 0.007977698743343354, "step": 59230 }, { "epoch": 16.815214305989215, "grad_norm": 1.5684256553649902, "learning_rate": 8.319216576781153e-05, "loss": 0.021638141572475435, "step": 59240 }, { "epoch": 16.818052795912575, "grad_norm": 13.637103080749512, "learning_rate": 8.318932727788816e-05, "loss": 0.02500157058238983, "step": 59250 }, { "epoch": 16.820891285835934, "grad_norm": 2.7244832515716553, "learning_rate": 8.318648878796481e-05, "loss": 0.025866514444351195, "step": 59260 }, { "epoch": 16.823729775759297, "grad_norm": 1.282070279121399, "learning_rate": 8.318365029804146e-05, "loss": 0.01210797280073166, "step": 59270 }, { "epoch": 16.826568265682656, "grad_norm": 12.778800964355469, "learning_rate": 8.318081180811808e-05, "loss": 0.026551297307014464, "step": 59280 }, { "epoch": 16.82940675560602, "grad_norm": 1.5891084671020508, "learning_rate": 8.317797331819472e-05, "loss": 0.020087757706642152, "step": 59290 }, { "epoch": 16.83224524552938, "grad_norm": 0.7672985792160034, "learning_rate": 8.317513482827137e-05, "loss": 0.012400572001934052, "step": 59300 }, { "epoch": 16.835083735452738, "grad_norm": 3.444721221923828, "learning_rate": 8.3172296338348e-05, "loss": 0.025675663352012636, "step": 59310 }, { "epoch": 16.8379222253761, "grad_norm": 0.5613698363304138, "learning_rate": 8.316945784842464e-05, "loss": 0.012427344918251038, "step": 59320 }, { "epoch": 16.84076071529946, "grad_norm": 0.8720005750656128, "learning_rate": 8.316661935850129e-05, "loss": 0.009545071423053742, "step": 59330 }, { "epoch": 16.843599205222823, "grad_norm": 1.0006614923477173, "learning_rate": 8.316378086857792e-05, "loss": 0.010564179718494415, "step": 59340 }, { "epoch": 16.846437695146182, "grad_norm": 0.180669367313385, "learning_rate": 8.316094237865456e-05, "loss": 0.010495248436927795, "step": 59350 }, { "epoch": 16.84927618506954, "grad_norm": 4.413321018218994, "learning_rate": 8.31581038887312e-05, "loss": 0.03815314173698425, "step": 59360 }, { "epoch": 16.852114674992904, "grad_norm": 1.090126633644104, "learning_rate": 8.315526539880784e-05, "loss": 0.028489330410957338, "step": 59370 }, { "epoch": 16.854953164916264, "grad_norm": 8.892434120178223, "learning_rate": 8.315242690888447e-05, "loss": 0.022506290674209596, "step": 59380 }, { "epoch": 16.857791654839627, "grad_norm": 6.411722183227539, "learning_rate": 8.314958841896113e-05, "loss": 0.018957775831222535, "step": 59390 }, { "epoch": 16.860630144762986, "grad_norm": 10.51352310180664, "learning_rate": 8.314674992903775e-05, "loss": 0.02647947072982788, "step": 59400 }, { "epoch": 16.863468634686345, "grad_norm": 11.328583717346191, "learning_rate": 8.31439114391144e-05, "loss": 0.020234578847885133, "step": 59410 }, { "epoch": 16.86630712460971, "grad_norm": 7.328647136688232, "learning_rate": 8.314107294919104e-05, "loss": 0.023175641894340515, "step": 59420 }, { "epoch": 16.869145614533068, "grad_norm": 0.5999188423156738, "learning_rate": 8.313823445926768e-05, "loss": 0.015295355021953583, "step": 59430 }, { "epoch": 16.87198410445643, "grad_norm": 2.7790751457214355, "learning_rate": 8.31353959693443e-05, "loss": 0.006256217509508133, "step": 59440 }, { "epoch": 16.87482259437979, "grad_norm": 9.57004451751709, "learning_rate": 8.313255747942095e-05, "loss": 0.024614939093589784, "step": 59450 }, { "epoch": 16.87766108430315, "grad_norm": 14.579944610595703, "learning_rate": 8.31297189894976e-05, "loss": 0.03471892774105072, "step": 59460 }, { "epoch": 16.880499574226512, "grad_norm": 0.5841467380523682, "learning_rate": 8.312688049957423e-05, "loss": 0.010910328477621078, "step": 59470 }, { "epoch": 16.88333806414987, "grad_norm": 6.936350345611572, "learning_rate": 8.312404200965087e-05, "loss": 0.03417450189590454, "step": 59480 }, { "epoch": 16.886176554073234, "grad_norm": 3.2573535442352295, "learning_rate": 8.312120351972751e-05, "loss": 0.03560025095939636, "step": 59490 }, { "epoch": 16.889015043996594, "grad_norm": 2.1573917865753174, "learning_rate": 8.311836502980414e-05, "loss": 0.01897345781326294, "step": 59500 }, { "epoch": 16.889015043996594, "eval_accuracy": 0.970941692630508, "eval_loss": 0.09835122525691986, "eval_runtime": 33.441, "eval_samples_per_second": 470.291, "eval_steps_per_second": 7.356, "step": 59500 }, { "epoch": 16.891853533919953, "grad_norm": 6.42120361328125, "learning_rate": 8.311552653988078e-05, "loss": 0.02761681377887726, "step": 59510 }, { "epoch": 16.894692023843316, "grad_norm": 1.1619118452072144, "learning_rate": 8.311268804995742e-05, "loss": 0.009718816727399826, "step": 59520 }, { "epoch": 16.897530513766675, "grad_norm": 0.1762501448392868, "learning_rate": 8.310984956003406e-05, "loss": 0.019588188827037813, "step": 59530 }, { "epoch": 16.900369003690038, "grad_norm": 2.9426095485687256, "learning_rate": 8.31070110701107e-05, "loss": 0.01367335170507431, "step": 59540 }, { "epoch": 16.903207493613397, "grad_norm": 3.055361032485962, "learning_rate": 8.310417258018735e-05, "loss": 0.015794186294078826, "step": 59550 }, { "epoch": 16.906045983536757, "grad_norm": 13.4407377243042, "learning_rate": 8.310133409026399e-05, "loss": 0.02144361734390259, "step": 59560 }, { "epoch": 16.90888447346012, "grad_norm": 13.010050773620605, "learning_rate": 8.309849560034062e-05, "loss": 0.01210978850722313, "step": 59570 }, { "epoch": 16.91172296338348, "grad_norm": 9.772439956665039, "learning_rate": 8.309565711041726e-05, "loss": 0.034959816932678224, "step": 59580 }, { "epoch": 16.914561453306842, "grad_norm": 4.563203811645508, "learning_rate": 8.309281862049391e-05, "loss": 0.009464364498853683, "step": 59590 }, { "epoch": 16.9173999432302, "grad_norm": 9.706631660461426, "learning_rate": 8.308998013057054e-05, "loss": 0.027202248573303223, "step": 59600 }, { "epoch": 16.920238433153564, "grad_norm": 0.7429290413856506, "learning_rate": 8.308714164064718e-05, "loss": 0.006244056671857834, "step": 59610 }, { "epoch": 16.923076923076923, "grad_norm": 5.50533390045166, "learning_rate": 8.308430315072382e-05, "loss": 0.015045884251594543, "step": 59620 }, { "epoch": 16.925915413000283, "grad_norm": 1.5773534774780273, "learning_rate": 8.308146466080045e-05, "loss": 0.035909435153007506, "step": 59630 }, { "epoch": 16.928753902923646, "grad_norm": 5.186202526092529, "learning_rate": 8.307862617087709e-05, "loss": 0.011247806996107102, "step": 59640 }, { "epoch": 16.931592392847005, "grad_norm": 3.8120548725128174, "learning_rate": 8.307578768095373e-05, "loss": 0.03312384784221649, "step": 59650 }, { "epoch": 16.934430882770368, "grad_norm": 3.50529146194458, "learning_rate": 8.307294919103037e-05, "loss": 0.02543906569480896, "step": 59660 }, { "epoch": 16.937269372693727, "grad_norm": 0.6182767152786255, "learning_rate": 8.307011070110702e-05, "loss": 0.015868456661701204, "step": 59670 }, { "epoch": 16.940107862617086, "grad_norm": 0.3213830590248108, "learning_rate": 8.306727221118366e-05, "loss": 0.009449127316474914, "step": 59680 }, { "epoch": 16.94294635254045, "grad_norm": 0.908379316329956, "learning_rate": 8.30644337212603e-05, "loss": 0.003988035768270492, "step": 59690 }, { "epoch": 16.94578484246381, "grad_norm": 2.614715814590454, "learning_rate": 8.306159523133693e-05, "loss": 0.029246652126312257, "step": 59700 }, { "epoch": 16.94862333238717, "grad_norm": 0.6855096817016602, "learning_rate": 8.305875674141357e-05, "loss": 0.023610490560531616, "step": 59710 }, { "epoch": 16.95146182231053, "grad_norm": 0.6034495234489441, "learning_rate": 8.305591825149021e-05, "loss": 0.010013815760612488, "step": 59720 }, { "epoch": 16.95430031223389, "grad_norm": 5.102558135986328, "learning_rate": 8.305307976156685e-05, "loss": 0.0316384881734848, "step": 59730 }, { "epoch": 16.957138802157253, "grad_norm": 0.43382880091667175, "learning_rate": 8.305024127164349e-05, "loss": 0.014935806393623352, "step": 59740 }, { "epoch": 16.959977292080612, "grad_norm": 0.9285465478897095, "learning_rate": 8.304740278172013e-05, "loss": 0.015131747722625733, "step": 59750 }, { "epoch": 16.962815782003975, "grad_norm": 0.2950350046157837, "learning_rate": 8.304456429179676e-05, "loss": 0.006348349153995514, "step": 59760 }, { "epoch": 16.965654271927335, "grad_norm": 12.1892728805542, "learning_rate": 8.30417258018734e-05, "loss": 0.014313901960849761, "step": 59770 }, { "epoch": 16.968492761850694, "grad_norm": 5.843630790710449, "learning_rate": 8.303888731195004e-05, "loss": 0.03413397669792175, "step": 59780 }, { "epoch": 16.971331251774057, "grad_norm": 9.735608100891113, "learning_rate": 8.303604882202669e-05, "loss": 0.039829361438751223, "step": 59790 }, { "epoch": 16.974169741697416, "grad_norm": 0.17520152032375336, "learning_rate": 8.303321033210333e-05, "loss": 0.017403219640254975, "step": 59800 }, { "epoch": 16.97700823162078, "grad_norm": 5.112416744232178, "learning_rate": 8.303037184217997e-05, "loss": 0.02405135929584503, "step": 59810 }, { "epoch": 16.97984672154414, "grad_norm": 1.3423599004745483, "learning_rate": 8.302753335225661e-05, "loss": 0.009313157200813294, "step": 59820 }, { "epoch": 16.982685211467498, "grad_norm": 7.463330268859863, "learning_rate": 8.302469486233324e-05, "loss": 0.03352530002593994, "step": 59830 }, { "epoch": 16.98552370139086, "grad_norm": 0.6925028562545776, "learning_rate": 8.302185637240988e-05, "loss": 0.029302921891212464, "step": 59840 }, { "epoch": 16.98836219131422, "grad_norm": 2.5599260330200195, "learning_rate": 8.301901788248652e-05, "loss": 0.03621283769607544, "step": 59850 }, { "epoch": 16.991200681237583, "grad_norm": 11.15510368347168, "learning_rate": 8.301617939256316e-05, "loss": 0.03140295445919037, "step": 59860 }, { "epoch": 16.994039171160942, "grad_norm": 1.0712110996246338, "learning_rate": 8.30133409026398e-05, "loss": 0.013606338202953339, "step": 59870 }, { "epoch": 16.9968776610843, "grad_norm": 7.196577548980713, "learning_rate": 8.301050241271644e-05, "loss": 0.021908745169639587, "step": 59880 }, { "epoch": 16.999716151007664, "grad_norm": 0.33696454763412476, "learning_rate": 8.300766392279307e-05, "loss": 0.013846765458583831, "step": 59890 }, { "epoch": 17.002554640931024, "grad_norm": 1.2818280458450317, "learning_rate": 8.300482543286971e-05, "loss": 0.015659791231155396, "step": 59900 }, { "epoch": 17.005393130854387, "grad_norm": 2.799314022064209, "learning_rate": 8.300198694294635e-05, "loss": 0.01224747896194458, "step": 59910 }, { "epoch": 17.008231620777746, "grad_norm": 0.5789487361907959, "learning_rate": 8.2999148453023e-05, "loss": 0.00416494756937027, "step": 59920 }, { "epoch": 17.011070110701105, "grad_norm": 0.20322588086128235, "learning_rate": 8.299630996309964e-05, "loss": 0.017922723293304445, "step": 59930 }, { "epoch": 17.013908600624468, "grad_norm": 0.4100334942340851, "learning_rate": 8.299347147317628e-05, "loss": 0.01091453731060028, "step": 59940 }, { "epoch": 17.016747090547828, "grad_norm": 2.312666177749634, "learning_rate": 8.299063298325292e-05, "loss": 0.005855745077133179, "step": 59950 }, { "epoch": 17.01958558047119, "grad_norm": 8.206287384033203, "learning_rate": 8.298779449332955e-05, "loss": 0.01479453444480896, "step": 59960 }, { "epoch": 17.02242407039455, "grad_norm": 0.2073267549276352, "learning_rate": 8.298495600340619e-05, "loss": 0.0036951426416635515, "step": 59970 }, { "epoch": 17.025262560317913, "grad_norm": 0.06631455570459366, "learning_rate": 8.298211751348283e-05, "loss": 0.007227571308612823, "step": 59980 }, { "epoch": 17.028101050241272, "grad_norm": 7.917062759399414, "learning_rate": 8.297927902355947e-05, "loss": 0.02213500738143921, "step": 59990 }, { "epoch": 17.03093954016463, "grad_norm": 9.186241149902344, "learning_rate": 8.297644053363611e-05, "loss": 0.013681623339653014, "step": 60000 }, { "epoch": 17.03093954016463, "eval_accuracy": 0.972976409995549, "eval_loss": 0.0920124277472496, "eval_runtime": 33.1572, "eval_samples_per_second": 474.316, "eval_steps_per_second": 7.419, "step": 60000 }, { "epoch": 17.033778030087994, "grad_norm": 0.051136184483766556, "learning_rate": 8.297360204371275e-05, "loss": 0.010534006357192992, "step": 60010 }, { "epoch": 17.036616520011354, "grad_norm": 0.34980571269989014, "learning_rate": 8.297076355378938e-05, "loss": 0.00844370573759079, "step": 60020 }, { "epoch": 17.039455009934716, "grad_norm": 4.840629577636719, "learning_rate": 8.296792506386602e-05, "loss": 0.017969614267349242, "step": 60030 }, { "epoch": 17.042293499858076, "grad_norm": 3.3794472217559814, "learning_rate": 8.296508657394267e-05, "loss": 0.012053081393241882, "step": 60040 }, { "epoch": 17.045131989781435, "grad_norm": 0.10263305902481079, "learning_rate": 8.29622480840193e-05, "loss": 0.02863311767578125, "step": 60050 }, { "epoch": 17.047970479704798, "grad_norm": Infinity, "learning_rate": 8.295940959409595e-05, "loss": 0.024728569388389587, "step": 60060 }, { "epoch": 17.050808969628157, "grad_norm": 1.7721612453460693, "learning_rate": 8.295685495316491e-05, "loss": 0.011141805350780487, "step": 60070 }, { "epoch": 17.05364745955152, "grad_norm": 8.777414321899414, "learning_rate": 8.295401646324156e-05, "loss": 0.014671573042869568, "step": 60080 }, { "epoch": 17.05648594947488, "grad_norm": 8.920719146728516, "learning_rate": 8.29511779733182e-05, "loss": 0.050412923097610474, "step": 60090 }, { "epoch": 17.05932443939824, "grad_norm": 3.0890700817108154, "learning_rate": 8.294833948339484e-05, "loss": 0.019531485438346863, "step": 60100 }, { "epoch": 17.0621629293216, "grad_norm": 0.8644199967384338, "learning_rate": 8.294550099347148e-05, "loss": 0.006075898930430412, "step": 60110 }, { "epoch": 17.06500141924496, "grad_norm": 8.021568298339844, "learning_rate": 8.294266250354812e-05, "loss": 0.015994280576705933, "step": 60120 }, { "epoch": 17.067839909168324, "grad_norm": 8.68488883972168, "learning_rate": 8.293982401362476e-05, "loss": 0.017658647894859315, "step": 60130 }, { "epoch": 17.070678399091683, "grad_norm": 0.35758069157600403, "learning_rate": 8.293698552370139e-05, "loss": 0.01298295110464096, "step": 60140 }, { "epoch": 17.073516889015043, "grad_norm": 1.6744041442871094, "learning_rate": 8.293414703377803e-05, "loss": 0.022433683276176453, "step": 60150 }, { "epoch": 17.076355378938405, "grad_norm": 1.4272972345352173, "learning_rate": 8.293130854385467e-05, "loss": 0.006685799360275269, "step": 60160 }, { "epoch": 17.079193868861765, "grad_norm": 0.9956719875335693, "learning_rate": 8.292847005393131e-05, "loss": 0.013381126523017883, "step": 60170 }, { "epoch": 17.082032358785128, "grad_norm": 0.15938931703567505, "learning_rate": 8.292563156400796e-05, "loss": 0.03058187961578369, "step": 60180 }, { "epoch": 17.084870848708487, "grad_norm": 0.7025190591812134, "learning_rate": 8.29227930740846e-05, "loss": 0.019236505031585693, "step": 60190 }, { "epoch": 17.087709338631846, "grad_norm": 0.5485270619392395, "learning_rate": 8.291995458416123e-05, "loss": 0.008620668947696686, "step": 60200 }, { "epoch": 17.09054782855521, "grad_norm": 4.315624237060547, "learning_rate": 8.291711609423787e-05, "loss": 0.004921108111739158, "step": 60210 }, { "epoch": 17.09338631847857, "grad_norm": 2.2285940647125244, "learning_rate": 8.291427760431451e-05, "loss": 0.006683091819286347, "step": 60220 }, { "epoch": 17.09622480840193, "grad_norm": 6.00294303894043, "learning_rate": 8.291143911439115e-05, "loss": 0.011475080996751786, "step": 60230 }, { "epoch": 17.09906329832529, "grad_norm": 6.455972671508789, "learning_rate": 8.290860062446779e-05, "loss": 0.014705395698547364, "step": 60240 }, { "epoch": 17.10190178824865, "grad_norm": 0.37130552530288696, "learning_rate": 8.290576213454443e-05, "loss": 0.02468397170305252, "step": 60250 }, { "epoch": 17.104740278172013, "grad_norm": 0.5733087658882141, "learning_rate": 8.290292364462106e-05, "loss": 0.056001299619674684, "step": 60260 }, { "epoch": 17.107578768095372, "grad_norm": 0.6906346082687378, "learning_rate": 8.29000851546977e-05, "loss": 0.01867554187774658, "step": 60270 }, { "epoch": 17.110417258018735, "grad_norm": 6.823988914489746, "learning_rate": 8.289724666477434e-05, "loss": 0.024957768619060516, "step": 60280 }, { "epoch": 17.113255747942095, "grad_norm": 1.58076012134552, "learning_rate": 8.289440817485098e-05, "loss": 0.018008801341056823, "step": 60290 }, { "epoch": 17.116094237865454, "grad_norm": 0.9722974300384521, "learning_rate": 8.289156968492763e-05, "loss": 0.014012791216373444, "step": 60300 }, { "epoch": 17.118932727788817, "grad_norm": 4.514144420623779, "learning_rate": 8.288873119500427e-05, "loss": 0.020944939553737642, "step": 60310 }, { "epoch": 17.121771217712176, "grad_norm": 0.6085273623466492, "learning_rate": 8.288589270508091e-05, "loss": 0.014564484357833862, "step": 60320 }, { "epoch": 17.12460970763554, "grad_norm": 2.912853717803955, "learning_rate": 8.288305421515754e-05, "loss": 0.015839365124702454, "step": 60330 }, { "epoch": 17.1274481975589, "grad_norm": 7.918461322784424, "learning_rate": 8.288021572523418e-05, "loss": 0.016417635977268218, "step": 60340 }, { "epoch": 17.130286687482258, "grad_norm": 4.403071403503418, "learning_rate": 8.287737723531082e-05, "loss": 0.0071085885167121885, "step": 60350 }, { "epoch": 17.13312517740562, "grad_norm": 0.08059606701135635, "learning_rate": 8.287453874538745e-05, "loss": 0.012365427613258363, "step": 60360 }, { "epoch": 17.13596366732898, "grad_norm": 0.05988854169845581, "learning_rate": 8.28717002554641e-05, "loss": 0.014021091163158417, "step": 60370 }, { "epoch": 17.138802157252343, "grad_norm": 0.6042670011520386, "learning_rate": 8.286886176554074e-05, "loss": 0.022894272208213808, "step": 60380 }, { "epoch": 17.141640647175702, "grad_norm": 4.7841949462890625, "learning_rate": 8.286602327561737e-05, "loss": 0.023886898159980775, "step": 60390 }, { "epoch": 17.144479137099065, "grad_norm": 1.6405787467956543, "learning_rate": 8.286318478569401e-05, "loss": 0.03029906451702118, "step": 60400 }, { "epoch": 17.147317627022424, "grad_norm": 8.55129337310791, "learning_rate": 8.286034629577065e-05, "loss": 0.0199605792760849, "step": 60410 }, { "epoch": 17.150156116945784, "grad_norm": 0.5794874429702759, "learning_rate": 8.28575078058473e-05, "loss": 0.012826944887638091, "step": 60420 }, { "epoch": 17.152994606869147, "grad_norm": 2.0195648670196533, "learning_rate": 8.285466931592392e-05, "loss": 0.009734425693750381, "step": 60430 }, { "epoch": 17.155833096792506, "grad_norm": 2.8576016426086426, "learning_rate": 8.285183082600058e-05, "loss": 0.00952526181936264, "step": 60440 }, { "epoch": 17.15867158671587, "grad_norm": 10.08468246459961, "learning_rate": 8.284899233607722e-05, "loss": 0.011233837902545929, "step": 60450 }, { "epoch": 17.161510076639228, "grad_norm": 0.13244937360286713, "learning_rate": 8.284615384615385e-05, "loss": 0.010768814384937287, "step": 60460 }, { "epoch": 17.164348566562587, "grad_norm": 9.749004364013672, "learning_rate": 8.284331535623049e-05, "loss": 0.010572870075702668, "step": 60470 }, { "epoch": 17.16718705648595, "grad_norm": 2.040863037109375, "learning_rate": 8.284047686630713e-05, "loss": 0.01051534190773964, "step": 60480 }, { "epoch": 17.17002554640931, "grad_norm": 0.6595520377159119, "learning_rate": 8.283763837638376e-05, "loss": 0.0321024477481842, "step": 60490 }, { "epoch": 17.172864036332673, "grad_norm": 9.365138053894043, "learning_rate": 8.283479988646041e-05, "loss": 0.017180398106575012, "step": 60500 }, { "epoch": 17.172864036332673, "eval_accuracy": 0.9699243339479875, "eval_loss": 0.10191277414560318, "eval_runtime": 31.9901, "eval_samples_per_second": 491.62, "eval_steps_per_second": 7.69, "step": 60500 }, { "epoch": 17.175702526256032, "grad_norm": 1.700337529182434, "learning_rate": 8.283196139653705e-05, "loss": 0.012546961009502412, "step": 60510 }, { "epoch": 17.17854101617939, "grad_norm": 2.4220998287200928, "learning_rate": 8.282912290661368e-05, "loss": 0.010761106014251709, "step": 60520 }, { "epoch": 17.181379506102754, "grad_norm": 11.144481658935547, "learning_rate": 8.282628441669032e-05, "loss": 0.009575797617435456, "step": 60530 }, { "epoch": 17.184217996026113, "grad_norm": 0.6005381941795349, "learning_rate": 8.282344592676696e-05, "loss": 0.011822213232517243, "step": 60540 }, { "epoch": 17.187056485949476, "grad_norm": 0.22014525532722473, "learning_rate": 8.28206074368436e-05, "loss": 0.011343812197446823, "step": 60550 }, { "epoch": 17.189894975872836, "grad_norm": 12.883678436279297, "learning_rate": 8.281776894692023e-05, "loss": 0.018257315456867217, "step": 60560 }, { "epoch": 17.192733465796195, "grad_norm": 0.7405396699905396, "learning_rate": 8.281493045699689e-05, "loss": 0.013790734112262726, "step": 60570 }, { "epoch": 17.195571955719558, "grad_norm": 0.632757306098938, "learning_rate": 8.281209196707353e-05, "loss": 0.023327629268169402, "step": 60580 }, { "epoch": 17.198410445642917, "grad_norm": 4.208858013153076, "learning_rate": 8.280925347715016e-05, "loss": 0.007754936069250107, "step": 60590 }, { "epoch": 17.20124893556628, "grad_norm": 0.143032506108284, "learning_rate": 8.28064149872268e-05, "loss": 0.012290270626544952, "step": 60600 }, { "epoch": 17.20408742548964, "grad_norm": 4.206811904907227, "learning_rate": 8.280357649730344e-05, "loss": 0.006305233389139175, "step": 60610 }, { "epoch": 17.206925915413, "grad_norm": 0.697813868522644, "learning_rate": 8.280073800738007e-05, "loss": 0.012607382237911224, "step": 60620 }, { "epoch": 17.20976440533636, "grad_norm": 8.769792556762695, "learning_rate": 8.279789951745671e-05, "loss": 0.017398476600646973, "step": 60630 }, { "epoch": 17.21260289525972, "grad_norm": 4.754141330718994, "learning_rate": 8.279506102753336e-05, "loss": 0.0054458677768707275, "step": 60640 }, { "epoch": 17.215441385183084, "grad_norm": 0.40721654891967773, "learning_rate": 8.279222253760999e-05, "loss": 0.0030492722988128664, "step": 60650 }, { "epoch": 17.218279875106443, "grad_norm": 2.4124927520751953, "learning_rate": 8.278938404768663e-05, "loss": 0.00622435100376606, "step": 60660 }, { "epoch": 17.221118365029803, "grad_norm": 3.333814859390259, "learning_rate": 8.278654555776328e-05, "loss": 0.018958735466003417, "step": 60670 }, { "epoch": 17.223956854953165, "grad_norm": 0.6594217419624329, "learning_rate": 8.278370706783992e-05, "loss": 0.008006781339645386, "step": 60680 }, { "epoch": 17.226795344876525, "grad_norm": 0.23740503191947937, "learning_rate": 8.278086857791654e-05, "loss": 0.02423333376646042, "step": 60690 }, { "epoch": 17.229633834799888, "grad_norm": 5.6190972328186035, "learning_rate": 8.27780300879932e-05, "loss": 0.008301816880702972, "step": 60700 }, { "epoch": 17.232472324723247, "grad_norm": 0.6800920963287354, "learning_rate": 8.277519159806984e-05, "loss": 0.013937462866306306, "step": 60710 }, { "epoch": 17.235310814646606, "grad_norm": 0.5660737156867981, "learning_rate": 8.277235310814647e-05, "loss": 0.0080329068005085, "step": 60720 }, { "epoch": 17.23814930456997, "grad_norm": 1.6027239561080933, "learning_rate": 8.276951461822311e-05, "loss": 0.008355498313903809, "step": 60730 }, { "epoch": 17.24098779449333, "grad_norm": 1.2806905508041382, "learning_rate": 8.276667612829975e-05, "loss": 0.010284505784511566, "step": 60740 }, { "epoch": 17.24382628441669, "grad_norm": 1.3109134435653687, "learning_rate": 8.276383763837638e-05, "loss": 0.02071026712656021, "step": 60750 }, { "epoch": 17.24666477434005, "grad_norm": 1.4722708463668823, "learning_rate": 8.276099914845302e-05, "loss": 0.008884946256875992, "step": 60760 }, { "epoch": 17.249503264263414, "grad_norm": 0.9054445028305054, "learning_rate": 8.275816065852968e-05, "loss": 0.012854351103305817, "step": 60770 }, { "epoch": 17.252341754186773, "grad_norm": 2.9971067905426025, "learning_rate": 8.27553221686063e-05, "loss": 0.012026246637105942, "step": 60780 }, { "epoch": 17.255180244110132, "grad_norm": 3.2078888416290283, "learning_rate": 8.275248367868294e-05, "loss": 0.015626372396945955, "step": 60790 }, { "epoch": 17.258018734033495, "grad_norm": 0.9394081234931946, "learning_rate": 8.274964518875959e-05, "loss": 0.02124364972114563, "step": 60800 }, { "epoch": 17.260857223956855, "grad_norm": 5.590035915374756, "learning_rate": 8.274680669883623e-05, "loss": 0.007999500632286072, "step": 60810 }, { "epoch": 17.263695713880217, "grad_norm": 7.1303534507751465, "learning_rate": 8.274396820891285e-05, "loss": 0.016105407476425172, "step": 60820 }, { "epoch": 17.266534203803577, "grad_norm": 10.8695650100708, "learning_rate": 8.27411297189895e-05, "loss": 0.027861028909683228, "step": 60830 }, { "epoch": 17.269372693726936, "grad_norm": 0.9095455408096313, "learning_rate": 8.273829122906615e-05, "loss": 0.02985200583934784, "step": 60840 }, { "epoch": 17.2722111836503, "grad_norm": 0.650879979133606, "learning_rate": 8.273545273914278e-05, "loss": 0.010480190813541412, "step": 60850 }, { "epoch": 17.27504967357366, "grad_norm": 3.5360140800476074, "learning_rate": 8.273261424921942e-05, "loss": 0.018787427246570586, "step": 60860 }, { "epoch": 17.27788816349702, "grad_norm": 2.239576578140259, "learning_rate": 8.272977575929606e-05, "loss": 0.038197532296180725, "step": 60870 }, { "epoch": 17.28072665342038, "grad_norm": 0.1785365790128708, "learning_rate": 8.272693726937269e-05, "loss": 0.03055664598941803, "step": 60880 }, { "epoch": 17.28356514334374, "grad_norm": 4.845576763153076, "learning_rate": 8.272409877944933e-05, "loss": 0.013368892669677734, "step": 60890 }, { "epoch": 17.286403633267103, "grad_norm": 0.596496045589447, "learning_rate": 8.272126028952599e-05, "loss": 0.02753378450870514, "step": 60900 }, { "epoch": 17.289242123190462, "grad_norm": 0.22615285217761993, "learning_rate": 8.271842179960261e-05, "loss": 0.010494916886091232, "step": 60910 }, { "epoch": 17.292080613113825, "grad_norm": 6.68947172164917, "learning_rate": 8.271558330967926e-05, "loss": 0.012790846824645995, "step": 60920 }, { "epoch": 17.294919103037184, "grad_norm": 0.45707669854164124, "learning_rate": 8.27127448197559e-05, "loss": 0.024885520339012146, "step": 60930 }, { "epoch": 17.297757592960544, "grad_norm": 10.56389331817627, "learning_rate": 8.270990632983254e-05, "loss": 0.016583162546157836, "step": 60940 }, { "epoch": 17.300596082883906, "grad_norm": 7.580021381378174, "learning_rate": 8.270706783990917e-05, "loss": 0.01836819648742676, "step": 60950 }, { "epoch": 17.303434572807266, "grad_norm": 9.000349998474121, "learning_rate": 8.270422934998581e-05, "loss": 0.022481262683868408, "step": 60960 }, { "epoch": 17.30627306273063, "grad_norm": 3.3225419521331787, "learning_rate": 8.270139086006246e-05, "loss": 0.015052467584609985, "step": 60970 }, { "epoch": 17.309111552653988, "grad_norm": 13.165914535522461, "learning_rate": 8.269855237013909e-05, "loss": 0.01728004217147827, "step": 60980 }, { "epoch": 17.311950042577347, "grad_norm": 0.9971885085105896, "learning_rate": 8.269571388021573e-05, "loss": 0.009444235265254975, "step": 60990 }, { "epoch": 17.31478853250071, "grad_norm": 9.107104301452637, "learning_rate": 8.269287539029237e-05, "loss": 0.006377992033958435, "step": 61000 }, { "epoch": 17.31478853250071, "eval_accuracy": 0.9724041457366313, "eval_loss": 0.09074018150568008, "eval_runtime": 33.6739, "eval_samples_per_second": 467.038, "eval_steps_per_second": 7.305, "step": 61000 }, { "epoch": 17.31762702242407, "grad_norm": 2.0779964923858643, "learning_rate": 8.2690036900369e-05, "loss": 0.010309429466724395, "step": 61010 }, { "epoch": 17.320465512347432, "grad_norm": 0.26222389936447144, "learning_rate": 8.268719841044564e-05, "loss": 0.019887381792068483, "step": 61020 }, { "epoch": 17.323304002270792, "grad_norm": 3.600162982940674, "learning_rate": 8.268435992052228e-05, "loss": 0.01938995122909546, "step": 61030 }, { "epoch": 17.32614249219415, "grad_norm": 3.3813929557800293, "learning_rate": 8.268152143059892e-05, "loss": 0.027367472648620605, "step": 61040 }, { "epoch": 17.328980982117514, "grad_norm": 1.9260897636413574, "learning_rate": 8.267868294067557e-05, "loss": 0.017145153880119324, "step": 61050 }, { "epoch": 17.331819472040873, "grad_norm": 0.18659789860248566, "learning_rate": 8.267584445075221e-05, "loss": 0.016209571063518523, "step": 61060 }, { "epoch": 17.334657961964236, "grad_norm": 0.9300244450569153, "learning_rate": 8.267300596082885e-05, "loss": 0.01639902889728546, "step": 61070 }, { "epoch": 17.337496451887596, "grad_norm": 0.4476860761642456, "learning_rate": 8.267016747090548e-05, "loss": 0.00805639624595642, "step": 61080 }, { "epoch": 17.340334941810955, "grad_norm": 8.714700698852539, "learning_rate": 8.266732898098212e-05, "loss": 0.010696236044168472, "step": 61090 }, { "epoch": 17.343173431734318, "grad_norm": 5.307934761047363, "learning_rate": 8.266449049105876e-05, "loss": 0.021429857611656188, "step": 61100 }, { "epoch": 17.346011921657677, "grad_norm": 7.301276683807373, "learning_rate": 8.26616520011354e-05, "loss": 0.027267566323280333, "step": 61110 }, { "epoch": 17.34885041158104, "grad_norm": 1.1424297094345093, "learning_rate": 8.265881351121204e-05, "loss": 0.024009637534618378, "step": 61120 }, { "epoch": 17.3516889015044, "grad_norm": 0.7486621737480164, "learning_rate": 8.265597502128868e-05, "loss": 0.021341179311275483, "step": 61130 }, { "epoch": 17.35452739142776, "grad_norm": 0.11364313215017319, "learning_rate": 8.265313653136531e-05, "loss": 0.006128537654876709, "step": 61140 }, { "epoch": 17.35736588135112, "grad_norm": 6.285084247589111, "learning_rate": 8.265029804144195e-05, "loss": 0.01049562469124794, "step": 61150 }, { "epoch": 17.36020437127448, "grad_norm": 4.951600074768066, "learning_rate": 8.26474595515186e-05, "loss": 0.008792410045862198, "step": 61160 }, { "epoch": 17.363042861197844, "grad_norm": 1.8576701879501343, "learning_rate": 8.264462106159524e-05, "loss": 0.019646164774894715, "step": 61170 }, { "epoch": 17.365881351121203, "grad_norm": 1.596095323562622, "learning_rate": 8.264178257167188e-05, "loss": 0.012482383102178574, "step": 61180 }, { "epoch": 17.368719841044566, "grad_norm": 0.24082989990711212, "learning_rate": 8.263894408174852e-05, "loss": 0.009223207831382751, "step": 61190 }, { "epoch": 17.371558330967925, "grad_norm": 8.884078979492188, "learning_rate": 8.263610559182515e-05, "loss": 0.028737032413482667, "step": 61200 }, { "epoch": 17.374396820891285, "grad_norm": 5.174246311187744, "learning_rate": 8.263326710190179e-05, "loss": 0.02334558218717575, "step": 61210 }, { "epoch": 17.377235310814648, "grad_norm": 1.2043037414550781, "learning_rate": 8.263042861197843e-05, "loss": 0.004507884383201599, "step": 61220 }, { "epoch": 17.380073800738007, "grad_norm": 5.725508213043213, "learning_rate": 8.262759012205507e-05, "loss": 0.015388375520706177, "step": 61230 }, { "epoch": 17.38291229066137, "grad_norm": 10.57107925415039, "learning_rate": 8.262475163213171e-05, "loss": 0.027135300636291503, "step": 61240 }, { "epoch": 17.38575078058473, "grad_norm": 8.947059631347656, "learning_rate": 8.262191314220835e-05, "loss": 0.018763606250286103, "step": 61250 }, { "epoch": 17.38858927050809, "grad_norm": 0.6465283036231995, "learning_rate": 8.2619074652285e-05, "loss": 0.008056195080280304, "step": 61260 }, { "epoch": 17.39142776043145, "grad_norm": 13.71313762664795, "learning_rate": 8.261623616236162e-05, "loss": 0.021186025440692903, "step": 61270 }, { "epoch": 17.39426625035481, "grad_norm": 0.7475911974906921, "learning_rate": 8.261339767243826e-05, "loss": 0.010940979421138763, "step": 61280 }, { "epoch": 17.397104740278174, "grad_norm": 1.2396737337112427, "learning_rate": 8.26105591825149e-05, "loss": 0.013402776420116424, "step": 61290 }, { "epoch": 17.399943230201533, "grad_norm": 1.4581817388534546, "learning_rate": 8.260772069259155e-05, "loss": 0.022477559745311737, "step": 61300 }, { "epoch": 17.402781720124892, "grad_norm": 0.7739901542663574, "learning_rate": 8.260488220266819e-05, "loss": 0.019741295278072356, "step": 61310 }, { "epoch": 17.405620210048255, "grad_norm": 12.744253158569336, "learning_rate": 8.260204371274483e-05, "loss": 0.023226961493492126, "step": 61320 }, { "epoch": 17.408458699971614, "grad_norm": 0.18409080803394318, "learning_rate": 8.259920522282146e-05, "loss": 0.00975734144449234, "step": 61330 }, { "epoch": 17.411297189894977, "grad_norm": 0.7691457867622375, "learning_rate": 8.25963667328981e-05, "loss": 0.021279042959213255, "step": 61340 }, { "epoch": 17.414135679818337, "grad_norm": 0.19453325867652893, "learning_rate": 8.259352824297474e-05, "loss": 0.017591717839241027, "step": 61350 }, { "epoch": 17.416974169741696, "grad_norm": 5.414878845214844, "learning_rate": 8.259068975305138e-05, "loss": 0.01786224842071533, "step": 61360 }, { "epoch": 17.41981265966506, "grad_norm": 6.233323574066162, "learning_rate": 8.258785126312802e-05, "loss": 0.011009050160646438, "step": 61370 }, { "epoch": 17.422651149588418, "grad_norm": 4.26106595993042, "learning_rate": 8.258501277320466e-05, "loss": 0.015389427542686462, "step": 61380 }, { "epoch": 17.42548963951178, "grad_norm": 0.180097758769989, "learning_rate": 8.25821742832813e-05, "loss": 0.007559391856193543, "step": 61390 }, { "epoch": 17.42832812943514, "grad_norm": 0.14663389325141907, "learning_rate": 8.257933579335793e-05, "loss": 0.018312694132328035, "step": 61400 }, { "epoch": 17.4311666193585, "grad_norm": 2.720428228378296, "learning_rate": 8.257649730343457e-05, "loss": 0.013026678562164306, "step": 61410 }, { "epoch": 17.434005109281863, "grad_norm": 1.3402146100997925, "learning_rate": 8.257365881351122e-05, "loss": 0.012370163947343827, "step": 61420 }, { "epoch": 17.436843599205222, "grad_norm": 10.160600662231445, "learning_rate": 8.257082032358784e-05, "loss": 0.024716906249523163, "step": 61430 }, { "epoch": 17.439682089128585, "grad_norm": 0.6892449259757996, "learning_rate": 8.25679818336645e-05, "loss": 0.0026948235929012297, "step": 61440 }, { "epoch": 17.442520579051944, "grad_norm": 0.5169992446899414, "learning_rate": 8.256514334374114e-05, "loss": 0.01630251556634903, "step": 61450 }, { "epoch": 17.445359068975304, "grad_norm": 0.22831790149211884, "learning_rate": 8.256230485381777e-05, "loss": 0.011030693352222443, "step": 61460 }, { "epoch": 17.448197558898666, "grad_norm": 2.0031800270080566, "learning_rate": 8.255946636389441e-05, "loss": 0.011505561321973801, "step": 61470 }, { "epoch": 17.451036048822026, "grad_norm": 8.452120780944824, "learning_rate": 8.255662787397105e-05, "loss": 0.02768073379993439, "step": 61480 }, { "epoch": 17.45387453874539, "grad_norm": 1.130696177482605, "learning_rate": 8.255378938404769e-05, "loss": 0.0066046066582202915, "step": 61490 }, { "epoch": 17.456713028668748, "grad_norm": 2.450423240661621, "learning_rate": 8.255095089412433e-05, "loss": 0.013511040806770324, "step": 61500 }, { "epoch": 17.456713028668748, "eval_accuracy": 0.9703694283715902, "eval_loss": 0.10323280841112137, "eval_runtime": 38.4498, "eval_samples_per_second": 409.027, "eval_steps_per_second": 6.398, "step": 61500 }, { "epoch": 17.459551518592107, "grad_norm": 4.175069808959961, "learning_rate": 8.254811240420097e-05, "loss": 0.01431768387556076, "step": 61510 }, { "epoch": 17.46239000851547, "grad_norm": 0.29949942231178284, "learning_rate": 8.254527391427762e-05, "loss": 0.011754083633422851, "step": 61520 }, { "epoch": 17.46522849843883, "grad_norm": 1.2364479303359985, "learning_rate": 8.254243542435424e-05, "loss": 0.019717365503311157, "step": 61530 }, { "epoch": 17.468066988362192, "grad_norm": 1.2116765975952148, "learning_rate": 8.253959693443088e-05, "loss": 0.017248308658599852, "step": 61540 }, { "epoch": 17.47090547828555, "grad_norm": 0.11094893515110016, "learning_rate": 8.253675844450753e-05, "loss": 0.0085129052400589, "step": 61550 }, { "epoch": 17.473743968208915, "grad_norm": 2.6053550243377686, "learning_rate": 8.253391995458415e-05, "loss": 0.008103728294372559, "step": 61560 }, { "epoch": 17.476582458132274, "grad_norm": 0.8313160538673401, "learning_rate": 8.253108146466081e-05, "loss": 0.017311184108257292, "step": 61570 }, { "epoch": 17.479420948055633, "grad_norm": 0.9753602147102356, "learning_rate": 8.252824297473745e-05, "loss": 0.01037101298570633, "step": 61580 }, { "epoch": 17.482259437978996, "grad_norm": 0.335663378238678, "learning_rate": 8.252540448481408e-05, "loss": 0.008461184054613113, "step": 61590 }, { "epoch": 17.485097927902356, "grad_norm": 0.43531790375709534, "learning_rate": 8.252256599489072e-05, "loss": 0.007645151764154434, "step": 61600 }, { "epoch": 17.48793641782572, "grad_norm": 4.973480701446533, "learning_rate": 8.251972750496736e-05, "loss": 0.01593599617481232, "step": 61610 }, { "epoch": 17.490774907749078, "grad_norm": 0.6126883625984192, "learning_rate": 8.2516889015044e-05, "loss": 0.011807385832071304, "step": 61620 }, { "epoch": 17.493613397672437, "grad_norm": 0.5036222338676453, "learning_rate": 8.251405052512064e-05, "loss": 0.028584447503089905, "step": 61630 }, { "epoch": 17.4964518875958, "grad_norm": 5.878015041351318, "learning_rate": 8.251121203519729e-05, "loss": 0.02974475920200348, "step": 61640 }, { "epoch": 17.49929037751916, "grad_norm": 4.7067036628723145, "learning_rate": 8.250837354527393e-05, "loss": 0.015483206510543824, "step": 61650 }, { "epoch": 17.502128867442522, "grad_norm": 6.352662086486816, "learning_rate": 8.250553505535055e-05, "loss": 0.013227233290672302, "step": 61660 }, { "epoch": 17.50496735736588, "grad_norm": 5.260533809661865, "learning_rate": 8.25026965654272e-05, "loss": 0.010836032778024673, "step": 61670 }, { "epoch": 17.50780584728924, "grad_norm": 14.600383758544922, "learning_rate": 8.249985807550384e-05, "loss": 0.024203220009803773, "step": 61680 }, { "epoch": 17.510644337212604, "grad_norm": 2.564335346221924, "learning_rate": 8.249701958558046e-05, "loss": 0.024412018060684205, "step": 61690 }, { "epoch": 17.513482827135963, "grad_norm": 9.105746269226074, "learning_rate": 8.249418109565712e-05, "loss": 0.02895221710205078, "step": 61700 }, { "epoch": 17.516321317059326, "grad_norm": 5.346796989440918, "learning_rate": 8.249134260573376e-05, "loss": 0.015102855861186981, "step": 61710 }, { "epoch": 17.519159806982685, "grad_norm": 1.8471977710723877, "learning_rate": 8.248850411581039e-05, "loss": 0.021771667897701262, "step": 61720 }, { "epoch": 17.521998296906045, "grad_norm": 1.399585247039795, "learning_rate": 8.248566562588703e-05, "loss": 0.015422055125236511, "step": 61730 }, { "epoch": 17.524836786829407, "grad_norm": 1.5477831363677979, "learning_rate": 8.248282713596367e-05, "loss": 0.01380692571401596, "step": 61740 }, { "epoch": 17.527675276752767, "grad_norm": 10.291984558105469, "learning_rate": 8.247998864604031e-05, "loss": 0.026460045576095582, "step": 61750 }, { "epoch": 17.53051376667613, "grad_norm": 1.715178370475769, "learning_rate": 8.247715015611694e-05, "loss": 0.014303673803806306, "step": 61760 }, { "epoch": 17.53335225659949, "grad_norm": 1.0733928680419922, "learning_rate": 8.24743116661936e-05, "loss": 0.010099523514509202, "step": 61770 }, { "epoch": 17.53619074652285, "grad_norm": 4.325575351715088, "learning_rate": 8.247147317627024e-05, "loss": 0.023564560711383818, "step": 61780 }, { "epoch": 17.53902923644621, "grad_norm": 5.538505554199219, "learning_rate": 8.246863468634687e-05, "loss": 0.01207343116402626, "step": 61790 }, { "epoch": 17.54186772636957, "grad_norm": 3.753171443939209, "learning_rate": 8.24657961964235e-05, "loss": 0.009594801813364029, "step": 61800 }, { "epoch": 17.544706216292933, "grad_norm": 14.94826602935791, "learning_rate": 8.246295770650015e-05, "loss": 0.01618855893611908, "step": 61810 }, { "epoch": 17.547544706216293, "grad_norm": 1.1586993932724, "learning_rate": 8.246011921657678e-05, "loss": 0.012247008085250855, "step": 61820 }, { "epoch": 17.550383196139652, "grad_norm": 1.1371415853500366, "learning_rate": 8.245728072665343e-05, "loss": 0.010183104872703552, "step": 61830 }, { "epoch": 17.553221686063015, "grad_norm": 1.0588442087173462, "learning_rate": 8.245444223673007e-05, "loss": 0.005298519879579544, "step": 61840 }, { "epoch": 17.556060175986374, "grad_norm": 1.4072620868682861, "learning_rate": 8.24516037468067e-05, "loss": 0.009971363842487336, "step": 61850 }, { "epoch": 17.558898665909737, "grad_norm": 8.111288070678711, "learning_rate": 8.244876525688334e-05, "loss": 0.01606186628341675, "step": 61860 }, { "epoch": 17.561737155833097, "grad_norm": 1.6734412908554077, "learning_rate": 8.244592676695998e-05, "loss": 0.015055789053440094, "step": 61870 }, { "epoch": 17.564575645756456, "grad_norm": 10.021623611450195, "learning_rate": 8.244308827703662e-05, "loss": 0.03179033100605011, "step": 61880 }, { "epoch": 17.56741413567982, "grad_norm": 0.2914247512817383, "learning_rate": 8.244024978711325e-05, "loss": 0.021559229493141173, "step": 61890 }, { "epoch": 17.570252625603178, "grad_norm": 0.597319483757019, "learning_rate": 8.24374112971899e-05, "loss": 0.008424359560012817, "step": 61900 }, { "epoch": 17.57309111552654, "grad_norm": 2.3098864555358887, "learning_rate": 8.243457280726655e-05, "loss": 0.016654497385025023, "step": 61910 }, { "epoch": 17.5759296054499, "grad_norm": 19.091413497924805, "learning_rate": 8.243173431734318e-05, "loss": 0.03730254471302032, "step": 61920 }, { "epoch": 17.578768095373263, "grad_norm": 4.950510025024414, "learning_rate": 8.242889582741982e-05, "loss": 0.0206155389547348, "step": 61930 }, { "epoch": 17.581606585296623, "grad_norm": 9.388574600219727, "learning_rate": 8.242605733749646e-05, "loss": 0.010144587606191635, "step": 61940 }, { "epoch": 17.584445075219982, "grad_norm": 0.5327780246734619, "learning_rate": 8.242321884757309e-05, "loss": 0.007420757412910461, "step": 61950 }, { "epoch": 17.587283565143345, "grad_norm": 0.3486921787261963, "learning_rate": 8.242038035764973e-05, "loss": 0.02931719124317169, "step": 61960 }, { "epoch": 17.590122055066704, "grad_norm": 9.741473197937012, "learning_rate": 8.241754186772638e-05, "loss": 0.011577901244163514, "step": 61970 }, { "epoch": 17.592960544990063, "grad_norm": 7.824601650238037, "learning_rate": 8.241470337780301e-05, "loss": 0.016944819688797, "step": 61980 }, { "epoch": 17.595799034913426, "grad_norm": 5.821125030517578, "learning_rate": 8.241186488787965e-05, "loss": 0.007874655723571777, "step": 61990 }, { "epoch": 17.598637524836786, "grad_norm": 0.895813524723053, "learning_rate": 8.24090263979563e-05, "loss": 0.006821882724761963, "step": 62000 }, { "epoch": 17.598637524836786, "eval_accuracy": 0.9725949004896038, "eval_loss": 0.09042610228061676, "eval_runtime": 34.3237, "eval_samples_per_second": 458.197, "eval_steps_per_second": 7.167, "step": 62000 }, { "epoch": 17.60147601476015, "grad_norm": 3.44907546043396, "learning_rate": 8.240618790803293e-05, "loss": 0.013858766853809356, "step": 62010 }, { "epoch": 17.604314504683508, "grad_norm": 2.5898563861846924, "learning_rate": 8.240334941810956e-05, "loss": 0.012462423741817474, "step": 62020 }, { "epoch": 17.60715299460687, "grad_norm": 7.554780006408691, "learning_rate": 8.240051092818622e-05, "loss": 0.0318467915058136, "step": 62030 }, { "epoch": 17.60999148453023, "grad_norm": 2.9511330127716064, "learning_rate": 8.239767243826285e-05, "loss": 0.007513165473937988, "step": 62040 }, { "epoch": 17.61282997445359, "grad_norm": 0.433972030878067, "learning_rate": 8.239483394833949e-05, "loss": 0.011287082731723786, "step": 62050 }, { "epoch": 17.615668464376952, "grad_norm": 4.62260627746582, "learning_rate": 8.239199545841613e-05, "loss": 0.009516490250825882, "step": 62060 }, { "epoch": 17.61850695430031, "grad_norm": 1.1186704635620117, "learning_rate": 8.238915696849277e-05, "loss": 0.018116609752178194, "step": 62070 }, { "epoch": 17.621345444223675, "grad_norm": 0.5700004696846008, "learning_rate": 8.23863184785694e-05, "loss": 0.012846747040748596, "step": 62080 }, { "epoch": 17.624183934147034, "grad_norm": 1.4697800874710083, "learning_rate": 8.238347998864604e-05, "loss": 0.01913735270500183, "step": 62090 }, { "epoch": 17.627022424070393, "grad_norm": 0.19675928354263306, "learning_rate": 8.23806414987227e-05, "loss": 0.01101551204919815, "step": 62100 }, { "epoch": 17.629860913993756, "grad_norm": 8.082877159118652, "learning_rate": 8.237780300879932e-05, "loss": 0.02088247984647751, "step": 62110 }, { "epoch": 17.632699403917115, "grad_norm": 0.03535906970500946, "learning_rate": 8.237496451887596e-05, "loss": 0.007874426245689393, "step": 62120 }, { "epoch": 17.63553789384048, "grad_norm": 2.2687647342681885, "learning_rate": 8.23721260289526e-05, "loss": 0.005813761800527573, "step": 62130 }, { "epoch": 17.638376383763838, "grad_norm": 0.6254578232765198, "learning_rate": 8.236928753902923e-05, "loss": 0.01222822517156601, "step": 62140 }, { "epoch": 17.641214873687197, "grad_norm": 3.6832480430603027, "learning_rate": 8.236644904910587e-05, "loss": 0.011981593817472458, "step": 62150 }, { "epoch": 17.64405336361056, "grad_norm": 7.011091709136963, "learning_rate": 8.236361055918251e-05, "loss": 0.03237915337085724, "step": 62160 }, { "epoch": 17.64689185353392, "grad_norm": 0.2354758381843567, "learning_rate": 8.236077206925916e-05, "loss": 0.00880434289574623, "step": 62170 }, { "epoch": 17.649730343457282, "grad_norm": 0.10418807715177536, "learning_rate": 8.23579335793358e-05, "loss": 0.017135900259017945, "step": 62180 }, { "epoch": 17.65256883338064, "grad_norm": 0.14549130201339722, "learning_rate": 8.235509508941244e-05, "loss": 0.018862779438495635, "step": 62190 }, { "epoch": 17.655407323304, "grad_norm": 5.672666072845459, "learning_rate": 8.235225659948908e-05, "loss": 0.016700644791126252, "step": 62200 }, { "epoch": 17.658245813227364, "grad_norm": 1.2123560905456543, "learning_rate": 8.234941810956571e-05, "loss": 0.006231961399316787, "step": 62210 }, { "epoch": 17.661084303150723, "grad_norm": 0.22028976678848267, "learning_rate": 8.234657961964235e-05, "loss": 0.02142426371574402, "step": 62220 }, { "epoch": 17.663922793074086, "grad_norm": 0.7124423980712891, "learning_rate": 8.2343741129719e-05, "loss": 0.020812688767910002, "step": 62230 }, { "epoch": 17.666761282997445, "grad_norm": 2.0622799396514893, "learning_rate": 8.234090263979563e-05, "loss": 0.019151614606380464, "step": 62240 }, { "epoch": 17.669599772920805, "grad_norm": 8.46364974975586, "learning_rate": 8.233806414987227e-05, "loss": 0.009067463874816894, "step": 62250 }, { "epoch": 17.672438262844167, "grad_norm": 12.550169944763184, "learning_rate": 8.233522565994891e-05, "loss": 0.01802738904953003, "step": 62260 }, { "epoch": 17.675276752767527, "grad_norm": 2.360891819000244, "learning_rate": 8.233238717002554e-05, "loss": 0.013236746191978455, "step": 62270 }, { "epoch": 17.67811524269089, "grad_norm": 5.366095542907715, "learning_rate": 8.232954868010218e-05, "loss": 0.01821562796831131, "step": 62280 }, { "epoch": 17.68095373261425, "grad_norm": 4.150077819824219, "learning_rate": 8.232671019017883e-05, "loss": 0.009263377636671066, "step": 62290 }, { "epoch": 17.68379222253761, "grad_norm": 11.164888381958008, "learning_rate": 8.232387170025547e-05, "loss": 0.024754358828067778, "step": 62300 }, { "epoch": 17.68663071246097, "grad_norm": 3.0886237621307373, "learning_rate": 8.232103321033211e-05, "loss": 0.026230931282043457, "step": 62310 }, { "epoch": 17.68946920238433, "grad_norm": 0.47652602195739746, "learning_rate": 8.231819472040875e-05, "loss": 0.011119580268859864, "step": 62320 }, { "epoch": 17.692307692307693, "grad_norm": 12.686592102050781, "learning_rate": 8.231535623048539e-05, "loss": 0.042090407013893126, "step": 62330 }, { "epoch": 17.695146182231053, "grad_norm": 14.014668464660645, "learning_rate": 8.231251774056202e-05, "loss": 0.02315245121717453, "step": 62340 }, { "epoch": 17.697984672154412, "grad_norm": 11.131443977355957, "learning_rate": 8.230967925063866e-05, "loss": 0.015998825430870056, "step": 62350 }, { "epoch": 17.700823162077775, "grad_norm": 8.347711563110352, "learning_rate": 8.23068407607153e-05, "loss": 0.017000842094421386, "step": 62360 }, { "epoch": 17.703661652001134, "grad_norm": 1.2479193210601807, "learning_rate": 8.230400227079194e-05, "loss": 0.023586955666542054, "step": 62370 }, { "epoch": 17.706500141924497, "grad_norm": 0.149809792637825, "learning_rate": 8.230116378086858e-05, "loss": 0.020341561734676362, "step": 62380 }, { "epoch": 17.709338631847857, "grad_norm": 3.1605405807495117, "learning_rate": 8.229832529094523e-05, "loss": 0.008027401566505433, "step": 62390 }, { "epoch": 17.71217712177122, "grad_norm": 0.043816469609737396, "learning_rate": 8.229548680102185e-05, "loss": 0.018761545419692993, "step": 62400 }, { "epoch": 17.71501561169458, "grad_norm": 3.1965885162353516, "learning_rate": 8.22926483110985e-05, "loss": 0.012146466225385667, "step": 62410 }, { "epoch": 17.717854101617938, "grad_norm": 0.9222743511199951, "learning_rate": 8.228980982117514e-05, "loss": 0.022021056711673738, "step": 62420 }, { "epoch": 17.7206925915413, "grad_norm": 0.09895863384008408, "learning_rate": 8.228697133125178e-05, "loss": 0.010440734028816224, "step": 62430 }, { "epoch": 17.72353108146466, "grad_norm": 0.3722352385520935, "learning_rate": 8.228413284132842e-05, "loss": 0.014222045242786408, "step": 62440 }, { "epoch": 17.726369571388023, "grad_norm": 0.7082600593566895, "learning_rate": 8.228129435140506e-05, "loss": 0.0062605500221252445, "step": 62450 }, { "epoch": 17.729208061311382, "grad_norm": 2.620187759399414, "learning_rate": 8.22784558614817e-05, "loss": 0.021380119025707245, "step": 62460 }, { "epoch": 17.732046551234742, "grad_norm": 6.925971984863281, "learning_rate": 8.227561737155833e-05, "loss": 0.011232168972492218, "step": 62470 }, { "epoch": 17.734885041158105, "grad_norm": 10.342658042907715, "learning_rate": 8.227277888163497e-05, "loss": 0.02125885784626007, "step": 62480 }, { "epoch": 17.737723531081464, "grad_norm": 8.710734367370605, "learning_rate": 8.226994039171161e-05, "loss": 0.02024710178375244, "step": 62490 }, { "epoch": 17.740562021004827, "grad_norm": 2.511924982070923, "learning_rate": 8.226710190178825e-05, "loss": 0.010098820179700851, "step": 62500 }, { "epoch": 17.740562021004827, "eval_accuracy": 0.970687352959878, "eval_loss": 0.09837010502815247, "eval_runtime": 36.6606, "eval_samples_per_second": 428.989, "eval_steps_per_second": 6.71, "step": 62500 }, { "epoch": 17.743400510928186, "grad_norm": 0.12733201682567596, "learning_rate": 8.22642634118649e-05, "loss": 0.008923172950744629, "step": 62510 }, { "epoch": 17.746239000851546, "grad_norm": 0.32851263880729675, "learning_rate": 8.226142492194154e-05, "loss": 0.0319708526134491, "step": 62520 }, { "epoch": 17.74907749077491, "grad_norm": 2.098435163497925, "learning_rate": 8.225858643201816e-05, "loss": 0.026207658648490905, "step": 62530 }, { "epoch": 17.751915980698268, "grad_norm": 8.043654441833496, "learning_rate": 8.22557479420948e-05, "loss": 0.035901430249214175, "step": 62540 }, { "epoch": 17.75475447062163, "grad_norm": 0.34832602739334106, "learning_rate": 8.225290945217145e-05, "loss": 0.009800473600625992, "step": 62550 }, { "epoch": 17.75759296054499, "grad_norm": 1.8256653547286987, "learning_rate": 8.225007096224809e-05, "loss": 0.01510208547115326, "step": 62560 }, { "epoch": 17.76043145046835, "grad_norm": 4.598666667938232, "learning_rate": 8.224723247232473e-05, "loss": 0.014049448072910309, "step": 62570 }, { "epoch": 17.763269940391712, "grad_norm": 13.579463005065918, "learning_rate": 8.224439398240137e-05, "loss": 0.028291037678718566, "step": 62580 }, { "epoch": 17.76610843031507, "grad_norm": 1.1781998872756958, "learning_rate": 8.224155549247801e-05, "loss": 0.02119150310754776, "step": 62590 }, { "epoch": 17.768946920238434, "grad_norm": 1.2915810346603394, "learning_rate": 8.223871700255464e-05, "loss": 0.01666664779186249, "step": 62600 }, { "epoch": 17.771785410161794, "grad_norm": 2.842689037322998, "learning_rate": 8.223587851263128e-05, "loss": 0.00673753023147583, "step": 62610 }, { "epoch": 17.774623900085153, "grad_norm": 0.541082501411438, "learning_rate": 8.223304002270792e-05, "loss": 0.021514469385147096, "step": 62620 }, { "epoch": 17.777462390008516, "grad_norm": 1.2891364097595215, "learning_rate": 8.223020153278456e-05, "loss": 0.023284460604190826, "step": 62630 }, { "epoch": 17.780300879931875, "grad_norm": 0.7459471821784973, "learning_rate": 8.22273630428612e-05, "loss": 0.018202440440654756, "step": 62640 }, { "epoch": 17.78313936985524, "grad_norm": 5.1306562423706055, "learning_rate": 8.222452455293785e-05, "loss": 0.025107717514038085, "step": 62650 }, { "epoch": 17.785977859778598, "grad_norm": 10.151063919067383, "learning_rate": 8.222196991200681e-05, "loss": 0.032685041427612305, "step": 62660 }, { "epoch": 17.788816349701957, "grad_norm": 2.75439190864563, "learning_rate": 8.221913142208345e-05, "loss": 0.012067785859107972, "step": 62670 }, { "epoch": 17.79165483962532, "grad_norm": 0.20659947395324707, "learning_rate": 8.22162929321601e-05, "loss": 0.00535687580704689, "step": 62680 }, { "epoch": 17.79449332954868, "grad_norm": 0.22523939609527588, "learning_rate": 8.221345444223674e-05, "loss": 0.006931957602500915, "step": 62690 }, { "epoch": 17.797331819472042, "grad_norm": 9.370674133300781, "learning_rate": 8.221061595231338e-05, "loss": 0.009420321881771087, "step": 62700 }, { "epoch": 17.8001703093954, "grad_norm": 5.420212745666504, "learning_rate": 8.220777746239001e-05, "loss": 0.006687845289707184, "step": 62710 }, { "epoch": 17.80300879931876, "grad_norm": 6.059547424316406, "learning_rate": 8.220493897246665e-05, "loss": 0.013150055706501008, "step": 62720 }, { "epoch": 17.805847289242124, "grad_norm": 0.3454117178916931, "learning_rate": 8.220210048254329e-05, "loss": 0.012862028181552887, "step": 62730 }, { "epoch": 17.808685779165483, "grad_norm": 8.585058212280273, "learning_rate": 8.219926199261993e-05, "loss": 0.012774714827537536, "step": 62740 }, { "epoch": 17.811524269088846, "grad_norm": 1.054860234260559, "learning_rate": 8.219642350269657e-05, "loss": 0.006054201349616051, "step": 62750 }, { "epoch": 17.814362759012205, "grad_norm": 2.0199360847473145, "learning_rate": 8.219358501277321e-05, "loss": 0.008802928030490875, "step": 62760 }, { "epoch": 17.817201248935568, "grad_norm": 0.40830835700035095, "learning_rate": 8.219074652284986e-05, "loss": 0.03096626400947571, "step": 62770 }, { "epoch": 17.820039738858927, "grad_norm": 0.42792853713035583, "learning_rate": 8.218790803292648e-05, "loss": 0.015305769443511964, "step": 62780 }, { "epoch": 17.822878228782287, "grad_norm": 1.2999038696289062, "learning_rate": 8.218506954300312e-05, "loss": 0.025196939706802368, "step": 62790 }, { "epoch": 17.82571671870565, "grad_norm": 1.036691665649414, "learning_rate": 8.218223105307977e-05, "loss": 0.01746339797973633, "step": 62800 }, { "epoch": 17.82855520862901, "grad_norm": 0.45592790842056274, "learning_rate": 8.217939256315641e-05, "loss": 0.02169123589992523, "step": 62810 }, { "epoch": 17.83139369855237, "grad_norm": 2.6539998054504395, "learning_rate": 8.217655407323305e-05, "loss": 0.0201306477189064, "step": 62820 }, { "epoch": 17.83423218847573, "grad_norm": 7.563229560852051, "learning_rate": 8.217371558330969e-05, "loss": 0.02674405574798584, "step": 62830 }, { "epoch": 17.83707067839909, "grad_norm": 11.221420288085938, "learning_rate": 8.217087709338632e-05, "loss": 0.021203263103961943, "step": 62840 }, { "epoch": 17.839909168322453, "grad_norm": 8.657453536987305, "learning_rate": 8.216803860346296e-05, "loss": 0.02190045714378357, "step": 62850 }, { "epoch": 17.842747658245813, "grad_norm": 1.6234456300735474, "learning_rate": 8.21652001135396e-05, "loss": 0.01436804085969925, "step": 62860 }, { "epoch": 17.845586148169176, "grad_norm": 0.777674674987793, "learning_rate": 8.216236162361624e-05, "loss": 0.015128807723522186, "step": 62870 }, { "epoch": 17.848424638092535, "grad_norm": 2.5748608112335205, "learning_rate": 8.215952313369288e-05, "loss": 0.02749258577823639, "step": 62880 }, { "epoch": 17.851263128015894, "grad_norm": 2.6573641300201416, "learning_rate": 8.215668464376952e-05, "loss": 0.01712031364440918, "step": 62890 }, { "epoch": 17.854101617939257, "grad_norm": 2.308138370513916, "learning_rate": 8.215384615384615e-05, "loss": 0.019389352202415465, "step": 62900 }, { "epoch": 17.856940107862616, "grad_norm": 0.4626794457435608, "learning_rate": 8.21510076639228e-05, "loss": 0.011457406729459763, "step": 62910 }, { "epoch": 17.85977859778598, "grad_norm": 0.8425937294960022, "learning_rate": 8.214816917399944e-05, "loss": 0.03470757007598877, "step": 62920 }, { "epoch": 17.86261708770934, "grad_norm": 0.1975494772195816, "learning_rate": 8.214533068407608e-05, "loss": 0.016960722208023072, "step": 62930 }, { "epoch": 17.865455577632698, "grad_norm": 7.9598212242126465, "learning_rate": 8.21424921941527e-05, "loss": 0.02543293237686157, "step": 62940 }, { "epoch": 17.86829406755606, "grad_norm": 0.8338761925697327, "learning_rate": 8.213965370422936e-05, "loss": 0.019624155759811402, "step": 62950 }, { "epoch": 17.87113255747942, "grad_norm": 10.346158981323242, "learning_rate": 8.2136815214306e-05, "loss": 0.011635848879814148, "step": 62960 }, { "epoch": 17.873971047402783, "grad_norm": 1.3004969358444214, "learning_rate": 8.213397672438263e-05, "loss": 0.01685110330581665, "step": 62970 }, { "epoch": 17.876809537326142, "grad_norm": 0.19285938143730164, "learning_rate": 8.213113823445927e-05, "loss": 0.013686656951904297, "step": 62980 }, { "epoch": 17.8796480272495, "grad_norm": 9.997819900512695, "learning_rate": 8.212829974453591e-05, "loss": 0.02674681544303894, "step": 62990 }, { "epoch": 17.882486517172865, "grad_norm": 10.04014778137207, "learning_rate": 8.212546125461254e-05, "loss": 0.027532917261123658, "step": 63000 }, { "epoch": 17.882486517172865, "eval_accuracy": 0.9703694283715902, "eval_loss": 0.09812287241220474, "eval_runtime": 33.1799, "eval_samples_per_second": 473.991, "eval_steps_per_second": 7.414, "step": 63000 }, { "epoch": 17.885325007096224, "grad_norm": 5.120038986206055, "learning_rate": 8.21226227646892e-05, "loss": 0.026342004537582397, "step": 63010 }, { "epoch": 17.888163497019587, "grad_norm": 1.2349797487258911, "learning_rate": 8.211978427476584e-05, "loss": 0.0392853856086731, "step": 63020 }, { "epoch": 17.891001986942946, "grad_norm": 3.915405035018921, "learning_rate": 8.211694578484246e-05, "loss": 0.012890052795410157, "step": 63030 }, { "epoch": 17.893840476866306, "grad_norm": 6.109304428100586, "learning_rate": 8.21141072949191e-05, "loss": 0.02629970908164978, "step": 63040 }, { "epoch": 17.89667896678967, "grad_norm": 1.5730305910110474, "learning_rate": 8.211126880499575e-05, "loss": 0.011992833763360976, "step": 63050 }, { "epoch": 17.899517456713028, "grad_norm": 1.0578458309173584, "learning_rate": 8.210843031507239e-05, "loss": 0.008684511482715606, "step": 63060 }, { "epoch": 17.90235594663639, "grad_norm": 2.258746385574341, "learning_rate": 8.210559182514901e-05, "loss": 0.02483682185411453, "step": 63070 }, { "epoch": 17.90519443655975, "grad_norm": 13.63537883758545, "learning_rate": 8.210275333522567e-05, "loss": 0.023638784885406494, "step": 63080 }, { "epoch": 17.90803292648311, "grad_norm": 0.9129111170768738, "learning_rate": 8.209991484530231e-05, "loss": 0.012398505210876464, "step": 63090 }, { "epoch": 17.910871416406472, "grad_norm": 0.5434322357177734, "learning_rate": 8.209707635537894e-05, "loss": 0.020840360224246977, "step": 63100 }, { "epoch": 17.91370990632983, "grad_norm": 0.819467306137085, "learning_rate": 8.209423786545558e-05, "loss": 0.013574461638927459, "step": 63110 }, { "epoch": 17.916548396253194, "grad_norm": 0.09551095962524414, "learning_rate": 8.209139937553222e-05, "loss": 0.013628153502941132, "step": 63120 }, { "epoch": 17.919386886176554, "grad_norm": 0.28355252742767334, "learning_rate": 8.208856088560885e-05, "loss": 0.028093889355659485, "step": 63130 }, { "epoch": 17.922225376099917, "grad_norm": 2.8406643867492676, "learning_rate": 8.208572239568549e-05, "loss": 0.006975986808538437, "step": 63140 }, { "epoch": 17.925063866023276, "grad_norm": 2.0798957347869873, "learning_rate": 8.208288390576215e-05, "loss": 0.009932101517915726, "step": 63150 }, { "epoch": 17.927902355946635, "grad_norm": 1.6234270334243774, "learning_rate": 8.208004541583877e-05, "loss": 0.022734537720680237, "step": 63160 }, { "epoch": 17.930740845869998, "grad_norm": 1.371840000152588, "learning_rate": 8.207720692591542e-05, "loss": 0.013514432311058044, "step": 63170 }, { "epoch": 17.933579335793358, "grad_norm": 0.549065351486206, "learning_rate": 8.207436843599206e-05, "loss": 0.01769469827413559, "step": 63180 }, { "epoch": 17.93641782571672, "grad_norm": 1.0799694061279297, "learning_rate": 8.20715299460687e-05, "loss": 0.0313946545124054, "step": 63190 }, { "epoch": 17.93925631564008, "grad_norm": 0.45932692289352417, "learning_rate": 8.206869145614533e-05, "loss": 0.027130329608917238, "step": 63200 }, { "epoch": 17.94209480556344, "grad_norm": 0.5596383810043335, "learning_rate": 8.206585296622198e-05, "loss": 0.05125044584274292, "step": 63210 }, { "epoch": 17.944933295486802, "grad_norm": 3.2851181030273438, "learning_rate": 8.206301447629862e-05, "loss": 0.01363530158996582, "step": 63220 }, { "epoch": 17.94777178541016, "grad_norm": 0.3789868950843811, "learning_rate": 8.206017598637525e-05, "loss": 0.017117464542388917, "step": 63230 }, { "epoch": 17.950610275333524, "grad_norm": 1.1124589443206787, "learning_rate": 8.205733749645189e-05, "loss": 0.023749449849128725, "step": 63240 }, { "epoch": 17.953448765256883, "grad_norm": 0.6287662386894226, "learning_rate": 8.205449900652853e-05, "loss": 0.015010283887386322, "step": 63250 }, { "epoch": 17.956287255180243, "grad_norm": 3.7761833667755127, "learning_rate": 8.205166051660516e-05, "loss": 0.014520318806171417, "step": 63260 }, { "epoch": 17.959125745103606, "grad_norm": 1.4113572835922241, "learning_rate": 8.20488220266818e-05, "loss": 0.005736036971211433, "step": 63270 }, { "epoch": 17.961964235026965, "grad_norm": 1.1359829902648926, "learning_rate": 8.204598353675846e-05, "loss": 0.011989904940128327, "step": 63280 }, { "epoch": 17.964802724950328, "grad_norm": 1.6833586692810059, "learning_rate": 8.204314504683508e-05, "loss": 0.00999864786863327, "step": 63290 }, { "epoch": 17.967641214873687, "grad_norm": 9.403002738952637, "learning_rate": 8.204030655691173e-05, "loss": 0.021844273805618285, "step": 63300 }, { "epoch": 17.970479704797047, "grad_norm": 8.982924461364746, "learning_rate": 8.203746806698837e-05, "loss": 0.033903712034225465, "step": 63310 }, { "epoch": 17.97331819472041, "grad_norm": 7.348380088806152, "learning_rate": 8.203462957706501e-05, "loss": 0.009591522067785263, "step": 63320 }, { "epoch": 17.97615668464377, "grad_norm": 2.2552490234375, "learning_rate": 8.203179108714164e-05, "loss": 0.04505495429039001, "step": 63330 }, { "epoch": 17.97899517456713, "grad_norm": 0.357238233089447, "learning_rate": 8.202895259721829e-05, "loss": 0.023945654928684234, "step": 63340 }, { "epoch": 17.98183366449049, "grad_norm": 3.6998040676116943, "learning_rate": 8.202611410729493e-05, "loss": 0.025970709323883057, "step": 63350 }, { "epoch": 17.98467215441385, "grad_norm": 13.53397274017334, "learning_rate": 8.202327561737156e-05, "loss": 0.02638135552406311, "step": 63360 }, { "epoch": 17.987510644337213, "grad_norm": 1.560530662536621, "learning_rate": 8.20204371274482e-05, "loss": 0.02765943706035614, "step": 63370 }, { "epoch": 17.990349134260573, "grad_norm": 2.5164995193481445, "learning_rate": 8.201759863752484e-05, "loss": 0.022004860639572143, "step": 63380 }, { "epoch": 17.993187624183935, "grad_norm": 0.08475080132484436, "learning_rate": 8.201476014760147e-05, "loss": 0.007116524130105972, "step": 63390 }, { "epoch": 17.996026114107295, "grad_norm": 5.503971099853516, "learning_rate": 8.201192165767811e-05, "loss": 0.01168704628944397, "step": 63400 }, { "epoch": 17.998864604030654, "grad_norm": 0.17828251421451569, "learning_rate": 8.200908316775477e-05, "loss": 0.0051066108047962185, "step": 63410 }, { "epoch": 18.001703093954017, "grad_norm": 5.596116542816162, "learning_rate": 8.20062446778314e-05, "loss": 0.02582048773765564, "step": 63420 }, { "epoch": 18.004541583877376, "grad_norm": 1.7009128332138062, "learning_rate": 8.200340618790804e-05, "loss": 0.0115012988448143, "step": 63430 }, { "epoch": 18.00738007380074, "grad_norm": 0.08834262937307358, "learning_rate": 8.200056769798468e-05, "loss": 0.014110808074474335, "step": 63440 }, { "epoch": 18.0102185637241, "grad_norm": 3.2692201137542725, "learning_rate": 8.199772920806132e-05, "loss": 0.004589985683560371, "step": 63450 }, { "epoch": 18.013057053647458, "grad_norm": 5.552335739135742, "learning_rate": 8.199489071813795e-05, "loss": 0.005204660445451736, "step": 63460 }, { "epoch": 18.01589554357082, "grad_norm": 13.209425926208496, "learning_rate": 8.199205222821459e-05, "loss": 0.016594859957695007, "step": 63470 }, { "epoch": 18.01873403349418, "grad_norm": 7.932065010070801, "learning_rate": 8.198921373829124e-05, "loss": 0.009701775759458542, "step": 63480 }, { "epoch": 18.021572523417543, "grad_norm": 3.5568270683288574, "learning_rate": 8.198637524836787e-05, "loss": 0.01052877977490425, "step": 63490 }, { "epoch": 18.024411013340902, "grad_norm": 0.2994891107082367, "learning_rate": 8.198353675844451e-05, "loss": 0.02152692973613739, "step": 63500 }, { "epoch": 18.024411013340902, "eval_accuracy": 0.9756469765371654, "eval_loss": 0.08282523602247238, "eval_runtime": 36.0975, "eval_samples_per_second": 435.681, "eval_steps_per_second": 6.815, "step": 63500 }, { "epoch": 18.02724950326426, "grad_norm": 0.4344005584716797, "learning_rate": 8.198069826852115e-05, "loss": 0.006046795099973678, "step": 63510 }, { "epoch": 18.030087993187625, "grad_norm": 1.226027011871338, "learning_rate": 8.197785977859778e-05, "loss": 0.016422252357006072, "step": 63520 }, { "epoch": 18.032926483110984, "grad_norm": 0.13570503890514374, "learning_rate": 8.197502128867442e-05, "loss": 0.00693868100643158, "step": 63530 }, { "epoch": 18.035764973034347, "grad_norm": 6.946529388427734, "learning_rate": 8.197218279875108e-05, "loss": 0.014408622682094575, "step": 63540 }, { "epoch": 18.038603462957706, "grad_norm": 4.519002437591553, "learning_rate": 8.19693443088277e-05, "loss": 0.008696480840444564, "step": 63550 }, { "epoch": 18.04144195288107, "grad_norm": 0.05053959786891937, "learning_rate": 8.196650581890435e-05, "loss": 0.021762165427207946, "step": 63560 }, { "epoch": 18.04428044280443, "grad_norm": 0.2058885395526886, "learning_rate": 8.196366732898099e-05, "loss": 0.015555308759212494, "step": 63570 }, { "epoch": 18.047118932727788, "grad_norm": 1.3566240072250366, "learning_rate": 8.196082883905763e-05, "loss": 0.010940085351467132, "step": 63580 }, { "epoch": 18.04995742265115, "grad_norm": 8.57244873046875, "learning_rate": 8.195799034913426e-05, "loss": 0.009846068173646926, "step": 63590 }, { "epoch": 18.05279591257451, "grad_norm": 0.3414233922958374, "learning_rate": 8.19551518592109e-05, "loss": 0.019746306538581847, "step": 63600 }, { "epoch": 18.055634402497873, "grad_norm": 0.6710842251777649, "learning_rate": 8.195231336928755e-05, "loss": 0.009552419185638428, "step": 63610 }, { "epoch": 18.058472892421232, "grad_norm": 4.083414554595947, "learning_rate": 8.194947487936418e-05, "loss": 0.017618247866630556, "step": 63620 }, { "epoch": 18.06131138234459, "grad_norm": 1.370925784111023, "learning_rate": 8.194663638944082e-05, "loss": 0.03877753019332886, "step": 63630 }, { "epoch": 18.064149872267954, "grad_norm": 11.761861801147461, "learning_rate": 8.194379789951746e-05, "loss": 0.03358525037765503, "step": 63640 }, { "epoch": 18.066988362191314, "grad_norm": 0.19323250651359558, "learning_rate": 8.194095940959409e-05, "loss": 0.02412593364715576, "step": 63650 }, { "epoch": 18.069826852114677, "grad_norm": 3.0223143100738525, "learning_rate": 8.193812091967073e-05, "loss": 0.010805560648441315, "step": 63660 }, { "epoch": 18.072665342038036, "grad_norm": 1.6559028625488281, "learning_rate": 8.193528242974738e-05, "loss": 0.00976199135184288, "step": 63670 }, { "epoch": 18.075503831961395, "grad_norm": 0.031026296317577362, "learning_rate": 8.193244393982402e-05, "loss": 0.018979427218437196, "step": 63680 }, { "epoch": 18.078342321884758, "grad_norm": 1.3790242671966553, "learning_rate": 8.192960544990066e-05, "loss": 0.00535089336335659, "step": 63690 }, { "epoch": 18.081180811808117, "grad_norm": 1.3481518030166626, "learning_rate": 8.19267669599773e-05, "loss": 0.021392324566841127, "step": 63700 }, { "epoch": 18.08401930173148, "grad_norm": 15.175721168518066, "learning_rate": 8.192392847005394e-05, "loss": 0.019316455721855162, "step": 63710 }, { "epoch": 18.08685779165484, "grad_norm": 2.0424211025238037, "learning_rate": 8.192108998013057e-05, "loss": 0.014048132300376891, "step": 63720 }, { "epoch": 18.0896962815782, "grad_norm": 3.0000364780426025, "learning_rate": 8.191825149020721e-05, "loss": 0.01618514358997345, "step": 63730 }, { "epoch": 18.092534771501562, "grad_norm": 0.10339687764644623, "learning_rate": 8.191541300028385e-05, "loss": 0.025193285942077637, "step": 63740 }, { "epoch": 18.09537326142492, "grad_norm": 4.3130693435668945, "learning_rate": 8.191257451036049e-05, "loss": 0.02877337336540222, "step": 63750 }, { "epoch": 18.098211751348284, "grad_norm": 14.694103240966797, "learning_rate": 8.190973602043713e-05, "loss": 0.037304741144180295, "step": 63760 }, { "epoch": 18.101050241271643, "grad_norm": 6.1827826499938965, "learning_rate": 8.190689753051378e-05, "loss": 0.01221163421869278, "step": 63770 }, { "epoch": 18.103888731195003, "grad_norm": 1.5237185955047607, "learning_rate": 8.19040590405904e-05, "loss": 0.015175989270210266, "step": 63780 }, { "epoch": 18.106727221118366, "grad_norm": 1.40510892868042, "learning_rate": 8.190122055066704e-05, "loss": 0.024872441589832307, "step": 63790 }, { "epoch": 18.109565711041725, "grad_norm": 13.733448028564453, "learning_rate": 8.189838206074369e-05, "loss": 0.01799137145280838, "step": 63800 }, { "epoch": 18.112404200965088, "grad_norm": 0.3721771836280823, "learning_rate": 8.189554357082033e-05, "loss": 0.015046654641628266, "step": 63810 }, { "epoch": 18.115242690888447, "grad_norm": 6.451157569885254, "learning_rate": 8.189270508089697e-05, "loss": 0.009206238389015197, "step": 63820 }, { "epoch": 18.118081180811807, "grad_norm": 1.7429025173187256, "learning_rate": 8.188986659097361e-05, "loss": 0.012143295258283615, "step": 63830 }, { "epoch": 18.12091967073517, "grad_norm": 0.25729095935821533, "learning_rate": 8.188702810105024e-05, "loss": 0.00805310532450676, "step": 63840 }, { "epoch": 18.12375816065853, "grad_norm": 1.8799551725387573, "learning_rate": 8.188418961112688e-05, "loss": 0.00404946431517601, "step": 63850 }, { "epoch": 18.12659665058189, "grad_norm": 0.11569642275571823, "learning_rate": 8.188135112120352e-05, "loss": 0.00835409089922905, "step": 63860 }, { "epoch": 18.12943514050525, "grad_norm": 0.12882059812545776, "learning_rate": 8.187851263128016e-05, "loss": 0.02296677827835083, "step": 63870 }, { "epoch": 18.13227363042861, "grad_norm": 0.41811925172805786, "learning_rate": 8.18756741413568e-05, "loss": 0.004974915087223053, "step": 63880 }, { "epoch": 18.135112120351973, "grad_norm": 1.1312847137451172, "learning_rate": 8.187283565143345e-05, "loss": 0.010475464165210724, "step": 63890 }, { "epoch": 18.137950610275333, "grad_norm": 0.7560549974441528, "learning_rate": 8.186999716151009e-05, "loss": 0.010284463316202164, "step": 63900 }, { "epoch": 18.140789100198695, "grad_norm": 5.250003337860107, "learning_rate": 8.186715867158671e-05, "loss": 0.013701197504997254, "step": 63910 }, { "epoch": 18.143627590122055, "grad_norm": 5.484339237213135, "learning_rate": 8.186432018166336e-05, "loss": 0.0056212909519672396, "step": 63920 }, { "epoch": 18.146466080045414, "grad_norm": 3.989210367202759, "learning_rate": 8.186148169174e-05, "loss": 0.018411792814731598, "step": 63930 }, { "epoch": 18.149304569968777, "grad_norm": 3.137714385986328, "learning_rate": 8.185864320181664e-05, "loss": 0.01878328025341034, "step": 63940 }, { "epoch": 18.152143059892136, "grad_norm": 1.649335265159607, "learning_rate": 8.185580471189328e-05, "loss": 0.011023390293121337, "step": 63950 }, { "epoch": 18.1549815498155, "grad_norm": 9.756315231323242, "learning_rate": 8.185296622196992e-05, "loss": 0.017147386074066163, "step": 63960 }, { "epoch": 18.15782003973886, "grad_norm": 9.087162017822266, "learning_rate": 8.185012773204655e-05, "loss": 0.0241706982254982, "step": 63970 }, { "epoch": 18.16065852966222, "grad_norm": 8.14655876159668, "learning_rate": 8.184728924212319e-05, "loss": 0.007172031700611115, "step": 63980 }, { "epoch": 18.16349701958558, "grad_norm": 1.6742770671844482, "learning_rate": 8.184445075219983e-05, "loss": 0.03243105411529541, "step": 63990 }, { "epoch": 18.16633550950894, "grad_norm": 6.08148717880249, "learning_rate": 8.184161226227647e-05, "loss": 0.02645944356918335, "step": 64000 }, { "epoch": 18.16633550950894, "eval_accuracy": 0.972849240160234, "eval_loss": 0.09253419935703278, "eval_runtime": 32.7841, "eval_samples_per_second": 479.715, "eval_steps_per_second": 7.504, "step": 64000 }, { "epoch": 18.169173999432303, "grad_norm": 0.7909792065620422, "learning_rate": 8.183877377235311e-05, "loss": 0.007281602919101715, "step": 64010 }, { "epoch": 18.172012489355662, "grad_norm": 4.5595703125, "learning_rate": 8.183593528242976e-05, "loss": 0.010615143924951553, "step": 64020 }, { "epoch": 18.174850979279025, "grad_norm": 1.6653521060943604, "learning_rate": 8.18330967925064e-05, "loss": 0.008436542004346848, "step": 64030 }, { "epoch": 18.177689469202384, "grad_norm": 1.3890280723571777, "learning_rate": 8.183025830258303e-05, "loss": 0.020527346432209014, "step": 64040 }, { "epoch": 18.180527959125744, "grad_norm": 4.738671779632568, "learning_rate": 8.182741981265967e-05, "loss": 0.012897318601608277, "step": 64050 }, { "epoch": 18.183366449049107, "grad_norm": 1.9025684595108032, "learning_rate": 8.182458132273631e-05, "loss": 0.017895436286926268, "step": 64060 }, { "epoch": 18.186204938972466, "grad_norm": 0.6886310577392578, "learning_rate": 8.182174283281294e-05, "loss": 0.00899558663368225, "step": 64070 }, { "epoch": 18.18904342889583, "grad_norm": 0.1816594898700714, "learning_rate": 8.181890434288959e-05, "loss": 0.009175996482372283, "step": 64080 }, { "epoch": 18.19188191881919, "grad_norm": 0.7115601301193237, "learning_rate": 8.181606585296623e-05, "loss": 0.005536258965730667, "step": 64090 }, { "epoch": 18.194720408742548, "grad_norm": 0.19168800115585327, "learning_rate": 8.181322736304286e-05, "loss": 0.019043384492397307, "step": 64100 }, { "epoch": 18.19755889866591, "grad_norm": 3.471911907196045, "learning_rate": 8.18103888731195e-05, "loss": 0.004335270449519158, "step": 64110 }, { "epoch": 18.20039738858927, "grad_norm": 0.07795028388500214, "learning_rate": 8.180755038319614e-05, "loss": 0.009142100811004639, "step": 64120 }, { "epoch": 18.203235878512633, "grad_norm": 8.72973346710205, "learning_rate": 8.180471189327278e-05, "loss": 0.02793293595314026, "step": 64130 }, { "epoch": 18.206074368435992, "grad_norm": 3.1181886196136475, "learning_rate": 8.180187340334943e-05, "loss": 0.01871417313814163, "step": 64140 }, { "epoch": 18.20891285835935, "grad_norm": 1.3577665090560913, "learning_rate": 8.179903491342607e-05, "loss": 0.02154628187417984, "step": 64150 }, { "epoch": 18.211751348282714, "grad_norm": 4.973752021789551, "learning_rate": 8.179619642350271e-05, "loss": 0.010517151653766632, "step": 64160 }, { "epoch": 18.214589838206074, "grad_norm": 0.17791198194026947, "learning_rate": 8.179335793357934e-05, "loss": 0.0011845273897051812, "step": 64170 }, { "epoch": 18.217428328129436, "grad_norm": 2.85772442817688, "learning_rate": 8.179051944365598e-05, "loss": 0.014440384507179261, "step": 64180 }, { "epoch": 18.220266818052796, "grad_norm": 1.9365718364715576, "learning_rate": 8.178768095373262e-05, "loss": 0.006885389983654022, "step": 64190 }, { "epoch": 18.223105307976155, "grad_norm": 3.6908276081085205, "learning_rate": 8.178484246380925e-05, "loss": 0.00742960125207901, "step": 64200 }, { "epoch": 18.225943797899518, "grad_norm": 0.2549467086791992, "learning_rate": 8.17820039738859e-05, "loss": 0.02034248411655426, "step": 64210 }, { "epoch": 18.228782287822877, "grad_norm": 0.591435432434082, "learning_rate": 8.177916548396254e-05, "loss": 0.01434108018875122, "step": 64220 }, { "epoch": 18.23162077774624, "grad_norm": 1.156069040298462, "learning_rate": 8.177632699403917e-05, "loss": 0.012024050951004029, "step": 64230 }, { "epoch": 18.2344592676696, "grad_norm": 2.9135138988494873, "learning_rate": 8.177348850411581e-05, "loss": 0.0204189270734787, "step": 64240 }, { "epoch": 18.23729775759296, "grad_norm": 7.187717914581299, "learning_rate": 8.177065001419245e-05, "loss": 0.029043132066726686, "step": 64250 }, { "epoch": 18.240136247516322, "grad_norm": 0.459384948015213, "learning_rate": 8.17678115242691e-05, "loss": 0.02640361487865448, "step": 64260 }, { "epoch": 18.24297473743968, "grad_norm": 1.7785052061080933, "learning_rate": 8.176497303434572e-05, "loss": 0.012384162843227386, "step": 64270 }, { "epoch": 18.245813227363044, "grad_norm": 6.284324645996094, "learning_rate": 8.176213454442238e-05, "loss": 0.01056932657957077, "step": 64280 }, { "epoch": 18.248651717286403, "grad_norm": 6.980228900909424, "learning_rate": 8.175929605449902e-05, "loss": 0.011804678291082383, "step": 64290 }, { "epoch": 18.251490207209763, "grad_norm": 6.617609977722168, "learning_rate": 8.175645756457565e-05, "loss": 0.004404126852750778, "step": 64300 }, { "epoch": 18.254328697133126, "grad_norm": 2.6301016807556152, "learning_rate": 8.175361907465229e-05, "loss": 0.006492830812931061, "step": 64310 }, { "epoch": 18.257167187056485, "grad_norm": 0.39303460717201233, "learning_rate": 8.175078058472893e-05, "loss": 0.005812273174524307, "step": 64320 }, { "epoch": 18.260005676979848, "grad_norm": 3.5707991123199463, "learning_rate": 8.174794209480556e-05, "loss": 0.00630013570189476, "step": 64330 }, { "epoch": 18.262844166903207, "grad_norm": 0.2379271239042282, "learning_rate": 8.174510360488221e-05, "loss": 0.0106409952044487, "step": 64340 }, { "epoch": 18.26568265682657, "grad_norm": 3.261885643005371, "learning_rate": 8.174226511495885e-05, "loss": 0.03130515813827515, "step": 64350 }, { "epoch": 18.26852114674993, "grad_norm": 2.220877170562744, "learning_rate": 8.173942662503548e-05, "loss": 0.011540243029594421, "step": 64360 }, { "epoch": 18.27135963667329, "grad_norm": 2.8791868686676025, "learning_rate": 8.173658813511212e-05, "loss": 0.011170335859060288, "step": 64370 }, { "epoch": 18.27419812659665, "grad_norm": 5.647636890411377, "learning_rate": 8.173374964518876e-05, "loss": 0.01788533329963684, "step": 64380 }, { "epoch": 18.27703661652001, "grad_norm": 0.16027964651584625, "learning_rate": 8.17309111552654e-05, "loss": 0.008350406587123872, "step": 64390 }, { "epoch": 18.279875106443374, "grad_norm": 3.7189574241638184, "learning_rate": 8.172807266534203e-05, "loss": 0.01755886971950531, "step": 64400 }, { "epoch": 18.282713596366733, "grad_norm": 6.565844535827637, "learning_rate": 8.172523417541869e-05, "loss": 0.04768775403499603, "step": 64410 }, { "epoch": 18.285552086290092, "grad_norm": 6.08485746383667, "learning_rate": 8.172239568549533e-05, "loss": 0.034477400779724124, "step": 64420 }, { "epoch": 18.288390576213455, "grad_norm": 12.25684928894043, "learning_rate": 8.171955719557196e-05, "loss": 0.021344895660877227, "step": 64430 }, { "epoch": 18.291229066136815, "grad_norm": 0.9328294992446899, "learning_rate": 8.17167187056486e-05, "loss": 0.034344762563705444, "step": 64440 }, { "epoch": 18.294067556060178, "grad_norm": 1.9570271968841553, "learning_rate": 8.171388021572524e-05, "loss": 0.01506240963935852, "step": 64450 }, { "epoch": 18.296906045983537, "grad_norm": 2.2173566818237305, "learning_rate": 8.171104172580187e-05, "loss": 0.007134252041578293, "step": 64460 }, { "epoch": 18.299744535906896, "grad_norm": 1.131592869758606, "learning_rate": 8.170820323587851e-05, "loss": 0.01459348052740097, "step": 64470 }, { "epoch": 18.30258302583026, "grad_norm": 1.5219777822494507, "learning_rate": 8.170536474595516e-05, "loss": 0.012294764071702958, "step": 64480 }, { "epoch": 18.30542151575362, "grad_norm": 2.5147440433502197, "learning_rate": 8.170252625603179e-05, "loss": 0.030322352051734926, "step": 64490 }, { "epoch": 18.30826000567698, "grad_norm": 0.07464440166950226, "learning_rate": 8.169968776610843e-05, "loss": 0.003058000095188618, "step": 64500 }, { "epoch": 18.30826000567698, "eval_accuracy": 0.9690341451007821, "eval_loss": 0.10368286818265915, "eval_runtime": 34.5609, "eval_samples_per_second": 455.051, "eval_steps_per_second": 7.118, "step": 64500 }, { "epoch": 18.31109849560034, "grad_norm": 4.478222370147705, "learning_rate": 8.169684927618507e-05, "loss": 0.032835769653320315, "step": 64510 }, { "epoch": 18.3139369855237, "grad_norm": 0.5021949410438538, "learning_rate": 8.169401078626172e-05, "loss": 0.010996091365814208, "step": 64520 }, { "epoch": 18.316775475447063, "grad_norm": 0.23742152750492096, "learning_rate": 8.169117229633834e-05, "loss": 0.020415955781936647, "step": 64530 }, { "epoch": 18.319613965370422, "grad_norm": 0.10383354127407074, "learning_rate": 8.1688333806415e-05, "loss": 0.013141006231307983, "step": 64540 }, { "epoch": 18.322452455293785, "grad_norm": 2.263467311859131, "learning_rate": 8.168549531649164e-05, "loss": 0.0028465628623962403, "step": 64550 }, { "epoch": 18.325290945217144, "grad_norm": 3.129889726638794, "learning_rate": 8.168265682656827e-05, "loss": 0.006402218341827392, "step": 64560 }, { "epoch": 18.328129435140504, "grad_norm": 3.98580002784729, "learning_rate": 8.167981833664491e-05, "loss": 0.010013539344072342, "step": 64570 }, { "epoch": 18.330967925063867, "grad_norm": 0.1880096048116684, "learning_rate": 8.167697984672155e-05, "loss": 0.0032391913235187532, "step": 64580 }, { "epoch": 18.333806414987226, "grad_norm": 2.945850133895874, "learning_rate": 8.167414135679818e-05, "loss": 0.009317662566900253, "step": 64590 }, { "epoch": 18.33664490491059, "grad_norm": 7.553272724151611, "learning_rate": 8.167130286687482e-05, "loss": 0.008638419210910797, "step": 64600 }, { "epoch": 18.339483394833948, "grad_norm": 3.5514140129089355, "learning_rate": 8.166846437695147e-05, "loss": 0.013696661591529847, "step": 64610 }, { "epoch": 18.342321884757308, "grad_norm": 10.930365562438965, "learning_rate": 8.16656258870281e-05, "loss": 0.01479831337928772, "step": 64620 }, { "epoch": 18.34516037468067, "grad_norm": 0.8180923461914062, "learning_rate": 8.166278739710474e-05, "loss": 0.011184198409318924, "step": 64630 }, { "epoch": 18.34799886460403, "grad_norm": 0.4955216646194458, "learning_rate": 8.165994890718139e-05, "loss": 0.010780367255210876, "step": 64640 }, { "epoch": 18.350837354527393, "grad_norm": 2.19299054145813, "learning_rate": 8.165711041725803e-05, "loss": 0.01413591206073761, "step": 64650 }, { "epoch": 18.353675844450752, "grad_norm": 5.656460762023926, "learning_rate": 8.165427192733465e-05, "loss": 0.015481257438659668, "step": 64660 }, { "epoch": 18.35651433437411, "grad_norm": 1.381019949913025, "learning_rate": 8.165143343741131e-05, "loss": 0.020487432181835175, "step": 64670 }, { "epoch": 18.359352824297474, "grad_norm": 5.936448097229004, "learning_rate": 8.164859494748794e-05, "loss": 0.018602804839611055, "step": 64680 }, { "epoch": 18.362191314220834, "grad_norm": 5.114973068237305, "learning_rate": 8.164575645756458e-05, "loss": 0.023753133416175843, "step": 64690 }, { "epoch": 18.365029804144196, "grad_norm": 4.586143970489502, "learning_rate": 8.164291796764122e-05, "loss": 0.025039854645729064, "step": 64700 }, { "epoch": 18.367868294067556, "grad_norm": 4.848890781402588, "learning_rate": 8.164007947771786e-05, "loss": 0.01846681982278824, "step": 64710 }, { "epoch": 18.37070678399092, "grad_norm": 1.0696460008621216, "learning_rate": 8.163724098779449e-05, "loss": 0.022922775149345397, "step": 64720 }, { "epoch": 18.373545273914278, "grad_norm": 1.330155849456787, "learning_rate": 8.163440249787113e-05, "loss": 0.027651447057724, "step": 64730 }, { "epoch": 18.376383763837637, "grad_norm": 7.053121566772461, "learning_rate": 8.163156400794779e-05, "loss": 0.018580135703086854, "step": 64740 }, { "epoch": 18.379222253761, "grad_norm": 7.181282997131348, "learning_rate": 8.162872551802441e-05, "loss": 0.04168572723865509, "step": 64750 }, { "epoch": 18.38206074368436, "grad_norm": 14.02045726776123, "learning_rate": 8.162588702810105e-05, "loss": 0.011977569758892059, "step": 64760 }, { "epoch": 18.384899233607722, "grad_norm": 1.291469931602478, "learning_rate": 8.16230485381777e-05, "loss": 0.026130855083465576, "step": 64770 }, { "epoch": 18.38773772353108, "grad_norm": 2.4930732250213623, "learning_rate": 8.162021004825432e-05, "loss": 0.01443694531917572, "step": 64780 }, { "epoch": 18.39057621345444, "grad_norm": 1.7244832515716553, "learning_rate": 8.161737155833097e-05, "loss": 0.01112772449851036, "step": 64790 }, { "epoch": 18.393414703377804, "grad_norm": 0.22653059661388397, "learning_rate": 8.161481691739995e-05, "loss": 0.03092370331287384, "step": 64800 }, { "epoch": 18.396253193301163, "grad_norm": 7.254136562347412, "learning_rate": 8.161197842747659e-05, "loss": 0.012051913887262344, "step": 64810 }, { "epoch": 18.399091683224526, "grad_norm": 5.456974506378174, "learning_rate": 8.160913993755323e-05, "loss": 0.012831449508666992, "step": 64820 }, { "epoch": 18.401930173147885, "grad_norm": 0.2961234450340271, "learning_rate": 8.160630144762986e-05, "loss": 0.01109011322259903, "step": 64830 }, { "epoch": 18.404768663071245, "grad_norm": 0.05361732468008995, "learning_rate": 8.16034629577065e-05, "loss": 0.009719224274158477, "step": 64840 }, { "epoch": 18.407607152994608, "grad_norm": 5.3745808601379395, "learning_rate": 8.160062446778314e-05, "loss": 0.010086790472269059, "step": 64850 }, { "epoch": 18.410445642917967, "grad_norm": 2.479048490524292, "learning_rate": 8.159778597785978e-05, "loss": 0.017358624935150148, "step": 64860 }, { "epoch": 18.41328413284133, "grad_norm": 0.24645620584487915, "learning_rate": 8.159494748793642e-05, "loss": 0.005355107039213181, "step": 64870 }, { "epoch": 18.41612262276469, "grad_norm": 1.6201422214508057, "learning_rate": 8.159210899801306e-05, "loss": 0.012021343410015106, "step": 64880 }, { "epoch": 18.41896111268805, "grad_norm": 8.78207778930664, "learning_rate": 8.15892705080897e-05, "loss": 0.034655234217643736, "step": 64890 }, { "epoch": 18.42179960261141, "grad_norm": 2.6894776821136475, "learning_rate": 8.158643201816633e-05, "loss": 0.008927924185991287, "step": 64900 }, { "epoch": 18.42463809253477, "grad_norm": 0.13425426185131073, "learning_rate": 8.158359352824297e-05, "loss": 0.02195619195699692, "step": 64910 }, { "epoch": 18.427476582458134, "grad_norm": 4.211025238037109, "learning_rate": 8.158075503831963e-05, "loss": 0.006853824853897095, "step": 64920 }, { "epoch": 18.430315072381493, "grad_norm": 2.7323198318481445, "learning_rate": 8.157791654839626e-05, "loss": 0.027390363812446594, "step": 64930 }, { "epoch": 18.433153562304852, "grad_norm": 13.486830711364746, "learning_rate": 8.15750780584729e-05, "loss": 0.009636478126049041, "step": 64940 }, { "epoch": 18.435992052228215, "grad_norm": 4.857482433319092, "learning_rate": 8.157223956854954e-05, "loss": 0.011126063019037246, "step": 64950 }, { "epoch": 18.438830542151575, "grad_norm": 1.6316314935684204, "learning_rate": 8.156940107862617e-05, "loss": 0.018447716534137727, "step": 64960 }, { "epoch": 18.441669032074937, "grad_norm": 0.08411791920661926, "learning_rate": 8.156656258870281e-05, "loss": 0.023391738533973694, "step": 64970 }, { "epoch": 18.444507521998297, "grad_norm": 10.549160957336426, "learning_rate": 8.156372409877945e-05, "loss": 0.027126479148864745, "step": 64980 }, { "epoch": 18.447346011921656, "grad_norm": 7.26146125793457, "learning_rate": 8.156088560885609e-05, "loss": 0.025733837485313417, "step": 64990 }, { "epoch": 18.45018450184502, "grad_norm": 2.690920114517212, "learning_rate": 8.155804711893273e-05, "loss": 0.011287415772676468, "step": 65000 }, { "epoch": 18.45018450184502, "eval_accuracy": 0.9696699942773574, "eval_loss": 0.10198283195495605, "eval_runtime": 32.4511, "eval_samples_per_second": 484.637, "eval_steps_per_second": 7.581, "step": 65000 }, { "epoch": 18.45302299176838, "grad_norm": 0.8891941905021667, "learning_rate": 8.155520862900937e-05, "loss": 0.01564575433731079, "step": 65010 }, { "epoch": 18.45586148169174, "grad_norm": 0.13758035004138947, "learning_rate": 8.155237013908602e-05, "loss": 0.02487906664609909, "step": 65020 }, { "epoch": 18.4586999716151, "grad_norm": 2.539674997329712, "learning_rate": 8.154953164916264e-05, "loss": 0.01063397079706192, "step": 65030 }, { "epoch": 18.46153846153846, "grad_norm": 0.8648252487182617, "learning_rate": 8.154669315923928e-05, "loss": 0.011678020656108856, "step": 65040 }, { "epoch": 18.464376951461823, "grad_norm": 8.473857879638672, "learning_rate": 8.154385466931594e-05, "loss": 0.014374054968357086, "step": 65050 }, { "epoch": 18.467215441385182, "grad_norm": 5.437459945678711, "learning_rate": 8.154101617939257e-05, "loss": 0.011309526860713959, "step": 65060 }, { "epoch": 18.470053931308545, "grad_norm": 0.7881172895431519, "learning_rate": 8.153817768946921e-05, "loss": 0.03389034271240234, "step": 65070 }, { "epoch": 18.472892421231904, "grad_norm": 0.16056481003761292, "learning_rate": 8.153533919954585e-05, "loss": 0.0015257732942700387, "step": 65080 }, { "epoch": 18.475730911155264, "grad_norm": 0.015211442485451698, "learning_rate": 8.153250070962248e-05, "loss": 0.00968095064163208, "step": 65090 }, { "epoch": 18.478569401078627, "grad_norm": 2.1354992389678955, "learning_rate": 8.152966221969912e-05, "loss": 0.009421783685684203, "step": 65100 }, { "epoch": 18.481407891001986, "grad_norm": 0.9720522165298462, "learning_rate": 8.152682372977576e-05, "loss": 0.016679605841636656, "step": 65110 }, { "epoch": 18.48424638092535, "grad_norm": 7.112988471984863, "learning_rate": 8.15239852398524e-05, "loss": 0.007891004532575607, "step": 65120 }, { "epoch": 18.487084870848708, "grad_norm": 6.451182842254639, "learning_rate": 8.152114674992904e-05, "loss": 0.012456642091274261, "step": 65130 }, { "epoch": 18.48992336077207, "grad_norm": 0.16130331158638, "learning_rate": 8.151830826000568e-05, "loss": 0.006427173316478729, "step": 65140 }, { "epoch": 18.49276185069543, "grad_norm": 9.48763656616211, "learning_rate": 8.151546977008233e-05, "loss": 0.008901557326316834, "step": 65150 }, { "epoch": 18.49560034061879, "grad_norm": 0.45526623725891113, "learning_rate": 8.151263128015895e-05, "loss": 0.01108473613858223, "step": 65160 }, { "epoch": 18.498438830542153, "grad_norm": 0.7395574450492859, "learning_rate": 8.15097927902356e-05, "loss": 0.013161873817443848, "step": 65170 }, { "epoch": 18.501277320465512, "grad_norm": 2.383711099624634, "learning_rate": 8.150695430031224e-05, "loss": 0.016227197647094727, "step": 65180 }, { "epoch": 18.504115810388875, "grad_norm": 0.15384098887443542, "learning_rate": 8.150411581038888e-05, "loss": 0.018149395287036896, "step": 65190 }, { "epoch": 18.506954300312234, "grad_norm": 0.8748568892478943, "learning_rate": 8.150127732046552e-05, "loss": 0.0033325232565402985, "step": 65200 }, { "epoch": 18.509792790235593, "grad_norm": 11.958146095275879, "learning_rate": 8.149843883054216e-05, "loss": 0.028678563237190247, "step": 65210 }, { "epoch": 18.512631280158956, "grad_norm": 1.496232271194458, "learning_rate": 8.149560034061879e-05, "loss": 0.00581941157579422, "step": 65220 }, { "epoch": 18.515469770082316, "grad_norm": 1.0207868814468384, "learning_rate": 8.149276185069543e-05, "loss": 0.011551957577466965, "step": 65230 }, { "epoch": 18.51830826000568, "grad_norm": 5.443925380706787, "learning_rate": 8.148992336077207e-05, "loss": 0.010677462816238404, "step": 65240 }, { "epoch": 18.521146749929038, "grad_norm": 0.606041669845581, "learning_rate": 8.148708487084871e-05, "loss": 0.008380811661481857, "step": 65250 }, { "epoch": 18.523985239852397, "grad_norm": 0.46817782521247864, "learning_rate": 8.148424638092535e-05, "loss": 0.009468620270490646, "step": 65260 }, { "epoch": 18.52682372977576, "grad_norm": 5.073399066925049, "learning_rate": 8.1481407891002e-05, "loss": 0.011484379321336747, "step": 65270 }, { "epoch": 18.52966221969912, "grad_norm": 1.9756686687469482, "learning_rate": 8.147856940107864e-05, "loss": 0.0072007313370704654, "step": 65280 }, { "epoch": 18.532500709622482, "grad_norm": 8.106208801269531, "learning_rate": 8.147573091115526e-05, "loss": 0.02204432636499405, "step": 65290 }, { "epoch": 18.53533919954584, "grad_norm": 0.2717669606208801, "learning_rate": 8.14728924212319e-05, "loss": 0.02254790663719177, "step": 65300 }, { "epoch": 18.5381776894692, "grad_norm": 8.638641357421875, "learning_rate": 8.147005393130855e-05, "loss": 0.0266812264919281, "step": 65310 }, { "epoch": 18.541016179392564, "grad_norm": 0.6886634230613708, "learning_rate": 8.146721544138519e-05, "loss": 0.023158955574035644, "step": 65320 }, { "epoch": 18.543854669315923, "grad_norm": 13.143996238708496, "learning_rate": 8.146437695146183e-05, "loss": 0.024650579690933226, "step": 65330 }, { "epoch": 18.546693159239286, "grad_norm": 1.2094290256500244, "learning_rate": 8.146153846153847e-05, "loss": 0.00432853251695633, "step": 65340 }, { "epoch": 18.549531649162645, "grad_norm": 3.272505760192871, "learning_rate": 8.14586999716151e-05, "loss": 0.010065466910600663, "step": 65350 }, { "epoch": 18.552370139086005, "grad_norm": 3.646428108215332, "learning_rate": 8.145586148169174e-05, "loss": 0.03245283961296082, "step": 65360 }, { "epoch": 18.555208629009368, "grad_norm": 5.668003559112549, "learning_rate": 8.145302299176838e-05, "loss": 0.012863484025001527, "step": 65370 }, { "epoch": 18.558047118932727, "grad_norm": 6.571054935455322, "learning_rate": 8.145018450184502e-05, "loss": 0.014046260714530944, "step": 65380 }, { "epoch": 18.56088560885609, "grad_norm": 2.5467770099639893, "learning_rate": 8.144734601192166e-05, "loss": 0.020170438289642333, "step": 65390 }, { "epoch": 18.56372409877945, "grad_norm": 7.595078468322754, "learning_rate": 8.14445075219983e-05, "loss": 0.01897081285715103, "step": 65400 }, { "epoch": 18.56656258870281, "grad_norm": 0.0956747829914093, "learning_rate": 8.144166903207495e-05, "loss": 0.01448056846857071, "step": 65410 }, { "epoch": 18.56940107862617, "grad_norm": 2.403494119644165, "learning_rate": 8.143883054215158e-05, "loss": 0.012586936354637146, "step": 65420 }, { "epoch": 18.57223956854953, "grad_norm": 6.541079044342041, "learning_rate": 8.143599205222822e-05, "loss": 0.008063363283872605, "step": 65430 }, { "epoch": 18.575078058472894, "grad_norm": 2.283950090408325, "learning_rate": 8.143315356230486e-05, "loss": 0.007760443538427353, "step": 65440 }, { "epoch": 18.577916548396253, "grad_norm": 7.8882670402526855, "learning_rate": 8.14303150723815e-05, "loss": 0.01304212659597397, "step": 65450 }, { "epoch": 18.580755038319612, "grad_norm": 0.3923953175544739, "learning_rate": 8.142747658245814e-05, "loss": 0.012824231386184692, "step": 65460 }, { "epoch": 18.583593528242975, "grad_norm": 0.36744067072868347, "learning_rate": 8.142463809253478e-05, "loss": 0.006192845106124878, "step": 65470 }, { "epoch": 18.586432018166335, "grad_norm": 1.9242703914642334, "learning_rate": 8.142179960261141e-05, "loss": 0.003169301524758339, "step": 65480 }, { "epoch": 18.589270508089697, "grad_norm": 6.785406589508057, "learning_rate": 8.141896111268805e-05, "loss": 0.021434947848320007, "step": 65490 }, { "epoch": 18.592108998013057, "grad_norm": 0.24273142218589783, "learning_rate": 8.141612262276469e-05, "loss": 0.04365226626396179, "step": 65500 }, { "epoch": 18.592108998013057, "eval_accuracy": 0.9681439562535766, "eval_loss": 0.11023939400911331, "eval_runtime": 33.8065, "eval_samples_per_second": 465.207, "eval_steps_per_second": 7.277, "step": 65500 }, { "epoch": 18.594947487936416, "grad_norm": 6.179567337036133, "learning_rate": 8.141328413284133e-05, "loss": 0.02884460985660553, "step": 65510 }, { "epoch": 18.59778597785978, "grad_norm": 8.968781471252441, "learning_rate": 8.141044564291798e-05, "loss": 0.033974486589431765, "step": 65520 }, { "epoch": 18.60062446778314, "grad_norm": 11.361102104187012, "learning_rate": 8.140760715299462e-05, "loss": 0.03882739543914795, "step": 65530 }, { "epoch": 18.6034629577065, "grad_norm": 2.247143268585205, "learning_rate": 8.140476866307126e-05, "loss": 0.02889944612979889, "step": 65540 }, { "epoch": 18.60630144762986, "grad_norm": 0.2253871113061905, "learning_rate": 8.140193017314789e-05, "loss": 0.024729686975479125, "step": 65550 }, { "epoch": 18.609139937553223, "grad_norm": 0.42723408341407776, "learning_rate": 8.139909168322453e-05, "loss": 0.006454510241746902, "step": 65560 }, { "epoch": 18.611978427476583, "grad_norm": 0.43010419607162476, "learning_rate": 8.139625319330117e-05, "loss": 0.021596884727478026, "step": 65570 }, { "epoch": 18.614816917399942, "grad_norm": 5.4712910652160645, "learning_rate": 8.13934147033778e-05, "loss": 0.017323389649391174, "step": 65580 }, { "epoch": 18.617655407323305, "grad_norm": 8.60168743133545, "learning_rate": 8.139057621345445e-05, "loss": 0.015932148694992064, "step": 65590 }, { "epoch": 18.620493897246664, "grad_norm": 13.400238037109375, "learning_rate": 8.138773772353109e-05, "loss": 0.017503833770751952, "step": 65600 }, { "epoch": 18.623332387170027, "grad_norm": 0.6350124478340149, "learning_rate": 8.138489923360772e-05, "loss": 0.010272857546806336, "step": 65610 }, { "epoch": 18.626170877093386, "grad_norm": 0.4678855240345001, "learning_rate": 8.138206074368436e-05, "loss": 0.009701205790042878, "step": 65620 }, { "epoch": 18.629009367016746, "grad_norm": 0.6845434904098511, "learning_rate": 8.1379222253761e-05, "loss": 0.01599424034357071, "step": 65630 }, { "epoch": 18.63184785694011, "grad_norm": 0.3056240975856781, "learning_rate": 8.137638376383764e-05, "loss": 0.014908777177333831, "step": 65640 }, { "epoch": 18.634686346863468, "grad_norm": 0.7635912895202637, "learning_rate": 8.137354527391429e-05, "loss": 0.008269859105348587, "step": 65650 }, { "epoch": 18.63752483678683, "grad_norm": 2.6220715045928955, "learning_rate": 8.137070678399093e-05, "loss": 0.004915742576122284, "step": 65660 }, { "epoch": 18.64036332671019, "grad_norm": 2.5507876873016357, "learning_rate": 8.136786829406756e-05, "loss": 0.004254682362079621, "step": 65670 }, { "epoch": 18.64320181663355, "grad_norm": 0.3479268550872803, "learning_rate": 8.13650298041442e-05, "loss": 0.011239035427570343, "step": 65680 }, { "epoch": 18.646040306556912, "grad_norm": 1.9590940475463867, "learning_rate": 8.136219131422084e-05, "loss": 0.011529885232448578, "step": 65690 }, { "epoch": 18.648878796480272, "grad_norm": 1.4682226181030273, "learning_rate": 8.135935282429748e-05, "loss": 0.01221732497215271, "step": 65700 }, { "epoch": 18.651717286403635, "grad_norm": 9.41601276397705, "learning_rate": 8.135651433437411e-05, "loss": 0.013988754153251648, "step": 65710 }, { "epoch": 18.654555776326994, "grad_norm": 0.7196325659751892, "learning_rate": 8.135367584445076e-05, "loss": 0.013577654957771301, "step": 65720 }, { "epoch": 18.657394266250353, "grad_norm": 0.9811475872993469, "learning_rate": 8.13508373545274e-05, "loss": 0.004997146129608154, "step": 65730 }, { "epoch": 18.660232756173716, "grad_norm": 11.28565788269043, "learning_rate": 8.134799886460403e-05, "loss": 0.010397904366254807, "step": 65740 }, { "epoch": 18.663071246097076, "grad_norm": 0.9437506794929504, "learning_rate": 8.134516037468067e-05, "loss": 0.01994735449552536, "step": 65750 }, { "epoch": 18.66590973602044, "grad_norm": 0.27086594700813293, "learning_rate": 8.134232188475731e-05, "loss": 0.01840336322784424, "step": 65760 }, { "epoch": 18.668748225943798, "grad_norm": 1.973841905593872, "learning_rate": 8.133948339483394e-05, "loss": 0.00694541335105896, "step": 65770 }, { "epoch": 18.671586715867157, "grad_norm": 0.5780234336853027, "learning_rate": 8.133664490491058e-05, "loss": 0.008664996922016143, "step": 65780 }, { "epoch": 18.67442520579052, "grad_norm": 0.7797946929931641, "learning_rate": 8.133380641498724e-05, "loss": 0.023518939316272736, "step": 65790 }, { "epoch": 18.67726369571388, "grad_norm": 4.0404372215271, "learning_rate": 8.133096792506387e-05, "loss": 0.009264381229877472, "step": 65800 }, { "epoch": 18.680102185637242, "grad_norm": 4.4820027351379395, "learning_rate": 8.132812943514051e-05, "loss": 0.006943101435899735, "step": 65810 }, { "epoch": 18.6829406755606, "grad_norm": 2.275444746017456, "learning_rate": 8.132529094521715e-05, "loss": 0.013003671169281006, "step": 65820 }, { "epoch": 18.68577916548396, "grad_norm": 0.11952267587184906, "learning_rate": 8.132245245529379e-05, "loss": 0.010550101101398469, "step": 65830 }, { "epoch": 18.688617655407324, "grad_norm": 0.5309210419654846, "learning_rate": 8.131961396537042e-05, "loss": 0.03143871128559113, "step": 65840 }, { "epoch": 18.691456145330683, "grad_norm": 2.4222359657287598, "learning_rate": 8.131677547544707e-05, "loss": 0.008392826467752457, "step": 65850 }, { "epoch": 18.694294635254046, "grad_norm": 0.4086063504219055, "learning_rate": 8.131393698552371e-05, "loss": 0.04294750988483429, "step": 65860 }, { "epoch": 18.697133125177405, "grad_norm": 0.655267596244812, "learning_rate": 8.131109849560034e-05, "loss": 0.010572954267263412, "step": 65870 }, { "epoch": 18.699971615100765, "grad_norm": 0.34024572372436523, "learning_rate": 8.130826000567698e-05, "loss": 0.018364611268043517, "step": 65880 }, { "epoch": 18.702810105024128, "grad_norm": 2.3825905323028564, "learning_rate": 8.130542151575362e-05, "loss": 0.004750026017427444, "step": 65890 }, { "epoch": 18.705648594947487, "grad_norm": 1.5823478698730469, "learning_rate": 8.130258302583025e-05, "loss": 0.008215912431478501, "step": 65900 }, { "epoch": 18.70848708487085, "grad_norm": 0.6279844641685486, "learning_rate": 8.12997445359069e-05, "loss": 0.013653947412967682, "step": 65910 }, { "epoch": 18.71132557479421, "grad_norm": 8.516203880310059, "learning_rate": 8.129690604598355e-05, "loss": 0.030845460295677186, "step": 65920 }, { "epoch": 18.714164064717572, "grad_norm": 1.2129309177398682, "learning_rate": 8.129406755606018e-05, "loss": 0.015081872045993806, "step": 65930 }, { "epoch": 18.71700255464093, "grad_norm": 4.454775333404541, "learning_rate": 8.129122906613682e-05, "loss": 0.005820025503635406, "step": 65940 }, { "epoch": 18.71984104456429, "grad_norm": 0.8875638842582703, "learning_rate": 8.128839057621346e-05, "loss": 0.007812238484621048, "step": 65950 }, { "epoch": 18.722679534487654, "grad_norm": 0.21875768899917603, "learning_rate": 8.12855520862901e-05, "loss": 0.02433929294347763, "step": 65960 }, { "epoch": 18.725518024411013, "grad_norm": 5.24880838394165, "learning_rate": 8.128271359636673e-05, "loss": 0.007466356456279755, "step": 65970 }, { "epoch": 18.728356514334376, "grad_norm": 6.274791717529297, "learning_rate": 8.127987510644337e-05, "loss": 0.012364890426397324, "step": 65980 }, { "epoch": 18.731195004257735, "grad_norm": 6.917942047119141, "learning_rate": 8.127703661652003e-05, "loss": 0.012214804440736771, "step": 65990 }, { "epoch": 18.734033494181094, "grad_norm": 7.644416332244873, "learning_rate": 8.127419812659665e-05, "loss": 0.04571822881698608, "step": 66000 }, { "epoch": 18.734033494181094, "eval_accuracy": 0.9612767851465632, "eval_loss": 0.13269035518169403, "eval_runtime": 33.5383, "eval_samples_per_second": 468.927, "eval_steps_per_second": 7.335, "step": 66000 }, { "epoch": 18.736871984104457, "grad_norm": 3.8700308799743652, "learning_rate": 8.12713596366733e-05, "loss": 0.01224614828824997, "step": 66010 }, { "epoch": 18.739710474027817, "grad_norm": 9.85750961303711, "learning_rate": 8.126852114674994e-05, "loss": 0.03495345115661621, "step": 66020 }, { "epoch": 18.74254896395118, "grad_norm": 1.3369840383529663, "learning_rate": 8.126568265682656e-05, "loss": 0.030778557062149048, "step": 66030 }, { "epoch": 18.74538745387454, "grad_norm": 0.7006474733352661, "learning_rate": 8.12628441669032e-05, "loss": 0.013155975937843322, "step": 66040 }, { "epoch": 18.748225943797898, "grad_norm": 0.13770046830177307, "learning_rate": 8.126000567697986e-05, "loss": 0.0101681187748909, "step": 66050 }, { "epoch": 18.75106443372126, "grad_norm": 1.5571401119232178, "learning_rate": 8.125716718705649e-05, "loss": 0.008423368632793426, "step": 66060 }, { "epoch": 18.75390292364462, "grad_norm": 3.109083890914917, "learning_rate": 8.125432869713313e-05, "loss": 0.01735614538192749, "step": 66070 }, { "epoch": 18.756741413567983, "grad_norm": 2.264070749282837, "learning_rate": 8.125149020720977e-05, "loss": 0.02675175666809082, "step": 66080 }, { "epoch": 18.759579903491343, "grad_norm": 6.177260875701904, "learning_rate": 8.124865171728641e-05, "loss": 0.01383574604988098, "step": 66090 }, { "epoch": 18.762418393414702, "grad_norm": 8.13632869720459, "learning_rate": 8.124581322736304e-05, "loss": 0.023966291546821596, "step": 66100 }, { "epoch": 18.765256883338065, "grad_norm": 3.427464723587036, "learning_rate": 8.124297473743968e-05, "loss": 0.020964588224887847, "step": 66110 }, { "epoch": 18.768095373261424, "grad_norm": 1.0798410177230835, "learning_rate": 8.124013624751634e-05, "loss": 0.011063165962696075, "step": 66120 }, { "epoch": 18.770933863184787, "grad_norm": 2.138603687286377, "learning_rate": 8.123729775759296e-05, "loss": 0.040188837051391604, "step": 66130 }, { "epoch": 18.773772353108146, "grad_norm": 2.790104866027832, "learning_rate": 8.12344592676696e-05, "loss": 0.012763440608978271, "step": 66140 }, { "epoch": 18.776610843031506, "grad_norm": 0.5808774828910828, "learning_rate": 8.123162077774625e-05, "loss": 0.03314026892185211, "step": 66150 }, { "epoch": 18.77944933295487, "grad_norm": 0.6953529119491577, "learning_rate": 8.122878228782287e-05, "loss": 0.005150855332612991, "step": 66160 }, { "epoch": 18.782287822878228, "grad_norm": 0.2773054540157318, "learning_rate": 8.122594379789952e-05, "loss": 0.01242266595363617, "step": 66170 }, { "epoch": 18.78512631280159, "grad_norm": 0.40293845534324646, "learning_rate": 8.122310530797616e-05, "loss": 0.016194000840187073, "step": 66180 }, { "epoch": 18.78796480272495, "grad_norm": 0.7688791155815125, "learning_rate": 8.12202668180528e-05, "loss": 0.01305808573961258, "step": 66190 }, { "epoch": 18.79080329264831, "grad_norm": 6.508535861968994, "learning_rate": 8.121742832812944e-05, "loss": 0.012065546214580536, "step": 66200 }, { "epoch": 18.793641782571672, "grad_norm": 0.3057519495487213, "learning_rate": 8.121458983820608e-05, "loss": 0.027346912026405334, "step": 66210 }, { "epoch": 18.79648027249503, "grad_norm": 0.5230058431625366, "learning_rate": 8.121175134828272e-05, "loss": 0.015798026323318483, "step": 66220 }, { "epoch": 18.799318762418395, "grad_norm": 0.34859928488731384, "learning_rate": 8.120891285835935e-05, "loss": 0.028179723024368285, "step": 66230 }, { "epoch": 18.802157252341754, "grad_norm": 12.38961124420166, "learning_rate": 8.120607436843599e-05, "loss": 0.013416914641857148, "step": 66240 }, { "epoch": 18.804995742265113, "grad_norm": 1.4976232051849365, "learning_rate": 8.120323587851265e-05, "loss": 0.0159172847867012, "step": 66250 }, { "epoch": 18.807834232188476, "grad_norm": 0.22108115255832672, "learning_rate": 8.120039738858927e-05, "loss": 0.028093093633651735, "step": 66260 }, { "epoch": 18.810672722111835, "grad_norm": 3.8638558387756348, "learning_rate": 8.119755889866592e-05, "loss": 0.010218902677297591, "step": 66270 }, { "epoch": 18.8135112120352, "grad_norm": 0.7871213555335999, "learning_rate": 8.119472040874256e-05, "loss": 0.011192785948514939, "step": 66280 }, { "epoch": 18.816349701958558, "grad_norm": 1.849426031112671, "learning_rate": 8.119188191881919e-05, "loss": 0.022012992203235625, "step": 66290 }, { "epoch": 18.81918819188192, "grad_norm": 1.9850103855133057, "learning_rate": 8.118904342889583e-05, "loss": 0.006547165662050247, "step": 66300 }, { "epoch": 18.82202668180528, "grad_norm": 8.532297134399414, "learning_rate": 8.118620493897247e-05, "loss": 0.015212714672088623, "step": 66310 }, { "epoch": 18.82486517172864, "grad_norm": 3.3553709983825684, "learning_rate": 8.118336644904911e-05, "loss": 0.012339036166667938, "step": 66320 }, { "epoch": 18.827703661652002, "grad_norm": 0.25596335530281067, "learning_rate": 8.118052795912575e-05, "loss": 0.009032484889030457, "step": 66330 }, { "epoch": 18.83054215157536, "grad_norm": 0.6270331740379333, "learning_rate": 8.117768946920239e-05, "loss": 0.0087604820728302, "step": 66340 }, { "epoch": 18.833380641498724, "grad_norm": 2.284088611602783, "learning_rate": 8.117485097927903e-05, "loss": 0.010215285420417785, "step": 66350 }, { "epoch": 18.836219131422084, "grad_norm": 4.853416919708252, "learning_rate": 8.117201248935566e-05, "loss": 0.009285935014486314, "step": 66360 }, { "epoch": 18.839057621345443, "grad_norm": 3.198435068130493, "learning_rate": 8.11691739994323e-05, "loss": 0.006463640183210373, "step": 66370 }, { "epoch": 18.841896111268806, "grad_norm": 1.4982163906097412, "learning_rate": 8.116633550950894e-05, "loss": 0.013243886828422546, "step": 66380 }, { "epoch": 18.844734601192165, "grad_norm": 0.5533506870269775, "learning_rate": 8.116349701958559e-05, "loss": 0.013612279295921325, "step": 66390 }, { "epoch": 18.847573091115528, "grad_norm": 0.11169018596410751, "learning_rate": 8.116065852966223e-05, "loss": 0.014494398236274719, "step": 66400 }, { "epoch": 18.850411581038887, "grad_norm": 0.6696150302886963, "learning_rate": 8.115782003973887e-05, "loss": 0.02288123071193695, "step": 66410 }, { "epoch": 18.853250070962247, "grad_norm": 2.863934278488159, "learning_rate": 8.11549815498155e-05, "loss": 0.0031729631125926973, "step": 66420 }, { "epoch": 18.85608856088561, "grad_norm": 8.135354042053223, "learning_rate": 8.115214305989214e-05, "loss": 0.008603744953870774, "step": 66430 }, { "epoch": 18.85892705080897, "grad_norm": 1.1584993600845337, "learning_rate": 8.114930456996878e-05, "loss": 0.011062257736921311, "step": 66440 }, { "epoch": 18.861765540732332, "grad_norm": 0.5900819897651672, "learning_rate": 8.114646608004542e-05, "loss": 0.006310056149959564, "step": 66450 }, { "epoch": 18.86460403065569, "grad_norm": 0.2956761121749878, "learning_rate": 8.114362759012206e-05, "loss": 0.012941974401473998, "step": 66460 }, { "epoch": 18.86744252057905, "grad_norm": 4.201529026031494, "learning_rate": 8.11407891001987e-05, "loss": 0.011592862010002137, "step": 66470 }, { "epoch": 18.870281010502413, "grad_norm": 13.019427299499512, "learning_rate": 8.113795061027533e-05, "loss": 0.01116764396429062, "step": 66480 }, { "epoch": 18.873119500425773, "grad_norm": 0.22097288072109222, "learning_rate": 8.113511212035197e-05, "loss": 0.008799201250076294, "step": 66490 }, { "epoch": 18.875957990349136, "grad_norm": 6.417760848999023, "learning_rate": 8.113227363042861e-05, "loss": 0.012001951038837434, "step": 66500 }, { "epoch": 18.875957990349136, "eval_accuracy": 0.972722070324919, "eval_loss": 0.09035919606685638, "eval_runtime": 35.2045, "eval_samples_per_second": 446.733, "eval_steps_per_second": 6.988, "step": 66500 }, { "epoch": 18.878796480272495, "grad_norm": 3.1576976776123047, "learning_rate": 8.112943514050525e-05, "loss": 0.019307251274585723, "step": 66510 }, { "epoch": 18.881634970195854, "grad_norm": 2.6664600372314453, "learning_rate": 8.11265966505819e-05, "loss": 0.013446062803268433, "step": 66520 }, { "epoch": 18.884473460119217, "grad_norm": 0.423504114151001, "learning_rate": 8.112375816065854e-05, "loss": 0.005678916722536087, "step": 66530 }, { "epoch": 18.887311950042577, "grad_norm": 7.068592071533203, "learning_rate": 8.112091967073518e-05, "loss": 0.007730250060558319, "step": 66540 }, { "epoch": 18.89015043996594, "grad_norm": 0.1055157482624054, "learning_rate": 8.11180811808118e-05, "loss": 0.01553587019443512, "step": 66550 }, { "epoch": 18.8929889298893, "grad_norm": 0.5283113718032837, "learning_rate": 8.111524269088845e-05, "loss": 0.01008235365152359, "step": 66560 }, { "epoch": 18.895827419812658, "grad_norm": 3.3666839599609375, "learning_rate": 8.111240420096509e-05, "loss": 0.01060042679309845, "step": 66570 }, { "epoch": 18.89866590973602, "grad_norm": 12.068689346313477, "learning_rate": 8.110956571104173e-05, "loss": 0.0326359361410141, "step": 66580 }, { "epoch": 18.90150439965938, "grad_norm": 8.531636238098145, "learning_rate": 8.110672722111837e-05, "loss": 0.01770493686199188, "step": 66590 }, { "epoch": 18.904342889582743, "grad_norm": 0.39327937364578247, "learning_rate": 8.110388873119501e-05, "loss": 0.027002593874931334, "step": 66600 }, { "epoch": 18.907181379506103, "grad_norm": 6.878226280212402, "learning_rate": 8.110105024127164e-05, "loss": 0.023195961117744447, "step": 66610 }, { "epoch": 18.910019869429462, "grad_norm": 0.6023988127708435, "learning_rate": 8.109821175134828e-05, "loss": 0.01830955743789673, "step": 66620 }, { "epoch": 18.912858359352825, "grad_norm": 0.17476624250411987, "learning_rate": 8.109537326142492e-05, "loss": 0.00927278697490692, "step": 66630 }, { "epoch": 18.915696849276184, "grad_norm": 4.982316970825195, "learning_rate": 8.109253477150157e-05, "loss": 0.01557239145040512, "step": 66640 }, { "epoch": 18.918535339199547, "grad_norm": 2.6297597885131836, "learning_rate": 8.108969628157821e-05, "loss": 0.016148705780506135, "step": 66650 }, { "epoch": 18.921373829122906, "grad_norm": 2.7952420711517334, "learning_rate": 8.108685779165485e-05, "loss": 0.016126461327075958, "step": 66660 }, { "epoch": 18.92421231904627, "grad_norm": 19.423137664794922, "learning_rate": 8.108401930173149e-05, "loss": 0.04925414025783539, "step": 66670 }, { "epoch": 18.92705080896963, "grad_norm": 14.141602516174316, "learning_rate": 8.108118081180812e-05, "loss": 0.03280113637447357, "step": 66680 }, { "epoch": 18.929889298892988, "grad_norm": 2.30189847946167, "learning_rate": 8.107834232188476e-05, "loss": 0.008106661587953567, "step": 66690 }, { "epoch": 18.93272778881635, "grad_norm": 0.24279485642910004, "learning_rate": 8.10755038319614e-05, "loss": 0.00633731484413147, "step": 66700 }, { "epoch": 18.93556627873971, "grad_norm": 11.113791465759277, "learning_rate": 8.107266534203803e-05, "loss": 0.007417207956314087, "step": 66710 }, { "epoch": 18.93840476866307, "grad_norm": 0.3591241240501404, "learning_rate": 8.106982685211468e-05, "loss": 0.008123142272233963, "step": 66720 }, { "epoch": 18.941243258586432, "grad_norm": 2.5613889694213867, "learning_rate": 8.106698836219132e-05, "loss": 0.004798300564289093, "step": 66730 }, { "epoch": 18.94408174850979, "grad_norm": 5.90300989151001, "learning_rate": 8.106414987226795e-05, "loss": 0.005936193838715553, "step": 66740 }, { "epoch": 18.946920238433155, "grad_norm": 5.515382766723633, "learning_rate": 8.10613113823446e-05, "loss": 0.013710743188858033, "step": 66750 }, { "epoch": 18.949758728356514, "grad_norm": 0.8127226829528809, "learning_rate": 8.105847289242123e-05, "loss": 0.026463246345520018, "step": 66760 }, { "epoch": 18.952597218279877, "grad_norm": 4.597580432891846, "learning_rate": 8.105563440249788e-05, "loss": 0.01565616428852081, "step": 66770 }, { "epoch": 18.955435708203236, "grad_norm": 1.1406363248825073, "learning_rate": 8.105279591257452e-05, "loss": 0.014328444004058838, "step": 66780 }, { "epoch": 18.958274198126595, "grad_norm": 0.9002832174301147, "learning_rate": 8.104995742265116e-05, "loss": 0.020723022520542145, "step": 66790 }, { "epoch": 18.96111268804996, "grad_norm": 0.5421157479286194, "learning_rate": 8.10471189327278e-05, "loss": 0.004544793069362641, "step": 66800 }, { "epoch": 18.963951177973318, "grad_norm": 2.258450984954834, "learning_rate": 8.104428044280443e-05, "loss": 0.028781715035438537, "step": 66810 }, { "epoch": 18.96678966789668, "grad_norm": 1.7957864999771118, "learning_rate": 8.104144195288107e-05, "loss": 0.01444721817970276, "step": 66820 }, { "epoch": 18.96962815782004, "grad_norm": 3.4749908447265625, "learning_rate": 8.103860346295771e-05, "loss": 0.0062502816319465635, "step": 66830 }, { "epoch": 18.9724666477434, "grad_norm": 2.3403375148773193, "learning_rate": 8.103576497303434e-05, "loss": 0.035101696848869324, "step": 66840 }, { "epoch": 18.975305137666762, "grad_norm": 5.20975923538208, "learning_rate": 8.1032926483111e-05, "loss": 0.013607844710350037, "step": 66850 }, { "epoch": 18.97814362759012, "grad_norm": 3.292044162750244, "learning_rate": 8.103008799318763e-05, "loss": 0.020733726024627686, "step": 66860 }, { "epoch": 18.980982117513484, "grad_norm": 0.7472140789031982, "learning_rate": 8.102724950326426e-05, "loss": 0.01183490753173828, "step": 66870 }, { "epoch": 18.983820607436844, "grad_norm": 1.6601287126541138, "learning_rate": 8.10244110133409e-05, "loss": 0.011008187383413314, "step": 66880 }, { "epoch": 18.986659097360203, "grad_norm": 0.5677258372306824, "learning_rate": 8.102157252341755e-05, "loss": 0.009062321484088897, "step": 66890 }, { "epoch": 18.989497587283566, "grad_norm": 0.7862358689308167, "learning_rate": 8.101873403349419e-05, "loss": 0.004938579350709915, "step": 66900 }, { "epoch": 18.992336077206925, "grad_norm": 5.511763572692871, "learning_rate": 8.101589554357081e-05, "loss": 0.017281834781169892, "step": 66910 }, { "epoch": 18.995174567130288, "grad_norm": 6.894260883331299, "learning_rate": 8.101305705364747e-05, "loss": 0.02430771440267563, "step": 66920 }, { "epoch": 18.998013057053647, "grad_norm": 9.394935607910156, "learning_rate": 8.101021856372411e-05, "loss": 0.01683236360549927, "step": 66930 }, { "epoch": 19.000851546977007, "grad_norm": 0.8228522539138794, "learning_rate": 8.100738007380074e-05, "loss": 0.00787048190832138, "step": 66940 }, { "epoch": 19.00369003690037, "grad_norm": 6.135046482086182, "learning_rate": 8.100454158387738e-05, "loss": 0.024907056987285615, "step": 66950 }, { "epoch": 19.00652852682373, "grad_norm": 9.054296493530273, "learning_rate": 8.100170309395402e-05, "loss": 0.021778059005737305, "step": 66960 }, { "epoch": 19.009367016747092, "grad_norm": 10.892069816589355, "learning_rate": 8.099886460403065e-05, "loss": 0.014873985946178437, "step": 66970 }, { "epoch": 19.01220550667045, "grad_norm": 1.1351549625396729, "learning_rate": 8.09960261141073e-05, "loss": 0.015754473209381104, "step": 66980 }, { "epoch": 19.01504399659381, "grad_norm": 11.73816204071045, "learning_rate": 8.099318762418395e-05, "loss": 0.030018576979637147, "step": 66990 }, { "epoch": 19.017882486517173, "grad_norm": 2.7029123306274414, "learning_rate": 8.099034913426057e-05, "loss": 0.023129640519618987, "step": 67000 }, { "epoch": 19.017882486517173, "eval_accuracy": 0.9684618808418644, "eval_loss": 0.10879301279783249, "eval_runtime": 35.13, "eval_samples_per_second": 447.68, "eval_steps_per_second": 7.003, "step": 67000 }, { "epoch": 19.020720976440533, "grad_norm": 0.35860660672187805, "learning_rate": 8.098751064433721e-05, "loss": 0.023211745917797087, "step": 67010 }, { "epoch": 19.023559466363896, "grad_norm": 6.844875812530518, "learning_rate": 8.098467215441386e-05, "loss": 0.028550219535827637, "step": 67020 }, { "epoch": 19.026397956287255, "grad_norm": 0.398984432220459, "learning_rate": 8.09818336644905e-05, "loss": 0.012396010756492614, "step": 67030 }, { "epoch": 19.029236446210614, "grad_norm": 2.010920286178589, "learning_rate": 8.097899517456713e-05, "loss": 0.003723376989364624, "step": 67040 }, { "epoch": 19.032074936133977, "grad_norm": 10.082107543945312, "learning_rate": 8.097615668464378e-05, "loss": 0.00984167903661728, "step": 67050 }, { "epoch": 19.034913426057336, "grad_norm": 6.38857364654541, "learning_rate": 8.097331819472042e-05, "loss": 0.011177126318216324, "step": 67060 }, { "epoch": 19.0377519159807, "grad_norm": 5.711360454559326, "learning_rate": 8.097047970479705e-05, "loss": 0.01688204109668732, "step": 67070 }, { "epoch": 19.04059040590406, "grad_norm": 1.2126646041870117, "learning_rate": 8.096764121487369e-05, "loss": 0.005413512140512467, "step": 67080 }, { "epoch": 19.043428895827418, "grad_norm": 2.8821897506713867, "learning_rate": 8.096480272495033e-05, "loss": 0.008537909388542176, "step": 67090 }, { "epoch": 19.04626738575078, "grad_norm": 7.021731376647949, "learning_rate": 8.096196423502696e-05, "loss": 0.020964626967906953, "step": 67100 }, { "epoch": 19.04910587567414, "grad_norm": 8.111486434936523, "learning_rate": 8.09591257451036e-05, "loss": 0.02418878674507141, "step": 67110 }, { "epoch": 19.051944365597503, "grad_norm": 0.8373589515686035, "learning_rate": 8.095628725518026e-05, "loss": 0.019504806399345397, "step": 67120 }, { "epoch": 19.054782855520862, "grad_norm": 0.9200805425643921, "learning_rate": 8.095344876525688e-05, "loss": 0.00317273736000061, "step": 67130 }, { "epoch": 19.057621345444225, "grad_norm": 1.7019476890563965, "learning_rate": 8.095061027533353e-05, "loss": 0.009716753661632539, "step": 67140 }, { "epoch": 19.060459835367585, "grad_norm": 1.5689568519592285, "learning_rate": 8.094777178541017e-05, "loss": 0.005203162506222725, "step": 67150 }, { "epoch": 19.063298325290944, "grad_norm": 0.6668362021446228, "learning_rate": 8.094493329548681e-05, "loss": 0.017311424016952515, "step": 67160 }, { "epoch": 19.066136815214307, "grad_norm": 2.259281635284424, "learning_rate": 8.094209480556344e-05, "loss": 0.015485018491744995, "step": 67170 }, { "epoch": 19.068975305137666, "grad_norm": 0.1918668895959854, "learning_rate": 8.093925631564009e-05, "loss": 0.0056523442268371586, "step": 67180 }, { "epoch": 19.07181379506103, "grad_norm": 11.1837739944458, "learning_rate": 8.093641782571673e-05, "loss": 0.013929513096809388, "step": 67190 }, { "epoch": 19.07465228498439, "grad_norm": 0.15655164420604706, "learning_rate": 8.093357933579336e-05, "loss": 0.0021496571600437163, "step": 67200 }, { "epoch": 19.077490774907748, "grad_norm": 0.5214512348175049, "learning_rate": 8.093074084587e-05, "loss": 0.008708280324935914, "step": 67210 }, { "epoch": 19.08032926483111, "grad_norm": 1.0678950548171997, "learning_rate": 8.092790235594664e-05, "loss": 0.008880508691072464, "step": 67220 }, { "epoch": 19.08316775475447, "grad_norm": 5.618232727050781, "learning_rate": 8.092506386602327e-05, "loss": 0.004989294707775116, "step": 67230 }, { "epoch": 19.086006244677833, "grad_norm": 0.4440780282020569, "learning_rate": 8.092222537609991e-05, "loss": 0.006950663030147552, "step": 67240 }, { "epoch": 19.088844734601192, "grad_norm": 3.9833481311798096, "learning_rate": 8.091938688617657e-05, "loss": 0.012245865911245346, "step": 67250 }, { "epoch": 19.09168322452455, "grad_norm": 4.7206196784973145, "learning_rate": 8.09165483962532e-05, "loss": 0.012672115862369538, "step": 67260 }, { "epoch": 19.094521714447914, "grad_norm": 4.032888889312744, "learning_rate": 8.091370990632984e-05, "loss": 0.04360741376876831, "step": 67270 }, { "epoch": 19.097360204371274, "grad_norm": 0.10961271822452545, "learning_rate": 8.091087141640648e-05, "loss": 0.010828191787004471, "step": 67280 }, { "epoch": 19.100198694294637, "grad_norm": 4.347888946533203, "learning_rate": 8.090803292648312e-05, "loss": 0.01356205940246582, "step": 67290 }, { "epoch": 19.103037184217996, "grad_norm": 6.789463043212891, "learning_rate": 8.090519443655975e-05, "loss": 0.029966995120048523, "step": 67300 }, { "epoch": 19.105875674141355, "grad_norm": 2.1150057315826416, "learning_rate": 8.090235594663639e-05, "loss": 0.023886469006538392, "step": 67310 }, { "epoch": 19.10871416406472, "grad_norm": 2.565760850906372, "learning_rate": 8.089951745671303e-05, "loss": 0.00881776362657547, "step": 67320 }, { "epoch": 19.111552653988078, "grad_norm": 0.24093537032604218, "learning_rate": 8.089667896678967e-05, "loss": 0.010527852177619933, "step": 67330 }, { "epoch": 19.11439114391144, "grad_norm": 10.7637357711792, "learning_rate": 8.089384047686631e-05, "loss": 0.018222635984420775, "step": 67340 }, { "epoch": 19.1172296338348, "grad_norm": 15.3842134475708, "learning_rate": 8.089100198694295e-05, "loss": 0.01397009938955307, "step": 67350 }, { "epoch": 19.12006812375816, "grad_norm": 10.032657623291016, "learning_rate": 8.088816349701958e-05, "loss": 0.03649272918701172, "step": 67360 }, { "epoch": 19.122906613681522, "grad_norm": 4.835189342498779, "learning_rate": 8.088532500709622e-05, "loss": 0.009311941266059876, "step": 67370 }, { "epoch": 19.12574510360488, "grad_norm": 4.193671703338623, "learning_rate": 8.088248651717288e-05, "loss": 0.008511964976787568, "step": 67380 }, { "epoch": 19.128583593528244, "grad_norm": 2.0051794052124023, "learning_rate": 8.08796480272495e-05, "loss": 0.024941906332969666, "step": 67390 }, { "epoch": 19.131422083451604, "grad_norm": 0.5228599905967712, "learning_rate": 8.087680953732615e-05, "loss": 0.014882281422615051, "step": 67400 }, { "epoch": 19.134260573374963, "grad_norm": 0.07257544994354248, "learning_rate": 8.087397104740279e-05, "loss": 0.014460307359695435, "step": 67410 }, { "epoch": 19.137099063298326, "grad_norm": 12.035100936889648, "learning_rate": 8.087113255747942e-05, "loss": 0.02766309380531311, "step": 67420 }, { "epoch": 19.139937553221685, "grad_norm": 0.15956340730190277, "learning_rate": 8.086829406755606e-05, "loss": 0.010959765315055848, "step": 67430 }, { "epoch": 19.142776043145048, "grad_norm": 0.5032079815864563, "learning_rate": 8.08654555776327e-05, "loss": 0.007881728559732437, "step": 67440 }, { "epoch": 19.145614533068407, "grad_norm": 0.08810815960168839, "learning_rate": 8.086261708770934e-05, "loss": 0.013535796105861664, "step": 67450 }, { "epoch": 19.148453022991767, "grad_norm": 0.2247096747159958, "learning_rate": 8.085977859778598e-05, "loss": 0.013523657619953156, "step": 67460 }, { "epoch": 19.15129151291513, "grad_norm": 4.168874740600586, "learning_rate": 8.085694010786262e-05, "loss": 0.014505213499069214, "step": 67470 }, { "epoch": 19.15413000283849, "grad_norm": 0.7329187393188477, "learning_rate": 8.085410161793926e-05, "loss": 0.007675246149301529, "step": 67480 }, { "epoch": 19.15696849276185, "grad_norm": 0.2987183928489685, "learning_rate": 8.085126312801589e-05, "loss": 0.005839854106307029, "step": 67490 }, { "epoch": 19.15980698268521, "grad_norm": 1.5082896947860718, "learning_rate": 8.084842463809253e-05, "loss": 0.007308453321456909, "step": 67500 }, { "epoch": 19.15980698268521, "eval_accuracy": 0.9746296178546449, "eval_loss": 0.09362006932497025, "eval_runtime": 34.9206, "eval_samples_per_second": 450.365, "eval_steps_per_second": 7.045, "step": 67500 }, { "epoch": 19.162645472608574, "grad_norm": 3.113666534423828, "learning_rate": 8.084558614816919e-05, "loss": 0.0066288158297538756, "step": 67510 }, { "epoch": 19.165483962531933, "grad_norm": 4.190005302429199, "learning_rate": 8.084274765824582e-05, "loss": 0.013468901813030242, "step": 67520 }, { "epoch": 19.168322452455293, "grad_norm": 10.283050537109375, "learning_rate": 8.083990916832246e-05, "loss": 0.02649218440055847, "step": 67530 }, { "epoch": 19.171160942378656, "grad_norm": 10.321257591247559, "learning_rate": 8.08370706783991e-05, "loss": 0.01264168620109558, "step": 67540 }, { "epoch": 19.173999432302015, "grad_norm": 0.33355388045310974, "learning_rate": 8.083423218847573e-05, "loss": 0.011690439283847808, "step": 67550 }, { "epoch": 19.176837922225378, "grad_norm": 1.3021514415740967, "learning_rate": 8.083139369855237e-05, "loss": 0.010784810781478882, "step": 67560 }, { "epoch": 19.179676412148737, "grad_norm": 0.116580069065094, "learning_rate": 8.082855520862901e-05, "loss": 0.013323754072189331, "step": 67570 }, { "epoch": 19.182514902072096, "grad_norm": 0.4402197599411011, "learning_rate": 8.082571671870565e-05, "loss": 0.012114851921796798, "step": 67580 }, { "epoch": 19.18535339199546, "grad_norm": 1.7596192359924316, "learning_rate": 8.082287822878229e-05, "loss": 0.01436590850353241, "step": 67590 }, { "epoch": 19.18819188191882, "grad_norm": 0.2331782877445221, "learning_rate": 8.082003973885893e-05, "loss": 0.013291910290718079, "step": 67600 }, { "epoch": 19.19103037184218, "grad_norm": 8.679878234863281, "learning_rate": 8.081720124893558e-05, "loss": 0.0063701331615448, "step": 67610 }, { "epoch": 19.19386886176554, "grad_norm": 0.1528962403535843, "learning_rate": 8.08143627590122e-05, "loss": 0.008055203408002854, "step": 67620 }, { "epoch": 19.1967073516889, "grad_norm": 0.14691723883152008, "learning_rate": 8.081152426908884e-05, "loss": 0.006062944233417511, "step": 67630 }, { "epoch": 19.199545841612263, "grad_norm": 0.8452918529510498, "learning_rate": 8.080868577916549e-05, "loss": 0.010210076719522477, "step": 67640 }, { "epoch": 19.202384331535622, "grad_norm": 6.843537330627441, "learning_rate": 8.080584728924213e-05, "loss": 0.007562895119190216, "step": 67650 }, { "epoch": 19.205222821458985, "grad_norm": 0.06004749611020088, "learning_rate": 8.080300879931877e-05, "loss": 0.009896904230117798, "step": 67660 }, { "epoch": 19.208061311382345, "grad_norm": 0.1304699033498764, "learning_rate": 8.080017030939541e-05, "loss": 0.022869089245796205, "step": 67670 }, { "epoch": 19.210899801305704, "grad_norm": 3.3976776599884033, "learning_rate": 8.079733181947204e-05, "loss": 0.003704933077096939, "step": 67680 }, { "epoch": 19.213738291229067, "grad_norm": 1.5157517194747925, "learning_rate": 8.079449332954868e-05, "loss": 0.02142843008041382, "step": 67690 }, { "epoch": 19.216576781152426, "grad_norm": 0.11498454213142395, "learning_rate": 8.079165483962532e-05, "loss": 0.014121848344802856, "step": 67700 }, { "epoch": 19.21941527107579, "grad_norm": 7.033581256866455, "learning_rate": 8.078881634970196e-05, "loss": 0.025952115654945374, "step": 67710 }, { "epoch": 19.22225376099915, "grad_norm": 10.974184036254883, "learning_rate": 8.07859778597786e-05, "loss": 0.025524702668190003, "step": 67720 }, { "epoch": 19.225092250922508, "grad_norm": 0.06944310665130615, "learning_rate": 8.078313936985524e-05, "loss": 0.008826668560504913, "step": 67730 }, { "epoch": 19.22793074084587, "grad_norm": 0.7446709871292114, "learning_rate": 8.078030087993189e-05, "loss": 0.03030249774456024, "step": 67740 }, { "epoch": 19.23076923076923, "grad_norm": 0.7239276170730591, "learning_rate": 8.077746239000851e-05, "loss": 0.01531202793121338, "step": 67750 }, { "epoch": 19.233607720692593, "grad_norm": 1.6058056354522705, "learning_rate": 8.077462390008516e-05, "loss": 0.005850139260292053, "step": 67760 }, { "epoch": 19.236446210615952, "grad_norm": 11.665865898132324, "learning_rate": 8.07717854101618e-05, "loss": 0.014820276200771332, "step": 67770 }, { "epoch": 19.23928470053931, "grad_norm": 0.10149377584457397, "learning_rate": 8.076894692023844e-05, "loss": 0.010807586461305618, "step": 67780 }, { "epoch": 19.242123190462674, "grad_norm": 0.15132802724838257, "learning_rate": 8.076610843031508e-05, "loss": 0.011475266516208648, "step": 67790 }, { "epoch": 19.244961680386034, "grad_norm": 0.09567967802286148, "learning_rate": 8.076326994039172e-05, "loss": 0.027991437911987306, "step": 67800 }, { "epoch": 19.247800170309397, "grad_norm": 1.3047796487808228, "learning_rate": 8.076043145046835e-05, "loss": 0.03183151483535766, "step": 67810 }, { "epoch": 19.250638660232756, "grad_norm": 3.8587002754211426, "learning_rate": 8.075759296054499e-05, "loss": 0.0247313991189003, "step": 67820 }, { "epoch": 19.253477150156115, "grad_norm": 1.7827682495117188, "learning_rate": 8.075475447062163e-05, "loss": 0.04688800573348999, "step": 67830 }, { "epoch": 19.256315640079478, "grad_norm": 0.5110089182853699, "learning_rate": 8.075191598069827e-05, "loss": 0.01392262578010559, "step": 67840 }, { "epoch": 19.259154130002837, "grad_norm": 3.4611167907714844, "learning_rate": 8.074907749077491e-05, "loss": 0.010926375538110733, "step": 67850 }, { "epoch": 19.2619926199262, "grad_norm": 0.11380285769701004, "learning_rate": 8.074623900085156e-05, "loss": 0.019263923168182373, "step": 67860 }, { "epoch": 19.26483110984956, "grad_norm": 4.549041748046875, "learning_rate": 8.07434005109282e-05, "loss": 0.006186319887638092, "step": 67870 }, { "epoch": 19.267669599772923, "grad_norm": 1.0789645910263062, "learning_rate": 8.074056202100482e-05, "loss": 0.002471606247127056, "step": 67880 }, { "epoch": 19.270508089696282, "grad_norm": 0.12061013281345367, "learning_rate": 8.073772353108147e-05, "loss": 0.007603899389505386, "step": 67890 }, { "epoch": 19.27334657961964, "grad_norm": 11.417141914367676, "learning_rate": 8.073488504115811e-05, "loss": 0.013187363743782043, "step": 67900 }, { "epoch": 19.276185069543004, "grad_norm": 2.8267853260040283, "learning_rate": 8.073204655123475e-05, "loss": 0.022637364268302918, "step": 67910 }, { "epoch": 19.279023559466363, "grad_norm": 1.259590744972229, "learning_rate": 8.072920806131139e-05, "loss": 0.00955088809132576, "step": 67920 }, { "epoch": 19.281862049389726, "grad_norm": 0.5626100301742554, "learning_rate": 8.072636957138803e-05, "loss": 0.0075418621301651, "step": 67930 }, { "epoch": 19.284700539313086, "grad_norm": 1.6079111099243164, "learning_rate": 8.072353108146466e-05, "loss": 0.012356351315975189, "step": 67940 }, { "epoch": 19.287539029236445, "grad_norm": 3.411980628967285, "learning_rate": 8.07206925915413e-05, "loss": 0.0049805760383605955, "step": 67950 }, { "epoch": 19.290377519159808, "grad_norm": 2.580923557281494, "learning_rate": 8.071785410161794e-05, "loss": 0.017541767656803132, "step": 67960 }, { "epoch": 19.293216009083167, "grad_norm": 5.669726371765137, "learning_rate": 8.071501561169458e-05, "loss": 0.013724504411220551, "step": 67970 }, { "epoch": 19.29605449900653, "grad_norm": 0.3092663884162903, "learning_rate": 8.071217712177123e-05, "loss": 0.00782955288887024, "step": 67980 }, { "epoch": 19.29889298892989, "grad_norm": 3.6414968967437744, "learning_rate": 8.070933863184787e-05, "loss": 0.02389530837535858, "step": 67990 }, { "epoch": 19.30173147885325, "grad_norm": 0.13271236419677734, "learning_rate": 8.070650014192451e-05, "loss": 0.0030975701287388802, "step": 68000 }, { "epoch": 19.30173147885325, "eval_accuracy": 0.9744388631016723, "eval_loss": 0.08565521985292435, "eval_runtime": 32.9682, "eval_samples_per_second": 477.036, "eval_steps_per_second": 7.462, "step": 68000 }, { "epoch": 19.30456996877661, "grad_norm": 1.4198191165924072, "learning_rate": 8.070366165200114e-05, "loss": 0.00763678178191185, "step": 68010 }, { "epoch": 19.30740845869997, "grad_norm": 1.041043996810913, "learning_rate": 8.070082316207778e-05, "loss": 0.004635143652558326, "step": 68020 }, { "epoch": 19.310246948623334, "grad_norm": 4.3196821212768555, "learning_rate": 8.069798467215442e-05, "loss": 0.010634306818246841, "step": 68030 }, { "epoch": 19.313085438546693, "grad_norm": 9.29139232635498, "learning_rate": 8.069514618223105e-05, "loss": 0.022003670036792756, "step": 68040 }, { "epoch": 19.315923928470053, "grad_norm": 2.4844846725463867, "learning_rate": 8.06923076923077e-05, "loss": 0.010720017552375793, "step": 68050 }, { "epoch": 19.318762418393415, "grad_norm": 1.1699146032333374, "learning_rate": 8.068946920238434e-05, "loss": 0.022791968286037446, "step": 68060 }, { "epoch": 19.321600908316775, "grad_norm": 9.660022735595703, "learning_rate": 8.068663071246097e-05, "loss": 0.009228043258190155, "step": 68070 }, { "epoch": 19.324439398240138, "grad_norm": 6.170736789703369, "learning_rate": 8.068379222253761e-05, "loss": 0.005584214627742767, "step": 68080 }, { "epoch": 19.327277888163497, "grad_norm": 3.952193021774292, "learning_rate": 8.068095373261425e-05, "loss": 0.004945781081914902, "step": 68090 }, { "epoch": 19.330116378086856, "grad_norm": 0.13458359241485596, "learning_rate": 8.06781152426909e-05, "loss": 0.012250816076993942, "step": 68100 }, { "epoch": 19.33295486801022, "grad_norm": 2.3284058570861816, "learning_rate": 8.067527675276754e-05, "loss": 0.007433629781007767, "step": 68110 }, { "epoch": 19.33579335793358, "grad_norm": 0.07490386813879013, "learning_rate": 8.067243826284418e-05, "loss": 0.00314120352268219, "step": 68120 }, { "epoch": 19.33863184785694, "grad_norm": 0.10765133798122406, "learning_rate": 8.066959977292082e-05, "loss": 0.005351892858743668, "step": 68130 }, { "epoch": 19.3414703377803, "grad_norm": 1.5901883840560913, "learning_rate": 8.066676128299745e-05, "loss": 0.006470400094985962, "step": 68140 }, { "epoch": 19.34430882770366, "grad_norm": 0.6758463978767395, "learning_rate": 8.066392279307409e-05, "loss": 0.013496077060699463, "step": 68150 }, { "epoch": 19.347147317627023, "grad_norm": 0.09625937789678574, "learning_rate": 8.066108430315073e-05, "loss": 0.029165837168693542, "step": 68160 }, { "epoch": 19.349985807550382, "grad_norm": 6.966421604156494, "learning_rate": 8.065824581322736e-05, "loss": 0.01671750992536545, "step": 68170 }, { "epoch": 19.352824297473745, "grad_norm": 4.31232213973999, "learning_rate": 8.065540732330401e-05, "loss": 0.007858431339263916, "step": 68180 }, { "epoch": 19.355662787397105, "grad_norm": 3.7310500144958496, "learning_rate": 8.065256883338065e-05, "loss": 0.004348382353782654, "step": 68190 }, { "epoch": 19.358501277320464, "grad_norm": 0.13277438282966614, "learning_rate": 8.064973034345728e-05, "loss": 0.009804250299930572, "step": 68200 }, { "epoch": 19.361339767243827, "grad_norm": 0.07865480333566666, "learning_rate": 8.064689185353392e-05, "loss": 0.010708212107419967, "step": 68210 }, { "epoch": 19.364178257167186, "grad_norm": 11.81151294708252, "learning_rate": 8.064405336361056e-05, "loss": 0.0053683526813983916, "step": 68220 }, { "epoch": 19.36701674709055, "grad_norm": 8.92349910736084, "learning_rate": 8.06412148736872e-05, "loss": 0.011374233663082123, "step": 68230 }, { "epoch": 19.36985523701391, "grad_norm": 1.5551488399505615, "learning_rate": 8.063837638376383e-05, "loss": 0.0038816902786493303, "step": 68240 }, { "epoch": 19.372693726937268, "grad_norm": 5.585721969604492, "learning_rate": 8.063553789384049e-05, "loss": 0.00555039793252945, "step": 68250 }, { "epoch": 19.37553221686063, "grad_norm": 6.839237213134766, "learning_rate": 8.063269940391712e-05, "loss": 0.019625718891620635, "step": 68260 }, { "epoch": 19.37837070678399, "grad_norm": 1.2135684490203857, "learning_rate": 8.062986091399376e-05, "loss": 0.01753345876932144, "step": 68270 }, { "epoch": 19.381209196707353, "grad_norm": 0.5965828895568848, "learning_rate": 8.06270224240704e-05, "loss": 0.006242763251066208, "step": 68280 }, { "epoch": 19.384047686630712, "grad_norm": 6.478125095367432, "learning_rate": 8.062418393414704e-05, "loss": 0.024333618581295013, "step": 68290 }, { "epoch": 19.386886176554075, "grad_norm": 0.8534708619117737, "learning_rate": 8.062134544422367e-05, "loss": 0.008496670424938202, "step": 68300 }, { "epoch": 19.389724666477434, "grad_norm": 0.9178301095962524, "learning_rate": 8.061850695430032e-05, "loss": 0.00907972753047943, "step": 68310 }, { "epoch": 19.392563156400794, "grad_norm": 3.925877809524536, "learning_rate": 8.061566846437696e-05, "loss": 0.015740588307380676, "step": 68320 }, { "epoch": 19.395401646324157, "grad_norm": 1.7640879154205322, "learning_rate": 8.061282997445359e-05, "loss": 0.0020490843802690507, "step": 68330 }, { "epoch": 19.398240136247516, "grad_norm": 3.381509304046631, "learning_rate": 8.060999148453023e-05, "loss": 0.009093259274959565, "step": 68340 }, { "epoch": 19.40107862617088, "grad_norm": 15.875543594360352, "learning_rate": 8.060715299460687e-05, "loss": 0.01871739625930786, "step": 68350 }, { "epoch": 19.403917116094238, "grad_norm": 7.023008346557617, "learning_rate": 8.06043145046835e-05, "loss": 0.015949773788452148, "step": 68360 }, { "epoch": 19.406755606017597, "grad_norm": 3.037229061126709, "learning_rate": 8.060147601476014e-05, "loss": 0.00839984267950058, "step": 68370 }, { "epoch": 19.40959409594096, "grad_norm": 9.917407035827637, "learning_rate": 8.05986375248368e-05, "loss": 0.00768926739692688, "step": 68380 }, { "epoch": 19.41243258586432, "grad_norm": 0.18468962609767914, "learning_rate": 8.059579903491343e-05, "loss": 0.0058216206729412075, "step": 68390 }, { "epoch": 19.415271075787683, "grad_norm": 1.3383876085281372, "learning_rate": 8.059296054499007e-05, "loss": 0.003566163778305054, "step": 68400 }, { "epoch": 19.418109565711042, "grad_norm": 4.802999496459961, "learning_rate": 8.059012205506671e-05, "loss": 0.005894004926085472, "step": 68410 }, { "epoch": 19.4209480556344, "grad_norm": 16.87082290649414, "learning_rate": 8.058728356514335e-05, "loss": 0.021160835027694704, "step": 68420 }, { "epoch": 19.423786545557764, "grad_norm": 0.37414973974227905, "learning_rate": 8.058444507521998e-05, "loss": 0.02932582199573517, "step": 68430 }, { "epoch": 19.426625035481123, "grad_norm": 6.093992710113525, "learning_rate": 8.058160658529662e-05, "loss": 0.018856915831565856, "step": 68440 }, { "epoch": 19.429463525404486, "grad_norm": 10.923074722290039, "learning_rate": 8.057876809537327e-05, "loss": 0.029166942834854125, "step": 68450 }, { "epoch": 19.432302015327846, "grad_norm": 4.770284652709961, "learning_rate": 8.05759296054499e-05, "loss": 0.01638353168964386, "step": 68460 }, { "epoch": 19.435140505251205, "grad_norm": 1.402288794517517, "learning_rate": 8.057309111552654e-05, "loss": 0.021467173099517824, "step": 68470 }, { "epoch": 19.437978995174568, "grad_norm": 0.6597139835357666, "learning_rate": 8.057025262560319e-05, "loss": 0.013897141814231873, "step": 68480 }, { "epoch": 19.440817485097927, "grad_norm": 2.0909440517425537, "learning_rate": 8.056741413567981e-05, "loss": 0.013078062236309052, "step": 68490 }, { "epoch": 19.44365597502129, "grad_norm": 0.6840941905975342, "learning_rate": 8.056457564575645e-05, "loss": 0.025497856736183166, "step": 68500 }, { "epoch": 19.44365597502129, "eval_accuracy": 0.9712596172187957, "eval_loss": 0.10924075543880463, "eval_runtime": 36.7636, "eval_samples_per_second": 427.787, "eval_steps_per_second": 6.691, "step": 68500 }, { "epoch": 19.44649446494465, "grad_norm": 0.9227526783943176, "learning_rate": 8.056173715583311e-05, "loss": 0.032569697499275206, "step": 68510 }, { "epoch": 19.44933295486801, "grad_norm": 6.001041412353516, "learning_rate": 8.055889866590974e-05, "loss": 0.023711051046848296, "step": 68520 }, { "epoch": 19.45217144479137, "grad_norm": 0.4440292418003082, "learning_rate": 8.055606017598638e-05, "loss": 0.01794673651456833, "step": 68530 }, { "epoch": 19.45500993471473, "grad_norm": 2.3024611473083496, "learning_rate": 8.055322168606302e-05, "loss": 0.02339412122964859, "step": 68540 }, { "epoch": 19.457848424638094, "grad_norm": 0.8106746077537537, "learning_rate": 8.055038319613966e-05, "loss": 0.03239016234874725, "step": 68550 }, { "epoch": 19.460686914561453, "grad_norm": 0.7832844853401184, "learning_rate": 8.054754470621629e-05, "loss": 0.008729781210422515, "step": 68560 }, { "epoch": 19.463525404484812, "grad_norm": 1.791682243347168, "learning_rate": 8.054470621629293e-05, "loss": 0.023701900243759157, "step": 68570 }, { "epoch": 19.466363894408175, "grad_norm": 1.9699652194976807, "learning_rate": 8.054186772636959e-05, "loss": 0.0104043148458004, "step": 68580 }, { "epoch": 19.469202384331535, "grad_norm": 1.3810638189315796, "learning_rate": 8.053902923644621e-05, "loss": 0.013517795503139496, "step": 68590 }, { "epoch": 19.472040874254898, "grad_norm": 0.6438302397727966, "learning_rate": 8.053619074652285e-05, "loss": 0.016304488480091094, "step": 68600 }, { "epoch": 19.474879364178257, "grad_norm": 0.6365600228309631, "learning_rate": 8.05333522565995e-05, "loss": 0.02563230097293854, "step": 68610 }, { "epoch": 19.477717854101616, "grad_norm": 11.787696838378906, "learning_rate": 8.053051376667612e-05, "loss": 0.009514009207487106, "step": 68620 }, { "epoch": 19.48055634402498, "grad_norm": 1.732863426208496, "learning_rate": 8.052767527675277e-05, "loss": 0.014350230991840362, "step": 68630 }, { "epoch": 19.48339483394834, "grad_norm": 0.24051150679588318, "learning_rate": 8.05248367868294e-05, "loss": 0.01867240369319916, "step": 68640 }, { "epoch": 19.4862333238717, "grad_norm": 0.32116565108299255, "learning_rate": 8.052199829690605e-05, "loss": 0.021615423262119293, "step": 68650 }, { "epoch": 19.48907181379506, "grad_norm": 0.35372406244277954, "learning_rate": 8.051915980698269e-05, "loss": 0.009023438394069671, "step": 68660 }, { "epoch": 19.49191030371842, "grad_norm": 3.242429733276367, "learning_rate": 8.051632131705933e-05, "loss": 0.009293357282876969, "step": 68670 }, { "epoch": 19.494748793641783, "grad_norm": 2.859532356262207, "learning_rate": 8.051348282713597e-05, "loss": 0.017050601541996002, "step": 68680 }, { "epoch": 19.497587283565142, "grad_norm": 7.634500503540039, "learning_rate": 8.05106443372126e-05, "loss": 0.020486116409301758, "step": 68690 }, { "epoch": 19.500425773488505, "grad_norm": 0.9769891500473022, "learning_rate": 8.050780584728924e-05, "loss": 0.022155511379241943, "step": 68700 }, { "epoch": 19.503264263411864, "grad_norm": 7.54861307144165, "learning_rate": 8.05049673573659e-05, "loss": 0.021709385514259338, "step": 68710 }, { "epoch": 19.506102753335227, "grad_norm": 7.965332984924316, "learning_rate": 8.050212886744252e-05, "loss": 0.01915217936038971, "step": 68720 }, { "epoch": 19.508941243258587, "grad_norm": 6.3366007804870605, "learning_rate": 8.049929037751917e-05, "loss": 0.009624135494232178, "step": 68730 }, { "epoch": 19.511779733181946, "grad_norm": 0.4215447008609772, "learning_rate": 8.049645188759581e-05, "loss": 0.00831972137093544, "step": 68740 }, { "epoch": 19.51461822310531, "grad_norm": 1.454593300819397, "learning_rate": 8.049361339767243e-05, "loss": 0.01593736857175827, "step": 68750 }, { "epoch": 19.51745671302867, "grad_norm": 0.12381938844919205, "learning_rate": 8.049077490774908e-05, "loss": 0.005413829162716865, "step": 68760 }, { "epoch": 19.52029520295203, "grad_norm": 5.427436828613281, "learning_rate": 8.048793641782572e-05, "loss": 0.008888335525989532, "step": 68770 }, { "epoch": 19.52313369287539, "grad_norm": 0.15419389307498932, "learning_rate": 8.048509792790236e-05, "loss": 0.0032293360680341722, "step": 68780 }, { "epoch": 19.52597218279875, "grad_norm": 0.805033802986145, "learning_rate": 8.0482259437979e-05, "loss": 0.025036609172821044, "step": 68790 }, { "epoch": 19.528810672722113, "grad_norm": 0.3947599530220032, "learning_rate": 8.047970479704797e-05, "loss": 0.02131952941417694, "step": 68800 }, { "epoch": 19.531649162645472, "grad_norm": 0.21530596911907196, "learning_rate": 8.047686630712461e-05, "loss": 0.018358847498893736, "step": 68810 }, { "epoch": 19.534487652568835, "grad_norm": 12.785233497619629, "learning_rate": 8.047402781720125e-05, "loss": 0.01795649975538254, "step": 68820 }, { "epoch": 19.537326142492194, "grad_norm": 0.04487783461809158, "learning_rate": 8.047118932727789e-05, "loss": 0.00816815048456192, "step": 68830 }, { "epoch": 19.540164632415554, "grad_norm": 4.906947135925293, "learning_rate": 8.046835083735453e-05, "loss": 0.01656535118818283, "step": 68840 }, { "epoch": 19.543003122338916, "grad_norm": 2.639082193374634, "learning_rate": 8.046551234743117e-05, "loss": 0.0036929655820131303, "step": 68850 }, { "epoch": 19.545841612262276, "grad_norm": 5.359637260437012, "learning_rate": 8.046267385750781e-05, "loss": 0.014106601476669312, "step": 68860 }, { "epoch": 19.54868010218564, "grad_norm": 3.0167365074157715, "learning_rate": 8.045983536758444e-05, "loss": 0.010057100653648376, "step": 68870 }, { "epoch": 19.551518592108998, "grad_norm": 0.6392567753791809, "learning_rate": 8.045699687766108e-05, "loss": 0.0032315831631422043, "step": 68880 }, { "epoch": 19.554357082032357, "grad_norm": 2.4184505939483643, "learning_rate": 8.045415838773774e-05, "loss": 0.025351834297180176, "step": 68890 }, { "epoch": 19.55719557195572, "grad_norm": 7.955771446228027, "learning_rate": 8.045131989781437e-05, "loss": 0.021710547804832458, "step": 68900 }, { "epoch": 19.56003406187908, "grad_norm": 8.154048919677734, "learning_rate": 8.044848140789101e-05, "loss": 0.011904839426279068, "step": 68910 }, { "epoch": 19.562872551802442, "grad_norm": 1.1036444902420044, "learning_rate": 8.044564291796765e-05, "loss": 0.005546470358967781, "step": 68920 }, { "epoch": 19.565711041725802, "grad_norm": 0.9240225553512573, "learning_rate": 8.044280442804428e-05, "loss": 0.02468942552804947, "step": 68930 }, { "epoch": 19.56854953164916, "grad_norm": 11.141497611999512, "learning_rate": 8.043996593812092e-05, "loss": 0.034459182620048524, "step": 68940 }, { "epoch": 19.571388021572524, "grad_norm": 9.6288480758667, "learning_rate": 8.043712744819756e-05, "loss": 0.01579929292201996, "step": 68950 }, { "epoch": 19.574226511495883, "grad_norm": 2.98494815826416, "learning_rate": 8.04342889582742e-05, "loss": 0.005646990984678269, "step": 68960 }, { "epoch": 19.577065001419246, "grad_norm": 3.8880043029785156, "learning_rate": 8.043145046835084e-05, "loss": 0.01871223747730255, "step": 68970 }, { "epoch": 19.579903491342606, "grad_norm": 0.6887973546981812, "learning_rate": 8.042861197842748e-05, "loss": 0.009410007297992707, "step": 68980 }, { "epoch": 19.582741981265965, "grad_norm": 2.721257448196411, "learning_rate": 8.042577348850413e-05, "loss": 0.011302175372838974, "step": 68990 }, { "epoch": 19.585580471189328, "grad_norm": 0.7560570240020752, "learning_rate": 8.042293499858075e-05, "loss": 0.015013045072555542, "step": 69000 }, { "epoch": 19.585580471189328, "eval_accuracy": 0.9703058434539328, "eval_loss": 0.10485247522592545, "eval_runtime": 36.4922, "eval_samples_per_second": 430.969, "eval_steps_per_second": 6.741, "step": 69000 }, { "epoch": 19.588418961112687, "grad_norm": 2.149825096130371, "learning_rate": 8.04200965086574e-05, "loss": 0.008067814260721206, "step": 69010 }, { "epoch": 19.59125745103605, "grad_norm": 1.629309058189392, "learning_rate": 8.041725801873404e-05, "loss": 0.008319585025310517, "step": 69020 }, { "epoch": 19.59409594095941, "grad_norm": 0.12949895858764648, "learning_rate": 8.041470337780302e-05, "loss": 0.015746620297431946, "step": 69030 }, { "epoch": 19.59693443088277, "grad_norm": 2.0389418601989746, "learning_rate": 8.041186488787966e-05, "loss": 0.013204771280288696, "step": 69040 }, { "epoch": 19.59977292080613, "grad_norm": 12.068704605102539, "learning_rate": 8.040902639795629e-05, "loss": 0.025858384370803834, "step": 69050 }, { "epoch": 19.60261141072949, "grad_norm": 16.840219497680664, "learning_rate": 8.040618790803293e-05, "loss": 0.028466200828552245, "step": 69060 }, { "epoch": 19.605449900652854, "grad_norm": 1.1202080249786377, "learning_rate": 8.040334941810957e-05, "loss": 0.009333810955286025, "step": 69070 }, { "epoch": 19.608288390576213, "grad_norm": 0.03376936912536621, "learning_rate": 8.040051092818621e-05, "loss": 0.019192464649677277, "step": 69080 }, { "epoch": 19.611126880499576, "grad_norm": 1.6422775983810425, "learning_rate": 8.039767243826285e-05, "loss": 0.007623858749866486, "step": 69090 }, { "epoch": 19.613965370422935, "grad_norm": 0.7358804941177368, "learning_rate": 8.039483394833949e-05, "loss": 0.010873927175998688, "step": 69100 }, { "epoch": 19.616803860346295, "grad_norm": 2.8276357650756836, "learning_rate": 8.039199545841612e-05, "loss": 0.017946134507656097, "step": 69110 }, { "epoch": 19.619642350269658, "grad_norm": 4.385036468505859, "learning_rate": 8.038915696849276e-05, "loss": 0.017290659248828888, "step": 69120 }, { "epoch": 19.622480840193017, "grad_norm": 9.988555908203125, "learning_rate": 8.03863184785694e-05, "loss": 0.013988316059112549, "step": 69130 }, { "epoch": 19.62531933011638, "grad_norm": 4.337743759155273, "learning_rate": 8.038347998864604e-05, "loss": 0.017698106169700623, "step": 69140 }, { "epoch": 19.62815782003974, "grad_norm": 0.8212990760803223, "learning_rate": 8.038064149872269e-05, "loss": 0.0110377237200737, "step": 69150 }, { "epoch": 19.6309963099631, "grad_norm": 0.468191921710968, "learning_rate": 8.037780300879933e-05, "loss": 0.0067240364849567415, "step": 69160 }, { "epoch": 19.63383479988646, "grad_norm": 0.10225032269954681, "learning_rate": 8.037496451887595e-05, "loss": 0.010601022839546203, "step": 69170 }, { "epoch": 19.63667328980982, "grad_norm": 0.8672507405281067, "learning_rate": 8.03721260289526e-05, "loss": 0.023649990558624268, "step": 69180 }, { "epoch": 19.639511779733184, "grad_norm": 0.09129869192838669, "learning_rate": 8.036928753902924e-05, "loss": 0.0062679357826709746, "step": 69190 }, { "epoch": 19.642350269656543, "grad_norm": 0.9970355033874512, "learning_rate": 8.036644904910588e-05, "loss": 0.01151217445731163, "step": 69200 }, { "epoch": 19.645188759579902, "grad_norm": 0.24987013638019562, "learning_rate": 8.036361055918252e-05, "loss": 0.02564195394515991, "step": 69210 }, { "epoch": 19.648027249503265, "grad_norm": 2.8194327354431152, "learning_rate": 8.036077206925916e-05, "loss": 0.017193835973739625, "step": 69220 }, { "epoch": 19.650865739426624, "grad_norm": 6.366069316864014, "learning_rate": 8.03579335793358e-05, "loss": 0.013714145123958587, "step": 69230 }, { "epoch": 19.653704229349987, "grad_norm": 13.596235275268555, "learning_rate": 8.035509508941243e-05, "loss": 0.029501429200172423, "step": 69240 }, { "epoch": 19.656542719273347, "grad_norm": 1.8236782550811768, "learning_rate": 8.035225659948907e-05, "loss": 0.021018481254577635, "step": 69250 }, { "epoch": 19.659381209196706, "grad_norm": 0.1474526971578598, "learning_rate": 8.034941810956571e-05, "loss": 0.0173532173037529, "step": 69260 }, { "epoch": 19.66221969912007, "grad_norm": 0.49610206484794617, "learning_rate": 8.034657961964235e-05, "loss": 0.011975479125976563, "step": 69270 }, { "epoch": 19.665058189043428, "grad_norm": 2.198840379714966, "learning_rate": 8.0343741129719e-05, "loss": 0.0073925435543060304, "step": 69280 }, { "epoch": 19.66789667896679, "grad_norm": 2.5677545070648193, "learning_rate": 8.034090263979564e-05, "loss": 0.011912268400192261, "step": 69290 }, { "epoch": 19.67073516889015, "grad_norm": 1.5093374252319336, "learning_rate": 8.033806414987227e-05, "loss": 0.009709258377552033, "step": 69300 }, { "epoch": 19.67357365881351, "grad_norm": 0.10617073625326157, "learning_rate": 8.033522565994891e-05, "loss": 0.006183573603630066, "step": 69310 }, { "epoch": 19.676412148736873, "grad_norm": 4.007343292236328, "learning_rate": 8.033238717002555e-05, "loss": 0.007568816840648651, "step": 69320 }, { "epoch": 19.679250638660232, "grad_norm": 0.22447921335697174, "learning_rate": 8.032954868010219e-05, "loss": 0.012568357586860656, "step": 69330 }, { "epoch": 19.682089128583595, "grad_norm": 6.23994255065918, "learning_rate": 8.032671019017883e-05, "loss": 0.010301060974597931, "step": 69340 }, { "epoch": 19.684927618506954, "grad_norm": 0.7636321187019348, "learning_rate": 8.032387170025547e-05, "loss": 0.02458552420139313, "step": 69350 }, { "epoch": 19.687766108430313, "grad_norm": 0.5784066319465637, "learning_rate": 8.032103321033211e-05, "loss": 0.020122992992401122, "step": 69360 }, { "epoch": 19.690604598353676, "grad_norm": 1.27125883102417, "learning_rate": 8.031819472040874e-05, "loss": 0.013517719507217408, "step": 69370 }, { "epoch": 19.693443088277036, "grad_norm": 0.06491050124168396, "learning_rate": 8.031535623048538e-05, "loss": 0.0346727579832077, "step": 69380 }, { "epoch": 19.6962815782004, "grad_norm": 0.419322669506073, "learning_rate": 8.031251774056202e-05, "loss": 0.017085514962673187, "step": 69390 }, { "epoch": 19.699120068123758, "grad_norm": 1.0662063360214233, "learning_rate": 8.030967925063865e-05, "loss": 0.01921740621328354, "step": 69400 }, { "epoch": 19.701958558047117, "grad_norm": 0.41703176498413086, "learning_rate": 8.030684076071531e-05, "loss": 0.027681082487106323, "step": 69410 }, { "epoch": 19.70479704797048, "grad_norm": 10.522482872009277, "learning_rate": 8.030400227079195e-05, "loss": 0.022417092323303224, "step": 69420 }, { "epoch": 19.70763553789384, "grad_norm": 5.370880603790283, "learning_rate": 8.030116378086858e-05, "loss": 0.03224923610687256, "step": 69430 }, { "epoch": 19.710474027817202, "grad_norm": 2.0849058628082275, "learning_rate": 8.029832529094522e-05, "loss": 0.016292952001094818, "step": 69440 }, { "epoch": 19.71331251774056, "grad_norm": 7.055314064025879, "learning_rate": 8.029548680102186e-05, "loss": 0.012166781723499298, "step": 69450 }, { "epoch": 19.716151007663925, "grad_norm": 0.8461816906929016, "learning_rate": 8.02926483110985e-05, "loss": 0.014598387479782104, "step": 69460 }, { "epoch": 19.718989497587284, "grad_norm": 5.560688495635986, "learning_rate": 8.028980982117514e-05, "loss": 0.01355898380279541, "step": 69470 }, { "epoch": 19.721827987510643, "grad_norm": 0.6217544674873352, "learning_rate": 8.028697133125178e-05, "loss": 0.022909250855445863, "step": 69480 }, { "epoch": 19.724666477434006, "grad_norm": 3.4564316272735596, "learning_rate": 8.028413284132842e-05, "loss": 0.011682914197444915, "step": 69490 }, { "epoch": 19.727504967357365, "grad_norm": 4.977991104125977, "learning_rate": 8.028129435140505e-05, "loss": 0.009229032695293427, "step": 69500 }, { "epoch": 19.727504967357365, "eval_accuracy": 0.9739301837604121, "eval_loss": 0.0867098867893219, "eval_runtime": 36.6002, "eval_samples_per_second": 429.697, "eval_steps_per_second": 6.721, "step": 69500 }, { "epoch": 19.73034345728073, "grad_norm": 2.2493624687194824, "learning_rate": 8.02784558614817e-05, "loss": 0.030845165252685547, "step": 69510 }, { "epoch": 19.733181947204088, "grad_norm": 0.38069915771484375, "learning_rate": 8.027561737155834e-05, "loss": 0.024247755110263825, "step": 69520 }, { "epoch": 19.736020437127447, "grad_norm": 0.09490971267223358, "learning_rate": 8.027277888163496e-05, "loss": 0.019725000858306883, "step": 69530 }, { "epoch": 19.73885892705081, "grad_norm": 10.00368595123291, "learning_rate": 8.026994039171162e-05, "loss": 0.012398285418748855, "step": 69540 }, { "epoch": 19.74169741697417, "grad_norm": 10.819515228271484, "learning_rate": 8.026710190178826e-05, "loss": 0.013089922070503236, "step": 69550 }, { "epoch": 19.744535906897532, "grad_norm": 4.131497383117676, "learning_rate": 8.026426341186489e-05, "loss": 0.02197558879852295, "step": 69560 }, { "epoch": 19.74737439682089, "grad_norm": 0.4524899423122406, "learning_rate": 8.026142492194153e-05, "loss": 0.0046019434928894045, "step": 69570 }, { "epoch": 19.75021288674425, "grad_norm": 0.40342190861701965, "learning_rate": 8.025858643201817e-05, "loss": 0.00500902347266674, "step": 69580 }, { "epoch": 19.753051376667614, "grad_norm": 0.737342357635498, "learning_rate": 8.025574794209481e-05, "loss": 0.023260261118412017, "step": 69590 }, { "epoch": 19.755889866590973, "grad_norm": 7.49202299118042, "learning_rate": 8.025290945217145e-05, "loss": 0.006156142055988312, "step": 69600 }, { "epoch": 19.758728356514336, "grad_norm": 0.3927282989025116, "learning_rate": 8.02500709622481e-05, "loss": 0.01013113334774971, "step": 69610 }, { "epoch": 19.761566846437695, "grad_norm": 0.10173380374908447, "learning_rate": 8.024723247232474e-05, "loss": 0.01344219148159027, "step": 69620 }, { "epoch": 19.764405336361055, "grad_norm": 0.6857070326805115, "learning_rate": 8.024439398240136e-05, "loss": 0.0040159933269023895, "step": 69630 }, { "epoch": 19.767243826284417, "grad_norm": 3.1280875205993652, "learning_rate": 8.0241555492478e-05, "loss": 0.009645438939332961, "step": 69640 }, { "epoch": 19.770082316207777, "grad_norm": 0.3708900809288025, "learning_rate": 8.023871700255465e-05, "loss": 0.01586942672729492, "step": 69650 }, { "epoch": 19.77292080613114, "grad_norm": 2.6516525745391846, "learning_rate": 8.023587851263127e-05, "loss": 0.013574014604091644, "step": 69660 }, { "epoch": 19.7757592960545, "grad_norm": 2.5607426166534424, "learning_rate": 8.023304002270793e-05, "loss": 0.011482727527618409, "step": 69670 }, { "epoch": 19.77859778597786, "grad_norm": 7.187978744506836, "learning_rate": 8.023020153278457e-05, "loss": 0.012701305747032165, "step": 69680 }, { "epoch": 19.78143627590122, "grad_norm": 0.3876288831233978, "learning_rate": 8.02273630428612e-05, "loss": 0.009183244407176971, "step": 69690 }, { "epoch": 19.78427476582458, "grad_norm": 0.18749873340129852, "learning_rate": 8.022452455293784e-05, "loss": 0.005341282486915589, "step": 69700 }, { "epoch": 19.787113255747943, "grad_norm": 0.3299677073955536, "learning_rate": 8.022168606301448e-05, "loss": 0.01921142190694809, "step": 69710 }, { "epoch": 19.789951745671303, "grad_norm": 0.3436776101589203, "learning_rate": 8.021884757309112e-05, "loss": 0.006154390797019005, "step": 69720 }, { "epoch": 19.792790235594662, "grad_norm": 0.06608443707227707, "learning_rate": 8.021600908316775e-05, "loss": 0.009082941710948944, "step": 69730 }, { "epoch": 19.795628725518025, "grad_norm": 8.674753189086914, "learning_rate": 8.02131705932444e-05, "loss": 0.010397860407829284, "step": 69740 }, { "epoch": 19.798467215441384, "grad_norm": 0.12611012160778046, "learning_rate": 8.021033210332105e-05, "loss": 0.006111374497413636, "step": 69750 }, { "epoch": 19.801305705364747, "grad_norm": 3.5887839794158936, "learning_rate": 8.020749361339767e-05, "loss": 0.024138873815536498, "step": 69760 }, { "epoch": 19.804144195288107, "grad_norm": 0.3671558201313019, "learning_rate": 8.020465512347432e-05, "loss": 0.009663715213537215, "step": 69770 }, { "epoch": 19.806982685211466, "grad_norm": 11.511858940124512, "learning_rate": 8.020181663355096e-05, "loss": 0.009761935472488404, "step": 69780 }, { "epoch": 19.80982117513483, "grad_norm": 2.9406018257141113, "learning_rate": 8.019897814362758e-05, "loss": 0.021665024757385253, "step": 69790 }, { "epoch": 19.812659665058188, "grad_norm": 7.197723388671875, "learning_rate": 8.019613965370424e-05, "loss": 0.015151454508304596, "step": 69800 }, { "epoch": 19.81549815498155, "grad_norm": 0.40413954854011536, "learning_rate": 8.019330116378088e-05, "loss": 0.01368630826473236, "step": 69810 }, { "epoch": 19.81833664490491, "grad_norm": 2.4552419185638428, "learning_rate": 8.019046267385751e-05, "loss": 0.009326066821813583, "step": 69820 }, { "epoch": 19.82117513482827, "grad_norm": 0.7302841544151306, "learning_rate": 8.018762418393415e-05, "loss": 0.009500931203365325, "step": 69830 }, { "epoch": 19.824013624751633, "grad_norm": 0.24555978178977966, "learning_rate": 8.018478569401079e-05, "loss": 0.01546638309955597, "step": 69840 }, { "epoch": 19.826852114674992, "grad_norm": 0.9644136428833008, "learning_rate": 8.018194720408743e-05, "loss": 0.005413316190242767, "step": 69850 }, { "epoch": 19.829690604598355, "grad_norm": 0.35862264037132263, "learning_rate": 8.017910871416406e-05, "loss": 0.017298707365989686, "step": 69860 }, { "epoch": 19.832529094521714, "grad_norm": 0.06862282007932663, "learning_rate": 8.017627022424072e-05, "loss": 0.002622532844543457, "step": 69870 }, { "epoch": 19.835367584445073, "grad_norm": 0.3619443476200104, "learning_rate": 8.017343173431736e-05, "loss": 0.006654743105173111, "step": 69880 }, { "epoch": 19.838206074368436, "grad_norm": 6.627309799194336, "learning_rate": 8.017059324439398e-05, "loss": 0.008912458270788192, "step": 69890 }, { "epoch": 19.841044564291796, "grad_norm": 0.33512771129608154, "learning_rate": 8.016775475447063e-05, "loss": 0.015094064176082611, "step": 69900 }, { "epoch": 19.84388305421516, "grad_norm": 5.380963325500488, "learning_rate": 8.016491626454727e-05, "loss": 0.02249898761510849, "step": 69910 }, { "epoch": 19.846721544138518, "grad_norm": 2.4651830196380615, "learning_rate": 8.01620777746239e-05, "loss": 0.004171670600771904, "step": 69920 }, { "epoch": 19.84956003406188, "grad_norm": 1.1480640172958374, "learning_rate": 8.015923928470054e-05, "loss": 0.010425204783678055, "step": 69930 }, { "epoch": 19.85239852398524, "grad_norm": 0.3810730278491974, "learning_rate": 8.015640079477719e-05, "loss": 0.008157751709222793, "step": 69940 }, { "epoch": 19.8552370139086, "grad_norm": 0.5663885474205017, "learning_rate": 8.015356230485382e-05, "loss": 0.03674625158309937, "step": 69950 }, { "epoch": 19.858075503831962, "grad_norm": 1.3524690866470337, "learning_rate": 8.015072381493046e-05, "loss": 0.016058652102947234, "step": 69960 }, { "epoch": 19.86091399375532, "grad_norm": 10.498462677001953, "learning_rate": 8.01478853250071e-05, "loss": 0.013900864124298095, "step": 69970 }, { "epoch": 19.863752483678685, "grad_norm": 3.037470817565918, "learning_rate": 8.014504683508374e-05, "loss": 0.028281795978546142, "step": 69980 }, { "epoch": 19.866590973602044, "grad_norm": 6.656005859375, "learning_rate": 8.014220834516037e-05, "loss": 0.0225153848528862, "step": 69990 }, { "epoch": 19.869429463525403, "grad_norm": 0.997305154800415, "learning_rate": 8.013936985523703e-05, "loss": 0.012276075780391693, "step": 70000 }, { "epoch": 19.869429463525403, "eval_accuracy": 0.9746296178546449, "eval_loss": 0.09326709061861038, "eval_runtime": 34.8708, "eval_samples_per_second": 451.007, "eval_steps_per_second": 7.055, "step": 70000 }, { "epoch": 19.872267953448766, "grad_norm": 8.349200248718262, "learning_rate": 8.013653136531365e-05, "loss": 0.020797835290431978, "step": 70010 }, { "epoch": 19.875106443372125, "grad_norm": 5.669768333435059, "learning_rate": 8.01336928753903e-05, "loss": 0.012933573126792908, "step": 70020 }, { "epoch": 19.87794493329549, "grad_norm": 0.6435303092002869, "learning_rate": 8.013085438546694e-05, "loss": 0.01373857855796814, "step": 70030 }, { "epoch": 19.880783423218848, "grad_norm": 9.640437126159668, "learning_rate": 8.012801589554358e-05, "loss": 0.0473611980676651, "step": 70040 }, { "epoch": 19.883621913142207, "grad_norm": 3.372373580932617, "learning_rate": 8.01251774056202e-05, "loss": 0.009634517878293992, "step": 70050 }, { "epoch": 19.88646040306557, "grad_norm": 0.6144418120384216, "learning_rate": 8.012233891569685e-05, "loss": 0.016693800687789917, "step": 70060 }, { "epoch": 19.88929889298893, "grad_norm": 6.740437984466553, "learning_rate": 8.01195004257735e-05, "loss": 0.005502850189805031, "step": 70070 }, { "epoch": 19.892137382912292, "grad_norm": 9.659167289733887, "learning_rate": 8.011666193585013e-05, "loss": 0.016458888351917268, "step": 70080 }, { "epoch": 19.89497587283565, "grad_norm": 3.012026309967041, "learning_rate": 8.011382344592677e-05, "loss": 0.027854835987091063, "step": 70090 }, { "epoch": 19.89781436275901, "grad_norm": 9.539318084716797, "learning_rate": 8.011098495600341e-05, "loss": 0.012483859062194824, "step": 70100 }, { "epoch": 19.900652852682374, "grad_norm": 0.9721259474754333, "learning_rate": 8.010814646608004e-05, "loss": 0.02147800028324127, "step": 70110 }, { "epoch": 19.903491342605733, "grad_norm": 13.002565383911133, "learning_rate": 8.010530797615668e-05, "loss": 0.038715255260467527, "step": 70120 }, { "epoch": 19.906329832529096, "grad_norm": 6.417557716369629, "learning_rate": 8.010246948623332e-05, "loss": 0.015531025826931, "step": 70130 }, { "epoch": 19.909168322452455, "grad_norm": 17.78860092163086, "learning_rate": 8.009963099630996e-05, "loss": 0.022406674921512604, "step": 70140 }, { "epoch": 19.912006812375814, "grad_norm": 0.5026978850364685, "learning_rate": 8.00967925063866e-05, "loss": 0.010102760046720505, "step": 70150 }, { "epoch": 19.914845302299177, "grad_norm": 0.6659536361694336, "learning_rate": 8.009395401646325e-05, "loss": 0.009998419880867004, "step": 70160 }, { "epoch": 19.917683792222537, "grad_norm": 0.3441292643547058, "learning_rate": 8.009111552653989e-05, "loss": 0.0092659130692482, "step": 70170 }, { "epoch": 19.9205222821459, "grad_norm": 3.3865954875946045, "learning_rate": 8.008827703661652e-05, "loss": 0.03509729504585266, "step": 70180 }, { "epoch": 19.92336077206926, "grad_norm": 4.162532806396484, "learning_rate": 8.008543854669316e-05, "loss": 0.020271417498588563, "step": 70190 }, { "epoch": 19.92619926199262, "grad_norm": 0.39653849601745605, "learning_rate": 8.008260005676981e-05, "loss": 0.013508158922195434, "step": 70200 }, { "epoch": 19.92903775191598, "grad_norm": 0.2079203575849533, "learning_rate": 8.007976156684644e-05, "loss": 0.006418728083372116, "step": 70210 }, { "epoch": 19.93187624183934, "grad_norm": 0.1307549923658371, "learning_rate": 8.007692307692308e-05, "loss": 0.029881322383880617, "step": 70220 }, { "epoch": 19.934714731762703, "grad_norm": 0.21897612512111664, "learning_rate": 8.007408458699972e-05, "loss": 0.024579383432865143, "step": 70230 }, { "epoch": 19.937553221686063, "grad_norm": 0.5737637877464294, "learning_rate": 8.007124609707635e-05, "loss": 0.014177274703979493, "step": 70240 }, { "epoch": 19.940391711609422, "grad_norm": 1.6633081436157227, "learning_rate": 8.006840760715299e-05, "loss": 0.02371632605791092, "step": 70250 }, { "epoch": 19.943230201532785, "grad_norm": 3.3881356716156006, "learning_rate": 8.006556911722963e-05, "loss": 0.0038258709013462068, "step": 70260 }, { "epoch": 19.946068691456144, "grad_norm": 0.3298971652984619, "learning_rate": 8.006273062730628e-05, "loss": 0.026192861795425414, "step": 70270 }, { "epoch": 19.948907181379507, "grad_norm": 1.7790664434432983, "learning_rate": 8.005989213738292e-05, "loss": 0.004630395025014877, "step": 70280 }, { "epoch": 19.951745671302866, "grad_norm": 14.787984848022461, "learning_rate": 8.005705364745956e-05, "loss": 0.020761188864707947, "step": 70290 }, { "epoch": 19.95458416122623, "grad_norm": 1.3828383684158325, "learning_rate": 8.00542151575362e-05, "loss": 0.018538634479045867, "step": 70300 }, { "epoch": 19.95742265114959, "grad_norm": 0.4922795593738556, "learning_rate": 8.005137666761283e-05, "loss": 0.011452168226242065, "step": 70310 }, { "epoch": 19.960261141072948, "grad_norm": 0.9053069353103638, "learning_rate": 8.004853817768947e-05, "loss": 0.016468432545661927, "step": 70320 }, { "epoch": 19.96309963099631, "grad_norm": 8.889573097229004, "learning_rate": 8.004569968776611e-05, "loss": 0.013756434619426727, "step": 70330 }, { "epoch": 19.96593812091967, "grad_norm": 0.456452339887619, "learning_rate": 8.004286119784275e-05, "loss": 0.006173787266016006, "step": 70340 }, { "epoch": 19.968776610843033, "grad_norm": 0.07288449257612228, "learning_rate": 8.004002270791939e-05, "loss": 0.015461984276771545, "step": 70350 }, { "epoch": 19.971615100766392, "grad_norm": 0.7012103796005249, "learning_rate": 8.003718421799603e-05, "loss": 0.017776045203208923, "step": 70360 }, { "epoch": 19.974453590689752, "grad_norm": 6.952215671539307, "learning_rate": 8.003434572807266e-05, "loss": 0.027541956305503844, "step": 70370 }, { "epoch": 19.977292080613115, "grad_norm": 3.905641555786133, "learning_rate": 8.00315072381493e-05, "loss": 0.006900443881750107, "step": 70380 }, { "epoch": 19.980130570536474, "grad_norm": 2.8557472229003906, "learning_rate": 8.002866874822594e-05, "loss": 0.005438680574297905, "step": 70390 }, { "epoch": 19.982969060459837, "grad_norm": 1.9506566524505615, "learning_rate": 8.002583025830259e-05, "loss": 0.026613077521324156, "step": 70400 }, { "epoch": 19.985807550383196, "grad_norm": 3.460448741912842, "learning_rate": 8.002299176837923e-05, "loss": 0.009308099001646041, "step": 70410 }, { "epoch": 19.988646040306556, "grad_norm": 0.4749567210674286, "learning_rate": 8.002015327845587e-05, "loss": 0.014468608796596527, "step": 70420 }, { "epoch": 19.99148453022992, "grad_norm": 1.8069318532943726, "learning_rate": 8.001731478853251e-05, "loss": 0.011399106681346893, "step": 70430 }, { "epoch": 19.994323020153278, "grad_norm": 0.37195438146591187, "learning_rate": 8.001447629860914e-05, "loss": 0.004803759604692459, "step": 70440 }, { "epoch": 19.99716151007664, "grad_norm": 6.259244918823242, "learning_rate": 8.001163780868578e-05, "loss": 0.00889127403497696, "step": 70450 }, { "epoch": 20.0, "grad_norm": 0.5172131061553955, "learning_rate": 8.000879931876242e-05, "loss": 0.012535420060157777, "step": 70460 }, { "epoch": 20.00283848992336, "grad_norm": 10.114303588867188, "learning_rate": 8.000596082883906e-05, "loss": 0.03997641205787659, "step": 70470 }, { "epoch": 20.005676979846722, "grad_norm": 0.6311675906181335, "learning_rate": 8.00031223389157e-05, "loss": 0.011592544615268707, "step": 70480 }, { "epoch": 20.00851546977008, "grad_norm": 8.894587516784668, "learning_rate": 8.000028384899235e-05, "loss": 0.017207434773445128, "step": 70490 }, { "epoch": 20.011353959693444, "grad_norm": 0.46717751026153564, "learning_rate": 7.999744535906897e-05, "loss": 0.013500450551509858, "step": 70500 }, { "epoch": 20.011353959693444, "eval_accuracy": 0.9750747122782476, "eval_loss": 0.08832468092441559, "eval_runtime": 33.3915, "eval_samples_per_second": 470.988, "eval_steps_per_second": 7.367, "step": 70500 }, { "epoch": 20.014192449616804, "grad_norm": 0.930184006690979, "learning_rate": 7.999460686914561e-05, "loss": 0.009992731362581253, "step": 70510 }, { "epoch": 20.017030939540163, "grad_norm": 3.4409539699554443, "learning_rate": 7.999176837922226e-05, "loss": 0.006263540685176849, "step": 70520 }, { "epoch": 20.019869429463526, "grad_norm": 15.277495384216309, "learning_rate": 7.99889298892989e-05, "loss": 0.01408422291278839, "step": 70530 }, { "epoch": 20.022707919386885, "grad_norm": 0.08249526470899582, "learning_rate": 7.998609139937554e-05, "loss": 0.009355398267507553, "step": 70540 }, { "epoch": 20.025546409310248, "grad_norm": 0.9968146681785583, "learning_rate": 7.998325290945218e-05, "loss": 0.017765945196151732, "step": 70550 }, { "epoch": 20.028384899233608, "grad_norm": 0.7314165234565735, "learning_rate": 7.998041441952882e-05, "loss": 0.008213134855031968, "step": 70560 }, { "epoch": 20.031223389156967, "grad_norm": 5.232751369476318, "learning_rate": 7.997757592960545e-05, "loss": 0.023738621175289153, "step": 70570 }, { "epoch": 20.03406187908033, "grad_norm": 4.401555061340332, "learning_rate": 7.997473743968209e-05, "loss": 0.00568949319422245, "step": 70580 }, { "epoch": 20.03690036900369, "grad_norm": 3.0780553817749023, "learning_rate": 7.997189894975873e-05, "loss": 0.015137974917888642, "step": 70590 }, { "epoch": 20.039738858927052, "grad_norm": 4.9375081062316895, "learning_rate": 7.996906045983537e-05, "loss": 0.014754903316497803, "step": 70600 }, { "epoch": 20.04257734885041, "grad_norm": 4.63508939743042, "learning_rate": 7.996622196991201e-05, "loss": 0.004317229613661766, "step": 70610 }, { "epoch": 20.04541583877377, "grad_norm": 9.89782428741455, "learning_rate": 7.996338347998866e-05, "loss": 0.01868225634098053, "step": 70620 }, { "epoch": 20.048254328697134, "grad_norm": 1.0848830938339233, "learning_rate": 7.996054499006528e-05, "loss": 0.018375705182552337, "step": 70630 }, { "epoch": 20.051092818620493, "grad_norm": 0.20393210649490356, "learning_rate": 7.995770650014193e-05, "loss": 0.012399987131357194, "step": 70640 }, { "epoch": 20.053931308543856, "grad_norm": 0.22528260946273804, "learning_rate": 7.995486801021857e-05, "loss": 0.009944108873605728, "step": 70650 }, { "epoch": 20.056769798467215, "grad_norm": 0.46602919697761536, "learning_rate": 7.995202952029521e-05, "loss": 0.010029295831918717, "step": 70660 }, { "epoch": 20.059608288390578, "grad_norm": 0.4332464337348938, "learning_rate": 7.994919103037185e-05, "loss": 0.007784443348646164, "step": 70670 }, { "epoch": 20.062446778313937, "grad_norm": 0.6346091032028198, "learning_rate": 7.994635254044849e-05, "loss": 0.024679088592529298, "step": 70680 }, { "epoch": 20.065285268237297, "grad_norm": 2.126380205154419, "learning_rate": 7.994351405052513e-05, "loss": 0.01791771352291107, "step": 70690 }, { "epoch": 20.06812375816066, "grad_norm": 6.891622543334961, "learning_rate": 7.994067556060176e-05, "loss": 0.025194555521011353, "step": 70700 }, { "epoch": 20.07096224808402, "grad_norm": 0.07077255100011826, "learning_rate": 7.99378370706784e-05, "loss": 0.01100798398256302, "step": 70710 }, { "epoch": 20.07380073800738, "grad_norm": 0.29096195101737976, "learning_rate": 7.993499858075504e-05, "loss": 0.012457359582185745, "step": 70720 }, { "epoch": 20.07663922793074, "grad_norm": 0.7592195868492126, "learning_rate": 7.993216009083167e-05, "loss": 0.007818204909563064, "step": 70730 }, { "epoch": 20.0794777178541, "grad_norm": 0.08397205173969269, "learning_rate": 7.992932160090833e-05, "loss": 0.024569763243198393, "step": 70740 }, { "epoch": 20.082316207777463, "grad_norm": 1.350937843322754, "learning_rate": 7.992648311098497e-05, "loss": 0.006878745555877685, "step": 70750 }, { "epoch": 20.085154697700823, "grad_norm": 0.3663390576839447, "learning_rate": 7.99236446210616e-05, "loss": 0.004136440902948379, "step": 70760 }, { "epoch": 20.087993187624186, "grad_norm": 1.7821446657180786, "learning_rate": 7.992080613113824e-05, "loss": 0.004560092091560363, "step": 70770 }, { "epoch": 20.090831677547545, "grad_norm": 3.139600992202759, "learning_rate": 7.991796764121488e-05, "loss": 0.011883917450904845, "step": 70780 }, { "epoch": 20.093670167470904, "grad_norm": 0.3908631205558777, "learning_rate": 7.991512915129152e-05, "loss": 0.0027276404201984406, "step": 70790 }, { "epoch": 20.096508657394267, "grad_norm": 7.082748889923096, "learning_rate": 7.991229066136816e-05, "loss": 0.007720889151096344, "step": 70800 }, { "epoch": 20.099347147317626, "grad_norm": 0.10420786589384079, "learning_rate": 7.99094521714448e-05, "loss": 0.010558195412158966, "step": 70810 }, { "epoch": 20.10218563724099, "grad_norm": 0.48521357774734497, "learning_rate": 7.990661368152144e-05, "loss": 0.0019357111304998398, "step": 70820 }, { "epoch": 20.10502412716435, "grad_norm": 2.8287222385406494, "learning_rate": 7.990377519159807e-05, "loss": 0.009632499516010284, "step": 70830 }, { "epoch": 20.107862617087708, "grad_norm": 0.21305982768535614, "learning_rate": 7.990093670167471e-05, "loss": 0.00776190310716629, "step": 70840 }, { "epoch": 20.11070110701107, "grad_norm": 0.45418596267700195, "learning_rate": 7.989809821175135e-05, "loss": 0.004835011810064316, "step": 70850 }, { "epoch": 20.11353959693443, "grad_norm": 0.03585055470466614, "learning_rate": 7.989525972182798e-05, "loss": 0.010946637392044068, "step": 70860 }, { "epoch": 20.116378086857793, "grad_norm": 9.583426475524902, "learning_rate": 7.989242123190464e-05, "loss": 0.01724056005477905, "step": 70870 }, { "epoch": 20.119216576781152, "grad_norm": 3.252826690673828, "learning_rate": 7.988958274198128e-05, "loss": 0.005347991734743119, "step": 70880 }, { "epoch": 20.12205506670451, "grad_norm": 0.0747561976313591, "learning_rate": 7.98867442520579e-05, "loss": 0.008530103415250779, "step": 70890 }, { "epoch": 20.124893556627875, "grad_norm": 1.0255763530731201, "learning_rate": 7.988390576213455e-05, "loss": 0.007750038057565689, "step": 70900 }, { "epoch": 20.127732046551234, "grad_norm": 8.028404235839844, "learning_rate": 7.988106727221119e-05, "loss": 0.010138098895549775, "step": 70910 }, { "epoch": 20.130570536474597, "grad_norm": 0.07961180061101913, "learning_rate": 7.987822878228783e-05, "loss": 0.007064875215291977, "step": 70920 }, { "epoch": 20.133409026397956, "grad_norm": 0.45506641268730164, "learning_rate": 7.987539029236447e-05, "loss": 0.010567998886108399, "step": 70930 }, { "epoch": 20.136247516321315, "grad_norm": 2.628929376602173, "learning_rate": 7.987255180244111e-05, "loss": 0.013213369250297546, "step": 70940 }, { "epoch": 20.13908600624468, "grad_norm": 1.0333198308944702, "learning_rate": 7.986971331251774e-05, "loss": 0.013406898081302642, "step": 70950 }, { "epoch": 20.141924496168038, "grad_norm": 1.925592064857483, "learning_rate": 7.986687482259438e-05, "loss": 0.013082584738731385, "step": 70960 }, { "epoch": 20.1447629860914, "grad_norm": 1.4674748182296753, "learning_rate": 7.986403633267102e-05, "loss": 0.0037319473922252655, "step": 70970 }, { "epoch": 20.14760147601476, "grad_norm": 1.664434790611267, "learning_rate": 7.986119784274766e-05, "loss": 0.008153005689382552, "step": 70980 }, { "epoch": 20.15043996593812, "grad_norm": 0.12340985983610153, "learning_rate": 7.985835935282429e-05, "loss": 0.012183694541454316, "step": 70990 }, { "epoch": 20.153278455861482, "grad_norm": 0.25919803977012634, "learning_rate": 7.985552086290095e-05, "loss": 0.0062221776694059375, "step": 71000 }, { "epoch": 20.153278455861482, "eval_accuracy": 0.9715775418070833, "eval_loss": 0.09833943098783493, "eval_runtime": 34.4847, "eval_samples_per_second": 456.057, "eval_steps_per_second": 7.134, "step": 71000 }, { "epoch": 20.15611694578484, "grad_norm": 0.06415791064500809, "learning_rate": 7.985268237297759e-05, "loss": 0.005183562636375427, "step": 71010 }, { "epoch": 20.158955435708204, "grad_norm": 5.839943885803223, "learning_rate": 7.984984388305422e-05, "loss": 0.010195431858301162, "step": 71020 }, { "epoch": 20.161793925631564, "grad_norm": 1.0687507390975952, "learning_rate": 7.984700539313086e-05, "loss": 0.005439663678407669, "step": 71030 }, { "epoch": 20.164632415554923, "grad_norm": 1.2230420112609863, "learning_rate": 7.98441669032075e-05, "loss": 0.00555882602930069, "step": 71040 }, { "epoch": 20.167470905478286, "grad_norm": 0.0641670823097229, "learning_rate": 7.984132841328413e-05, "loss": 0.015961262583732604, "step": 71050 }, { "epoch": 20.170309395401645, "grad_norm": 0.21978293359279633, "learning_rate": 7.983848992336077e-05, "loss": 0.008723118901252746, "step": 71060 }, { "epoch": 20.173147885325008, "grad_norm": 1.1472264528274536, "learning_rate": 7.983565143343742e-05, "loss": 0.007990701496601105, "step": 71070 }, { "epoch": 20.175986375248367, "grad_norm": 1.6013227701187134, "learning_rate": 7.983281294351405e-05, "loss": 0.013109168410301209, "step": 71080 }, { "epoch": 20.17882486517173, "grad_norm": 2.6663811206817627, "learning_rate": 7.982997445359069e-05, "loss": 0.011013048887252807, "step": 71090 }, { "epoch": 20.18166335509509, "grad_norm": 6.417524337768555, "learning_rate": 7.982713596366733e-05, "loss": 0.021243469417095186, "step": 71100 }, { "epoch": 20.18450184501845, "grad_norm": 0.08042505383491516, "learning_rate": 7.982429747374397e-05, "loss": 0.020945796370506288, "step": 71110 }, { "epoch": 20.187340334941812, "grad_norm": 3.085923671722412, "learning_rate": 7.98214589838206e-05, "loss": 0.011323709040880203, "step": 71120 }, { "epoch": 20.19017882486517, "grad_norm": 0.7011781930923462, "learning_rate": 7.981862049389726e-05, "loss": 0.005912034958600998, "step": 71130 }, { "epoch": 20.193017314788534, "grad_norm": 1.4507988691329956, "learning_rate": 7.98157820039739e-05, "loss": 0.014409781992435455, "step": 71140 }, { "epoch": 20.195855804711893, "grad_norm": 2.1834218502044678, "learning_rate": 7.981294351405053e-05, "loss": 0.010265100002288818, "step": 71150 }, { "epoch": 20.198694294635253, "grad_norm": 2.628633499145508, "learning_rate": 7.981010502412717e-05, "loss": 0.015361364185810088, "step": 71160 }, { "epoch": 20.201532784558616, "grad_norm": 2.399979591369629, "learning_rate": 7.980726653420381e-05, "loss": 0.013739021122455597, "step": 71170 }, { "epoch": 20.204371274481975, "grad_norm": 2.3257687091827393, "learning_rate": 7.980442804428044e-05, "loss": 0.027626794576644898, "step": 71180 }, { "epoch": 20.207209764405338, "grad_norm": 0.7898116111755371, "learning_rate": 7.980158955435708e-05, "loss": 0.013887503743171692, "step": 71190 }, { "epoch": 20.210048254328697, "grad_norm": 0.5000703930854797, "learning_rate": 7.979875106443373e-05, "loss": 0.006867320090532303, "step": 71200 }, { "epoch": 20.212886744252057, "grad_norm": 1.0472383499145508, "learning_rate": 7.979591257451036e-05, "loss": 0.002737540192902088, "step": 71210 }, { "epoch": 20.21572523417542, "grad_norm": 0.0754508450627327, "learning_rate": 7.9793074084587e-05, "loss": 0.012058969587087631, "step": 71220 }, { "epoch": 20.21856372409878, "grad_norm": 5.038946628570557, "learning_rate": 7.979051944365597e-05, "loss": 0.028577423095703124, "step": 71230 }, { "epoch": 20.22140221402214, "grad_norm": 1.6348474025726318, "learning_rate": 7.978768095373261e-05, "loss": 0.018154531717300415, "step": 71240 }, { "epoch": 20.2242407039455, "grad_norm": 0.6914674043655396, "learning_rate": 7.978484246380927e-05, "loss": 0.00909312516450882, "step": 71250 }, { "epoch": 20.22707919386886, "grad_norm": 0.5614136457443237, "learning_rate": 7.97820039738859e-05, "loss": 0.02603203058242798, "step": 71260 }, { "epoch": 20.229917683792223, "grad_norm": 0.12528486549854279, "learning_rate": 7.977916548396253e-05, "loss": 0.033506572246551514, "step": 71270 }, { "epoch": 20.232756173715583, "grad_norm": 0.7197099328041077, "learning_rate": 7.977632699403918e-05, "loss": 0.016043220460414887, "step": 71280 }, { "epoch": 20.235594663638945, "grad_norm": 13.577713012695312, "learning_rate": 7.977348850411582e-05, "loss": 0.03595433533191681, "step": 71290 }, { "epoch": 20.238433153562305, "grad_norm": 10.372049331665039, "learning_rate": 7.977065001419245e-05, "loss": 0.008638687431812286, "step": 71300 }, { "epoch": 20.241271643485664, "grad_norm": 0.48972439765930176, "learning_rate": 7.97678115242691e-05, "loss": 0.008214093744754791, "step": 71310 }, { "epoch": 20.244110133409027, "grad_norm": 0.48795127868652344, "learning_rate": 7.976497303434574e-05, "loss": 0.012323207408189773, "step": 71320 }, { "epoch": 20.246948623332386, "grad_norm": 13.70875072479248, "learning_rate": 7.976213454442237e-05, "loss": 0.05417248010635376, "step": 71330 }, { "epoch": 20.24978711325575, "grad_norm": 1.6629012823104858, "learning_rate": 7.975929605449901e-05, "loss": 0.02711491584777832, "step": 71340 }, { "epoch": 20.25262560317911, "grad_norm": 1.2792797088623047, "learning_rate": 7.975645756457565e-05, "loss": 0.005712501704692841, "step": 71350 }, { "epoch": 20.255464093102468, "grad_norm": 6.207645893096924, "learning_rate": 7.975361907465228e-05, "loss": 0.012255604565143585, "step": 71360 }, { "epoch": 20.25830258302583, "grad_norm": 0.45407602190971375, "learning_rate": 7.975078058472892e-05, "loss": 0.00972742959856987, "step": 71370 }, { "epoch": 20.26114107294919, "grad_norm": 0.035239990800619125, "learning_rate": 7.974794209480558e-05, "loss": 0.01658585071563721, "step": 71380 }, { "epoch": 20.263979562872553, "grad_norm": 5.291571617126465, "learning_rate": 7.97451036048822e-05, "loss": 0.006126683205366135, "step": 71390 }, { "epoch": 20.266818052795912, "grad_norm": 8.226225852966309, "learning_rate": 7.974226511495885e-05, "loss": 0.009804755449295044, "step": 71400 }, { "epoch": 20.26965654271927, "grad_norm": 1.46675705909729, "learning_rate": 7.973942662503549e-05, "loss": 0.005511943995952606, "step": 71410 }, { "epoch": 20.272495032642635, "grad_norm": 4.6062750816345215, "learning_rate": 7.973658813511213e-05, "loss": 0.0062370352447032925, "step": 71420 }, { "epoch": 20.275333522565994, "grad_norm": 11.774337768554688, "learning_rate": 7.973374964518876e-05, "loss": 0.012304014712572097, "step": 71430 }, { "epoch": 20.278172012489357, "grad_norm": 1.2033756971359253, "learning_rate": 7.97309111552654e-05, "loss": 0.022356146574020387, "step": 71440 }, { "epoch": 20.281010502412716, "grad_norm": 1.7641831636428833, "learning_rate": 7.972807266534205e-05, "loss": 0.005710183829069138, "step": 71450 }, { "epoch": 20.283848992336075, "grad_norm": 1.1928189992904663, "learning_rate": 7.972523417541868e-05, "loss": 0.035009777545928954, "step": 71460 }, { "epoch": 20.28668748225944, "grad_norm": 1.8014436960220337, "learning_rate": 7.972239568549532e-05, "loss": 0.03726495504379272, "step": 71470 }, { "epoch": 20.289525972182798, "grad_norm": 2.5950076580047607, "learning_rate": 7.971955719557196e-05, "loss": 0.012007193267345428, "step": 71480 }, { "epoch": 20.29236446210616, "grad_norm": 5.762093544006348, "learning_rate": 7.971671870564859e-05, "loss": 0.01574503481388092, "step": 71490 }, { "epoch": 20.29520295202952, "grad_norm": 1.2392830848693848, "learning_rate": 7.971388021572523e-05, "loss": 0.010526101291179656, "step": 71500 }, { "epoch": 20.29520295202952, "eval_accuracy": 0.9741845234310421, "eval_loss": 0.09422118961811066, "eval_runtime": 34.1024, "eval_samples_per_second": 461.17, "eval_steps_per_second": 7.214, "step": 71500 }, { "epoch": 20.298041441952883, "grad_norm": 1.6695079803466797, "learning_rate": 7.971104172580189e-05, "loss": 0.006963803619146347, "step": 71510 }, { "epoch": 20.300879931876242, "grad_norm": 3.2166404724121094, "learning_rate": 7.970820323587851e-05, "loss": 0.02515282332897186, "step": 71520 }, { "epoch": 20.3037184217996, "grad_norm": 8.878299713134766, "learning_rate": 7.970536474595516e-05, "loss": 0.008853291720151901, "step": 71530 }, { "epoch": 20.306556911722964, "grad_norm": 0.0799056813120842, "learning_rate": 7.97025262560318e-05, "loss": 0.015310835838317872, "step": 71540 }, { "epoch": 20.309395401646324, "grad_norm": 1.1386926174163818, "learning_rate": 7.969968776610844e-05, "loss": 0.01113351657986641, "step": 71550 }, { "epoch": 20.312233891569687, "grad_norm": 9.597572326660156, "learning_rate": 7.969684927618507e-05, "loss": 0.011992889642715453, "step": 71560 }, { "epoch": 20.315072381493046, "grad_norm": 1.976238489151001, "learning_rate": 7.969401078626171e-05, "loss": 0.0054257214069366455, "step": 71570 }, { "epoch": 20.317910871416405, "grad_norm": 0.1846029907464981, "learning_rate": 7.969117229633836e-05, "loss": 0.018076620995998383, "step": 71580 }, { "epoch": 20.320749361339768, "grad_norm": 2.5870375633239746, "learning_rate": 7.968833380641499e-05, "loss": 0.0019892336800694465, "step": 71590 }, { "epoch": 20.323587851263127, "grad_norm": 10.250353813171387, "learning_rate": 7.968549531649163e-05, "loss": 0.017803683876991272, "step": 71600 }, { "epoch": 20.32642634118649, "grad_norm": 8.339384078979492, "learning_rate": 7.968265682656827e-05, "loss": 0.012162114679813384, "step": 71610 }, { "epoch": 20.32926483110985, "grad_norm": 3.0461528301239014, "learning_rate": 7.96798183366449e-05, "loss": 0.007265843451023102, "step": 71620 }, { "epoch": 20.33210332103321, "grad_norm": 0.7026576399803162, "learning_rate": 7.967697984672154e-05, "loss": 0.01599828600883484, "step": 71630 }, { "epoch": 20.334941810956572, "grad_norm": 0.44114840030670166, "learning_rate": 7.967414135679818e-05, "loss": 0.010729076713323593, "step": 71640 }, { "epoch": 20.33778030087993, "grad_norm": 8.060572624206543, "learning_rate": 7.967130286687483e-05, "loss": 0.007116299122571945, "step": 71650 }, { "epoch": 20.340618790803294, "grad_norm": 0.07551302760839462, "learning_rate": 7.966846437695147e-05, "loss": 0.004788613319396973, "step": 71660 }, { "epoch": 20.343457280726653, "grad_norm": 8.419034004211426, "learning_rate": 7.966562588702811e-05, "loss": 0.007219448685646057, "step": 71670 }, { "epoch": 20.346295770650013, "grad_norm": 0.31392791867256165, "learning_rate": 7.966278739710475e-05, "loss": 0.028424176573753356, "step": 71680 }, { "epoch": 20.349134260573376, "grad_norm": 0.04325127229094505, "learning_rate": 7.965994890718138e-05, "loss": 0.008724796026945114, "step": 71690 }, { "epoch": 20.351972750496735, "grad_norm": 0.12740127742290497, "learning_rate": 7.965711041725802e-05, "loss": 0.005463557690382004, "step": 71700 }, { "epoch": 20.354811240420098, "grad_norm": 0.24927918612957, "learning_rate": 7.965427192733467e-05, "loss": 0.008780656754970551, "step": 71710 }, { "epoch": 20.357649730343457, "grad_norm": 1.2686970233917236, "learning_rate": 7.96514334374113e-05, "loss": 0.013705286383628845, "step": 71720 }, { "epoch": 20.360488220266816, "grad_norm": 9.048550605773926, "learning_rate": 7.964859494748794e-05, "loss": 0.025367507338523866, "step": 71730 }, { "epoch": 20.36332671019018, "grad_norm": 0.2923521399497986, "learning_rate": 7.964575645756458e-05, "loss": 0.004051685705780983, "step": 71740 }, { "epoch": 20.36616520011354, "grad_norm": 5.30816125869751, "learning_rate": 7.964291796764121e-05, "loss": 0.020503339171409608, "step": 71750 }, { "epoch": 20.3690036900369, "grad_norm": 0.35195493698120117, "learning_rate": 7.964007947771785e-05, "loss": 0.017305321991443634, "step": 71760 }, { "epoch": 20.37184217996026, "grad_norm": 0.05347999930381775, "learning_rate": 7.96372409877945e-05, "loss": 0.013624610006809234, "step": 71770 }, { "epoch": 20.37468066988362, "grad_norm": 2.84073805809021, "learning_rate": 7.963440249787114e-05, "loss": 0.013450303673744201, "step": 71780 }, { "epoch": 20.377519159806983, "grad_norm": 5.410573959350586, "learning_rate": 7.963156400794778e-05, "loss": 0.033284375071525575, "step": 71790 }, { "epoch": 20.380357649730342, "grad_norm": 0.25082850456237793, "learning_rate": 7.962872551802442e-05, "loss": 0.00481058731675148, "step": 71800 }, { "epoch": 20.383196139653705, "grad_norm": 2.0178093910217285, "learning_rate": 7.962588702810106e-05, "loss": 0.014687678217887879, "step": 71810 }, { "epoch": 20.386034629577065, "grad_norm": 10.085311889648438, "learning_rate": 7.962304853817769e-05, "loss": 0.01878054141998291, "step": 71820 }, { "epoch": 20.388873119500424, "grad_norm": 0.23137490451335907, "learning_rate": 7.962021004825433e-05, "loss": 0.00991353690624237, "step": 71830 }, { "epoch": 20.391711609423787, "grad_norm": 0.6344987154006958, "learning_rate": 7.961737155833097e-05, "loss": 0.012723009288311004, "step": 71840 }, { "epoch": 20.394550099347146, "grad_norm": 6.912217140197754, "learning_rate": 7.961453306840761e-05, "loss": 0.030232995748519897, "step": 71850 }, { "epoch": 20.39738858927051, "grad_norm": 6.077348709106445, "learning_rate": 7.961169457848425e-05, "loss": 0.00785011649131775, "step": 71860 }, { "epoch": 20.40022707919387, "grad_norm": 0.46405521035194397, "learning_rate": 7.96088560885609e-05, "loss": 0.003696853667497635, "step": 71870 }, { "epoch": 20.40306556911723, "grad_norm": 0.36093464493751526, "learning_rate": 7.960601759863752e-05, "loss": 0.010967625677585602, "step": 71880 }, { "epoch": 20.40590405904059, "grad_norm": 11.93065357208252, "learning_rate": 7.960317910871416e-05, "loss": 0.01051737815141678, "step": 71890 }, { "epoch": 20.40874254896395, "grad_norm": 0.4335460364818573, "learning_rate": 7.96003406187908e-05, "loss": 0.015007099509239197, "step": 71900 }, { "epoch": 20.411581038887313, "grad_norm": 1.974636435508728, "learning_rate": 7.959750212886745e-05, "loss": 0.017540694773197175, "step": 71910 }, { "epoch": 20.414419528810672, "grad_norm": 0.38072291016578674, "learning_rate": 7.959466363894409e-05, "loss": 0.006635349243879318, "step": 71920 }, { "epoch": 20.417258018734035, "grad_norm": 0.30468127131462097, "learning_rate": 7.959182514902073e-05, "loss": 0.009739407896995544, "step": 71930 }, { "epoch": 20.420096508657394, "grad_norm": 0.6683716773986816, "learning_rate": 7.958898665909736e-05, "loss": 0.0038137849420309066, "step": 71940 }, { "epoch": 20.422934998580754, "grad_norm": 11.024478912353516, "learning_rate": 7.9586148169174e-05, "loss": 0.007583583891391754, "step": 71950 }, { "epoch": 20.425773488504117, "grad_norm": 8.787445068359375, "learning_rate": 7.958330967925064e-05, "loss": 0.008058632910251617, "step": 71960 }, { "epoch": 20.428611978427476, "grad_norm": 9.991742134094238, "learning_rate": 7.958047118932728e-05, "loss": 0.019328847527503967, "step": 71970 }, { "epoch": 20.43145046835084, "grad_norm": 4.142068386077881, "learning_rate": 7.957763269940392e-05, "loss": 0.016896636784076692, "step": 71980 }, { "epoch": 20.434288958274198, "grad_norm": 1.7371329069137573, "learning_rate": 7.957479420948056e-05, "loss": 0.0194592610001564, "step": 71990 }, { "epoch": 20.437127448197558, "grad_norm": 8.044124603271484, "learning_rate": 7.95719557195572e-05, "loss": 0.01736692190170288, "step": 72000 }, { "epoch": 20.437127448197558, "eval_accuracy": 0.9722769759013162, "eval_loss": 0.09542180597782135, "eval_runtime": 33.8205, "eval_samples_per_second": 465.015, "eval_steps_per_second": 7.274, "step": 72000 }, { "epoch": 20.43996593812092, "grad_norm": 0.08491235226392746, "learning_rate": 7.956911722963383e-05, "loss": 0.008915182948112488, "step": 72010 }, { "epoch": 20.44280442804428, "grad_norm": 0.9316523671150208, "learning_rate": 7.956627873971048e-05, "loss": 0.032794144749641416, "step": 72020 }, { "epoch": 20.445642917967643, "grad_norm": 0.8594599366188049, "learning_rate": 7.956344024978712e-05, "loss": 0.011322449147701263, "step": 72030 }, { "epoch": 20.448481407891002, "grad_norm": 13.988378524780273, "learning_rate": 7.956060175986374e-05, "loss": 0.014166532456874848, "step": 72040 }, { "epoch": 20.45131989781436, "grad_norm": 9.828819274902344, "learning_rate": 7.95577632699404e-05, "loss": 0.015715351700782774, "step": 72050 }, { "epoch": 20.454158387737724, "grad_norm": 3.977185010910034, "learning_rate": 7.955492478001704e-05, "loss": 0.008187664300203323, "step": 72060 }, { "epoch": 20.456996877661084, "grad_norm": 0.7853831052780151, "learning_rate": 7.955208629009367e-05, "loss": 0.0016485979780554772, "step": 72070 }, { "epoch": 20.459835367584446, "grad_norm": 0.24561822414398193, "learning_rate": 7.954924780017031e-05, "loss": 0.005656232684850692, "step": 72080 }, { "epoch": 20.462673857507806, "grad_norm": 8.962769508361816, "learning_rate": 7.954640931024695e-05, "loss": 0.012949442863464356, "step": 72090 }, { "epoch": 20.465512347431165, "grad_norm": 1.5460302829742432, "learning_rate": 7.954357082032359e-05, "loss": 0.014253039658069611, "step": 72100 }, { "epoch": 20.468350837354528, "grad_norm": 11.935850143432617, "learning_rate": 7.954073233040023e-05, "loss": 0.01864160895347595, "step": 72110 }, { "epoch": 20.471189327277887, "grad_norm": 0.5329301953315735, "learning_rate": 7.953789384047688e-05, "loss": 0.011663000285625457, "step": 72120 }, { "epoch": 20.47402781720125, "grad_norm": 3.4041054248809814, "learning_rate": 7.953505535055352e-05, "loss": 0.007515928149223328, "step": 72130 }, { "epoch": 20.47686630712461, "grad_norm": 0.062259942293167114, "learning_rate": 7.953221686063014e-05, "loss": 0.008645369857549667, "step": 72140 }, { "epoch": 20.47970479704797, "grad_norm": 0.0270087867975235, "learning_rate": 7.952937837070679e-05, "loss": 0.0033864609897136686, "step": 72150 }, { "epoch": 20.48254328697133, "grad_norm": 15.520306587219238, "learning_rate": 7.952653988078343e-05, "loss": 0.012915948033332824, "step": 72160 }, { "epoch": 20.48538177689469, "grad_norm": 0.2323584407567978, "learning_rate": 7.952370139086006e-05, "loss": 0.011641310900449753, "step": 72170 }, { "epoch": 20.488220266818054, "grad_norm": 3.507131338119507, "learning_rate": 7.952086290093671e-05, "loss": 0.01729032099246979, "step": 72180 }, { "epoch": 20.491058756741413, "grad_norm": 0.05009045451879501, "learning_rate": 7.951802441101335e-05, "loss": 0.037716978788375856, "step": 72190 }, { "epoch": 20.493897246664773, "grad_norm": 1.4873847961425781, "learning_rate": 7.951518592108998e-05, "loss": 0.01263839304447174, "step": 72200 }, { "epoch": 20.496735736588136, "grad_norm": 2.2642126083374023, "learning_rate": 7.951234743116662e-05, "loss": 0.01159719079732895, "step": 72210 }, { "epoch": 20.499574226511495, "grad_norm": 0.13426822423934937, "learning_rate": 7.950950894124326e-05, "loss": 0.013325349986553192, "step": 72220 }, { "epoch": 20.502412716434858, "grad_norm": 0.2050573229789734, "learning_rate": 7.95066704513199e-05, "loss": 0.01784103363752365, "step": 72230 }, { "epoch": 20.505251206358217, "grad_norm": 0.5942414402961731, "learning_rate": 7.950383196139653e-05, "loss": 0.005443480610847473, "step": 72240 }, { "epoch": 20.50808969628158, "grad_norm": 0.030989205464720726, "learning_rate": 7.950099347147319e-05, "loss": 0.0063235975801944734, "step": 72250 }, { "epoch": 20.51092818620494, "grad_norm": 0.1702861338853836, "learning_rate": 7.949815498154983e-05, "loss": 0.016134339570999145, "step": 72260 }, { "epoch": 20.5137666761283, "grad_norm": 12.798343658447266, "learning_rate": 7.949531649162646e-05, "loss": 0.016515547037124635, "step": 72270 }, { "epoch": 20.51660516605166, "grad_norm": 0.06161119416356087, "learning_rate": 7.94924780017031e-05, "loss": 0.017520207166671752, "step": 72280 }, { "epoch": 20.51944365597502, "grad_norm": 13.819439888000488, "learning_rate": 7.948963951177974e-05, "loss": 0.020389054715633393, "step": 72290 }, { "epoch": 20.522282145898384, "grad_norm": 0.40526050329208374, "learning_rate": 7.948680102185637e-05, "loss": 0.011181681603193282, "step": 72300 }, { "epoch": 20.525120635821743, "grad_norm": 0.053208865225315094, "learning_rate": 7.948396253193302e-05, "loss": 0.005820669233798981, "step": 72310 }, { "epoch": 20.527959125745102, "grad_norm": 0.5523765683174133, "learning_rate": 7.948112404200966e-05, "loss": 0.029317867755889893, "step": 72320 }, { "epoch": 20.530797615668465, "grad_norm": 0.32700037956237793, "learning_rate": 7.947828555208629e-05, "loss": 0.018427588045597076, "step": 72330 }, { "epoch": 20.533636105591825, "grad_norm": 5.3386101722717285, "learning_rate": 7.947544706216293e-05, "loss": 0.0059106480330228806, "step": 72340 }, { "epoch": 20.536474595515188, "grad_norm": 1.5190528631210327, "learning_rate": 7.947260857223957e-05, "loss": 0.005229027196764946, "step": 72350 }, { "epoch": 20.539313085438547, "grad_norm": 2.207822322845459, "learning_rate": 7.946977008231621e-05, "loss": 0.00702689066529274, "step": 72360 }, { "epoch": 20.542151575361906, "grad_norm": 2.5628275871276855, "learning_rate": 7.946693159239284e-05, "loss": 0.006532460451126099, "step": 72370 }, { "epoch": 20.54499006528527, "grad_norm": 5.681426048278809, "learning_rate": 7.94640931024695e-05, "loss": 0.005067500472068787, "step": 72380 }, { "epoch": 20.54782855520863, "grad_norm": 0.2532278001308441, "learning_rate": 7.946125461254614e-05, "loss": 0.005772820115089417, "step": 72390 }, { "epoch": 20.55066704513199, "grad_norm": 0.060357362031936646, "learning_rate": 7.945841612262277e-05, "loss": 0.004460076242685318, "step": 72400 }, { "epoch": 20.55350553505535, "grad_norm": 0.07478784769773483, "learning_rate": 7.945557763269941e-05, "loss": 0.0018306365236639977, "step": 72410 }, { "epoch": 20.55634402497871, "grad_norm": 0.22554221749305725, "learning_rate": 7.945273914277605e-05, "loss": 0.011449097096920014, "step": 72420 }, { "epoch": 20.559182514902073, "grad_norm": 10.41325569152832, "learning_rate": 7.944990065285268e-05, "loss": 0.014823152124881745, "step": 72430 }, { "epoch": 20.562021004825432, "grad_norm": 1.5804752111434937, "learning_rate": 7.944706216292932e-05, "loss": 0.0031535908579826354, "step": 72440 }, { "epoch": 20.564859494748795, "grad_norm": 1.3235818147659302, "learning_rate": 7.944422367300597e-05, "loss": 0.010452428460121154, "step": 72450 }, { "epoch": 20.567697984672154, "grad_norm": 1.4440994262695312, "learning_rate": 7.94413851830826e-05, "loss": 0.029002204537391663, "step": 72460 }, { "epoch": 20.570536474595514, "grad_norm": 0.5540594458580017, "learning_rate": 7.943854669315924e-05, "loss": 0.022501324117183686, "step": 72470 }, { "epoch": 20.573374964518877, "grad_norm": 4.881194591522217, "learning_rate": 7.943570820323588e-05, "loss": 0.013215628266334534, "step": 72480 }, { "epoch": 20.576213454442236, "grad_norm": 0.19283536076545715, "learning_rate": 7.943286971331252e-05, "loss": 0.0032246723771095277, "step": 72490 }, { "epoch": 20.5790519443656, "grad_norm": 1.8409188985824585, "learning_rate": 7.943003122338915e-05, "loss": 0.005358802154660225, "step": 72500 }, { "epoch": 20.5790519443656, "eval_accuracy": 0.9743752781840147, "eval_loss": 0.0831722840666771, "eval_runtime": 34.9234, "eval_samples_per_second": 450.329, "eval_steps_per_second": 7.044, "step": 72500 }, { "epoch": 20.581890434288958, "grad_norm": 3.974062919616699, "learning_rate": 7.942719273346581e-05, "loss": 0.0055478416383266446, "step": 72510 }, { "epoch": 20.584728924212317, "grad_norm": 0.9268086552619934, "learning_rate": 7.942435424354245e-05, "loss": 0.003149773180484772, "step": 72520 }, { "epoch": 20.58756741413568, "grad_norm": 0.3432391285896301, "learning_rate": 7.942151575361908e-05, "loss": 0.0039763770997524265, "step": 72530 }, { "epoch": 20.59040590405904, "grad_norm": 1.4847079515457153, "learning_rate": 7.941867726369572e-05, "loss": 0.013653279840946197, "step": 72540 }, { "epoch": 20.593244393982403, "grad_norm": 10.791133880615234, "learning_rate": 7.941583877377236e-05, "loss": 0.01275353878736496, "step": 72550 }, { "epoch": 20.596082883905762, "grad_norm": 1.4344544410705566, "learning_rate": 7.941300028384899e-05, "loss": 0.010457056760787963, "step": 72560 }, { "epoch": 20.59892137382912, "grad_norm": 0.12157366424798965, "learning_rate": 7.941016179392563e-05, "loss": 0.006041015312075615, "step": 72570 }, { "epoch": 20.601759863752484, "grad_norm": 0.6478506326675415, "learning_rate": 7.940732330400228e-05, "loss": 0.005896477401256562, "step": 72580 }, { "epoch": 20.604598353675843, "grad_norm": 0.2348155975341797, "learning_rate": 7.940448481407891e-05, "loss": 0.00341084823012352, "step": 72590 }, { "epoch": 20.607436843599206, "grad_norm": 3.474848747253418, "learning_rate": 7.940164632415555e-05, "loss": 0.005464639514684677, "step": 72600 }, { "epoch": 20.610275333522566, "grad_norm": 0.6121522784233093, "learning_rate": 7.93988078342322e-05, "loss": 0.0074565589427948, "step": 72610 }, { "epoch": 20.61311382344593, "grad_norm": 0.6807460784912109, "learning_rate": 7.939596934430884e-05, "loss": 0.03690108954906464, "step": 72620 }, { "epoch": 20.615952313369288, "grad_norm": 10.075699806213379, "learning_rate": 7.939313085438546e-05, "loss": 0.008872085064649583, "step": 72630 }, { "epoch": 20.618790803292647, "grad_norm": 2.334808349609375, "learning_rate": 7.939029236446212e-05, "loss": 0.002708200179040432, "step": 72640 }, { "epoch": 20.62162929321601, "grad_norm": 5.481932640075684, "learning_rate": 7.938745387453875e-05, "loss": 0.01088976114988327, "step": 72650 }, { "epoch": 20.62446778313937, "grad_norm": 0.12752610445022583, "learning_rate": 7.938461538461539e-05, "loss": 0.0177481546998024, "step": 72660 }, { "epoch": 20.627306273062732, "grad_norm": 0.560413122177124, "learning_rate": 7.938177689469203e-05, "loss": 0.003744165599346161, "step": 72670 }, { "epoch": 20.63014476298609, "grad_norm": 14.076713562011719, "learning_rate": 7.937893840476867e-05, "loss": 0.019052773714065552, "step": 72680 }, { "epoch": 20.63298325290945, "grad_norm": 0.9807866215705872, "learning_rate": 7.93760999148453e-05, "loss": 0.010610899329185486, "step": 72690 }, { "epoch": 20.635821742832814, "grad_norm": 1.716973900794983, "learning_rate": 7.937326142492194e-05, "loss": 0.009712444990873337, "step": 72700 }, { "epoch": 20.638660232756173, "grad_norm": 10.118756294250488, "learning_rate": 7.93704229349986e-05, "loss": 0.013426978886127473, "step": 72710 }, { "epoch": 20.641498722679536, "grad_norm": 6.493293285369873, "learning_rate": 7.936758444507522e-05, "loss": 0.01773618459701538, "step": 72720 }, { "epoch": 20.644337212602895, "grad_norm": 1.4987260103225708, "learning_rate": 7.936474595515186e-05, "loss": 0.014094020426273345, "step": 72730 }, { "epoch": 20.647175702526255, "grad_norm": 0.0901440978050232, "learning_rate": 7.93619074652285e-05, "loss": 0.015811887383461, "step": 72740 }, { "epoch": 20.650014192449618, "grad_norm": 0.36958616971969604, "learning_rate": 7.935906897530515e-05, "loss": 0.03805981874465943, "step": 72750 }, { "epoch": 20.652852682372977, "grad_norm": 0.7274298071861267, "learning_rate": 7.935623048538177e-05, "loss": 0.006943802535533905, "step": 72760 }, { "epoch": 20.65569117229634, "grad_norm": 1.1641286611557007, "learning_rate": 7.935339199545842e-05, "loss": 0.010816558450460433, "step": 72770 }, { "epoch": 20.6585296622197, "grad_norm": 0.3852696418762207, "learning_rate": 7.935055350553506e-05, "loss": 0.013059176504611969, "step": 72780 }, { "epoch": 20.66136815214306, "grad_norm": 1.8852826356887817, "learning_rate": 7.93477150156117e-05, "loss": 0.009805868566036224, "step": 72790 }, { "epoch": 20.66420664206642, "grad_norm": 2.876755952835083, "learning_rate": 7.934487652568834e-05, "loss": 0.017488007247447968, "step": 72800 }, { "epoch": 20.66704513198978, "grad_norm": 0.23893587291240692, "learning_rate": 7.934203803576498e-05, "loss": 0.015460638701915741, "step": 72810 }, { "epoch": 20.669883621913144, "grad_norm": 3.7793610095977783, "learning_rate": 7.933919954584161e-05, "loss": 0.007938335090875626, "step": 72820 }, { "epoch": 20.672722111836503, "grad_norm": 6.958462715148926, "learning_rate": 7.933636105591825e-05, "loss": 0.013024720549583434, "step": 72830 }, { "epoch": 20.675560601759862, "grad_norm": 0.21333874762058258, "learning_rate": 7.93335225659949e-05, "loss": 0.012101350724697113, "step": 72840 }, { "epoch": 20.678399091683225, "grad_norm": 1.4298280477523804, "learning_rate": 7.933068407607153e-05, "loss": 0.009360031038522721, "step": 72850 }, { "epoch": 20.681237581606585, "grad_norm": 2.9617512226104736, "learning_rate": 7.932784558614817e-05, "loss": 0.012097227573394775, "step": 72860 }, { "epoch": 20.684076071529947, "grad_norm": 0.4240191876888275, "learning_rate": 7.932500709622482e-05, "loss": 0.011783113330602646, "step": 72870 }, { "epoch": 20.686914561453307, "grad_norm": 0.4565923810005188, "learning_rate": 7.932216860630144e-05, "loss": 0.01876079887151718, "step": 72880 }, { "epoch": 20.689753051376666, "grad_norm": 0.4552061855792999, "learning_rate": 7.931933011637809e-05, "loss": 0.03461885452270508, "step": 72890 }, { "epoch": 20.69259154130003, "grad_norm": 5.949021816253662, "learning_rate": 7.931649162645473e-05, "loss": 0.0052215375006198885, "step": 72900 }, { "epoch": 20.69543003122339, "grad_norm": 0.1046123057603836, "learning_rate": 7.931365313653137e-05, "loss": 0.02269044518470764, "step": 72910 }, { "epoch": 20.69826852114675, "grad_norm": 10.013261795043945, "learning_rate": 7.931081464660801e-05, "loss": 0.009363660961389542, "step": 72920 }, { "epoch": 20.70110701107011, "grad_norm": 20.21061134338379, "learning_rate": 7.930797615668465e-05, "loss": 0.01969160735607147, "step": 72930 }, { "epoch": 20.70394550099347, "grad_norm": 2.5235249996185303, "learning_rate": 7.930513766676129e-05, "loss": 0.006459902226924896, "step": 72940 }, { "epoch": 20.706783990916833, "grad_norm": 0.20632071793079376, "learning_rate": 7.930229917683792e-05, "loss": 0.014433580636978149, "step": 72950 }, { "epoch": 20.709622480840192, "grad_norm": 1.1663204431533813, "learning_rate": 7.929946068691456e-05, "loss": 0.009025096148252486, "step": 72960 }, { "epoch": 20.712460970763555, "grad_norm": 1.978821039199829, "learning_rate": 7.92966221969912e-05, "loss": 0.013295818865299226, "step": 72970 }, { "epoch": 20.715299460686914, "grad_norm": 0.2144603133201599, "learning_rate": 7.929378370706784e-05, "loss": 0.03700239062309265, "step": 72980 }, { "epoch": 20.718137950610274, "grad_norm": 4.750355243682861, "learning_rate": 7.929094521714449e-05, "loss": 0.012140108644962311, "step": 72990 }, { "epoch": 20.720976440533637, "grad_norm": 7.192612648010254, "learning_rate": 7.928810672722113e-05, "loss": 0.012775751948356628, "step": 73000 }, { "epoch": 20.720976440533637, "eval_accuracy": 0.9717047116423985, "eval_loss": 0.09327775985002518, "eval_runtime": 33.3541, "eval_samples_per_second": 471.516, "eval_steps_per_second": 7.375, "step": 73000 }, { "epoch": 20.723814930456996, "grad_norm": 0.7481854557991028, "learning_rate": 7.928526823729775e-05, "loss": 0.006145288050174713, "step": 73010 }, { "epoch": 20.72665342038036, "grad_norm": 0.7519213557243347, "learning_rate": 7.92824297473744e-05, "loss": 0.01866353303194046, "step": 73020 }, { "epoch": 20.729491910303718, "grad_norm": 0.7924914360046387, "learning_rate": 7.927959125745104e-05, "loss": 0.030584457516670226, "step": 73030 }, { "epoch": 20.732330400227077, "grad_norm": 4.846336364746094, "learning_rate": 7.927675276752768e-05, "loss": 0.008511370420455933, "step": 73040 }, { "epoch": 20.73516889015044, "grad_norm": 0.6003618240356445, "learning_rate": 7.927391427760432e-05, "loss": 0.024074213206768037, "step": 73050 }, { "epoch": 20.7380073800738, "grad_norm": 0.2633776366710663, "learning_rate": 7.927107578768096e-05, "loss": 0.01506279706954956, "step": 73060 }, { "epoch": 20.740845869997163, "grad_norm": 1.0924656391143799, "learning_rate": 7.92682372977576e-05, "loss": 0.005329587310552597, "step": 73070 }, { "epoch": 20.743684359920522, "grad_norm": 3.0554864406585693, "learning_rate": 7.926539880783423e-05, "loss": 0.00844067484140396, "step": 73080 }, { "epoch": 20.746522849843885, "grad_norm": 1.1213736534118652, "learning_rate": 7.926256031791087e-05, "loss": 0.0053549081087112425, "step": 73090 }, { "epoch": 20.749361339767244, "grad_norm": 1.2408546209335327, "learning_rate": 7.925972182798751e-05, "loss": 0.0073475025594234465, "step": 73100 }, { "epoch": 20.752199829690603, "grad_norm": 8.673306465148926, "learning_rate": 7.925688333806415e-05, "loss": 0.016597452759742736, "step": 73110 }, { "epoch": 20.755038319613966, "grad_norm": 7.336655616760254, "learning_rate": 7.92540448481408e-05, "loss": 0.017374302446842193, "step": 73120 }, { "epoch": 20.757876809537326, "grad_norm": 0.14627434313297272, "learning_rate": 7.925120635821744e-05, "loss": 0.009167732298374176, "step": 73130 }, { "epoch": 20.76071529946069, "grad_norm": 0.08693002909421921, "learning_rate": 7.924836786829407e-05, "loss": 0.0031259529292583465, "step": 73140 }, { "epoch": 20.763553789384048, "grad_norm": 0.3097310960292816, "learning_rate": 7.92455293783707e-05, "loss": 0.019970138370990754, "step": 73150 }, { "epoch": 20.766392279307407, "grad_norm": 2.77786922454834, "learning_rate": 7.924269088844735e-05, "loss": 0.011018165200948716, "step": 73160 }, { "epoch": 20.76923076923077, "grad_norm": 2.266305923461914, "learning_rate": 7.923985239852399e-05, "loss": 0.0123715840280056, "step": 73170 }, { "epoch": 20.77206925915413, "grad_norm": 0.09519746899604797, "learning_rate": 7.923701390860063e-05, "loss": 0.003356894478201866, "step": 73180 }, { "epoch": 20.774907749077492, "grad_norm": 0.11105557531118393, "learning_rate": 7.923417541867727e-05, "loss": 0.008596660941839218, "step": 73190 }, { "epoch": 20.77774623900085, "grad_norm": 0.9448263049125671, "learning_rate": 7.923133692875391e-05, "loss": 0.009567052870988847, "step": 73200 }, { "epoch": 20.78058472892421, "grad_norm": 0.4676162004470825, "learning_rate": 7.922849843883054e-05, "loss": 0.02745891809463501, "step": 73210 }, { "epoch": 20.783423218847574, "grad_norm": 7.9547882080078125, "learning_rate": 7.922565994890718e-05, "loss": 0.0282556414604187, "step": 73220 }, { "epoch": 20.786261708770933, "grad_norm": 4.572864055633545, "learning_rate": 7.922282145898382e-05, "loss": 0.007587039470672607, "step": 73230 }, { "epoch": 20.789100198694296, "grad_norm": 6.184006214141846, "learning_rate": 7.921998296906047e-05, "loss": 0.009011179208755493, "step": 73240 }, { "epoch": 20.791938688617655, "grad_norm": 0.17885246872901917, "learning_rate": 7.921714447913711e-05, "loss": 0.03410559594631195, "step": 73250 }, { "epoch": 20.794777178541015, "grad_norm": 0.267272025346756, "learning_rate": 7.921430598921375e-05, "loss": 0.01605355888605118, "step": 73260 }, { "epoch": 20.797615668464378, "grad_norm": 0.7710351347923279, "learning_rate": 7.921146749929038e-05, "loss": 0.01925737112760544, "step": 73270 }, { "epoch": 20.800454158387737, "grad_norm": 0.635677695274353, "learning_rate": 7.920862900936702e-05, "loss": 0.00371282622218132, "step": 73280 }, { "epoch": 20.8032926483111, "grad_norm": 0.17678743600845337, "learning_rate": 7.920579051944366e-05, "loss": 0.00581563301384449, "step": 73290 }, { "epoch": 20.80613113823446, "grad_norm": 0.3600148856639862, "learning_rate": 7.92029520295203e-05, "loss": 0.008430084586143494, "step": 73300 }, { "epoch": 20.80896962815782, "grad_norm": 0.2927381992340088, "learning_rate": 7.920011353959694e-05, "loss": 0.0026071490719914436, "step": 73310 }, { "epoch": 20.81180811808118, "grad_norm": 0.38162893056869507, "learning_rate": 7.919727504967358e-05, "loss": 0.006387043744325638, "step": 73320 }, { "epoch": 20.81464660800454, "grad_norm": 0.4553722143173218, "learning_rate": 7.919443655975022e-05, "loss": 0.013175581395626069, "step": 73330 }, { "epoch": 20.817485097927904, "grad_norm": 9.46007251739502, "learning_rate": 7.919159806982685e-05, "loss": 0.014119277894496917, "step": 73340 }, { "epoch": 20.820323587851263, "grad_norm": 1.4327914714813232, "learning_rate": 7.91887595799035e-05, "loss": 0.01932302713394165, "step": 73350 }, { "epoch": 20.823162077774622, "grad_norm": 0.22880929708480835, "learning_rate": 7.918592108998013e-05, "loss": 0.014097033441066742, "step": 73360 }, { "epoch": 20.826000567697985, "grad_norm": 9.240327835083008, "learning_rate": 7.918308260005676e-05, "loss": 0.013062742352485657, "step": 73370 }, { "epoch": 20.828839057621344, "grad_norm": 10.282024383544922, "learning_rate": 7.918024411013342e-05, "loss": 0.01182577684521675, "step": 73380 }, { "epoch": 20.831677547544707, "grad_norm": 2.494396209716797, "learning_rate": 7.917740562021006e-05, "loss": 0.01931355893611908, "step": 73390 }, { "epoch": 20.834516037468067, "grad_norm": 3.8924310207366943, "learning_rate": 7.917456713028669e-05, "loss": 0.01901145875453949, "step": 73400 }, { "epoch": 20.837354527391426, "grad_norm": 0.40834158658981323, "learning_rate": 7.917172864036333e-05, "loss": 0.015667819976806642, "step": 73410 }, { "epoch": 20.84019301731479, "grad_norm": 0.943143367767334, "learning_rate": 7.916889015043997e-05, "loss": 0.016954603791236877, "step": 73420 }, { "epoch": 20.84303150723815, "grad_norm": 8.108253479003906, "learning_rate": 7.916605166051661e-05, "loss": 0.025237858295440674, "step": 73430 }, { "epoch": 20.84586999716151, "grad_norm": 2.406580686569214, "learning_rate": 7.916321317059325e-05, "loss": 0.027797847986221313, "step": 73440 }, { "epoch": 20.84870848708487, "grad_norm": 0.49890559911727905, "learning_rate": 7.91603746806699e-05, "loss": 0.017985662817955016, "step": 73450 }, { "epoch": 20.851546977008233, "grad_norm": 0.8972476124763489, "learning_rate": 7.915753619074654e-05, "loss": 0.022947561740875245, "step": 73460 }, { "epoch": 20.854385466931593, "grad_norm": 1.7816755771636963, "learning_rate": 7.915469770082316e-05, "loss": 0.02361636459827423, "step": 73470 }, { "epoch": 20.857223956854952, "grad_norm": 11.20187759399414, "learning_rate": 7.91518592108998e-05, "loss": 0.019215202331542967, "step": 73480 }, { "epoch": 20.860062446778315, "grad_norm": 0.7419866919517517, "learning_rate": 7.914902072097645e-05, "loss": 0.020421874523162842, "step": 73490 }, { "epoch": 20.862900936701674, "grad_norm": 0.3039185404777527, "learning_rate": 7.914618223105307e-05, "loss": 0.01601864695549011, "step": 73500 }, { "epoch": 20.862900936701674, "eval_accuracy": 0.9734215044191518, "eval_loss": 0.08969491720199585, "eval_runtime": 33.7049, "eval_samples_per_second": 466.608, "eval_steps_per_second": 7.299, "step": 73500 }, { "epoch": 20.865739426625037, "grad_norm": 4.781130313873291, "learning_rate": 7.914334374112973e-05, "loss": 0.006029101461172104, "step": 73510 }, { "epoch": 20.868577916548396, "grad_norm": 2.115995168685913, "learning_rate": 7.914050525120637e-05, "loss": 0.018475423753261565, "step": 73520 }, { "epoch": 20.871416406471756, "grad_norm": 1.5206923484802246, "learning_rate": 7.9137666761283e-05, "loss": 0.009048598259687424, "step": 73530 }, { "epoch": 20.87425489639512, "grad_norm": 5.818942070007324, "learning_rate": 7.913482827135964e-05, "loss": 0.03572856187820435, "step": 73540 }, { "epoch": 20.877093386318478, "grad_norm": 14.409184455871582, "learning_rate": 7.913198978143628e-05, "loss": 0.02162979692220688, "step": 73550 }, { "epoch": 20.87993187624184, "grad_norm": 2.2163286209106445, "learning_rate": 7.912915129151292e-05, "loss": 0.00675315260887146, "step": 73560 }, { "epoch": 20.8827703661652, "grad_norm": 4.288771152496338, "learning_rate": 7.912631280158955e-05, "loss": 0.008942141383886337, "step": 73570 }, { "epoch": 20.88560885608856, "grad_norm": 3.617682933807373, "learning_rate": 7.91234743116662e-05, "loss": 0.011860379576683044, "step": 73580 }, { "epoch": 20.888447346011922, "grad_norm": 3.3110129833221436, "learning_rate": 7.912063582174283e-05, "loss": 0.009432968497276307, "step": 73590 }, { "epoch": 20.89128583593528, "grad_norm": 3.1292169094085693, "learning_rate": 7.911779733181947e-05, "loss": 0.0058609843254089355, "step": 73600 }, { "epoch": 20.894124325858645, "grad_norm": 17.329936981201172, "learning_rate": 7.911495884189611e-05, "loss": 0.03028983473777771, "step": 73610 }, { "epoch": 20.896962815782004, "grad_norm": 0.15319930016994476, "learning_rate": 7.911212035197276e-05, "loss": 0.01901219189167023, "step": 73620 }, { "epoch": 20.899801305705363, "grad_norm": 8.340362548828125, "learning_rate": 7.910928186204938e-05, "loss": 0.01371970921754837, "step": 73630 }, { "epoch": 20.902639795628726, "grad_norm": 0.9283625483512878, "learning_rate": 7.910644337212604e-05, "loss": 0.020096203684806822, "step": 73640 }, { "epoch": 20.905478285552086, "grad_norm": 3.73777174949646, "learning_rate": 7.910360488220268e-05, "loss": 0.013257558643817901, "step": 73650 }, { "epoch": 20.90831677547545, "grad_norm": 0.02115478739142418, "learning_rate": 7.910076639227931e-05, "loss": 0.02490440011024475, "step": 73660 }, { "epoch": 20.911155265398808, "grad_norm": 0.37541502714157104, "learning_rate": 7.909792790235595e-05, "loss": 0.014266550540924072, "step": 73670 }, { "epoch": 20.913993755322167, "grad_norm": 9.299164772033691, "learning_rate": 7.909508941243259e-05, "loss": 0.014790317416191101, "step": 73680 }, { "epoch": 20.91683224524553, "grad_norm": 0.8228964805603027, "learning_rate": 7.909225092250922e-05, "loss": 0.01431729793548584, "step": 73690 }, { "epoch": 20.91967073516889, "grad_norm": 0.10576353967189789, "learning_rate": 7.908941243258586e-05, "loss": 0.024030637741088868, "step": 73700 }, { "epoch": 20.922509225092252, "grad_norm": 0.2716499865055084, "learning_rate": 7.908657394266252e-05, "loss": 0.0020967504009604455, "step": 73710 }, { "epoch": 20.92534771501561, "grad_norm": 8.516170501708984, "learning_rate": 7.908373545273914e-05, "loss": 0.007076786458492279, "step": 73720 }, { "epoch": 20.92818620493897, "grad_norm": 3.6050844192504883, "learning_rate": 7.908089696281578e-05, "loss": 0.02694910764694214, "step": 73730 }, { "epoch": 20.931024694862334, "grad_norm": 7.104453086853027, "learning_rate": 7.907805847289243e-05, "loss": 0.025199893116950988, "step": 73740 }, { "epoch": 20.933863184785693, "grad_norm": 1.625727653503418, "learning_rate": 7.907521998296907e-05, "loss": 0.012181611359119415, "step": 73750 }, { "epoch": 20.936701674709056, "grad_norm": 5.253945827484131, "learning_rate": 7.90723814930457e-05, "loss": 0.013204029202461243, "step": 73760 }, { "epoch": 20.939540164632415, "grad_norm": 8.672134399414062, "learning_rate": 7.906954300312234e-05, "loss": 0.018232086300849916, "step": 73770 }, { "epoch": 20.942378654555775, "grad_norm": 0.5812059044837952, "learning_rate": 7.906670451319899e-05, "loss": 0.01700669527053833, "step": 73780 }, { "epoch": 20.945217144479138, "grad_norm": 8.373154640197754, "learning_rate": 7.906386602327562e-05, "loss": 0.010769186168909073, "step": 73790 }, { "epoch": 20.948055634402497, "grad_norm": 3.250288963317871, "learning_rate": 7.906102753335226e-05, "loss": 0.007400356233119965, "step": 73800 }, { "epoch": 20.95089412432586, "grad_norm": 6.418565273284912, "learning_rate": 7.90581890434289e-05, "loss": 0.024983973801136018, "step": 73810 }, { "epoch": 20.95373261424922, "grad_norm": 0.903744101524353, "learning_rate": 7.905535055350553e-05, "loss": 0.002473265677690506, "step": 73820 }, { "epoch": 20.956571104172582, "grad_norm": 4.976179122924805, "learning_rate": 7.905251206358217e-05, "loss": 0.017603135108947753, "step": 73830 }, { "epoch": 20.95940959409594, "grad_norm": 1.1032419204711914, "learning_rate": 7.904967357365883e-05, "loss": 0.010605220496654511, "step": 73840 }, { "epoch": 20.9622480840193, "grad_norm": 7.748438835144043, "learning_rate": 7.904683508373545e-05, "loss": 0.017736876010894777, "step": 73850 }, { "epoch": 20.965086573942664, "grad_norm": 1.1798015832901, "learning_rate": 7.90439965938121e-05, "loss": 0.00708203911781311, "step": 73860 }, { "epoch": 20.967925063866023, "grad_norm": 2.3954155445098877, "learning_rate": 7.904115810388874e-05, "loss": 0.021498659253120424, "step": 73870 }, { "epoch": 20.970763553789386, "grad_norm": 6.08751916885376, "learning_rate": 7.903831961396538e-05, "loss": 0.019680988788604737, "step": 73880 }, { "epoch": 20.973602043712745, "grad_norm": 0.6197250485420227, "learning_rate": 7.9035481124042e-05, "loss": 0.012173322588205337, "step": 73890 }, { "epoch": 20.976440533636104, "grad_norm": 10.510972023010254, "learning_rate": 7.903264263411865e-05, "loss": 0.006921101361513138, "step": 73900 }, { "epoch": 20.979279023559467, "grad_norm": 1.103395938873291, "learning_rate": 7.90298041441953e-05, "loss": 0.016540084779262543, "step": 73910 }, { "epoch": 20.982117513482827, "grad_norm": 1.3859333992004395, "learning_rate": 7.902724950326427e-05, "loss": 0.014293298125267029, "step": 73920 }, { "epoch": 20.98495600340619, "grad_norm": 0.24746812880039215, "learning_rate": 7.902441101334091e-05, "loss": 0.0084824338555336, "step": 73930 }, { "epoch": 20.98779449332955, "grad_norm": 0.3146625757217407, "learning_rate": 7.902157252341754e-05, "loss": 0.014729849994182587, "step": 73940 }, { "epoch": 20.990632983252908, "grad_norm": 0.09383119642734528, "learning_rate": 7.901873403349418e-05, "loss": 0.007636082917451858, "step": 73950 }, { "epoch": 20.99347147317627, "grad_norm": 5.827504634857178, "learning_rate": 7.901589554357083e-05, "loss": 0.010426318645477295, "step": 73960 }, { "epoch": 20.99630996309963, "grad_norm": 0.18144777417182922, "learning_rate": 7.901305705364746e-05, "loss": 0.00841691792011261, "step": 73970 }, { "epoch": 20.999148453022993, "grad_norm": 0.5525778532028198, "learning_rate": 7.90102185637241e-05, "loss": 0.0049810543656349186, "step": 73980 }, { "epoch": 21.001986942946353, "grad_norm": 0.25606733560562134, "learning_rate": 7.900738007380074e-05, "loss": 0.036818936467170715, "step": 73990 }, { "epoch": 21.004825432869712, "grad_norm": 1.0598633289337158, "learning_rate": 7.900454158387737e-05, "loss": 0.006294305622577667, "step": 74000 }, { "epoch": 21.004825432869712, "eval_accuracy": 0.9765371653843709, "eval_loss": 0.08259952813386917, "eval_runtime": 33.7644, "eval_samples_per_second": 465.786, "eval_steps_per_second": 7.286, "step": 74000 }, { "epoch": 21.007663922793075, "grad_norm": 0.2671886384487152, "learning_rate": 7.900170309395401e-05, "loss": 0.005551052093505859, "step": 74010 }, { "epoch": 21.010502412716434, "grad_norm": 1.2782680988311768, "learning_rate": 7.899886460403067e-05, "loss": 0.0024282675236463546, "step": 74020 }, { "epoch": 21.013340902639797, "grad_norm": 0.6298090219497681, "learning_rate": 7.89960261141073e-05, "loss": 0.0028094012290239335, "step": 74030 }, { "epoch": 21.016179392563156, "grad_norm": 0.05162602663040161, "learning_rate": 7.899318762418394e-05, "loss": 0.005594388395547867, "step": 74040 }, { "epoch": 21.019017882486516, "grad_norm": 0.019270887598395348, "learning_rate": 7.899034913426058e-05, "loss": 0.0054659247398376465, "step": 74050 }, { "epoch": 21.02185637240988, "grad_norm": 1.4074394702911377, "learning_rate": 7.898751064433722e-05, "loss": 0.0037485338747501374, "step": 74060 }, { "epoch": 21.024694862333238, "grad_norm": 1.1224313974380493, "learning_rate": 7.898467215441385e-05, "loss": 0.01356179416179657, "step": 74070 }, { "epoch": 21.0275333522566, "grad_norm": 6.977365493774414, "learning_rate": 7.898183366449049e-05, "loss": 0.029550650715827943, "step": 74080 }, { "epoch": 21.03037184217996, "grad_norm": 2.015493392944336, "learning_rate": 7.897899517456714e-05, "loss": 0.004232940822839737, "step": 74090 }, { "epoch": 21.03321033210332, "grad_norm": 0.5422430038452148, "learning_rate": 7.897615668464377e-05, "loss": 0.011875130981206895, "step": 74100 }, { "epoch": 21.036048822026682, "grad_norm": 0.147427037358284, "learning_rate": 7.897331819472041e-05, "loss": 0.0018413804471492767, "step": 74110 }, { "epoch": 21.03888731195004, "grad_norm": 0.2465590089559555, "learning_rate": 7.897047970479706e-05, "loss": 0.006416933238506317, "step": 74120 }, { "epoch": 21.041725801873405, "grad_norm": 0.07990511506795883, "learning_rate": 7.896764121487368e-05, "loss": 0.015041956305503845, "step": 74130 }, { "epoch": 21.044564291796764, "grad_norm": 0.22640562057495117, "learning_rate": 7.896480272495032e-05, "loss": 0.00538831390440464, "step": 74140 }, { "epoch": 21.047402781720123, "grad_norm": 0.41051244735717773, "learning_rate": 7.896196423502697e-05, "loss": 0.015361399948596954, "step": 74150 }, { "epoch": 21.050241271643486, "grad_norm": 0.08929464221000671, "learning_rate": 7.895912574510361e-05, "loss": 0.010897236317396164, "step": 74160 }, { "epoch": 21.053079761566845, "grad_norm": 0.015337598510086536, "learning_rate": 7.895628725518025e-05, "loss": 0.006318677961826324, "step": 74170 }, { "epoch": 21.05591825149021, "grad_norm": 8.280385971069336, "learning_rate": 7.895344876525689e-05, "loss": 0.00957949534058571, "step": 74180 }, { "epoch": 21.058756741413568, "grad_norm": 0.4455494284629822, "learning_rate": 7.895061027533353e-05, "loss": 0.013331460952758788, "step": 74190 }, { "epoch": 21.061595231336927, "grad_norm": 13.971404075622559, "learning_rate": 7.894777178541016e-05, "loss": 0.01860889196395874, "step": 74200 }, { "epoch": 21.06443372126029, "grad_norm": 9.588574409484863, "learning_rate": 7.89449332954868e-05, "loss": 0.01042596623301506, "step": 74210 }, { "epoch": 21.06727221118365, "grad_norm": 20.582250595092773, "learning_rate": 7.894209480556346e-05, "loss": 0.024581128358840944, "step": 74220 }, { "epoch": 21.070110701107012, "grad_norm": 1.8051358461380005, "learning_rate": 7.893925631564008e-05, "loss": 0.03033907413482666, "step": 74230 }, { "epoch": 21.07294919103037, "grad_norm": 2.468682050704956, "learning_rate": 7.893641782571672e-05, "loss": 0.02397492974996567, "step": 74240 }, { "epoch": 21.075787680953734, "grad_norm": 0.31337854266166687, "learning_rate": 7.893357933579337e-05, "loss": 0.02807079553604126, "step": 74250 }, { "epoch": 21.078626170877094, "grad_norm": 3.864345073699951, "learning_rate": 7.893074084587e-05, "loss": 0.012448618561029435, "step": 74260 }, { "epoch": 21.081464660800453, "grad_norm": 0.33543190360069275, "learning_rate": 7.892790235594664e-05, "loss": 0.02185899466276169, "step": 74270 }, { "epoch": 21.084303150723816, "grad_norm": 4.082113742828369, "learning_rate": 7.892506386602328e-05, "loss": 0.0064596585929393765, "step": 74280 }, { "epoch": 21.087141640647175, "grad_norm": 0.9887866973876953, "learning_rate": 7.892222537609992e-05, "loss": 0.0047198571264743805, "step": 74290 }, { "epoch": 21.089980130570538, "grad_norm": 9.262771606445312, "learning_rate": 7.891938688617656e-05, "loss": 0.01502087265253067, "step": 74300 }, { "epoch": 21.092818620493897, "grad_norm": 11.301651954650879, "learning_rate": 7.89165483962532e-05, "loss": 0.031329980492591857, "step": 74310 }, { "epoch": 21.095657110417257, "grad_norm": 5.087924480438232, "learning_rate": 7.891370990632984e-05, "loss": 0.009515728056430816, "step": 74320 }, { "epoch": 21.09849560034062, "grad_norm": 0.6557723879814148, "learning_rate": 7.891087141640647e-05, "loss": 0.006102156639099121, "step": 74330 }, { "epoch": 21.10133409026398, "grad_norm": 9.054040908813477, "learning_rate": 7.890803292648311e-05, "loss": 0.007851332426071167, "step": 74340 }, { "epoch": 21.104172580187342, "grad_norm": 1.7233529090881348, "learning_rate": 7.890519443655977e-05, "loss": 0.010117017477750779, "step": 74350 }, { "epoch": 21.1070110701107, "grad_norm": 0.37435153126716614, "learning_rate": 7.89023559466364e-05, "loss": 0.03052864968776703, "step": 74360 }, { "epoch": 21.10984956003406, "grad_norm": 0.45081624388694763, "learning_rate": 7.889951745671304e-05, "loss": 0.012850350141525269, "step": 74370 }, { "epoch": 21.112688049957423, "grad_norm": 1.33158540725708, "learning_rate": 7.889667896678968e-05, "loss": 0.0030336786061525346, "step": 74380 }, { "epoch": 21.115526539880783, "grad_norm": 0.2844926416873932, "learning_rate": 7.88938404768663e-05, "loss": 0.01432536393404007, "step": 74390 }, { "epoch": 21.118365029804146, "grad_norm": 9.82292366027832, "learning_rate": 7.889100198694295e-05, "loss": 0.02523890733718872, "step": 74400 }, { "epoch": 21.121203519727505, "grad_norm": 0.3239232003688812, "learning_rate": 7.888816349701959e-05, "loss": 0.008892231434583665, "step": 74410 }, { "epoch": 21.124042009650864, "grad_norm": 2.498927116394043, "learning_rate": 7.888532500709623e-05, "loss": 0.011777588725090027, "step": 74420 }, { "epoch": 21.126880499574227, "grad_norm": 2.1587576866149902, "learning_rate": 7.888248651717287e-05, "loss": 0.008887037634849548, "step": 74430 }, { "epoch": 21.129718989497587, "grad_norm": 0.4233526289463043, "learning_rate": 7.887964802724951e-05, "loss": 0.004100196063518524, "step": 74440 }, { "epoch": 21.13255747942095, "grad_norm": 0.8652952909469604, "learning_rate": 7.887680953732615e-05, "loss": 0.01704609990119934, "step": 74450 }, { "epoch": 21.13539596934431, "grad_norm": 5.231020450592041, "learning_rate": 7.887397104740278e-05, "loss": 0.0056127957999706265, "step": 74460 }, { "epoch": 21.138234459267668, "grad_norm": 7.351198673248291, "learning_rate": 7.887113255747942e-05, "loss": 0.032976216077804564, "step": 74470 }, { "epoch": 21.14107294919103, "grad_norm": 3.6310174465179443, "learning_rate": 7.886829406755606e-05, "loss": 0.01447279155254364, "step": 74480 }, { "epoch": 21.14391143911439, "grad_norm": 2.0432844161987305, "learning_rate": 7.88654555776327e-05, "loss": 0.01165406107902527, "step": 74490 }, { "epoch": 21.146749929037753, "grad_norm": 4.339851379394531, "learning_rate": 7.886261708770935e-05, "loss": 0.003976891189813614, "step": 74500 }, { "epoch": 21.146749929037753, "eval_accuracy": 0.9621669739937687, "eval_loss": 0.13976551592350006, "eval_runtime": 36.9516, "eval_samples_per_second": 425.611, "eval_steps_per_second": 6.657, "step": 74500 }, { "epoch": 21.149588418961113, "grad_norm": 2.6675825119018555, "learning_rate": 7.885977859778599e-05, "loss": 0.01100269854068756, "step": 74510 }, { "epoch": 21.152426908884472, "grad_norm": 0.7704554200172424, "learning_rate": 7.885694010786262e-05, "loss": 0.02964049279689789, "step": 74520 }, { "epoch": 21.155265398807835, "grad_norm": 6.697051525115967, "learning_rate": 7.885410161793926e-05, "loss": 0.009759602695703506, "step": 74530 }, { "epoch": 21.158103888731194, "grad_norm": 2.1058878898620605, "learning_rate": 7.88512631280159e-05, "loss": 0.01880491226911545, "step": 74540 }, { "epoch": 21.160942378654557, "grad_norm": 0.9615850448608398, "learning_rate": 7.884842463809254e-05, "loss": 0.00445898249745369, "step": 74550 }, { "epoch": 21.163780868577916, "grad_norm": 0.09030302613973618, "learning_rate": 7.884558614816918e-05, "loss": 0.010557128489017487, "step": 74560 }, { "epoch": 21.166619358501276, "grad_norm": 10.61948299407959, "learning_rate": 7.884274765824582e-05, "loss": 0.013550925254821777, "step": 74570 }, { "epoch": 21.16945784842464, "grad_norm": 6.313736915588379, "learning_rate": 7.883990916832245e-05, "loss": 0.010119151324033737, "step": 74580 }, { "epoch": 21.172296338347998, "grad_norm": 0.1528005748987198, "learning_rate": 7.883707067839909e-05, "loss": 0.012779344618320466, "step": 74590 }, { "epoch": 21.17513482827136, "grad_norm": 0.5982903242111206, "learning_rate": 7.883423218847573e-05, "loss": 0.002932750433683395, "step": 74600 }, { "epoch": 21.17797331819472, "grad_norm": 11.598091125488281, "learning_rate": 7.883139369855237e-05, "loss": 0.009352734684944153, "step": 74610 }, { "epoch": 21.18081180811808, "grad_norm": 0.8152559995651245, "learning_rate": 7.882855520862902e-05, "loss": 0.003933268785476685, "step": 74620 }, { "epoch": 21.183650298041442, "grad_norm": 0.7113914489746094, "learning_rate": 7.882571671870566e-05, "loss": 0.005226729437708855, "step": 74630 }, { "epoch": 21.1864887879648, "grad_norm": 5.512732982635498, "learning_rate": 7.88228782287823e-05, "loss": 0.010239028185606004, "step": 74640 }, { "epoch": 21.189327277888165, "grad_norm": 0.5761372447013855, "learning_rate": 7.882003973885893e-05, "loss": 0.017617686092853545, "step": 74650 }, { "epoch": 21.192165767811524, "grad_norm": 0.16559647023677826, "learning_rate": 7.881720124893557e-05, "loss": 0.0037040464580059052, "step": 74660 }, { "epoch": 21.195004257734887, "grad_norm": 0.24575182795524597, "learning_rate": 7.881436275901221e-05, "loss": 0.005731088295578957, "step": 74670 }, { "epoch": 21.197842747658246, "grad_norm": 2.6057214736938477, "learning_rate": 7.881152426908884e-05, "loss": 0.004068983346223831, "step": 74680 }, { "epoch": 21.200681237581605, "grad_norm": 0.2677629590034485, "learning_rate": 7.880868577916549e-05, "loss": 0.020187343657016753, "step": 74690 }, { "epoch": 21.20351972750497, "grad_norm": 0.10357426106929779, "learning_rate": 7.880584728924213e-05, "loss": 0.00479891449213028, "step": 74700 }, { "epoch": 21.206358217428328, "grad_norm": 0.5730968713760376, "learning_rate": 7.880300879931876e-05, "loss": 0.005114637315273285, "step": 74710 }, { "epoch": 21.20919670735169, "grad_norm": 1.3332642316818237, "learning_rate": 7.88001703093954e-05, "loss": 0.008949743211269378, "step": 74720 }, { "epoch": 21.21203519727505, "grad_norm": 2.8570303916931152, "learning_rate": 7.879733181947204e-05, "loss": 0.004488053172826767, "step": 74730 }, { "epoch": 21.21487368719841, "grad_norm": 0.023729074746370316, "learning_rate": 7.879449332954868e-05, "loss": 0.009389784932136536, "step": 74740 }, { "epoch": 21.217712177121772, "grad_norm": 1.9996713399887085, "learning_rate": 7.879165483962533e-05, "loss": 0.004719888046383858, "step": 74750 }, { "epoch": 21.22055066704513, "grad_norm": 0.05543321743607521, "learning_rate": 7.878881634970197e-05, "loss": 0.0027310142293572428, "step": 74760 }, { "epoch": 21.223389156968494, "grad_norm": 9.543432235717773, "learning_rate": 7.878597785977861e-05, "loss": 0.010254717618227004, "step": 74770 }, { "epoch": 21.226227646891854, "grad_norm": 2.55241322517395, "learning_rate": 7.878313936985524e-05, "loss": 0.022832368314266206, "step": 74780 }, { "epoch": 21.229066136815213, "grad_norm": 1.7601590156555176, "learning_rate": 7.878030087993188e-05, "loss": 0.017185239493846892, "step": 74790 }, { "epoch": 21.231904626738576, "grad_norm": 6.202152252197266, "learning_rate": 7.877746239000852e-05, "loss": 0.012297289818525315, "step": 74800 }, { "epoch": 21.234743116661935, "grad_norm": 1.5462493896484375, "learning_rate": 7.877462390008515e-05, "loss": 0.005318980664014816, "step": 74810 }, { "epoch": 21.237581606585298, "grad_norm": 2.130160093307495, "learning_rate": 7.87717854101618e-05, "loss": 0.014842423796653747, "step": 74820 }, { "epoch": 21.240420096508657, "grad_norm": 0.2702905237674713, "learning_rate": 7.876894692023844e-05, "loss": 0.0038507193326950072, "step": 74830 }, { "epoch": 21.243258586432017, "grad_norm": 1.6419241428375244, "learning_rate": 7.876610843031507e-05, "loss": 0.006574520468711853, "step": 74840 }, { "epoch": 21.24609707635538, "grad_norm": 4.900099277496338, "learning_rate": 7.876326994039171e-05, "loss": 0.008168254792690278, "step": 74850 }, { "epoch": 21.24893556627874, "grad_norm": 7.478531360626221, "learning_rate": 7.876043145046835e-05, "loss": 0.004328766465187072, "step": 74860 }, { "epoch": 21.251774056202102, "grad_norm": 0.057931460440158844, "learning_rate": 7.8757592960545e-05, "loss": 0.011958015710115432, "step": 74870 }, { "epoch": 21.25461254612546, "grad_norm": 0.2874072790145874, "learning_rate": 7.875475447062162e-05, "loss": 0.00504942461848259, "step": 74880 }, { "epoch": 21.25745103604882, "grad_norm": 8.093240737915039, "learning_rate": 7.875191598069828e-05, "loss": 0.00629311203956604, "step": 74890 }, { "epoch": 21.260289525972183, "grad_norm": 0.03590461611747742, "learning_rate": 7.874907749077492e-05, "loss": 0.007547885924577713, "step": 74900 }, { "epoch": 21.263128015895543, "grad_norm": 1.719968318939209, "learning_rate": 7.874623900085155e-05, "loss": 0.001773984171450138, "step": 74910 }, { "epoch": 21.265966505818906, "grad_norm": 0.5208271741867065, "learning_rate": 7.874340051092819e-05, "loss": 0.005212204903364182, "step": 74920 }, { "epoch": 21.268804995742265, "grad_norm": 3.1190130710601807, "learning_rate": 7.874056202100483e-05, "loss": 0.005944105982780457, "step": 74930 }, { "epoch": 21.271643485665624, "grad_norm": 11.095008850097656, "learning_rate": 7.873772353108146e-05, "loss": 0.030556172132492065, "step": 74940 }, { "epoch": 21.274481975588987, "grad_norm": 7.694209098815918, "learning_rate": 7.873488504115811e-05, "loss": 0.014055082201957702, "step": 74950 }, { "epoch": 21.277320465512346, "grad_norm": 0.1623324751853943, "learning_rate": 7.873204655123475e-05, "loss": 0.009943245351314545, "step": 74960 }, { "epoch": 21.28015895543571, "grad_norm": 5.353640556335449, "learning_rate": 7.872920806131138e-05, "loss": 0.012375854700803757, "step": 74970 }, { "epoch": 21.28299744535907, "grad_norm": 2.360304355621338, "learning_rate": 7.872636957138802e-05, "loss": 0.005787131190299988, "step": 74980 }, { "epoch": 21.285835935282428, "grad_norm": 2.0549628734588623, "learning_rate": 7.872353108146467e-05, "loss": 0.008867263048887252, "step": 74990 }, { "epoch": 21.28867442520579, "grad_norm": 2.9832072257995605, "learning_rate": 7.87206925915413e-05, "loss": 0.03144326210021973, "step": 75000 }, { "epoch": 21.28867442520579, "eval_accuracy": 0.9641381064411522, "eval_loss": 0.13521158695220947, "eval_runtime": 36.7397, "eval_samples_per_second": 428.066, "eval_steps_per_second": 6.696, "step": 75000 }, { "epoch": 21.29151291512915, "grad_norm": 1.6326223611831665, "learning_rate": 7.871785410161793e-05, "loss": 0.011526013165712357, "step": 75010 }, { "epoch": 21.294351405052513, "grad_norm": 17.261449813842773, "learning_rate": 7.871501561169459e-05, "loss": 0.02155701220035553, "step": 75020 }, { "epoch": 21.297189894975872, "grad_norm": 3.85467791557312, "learning_rate": 7.871217712177123e-05, "loss": 0.004643431305885315, "step": 75030 }, { "epoch": 21.300028384899235, "grad_norm": 2.491572380065918, "learning_rate": 7.870933863184786e-05, "loss": 0.007375634461641312, "step": 75040 }, { "epoch": 21.302866874822595, "grad_norm": 0.2983391582965851, "learning_rate": 7.87065001419245e-05, "loss": 0.0074639558792114254, "step": 75050 }, { "epoch": 21.305705364745954, "grad_norm": 0.07484964281320572, "learning_rate": 7.870366165200114e-05, "loss": 0.007521601021289825, "step": 75060 }, { "epoch": 21.308543854669317, "grad_norm": 4.318295955657959, "learning_rate": 7.870082316207777e-05, "loss": 0.0028971320018172262, "step": 75070 }, { "epoch": 21.311382344592676, "grad_norm": 0.8310025334358215, "learning_rate": 7.869798467215441e-05, "loss": 0.0051066137850284575, "step": 75080 }, { "epoch": 21.31422083451604, "grad_norm": 1.0951956510543823, "learning_rate": 7.869514618223107e-05, "loss": 0.01820375621318817, "step": 75090 }, { "epoch": 21.3170593244394, "grad_norm": 1.1368814706802368, "learning_rate": 7.869230769230769e-05, "loss": 0.03227033615112305, "step": 75100 }, { "epoch": 21.319897814362758, "grad_norm": 1.2359962463378906, "learning_rate": 7.868946920238433e-05, "loss": 0.01668013632297516, "step": 75110 }, { "epoch": 21.32273630428612, "grad_norm": 0.032505184412002563, "learning_rate": 7.868663071246098e-05, "loss": 0.006751296669244766, "step": 75120 }, { "epoch": 21.32557479420948, "grad_norm": 2.661971092224121, "learning_rate": 7.868379222253762e-05, "loss": 0.015013130009174347, "step": 75130 }, { "epoch": 21.328413284132843, "grad_norm": 0.8621452450752258, "learning_rate": 7.868095373261425e-05, "loss": 0.017131681740283965, "step": 75140 }, { "epoch": 21.331251774056202, "grad_norm": 12.534412384033203, "learning_rate": 7.86781152426909e-05, "loss": 0.03171437382698059, "step": 75150 }, { "epoch": 21.33409026397956, "grad_norm": 2.2516934871673584, "learning_rate": 7.867527675276754e-05, "loss": 0.005315155535936355, "step": 75160 }, { "epoch": 21.336928753902924, "grad_norm": 1.7871376276016235, "learning_rate": 7.867243826284417e-05, "loss": 0.0033514659851789476, "step": 75170 }, { "epoch": 21.339767243826284, "grad_norm": 11.091992378234863, "learning_rate": 7.866959977292081e-05, "loss": 0.014129960536956787, "step": 75180 }, { "epoch": 21.342605733749647, "grad_norm": 1.2448067665100098, "learning_rate": 7.866676128299745e-05, "loss": 0.008548451960086823, "step": 75190 }, { "epoch": 21.345444223673006, "grad_norm": 1.0992155075073242, "learning_rate": 7.866392279307408e-05, "loss": 0.00531751811504364, "step": 75200 }, { "epoch": 21.348282713596365, "grad_norm": 3.908897638320923, "learning_rate": 7.866108430315072e-05, "loss": 0.012845085561275482, "step": 75210 }, { "epoch": 21.351121203519728, "grad_norm": 12.719903945922852, "learning_rate": 7.865824581322738e-05, "loss": 0.01917527914047241, "step": 75220 }, { "epoch": 21.353959693443088, "grad_norm": 10.852532386779785, "learning_rate": 7.8655407323304e-05, "loss": 0.01984216719865799, "step": 75230 }, { "epoch": 21.35679818336645, "grad_norm": 2.329275131225586, "learning_rate": 7.865256883338065e-05, "loss": 0.00475042499601841, "step": 75240 }, { "epoch": 21.35963667328981, "grad_norm": 0.6325433850288391, "learning_rate": 7.864973034345729e-05, "loss": 0.006340565532445908, "step": 75250 }, { "epoch": 21.36247516321317, "grad_norm": 0.17733001708984375, "learning_rate": 7.864689185353393e-05, "loss": 0.009946484118700027, "step": 75260 }, { "epoch": 21.365313653136532, "grad_norm": 0.08541008830070496, "learning_rate": 7.864405336361056e-05, "loss": 0.007309769093990326, "step": 75270 }, { "epoch": 21.36815214305989, "grad_norm": 0.04691701382398605, "learning_rate": 7.86412148736872e-05, "loss": 0.004984775930643082, "step": 75280 }, { "epoch": 21.370990632983254, "grad_norm": 0.5120503306388855, "learning_rate": 7.863837638376385e-05, "loss": 0.009898581355810166, "step": 75290 }, { "epoch": 21.373829122906614, "grad_norm": 1.7361562252044678, "learning_rate": 7.863553789384048e-05, "loss": 0.012177450954914093, "step": 75300 }, { "epoch": 21.376667612829973, "grad_norm": 1.8089913129806519, "learning_rate": 7.863269940391712e-05, "loss": 0.005124437808990479, "step": 75310 }, { "epoch": 21.379506102753336, "grad_norm": 2.894075870513916, "learning_rate": 7.862986091399376e-05, "loss": 0.006614212691783905, "step": 75320 }, { "epoch": 21.382344592676695, "grad_norm": 0.46276935935020447, "learning_rate": 7.862702242407039e-05, "loss": 0.004492413252592087, "step": 75330 }, { "epoch": 21.385183082600058, "grad_norm": 12.252242088317871, "learning_rate": 7.862418393414703e-05, "loss": 0.031849262118339536, "step": 75340 }, { "epoch": 21.388021572523417, "grad_norm": 0.6322017908096313, "learning_rate": 7.862134544422369e-05, "loss": 0.026522797346115113, "step": 75350 }, { "epoch": 21.390860062446777, "grad_norm": 2.3536596298217773, "learning_rate": 7.861850695430031e-05, "loss": 0.01679241955280304, "step": 75360 }, { "epoch": 21.39369855237014, "grad_norm": 0.4844360649585724, "learning_rate": 7.861566846437696e-05, "loss": 0.005199184268712997, "step": 75370 }, { "epoch": 21.3965370422935, "grad_norm": 1.0903657674789429, "learning_rate": 7.86128299744536e-05, "loss": 0.016880692541599275, "step": 75380 }, { "epoch": 21.39937553221686, "grad_norm": 0.11510296911001205, "learning_rate": 7.860999148453024e-05, "loss": 0.01655648946762085, "step": 75390 }, { "epoch": 21.40221402214022, "grad_norm": 0.12198255211114883, "learning_rate": 7.860715299460687e-05, "loss": 0.006692472100257874, "step": 75400 }, { "epoch": 21.405052512063584, "grad_norm": 1.1299306154251099, "learning_rate": 7.860431450468351e-05, "loss": 0.0210096076130867, "step": 75410 }, { "epoch": 21.407891001986943, "grad_norm": 2.1311182975769043, "learning_rate": 7.860147601476015e-05, "loss": 0.006875139474868774, "step": 75420 }, { "epoch": 21.410729491910303, "grad_norm": 0.3989589214324951, "learning_rate": 7.859863752483679e-05, "loss": 0.008405142277479172, "step": 75430 }, { "epoch": 21.413567981833665, "grad_norm": 0.09731027483940125, "learning_rate": 7.859579903491343e-05, "loss": 0.007980497181415558, "step": 75440 }, { "epoch": 21.416406471757025, "grad_norm": 1.9039108753204346, "learning_rate": 7.859296054499007e-05, "loss": 0.01016746535897255, "step": 75450 }, { "epoch": 21.419244961680388, "grad_norm": 2.8415374755859375, "learning_rate": 7.85901220550667e-05, "loss": 0.024085420370101928, "step": 75460 }, { "epoch": 21.422083451603747, "grad_norm": 2.034208297729492, "learning_rate": 7.858728356514334e-05, "loss": 0.007176344841718673, "step": 75470 }, { "epoch": 21.424921941527106, "grad_norm": 0.3464799225330353, "learning_rate": 7.858444507521998e-05, "loss": 0.009476847201585769, "step": 75480 }, { "epoch": 21.42776043145047, "grad_norm": 2.5340957641601562, "learning_rate": 7.858160658529663e-05, "loss": 0.006865528970956802, "step": 75490 }, { "epoch": 21.43059892137383, "grad_norm": 0.4835187792778015, "learning_rate": 7.857876809537327e-05, "loss": 0.009280438721179961, "step": 75500 }, { "epoch": 21.43059892137383, "eval_accuracy": 0.970814522795193, "eval_loss": 0.10230260342359543, "eval_runtime": 33.5752, "eval_samples_per_second": 468.412, "eval_steps_per_second": 7.327, "step": 75500 }, { "epoch": 21.43343741129719, "grad_norm": 2.7144675254821777, "learning_rate": 7.857592960544991e-05, "loss": 0.01696545034646988, "step": 75510 }, { "epoch": 21.43627590122055, "grad_norm": 1.5383164882659912, "learning_rate": 7.857309111552654e-05, "loss": 0.013233226537704468, "step": 75520 }, { "epoch": 21.43911439114391, "grad_norm": 0.16926966607570648, "learning_rate": 7.857025262560318e-05, "loss": 0.005664090067148209, "step": 75530 }, { "epoch": 21.441952881067273, "grad_norm": 1.9096518754959106, "learning_rate": 7.856741413567982e-05, "loss": 0.002835162729024887, "step": 75540 }, { "epoch": 21.444791370990632, "grad_norm": 6.673807144165039, "learning_rate": 7.856457564575646e-05, "loss": 0.007688800990581513, "step": 75550 }, { "epoch": 21.447629860913995, "grad_norm": 0.6507651209831238, "learning_rate": 7.85617371558331e-05, "loss": 0.012678685784339904, "step": 75560 }, { "epoch": 21.450468350837355, "grad_norm": 0.07551060616970062, "learning_rate": 7.855889866590974e-05, "loss": 0.010251745581626892, "step": 75570 }, { "epoch": 21.453306840760714, "grad_norm": 0.09524097293615341, "learning_rate": 7.855606017598638e-05, "loss": 0.008862710744142532, "step": 75580 }, { "epoch": 21.456145330684077, "grad_norm": 3.505009174346924, "learning_rate": 7.855322168606301e-05, "loss": 0.02507287859916687, "step": 75590 }, { "epoch": 21.458983820607436, "grad_norm": 3.28354549407959, "learning_rate": 7.855038319613965e-05, "loss": 0.03327754437923432, "step": 75600 }, { "epoch": 21.4618223105308, "grad_norm": 10.775246620178223, "learning_rate": 7.85475447062163e-05, "loss": 0.0280952513217926, "step": 75610 }, { "epoch": 21.46466080045416, "grad_norm": 1.5976272821426392, "learning_rate": 7.854470621629294e-05, "loss": 0.010918256640434266, "step": 75620 }, { "epoch": 21.467499290377518, "grad_norm": 4.534170150756836, "learning_rate": 7.854186772636958e-05, "loss": 0.018703280389308928, "step": 75630 }, { "epoch": 21.47033778030088, "grad_norm": 0.05094924569129944, "learning_rate": 7.853902923644622e-05, "loss": 0.007288248091936111, "step": 75640 }, { "epoch": 21.47317627022424, "grad_norm": 0.15765702724456787, "learning_rate": 7.853619074652285e-05, "loss": 0.006977279484272003, "step": 75650 }, { "epoch": 21.476014760147603, "grad_norm": 0.9869862198829651, "learning_rate": 7.853335225659949e-05, "loss": 0.0022855060175061225, "step": 75660 }, { "epoch": 21.478853250070962, "grad_norm": 0.05720775946974754, "learning_rate": 7.853051376667613e-05, "loss": 0.006868983805179596, "step": 75670 }, { "epoch": 21.48169173999432, "grad_norm": 4.669098377227783, "learning_rate": 7.852767527675277e-05, "loss": 0.004796161130070686, "step": 75680 }, { "epoch": 21.484530229917684, "grad_norm": 8.851587295532227, "learning_rate": 7.852483678682941e-05, "loss": 0.014657337963581086, "step": 75690 }, { "epoch": 21.487368719841044, "grad_norm": 1.033220887184143, "learning_rate": 7.852199829690605e-05, "loss": 0.016292527318000793, "step": 75700 }, { "epoch": 21.490207209764407, "grad_norm": 7.531527042388916, "learning_rate": 7.85191598069827e-05, "loss": 0.006246595084667206, "step": 75710 }, { "epoch": 21.493045699687766, "grad_norm": 0.010327405296266079, "learning_rate": 7.851632131705932e-05, "loss": 0.021929718554019928, "step": 75720 }, { "epoch": 21.495884189611125, "grad_norm": 0.14996542036533356, "learning_rate": 7.851348282713596e-05, "loss": 0.017142881453037263, "step": 75730 }, { "epoch": 21.498722679534488, "grad_norm": 7.114850997924805, "learning_rate": 7.85106443372126e-05, "loss": 0.02753571271896362, "step": 75740 }, { "epoch": 21.501561169457847, "grad_norm": 3.8850033283233643, "learning_rate": 7.850780584728925e-05, "loss": 0.01241466924548149, "step": 75750 }, { "epoch": 21.50439965938121, "grad_norm": 0.59904545545578, "learning_rate": 7.850496735736589e-05, "loss": 0.027277958393096925, "step": 75760 }, { "epoch": 21.50723814930457, "grad_norm": 9.41364860534668, "learning_rate": 7.850212886744253e-05, "loss": 0.032134199142456056, "step": 75770 }, { "epoch": 21.510076639227933, "grad_norm": 2.426119565963745, "learning_rate": 7.849929037751916e-05, "loss": 0.022758811712265015, "step": 75780 }, { "epoch": 21.512915129151292, "grad_norm": 7.225414752960205, "learning_rate": 7.84964518875958e-05, "loss": 0.009830516576766968, "step": 75790 }, { "epoch": 21.51575361907465, "grad_norm": 7.11154317855835, "learning_rate": 7.849361339767244e-05, "loss": 0.020433375239372255, "step": 75800 }, { "epoch": 21.518592108998014, "grad_norm": 2.7180557250976562, "learning_rate": 7.849077490774908e-05, "loss": 0.02018067389726639, "step": 75810 }, { "epoch": 21.521430598921373, "grad_norm": 1.189454197883606, "learning_rate": 7.848793641782572e-05, "loss": 0.0058222062885761264, "step": 75820 }, { "epoch": 21.524269088844733, "grad_norm": 5.727425575256348, "learning_rate": 7.848509792790236e-05, "loss": 0.023678253591060638, "step": 75830 }, { "epoch": 21.527107578768096, "grad_norm": 0.8131870627403259, "learning_rate": 7.8482259437979e-05, "loss": 0.0109755776822567, "step": 75840 }, { "epoch": 21.529946068691455, "grad_norm": 0.8204752802848816, "learning_rate": 7.847942094805563e-05, "loss": 0.014629398286342622, "step": 75850 }, { "epoch": 21.532784558614818, "grad_norm": 0.2153329700231552, "learning_rate": 7.847658245813227e-05, "loss": 0.011555787920951844, "step": 75860 }, { "epoch": 21.535623048538177, "grad_norm": 2.112541675567627, "learning_rate": 7.847374396820892e-05, "loss": 0.005674155801534653, "step": 75870 }, { "epoch": 21.53846153846154, "grad_norm": 5.352249622344971, "learning_rate": 7.847090547828556e-05, "loss": 0.019017118215560912, "step": 75880 }, { "epoch": 21.5413000283849, "grad_norm": 8.009117126464844, "learning_rate": 7.84680669883622e-05, "loss": 0.01032036691904068, "step": 75890 }, { "epoch": 21.54413851830826, "grad_norm": 0.2751868963241577, "learning_rate": 7.846522849843884e-05, "loss": 0.0031623322516679763, "step": 75900 }, { "epoch": 21.54697700823162, "grad_norm": 4.631112575531006, "learning_rate": 7.846239000851547e-05, "loss": 0.005109605193138122, "step": 75910 }, { "epoch": 21.54981549815498, "grad_norm": 1.3002986907958984, "learning_rate": 7.845955151859211e-05, "loss": 0.003481493890285492, "step": 75920 }, { "epoch": 21.552653988078344, "grad_norm": 0.14788788557052612, "learning_rate": 7.845671302866875e-05, "loss": 0.01180480495095253, "step": 75930 }, { "epoch": 21.555492478001703, "grad_norm": 3.4899916648864746, "learning_rate": 7.845387453874539e-05, "loss": 0.0031131066381931303, "step": 75940 }, { "epoch": 21.558330967925063, "grad_norm": 8.902555465698242, "learning_rate": 7.845103604882203e-05, "loss": 0.014702621102333068, "step": 75950 }, { "epoch": 21.561169457848425, "grad_norm": 2.1848864555358887, "learning_rate": 7.844819755889868e-05, "loss": 0.008567447960376739, "step": 75960 }, { "epoch": 21.564007947771785, "grad_norm": 2.2485058307647705, "learning_rate": 7.844535906897532e-05, "loss": 0.006678443402051926, "step": 75970 }, { "epoch": 21.566846437695148, "grad_norm": 0.8421973586082458, "learning_rate": 7.844252057905194e-05, "loss": 0.01511918306350708, "step": 75980 }, { "epoch": 21.569684927618507, "grad_norm": 0.9486920237541199, "learning_rate": 7.843968208912859e-05, "loss": 0.010798220336437226, "step": 75990 }, { "epoch": 21.572523417541866, "grad_norm": 0.0706065371632576, "learning_rate": 7.843684359920523e-05, "loss": 0.004098658263683319, "step": 76000 }, { "epoch": 21.572523417541866, "eval_accuracy": 0.971895466395371, "eval_loss": 0.09626450389623642, "eval_runtime": 35.1547, "eval_samples_per_second": 447.365, "eval_steps_per_second": 6.998, "step": 76000 }, { "epoch": 21.57536190746523, "grad_norm": 1.6319620609283447, "learning_rate": 7.843400510928185e-05, "loss": 0.003714373707771301, "step": 76010 }, { "epoch": 21.57820039738859, "grad_norm": 0.0223070178180933, "learning_rate": 7.843116661935851e-05, "loss": 0.006712935119867325, "step": 76020 }, { "epoch": 21.58103888731195, "grad_norm": 0.45223599672317505, "learning_rate": 7.842832812943515e-05, "loss": 0.007672571390867233, "step": 76030 }, { "epoch": 21.58387737723531, "grad_norm": 13.059121131896973, "learning_rate": 7.842548963951178e-05, "loss": 0.023030942678451537, "step": 76040 }, { "epoch": 21.58671586715867, "grad_norm": 9.552334785461426, "learning_rate": 7.842265114958842e-05, "loss": 0.0077619768679142, "step": 76050 }, { "epoch": 21.589554357082033, "grad_norm": 0.07798145711421967, "learning_rate": 7.841981265966506e-05, "loss": 0.005865170806646347, "step": 76060 }, { "epoch": 21.592392847005392, "grad_norm": 0.2977941632270813, "learning_rate": 7.84169741697417e-05, "loss": 0.006793246418237686, "step": 76070 }, { "epoch": 21.595231336928755, "grad_norm": 8.4812650680542, "learning_rate": 7.841413567981834e-05, "loss": 0.0160169392824173, "step": 76080 }, { "epoch": 21.598069826852115, "grad_norm": 6.554344177246094, "learning_rate": 7.841129718989499e-05, "loss": 0.005630845576524735, "step": 76090 }, { "epoch": 21.600908316775474, "grad_norm": 0.294132798910141, "learning_rate": 7.840845869997163e-05, "loss": 0.0013465629890561104, "step": 76100 }, { "epoch": 21.603746806698837, "grad_norm": 0.7140658497810364, "learning_rate": 7.840562021004826e-05, "loss": 0.011323384940624237, "step": 76110 }, { "epoch": 21.606585296622196, "grad_norm": 6.027770519256592, "learning_rate": 7.84027817201249e-05, "loss": 0.013676756620407104, "step": 76120 }, { "epoch": 21.60942378654556, "grad_norm": 0.3219413161277771, "learning_rate": 7.839994323020154e-05, "loss": 0.0025444496423006058, "step": 76130 }, { "epoch": 21.61226227646892, "grad_norm": 1.4464608430862427, "learning_rate": 7.839710474027817e-05, "loss": 0.014741565287113189, "step": 76140 }, { "epoch": 21.615100766392278, "grad_norm": 9.450418472290039, "learning_rate": 7.839426625035482e-05, "loss": 0.005599839985370636, "step": 76150 }, { "epoch": 21.61793925631564, "grad_norm": 0.5558962821960449, "learning_rate": 7.839142776043146e-05, "loss": 0.0023310644552111625, "step": 76160 }, { "epoch": 21.620777746239, "grad_norm": 0.5326500535011292, "learning_rate": 7.838858927050809e-05, "loss": 0.005129107460379601, "step": 76170 }, { "epoch": 21.623616236162363, "grad_norm": 1.6963669061660767, "learning_rate": 7.838575078058473e-05, "loss": 0.0037729524075984953, "step": 76180 }, { "epoch": 21.626454726085722, "grad_norm": 0.6168829798698425, "learning_rate": 7.838291229066137e-05, "loss": 0.006750669330358505, "step": 76190 }, { "epoch": 21.62929321600908, "grad_norm": 0.8574807643890381, "learning_rate": 7.838007380073801e-05, "loss": 0.005789808556437492, "step": 76200 }, { "epoch": 21.632131705932444, "grad_norm": 9.690081596374512, "learning_rate": 7.837723531081464e-05, "loss": 0.017111361026763916, "step": 76210 }, { "epoch": 21.634970195855804, "grad_norm": 8.000529289245605, "learning_rate": 7.83743968208913e-05, "loss": 0.01142665594816208, "step": 76220 }, { "epoch": 21.637808685779166, "grad_norm": 0.29230913519859314, "learning_rate": 7.837155833096794e-05, "loss": 0.019384782016277313, "step": 76230 }, { "epoch": 21.640647175702526, "grad_norm": 11.557282447814941, "learning_rate": 7.836871984104457e-05, "loss": 0.020827051997184754, "step": 76240 }, { "epoch": 21.64348566562589, "grad_norm": 2.305434226989746, "learning_rate": 7.836588135112121e-05, "loss": 0.03439134359359741, "step": 76250 }, { "epoch": 21.646324155549248, "grad_norm": 2.6589722633361816, "learning_rate": 7.836304286119785e-05, "loss": 0.02175535261631012, "step": 76260 }, { "epoch": 21.649162645472607, "grad_norm": 0.11015072464942932, "learning_rate": 7.836020437127448e-05, "loss": 0.00935019999742508, "step": 76270 }, { "epoch": 21.65200113539597, "grad_norm": 0.9063291549682617, "learning_rate": 7.835736588135113e-05, "loss": 0.00965784266591072, "step": 76280 }, { "epoch": 21.65483962531933, "grad_norm": 0.756948709487915, "learning_rate": 7.835452739142777e-05, "loss": 0.003368126228451729, "step": 76290 }, { "epoch": 21.657678115242692, "grad_norm": 4.284371852874756, "learning_rate": 7.83516889015044e-05, "loss": 0.029542556405067442, "step": 76300 }, { "epoch": 21.660516605166052, "grad_norm": 2.3349971771240234, "learning_rate": 7.834885041158104e-05, "loss": 0.011359532177448273, "step": 76310 }, { "epoch": 21.66335509508941, "grad_norm": 0.3879821002483368, "learning_rate": 7.834601192165768e-05, "loss": 0.020156021416187286, "step": 76320 }, { "epoch": 21.666193585012774, "grad_norm": 2.2115564346313477, "learning_rate": 7.834317343173432e-05, "loss": 0.013124699890613555, "step": 76330 }, { "epoch": 21.669032074936133, "grad_norm": 8.992923736572266, "learning_rate": 7.834033494181095e-05, "loss": 0.03172659277915955, "step": 76340 }, { "epoch": 21.671870564859496, "grad_norm": 4.043951511383057, "learning_rate": 7.833749645188761e-05, "loss": 0.024024268984794615, "step": 76350 }, { "epoch": 21.674709054782856, "grad_norm": 3.690535306930542, "learning_rate": 7.833465796196424e-05, "loss": 0.01768091320991516, "step": 76360 }, { "epoch": 21.677547544706215, "grad_norm": 0.7416356205940247, "learning_rate": 7.833181947204088e-05, "loss": 0.025128251314163207, "step": 76370 }, { "epoch": 21.680386034629578, "grad_norm": 12.065498352050781, "learning_rate": 7.832926483110984e-05, "loss": 0.022164861857891082, "step": 76380 }, { "epoch": 21.683224524552937, "grad_norm": 3.18854022026062, "learning_rate": 7.832642634118648e-05, "loss": 0.015490077435970306, "step": 76390 }, { "epoch": 21.6860630144763, "grad_norm": 1.4037779569625854, "learning_rate": 7.832358785126314e-05, "loss": 0.009948326647281647, "step": 76400 }, { "epoch": 21.68890150439966, "grad_norm": 0.6806366443634033, "learning_rate": 7.832074936133977e-05, "loss": 0.016630303859710694, "step": 76410 }, { "epoch": 21.69173999432302, "grad_norm": 0.3701835572719574, "learning_rate": 7.831791087141641e-05, "loss": 0.007824569195508956, "step": 76420 }, { "epoch": 21.69457848424638, "grad_norm": 0.2999122738838196, "learning_rate": 7.831507238149305e-05, "loss": 0.006511928141117096, "step": 76430 }, { "epoch": 21.69741697416974, "grad_norm": 1.5539482831954956, "learning_rate": 7.831223389156969e-05, "loss": 0.02385521084070206, "step": 76440 }, { "epoch": 21.700255464093104, "grad_norm": 0.04691850766539574, "learning_rate": 7.830939540164632e-05, "loss": 0.011762581765651703, "step": 76450 }, { "epoch": 21.703093954016463, "grad_norm": 7.5690460205078125, "learning_rate": 7.830655691172297e-05, "loss": 0.01207951009273529, "step": 76460 }, { "epoch": 21.705932443939822, "grad_norm": 0.644452691078186, "learning_rate": 7.830371842179962e-05, "loss": 0.010083774477243424, "step": 76470 }, { "epoch": 21.708770933863185, "grad_norm": 0.10585452616214752, "learning_rate": 7.830087993187624e-05, "loss": 0.007852667570114135, "step": 76480 }, { "epoch": 21.711609423786545, "grad_norm": 0.13105358183383942, "learning_rate": 7.829804144195288e-05, "loss": 0.011062109470367431, "step": 76490 }, { "epoch": 21.714447913709908, "grad_norm": 1.8580065965652466, "learning_rate": 7.829520295202953e-05, "loss": 0.005259155109524727, "step": 76500 }, { "epoch": 21.714447913709908, "eval_accuracy": 0.9743752781840147, "eval_loss": 0.09365662187337875, "eval_runtime": 33.7759, "eval_samples_per_second": 465.628, "eval_steps_per_second": 7.283, "step": 76500 }, { "epoch": 21.717286403633267, "grad_norm": 2.9138054847717285, "learning_rate": 7.829236446210615e-05, "loss": 0.012289131432771683, "step": 76510 }, { "epoch": 21.720124893556626, "grad_norm": 1.5889636278152466, "learning_rate": 7.82895259721828e-05, "loss": 0.02432795912027359, "step": 76520 }, { "epoch": 21.72296338347999, "grad_norm": 5.072165489196777, "learning_rate": 7.828668748225945e-05, "loss": 0.013843004405498505, "step": 76530 }, { "epoch": 21.72580187340335, "grad_norm": 1.6750872135162354, "learning_rate": 7.828384899233608e-05, "loss": 0.014633762836456298, "step": 76540 }, { "epoch": 21.72864036332671, "grad_norm": 2.293349027633667, "learning_rate": 7.828101050241272e-05, "loss": 0.014585523307323456, "step": 76550 }, { "epoch": 21.73147885325007, "grad_norm": 2.608394145965576, "learning_rate": 7.827817201248936e-05, "loss": 0.007868467271327973, "step": 76560 }, { "epoch": 21.73431734317343, "grad_norm": 3.383575916290283, "learning_rate": 7.8275333522566e-05, "loss": 0.01575823575258255, "step": 76570 }, { "epoch": 21.737155833096793, "grad_norm": 4.789353847503662, "learning_rate": 7.827249503264263e-05, "loss": 0.020788240432739257, "step": 76580 }, { "epoch": 21.739994323020152, "grad_norm": 3.1665830612182617, "learning_rate": 7.826965654271927e-05, "loss": 0.023198559880256653, "step": 76590 }, { "epoch": 21.742832812943515, "grad_norm": 1.2074291706085205, "learning_rate": 7.826681805279593e-05, "loss": 0.016294828057289122, "step": 76600 }, { "epoch": 21.745671302866874, "grad_norm": 16.145673751831055, "learning_rate": 7.826397956287255e-05, "loss": 0.025405138731002808, "step": 76610 }, { "epoch": 21.748509792790237, "grad_norm": 7.610554218292236, "learning_rate": 7.82611410729492e-05, "loss": 0.026652061939239503, "step": 76620 }, { "epoch": 21.751348282713597, "grad_norm": 0.1755785495042801, "learning_rate": 7.825830258302584e-05, "loss": 0.008329382538795472, "step": 76630 }, { "epoch": 21.754186772636956, "grad_norm": 4.101770401000977, "learning_rate": 7.825546409310246e-05, "loss": 0.004594181478023529, "step": 76640 }, { "epoch": 21.75702526256032, "grad_norm": 13.351091384887695, "learning_rate": 7.82526256031791e-05, "loss": 0.023375841975212096, "step": 76650 }, { "epoch": 21.759863752483678, "grad_norm": 0.015629559755325317, "learning_rate": 7.824978711325576e-05, "loss": 0.01931239515542984, "step": 76660 }, { "epoch": 21.76270224240704, "grad_norm": 0.5826571583747864, "learning_rate": 7.824694862333239e-05, "loss": 0.01900211423635483, "step": 76670 }, { "epoch": 21.7655407323304, "grad_norm": 2.1782968044281006, "learning_rate": 7.824411013340903e-05, "loss": 0.012255797535181046, "step": 76680 }, { "epoch": 21.76837922225376, "grad_norm": 0.7445348501205444, "learning_rate": 7.824127164348567e-05, "loss": 0.017368848621845245, "step": 76690 }, { "epoch": 21.771217712177123, "grad_norm": 2.3139021396636963, "learning_rate": 7.823843315356231e-05, "loss": 0.008800503611564637, "step": 76700 }, { "epoch": 21.774056202100482, "grad_norm": 0.17572715878486633, "learning_rate": 7.823559466363894e-05, "loss": 0.026796159148216248, "step": 76710 }, { "epoch": 21.776894692023845, "grad_norm": 10.952397346496582, "learning_rate": 7.823275617371558e-05, "loss": 0.038211727142333986, "step": 76720 }, { "epoch": 21.779733181947204, "grad_norm": 7.899598598480225, "learning_rate": 7.822991768379224e-05, "loss": 0.008760966360569, "step": 76730 }, { "epoch": 21.782571671870564, "grad_norm": 6.278811931610107, "learning_rate": 7.822707919386886e-05, "loss": 0.0124712735414505, "step": 76740 }, { "epoch": 21.785410161793926, "grad_norm": 1.3256267309188843, "learning_rate": 7.82242407039455e-05, "loss": 0.004632719606161117, "step": 76750 }, { "epoch": 21.788248651717286, "grad_norm": 10.130837440490723, "learning_rate": 7.822140221402215e-05, "loss": 0.011178233474493027, "step": 76760 }, { "epoch": 21.79108714164065, "grad_norm": 0.037961434572935104, "learning_rate": 7.821856372409878e-05, "loss": 0.0033314526081085205, "step": 76770 }, { "epoch": 21.793925631564008, "grad_norm": 1.3093103170394897, "learning_rate": 7.821572523417542e-05, "loss": 0.015557391941547394, "step": 76780 }, { "epoch": 21.796764121487367, "grad_norm": 8.625869750976562, "learning_rate": 7.821288674425206e-05, "loss": 0.008550330996513367, "step": 76790 }, { "epoch": 21.79960261141073, "grad_norm": 0.025949792936444283, "learning_rate": 7.82100482543287e-05, "loss": 0.007548306882381439, "step": 76800 }, { "epoch": 21.80244110133409, "grad_norm": 11.467525482177734, "learning_rate": 7.820720976440534e-05, "loss": 0.010235334932804107, "step": 76810 }, { "epoch": 21.805279591257452, "grad_norm": 0.9276819229125977, "learning_rate": 7.820437127448198e-05, "loss": 0.005799291655421257, "step": 76820 }, { "epoch": 21.80811808118081, "grad_norm": 0.6728413701057434, "learning_rate": 7.820153278455862e-05, "loss": 0.016754713654518128, "step": 76830 }, { "epoch": 21.81095657110417, "grad_norm": 0.25227752327919006, "learning_rate": 7.819869429463525e-05, "loss": 0.005897929519414901, "step": 76840 }, { "epoch": 21.813795061027534, "grad_norm": 0.47478193044662476, "learning_rate": 7.819585580471189e-05, "loss": 0.021110185980796815, "step": 76850 }, { "epoch": 21.816633550950893, "grad_norm": 8.020572662353516, "learning_rate": 7.819301731478855e-05, "loss": 0.006472976505756378, "step": 76860 }, { "epoch": 21.819472040874256, "grad_norm": 2.0486252307891846, "learning_rate": 7.819017882486518e-05, "loss": 0.008240105211734771, "step": 76870 }, { "epoch": 21.822310530797616, "grad_norm": 1.2566803693771362, "learning_rate": 7.818734033494182e-05, "loss": 0.008492986857891082, "step": 76880 }, { "epoch": 21.825149020720975, "grad_norm": 8.961441993713379, "learning_rate": 7.818450184501846e-05, "loss": 0.01190599650144577, "step": 76890 }, { "epoch": 21.827987510644338, "grad_norm": 7.850216865539551, "learning_rate": 7.818166335509509e-05, "loss": 0.01161976382136345, "step": 76900 }, { "epoch": 21.830826000567697, "grad_norm": 9.453813552856445, "learning_rate": 7.817882486517173e-05, "loss": 0.011448770761489868, "step": 76910 }, { "epoch": 21.83366449049106, "grad_norm": 0.43145498633384705, "learning_rate": 7.817598637524837e-05, "loss": 0.011171109974384308, "step": 76920 }, { "epoch": 21.83650298041442, "grad_norm": 4.079193115234375, "learning_rate": 7.817314788532501e-05, "loss": 0.01476123034954071, "step": 76930 }, { "epoch": 21.83934147033778, "grad_norm": 0.4642220139503479, "learning_rate": 7.817030939540165e-05, "loss": 0.02437371462583542, "step": 76940 }, { "epoch": 21.84217996026114, "grad_norm": 0.29717254638671875, "learning_rate": 7.816747090547829e-05, "loss": 0.023793309926986694, "step": 76950 }, { "epoch": 21.8450184501845, "grad_norm": 0.18478889763355255, "learning_rate": 7.816463241555493e-05, "loss": 0.02362641990184784, "step": 76960 }, { "epoch": 21.847856940107864, "grad_norm": 0.4515392482280731, "learning_rate": 7.816179392563156e-05, "loss": 0.005959854274988174, "step": 76970 }, { "epoch": 21.850695430031223, "grad_norm": 7.528244972229004, "learning_rate": 7.81589554357082e-05, "loss": 0.013747429847717286, "step": 76980 }, { "epoch": 21.853533919954586, "grad_norm": 11.693696022033691, "learning_rate": 7.815611694578484e-05, "loss": 0.025805670022964477, "step": 76990 }, { "epoch": 21.856372409877945, "grad_norm": 0.7792622447013855, "learning_rate": 7.815327845586149e-05, "loss": 0.01826193928718567, "step": 77000 }, { "epoch": 21.856372409877945, "eval_accuracy": 0.9666815031474534, "eval_loss": 0.12362799048423767, "eval_runtime": 33.4861, "eval_samples_per_second": 469.658, "eval_steps_per_second": 7.346, "step": 77000 }, { "epoch": 21.859210899801305, "grad_norm": 2.1487925052642822, "learning_rate": 7.815043996593813e-05, "loss": 0.020477095246315004, "step": 77010 }, { "epoch": 21.862049389724667, "grad_norm": 0.060275908559560776, "learning_rate": 7.814760147601477e-05, "loss": 0.02336960881948471, "step": 77020 }, { "epoch": 21.864887879648027, "grad_norm": 0.5851543545722961, "learning_rate": 7.81447629860914e-05, "loss": 0.011754945665597916, "step": 77030 }, { "epoch": 21.86772636957139, "grad_norm": 0.03535527363419533, "learning_rate": 7.814192449616804e-05, "loss": 0.005765859037637711, "step": 77040 }, { "epoch": 21.87056485949475, "grad_norm": 0.16079451143741608, "learning_rate": 7.813908600624468e-05, "loss": 0.0145330548286438, "step": 77050 }, { "epoch": 21.87340334941811, "grad_norm": 2.5771403312683105, "learning_rate": 7.813624751632132e-05, "loss": 0.021582962572574617, "step": 77060 }, { "epoch": 21.87624183934147, "grad_norm": 2.3872170448303223, "learning_rate": 7.813340902639796e-05, "loss": 0.004911260306835174, "step": 77070 }, { "epoch": 21.87908032926483, "grad_norm": 5.504036903381348, "learning_rate": 7.81305705364746e-05, "loss": 0.02059899866580963, "step": 77080 }, { "epoch": 21.881918819188193, "grad_norm": 10.037178039550781, "learning_rate": 7.812773204655125e-05, "loss": 0.012835609912872314, "step": 77090 }, { "epoch": 21.884757309111553, "grad_norm": 4.540242671966553, "learning_rate": 7.812489355662787e-05, "loss": 0.012746194005012512, "step": 77100 }, { "epoch": 21.887595799034912, "grad_norm": 1.8088051080703735, "learning_rate": 7.812205506670451e-05, "loss": 0.020083943009376527, "step": 77110 }, { "epoch": 21.890434288958275, "grad_norm": 4.076766490936279, "learning_rate": 7.811921657678116e-05, "loss": 0.013205003738403321, "step": 77120 }, { "epoch": 21.893272778881634, "grad_norm": 0.13293194770812988, "learning_rate": 7.81163780868578e-05, "loss": 0.008111776411533355, "step": 77130 }, { "epoch": 21.896111268804997, "grad_norm": 0.4091350734233856, "learning_rate": 7.811353959693444e-05, "loss": 0.004111322015523911, "step": 77140 }, { "epoch": 21.898949758728357, "grad_norm": 5.208039283752441, "learning_rate": 7.811070110701108e-05, "loss": 0.016199421882629395, "step": 77150 }, { "epoch": 21.901788248651716, "grad_norm": 4.101673603057861, "learning_rate": 7.810786261708771e-05, "loss": 0.013776785135269165, "step": 77160 }, { "epoch": 21.90462673857508, "grad_norm": 0.4423852562904358, "learning_rate": 7.810502412716435e-05, "loss": 0.005289822816848755, "step": 77170 }, { "epoch": 21.907465228498438, "grad_norm": 0.2951292097568512, "learning_rate": 7.810218563724099e-05, "loss": 0.021123364567756653, "step": 77180 }, { "epoch": 21.9103037184218, "grad_norm": 10.400200843811035, "learning_rate": 7.809934714731763e-05, "loss": 0.012416242063045502, "step": 77190 }, { "epoch": 21.91314220834516, "grad_norm": 0.32414743304252625, "learning_rate": 7.809650865739427e-05, "loss": 0.012587009370326996, "step": 77200 }, { "epoch": 21.91598069826852, "grad_norm": 2.90313982963562, "learning_rate": 7.809367016747091e-05, "loss": 0.0075712896883487705, "step": 77210 }, { "epoch": 21.918819188191883, "grad_norm": 0.14449569582939148, "learning_rate": 7.809083167754754e-05, "loss": 0.00769277811050415, "step": 77220 }, { "epoch": 21.921657678115242, "grad_norm": 0.05917392671108246, "learning_rate": 7.808799318762418e-05, "loss": 0.0018422221764922142, "step": 77230 }, { "epoch": 21.924496168038605, "grad_norm": 0.3852158188819885, "learning_rate": 7.808515469770083e-05, "loss": 0.016780880093574525, "step": 77240 }, { "epoch": 21.927334657961964, "grad_norm": 4.943944454193115, "learning_rate": 7.808231620777747e-05, "loss": 0.004543472081422806, "step": 77250 }, { "epoch": 21.930173147885323, "grad_norm": 3.574143409729004, "learning_rate": 7.807947771785411e-05, "loss": 0.0065775588154792786, "step": 77260 }, { "epoch": 21.933011637808686, "grad_norm": 4.3394341468811035, "learning_rate": 7.807663922793075e-05, "loss": 0.015891978144645692, "step": 77270 }, { "epoch": 21.935850127732046, "grad_norm": 3.5937559604644775, "learning_rate": 7.807380073800739e-05, "loss": 0.011776401102542878, "step": 77280 }, { "epoch": 21.93868861765541, "grad_norm": 0.37168216705322266, "learning_rate": 7.807096224808402e-05, "loss": 0.021691039204597473, "step": 77290 }, { "epoch": 21.941527107578768, "grad_norm": 5.883046627044678, "learning_rate": 7.806812375816066e-05, "loss": 0.006059474870562553, "step": 77300 }, { "epoch": 21.944365597502127, "grad_norm": 0.29428091645240784, "learning_rate": 7.80652852682373e-05, "loss": 0.014749822020530701, "step": 77310 }, { "epoch": 21.94720408742549, "grad_norm": 0.09071836620569229, "learning_rate": 7.806244677831393e-05, "loss": 0.015358825027942658, "step": 77320 }, { "epoch": 21.95004257734885, "grad_norm": 0.3599034547805786, "learning_rate": 7.805960828839058e-05, "loss": 0.006007420644164085, "step": 77330 }, { "epoch": 21.952881067272212, "grad_norm": 1.3748537302017212, "learning_rate": 7.805676979846723e-05, "loss": 0.009552021324634553, "step": 77340 }, { "epoch": 21.95571955719557, "grad_norm": 2.223006010055542, "learning_rate": 7.805393130854385e-05, "loss": 0.006381532549858094, "step": 77350 }, { "epoch": 21.958558047118935, "grad_norm": 1.2997554540634155, "learning_rate": 7.80510928186205e-05, "loss": 0.004666983336210251, "step": 77360 }, { "epoch": 21.961396537042294, "grad_norm": 0.38340434432029724, "learning_rate": 7.804825432869714e-05, "loss": 0.005203187465667725, "step": 77370 }, { "epoch": 21.964235026965653, "grad_norm": 8.27868366241455, "learning_rate": 7.804541583877378e-05, "loss": 0.012209732085466385, "step": 77380 }, { "epoch": 21.967073516889016, "grad_norm": 8.566084861755371, "learning_rate": 7.804257734885042e-05, "loss": 0.011986808478832245, "step": 77390 }, { "epoch": 21.969912006812375, "grad_norm": 4.243077278137207, "learning_rate": 7.803973885892706e-05, "loss": 0.023333890736103056, "step": 77400 }, { "epoch": 21.972750496735735, "grad_norm": 12.54522705078125, "learning_rate": 7.80369003690037e-05, "loss": 0.03881212770938873, "step": 77410 }, { "epoch": 21.975588986659098, "grad_norm": 7.875763416290283, "learning_rate": 7.803406187908033e-05, "loss": 0.01567939668893814, "step": 77420 }, { "epoch": 21.978427476582457, "grad_norm": 1.0205825567245483, "learning_rate": 7.803122338915697e-05, "loss": 0.01401524841785431, "step": 77430 }, { "epoch": 21.98126596650582, "grad_norm": 4.787927627563477, "learning_rate": 7.802838489923361e-05, "loss": 0.015198959410190583, "step": 77440 }, { "epoch": 21.98410445642918, "grad_norm": 6.5761542320251465, "learning_rate": 7.802554640931024e-05, "loss": 0.009529849886894226, "step": 77450 }, { "epoch": 21.986942946352542, "grad_norm": 0.1588035225868225, "learning_rate": 7.80227079193869e-05, "loss": 0.002994191274046898, "step": 77460 }, { "epoch": 21.9897814362759, "grad_norm": 8.410499572753906, "learning_rate": 7.801986942946354e-05, "loss": 0.01730930805206299, "step": 77470 }, { "epoch": 21.99261992619926, "grad_norm": 3.9827117919921875, "learning_rate": 7.801703093954016e-05, "loss": 0.0038729265332221987, "step": 77480 }, { "epoch": 21.995458416122624, "grad_norm": 0.22626996040344238, "learning_rate": 7.80141924496168e-05, "loss": 0.005005461722612381, "step": 77490 }, { "epoch": 21.998296906045983, "grad_norm": 0.6835347414016724, "learning_rate": 7.801135395969345e-05, "loss": 0.03292423784732819, "step": 77500 }, { "epoch": 21.998296906045983, "eval_accuracy": 0.9756469765371654, "eval_loss": 0.09014362096786499, "eval_runtime": 32.7272, "eval_samples_per_second": 480.548, "eval_steps_per_second": 7.517, "step": 77500 }, { "epoch": 22.001135395969346, "grad_norm": 1.3483967781066895, "learning_rate": 7.800851546977009e-05, "loss": 0.014343774318695069, "step": 77510 }, { "epoch": 22.003973885892705, "grad_norm": 0.5825226306915283, "learning_rate": 7.800567697984672e-05, "loss": 0.004287770390510559, "step": 77520 }, { "epoch": 22.006812375816065, "grad_norm": 6.3119635581970215, "learning_rate": 7.800283848992337e-05, "loss": 0.005004978179931641, "step": 77530 }, { "epoch": 22.009650865739427, "grad_norm": 15.6392240524292, "learning_rate": 7.800000000000001e-05, "loss": 0.02024669647216797, "step": 77540 }, { "epoch": 22.012489355662787, "grad_norm": 9.434500694274902, "learning_rate": 7.799716151007664e-05, "loss": 0.0071985751390457155, "step": 77550 }, { "epoch": 22.01532784558615, "grad_norm": 0.259632408618927, "learning_rate": 7.799432302015328e-05, "loss": 0.007211542874574661, "step": 77560 }, { "epoch": 22.01816633550951, "grad_norm": 5.107972621917725, "learning_rate": 7.799148453022992e-05, "loss": 0.015748824179172515, "step": 77570 }, { "epoch": 22.02100482543287, "grad_norm": 0.028901919722557068, "learning_rate": 7.798864604030655e-05, "loss": 0.007405951619148254, "step": 77580 }, { "epoch": 22.02384331535623, "grad_norm": 0.09767768532037735, "learning_rate": 7.79858075503832e-05, "loss": 0.012481608986854553, "step": 77590 }, { "epoch": 22.02668180527959, "grad_norm": 0.259518027305603, "learning_rate": 7.798296906045985e-05, "loss": 0.011680335551500321, "step": 77600 }, { "epoch": 22.029520295202953, "grad_norm": 0.7509496212005615, "learning_rate": 7.798013057053647e-05, "loss": 0.0051587805151939396, "step": 77610 }, { "epoch": 22.032358785126313, "grad_norm": 15.268369674682617, "learning_rate": 7.797729208061312e-05, "loss": 0.008162900805473328, "step": 77620 }, { "epoch": 22.035197275049672, "grad_norm": 0.956761360168457, "learning_rate": 7.797445359068976e-05, "loss": 0.008550591766834259, "step": 77630 }, { "epoch": 22.038035764973035, "grad_norm": 8.0202054977417, "learning_rate": 7.79716151007664e-05, "loss": 0.004946637526154518, "step": 77640 }, { "epoch": 22.040874254896394, "grad_norm": 11.644031524658203, "learning_rate": 7.796877661084303e-05, "loss": 0.022229984402656555, "step": 77650 }, { "epoch": 22.043712744819757, "grad_norm": 0.11613766849040985, "learning_rate": 7.796593812091968e-05, "loss": 0.007181527465581894, "step": 77660 }, { "epoch": 22.046551234743117, "grad_norm": 11.280129432678223, "learning_rate": 7.796309963099632e-05, "loss": 0.047621297836303714, "step": 77670 }, { "epoch": 22.049389724666476, "grad_norm": 0.4252116084098816, "learning_rate": 7.796026114107295e-05, "loss": 0.007183042168617248, "step": 77680 }, { "epoch": 22.05222821458984, "grad_norm": 2.37814998626709, "learning_rate": 7.795742265114959e-05, "loss": 0.007742247730493546, "step": 77690 }, { "epoch": 22.055066704513198, "grad_norm": 13.491863250732422, "learning_rate": 7.795458416122623e-05, "loss": 0.009257175773382188, "step": 77700 }, { "epoch": 22.05790519443656, "grad_norm": 4.062692642211914, "learning_rate": 7.795174567130286e-05, "loss": 0.027096742391586305, "step": 77710 }, { "epoch": 22.06074368435992, "grad_norm": 0.49900022149086, "learning_rate": 7.79489071813795e-05, "loss": 0.003364697843790054, "step": 77720 }, { "epoch": 22.06358217428328, "grad_norm": 9.776283264160156, "learning_rate": 7.794606869145616e-05, "loss": 0.010374584794044494, "step": 77730 }, { "epoch": 22.066420664206642, "grad_norm": 0.4270979166030884, "learning_rate": 7.794323020153279e-05, "loss": 0.02255811244249344, "step": 77740 }, { "epoch": 22.069259154130002, "grad_norm": 8.046928405761719, "learning_rate": 7.794039171160943e-05, "loss": 0.007629990577697754, "step": 77750 }, { "epoch": 22.072097644053365, "grad_norm": 1.2421576976776123, "learning_rate": 7.793755322168607e-05, "loss": 0.014832359552383424, "step": 77760 }, { "epoch": 22.074936133976724, "grad_norm": 9.161088943481445, "learning_rate": 7.793471473176271e-05, "loss": 0.01653517484664917, "step": 77770 }, { "epoch": 22.077774623900083, "grad_norm": 6.888538837432861, "learning_rate": 7.793187624183934e-05, "loss": 0.02254423648118973, "step": 77780 }, { "epoch": 22.080613113823446, "grad_norm": 2.602606773376465, "learning_rate": 7.792903775191599e-05, "loss": 0.015897072851657867, "step": 77790 }, { "epoch": 22.083451603746806, "grad_norm": 0.15491096675395966, "learning_rate": 7.792619926199263e-05, "loss": 0.005921715497970581, "step": 77800 }, { "epoch": 22.08629009367017, "grad_norm": 17.114343643188477, "learning_rate": 7.792336077206926e-05, "loss": 0.03800462186336517, "step": 77810 }, { "epoch": 22.089128583593528, "grad_norm": 2.354292392730713, "learning_rate": 7.79205222821459e-05, "loss": 0.024470826983451842, "step": 77820 }, { "epoch": 22.09196707351689, "grad_norm": 8.81424331665039, "learning_rate": 7.791768379222254e-05, "loss": 0.009136177599430084, "step": 77830 }, { "epoch": 22.09480556344025, "grad_norm": 1.850272297859192, "learning_rate": 7.791484530229917e-05, "loss": 0.006464783102273941, "step": 77840 }, { "epoch": 22.09764405336361, "grad_norm": 0.36094018816947937, "learning_rate": 7.791200681237581e-05, "loss": 0.02273803800344467, "step": 77850 }, { "epoch": 22.100482543286972, "grad_norm": 2.385016918182373, "learning_rate": 7.790916832245247e-05, "loss": 0.005814734101295471, "step": 77860 }, { "epoch": 22.10332103321033, "grad_norm": 5.603761672973633, "learning_rate": 7.79063298325291e-05, "loss": 0.007511169463396072, "step": 77870 }, { "epoch": 22.106159523133694, "grad_norm": 3.8210558891296387, "learning_rate": 7.790349134260574e-05, "loss": 0.007698233425617218, "step": 77880 }, { "epoch": 22.108998013057054, "grad_norm": 0.5562136769294739, "learning_rate": 7.790065285268238e-05, "loss": 0.0030463142320513725, "step": 77890 }, { "epoch": 22.111836502980413, "grad_norm": 3.246532678604126, "learning_rate": 7.789781436275902e-05, "loss": 0.008175823092460632, "step": 77900 }, { "epoch": 22.114674992903776, "grad_norm": 12.016993522644043, "learning_rate": 7.789497587283565e-05, "loss": 0.028232917189598083, "step": 77910 }, { "epoch": 22.117513482827135, "grad_norm": 2.2258007526397705, "learning_rate": 7.789213738291229e-05, "loss": 0.0030083611607551576, "step": 77920 }, { "epoch": 22.1203519727505, "grad_norm": 1.903570532798767, "learning_rate": 7.788929889298894e-05, "loss": 0.0029294125735759733, "step": 77930 }, { "epoch": 22.123190462673858, "grad_norm": 0.10629672557115555, "learning_rate": 7.788646040306557e-05, "loss": 0.01323242038488388, "step": 77940 }, { "epoch": 22.126028952597217, "grad_norm": 9.25344467163086, "learning_rate": 7.788362191314221e-05, "loss": 0.021307869255542754, "step": 77950 }, { "epoch": 22.12886744252058, "grad_norm": 9.047112464904785, "learning_rate": 7.788078342321886e-05, "loss": 0.007579444348812104, "step": 77960 }, { "epoch": 22.13170593244394, "grad_norm": 0.42127689719200134, "learning_rate": 7.787794493329548e-05, "loss": 0.014253318309783936, "step": 77970 }, { "epoch": 22.134544422367302, "grad_norm": 0.1254568099975586, "learning_rate": 7.787510644337212e-05, "loss": 0.0046181917190551754, "step": 77980 }, { "epoch": 22.13738291229066, "grad_norm": 0.07271739095449448, "learning_rate": 7.787226795344878e-05, "loss": 0.001771392486989498, "step": 77990 }, { "epoch": 22.14022140221402, "grad_norm": 1.2715011835098267, "learning_rate": 7.786942946352541e-05, "loss": 0.0018854713067412376, "step": 78000 }, { "epoch": 22.14022140221402, "eval_accuracy": 0.9776816939022064, "eval_loss": 0.07739634811878204, "eval_runtime": 33.3404, "eval_samples_per_second": 471.71, "eval_steps_per_second": 7.378, "step": 78000 }, { "epoch": 22.143059892137384, "grad_norm": 9.371847152709961, "learning_rate": 7.786659097360205e-05, "loss": 0.024596229195594788, "step": 78010 }, { "epoch": 22.145898382060743, "grad_norm": 0.17622709274291992, "learning_rate": 7.786375248367869e-05, "loss": 0.0028368325904011725, "step": 78020 }, { "epoch": 22.148736871984106, "grad_norm": 6.260890960693359, "learning_rate": 7.786091399375533e-05, "loss": 0.02645450532436371, "step": 78030 }, { "epoch": 22.151575361907465, "grad_norm": 3.4022223949432373, "learning_rate": 7.785807550383196e-05, "loss": 0.008299890160560607, "step": 78040 }, { "epoch": 22.154413851830824, "grad_norm": 0.15186583995819092, "learning_rate": 7.78552370139086e-05, "loss": 0.01200697422027588, "step": 78050 }, { "epoch": 22.157252341754187, "grad_norm": 9.307485580444336, "learning_rate": 7.785239852398524e-05, "loss": 0.013498611748218536, "step": 78060 }, { "epoch": 22.160090831677547, "grad_norm": 0.6698565483093262, "learning_rate": 7.784956003406188e-05, "loss": 0.005210898816585541, "step": 78070 }, { "epoch": 22.16292932160091, "grad_norm": 3.2632052898406982, "learning_rate": 7.784672154413852e-05, "loss": 0.011483563482761383, "step": 78080 }, { "epoch": 22.16576781152427, "grad_norm": 0.13775205612182617, "learning_rate": 7.784388305421517e-05, "loss": 0.0038060333579778673, "step": 78090 }, { "epoch": 22.16860630144763, "grad_norm": 2.7806217670440674, "learning_rate": 7.78410445642918e-05, "loss": 0.003142637759447098, "step": 78100 }, { "epoch": 22.17144479137099, "grad_norm": 1.9912196397781372, "learning_rate": 7.783820607436843e-05, "loss": 0.00872998908162117, "step": 78110 }, { "epoch": 22.17428328129435, "grad_norm": 0.3750802278518677, "learning_rate": 7.783536758444508e-05, "loss": 0.012335088849067689, "step": 78120 }, { "epoch": 22.177121771217713, "grad_norm": 0.8216896653175354, "learning_rate": 7.783252909452172e-05, "loss": 0.007229675352573395, "step": 78130 }, { "epoch": 22.179960261141073, "grad_norm": 0.18844787776470184, "learning_rate": 7.782969060459836e-05, "loss": 0.002840219810605049, "step": 78140 }, { "epoch": 22.182798751064432, "grad_norm": 0.7840450406074524, "learning_rate": 7.7826852114675e-05, "loss": 0.004582007229328155, "step": 78150 }, { "epoch": 22.185637240987795, "grad_norm": 13.046009063720703, "learning_rate": 7.782401362475163e-05, "loss": 0.026206472516059877, "step": 78160 }, { "epoch": 22.188475730911154, "grad_norm": 1.3467410802841187, "learning_rate": 7.782117513482827e-05, "loss": 0.004358679801225662, "step": 78170 }, { "epoch": 22.191314220834517, "grad_norm": 0.3109036684036255, "learning_rate": 7.781833664490491e-05, "loss": 0.0105992890894413, "step": 78180 }, { "epoch": 22.194152710757876, "grad_norm": 0.5197497606277466, "learning_rate": 7.781549815498155e-05, "loss": 0.0070513151586055756, "step": 78190 }, { "epoch": 22.19699120068124, "grad_norm": 0.39078763127326965, "learning_rate": 7.78126596650582e-05, "loss": 0.006872041523456574, "step": 78200 }, { "epoch": 22.1998296906046, "grad_norm": 0.16057530045509338, "learning_rate": 7.780982117513484e-05, "loss": 0.013001783192157746, "step": 78210 }, { "epoch": 22.202668180527958, "grad_norm": 14.848234176635742, "learning_rate": 7.780698268521148e-05, "loss": 0.018975910544395447, "step": 78220 }, { "epoch": 22.20550667045132, "grad_norm": 4.843760013580322, "learning_rate": 7.78041441952881e-05, "loss": 0.02856616973876953, "step": 78230 }, { "epoch": 22.20834516037468, "grad_norm": 3.108414649963379, "learning_rate": 7.780130570536475e-05, "loss": 0.008760303258895874, "step": 78240 }, { "epoch": 22.211183650298043, "grad_norm": 1.7247322797775269, "learning_rate": 7.779846721544139e-05, "loss": 0.0033042073249816895, "step": 78250 }, { "epoch": 22.214022140221402, "grad_norm": 1.9464201927185059, "learning_rate": 7.779562872551803e-05, "loss": 0.009155268222093583, "step": 78260 }, { "epoch": 22.21686063014476, "grad_norm": 2.5281872749328613, "learning_rate": 7.779279023559467e-05, "loss": 0.010307162255048751, "step": 78270 }, { "epoch": 22.219699120068125, "grad_norm": 0.6465842127799988, "learning_rate": 7.778995174567131e-05, "loss": 0.0014159763231873512, "step": 78280 }, { "epoch": 22.222537609991484, "grad_norm": 1.793881893157959, "learning_rate": 7.778711325574794e-05, "loss": 0.003528258949518204, "step": 78290 }, { "epoch": 22.225376099914847, "grad_norm": 10.99521541595459, "learning_rate": 7.778427476582458e-05, "loss": 0.011187750101089477, "step": 78300 }, { "epoch": 22.228214589838206, "grad_norm": 0.6423080563545227, "learning_rate": 7.778143627590122e-05, "loss": 0.007745723426342011, "step": 78310 }, { "epoch": 22.231053079761566, "grad_norm": 4.23691463470459, "learning_rate": 7.777859778597786e-05, "loss": 0.01753370016813278, "step": 78320 }, { "epoch": 22.23389156968493, "grad_norm": 0.3115551173686981, "learning_rate": 7.77757592960545e-05, "loss": 0.009907785058021545, "step": 78330 }, { "epoch": 22.236730059608288, "grad_norm": 8.04884147644043, "learning_rate": 7.777292080613115e-05, "loss": 0.0066960200667381285, "step": 78340 }, { "epoch": 22.23956854953165, "grad_norm": 11.745262145996094, "learning_rate": 7.777008231620779e-05, "loss": 0.016690939664840698, "step": 78350 }, { "epoch": 22.24240703945501, "grad_norm": 1.52156662940979, "learning_rate": 7.776724382628442e-05, "loss": 0.018538717925548554, "step": 78360 }, { "epoch": 22.24524552937837, "grad_norm": 9.211953163146973, "learning_rate": 7.776440533636106e-05, "loss": 0.014981551468372345, "step": 78370 }, { "epoch": 22.248084019301732, "grad_norm": 6.561932563781738, "learning_rate": 7.77615668464377e-05, "loss": 0.037529265880584715, "step": 78380 }, { "epoch": 22.25092250922509, "grad_norm": 0.07710597664117813, "learning_rate": 7.775872835651434e-05, "loss": 0.022948597371578217, "step": 78390 }, { "epoch": 22.253760999148454, "grad_norm": 0.12797318398952484, "learning_rate": 7.775588986659098e-05, "loss": 0.0081459641456604, "step": 78400 }, { "epoch": 22.256599489071814, "grad_norm": 0.5739738941192627, "learning_rate": 7.775305137666762e-05, "loss": 0.009614607691764832, "step": 78410 }, { "epoch": 22.259437978995173, "grad_norm": 0.07147172838449478, "learning_rate": 7.775021288674425e-05, "loss": 0.007371384650468826, "step": 78420 }, { "epoch": 22.262276468918536, "grad_norm": 0.620552122592926, "learning_rate": 7.774737439682089e-05, "loss": 0.009639114141464233, "step": 78430 }, { "epoch": 22.265114958841895, "grad_norm": 0.6758928894996643, "learning_rate": 7.774453590689753e-05, "loss": 0.012700499594211578, "step": 78440 }, { "epoch": 22.267953448765258, "grad_norm": 0.117019422352314, "learning_rate": 7.774169741697417e-05, "loss": 0.018893077969551086, "step": 78450 }, { "epoch": 22.270791938688618, "grad_norm": 3.0981619358062744, "learning_rate": 7.773885892705082e-05, "loss": 0.007216107845306396, "step": 78460 }, { "epoch": 22.273630428611977, "grad_norm": 0.13910093903541565, "learning_rate": 7.773602043712746e-05, "loss": 0.0040295179933309555, "step": 78470 }, { "epoch": 22.27646891853534, "grad_norm": 3.0923655033111572, "learning_rate": 7.77331819472041e-05, "loss": 0.004825994372367859, "step": 78480 }, { "epoch": 22.2793074084587, "grad_norm": 4.314969539642334, "learning_rate": 7.773034345728073e-05, "loss": 0.015155453979969025, "step": 78490 }, { "epoch": 22.282145898382062, "grad_norm": 13.063375473022461, "learning_rate": 7.772750496735737e-05, "loss": 0.020191851258277892, "step": 78500 }, { "epoch": 22.282145898382062, "eval_accuracy": 0.9759013162077955, "eval_loss": 0.07833293080329895, "eval_runtime": 34.0132, "eval_samples_per_second": 462.379, "eval_steps_per_second": 7.232, "step": 78500 }, { "epoch": 22.28498438830542, "grad_norm": 1.5211994647979736, "learning_rate": 7.772466647743401e-05, "loss": 0.002881387434899807, "step": 78510 }, { "epoch": 22.28782287822878, "grad_norm": 0.3373485505580902, "learning_rate": 7.772182798751064e-05, "loss": 0.008139905333518983, "step": 78520 }, { "epoch": 22.290661368152143, "grad_norm": 0.0358709990978241, "learning_rate": 7.771898949758729e-05, "loss": 0.011457935720682145, "step": 78530 }, { "epoch": 22.293499858075503, "grad_norm": 8.895052909851074, "learning_rate": 7.771615100766393e-05, "loss": 0.0058646082878112795, "step": 78540 }, { "epoch": 22.296338347998866, "grad_norm": 0.28080394864082336, "learning_rate": 7.771331251774056e-05, "loss": 0.009014776349067688, "step": 78550 }, { "epoch": 22.299176837922225, "grad_norm": 0.49101755023002625, "learning_rate": 7.77104740278172e-05, "loss": 0.020105372369289397, "step": 78560 }, { "epoch": 22.302015327845588, "grad_norm": 0.41965022683143616, "learning_rate": 7.770763553789384e-05, "loss": 0.006330445408821106, "step": 78570 }, { "epoch": 22.304853817768947, "grad_norm": 12.430011749267578, "learning_rate": 7.770479704797048e-05, "loss": 0.016957478225231172, "step": 78580 }, { "epoch": 22.307692307692307, "grad_norm": 0.2376209795475006, "learning_rate": 7.770195855804713e-05, "loss": 0.01241535171866417, "step": 78590 }, { "epoch": 22.31053079761567, "grad_norm": 1.798538327217102, "learning_rate": 7.769912006812377e-05, "loss": 0.016513723134994506, "step": 78600 }, { "epoch": 22.31336928753903, "grad_norm": 0.15012899041175842, "learning_rate": 7.769628157820041e-05, "loss": 0.005339158326387405, "step": 78610 }, { "epoch": 22.31620777746239, "grad_norm": 0.6968691945075989, "learning_rate": 7.769344308827704e-05, "loss": 0.012961794435977936, "step": 78620 }, { "epoch": 22.31904626738575, "grad_norm": 0.6125990152359009, "learning_rate": 7.769060459835368e-05, "loss": 0.003985529020428658, "step": 78630 }, { "epoch": 22.32188475730911, "grad_norm": 0.168002188205719, "learning_rate": 7.768776610843032e-05, "loss": 0.004057208821177482, "step": 78640 }, { "epoch": 22.324723247232473, "grad_norm": 1.6701442003250122, "learning_rate": 7.768492761850695e-05, "loss": 0.013524794578552246, "step": 78650 }, { "epoch": 22.327561737155833, "grad_norm": 4.636582374572754, "learning_rate": 7.76820891285836e-05, "loss": 0.007733342796564102, "step": 78660 }, { "epoch": 22.330400227079195, "grad_norm": 5.738679885864258, "learning_rate": 7.767925063866024e-05, "loss": 0.020876120030879974, "step": 78670 }, { "epoch": 22.333238717002555, "grad_norm": 1.6890742778778076, "learning_rate": 7.767641214873687e-05, "loss": 0.01171334981918335, "step": 78680 }, { "epoch": 22.336077206925914, "grad_norm": 0.7850841879844666, "learning_rate": 7.767357365881351e-05, "loss": 0.025643178820610048, "step": 78690 }, { "epoch": 22.338915696849277, "grad_norm": 4.371233940124512, "learning_rate": 7.767073516889015e-05, "loss": 0.009742523729801177, "step": 78700 }, { "epoch": 22.341754186772636, "grad_norm": 10.401566505432129, "learning_rate": 7.76678966789668e-05, "loss": 0.013111153244972229, "step": 78710 }, { "epoch": 22.344592676696, "grad_norm": 9.20460033416748, "learning_rate": 7.766505818904344e-05, "loss": 0.020062635838985442, "step": 78720 }, { "epoch": 22.34743116661936, "grad_norm": 1.4913864135742188, "learning_rate": 7.766221969912008e-05, "loss": 0.009672766178846359, "step": 78730 }, { "epoch": 22.350269656542718, "grad_norm": 3.2296993732452393, "learning_rate": 7.765938120919672e-05, "loss": 0.009793665260076523, "step": 78740 }, { "epoch": 22.35310814646608, "grad_norm": 1.3499083518981934, "learning_rate": 7.765654271927335e-05, "loss": 0.016467033326625823, "step": 78750 }, { "epoch": 22.35594663638944, "grad_norm": 0.30259424448013306, "learning_rate": 7.765370422934999e-05, "loss": 0.005251864716410637, "step": 78760 }, { "epoch": 22.358785126312803, "grad_norm": 1.2634217739105225, "learning_rate": 7.765086573942663e-05, "loss": 0.010318867117166518, "step": 78770 }, { "epoch": 22.361623616236162, "grad_norm": 3.469515323638916, "learning_rate": 7.764802724950326e-05, "loss": 0.00502680242061615, "step": 78780 }, { "epoch": 22.36446210615952, "grad_norm": 6.936007022857666, "learning_rate": 7.764518875957991e-05, "loss": 0.008236106485128403, "step": 78790 }, { "epoch": 22.367300596082885, "grad_norm": 2.635146379470825, "learning_rate": 7.764235026965655e-05, "loss": 0.013207918405532837, "step": 78800 }, { "epoch": 22.370139086006244, "grad_norm": 0.4503542184829712, "learning_rate": 7.763951177973318e-05, "loss": 0.0023884139955043793, "step": 78810 }, { "epoch": 22.372977575929607, "grad_norm": 9.006097793579102, "learning_rate": 7.763667328980982e-05, "loss": 0.007156608253717422, "step": 78820 }, { "epoch": 22.375816065852966, "grad_norm": 4.122788429260254, "learning_rate": 7.763383479988646e-05, "loss": 0.008919757604599, "step": 78830 }, { "epoch": 22.378654555776325, "grad_norm": 2.0654680728912354, "learning_rate": 7.76309963099631e-05, "loss": 0.005712532252073288, "step": 78840 }, { "epoch": 22.38149304569969, "grad_norm": 3.7832093238830566, "learning_rate": 7.762815782003973e-05, "loss": 0.005411183834075928, "step": 78850 }, { "epoch": 22.384331535623048, "grad_norm": 2.1124603748321533, "learning_rate": 7.762531933011639e-05, "loss": 0.0031862057745456695, "step": 78860 }, { "epoch": 22.38717002554641, "grad_norm": 0.3779962658882141, "learning_rate": 7.762248084019303e-05, "loss": 0.0069703437387943264, "step": 78870 }, { "epoch": 22.39000851546977, "grad_norm": 4.245120525360107, "learning_rate": 7.761964235026966e-05, "loss": 0.020406708121299744, "step": 78880 }, { "epoch": 22.39284700539313, "grad_norm": 0.4387591481208801, "learning_rate": 7.76168038603463e-05, "loss": 0.004083443433046341, "step": 78890 }, { "epoch": 22.395685495316492, "grad_norm": 0.17759831249713898, "learning_rate": 7.761396537042294e-05, "loss": 0.015512402355670928, "step": 78900 }, { "epoch": 22.39852398523985, "grad_norm": 6.041721343994141, "learning_rate": 7.761112688049957e-05, "loss": 0.028350430727005004, "step": 78910 }, { "epoch": 22.401362475163214, "grad_norm": 7.026527404785156, "learning_rate": 7.760828839057622e-05, "loss": 0.0108842633664608, "step": 78920 }, { "epoch": 22.404200965086574, "grad_norm": 0.7132558822631836, "learning_rate": 7.760544990065287e-05, "loss": 0.005656861513853073, "step": 78930 }, { "epoch": 22.407039455009933, "grad_norm": 0.307973176240921, "learning_rate": 7.760261141072949e-05, "loss": 0.009238821268081666, "step": 78940 }, { "epoch": 22.409877944933296, "grad_norm": 1.8904776573181152, "learning_rate": 7.759977292080613e-05, "loss": 0.011265485733747482, "step": 78950 }, { "epoch": 22.412716434856655, "grad_norm": 3.918023109436035, "learning_rate": 7.759693443088278e-05, "loss": 0.008855794370174409, "step": 78960 }, { "epoch": 22.415554924780018, "grad_norm": 0.2955299913883209, "learning_rate": 7.759409594095942e-05, "loss": 0.0042527526617050174, "step": 78970 }, { "epoch": 22.418393414703377, "grad_norm": 2.0486485958099365, "learning_rate": 7.759125745103604e-05, "loss": 0.008816121518611908, "step": 78980 }, { "epoch": 22.42123190462674, "grad_norm": 1.0607678890228271, "learning_rate": 7.75884189611127e-05, "loss": 0.008166714757680892, "step": 78990 }, { "epoch": 22.4240703945501, "grad_norm": 15.318842887878418, "learning_rate": 7.758558047118933e-05, "loss": 0.019472017884254456, "step": 79000 }, { "epoch": 22.4240703945501, "eval_accuracy": 0.9755833916195078, "eval_loss": 0.08629642426967621, "eval_runtime": 35.41, "eval_samples_per_second": 444.14, "eval_steps_per_second": 6.947, "step": 79000 }, { "epoch": 22.42690888447346, "grad_norm": 12.262932777404785, "learning_rate": 7.758274198126597e-05, "loss": 0.017708252370357513, "step": 79010 }, { "epoch": 22.429747374396822, "grad_norm": 11.638760566711426, "learning_rate": 7.757990349134261e-05, "loss": 0.010571684688329697, "step": 79020 }, { "epoch": 22.43258586432018, "grad_norm": 0.15298330783843994, "learning_rate": 7.757706500141925e-05, "loss": 0.0038291729986667635, "step": 79030 }, { "epoch": 22.435424354243544, "grad_norm": 1.2631851434707642, "learning_rate": 7.757451036048823e-05, "loss": 0.01988000124692917, "step": 79040 }, { "epoch": 22.438262844166903, "grad_norm": 3.0214877128601074, "learning_rate": 7.757167187056486e-05, "loss": 0.004475393146276474, "step": 79050 }, { "epoch": 22.441101334090263, "grad_norm": 6.598615646362305, "learning_rate": 7.75688333806415e-05, "loss": 0.01613222360610962, "step": 79060 }, { "epoch": 22.443939824013626, "grad_norm": 0.48028361797332764, "learning_rate": 7.756599489071814e-05, "loss": 0.009009097516536713, "step": 79070 }, { "epoch": 22.446778313936985, "grad_norm": 5.430286884307861, "learning_rate": 7.756315640079478e-05, "loss": 0.04605386555194855, "step": 79080 }, { "epoch": 22.449616803860348, "grad_norm": 14.352376937866211, "learning_rate": 7.756031791087141e-05, "loss": 0.019012558460235595, "step": 79090 }, { "epoch": 22.452455293783707, "grad_norm": 0.06154656037688255, "learning_rate": 7.755747942094807e-05, "loss": 0.002349245920777321, "step": 79100 }, { "epoch": 22.455293783707067, "grad_norm": 0.8340765833854675, "learning_rate": 7.755464093102471e-05, "loss": 0.008090756088495254, "step": 79110 }, { "epoch": 22.45813227363043, "grad_norm": 8.118119239807129, "learning_rate": 7.755180244110134e-05, "loss": 0.008427225053310394, "step": 79120 }, { "epoch": 22.46097076355379, "grad_norm": 3.1391260623931885, "learning_rate": 7.754896395117798e-05, "loss": 0.010734327882528306, "step": 79130 }, { "epoch": 22.46380925347715, "grad_norm": 3.668593406677246, "learning_rate": 7.754612546125462e-05, "loss": 0.0047319836914539335, "step": 79140 }, { "epoch": 22.46664774340051, "grad_norm": 0.024234844371676445, "learning_rate": 7.754328697133125e-05, "loss": 0.007279926538467407, "step": 79150 }, { "epoch": 22.46948623332387, "grad_norm": 0.7840533256530762, "learning_rate": 7.754044848140789e-05, "loss": 0.00638076588511467, "step": 79160 }, { "epoch": 22.472324723247233, "grad_norm": 1.448838710784912, "learning_rate": 7.753760999148454e-05, "loss": 0.020050182938575745, "step": 79170 }, { "epoch": 22.475163213170593, "grad_norm": 1.0937997102737427, "learning_rate": 7.753477150156117e-05, "loss": 0.0037620030343532562, "step": 79180 }, { "epoch": 22.478001703093955, "grad_norm": 1.9206480979919434, "learning_rate": 7.753193301163781e-05, "loss": 0.012116539478302001, "step": 79190 }, { "epoch": 22.480840193017315, "grad_norm": 2.2560641765594482, "learning_rate": 7.752909452171445e-05, "loss": 0.014464446902275085, "step": 79200 }, { "epoch": 22.483678682940674, "grad_norm": 2.8898134231567383, "learning_rate": 7.75262560317911e-05, "loss": 0.017966724932193756, "step": 79210 }, { "epoch": 22.486517172864037, "grad_norm": 1.342119574546814, "learning_rate": 7.752341754186772e-05, "loss": 0.013817609846591949, "step": 79220 }, { "epoch": 22.489355662787396, "grad_norm": 8.695272445678711, "learning_rate": 7.752057905194436e-05, "loss": 0.017691722512245177, "step": 79230 }, { "epoch": 22.49219415271076, "grad_norm": 0.4410251975059509, "learning_rate": 7.751774056202102e-05, "loss": 0.002933601476252079, "step": 79240 }, { "epoch": 22.49503264263412, "grad_norm": 2.731127977371216, "learning_rate": 7.751490207209765e-05, "loss": 0.003912259638309479, "step": 79250 }, { "epoch": 22.497871132557478, "grad_norm": 1.2270982265472412, "learning_rate": 7.751206358217429e-05, "loss": 0.008627659827470779, "step": 79260 }, { "epoch": 22.50070962248084, "grad_norm": 0.17316944897174835, "learning_rate": 7.750922509225093e-05, "loss": 0.009258954226970673, "step": 79270 }, { "epoch": 22.5035481124042, "grad_norm": 14.260405540466309, "learning_rate": 7.750638660232756e-05, "loss": 0.009277745336294174, "step": 79280 }, { "epoch": 22.506386602327563, "grad_norm": 6.405237197875977, "learning_rate": 7.75035481124042e-05, "loss": 0.010582424700260162, "step": 79290 }, { "epoch": 22.509225092250922, "grad_norm": 4.978114128112793, "learning_rate": 7.750070962248085e-05, "loss": 0.007594918459653854, "step": 79300 }, { "epoch": 22.51206358217428, "grad_norm": 0.43827736377716064, "learning_rate": 7.749787113255748e-05, "loss": 0.008570787310600281, "step": 79310 }, { "epoch": 22.514902072097644, "grad_norm": 0.06372730433940887, "learning_rate": 7.749503264263412e-05, "loss": 0.007185947149991989, "step": 79320 }, { "epoch": 22.517740562021004, "grad_norm": 1.9358264207839966, "learning_rate": 7.749219415271076e-05, "loss": 0.01828165352344513, "step": 79330 }, { "epoch": 22.520579051944367, "grad_norm": 2.2444138526916504, "learning_rate": 7.74893556627874e-05, "loss": 0.004466582462191582, "step": 79340 }, { "epoch": 22.523417541867726, "grad_norm": 0.6957405805587769, "learning_rate": 7.748651717286403e-05, "loss": 0.009484469145536422, "step": 79350 }, { "epoch": 22.526256031791085, "grad_norm": 1.9727164506912231, "learning_rate": 7.748367868294067e-05, "loss": 0.004942643642425537, "step": 79360 }, { "epoch": 22.52909452171445, "grad_norm": 3.293560028076172, "learning_rate": 7.748084019301733e-05, "loss": 0.03488859236240387, "step": 79370 }, { "epoch": 22.531933011637808, "grad_norm": 0.36543524265289307, "learning_rate": 7.747800170309396e-05, "loss": 0.020212624967098237, "step": 79380 }, { "epoch": 22.53477150156117, "grad_norm": 2.0747416019439697, "learning_rate": 7.74751632131706e-05, "loss": 0.0037982787936925886, "step": 79390 }, { "epoch": 22.53760999148453, "grad_norm": 6.281290054321289, "learning_rate": 7.747232472324724e-05, "loss": 0.004734662920236587, "step": 79400 }, { "epoch": 22.540448481407893, "grad_norm": 4.397636890411377, "learning_rate": 7.746948623332387e-05, "loss": 0.013562235236167907, "step": 79410 }, { "epoch": 22.543286971331252, "grad_norm": 2.3642446994781494, "learning_rate": 7.746664774340051e-05, "loss": 0.011409103125333785, "step": 79420 }, { "epoch": 22.54612546125461, "grad_norm": 0.7623924016952515, "learning_rate": 7.746380925347715e-05, "loss": 0.009672952443361282, "step": 79430 }, { "epoch": 22.548963951177974, "grad_norm": 3.209023952484131, "learning_rate": 7.746097076355379e-05, "loss": 0.007690660655498505, "step": 79440 }, { "epoch": 22.551802441101334, "grad_norm": 0.3065905272960663, "learning_rate": 7.745813227363043e-05, "loss": 0.01803574711084366, "step": 79450 }, { "epoch": 22.554640931024696, "grad_norm": 5.7436699867248535, "learning_rate": 7.745529378370707e-05, "loss": 0.009968225657939912, "step": 79460 }, { "epoch": 22.557479420948056, "grad_norm": 0.24719908833503723, "learning_rate": 7.745245529378372e-05, "loss": 0.006529628485441208, "step": 79470 }, { "epoch": 22.560317910871415, "grad_norm": 0.14368918538093567, "learning_rate": 7.744961680386034e-05, "loss": 0.0015956073999404906, "step": 79480 }, { "epoch": 22.563156400794778, "grad_norm": 0.23443642258644104, "learning_rate": 7.744677831393699e-05, "loss": 0.016875922679901123, "step": 79490 }, { "epoch": 22.565994890718137, "grad_norm": 4.345728397369385, "learning_rate": 7.744393982401364e-05, "loss": 0.009955111891031265, "step": 79500 }, { "epoch": 22.565994890718137, "eval_accuracy": 0.9768550899726585, "eval_loss": 0.08239362388849258, "eval_runtime": 39.1865, "eval_samples_per_second": 401.337, "eval_steps_per_second": 6.278, "step": 79500 }, { "epoch": 22.5688333806415, "grad_norm": 0.23899933695793152, "learning_rate": 7.744110133409027e-05, "loss": 0.003941354900598526, "step": 79510 }, { "epoch": 22.57167187056486, "grad_norm": 0.2564370632171631, "learning_rate": 7.743826284416691e-05, "loss": 0.00630846917629242, "step": 79520 }, { "epoch": 22.57451036048822, "grad_norm": 6.1483354568481445, "learning_rate": 7.743542435424355e-05, "loss": 0.009297397732734681, "step": 79530 }, { "epoch": 22.577348850411582, "grad_norm": 0.006377923768013716, "learning_rate": 7.743258586432018e-05, "loss": 0.011376951634883881, "step": 79540 }, { "epoch": 22.58018734033494, "grad_norm": 6.268465042114258, "learning_rate": 7.742974737439682e-05, "loss": 0.020463232696056367, "step": 79550 }, { "epoch": 22.583025830258304, "grad_norm": 1.619179129600525, "learning_rate": 7.742690888447346e-05, "loss": 0.006083917990326882, "step": 79560 }, { "epoch": 22.585864320181663, "grad_norm": 0.09844130277633667, "learning_rate": 7.74240703945501e-05, "loss": 0.003566272556781769, "step": 79570 }, { "epoch": 22.588702810105023, "grad_norm": 5.898601055145264, "learning_rate": 7.742123190462674e-05, "loss": 0.003236593306064606, "step": 79580 }, { "epoch": 22.591541300028386, "grad_norm": 0.2833153307437897, "learning_rate": 7.741839341470339e-05, "loss": 0.0099777452647686, "step": 79590 }, { "epoch": 22.594379789951745, "grad_norm": 0.7480348348617554, "learning_rate": 7.741555492478003e-05, "loss": 0.006408081948757171, "step": 79600 }, { "epoch": 22.597218279875108, "grad_norm": 0.08068668097257614, "learning_rate": 7.741271643485665e-05, "loss": 0.004003764688968658, "step": 79610 }, { "epoch": 22.600056769798467, "grad_norm": 11.714157104492188, "learning_rate": 7.74098779449333e-05, "loss": 0.014060266315937042, "step": 79620 }, { "epoch": 22.602895259721826, "grad_norm": 6.70611047744751, "learning_rate": 7.740703945500994e-05, "loss": 0.005483989790081978, "step": 79630 }, { "epoch": 22.60573374964519, "grad_norm": 1.971276044845581, "learning_rate": 7.740420096508658e-05, "loss": 0.005359835177659989, "step": 79640 }, { "epoch": 22.60857223956855, "grad_norm": 5.308992385864258, "learning_rate": 7.740136247516322e-05, "loss": 0.021819914877414703, "step": 79650 }, { "epoch": 22.61141072949191, "grad_norm": 0.3479658365249634, "learning_rate": 7.739852398523986e-05, "loss": 0.014000149071216583, "step": 79660 }, { "epoch": 22.61424921941527, "grad_norm": 0.06540220975875854, "learning_rate": 7.739568549531649e-05, "loss": 0.012312840670347214, "step": 79670 }, { "epoch": 22.61708770933863, "grad_norm": 0.8271252512931824, "learning_rate": 7.739284700539313e-05, "loss": 0.01974947303533554, "step": 79680 }, { "epoch": 22.619926199261993, "grad_norm": 5.209263324737549, "learning_rate": 7.739000851546977e-05, "loss": 0.009722544997930526, "step": 79690 }, { "epoch": 22.622764689185352, "grad_norm": 4.325723171234131, "learning_rate": 7.738717002554641e-05, "loss": 0.0054989088326692585, "step": 79700 }, { "epoch": 22.625603179108715, "grad_norm": 0.34446606040000916, "learning_rate": 7.738433153562305e-05, "loss": 0.011004829406738281, "step": 79710 }, { "epoch": 22.628441669032075, "grad_norm": 0.09298574924468994, "learning_rate": 7.73814930456997e-05, "loss": 0.012293099611997604, "step": 79720 }, { "epoch": 22.631280158955434, "grad_norm": 3.446995258331299, "learning_rate": 7.737865455577634e-05, "loss": 0.007939675450325012, "step": 79730 }, { "epoch": 22.634118648878797, "grad_norm": 5.422833442687988, "learning_rate": 7.737581606585297e-05, "loss": 0.022127528488636018, "step": 79740 }, { "epoch": 22.636957138802156, "grad_norm": 1.9281678199768066, "learning_rate": 7.73729775759296e-05, "loss": 0.008284381031990052, "step": 79750 }, { "epoch": 22.63979562872552, "grad_norm": 0.6741786599159241, "learning_rate": 7.737013908600625e-05, "loss": 0.004213647544384002, "step": 79760 }, { "epoch": 22.64263411864888, "grad_norm": 1.4033104181289673, "learning_rate": 7.736730059608289e-05, "loss": 0.012106390297412872, "step": 79770 }, { "epoch": 22.64547260857224, "grad_norm": 6.11046028137207, "learning_rate": 7.736446210615953e-05, "loss": 0.015587593615055084, "step": 79780 }, { "epoch": 22.6483110984956, "grad_norm": 2.0148401260375977, "learning_rate": 7.736162361623617e-05, "loss": 0.00584096610546112, "step": 79790 }, { "epoch": 22.65114958841896, "grad_norm": 12.005495071411133, "learning_rate": 7.73587851263128e-05, "loss": 0.025040611624717712, "step": 79800 }, { "epoch": 22.653988078342323, "grad_norm": 0.658894419670105, "learning_rate": 7.735594663638944e-05, "loss": 0.02042766660451889, "step": 79810 }, { "epoch": 22.656826568265682, "grad_norm": 1.1792173385620117, "learning_rate": 7.735310814646608e-05, "loss": 0.004305172711610794, "step": 79820 }, { "epoch": 22.659665058189045, "grad_norm": 2.8419320583343506, "learning_rate": 7.735026965654272e-05, "loss": 0.01157229244709015, "step": 79830 }, { "epoch": 22.662503548112404, "grad_norm": 0.3166353404521942, "learning_rate": 7.734743116661937e-05, "loss": 0.011971043050289154, "step": 79840 }, { "epoch": 22.665342038035764, "grad_norm": 1.9768791198730469, "learning_rate": 7.734459267669601e-05, "loss": 0.023450547456741334, "step": 79850 }, { "epoch": 22.668180527959127, "grad_norm": 0.219072625041008, "learning_rate": 7.734175418677263e-05, "loss": 0.013892801105976104, "step": 79860 }, { "epoch": 22.671019017882486, "grad_norm": 1.7263541221618652, "learning_rate": 7.733891569684928e-05, "loss": 0.010886938869953155, "step": 79870 }, { "epoch": 22.67385750780585, "grad_norm": 6.85352087020874, "learning_rate": 7.733607720692592e-05, "loss": 0.021765464544296266, "step": 79880 }, { "epoch": 22.676695997729208, "grad_norm": 9.08678913116455, "learning_rate": 7.733323871700256e-05, "loss": 0.030223235487937927, "step": 79890 }, { "epoch": 22.679534487652568, "grad_norm": 1.5890876054763794, "learning_rate": 7.73304002270792e-05, "loss": 0.024520748853683473, "step": 79900 }, { "epoch": 22.68237297757593, "grad_norm": 0.3226659893989563, "learning_rate": 7.732756173715584e-05, "loss": 0.01176881194114685, "step": 79910 }, { "epoch": 22.68521146749929, "grad_norm": 0.2847535312175751, "learning_rate": 7.732472324723248e-05, "loss": 0.01495707333087921, "step": 79920 }, { "epoch": 22.688049957422653, "grad_norm": 0.5597468018531799, "learning_rate": 7.732188475730911e-05, "loss": 0.005822597444057465, "step": 79930 }, { "epoch": 22.690888447346012, "grad_norm": 4.061117172241211, "learning_rate": 7.731904626738575e-05, "loss": 0.005364912748336792, "step": 79940 }, { "epoch": 22.69372693726937, "grad_norm": 0.9473974108695984, "learning_rate": 7.73162077774624e-05, "loss": 0.003888687491416931, "step": 79950 }, { "epoch": 22.696565427192734, "grad_norm": 2.335792064666748, "learning_rate": 7.731336928753903e-05, "loss": 0.0033767517656087876, "step": 79960 }, { "epoch": 22.699403917116094, "grad_norm": 0.8960579037666321, "learning_rate": 7.731053079761568e-05, "loss": 0.010854437947273254, "step": 79970 }, { "epoch": 22.702242407039456, "grad_norm": 0.7199541330337524, "learning_rate": 7.730769230769232e-05, "loss": 0.00851796418428421, "step": 79980 }, { "epoch": 22.705080896962816, "grad_norm": 9.359832763671875, "learning_rate": 7.730485381776895e-05, "loss": 0.014589454233646392, "step": 79990 }, { "epoch": 22.707919386886175, "grad_norm": 0.5293030142784119, "learning_rate": 7.730201532784559e-05, "loss": 0.02011541575193405, "step": 80000 }, { "epoch": 22.707919386886175, "eval_accuracy": 0.9795256565142748, "eval_loss": 0.07504372298717499, "eval_runtime": 37.6317, "eval_samples_per_second": 417.919, "eval_steps_per_second": 6.537, "step": 80000 }, { "epoch": 22.710757876809538, "grad_norm": 1.9100292921066284, "learning_rate": 7.729917683792223e-05, "loss": 0.0040435843169689175, "step": 80010 }, { "epoch": 22.713596366732897, "grad_norm": 7.016022682189941, "learning_rate": 7.729633834799887e-05, "loss": 0.007350543141365051, "step": 80020 }, { "epoch": 22.71643485665626, "grad_norm": 1.2258089780807495, "learning_rate": 7.72934998580755e-05, "loss": 0.006944693624973297, "step": 80030 }, { "epoch": 22.71927334657962, "grad_norm": 0.24977675080299377, "learning_rate": 7.729066136815215e-05, "loss": 0.007485708594322205, "step": 80040 }, { "epoch": 22.72211183650298, "grad_norm": 2.085789442062378, "learning_rate": 7.72878228782288e-05, "loss": 0.0014101464301347732, "step": 80050 }, { "epoch": 22.72495032642634, "grad_norm": 7.444011688232422, "learning_rate": 7.728498438830542e-05, "loss": 0.008905861526727676, "step": 80060 }, { "epoch": 22.7277888163497, "grad_norm": 7.076019763946533, "learning_rate": 7.728214589838206e-05, "loss": 0.013775551319122314, "step": 80070 }, { "epoch": 22.730627306273064, "grad_norm": 2.248584508895874, "learning_rate": 7.72793074084587e-05, "loss": 0.013943053781986237, "step": 80080 }, { "epoch": 22.733465796196423, "grad_norm": 0.3748319149017334, "learning_rate": 7.727646891853533e-05, "loss": 0.011280252784490585, "step": 80090 }, { "epoch": 22.736304286119783, "grad_norm": 4.45000696182251, "learning_rate": 7.727363042861199e-05, "loss": 0.019295661151409148, "step": 80100 }, { "epoch": 22.739142776043145, "grad_norm": 0.11562126129865646, "learning_rate": 7.727079193868863e-05, "loss": 0.009846065938472748, "step": 80110 }, { "epoch": 22.741981265966505, "grad_norm": 0.787649393081665, "learning_rate": 7.726795344876526e-05, "loss": 0.004059987887740135, "step": 80120 }, { "epoch": 22.744819755889868, "grad_norm": 0.15042386949062347, "learning_rate": 7.72651149588419e-05, "loss": 0.015277796983718872, "step": 80130 }, { "epoch": 22.747658245813227, "grad_norm": 0.9993942379951477, "learning_rate": 7.726227646891854e-05, "loss": 0.005593614652752876, "step": 80140 }, { "epoch": 22.75049673573659, "grad_norm": 2.2794079780578613, "learning_rate": 7.725943797899518e-05, "loss": 0.003110416606068611, "step": 80150 }, { "epoch": 22.75333522565995, "grad_norm": 6.9930949211120605, "learning_rate": 7.725659948907181e-05, "loss": 0.014611572027206421, "step": 80160 }, { "epoch": 22.75617371558331, "grad_norm": 9.466537475585938, "learning_rate": 7.725376099914846e-05, "loss": 0.0119357168674469, "step": 80170 }, { "epoch": 22.75901220550667, "grad_norm": 0.24673154950141907, "learning_rate": 7.72509225092251e-05, "loss": 0.018336501717567445, "step": 80180 }, { "epoch": 22.76185069543003, "grad_norm": 2.2826266288757324, "learning_rate": 7.724808401930173e-05, "loss": 0.0034449972212314607, "step": 80190 }, { "epoch": 22.764689185353394, "grad_norm": 0.1695210486650467, "learning_rate": 7.724524552937837e-05, "loss": 0.010156305134296417, "step": 80200 }, { "epoch": 22.767527675276753, "grad_norm": 9.505305290222168, "learning_rate": 7.724240703945502e-05, "loss": 0.01257409155368805, "step": 80210 }, { "epoch": 22.770366165200112, "grad_norm": 0.15547753870487213, "learning_rate": 7.723956854953164e-05, "loss": 0.005081860721111298, "step": 80220 }, { "epoch": 22.773204655123475, "grad_norm": 0.007290353532880545, "learning_rate": 7.723673005960828e-05, "loss": 0.008691122382879257, "step": 80230 }, { "epoch": 22.776043145046835, "grad_norm": 5.877913475036621, "learning_rate": 7.723389156968494e-05, "loss": 0.006253403425216675, "step": 80240 }, { "epoch": 22.778881634970197, "grad_norm": 0.4238259494304657, "learning_rate": 7.723105307976157e-05, "loss": 0.012057116627693177, "step": 80250 }, { "epoch": 22.781720124893557, "grad_norm": 0.6454994082450867, "learning_rate": 7.722821458983821e-05, "loss": 0.018121793866157532, "step": 80260 }, { "epoch": 22.784558614816916, "grad_norm": 3.0175228118896484, "learning_rate": 7.722537609991485e-05, "loss": 0.017546574771404266, "step": 80270 }, { "epoch": 22.78739710474028, "grad_norm": 6.3151092529296875, "learning_rate": 7.722253760999149e-05, "loss": 0.0036906927824020387, "step": 80280 }, { "epoch": 22.79023559466364, "grad_norm": 1.3708118200302124, "learning_rate": 7.721969912006812e-05, "loss": 0.01482076644897461, "step": 80290 }, { "epoch": 22.793074084587, "grad_norm": 2.337540864944458, "learning_rate": 7.721686063014477e-05, "loss": 0.008846692740917206, "step": 80300 }, { "epoch": 22.79591257451036, "grad_norm": 0.1788816750049591, "learning_rate": 7.721402214022142e-05, "loss": 0.005152397230267525, "step": 80310 }, { "epoch": 22.79875106443372, "grad_norm": 0.20861689746379852, "learning_rate": 7.721118365029804e-05, "loss": 0.01187308058142662, "step": 80320 }, { "epoch": 22.801589554357083, "grad_norm": 3.3265247344970703, "learning_rate": 7.720834516037468e-05, "loss": 0.00996658280491829, "step": 80330 }, { "epoch": 22.804428044280442, "grad_norm": 0.20800715684890747, "learning_rate": 7.720550667045133e-05, "loss": 0.013279438018798828, "step": 80340 }, { "epoch": 22.807266534203805, "grad_norm": 9.21181583404541, "learning_rate": 7.720266818052795e-05, "loss": 0.008865047991275788, "step": 80350 }, { "epoch": 22.810105024127164, "grad_norm": 2.461883068084717, "learning_rate": 7.71998296906046e-05, "loss": 0.007784314453601837, "step": 80360 }, { "epoch": 22.812943514050524, "grad_norm": 3.8758440017700195, "learning_rate": 7.719699120068125e-05, "loss": 0.01847960352897644, "step": 80370 }, { "epoch": 22.815782003973887, "grad_norm": 0.15455086529254913, "learning_rate": 7.719415271075788e-05, "loss": 0.004468599706888199, "step": 80380 }, { "epoch": 22.818620493897246, "grad_norm": 17.47947883605957, "learning_rate": 7.719131422083452e-05, "loss": 0.02069390267133713, "step": 80390 }, { "epoch": 22.82145898382061, "grad_norm": 10.52957820892334, "learning_rate": 7.718847573091116e-05, "loss": 0.005012344568967819, "step": 80400 }, { "epoch": 22.824297473743968, "grad_norm": 0.2548460364341736, "learning_rate": 7.71856372409878e-05, "loss": 0.006536735594272614, "step": 80410 }, { "epoch": 22.827135963667327, "grad_norm": 1.786026954650879, "learning_rate": 7.718279875106443e-05, "loss": 0.004687326774001121, "step": 80420 }, { "epoch": 22.82997445359069, "grad_norm": 2.1519548892974854, "learning_rate": 7.717996026114108e-05, "loss": 0.006726164370775223, "step": 80430 }, { "epoch": 22.83281294351405, "grad_norm": 8.312111854553223, "learning_rate": 7.717712177121773e-05, "loss": 0.010057786107063293, "step": 80440 }, { "epoch": 22.835651433437413, "grad_norm": 8.229938507080078, "learning_rate": 7.717428328129435e-05, "loss": 0.019519910216331482, "step": 80450 }, { "epoch": 22.838489923360772, "grad_norm": 0.15670490264892578, "learning_rate": 7.7171444791371e-05, "loss": 0.02175893783569336, "step": 80460 }, { "epoch": 22.84132841328413, "grad_norm": 10.244100570678711, "learning_rate": 7.716860630144764e-05, "loss": 0.0371213436126709, "step": 80470 }, { "epoch": 22.844166903207494, "grad_norm": 0.3592592179775238, "learning_rate": 7.716576781152426e-05, "loss": 0.02179994136095047, "step": 80480 }, { "epoch": 22.847005393130853, "grad_norm": 1.7011759281158447, "learning_rate": 7.71629293216009e-05, "loss": 0.017097650468349455, "step": 80490 }, { "epoch": 22.849843883054216, "grad_norm": 0.4747786223888397, "learning_rate": 7.716009083167756e-05, "loss": 0.0077143371105194095, "step": 80500 }, { "epoch": 22.849843883054216, "eval_accuracy": 0.9707509378775354, "eval_loss": 0.11232608556747437, "eval_runtime": 37.5969, "eval_samples_per_second": 418.306, "eval_steps_per_second": 6.543, "step": 80500 }, { "epoch": 22.852682372977576, "grad_norm": 1.95169198513031, "learning_rate": 7.715725234175419e-05, "loss": 0.007816539704799652, "step": 80510 }, { "epoch": 22.855520862900935, "grad_norm": 10.166973114013672, "learning_rate": 7.715441385183083e-05, "loss": 0.00774303749203682, "step": 80520 }, { "epoch": 22.858359352824298, "grad_norm": 0.03639163821935654, "learning_rate": 7.715157536190747e-05, "loss": 0.018021290004253388, "step": 80530 }, { "epoch": 22.861197842747657, "grad_norm": 10.283255577087402, "learning_rate": 7.714873687198411e-05, "loss": 0.020117157697677614, "step": 80540 }, { "epoch": 22.86403633267102, "grad_norm": 5.580199241638184, "learning_rate": 7.714589838206074e-05, "loss": 0.00978466346859932, "step": 80550 }, { "epoch": 22.86687482259438, "grad_norm": 1.4274505376815796, "learning_rate": 7.714305989213738e-05, "loss": 0.015023978054523468, "step": 80560 }, { "epoch": 22.86971331251774, "grad_norm": 3.7662999629974365, "learning_rate": 7.714022140221404e-05, "loss": 0.023614345490932463, "step": 80570 }, { "epoch": 22.8725518024411, "grad_norm": 2.1600341796875, "learning_rate": 7.713738291229066e-05, "loss": 0.01373826265335083, "step": 80580 }, { "epoch": 22.87539029236446, "grad_norm": 0.6059398651123047, "learning_rate": 7.71345444223673e-05, "loss": 0.009813755750656128, "step": 80590 }, { "epoch": 22.878228782287824, "grad_norm": 0.08320900052785873, "learning_rate": 7.713170593244395e-05, "loss": 0.008657921850681306, "step": 80600 }, { "epoch": 22.881067272211183, "grad_norm": 1.0911009311676025, "learning_rate": 7.712886744252058e-05, "loss": 0.008206647634506226, "step": 80610 }, { "epoch": 22.883905762134546, "grad_norm": 7.917581558227539, "learning_rate": 7.712602895259722e-05, "loss": 0.031457763910293576, "step": 80620 }, { "epoch": 22.886744252057905, "grad_norm": 2.0815036296844482, "learning_rate": 7.712319046267387e-05, "loss": 0.012146852165460586, "step": 80630 }, { "epoch": 22.889582741981265, "grad_norm": 0.7023632526397705, "learning_rate": 7.71203519727505e-05, "loss": 0.007493425160646439, "step": 80640 }, { "epoch": 22.892421231904628, "grad_norm": 4.943428993225098, "learning_rate": 7.711751348282714e-05, "loss": 0.008392899483442306, "step": 80650 }, { "epoch": 22.895259721827987, "grad_norm": 0.15019188821315765, "learning_rate": 7.711467499290378e-05, "loss": 0.012198986113071441, "step": 80660 }, { "epoch": 22.89809821175135, "grad_norm": 2.128286600112915, "learning_rate": 7.711183650298042e-05, "loss": 0.01725355088710785, "step": 80670 }, { "epoch": 22.90093670167471, "grad_norm": 0.2459285855293274, "learning_rate": 7.710899801305705e-05, "loss": 0.01541634052991867, "step": 80680 }, { "epoch": 22.90377519159807, "grad_norm": 3.0551724433898926, "learning_rate": 7.710615952313369e-05, "loss": 0.009091270714998245, "step": 80690 }, { "epoch": 22.90661368152143, "grad_norm": 3.374202251434326, "learning_rate": 7.710332103321033e-05, "loss": 0.005875414609909058, "step": 80700 }, { "epoch": 22.90945217144479, "grad_norm": 0.05651824176311493, "learning_rate": 7.710048254328698e-05, "loss": 0.01458112895488739, "step": 80710 }, { "epoch": 22.912290661368154, "grad_norm": 0.8213116526603699, "learning_rate": 7.709764405336362e-05, "loss": 0.008629890531301499, "step": 80720 }, { "epoch": 22.915129151291513, "grad_norm": 0.15686114132404327, "learning_rate": 7.709480556344026e-05, "loss": 0.00797632485628128, "step": 80730 }, { "epoch": 22.917967641214872, "grad_norm": 2.9621317386627197, "learning_rate": 7.709196707351689e-05, "loss": 0.02076922059059143, "step": 80740 }, { "epoch": 22.920806131138235, "grad_norm": 1.1234564781188965, "learning_rate": 7.708912858359353e-05, "loss": 0.010919442027807235, "step": 80750 }, { "epoch": 22.923644621061595, "grad_norm": 0.08816233277320862, "learning_rate": 7.708629009367017e-05, "loss": 0.0051305446773767475, "step": 80760 }, { "epoch": 22.926483110984957, "grad_norm": 11.08006763458252, "learning_rate": 7.708345160374681e-05, "loss": 0.011507345736026764, "step": 80770 }, { "epoch": 22.929321600908317, "grad_norm": 0.2179691642522812, "learning_rate": 7.708061311382345e-05, "loss": 0.004180579259991646, "step": 80780 }, { "epoch": 22.932160090831676, "grad_norm": 0.04716915637254715, "learning_rate": 7.707777462390009e-05, "loss": 0.010029374808073043, "step": 80790 }, { "epoch": 22.93499858075504, "grad_norm": 3.8479762077331543, "learning_rate": 7.707493613397672e-05, "loss": 0.015663395822048187, "step": 80800 }, { "epoch": 22.9378370706784, "grad_norm": 0.6916437745094299, "learning_rate": 7.707209764405336e-05, "loss": 0.007235579192638397, "step": 80810 }, { "epoch": 22.94067556060176, "grad_norm": 0.6383578777313232, "learning_rate": 7.706925915413e-05, "loss": 0.00840734839439392, "step": 80820 }, { "epoch": 22.94351405052512, "grad_norm": 0.04771510139107704, "learning_rate": 7.706642066420664e-05, "loss": 0.019774450361728667, "step": 80830 }, { "epoch": 22.94635254044848, "grad_norm": 0.8613829016685486, "learning_rate": 7.706358217428329e-05, "loss": 0.003354334831237793, "step": 80840 }, { "epoch": 22.949191030371843, "grad_norm": 2.2828640937805176, "learning_rate": 7.706074368435993e-05, "loss": 0.027871066331863405, "step": 80850 }, { "epoch": 22.952029520295202, "grad_norm": 3.7151269912719727, "learning_rate": 7.705790519443657e-05, "loss": 0.010745569318532943, "step": 80860 }, { "epoch": 22.954868010218565, "grad_norm": 7.464791297912598, "learning_rate": 7.70550667045132e-05, "loss": 0.01719197928905487, "step": 80870 }, { "epoch": 22.957706500141924, "grad_norm": 0.18444932997226715, "learning_rate": 7.705222821458984e-05, "loss": 0.0035711243748664857, "step": 80880 }, { "epoch": 22.960544990065284, "grad_norm": 1.0678298473358154, "learning_rate": 7.704938972466648e-05, "loss": 0.002201534062623978, "step": 80890 }, { "epoch": 22.963383479988646, "grad_norm": 0.6928747296333313, "learning_rate": 7.704655123474312e-05, "loss": 0.005387838184833527, "step": 80900 }, { "epoch": 22.966221969912006, "grad_norm": 5.486608982086182, "learning_rate": 7.704371274481976e-05, "loss": 0.006371983140707016, "step": 80910 }, { "epoch": 22.96906045983537, "grad_norm": 2.2720372676849365, "learning_rate": 7.70408742548964e-05, "loss": 0.004830885678529739, "step": 80920 }, { "epoch": 22.971898949758728, "grad_norm": 1.1208019256591797, "learning_rate": 7.703803576497303e-05, "loss": 0.0059481080621480945, "step": 80930 }, { "epoch": 22.974737439682087, "grad_norm": 0.10673771798610687, "learning_rate": 7.703519727504967e-05, "loss": 0.00913172960281372, "step": 80940 }, { "epoch": 22.97757592960545, "grad_norm": 0.8721470236778259, "learning_rate": 7.703235878512631e-05, "loss": 0.009836816787719726, "step": 80950 }, { "epoch": 22.98041441952881, "grad_norm": 1.7987909317016602, "learning_rate": 7.702952029520296e-05, "loss": 0.007826974987983704, "step": 80960 }, { "epoch": 22.983252909452172, "grad_norm": 1.0852606296539307, "learning_rate": 7.70266818052796e-05, "loss": 0.007979839295148849, "step": 80970 }, { "epoch": 22.986091399375532, "grad_norm": 0.4164920747280121, "learning_rate": 7.702384331535624e-05, "loss": 0.00845290720462799, "step": 80980 }, { "epoch": 22.988929889298895, "grad_norm": 8.1318998336792, "learning_rate": 7.702100482543288e-05, "loss": 0.026432469487190247, "step": 80990 }, { "epoch": 22.991768379222254, "grad_norm": 1.2967190742492676, "learning_rate": 7.701816633550951e-05, "loss": 0.0015520129352807998, "step": 81000 }, { "epoch": 22.991768379222254, "eval_accuracy": 0.9769822598079736, "eval_loss": 0.07923009246587753, "eval_runtime": 36.9741, "eval_samples_per_second": 425.352, "eval_steps_per_second": 6.653, "step": 81000 }, { "epoch": 22.994606869145613, "grad_norm": 0.3596697747707367, "learning_rate": 7.701532784558615e-05, "loss": 0.004689115285873413, "step": 81010 }, { "epoch": 22.997445359068976, "grad_norm": 7.809355735778809, "learning_rate": 7.701248935566279e-05, "loss": 0.004823901504278183, "step": 81020 }, { "epoch": 23.000283848992336, "grad_norm": 6.560569763183594, "learning_rate": 7.700965086573943e-05, "loss": 0.007491499930620193, "step": 81030 }, { "epoch": 23.0031223389157, "grad_norm": 1.9543020725250244, "learning_rate": 7.700681237581607e-05, "loss": 0.008111250400543214, "step": 81040 }, { "epoch": 23.005960828839058, "grad_norm": 0.03751612827181816, "learning_rate": 7.700397388589271e-05, "loss": 0.007660305500030518, "step": 81050 }, { "epoch": 23.008799318762417, "grad_norm": 2.235783576965332, "learning_rate": 7.700113539596934e-05, "loss": 0.013505250215530396, "step": 81060 }, { "epoch": 23.01163780868578, "grad_norm": 0.0635543093085289, "learning_rate": 7.699829690604598e-05, "loss": 0.022214835882186888, "step": 81070 }, { "epoch": 23.01447629860914, "grad_norm": 0.6917733550071716, "learning_rate": 7.699545841612262e-05, "loss": 0.004464536905288696, "step": 81080 }, { "epoch": 23.017314788532502, "grad_norm": 0.7307275533676147, "learning_rate": 7.699261992619927e-05, "loss": 0.012608867883682252, "step": 81090 }, { "epoch": 23.02015327845586, "grad_norm": 0.6064980626106262, "learning_rate": 7.698978143627591e-05, "loss": 0.00604487843811512, "step": 81100 }, { "epoch": 23.02299176837922, "grad_norm": 3.3047525882720947, "learning_rate": 7.698694294635255e-05, "loss": 0.009372488409280778, "step": 81110 }, { "epoch": 23.025830258302584, "grad_norm": 0.04034562408924103, "learning_rate": 7.698410445642919e-05, "loss": 0.021719354391098022, "step": 81120 }, { "epoch": 23.028668748225943, "grad_norm": 0.865129292011261, "learning_rate": 7.698126596650582e-05, "loss": 0.012165988981723785, "step": 81130 }, { "epoch": 23.031507238149306, "grad_norm": 6.516345024108887, "learning_rate": 7.697842747658246e-05, "loss": 0.00856238454580307, "step": 81140 }, { "epoch": 23.034345728072665, "grad_norm": 1.6680307388305664, "learning_rate": 7.69755889866591e-05, "loss": 0.018250998854637147, "step": 81150 }, { "epoch": 23.037184217996025, "grad_norm": 0.47110602259635925, "learning_rate": 7.697275049673573e-05, "loss": 0.012021335959434509, "step": 81160 }, { "epoch": 23.040022707919388, "grad_norm": 0.33083492517471313, "learning_rate": 7.696991200681238e-05, "loss": 0.003756391257047653, "step": 81170 }, { "epoch": 23.042861197842747, "grad_norm": 6.448624134063721, "learning_rate": 7.696707351688903e-05, "loss": 0.011584585160017013, "step": 81180 }, { "epoch": 23.04569968776611, "grad_norm": 0.11146972328424454, "learning_rate": 7.696423502696565e-05, "loss": 0.008874837309122086, "step": 81190 }, { "epoch": 23.04853817768947, "grad_norm": 7.409440994262695, "learning_rate": 7.696168038603463e-05, "loss": 0.018270556628704072, "step": 81200 }, { "epoch": 23.05137666761283, "grad_norm": 2.801926374435425, "learning_rate": 7.695884189611127e-05, "loss": 0.0070484824478626255, "step": 81210 }, { "epoch": 23.05421515753619, "grad_norm": 2.7060954570770264, "learning_rate": 7.695600340618792e-05, "loss": 0.004448938369750977, "step": 81220 }, { "epoch": 23.05705364745955, "grad_norm": 0.05945291742682457, "learning_rate": 7.695316491626456e-05, "loss": 0.004018323123455047, "step": 81230 }, { "epoch": 23.059892137382914, "grad_norm": 9.314692497253418, "learning_rate": 7.695032642634118e-05, "loss": 0.019163666665554045, "step": 81240 }, { "epoch": 23.062730627306273, "grad_norm": 0.21715980768203735, "learning_rate": 7.694748793641783e-05, "loss": 0.0016057725995779037, "step": 81250 }, { "epoch": 23.065569117229632, "grad_norm": 0.0401313379406929, "learning_rate": 7.694464944649447e-05, "loss": 0.001777767762541771, "step": 81260 }, { "epoch": 23.068407607152995, "grad_norm": 1.3117729425430298, "learning_rate": 7.694181095657111e-05, "loss": 0.008886754512786865, "step": 81270 }, { "epoch": 23.071246097076354, "grad_norm": 2.1394894123077393, "learning_rate": 7.693897246664775e-05, "loss": 0.005376420170068741, "step": 81280 }, { "epoch": 23.074084586999717, "grad_norm": 1.4896842241287231, "learning_rate": 7.693613397672439e-05, "loss": 0.0073803536593914035, "step": 81290 }, { "epoch": 23.076923076923077, "grad_norm": 2.903303861618042, "learning_rate": 7.693329548680103e-05, "loss": 0.016283400356769562, "step": 81300 }, { "epoch": 23.079761566846436, "grad_norm": 0.919712483882904, "learning_rate": 7.693045699687766e-05, "loss": 0.00851224660873413, "step": 81310 }, { "epoch": 23.0826000567698, "grad_norm": 5.389035701751709, "learning_rate": 7.69276185069543e-05, "loss": 0.007847213745117187, "step": 81320 }, { "epoch": 23.085438546693158, "grad_norm": 5.51215124130249, "learning_rate": 7.692478001703094e-05, "loss": 0.027783894538879396, "step": 81330 }, { "epoch": 23.08827703661652, "grad_norm": 0.06448338180780411, "learning_rate": 7.692194152710757e-05, "loss": 0.006015015393495559, "step": 81340 }, { "epoch": 23.09111552653988, "grad_norm": 12.007226943969727, "learning_rate": 7.691910303718423e-05, "loss": 0.010529353469610214, "step": 81350 }, { "epoch": 23.093954016463243, "grad_norm": 0.029768139123916626, "learning_rate": 7.691626454726087e-05, "loss": 0.00293233934789896, "step": 81360 }, { "epoch": 23.096792506386603, "grad_norm": 6.821073532104492, "learning_rate": 7.69134260573375e-05, "loss": 0.005704577639698982, "step": 81370 }, { "epoch": 23.099630996309962, "grad_norm": 0.8387489318847656, "learning_rate": 7.691058756741414e-05, "loss": 0.004422587901353836, "step": 81380 }, { "epoch": 23.102469486233325, "grad_norm": 0.20926372706890106, "learning_rate": 7.690774907749078e-05, "loss": 0.01953086704015732, "step": 81390 }, { "epoch": 23.105307976156684, "grad_norm": 10.798333168029785, "learning_rate": 7.690491058756742e-05, "loss": 0.015512165427207947, "step": 81400 }, { "epoch": 23.108146466080047, "grad_norm": 0.3008500039577484, "learning_rate": 7.690207209764406e-05, "loss": 0.0019133590161800385, "step": 81410 }, { "epoch": 23.110984956003406, "grad_norm": 4.402613162994385, "learning_rate": 7.68992336077207e-05, "loss": 0.005158171057701111, "step": 81420 }, { "epoch": 23.113823445926766, "grad_norm": 0.023862946778535843, "learning_rate": 7.689639511779734e-05, "loss": 0.0032924942672252653, "step": 81430 }, { "epoch": 23.11666193585013, "grad_norm": 6.357728958129883, "learning_rate": 7.689355662787397e-05, "loss": 0.0069560021162033085, "step": 81440 }, { "epoch": 23.119500425773488, "grad_norm": 0.3322376012802124, "learning_rate": 7.689071813795061e-05, "loss": 0.00860765427350998, "step": 81450 }, { "epoch": 23.12233891569685, "grad_norm": 0.06234443560242653, "learning_rate": 7.688787964802725e-05, "loss": 0.013141393661499023, "step": 81460 }, { "epoch": 23.12517740562021, "grad_norm": 0.6384140253067017, "learning_rate": 7.688504115810388e-05, "loss": 0.003967722505331039, "step": 81470 }, { "epoch": 23.12801589554357, "grad_norm": 7.180395603179932, "learning_rate": 7.688220266818054e-05, "loss": 0.013569165766239167, "step": 81480 }, { "epoch": 23.130854385466932, "grad_norm": 7.968719005584717, "learning_rate": 7.687936417825718e-05, "loss": 0.01835716366767883, "step": 81490 }, { "epoch": 23.13369287539029, "grad_norm": 6.476689338684082, "learning_rate": 7.68765256883338e-05, "loss": 0.015287229418754577, "step": 81500 }, { "epoch": 23.13369287539029, "eval_accuracy": 0.9732943345838367, "eval_loss": 0.1037626713514328, "eval_runtime": 35.6701, "eval_samples_per_second": 440.901, "eval_steps_per_second": 6.897, "step": 81500 }, { "epoch": 23.136531365313655, "grad_norm": 0.13207989931106567, "learning_rate": 7.687368719841045e-05, "loss": 0.01481889933347702, "step": 81510 }, { "epoch": 23.139369855237014, "grad_norm": 0.9117758274078369, "learning_rate": 7.687084870848709e-05, "loss": 0.01754283010959625, "step": 81520 }, { "epoch": 23.142208345160373, "grad_norm": 2.651644468307495, "learning_rate": 7.686801021856373e-05, "loss": 0.028936332464218138, "step": 81530 }, { "epoch": 23.145046835083736, "grad_norm": 0.2869761288166046, "learning_rate": 7.686517172864036e-05, "loss": 0.016687460243701935, "step": 81540 }, { "epoch": 23.147885325007096, "grad_norm": 8.448064804077148, "learning_rate": 7.686233323871701e-05, "loss": 0.013673782348632812, "step": 81550 }, { "epoch": 23.15072381493046, "grad_norm": 9.373184204101562, "learning_rate": 7.685949474879365e-05, "loss": 0.006771321594715119, "step": 81560 }, { "epoch": 23.153562304853818, "grad_norm": 0.26870954036712646, "learning_rate": 7.685665625887028e-05, "loss": 0.009419485181570052, "step": 81570 }, { "epoch": 23.156400794777177, "grad_norm": 1.7603081464767456, "learning_rate": 7.685381776894692e-05, "loss": 0.0035121195018291474, "step": 81580 }, { "epoch": 23.15923928470054, "grad_norm": 0.2029772698879242, "learning_rate": 7.685097927902357e-05, "loss": 0.010575334727764129, "step": 81590 }, { "epoch": 23.1620777746239, "grad_norm": 0.1182018369436264, "learning_rate": 7.684814078910019e-05, "loss": 0.002596491388976574, "step": 81600 }, { "epoch": 23.164916264547262, "grad_norm": 1.8105189800262451, "learning_rate": 7.684530229917685e-05, "loss": 0.013197554647922516, "step": 81610 }, { "epoch": 23.16775475447062, "grad_norm": 0.5690510869026184, "learning_rate": 7.684246380925349e-05, "loss": 0.0038293272256851195, "step": 81620 }, { "epoch": 23.17059324439398, "grad_norm": 0.627167284488678, "learning_rate": 7.683962531933012e-05, "loss": 0.004614150896668434, "step": 81630 }, { "epoch": 23.173431734317344, "grad_norm": 6.707760334014893, "learning_rate": 7.683678682940676e-05, "loss": 0.011009125411510468, "step": 81640 }, { "epoch": 23.176270224240703, "grad_norm": 0.2908567488193512, "learning_rate": 7.68339483394834e-05, "loss": 0.004563101381063461, "step": 81650 }, { "epoch": 23.179108714164066, "grad_norm": 0.38182368874549866, "learning_rate": 7.683110984956004e-05, "loss": 0.020302999019622802, "step": 81660 }, { "epoch": 23.181947204087425, "grad_norm": 0.8528896570205688, "learning_rate": 7.682827135963667e-05, "loss": 0.0071938283741474155, "step": 81670 }, { "epoch": 23.184785694010785, "grad_norm": 0.1856847107410431, "learning_rate": 7.682543286971332e-05, "loss": 0.009772911667823792, "step": 81680 }, { "epoch": 23.187624183934147, "grad_norm": 0.0840163603425026, "learning_rate": 7.682259437978995e-05, "loss": 0.008996152132749558, "step": 81690 }, { "epoch": 23.190462673857507, "grad_norm": 0.20470739901065826, "learning_rate": 7.681975588986659e-05, "loss": 0.0042634628713130954, "step": 81700 }, { "epoch": 23.19330116378087, "grad_norm": 0.6801945567131042, "learning_rate": 7.681691739994323e-05, "loss": 0.023178763687610626, "step": 81710 }, { "epoch": 23.19613965370423, "grad_norm": 3.3054616451263428, "learning_rate": 7.681407891001988e-05, "loss": 0.005546675622463226, "step": 81720 }, { "epoch": 23.19897814362759, "grad_norm": 1.6950911283493042, "learning_rate": 7.68112404200965e-05, "loss": 0.003949405252933502, "step": 81730 }, { "epoch": 23.20181663355095, "grad_norm": 0.5547425150871277, "learning_rate": 7.680840193017315e-05, "loss": 0.011741380393505096, "step": 81740 }, { "epoch": 23.20465512347431, "grad_norm": 1.562556505203247, "learning_rate": 7.68055634402498e-05, "loss": 0.003937307745218277, "step": 81750 }, { "epoch": 23.207493613397673, "grad_norm": 4.891447067260742, "learning_rate": 7.680272495032643e-05, "loss": 0.009959622472524642, "step": 81760 }, { "epoch": 23.210332103321033, "grad_norm": 7.319037914276123, "learning_rate": 7.679988646040307e-05, "loss": 0.01680293083190918, "step": 81770 }, { "epoch": 23.213170593244396, "grad_norm": 1.2419548034667969, "learning_rate": 7.679704797047971e-05, "loss": 0.0023527096956968306, "step": 81780 }, { "epoch": 23.216009083167755, "grad_norm": 0.04639270901679993, "learning_rate": 7.679420948055634e-05, "loss": 0.003205937519669533, "step": 81790 }, { "epoch": 23.218847573091114, "grad_norm": 2.0982062816619873, "learning_rate": 7.679137099063298e-05, "loss": 0.00859610065817833, "step": 81800 }, { "epoch": 23.221686063014477, "grad_norm": 0.027259139344096184, "learning_rate": 7.678853250070963e-05, "loss": 0.008101701736450195, "step": 81810 }, { "epoch": 23.224524552937837, "grad_norm": 1.1900553703308105, "learning_rate": 7.678569401078626e-05, "loss": 0.002190883830189705, "step": 81820 }, { "epoch": 23.2273630428612, "grad_norm": 7.871462821960449, "learning_rate": 7.67828555208629e-05, "loss": 0.007357116043567658, "step": 81830 }, { "epoch": 23.23020153278456, "grad_norm": 0.15519435703754425, "learning_rate": 7.678001703093955e-05, "loss": 0.005924094095826149, "step": 81840 }, { "epoch": 23.233040022707918, "grad_norm": 2.9074606895446777, "learning_rate": 7.677717854101619e-05, "loss": 0.014111007750034332, "step": 81850 }, { "epoch": 23.23587851263128, "grad_norm": 0.30423080921173096, "learning_rate": 7.677434005109281e-05, "loss": 0.00619983933866024, "step": 81860 }, { "epoch": 23.23871700255464, "grad_norm": 7.845170497894287, "learning_rate": 7.677150156116946e-05, "loss": 0.008826737105846406, "step": 81870 }, { "epoch": 23.241555492478003, "grad_norm": 0.44623863697052, "learning_rate": 7.676866307124611e-05, "loss": 0.015175335109233856, "step": 81880 }, { "epoch": 23.244393982401363, "grad_norm": 0.14766062796115875, "learning_rate": 7.676582458132274e-05, "loss": 0.012079689651727676, "step": 81890 }, { "epoch": 23.247232472324722, "grad_norm": 6.153023719787598, "learning_rate": 7.676298609139938e-05, "loss": 0.00824093520641327, "step": 81900 }, { "epoch": 23.250070962248085, "grad_norm": 0.1399608850479126, "learning_rate": 7.676014760147602e-05, "loss": 0.013990193605422974, "step": 81910 }, { "epoch": 23.252909452171444, "grad_norm": 8.042696952819824, "learning_rate": 7.675730911155265e-05, "loss": 0.006334808468818664, "step": 81920 }, { "epoch": 23.255747942094807, "grad_norm": 0.5858334898948669, "learning_rate": 7.675447062162929e-05, "loss": 0.008942129462957383, "step": 81930 }, { "epoch": 23.258586432018166, "grad_norm": 2.7674648761749268, "learning_rate": 7.675163213170593e-05, "loss": 0.02443698197603226, "step": 81940 }, { "epoch": 23.261424921941526, "grad_norm": 0.11328199505805969, "learning_rate": 7.674879364178257e-05, "loss": 0.003637566789984703, "step": 81950 }, { "epoch": 23.26426341186489, "grad_norm": 0.4787064492702484, "learning_rate": 7.674595515185921e-05, "loss": 0.009274352341890335, "step": 81960 }, { "epoch": 23.267101901788248, "grad_norm": 0.527097225189209, "learning_rate": 7.674311666193586e-05, "loss": 0.013374726474285125, "step": 81970 }, { "epoch": 23.26994039171161, "grad_norm": 0.28882959485054016, "learning_rate": 7.67402781720125e-05, "loss": 0.01637282222509384, "step": 81980 }, { "epoch": 23.27277888163497, "grad_norm": 3.737379789352417, "learning_rate": 7.673743968208913e-05, "loss": 0.00799555778503418, "step": 81990 }, { "epoch": 23.27561737155833, "grad_norm": 0.6791947484016418, "learning_rate": 7.673460119216577e-05, "loss": 0.01558711677789688, "step": 82000 }, { "epoch": 23.27561737155833, "eval_accuracy": 0.9710688624658231, "eval_loss": 0.10240678489208221, "eval_runtime": 36.1118, "eval_samples_per_second": 435.509, "eval_steps_per_second": 6.812, "step": 82000 }, { "epoch": 23.278455861481692, "grad_norm": 0.10400428622961044, "learning_rate": 7.673176270224242e-05, "loss": 0.006929214298725128, "step": 82010 }, { "epoch": 23.28129435140505, "grad_norm": 0.5660949349403381, "learning_rate": 7.672892421231905e-05, "loss": 0.013738700747489929, "step": 82020 }, { "epoch": 23.284132841328415, "grad_norm": 0.7232049703598022, "learning_rate": 7.672608572239569e-05, "loss": 0.015219768881797791, "step": 82030 }, { "epoch": 23.286971331251774, "grad_norm": 0.23227258026599884, "learning_rate": 7.672324723247233e-05, "loss": 0.0062318965792655945, "step": 82040 }, { "epoch": 23.289809821175133, "grad_norm": 6.245209693908691, "learning_rate": 7.672040874254896e-05, "loss": 0.01120917946100235, "step": 82050 }, { "epoch": 23.292648311098496, "grad_norm": 0.29765066504478455, "learning_rate": 7.67175702526256e-05, "loss": 0.01128171980381012, "step": 82060 }, { "epoch": 23.295486801021855, "grad_norm": 0.4770434498786926, "learning_rate": 7.671473176270224e-05, "loss": 0.002929018810391426, "step": 82070 }, { "epoch": 23.29832529094522, "grad_norm": 0.21925266087055206, "learning_rate": 7.671189327277888e-05, "loss": 0.005688890814781189, "step": 82080 }, { "epoch": 23.301163780868578, "grad_norm": 0.20042312145233154, "learning_rate": 7.670905478285553e-05, "loss": 0.016575488448143005, "step": 82090 }, { "epoch": 23.304002270791937, "grad_norm": 1.1242927312850952, "learning_rate": 7.670621629293217e-05, "loss": 0.0023508623242378237, "step": 82100 }, { "epoch": 23.3068407607153, "grad_norm": 3.7072973251342773, "learning_rate": 7.670337780300881e-05, "loss": 0.009111734479665757, "step": 82110 }, { "epoch": 23.30967925063866, "grad_norm": 0.8564975261688232, "learning_rate": 7.670053931308544e-05, "loss": 0.017138220369815826, "step": 82120 }, { "epoch": 23.312517740562022, "grad_norm": 0.9721642732620239, "learning_rate": 7.669770082316208e-05, "loss": 0.00844324827194214, "step": 82130 }, { "epoch": 23.31535623048538, "grad_norm": 0.15315192937850952, "learning_rate": 7.669486233323873e-05, "loss": 0.0028723502531647683, "step": 82140 }, { "epoch": 23.31819472040874, "grad_norm": 0.198772132396698, "learning_rate": 7.669202384331536e-05, "loss": 0.009109287708997726, "step": 82150 }, { "epoch": 23.321033210332104, "grad_norm": 0.06452597677707672, "learning_rate": 7.6689185353392e-05, "loss": 0.007885898649692535, "step": 82160 }, { "epoch": 23.323871700255463, "grad_norm": 0.03093358501791954, "learning_rate": 7.668634686346864e-05, "loss": 0.010780239105224609, "step": 82170 }, { "epoch": 23.326710190178826, "grad_norm": 7.463912010192871, "learning_rate": 7.668350837354527e-05, "loss": 0.019592815637588502, "step": 82180 }, { "epoch": 23.329548680102185, "grad_norm": 0.2365385740995407, "learning_rate": 7.668066988362191e-05, "loss": 0.005379261821508408, "step": 82190 }, { "epoch": 23.332387170025548, "grad_norm": 1.8277747631072998, "learning_rate": 7.667783139369855e-05, "loss": 0.010969098657369614, "step": 82200 }, { "epoch": 23.335225659948907, "grad_norm": 0.7646867632865906, "learning_rate": 7.66749929037752e-05, "loss": 0.009833910316228867, "step": 82210 }, { "epoch": 23.338064149872267, "grad_norm": 10.94370174407959, "learning_rate": 7.667215441385184e-05, "loss": 0.016024944186210633, "step": 82220 }, { "epoch": 23.34090263979563, "grad_norm": 4.214101314544678, "learning_rate": 7.666931592392848e-05, "loss": 0.005390293896198273, "step": 82230 }, { "epoch": 23.34374112971899, "grad_norm": 2.245440721511841, "learning_rate": 7.666647743400512e-05, "loss": 0.024965739250183104, "step": 82240 }, { "epoch": 23.346579619642352, "grad_norm": 0.21125191450119019, "learning_rate": 7.666363894408175e-05, "loss": 0.009628114104270936, "step": 82250 }, { "epoch": 23.34941810956571, "grad_norm": 0.5894094109535217, "learning_rate": 7.666080045415839e-05, "loss": 0.009629537165164948, "step": 82260 }, { "epoch": 23.35225659948907, "grad_norm": 7.2881388664245605, "learning_rate": 7.665796196423503e-05, "loss": 0.03168748915195465, "step": 82270 }, { "epoch": 23.355095089412433, "grad_norm": 1.2618179321289062, "learning_rate": 7.665512347431167e-05, "loss": 0.010381420701742172, "step": 82280 }, { "epoch": 23.357933579335793, "grad_norm": 6.128857612609863, "learning_rate": 7.665228498438831e-05, "loss": 0.0074318088591098785, "step": 82290 }, { "epoch": 23.360772069259156, "grad_norm": 8.164018630981445, "learning_rate": 7.664944649446495e-05, "loss": 0.00485217273235321, "step": 82300 }, { "epoch": 23.363610559182515, "grad_norm": 0.850664496421814, "learning_rate": 7.664660800454158e-05, "loss": 0.007349809259176254, "step": 82310 }, { "epoch": 23.366449049105874, "grad_norm": 0.07353486120700836, "learning_rate": 7.664376951461822e-05, "loss": 0.0303107887506485, "step": 82320 }, { "epoch": 23.369287539029237, "grad_norm": 14.458773612976074, "learning_rate": 7.664093102469486e-05, "loss": 0.012528590857982635, "step": 82330 }, { "epoch": 23.372126028952596, "grad_norm": 0.240944042801857, "learning_rate": 7.66380925347715e-05, "loss": 0.022163794934749605, "step": 82340 }, { "epoch": 23.37496451887596, "grad_norm": 6.242356300354004, "learning_rate": 7.663525404484815e-05, "loss": 0.02319914400577545, "step": 82350 }, { "epoch": 23.37780300879932, "grad_norm": 0.6697514057159424, "learning_rate": 7.663241555492479e-05, "loss": 0.008798914402723313, "step": 82360 }, { "epoch": 23.380641498722678, "grad_norm": 0.16965435445308685, "learning_rate": 7.662957706500143e-05, "loss": 0.011322137713432313, "step": 82370 }, { "epoch": 23.38347998864604, "grad_norm": 0.5556501746177673, "learning_rate": 7.662673857507806e-05, "loss": 0.006774319708347321, "step": 82380 }, { "epoch": 23.3863184785694, "grad_norm": 2.17026686668396, "learning_rate": 7.66239000851547e-05, "loss": 0.006023798882961273, "step": 82390 }, { "epoch": 23.389156968492763, "grad_norm": 6.107196807861328, "learning_rate": 7.662106159523134e-05, "loss": 0.01598813235759735, "step": 82400 }, { "epoch": 23.391995458416122, "grad_norm": 2.0219736099243164, "learning_rate": 7.661822310530798e-05, "loss": 0.01825248748064041, "step": 82410 }, { "epoch": 23.394833948339482, "grad_norm": 8.172938346862793, "learning_rate": 7.661538461538462e-05, "loss": 0.009701067209243774, "step": 82420 }, { "epoch": 23.397672438262845, "grad_norm": 2.4970438480377197, "learning_rate": 7.661254612546126e-05, "loss": 0.010900556296110153, "step": 82430 }, { "epoch": 23.400510928186204, "grad_norm": 4.4375081062316895, "learning_rate": 7.660970763553789e-05, "loss": 0.010027940571308135, "step": 82440 }, { "epoch": 23.403349418109567, "grad_norm": 1.4317631721496582, "learning_rate": 7.660686914561453e-05, "loss": 0.004000164940953255, "step": 82450 }, { "epoch": 23.406187908032926, "grad_norm": 0.4155215919017792, "learning_rate": 7.660403065569118e-05, "loss": 0.0295761376619339, "step": 82460 }, { "epoch": 23.409026397956286, "grad_norm": 6.0767316818237305, "learning_rate": 7.660119216576782e-05, "loss": 0.021253062784671782, "step": 82470 }, { "epoch": 23.41186488787965, "grad_norm": 3.781285047531128, "learning_rate": 7.659835367584446e-05, "loss": 0.011869900673627854, "step": 82480 }, { "epoch": 23.414703377803008, "grad_norm": 8.221796989440918, "learning_rate": 7.65955151859211e-05, "loss": 0.014174406230449677, "step": 82490 }, { "epoch": 23.41754186772637, "grad_norm": 0.1543228030204773, "learning_rate": 7.659267669599774e-05, "loss": 0.005767758935689926, "step": 82500 }, { "epoch": 23.41754186772637, "eval_accuracy": 0.9701786736186176, "eval_loss": 0.11003053933382034, "eval_runtime": 37.1236, "eval_samples_per_second": 423.638, "eval_steps_per_second": 6.627, "step": 82500 }, { "epoch": 23.42038035764973, "grad_norm": 0.24461278319358826, "learning_rate": 7.658983820607437e-05, "loss": 0.018075646460056306, "step": 82510 }, { "epoch": 23.42321884757309, "grad_norm": 0.6870940327644348, "learning_rate": 7.658699971615101e-05, "loss": 0.01929754912853241, "step": 82520 }, { "epoch": 23.426057337496452, "grad_norm": 1.3880414962768555, "learning_rate": 7.658416122622765e-05, "loss": 0.007120543718338012, "step": 82530 }, { "epoch": 23.42889582741981, "grad_norm": 0.21735821664333344, "learning_rate": 7.658132273630429e-05, "loss": 0.005458180606365204, "step": 82540 }, { "epoch": 23.431734317343174, "grad_norm": 0.038668423891067505, "learning_rate": 7.657848424638093e-05, "loss": 0.0031930610537528993, "step": 82550 }, { "epoch": 23.434572807266534, "grad_norm": 0.7962296605110168, "learning_rate": 7.657564575645758e-05, "loss": 0.009740663319826126, "step": 82560 }, { "epoch": 23.437411297189897, "grad_norm": 0.25888511538505554, "learning_rate": 7.65728072665342e-05, "loss": 0.010283946990966797, "step": 82570 }, { "epoch": 23.440249787113256, "grad_norm": 1.0867631435394287, "learning_rate": 7.656996877661084e-05, "loss": 0.0032704465091228487, "step": 82580 }, { "epoch": 23.443088277036615, "grad_norm": 0.1953134387731552, "learning_rate": 7.656713028668749e-05, "loss": 0.019503585994243622, "step": 82590 }, { "epoch": 23.44592676695998, "grad_norm": 0.41204342246055603, "learning_rate": 7.656429179676413e-05, "loss": 0.008747527003288269, "step": 82600 }, { "epoch": 23.448765256883338, "grad_norm": 3.902374267578125, "learning_rate": 7.656145330684077e-05, "loss": 0.01469370573759079, "step": 82610 }, { "epoch": 23.4516037468067, "grad_norm": 0.16031230986118317, "learning_rate": 7.655861481691741e-05, "loss": 0.0038981132209300997, "step": 82620 }, { "epoch": 23.45444223673006, "grad_norm": 0.11208526045084, "learning_rate": 7.655577632699404e-05, "loss": 0.0036126576364040376, "step": 82630 }, { "epoch": 23.45728072665342, "grad_norm": 0.46853017807006836, "learning_rate": 7.655293783707068e-05, "loss": 0.005664881691336632, "step": 82640 }, { "epoch": 23.460119216576782, "grad_norm": 1.2285023927688599, "learning_rate": 7.655009934714732e-05, "loss": 0.006935249269008637, "step": 82650 }, { "epoch": 23.46295770650014, "grad_norm": 1.4592968225479126, "learning_rate": 7.654726085722396e-05, "loss": 0.004635683819651603, "step": 82660 }, { "epoch": 23.465796196423504, "grad_norm": 0.8710943460464478, "learning_rate": 7.654442236730059e-05, "loss": 0.006511399894952774, "step": 82670 }, { "epoch": 23.468634686346864, "grad_norm": 1.8328312635421753, "learning_rate": 7.654158387737724e-05, "loss": 0.012214802950620652, "step": 82680 }, { "epoch": 23.471473176270223, "grad_norm": 0.18338333070278168, "learning_rate": 7.653874538745389e-05, "loss": 0.007677891850471496, "step": 82690 }, { "epoch": 23.474311666193586, "grad_norm": 1.3060551881790161, "learning_rate": 7.653590689753051e-05, "loss": 0.020458313822746276, "step": 82700 }, { "epoch": 23.477150156116945, "grad_norm": 15.411857604980469, "learning_rate": 7.653306840760716e-05, "loss": 0.015999168157577515, "step": 82710 }, { "epoch": 23.479988646040308, "grad_norm": 0.6500678062438965, "learning_rate": 7.65302299176838e-05, "loss": 0.00863279178738594, "step": 82720 }, { "epoch": 23.482827135963667, "grad_norm": 14.04487419128418, "learning_rate": 7.652739142776042e-05, "loss": 0.022079755365848542, "step": 82730 }, { "epoch": 23.485665625887027, "grad_norm": 3.353844404220581, "learning_rate": 7.652455293783708e-05, "loss": 0.033744871616363525, "step": 82740 }, { "epoch": 23.48850411581039, "grad_norm": 0.39460763335227966, "learning_rate": 7.652171444791372e-05, "loss": 0.011075519770383836, "step": 82750 }, { "epoch": 23.49134260573375, "grad_norm": 0.019491074606776237, "learning_rate": 7.651887595799035e-05, "loss": 0.003433636948466301, "step": 82760 }, { "epoch": 23.49418109565711, "grad_norm": 1.0429631471633911, "learning_rate": 7.651603746806699e-05, "loss": 0.012812730669975281, "step": 82770 }, { "epoch": 23.49701958558047, "grad_norm": 12.816428184509277, "learning_rate": 7.651319897814363e-05, "loss": 0.03752733170986176, "step": 82780 }, { "epoch": 23.49985807550383, "grad_norm": 0.7258029580116272, "learning_rate": 7.651036048822027e-05, "loss": 0.015445084869861602, "step": 82790 }, { "epoch": 23.502696565427193, "grad_norm": 2.853813409805298, "learning_rate": 7.65075219982969e-05, "loss": 0.013580481708049773, "step": 82800 }, { "epoch": 23.505535055350553, "grad_norm": 0.132659912109375, "learning_rate": 7.650468350837356e-05, "loss": 0.0026641201227903368, "step": 82810 }, { "epoch": 23.508373545273916, "grad_norm": 1.1652557849884033, "learning_rate": 7.65018450184502e-05, "loss": 0.005946411937475205, "step": 82820 }, { "epoch": 23.511212035197275, "grad_norm": 8.858762741088867, "learning_rate": 7.649900652852682e-05, "loss": 0.009072017669677735, "step": 82830 }, { "epoch": 23.514050525120634, "grad_norm": 9.33841323852539, "learning_rate": 7.649616803860347e-05, "loss": 0.0138062983751297, "step": 82840 }, { "epoch": 23.516889015043997, "grad_norm": 9.081640243530273, "learning_rate": 7.649332954868011e-05, "loss": 0.009516554325819016, "step": 82850 }, { "epoch": 23.519727504967356, "grad_norm": 0.11178320646286011, "learning_rate": 7.649049105875674e-05, "loss": 0.013533645868301391, "step": 82860 }, { "epoch": 23.52256599489072, "grad_norm": 0.16800419986248016, "learning_rate": 7.648765256883338e-05, "loss": 0.010853425413370133, "step": 82870 }, { "epoch": 23.52540448481408, "grad_norm": 1.2921358346939087, "learning_rate": 7.648481407891003e-05, "loss": 0.009971789270639419, "step": 82880 }, { "epoch": 23.528242974737438, "grad_norm": 2.187589168548584, "learning_rate": 7.648197558898666e-05, "loss": 0.00893573760986328, "step": 82890 }, { "epoch": 23.5310814646608, "grad_norm": 6.661747932434082, "learning_rate": 7.64791370990633e-05, "loss": 0.010074706375598907, "step": 82900 }, { "epoch": 23.53391995458416, "grad_norm": 0.06168321892619133, "learning_rate": 7.647629860913994e-05, "loss": 0.025014305114746095, "step": 82910 }, { "epoch": 23.536758444507523, "grad_norm": 0.78514564037323, "learning_rate": 7.647346011921658e-05, "loss": 0.007557832449674606, "step": 82920 }, { "epoch": 23.539596934430882, "grad_norm": 0.6691725850105286, "learning_rate": 7.647062162929321e-05, "loss": 0.021049922704696654, "step": 82930 }, { "epoch": 23.542435424354245, "grad_norm": 10.922364234924316, "learning_rate": 7.646778313936987e-05, "loss": 0.010700206458568572, "step": 82940 }, { "epoch": 23.545273914277605, "grad_norm": 0.07801969349384308, "learning_rate": 7.646494464944651e-05, "loss": 0.01139642596244812, "step": 82950 }, { "epoch": 23.548112404200964, "grad_norm": 0.13847509026527405, "learning_rate": 7.646210615952314e-05, "loss": 0.004735830798745155, "step": 82960 }, { "epoch": 23.550950894124327, "grad_norm": 9.68779182434082, "learning_rate": 7.645926766959978e-05, "loss": 0.01046949028968811, "step": 82970 }, { "epoch": 23.553789384047686, "grad_norm": 8.555501937866211, "learning_rate": 7.645642917967642e-05, "loss": 0.010984577238559723, "step": 82980 }, { "epoch": 23.55662787397105, "grad_norm": 0.08500794321298599, "learning_rate": 7.645359068975305e-05, "loss": 0.01838345229625702, "step": 82990 }, { "epoch": 23.55946636389441, "grad_norm": 0.24740268290042877, "learning_rate": 7.645075219982969e-05, "loss": 0.016810990869998932, "step": 83000 }, { "epoch": 23.55946636389441, "eval_accuracy": 0.9768550899726585, "eval_loss": 0.08165000379085541, "eval_runtime": 34.8811, "eval_samples_per_second": 450.874, "eval_steps_per_second": 7.053, "step": 83000 }, { "epoch": 23.562304853817768, "grad_norm": 0.053957499563694, "learning_rate": 7.644791370990634e-05, "loss": 0.0016903962939977645, "step": 83010 }, { "epoch": 23.56514334374113, "grad_norm": 1.4073177576065063, "learning_rate": 7.644507521998297e-05, "loss": 0.008111879229545593, "step": 83020 }, { "epoch": 23.56798183366449, "grad_norm": 2.554368495941162, "learning_rate": 7.644223673005961e-05, "loss": 0.009932426363229751, "step": 83030 }, { "epoch": 23.570820323587853, "grad_norm": 3.4561588764190674, "learning_rate": 7.643939824013625e-05, "loss": 0.023860082030296326, "step": 83040 }, { "epoch": 23.573658813511212, "grad_norm": 0.27410221099853516, "learning_rate": 7.64365597502129e-05, "loss": 0.02107025533914566, "step": 83050 }, { "epoch": 23.57649730343457, "grad_norm": 7.200558662414551, "learning_rate": 7.643372126028952e-05, "loss": 0.030504381656646727, "step": 83060 }, { "epoch": 23.579335793357934, "grad_norm": 2.58229923248291, "learning_rate": 7.643088277036616e-05, "loss": 0.00971253588795662, "step": 83070 }, { "epoch": 23.582174283281294, "grad_norm": 9.228938102722168, "learning_rate": 7.642804428044282e-05, "loss": 0.007369693368673325, "step": 83080 }, { "epoch": 23.585012773204657, "grad_norm": 0.3336181044578552, "learning_rate": 7.642520579051945e-05, "loss": 0.008686105161905289, "step": 83090 }, { "epoch": 23.587851263128016, "grad_norm": 6.909820556640625, "learning_rate": 7.642236730059609e-05, "loss": 0.0107952319085598, "step": 83100 }, { "epoch": 23.590689753051375, "grad_norm": 0.796527087688446, "learning_rate": 7.641952881067273e-05, "loss": 0.01300070881843567, "step": 83110 }, { "epoch": 23.593528242974738, "grad_norm": 1.4561911821365356, "learning_rate": 7.641669032074936e-05, "loss": 0.004566886648535729, "step": 83120 }, { "epoch": 23.596366732898097, "grad_norm": 0.4071297347545624, "learning_rate": 7.6413851830826e-05, "loss": 0.014137823879718781, "step": 83130 }, { "epoch": 23.59920522282146, "grad_norm": 0.9633180499076843, "learning_rate": 7.641101334090265e-05, "loss": 0.01902862638235092, "step": 83140 }, { "epoch": 23.60204371274482, "grad_norm": 6.373671054840088, "learning_rate": 7.640817485097928e-05, "loss": 0.009961824119091033, "step": 83150 }, { "epoch": 23.60488220266818, "grad_norm": 0.42503249645233154, "learning_rate": 7.640533636105592e-05, "loss": 0.003250456973910332, "step": 83160 }, { "epoch": 23.607720692591542, "grad_norm": 0.7217661738395691, "learning_rate": 7.640249787113256e-05, "loss": 0.005257481336593628, "step": 83170 }, { "epoch": 23.6105591825149, "grad_norm": 0.8448941111564636, "learning_rate": 7.63996593812092e-05, "loss": 0.0030246954411268234, "step": 83180 }, { "epoch": 23.613397672438264, "grad_norm": 9.558594703674316, "learning_rate": 7.639682089128583e-05, "loss": 0.008547596633434296, "step": 83190 }, { "epoch": 23.616236162361623, "grad_norm": 0.12210258841514587, "learning_rate": 7.639398240136247e-05, "loss": 0.0037641644477844237, "step": 83200 }, { "epoch": 23.619074652284983, "grad_norm": 1.536490559577942, "learning_rate": 7.639114391143913e-05, "loss": 0.012031871080398559, "step": 83210 }, { "epoch": 23.621913142208346, "grad_norm": 0.03750494867563248, "learning_rate": 7.638830542151576e-05, "loss": 0.0032878100872039797, "step": 83220 }, { "epoch": 23.624751632131705, "grad_norm": 0.21471598744392395, "learning_rate": 7.63854669315924e-05, "loss": 0.010217183828353881, "step": 83230 }, { "epoch": 23.627590122055068, "grad_norm": 2.8514935970306396, "learning_rate": 7.638262844166904e-05, "loss": 0.011469648778438568, "step": 83240 }, { "epoch": 23.630428611978427, "grad_norm": 1.5993677377700806, "learning_rate": 7.637978995174567e-05, "loss": 0.004862155765295029, "step": 83250 }, { "epoch": 23.633267101901787, "grad_norm": 0.3846570551395416, "learning_rate": 7.637695146182231e-05, "loss": 0.0031325094401836394, "step": 83260 }, { "epoch": 23.63610559182515, "grad_norm": 0.36531928181648254, "learning_rate": 7.637411297189895e-05, "loss": 0.0025025106966495513, "step": 83270 }, { "epoch": 23.63894408174851, "grad_norm": 2.6369359493255615, "learning_rate": 7.637127448197559e-05, "loss": 0.011030103266239166, "step": 83280 }, { "epoch": 23.64178257167187, "grad_norm": 0.8070433139801025, "learning_rate": 7.636843599205223e-05, "loss": 0.013387694954872131, "step": 83290 }, { "epoch": 23.64462106159523, "grad_norm": 3.266462802886963, "learning_rate": 7.636559750212887e-05, "loss": 0.009889991581439972, "step": 83300 }, { "epoch": 23.647459551518594, "grad_norm": 0.6590173840522766, "learning_rate": 7.636275901220552e-05, "loss": 0.007937929034233094, "step": 83310 }, { "epoch": 23.650298041441953, "grad_norm": 5.0903167724609375, "learning_rate": 7.635992052228214e-05, "loss": 0.008365467190742493, "step": 83320 }, { "epoch": 23.653136531365313, "grad_norm": 0.9725505709648132, "learning_rate": 7.635708203235878e-05, "loss": 0.022783493995666503, "step": 83330 }, { "epoch": 23.655975021288675, "grad_norm": 3.3030192852020264, "learning_rate": 7.635424354243543e-05, "loss": 0.01601141095161438, "step": 83340 }, { "epoch": 23.658813511212035, "grad_norm": 10.252058982849121, "learning_rate": 7.635140505251207e-05, "loss": 0.037320280075073244, "step": 83350 }, { "epoch": 23.661652001135398, "grad_norm": 1.5416988134384155, "learning_rate": 7.634856656258871e-05, "loss": 0.007413164526224136, "step": 83360 }, { "epoch": 23.664490491058757, "grad_norm": 3.9745428562164307, "learning_rate": 7.634572807266535e-05, "loss": 0.012902036309242249, "step": 83370 }, { "epoch": 23.667328980982116, "grad_norm": 7.774428844451904, "learning_rate": 7.634288958274198e-05, "loss": 0.011671137064695358, "step": 83380 }, { "epoch": 23.67016747090548, "grad_norm": 0.6638226509094238, "learning_rate": 7.634005109281862e-05, "loss": 0.0035319037735462187, "step": 83390 }, { "epoch": 23.67300596082884, "grad_norm": 0.07780949771404266, "learning_rate": 7.633721260289526e-05, "loss": 0.016288092732429503, "step": 83400 }, { "epoch": 23.6758444507522, "grad_norm": 14.102219581604004, "learning_rate": 7.63343741129719e-05, "loss": 0.010649022459983826, "step": 83410 }, { "epoch": 23.67868294067556, "grad_norm": 0.3644264042377472, "learning_rate": 7.633153562304854e-05, "loss": 0.008539967238903046, "step": 83420 }, { "epoch": 23.68152143059892, "grad_norm": 0.826295793056488, "learning_rate": 7.632869713312519e-05, "loss": 0.014359790086746215, "step": 83430 }, { "epoch": 23.684359920522283, "grad_norm": 9.932479858398438, "learning_rate": 7.632585864320183e-05, "loss": 0.0049505744129419325, "step": 83440 }, { "epoch": 23.687198410445642, "grad_norm": 0.2574816346168518, "learning_rate": 7.632302015327845e-05, "loss": 0.0014718355610966682, "step": 83450 }, { "epoch": 23.690036900369005, "grad_norm": 8.949747085571289, "learning_rate": 7.63201816633551e-05, "loss": 0.011672496795654297, "step": 83460 }, { "epoch": 23.692875390292365, "grad_norm": 0.15590237081050873, "learning_rate": 7.631734317343174e-05, "loss": 0.005810753256082535, "step": 83470 }, { "epoch": 23.695713880215724, "grad_norm": 9.437899589538574, "learning_rate": 7.631450468350838e-05, "loss": 0.004453033208847046, "step": 83480 }, { "epoch": 23.698552370139087, "grad_norm": 0.22151565551757812, "learning_rate": 7.631166619358502e-05, "loss": 0.00893886610865593, "step": 83490 }, { "epoch": 23.701390860062446, "grad_norm": 0.7607523202896118, "learning_rate": 7.630882770366166e-05, "loss": 0.005906077846884727, "step": 83500 }, { "epoch": 23.701390860062446, "eval_accuracy": 0.9781903732434667, "eval_loss": 0.07955409586429596, "eval_runtime": 34.043, "eval_samples_per_second": 461.975, "eval_steps_per_second": 7.226, "step": 83500 }, { "epoch": 23.70422934998581, "grad_norm": 1.7085850238800049, "learning_rate": 7.630598921373829e-05, "loss": 0.005171524733304978, "step": 83510 }, { "epoch": 23.70706783990917, "grad_norm": 0.7668812870979309, "learning_rate": 7.630315072381493e-05, "loss": 0.004255309700965881, "step": 83520 }, { "epoch": 23.709906329832528, "grad_norm": 1.0505894422531128, "learning_rate": 7.630031223389157e-05, "loss": 0.009959181398153305, "step": 83530 }, { "epoch": 23.71274481975589, "grad_norm": 0.1731732189655304, "learning_rate": 7.629747374396821e-05, "loss": 0.01182742565870285, "step": 83540 }, { "epoch": 23.71558330967925, "grad_norm": 0.27348780632019043, "learning_rate": 7.629463525404485e-05, "loss": 0.01326952874660492, "step": 83550 }, { "epoch": 23.718421799602613, "grad_norm": 0.08113700151443481, "learning_rate": 7.62917967641215e-05, "loss": 0.011057189106941223, "step": 83560 }, { "epoch": 23.721260289525972, "grad_norm": 0.030299322679638863, "learning_rate": 7.628895827419812e-05, "loss": 0.001291564479470253, "step": 83570 }, { "epoch": 23.72409877944933, "grad_norm": 0.8118656277656555, "learning_rate": 7.628611978427477e-05, "loss": 0.0018354734405875207, "step": 83580 }, { "epoch": 23.726937269372694, "grad_norm": 1.9243136644363403, "learning_rate": 7.62832812943514e-05, "loss": 0.00728772059082985, "step": 83590 }, { "epoch": 23.729775759296054, "grad_norm": 1.3715013265609741, "learning_rate": 7.628044280442805e-05, "loss": 0.0029618801549077035, "step": 83600 }, { "epoch": 23.732614249219417, "grad_norm": 0.7682806253433228, "learning_rate": 7.627760431450469e-05, "loss": 0.013978187739849091, "step": 83610 }, { "epoch": 23.735452739142776, "grad_norm": 1.1702017784118652, "learning_rate": 7.627476582458133e-05, "loss": 0.013321058452129364, "step": 83620 }, { "epoch": 23.738291229066135, "grad_norm": 7.553680419921875, "learning_rate": 7.627192733465797e-05, "loss": 0.01252075433731079, "step": 83630 }, { "epoch": 23.741129718989498, "grad_norm": 1.7667757272720337, "learning_rate": 7.62690888447346e-05, "loss": 0.011162129789590835, "step": 83640 }, { "epoch": 23.743968208912857, "grad_norm": 0.4200684726238251, "learning_rate": 7.626625035481124e-05, "loss": 0.013753196597099305, "step": 83650 }, { "epoch": 23.74680669883622, "grad_norm": 2.354255437850952, "learning_rate": 7.626341186488788e-05, "loss": 0.007948704063892365, "step": 83660 }, { "epoch": 23.74964518875958, "grad_norm": 11.634864807128906, "learning_rate": 7.626057337496452e-05, "loss": 0.011926925927400588, "step": 83670 }, { "epoch": 23.75248367868294, "grad_norm": 1.2079297304153442, "learning_rate": 7.625773488504117e-05, "loss": 0.006289815157651901, "step": 83680 }, { "epoch": 23.755322168606302, "grad_norm": 1.5779659748077393, "learning_rate": 7.62548963951178e-05, "loss": 0.016148561239242555, "step": 83690 }, { "epoch": 23.75816065852966, "grad_norm": 0.20897361636161804, "learning_rate": 7.625205790519443e-05, "loss": 0.019407950341701508, "step": 83700 }, { "epoch": 23.760999148453024, "grad_norm": 0.1493149697780609, "learning_rate": 7.624921941527108e-05, "loss": 0.0035867162048816683, "step": 83710 }, { "epoch": 23.763837638376383, "grad_norm": 0.20904211699962616, "learning_rate": 7.624638092534772e-05, "loss": 0.00559118390083313, "step": 83720 }, { "epoch": 23.766676128299743, "grad_norm": 6.493514060974121, "learning_rate": 7.624354243542436e-05, "loss": 0.00998116359114647, "step": 83730 }, { "epoch": 23.769514618223106, "grad_norm": 0.02400655671954155, "learning_rate": 7.6240703945501e-05, "loss": 0.0033200442790985107, "step": 83740 }, { "epoch": 23.772353108146465, "grad_norm": 8.261314392089844, "learning_rate": 7.623786545557764e-05, "loss": 0.017843395471572876, "step": 83750 }, { "epoch": 23.775191598069828, "grad_norm": 0.10652616620063782, "learning_rate": 7.623502696565428e-05, "loss": 0.025509646534919737, "step": 83760 }, { "epoch": 23.778030087993187, "grad_norm": 3.5438313484191895, "learning_rate": 7.623218847573091e-05, "loss": 0.006164643168449402, "step": 83770 }, { "epoch": 23.78086857791655, "grad_norm": 0.025201736018061638, "learning_rate": 7.622934998580755e-05, "loss": 0.0033208608627319338, "step": 83780 }, { "epoch": 23.78370706783991, "grad_norm": 2.9440720081329346, "learning_rate": 7.62265114958842e-05, "loss": 0.005298439413309097, "step": 83790 }, { "epoch": 23.78654555776327, "grad_norm": 0.9568774104118347, "learning_rate": 7.622367300596082e-05, "loss": 0.0013764740899205208, "step": 83800 }, { "epoch": 23.78938404768663, "grad_norm": 2.4807677268981934, "learning_rate": 7.622083451603748e-05, "loss": 0.005151051282882691, "step": 83810 }, { "epoch": 23.79222253760999, "grad_norm": 6.041550636291504, "learning_rate": 7.621799602611412e-05, "loss": 0.02173142284154892, "step": 83820 }, { "epoch": 23.795061027533354, "grad_norm": 0.2436843365430832, "learning_rate": 7.621515753619075e-05, "loss": 0.0031268827617168425, "step": 83830 }, { "epoch": 23.797899517456713, "grad_norm": 0.6928277611732483, "learning_rate": 7.621231904626739e-05, "loss": 0.018086090683937073, "step": 83840 }, { "epoch": 23.800738007380073, "grad_norm": 0.04800180345773697, "learning_rate": 7.620948055634403e-05, "loss": 0.014593857526779174, "step": 83850 }, { "epoch": 23.803576497303435, "grad_norm": 0.38905850052833557, "learning_rate": 7.620664206642067e-05, "loss": 0.005582575500011444, "step": 83860 }, { "epoch": 23.806414987226795, "grad_norm": 12.609797477722168, "learning_rate": 7.620380357649731e-05, "loss": 0.01731165647506714, "step": 83870 }, { "epoch": 23.809253477150158, "grad_norm": 0.691362738609314, "learning_rate": 7.620096508657395e-05, "loss": 0.005572275817394256, "step": 83880 }, { "epoch": 23.812091967073517, "grad_norm": 3.7794885635375977, "learning_rate": 7.61981265966506e-05, "loss": 0.007864025980234146, "step": 83890 }, { "epoch": 23.814930456996876, "grad_norm": 2.160618305206299, "learning_rate": 7.619528810672722e-05, "loss": 0.0055538065731525425, "step": 83900 }, { "epoch": 23.81776894692024, "grad_norm": 2.2357919216156006, "learning_rate": 7.619244961680386e-05, "loss": 0.004322709143161773, "step": 83910 }, { "epoch": 23.8206074368436, "grad_norm": 11.350666046142578, "learning_rate": 7.61896111268805e-05, "loss": 0.007168896496295929, "step": 83920 }, { "epoch": 23.82344592676696, "grad_norm": 0.3394611179828644, "learning_rate": 7.618677263695713e-05, "loss": 0.019289126992225646, "step": 83930 }, { "epoch": 23.82628441669032, "grad_norm": 10.226175308227539, "learning_rate": 7.618393414703379e-05, "loss": 0.021676382422447203, "step": 83940 }, { "epoch": 23.82912290661368, "grad_norm": 0.3027615249156952, "learning_rate": 7.618109565711043e-05, "loss": 0.008240342140197754, "step": 83950 }, { "epoch": 23.831961396537043, "grad_norm": 0.17308437824249268, "learning_rate": 7.617825716718706e-05, "loss": 0.004812006279826165, "step": 83960 }, { "epoch": 23.834799886460402, "grad_norm": 1.9846278429031372, "learning_rate": 7.61754186772637e-05, "loss": 0.020823952555656434, "step": 83970 }, { "epoch": 23.837638376383765, "grad_norm": 4.379695415496826, "learning_rate": 7.617258018734034e-05, "loss": 0.01543356329202652, "step": 83980 }, { "epoch": 23.840476866307124, "grad_norm": 12.622753143310547, "learning_rate": 7.616974169741698e-05, "loss": 0.019014051556587218, "step": 83990 }, { "epoch": 23.843315356230484, "grad_norm": 2.3405168056488037, "learning_rate": 7.616690320749361e-05, "loss": 0.020064570009708405, "step": 84000 }, { "epoch": 23.843315356230484, "eval_accuracy": 0.9744388631016723, "eval_loss": 0.09727957844734192, "eval_runtime": 34.8087, "eval_samples_per_second": 451.812, "eval_steps_per_second": 7.067, "step": 84000 }, { "epoch": 23.846153846153847, "grad_norm": 12.733762741088867, "learning_rate": 7.616406471757026e-05, "loss": 0.026537388563156128, "step": 84010 }, { "epoch": 23.848992336077206, "grad_norm": 1.274953842163086, "learning_rate": 7.61612262276469e-05, "loss": 0.018619488179683685, "step": 84020 }, { "epoch": 23.85183082600057, "grad_norm": 3.816718578338623, "learning_rate": 7.615838773772353e-05, "loss": 0.007619549334049225, "step": 84030 }, { "epoch": 23.85466931592393, "grad_norm": 2.0278425216674805, "learning_rate": 7.615554924780017e-05, "loss": 0.015104363858699798, "step": 84040 }, { "epoch": 23.857507805847288, "grad_norm": 6.1755452156066895, "learning_rate": 7.615271075787681e-05, "loss": 0.011066446453332901, "step": 84050 }, { "epoch": 23.86034629577065, "grad_norm": 9.330097198486328, "learning_rate": 7.614987226795344e-05, "loss": 0.011031214892864228, "step": 84060 }, { "epoch": 23.86318478569401, "grad_norm": 0.09433527290821075, "learning_rate": 7.61470337780301e-05, "loss": 0.013332599401473999, "step": 84070 }, { "epoch": 23.866023275617373, "grad_norm": 0.2199907898902893, "learning_rate": 7.614419528810674e-05, "loss": 0.010739105194807053, "step": 84080 }, { "epoch": 23.868861765540732, "grad_norm": 6.053071022033691, "learning_rate": 7.614135679818337e-05, "loss": 0.012464222311973572, "step": 84090 }, { "epoch": 23.87170025546409, "grad_norm": 3.1437833309173584, "learning_rate": 7.613851830826001e-05, "loss": 0.0044216252863407135, "step": 84100 }, { "epoch": 23.874538745387454, "grad_norm": 0.1346217542886734, "learning_rate": 7.613567981833665e-05, "loss": 0.011340545117855072, "step": 84110 }, { "epoch": 23.877377235310814, "grad_norm": 0.7074870467185974, "learning_rate": 7.613284132841329e-05, "loss": 0.0014205142855644227, "step": 84120 }, { "epoch": 23.880215725234176, "grad_norm": 0.42718154191970825, "learning_rate": 7.613000283848992e-05, "loss": 0.007598124444484711, "step": 84130 }, { "epoch": 23.883054215157536, "grad_norm": 7.125083923339844, "learning_rate": 7.612716434856657e-05, "loss": 0.0026372669264674185, "step": 84140 }, { "epoch": 23.8858927050809, "grad_norm": 0.1026073470711708, "learning_rate": 7.612432585864321e-05, "loss": 0.005088038742542267, "step": 84150 }, { "epoch": 23.888731195004258, "grad_norm": 0.08469197899103165, "learning_rate": 7.612148736871984e-05, "loss": 0.003656920790672302, "step": 84160 }, { "epoch": 23.891569684927617, "grad_norm": 1.538608193397522, "learning_rate": 7.611864887879648e-05, "loss": 0.0036801792681217193, "step": 84170 }, { "epoch": 23.89440817485098, "grad_norm": 1.9428929090499878, "learning_rate": 7.611581038887313e-05, "loss": 0.004346510767936707, "step": 84180 }, { "epoch": 23.89724666477434, "grad_norm": 0.5948730707168579, "learning_rate": 7.611297189894975e-05, "loss": 0.008678402751684189, "step": 84190 }, { "epoch": 23.900085154697702, "grad_norm": 0.46552443504333496, "learning_rate": 7.61101334090264e-05, "loss": 0.007814085483551026, "step": 84200 }, { "epoch": 23.902923644621062, "grad_norm": 0.5122572779655457, "learning_rate": 7.610729491910305e-05, "loss": 0.007247558236122132, "step": 84210 }, { "epoch": 23.90576213454442, "grad_norm": 3.3049354553222656, "learning_rate": 7.610445642917968e-05, "loss": 0.002832024544477463, "step": 84220 }, { "epoch": 23.908600624467784, "grad_norm": 0.14071516692638397, "learning_rate": 7.610161793925632e-05, "loss": 0.0014033103361725807, "step": 84230 }, { "epoch": 23.911439114391143, "grad_norm": 0.9384325742721558, "learning_rate": 7.609877944933296e-05, "loss": 0.0035245470702648164, "step": 84240 }, { "epoch": 23.914277604314506, "grad_norm": 0.033501036465168, "learning_rate": 7.60959409594096e-05, "loss": 0.005856392160058022, "step": 84250 }, { "epoch": 23.917116094237866, "grad_norm": 0.1453283578157425, "learning_rate": 7.609310246948623e-05, "loss": 0.020835040509700774, "step": 84260 }, { "epoch": 23.919954584161225, "grad_norm": 10.089277267456055, "learning_rate": 7.609026397956288e-05, "loss": 0.018767300248146056, "step": 84270 }, { "epoch": 23.922793074084588, "grad_norm": 0.6052333116531372, "learning_rate": 7.608742548963951e-05, "loss": 0.02574348747730255, "step": 84280 }, { "epoch": 23.925631564007947, "grad_norm": 0.1765177845954895, "learning_rate": 7.608458699971615e-05, "loss": 0.01788782775402069, "step": 84290 }, { "epoch": 23.92847005393131, "grad_norm": 6.70344352722168, "learning_rate": 7.60817485097928e-05, "loss": 0.010568921267986298, "step": 84300 }, { "epoch": 23.93130854385467, "grad_norm": 2.1327459812164307, "learning_rate": 7.607891001986944e-05, "loss": 0.013131430745124817, "step": 84310 }, { "epoch": 23.93414703377803, "grad_norm": 0.6922464966773987, "learning_rate": 7.607607152994606e-05, "loss": 0.00819971114397049, "step": 84320 }, { "epoch": 23.93698552370139, "grad_norm": 4.479637145996094, "learning_rate": 7.60732330400227e-05, "loss": 0.005847763270139694, "step": 84330 }, { "epoch": 23.93982401362475, "grad_norm": 1.606452465057373, "learning_rate": 7.607039455009936e-05, "loss": 0.01508607417345047, "step": 84340 }, { "epoch": 23.942662503548114, "grad_norm": 0.8503910899162292, "learning_rate": 7.606755606017599e-05, "loss": 0.014186304807662965, "step": 84350 }, { "epoch": 23.945500993471473, "grad_norm": 4.417670726776123, "learning_rate": 7.606471757025263e-05, "loss": 0.014810888469219208, "step": 84360 }, { "epoch": 23.948339483394832, "grad_norm": 3.083169937133789, "learning_rate": 7.606187908032927e-05, "loss": 0.011068567633628845, "step": 84370 }, { "epoch": 23.951177973318195, "grad_norm": 0.10994665324687958, "learning_rate": 7.60590405904059e-05, "loss": 0.005345769971609115, "step": 84380 }, { "epoch": 23.954016463241555, "grad_norm": 0.7388126254081726, "learning_rate": 7.605620210048254e-05, "loss": 0.004741813987493515, "step": 84390 }, { "epoch": 23.956854953164918, "grad_norm": 0.03155841305851936, "learning_rate": 7.605336361055918e-05, "loss": 0.007068003714084625, "step": 84400 }, { "epoch": 23.959693443088277, "grad_norm": 1.2062712907791138, "learning_rate": 7.605052512063582e-05, "loss": 0.0031297624111175535, "step": 84410 }, { "epoch": 23.962531933011636, "grad_norm": 0.1365397870540619, "learning_rate": 7.604768663071246e-05, "loss": 0.0064655192196369175, "step": 84420 }, { "epoch": 23.965370422935, "grad_norm": 1.7727723121643066, "learning_rate": 7.60448481407891e-05, "loss": 0.017142876982688904, "step": 84430 }, { "epoch": 23.96820891285836, "grad_norm": 0.052268996834754944, "learning_rate": 7.604200965086575e-05, "loss": 0.004548939689993858, "step": 84440 }, { "epoch": 23.97104740278172, "grad_norm": 4.247425556182861, "learning_rate": 7.603917116094237e-05, "loss": 0.0045635908842086795, "step": 84450 }, { "epoch": 23.97388589270508, "grad_norm": 1.7568303346633911, "learning_rate": 7.603633267101902e-05, "loss": 0.0023510776460170745, "step": 84460 }, { "epoch": 23.97672438262844, "grad_norm": 0.34775930643081665, "learning_rate": 7.603349418109567e-05, "loss": 0.004971436038613319, "step": 84470 }, { "epoch": 23.979562872551803, "grad_norm": 12.677882194519043, "learning_rate": 7.60306556911723e-05, "loss": 0.016394373774528504, "step": 84480 }, { "epoch": 23.982401362475162, "grad_norm": 0.2139485478401184, "learning_rate": 7.602781720124894e-05, "loss": 0.010329995304346085, "step": 84490 }, { "epoch": 23.985239852398525, "grad_norm": 0.027204792946577072, "learning_rate": 7.602497871132558e-05, "loss": 0.005082706734538078, "step": 84500 }, { "epoch": 23.985239852398525, "eval_accuracy": 0.9764735804667133, "eval_loss": 0.09108339995145798, "eval_runtime": 33.657, "eval_samples_per_second": 467.272, "eval_steps_per_second": 7.309, "step": 84500 }, { "epoch": 23.988078342321884, "grad_norm": 4.553252220153809, "learning_rate": 7.602214022140221e-05, "loss": 0.0075197979807853695, "step": 84510 }, { "epoch": 23.990916832245247, "grad_norm": 0.2624935805797577, "learning_rate": 7.601930173147885e-05, "loss": 0.018244516849517823, "step": 84520 }, { "epoch": 23.993755322168607, "grad_norm": 2.0142314434051514, "learning_rate": 7.601646324155549e-05, "loss": 0.02420286238193512, "step": 84530 }, { "epoch": 23.996593812091966, "grad_norm": 1.2045501470565796, "learning_rate": 7.601362475163213e-05, "loss": 0.007133318483829499, "step": 84540 }, { "epoch": 23.99943230201533, "grad_norm": 1.3756266832351685, "learning_rate": 7.601078626170878e-05, "loss": 0.009131786972284317, "step": 84550 }, { "epoch": 24.002270791938688, "grad_norm": 0.36336299777030945, "learning_rate": 7.600794777178542e-05, "loss": 0.003840317949652672, "step": 84560 }, { "epoch": 24.00510928186205, "grad_norm": 1.087823510169983, "learning_rate": 7.600510928186206e-05, "loss": 0.016184772551059722, "step": 84570 }, { "epoch": 24.00794777178541, "grad_norm": 0.022056689485907555, "learning_rate": 7.600227079193869e-05, "loss": 0.007910224050283432, "step": 84580 }, { "epoch": 24.01078626170877, "grad_norm": 0.04916263744235039, "learning_rate": 7.599943230201533e-05, "loss": 0.0033433571457862853, "step": 84590 }, { "epoch": 24.013624751632133, "grad_norm": 0.04034461826086044, "learning_rate": 7.599659381209198e-05, "loss": 0.0026331596076488496, "step": 84600 }, { "epoch": 24.016463241555492, "grad_norm": 4.139782428741455, "learning_rate": 7.599375532216861e-05, "loss": 0.0026933494955301285, "step": 84610 }, { "epoch": 24.019301731478855, "grad_norm": 0.25521737337112427, "learning_rate": 7.599091683224525e-05, "loss": 0.0017058271914720535, "step": 84620 }, { "epoch": 24.022140221402214, "grad_norm": 5.989036560058594, "learning_rate": 7.598807834232189e-05, "loss": 0.01571953296661377, "step": 84630 }, { "epoch": 24.024978711325573, "grad_norm": 0.05633162334561348, "learning_rate": 7.598523985239852e-05, "loss": 0.005261561274528504, "step": 84640 }, { "epoch": 24.027817201248936, "grad_norm": 0.8631510138511658, "learning_rate": 7.598240136247516e-05, "loss": 0.01660420298576355, "step": 84650 }, { "epoch": 24.030655691172296, "grad_norm": 0.16817794740200043, "learning_rate": 7.59795628725518e-05, "loss": 0.008578766882419587, "step": 84660 }, { "epoch": 24.03349418109566, "grad_norm": 0.21310651302337646, "learning_rate": 7.597672438262844e-05, "loss": 0.018255580961704255, "step": 84670 }, { "epoch": 24.036332671019018, "grad_norm": 10.179167747497559, "learning_rate": 7.597388589270509e-05, "loss": 0.008689191937446595, "step": 84680 }, { "epoch": 24.039171160942377, "grad_norm": 1.3997291326522827, "learning_rate": 7.597104740278173e-05, "loss": 0.010524090379476547, "step": 84690 }, { "epoch": 24.04200965086574, "grad_norm": 0.5248434543609619, "learning_rate": 7.596820891285837e-05, "loss": 0.02033347487449646, "step": 84700 }, { "epoch": 24.0448481407891, "grad_norm": 2.8737733364105225, "learning_rate": 7.5965370422935e-05, "loss": 0.005329130589962006, "step": 84710 }, { "epoch": 24.047686630712462, "grad_norm": 0.08467231690883636, "learning_rate": 7.596281578200398e-05, "loss": 0.016791512072086335, "step": 84720 }, { "epoch": 24.05052512063582, "grad_norm": 0.28643226623535156, "learning_rate": 7.595997729208062e-05, "loss": 0.011879729479551316, "step": 84730 }, { "epoch": 24.05336361055918, "grad_norm": 0.1374460607767105, "learning_rate": 7.595713880215726e-05, "loss": 0.012994198501110077, "step": 84740 }, { "epoch": 24.056202100482544, "grad_norm": 0.09951157867908478, "learning_rate": 7.59543003122339e-05, "loss": 0.001163851097226143, "step": 84750 }, { "epoch": 24.059040590405903, "grad_norm": 7.152926921844482, "learning_rate": 7.595146182231053e-05, "loss": 0.0026121634989976885, "step": 84760 }, { "epoch": 24.061879080329266, "grad_norm": 0.05738834664225578, "learning_rate": 7.594862333238717e-05, "loss": 0.0033878441900014877, "step": 84770 }, { "epoch": 24.064717570252625, "grad_norm": 0.9204378724098206, "learning_rate": 7.594578484246381e-05, "loss": 0.007579003274440765, "step": 84780 }, { "epoch": 24.067556060175985, "grad_norm": 0.6094701290130615, "learning_rate": 7.594294635254045e-05, "loss": 0.0056509733200073246, "step": 84790 }, { "epoch": 24.070394550099348, "grad_norm": 0.7759151458740234, "learning_rate": 7.59401078626171e-05, "loss": 0.0066897556185722355, "step": 84800 }, { "epoch": 24.073233040022707, "grad_norm": 9.091606140136719, "learning_rate": 7.593726937269374e-05, "loss": 0.009775279462337494, "step": 84810 }, { "epoch": 24.07607152994607, "grad_norm": 0.8321952819824219, "learning_rate": 7.593443088277036e-05, "loss": 0.011336057633161544, "step": 84820 }, { "epoch": 24.07891001986943, "grad_norm": 0.5862356424331665, "learning_rate": 7.5931592392847e-05, "loss": 0.014900332689285279, "step": 84830 }, { "epoch": 24.08174850979279, "grad_norm": 0.19016321003437042, "learning_rate": 7.592875390292365e-05, "loss": 0.00792021006345749, "step": 84840 }, { "epoch": 24.08458699971615, "grad_norm": 2.305518865585327, "learning_rate": 7.592591541300029e-05, "loss": 0.00419909656047821, "step": 84850 }, { "epoch": 24.08742548963951, "grad_norm": 12.184083938598633, "learning_rate": 7.592307692307693e-05, "loss": 0.007298870384693146, "step": 84860 }, { "epoch": 24.090263979562874, "grad_norm": 0.1454903930425644, "learning_rate": 7.592023843315357e-05, "loss": 0.007690966874361038, "step": 84870 }, { "epoch": 24.093102469486233, "grad_norm": 0.2704269587993622, "learning_rate": 7.591739994323021e-05, "loss": 0.002466236613690853, "step": 84880 }, { "epoch": 24.095940959409592, "grad_norm": 3.637474536895752, "learning_rate": 7.591456145330684e-05, "loss": 0.008704179525375366, "step": 84890 }, { "epoch": 24.098779449332955, "grad_norm": 1.8340343236923218, "learning_rate": 7.591172296338348e-05, "loss": 0.0026545487344264984, "step": 84900 }, { "epoch": 24.101617939256315, "grad_norm": 0.025200052186846733, "learning_rate": 7.590888447346012e-05, "loss": 0.005726618319749832, "step": 84910 }, { "epoch": 24.104456429179677, "grad_norm": 0.6710706353187561, "learning_rate": 7.590604598353676e-05, "loss": 0.0035374417901039125, "step": 84920 }, { "epoch": 24.107294919103037, "grad_norm": 0.23064793646335602, "learning_rate": 7.59032074936134e-05, "loss": 0.0037888951599597933, "step": 84930 }, { "epoch": 24.1101334090264, "grad_norm": 8.020915985107422, "learning_rate": 7.590036900369005e-05, "loss": 0.014701536297798157, "step": 84940 }, { "epoch": 24.11297189894976, "grad_norm": 1.3453774452209473, "learning_rate": 7.589753051376667e-05, "loss": 0.014889298379421234, "step": 84950 }, { "epoch": 24.11581038887312, "grad_norm": 1.3543195724487305, "learning_rate": 7.589469202384332e-05, "loss": 0.008187253028154373, "step": 84960 }, { "epoch": 24.11864887879648, "grad_norm": 0.06542831659317017, "learning_rate": 7.589185353391996e-05, "loss": 0.011218450218439101, "step": 84970 }, { "epoch": 24.12148736871984, "grad_norm": 1.0464439392089844, "learning_rate": 7.58890150439966e-05, "loss": 0.012249088287353516, "step": 84980 }, { "epoch": 24.124325858643203, "grad_norm": 0.06092916801571846, "learning_rate": 7.588617655407324e-05, "loss": 0.02008618265390396, "step": 84990 }, { "epoch": 24.127164348566563, "grad_norm": 4.641836643218994, "learning_rate": 7.588333806414988e-05, "loss": 0.015608687698841096, "step": 85000 }, { "epoch": 24.127164348566563, "eval_accuracy": 0.9800979207731926, "eval_loss": 0.07173267751932144, "eval_runtime": 32.7708, "eval_samples_per_second": 479.909, "eval_steps_per_second": 7.507, "step": 85000 }, { "epoch": 24.130002838489922, "grad_norm": 0.671139121055603, "learning_rate": 7.588049957422652e-05, "loss": 0.003621276468038559, "step": 85010 }, { "epoch": 24.132841328413285, "grad_norm": 0.10709131509065628, "learning_rate": 7.587766108430315e-05, "loss": 0.0053010348230600355, "step": 85020 }, { "epoch": 24.135679818336644, "grad_norm": 0.10030747950077057, "learning_rate": 7.587482259437979e-05, "loss": 0.003152785450220108, "step": 85030 }, { "epoch": 24.138518308260007, "grad_norm": 0.2050783485174179, "learning_rate": 7.587198410445643e-05, "loss": 0.013658976554870606, "step": 85040 }, { "epoch": 24.141356798183367, "grad_norm": 0.1518978625535965, "learning_rate": 7.586914561453307e-05, "loss": 0.007435982674360275, "step": 85050 }, { "epoch": 24.144195288106726, "grad_norm": 0.16166546940803528, "learning_rate": 7.586630712460972e-05, "loss": 0.008932608366012573, "step": 85060 }, { "epoch": 24.14703377803009, "grad_norm": 0.05054601654410362, "learning_rate": 7.586346863468636e-05, "loss": 0.009279216080904007, "step": 85070 }, { "epoch": 24.149872267953448, "grad_norm": 0.9135128259658813, "learning_rate": 7.586063014476298e-05, "loss": 0.006755013763904571, "step": 85080 }, { "epoch": 24.15271075787681, "grad_norm": 1.2795021533966064, "learning_rate": 7.585779165483963e-05, "loss": 0.015272343158721923, "step": 85090 }, { "epoch": 24.15554924780017, "grad_norm": 1.7445003986358643, "learning_rate": 7.585495316491627e-05, "loss": 0.004668787494301796, "step": 85100 }, { "epoch": 24.15838773772353, "grad_norm": 0.3349231779575348, "learning_rate": 7.585211467499291e-05, "loss": 0.0016556696966290474, "step": 85110 }, { "epoch": 24.161226227646893, "grad_norm": 0.11324737221002579, "learning_rate": 7.584927618506955e-05, "loss": 0.004476838558912277, "step": 85120 }, { "epoch": 24.164064717570252, "grad_norm": 9.007710456848145, "learning_rate": 7.584643769514619e-05, "loss": 0.020775505900382997, "step": 85130 }, { "epoch": 24.166903207493615, "grad_norm": 0.586499035358429, "learning_rate": 7.584359920522283e-05, "loss": 0.002689219079911709, "step": 85140 }, { "epoch": 24.169741697416974, "grad_norm": 0.3509954810142517, "learning_rate": 7.584076071529946e-05, "loss": 0.002200099267065525, "step": 85150 }, { "epoch": 24.172580187340333, "grad_norm": 0.8887097239494324, "learning_rate": 7.58379222253761e-05, "loss": 0.003335307911038399, "step": 85160 }, { "epoch": 24.175418677263696, "grad_norm": 0.6529326438903809, "learning_rate": 7.583508373545274e-05, "loss": 0.0053554408252239226, "step": 85170 }, { "epoch": 24.178257167187056, "grad_norm": 6.310112476348877, "learning_rate": 7.583224524552938e-05, "loss": 0.00254551712423563, "step": 85180 }, { "epoch": 24.18109565711042, "grad_norm": 0.09739043563604355, "learning_rate": 7.582940675560603e-05, "loss": 0.010995099693536759, "step": 85190 }, { "epoch": 24.183934147033778, "grad_norm": 0.23119981586933136, "learning_rate": 7.582656826568267e-05, "loss": 0.003934941440820694, "step": 85200 }, { "epoch": 24.186772636957137, "grad_norm": 11.46474838256836, "learning_rate": 7.58237297757593e-05, "loss": 0.009228085726499557, "step": 85210 }, { "epoch": 24.1896111268805, "grad_norm": 0.016618777066469193, "learning_rate": 7.582089128583594e-05, "loss": 0.012006250768899917, "step": 85220 }, { "epoch": 24.19244961680386, "grad_norm": 18.067153930664062, "learning_rate": 7.581805279591258e-05, "loss": 0.018495890498161315, "step": 85230 }, { "epoch": 24.195288106727222, "grad_norm": 6.591973304748535, "learning_rate": 7.581521430598922e-05, "loss": 0.010405851155519485, "step": 85240 }, { "epoch": 24.19812659665058, "grad_norm": 2.893824815750122, "learning_rate": 7.581237581606586e-05, "loss": 0.005208656191825867, "step": 85250 }, { "epoch": 24.20096508657394, "grad_norm": 1.2204358577728271, "learning_rate": 7.58095373261425e-05, "loss": 0.003474922478199005, "step": 85260 }, { "epoch": 24.203803576497304, "grad_norm": 0.06728530675172806, "learning_rate": 7.580669883621913e-05, "loss": 0.004909406974911689, "step": 85270 }, { "epoch": 24.206642066420663, "grad_norm": 2.9409306049346924, "learning_rate": 7.580386034629577e-05, "loss": 0.009987357258796691, "step": 85280 }, { "epoch": 24.209480556344026, "grad_norm": 3.2303647994995117, "learning_rate": 7.580102185637241e-05, "loss": 0.006879280507564545, "step": 85290 }, { "epoch": 24.212319046267385, "grad_norm": 10.542433738708496, "learning_rate": 7.579818336644905e-05, "loss": 0.008543367683887481, "step": 85300 }, { "epoch": 24.215157536190745, "grad_norm": 1.1002780199050903, "learning_rate": 7.579534487652568e-05, "loss": 0.015024890005588532, "step": 85310 }, { "epoch": 24.217996026114108, "grad_norm": 1.1891716718673706, "learning_rate": 7.579250638660234e-05, "loss": 0.003810758516192436, "step": 85320 }, { "epoch": 24.220834516037467, "grad_norm": 1.6156916618347168, "learning_rate": 7.578966789667898e-05, "loss": 0.005288823693990708, "step": 85330 }, { "epoch": 24.22367300596083, "grad_norm": 3.6296632289886475, "learning_rate": 7.57868294067556e-05, "loss": 0.009055513143539428, "step": 85340 }, { "epoch": 24.22651149588419, "grad_norm": 0.050322916358709335, "learning_rate": 7.578399091683225e-05, "loss": 0.03489323854446411, "step": 85350 }, { "epoch": 24.229349985807552, "grad_norm": 11.209318161010742, "learning_rate": 7.578115242690889e-05, "loss": 0.01656494438648224, "step": 85360 }, { "epoch": 24.23218847573091, "grad_norm": 6.308953762054443, "learning_rate": 7.577831393698552e-05, "loss": 0.014331993460655213, "step": 85370 }, { "epoch": 24.23502696565427, "grad_norm": 13.148971557617188, "learning_rate": 7.577547544706217e-05, "loss": 0.024738866090774535, "step": 85380 }, { "epoch": 24.237865455577634, "grad_norm": 0.5044548511505127, "learning_rate": 7.577263695713881e-05, "loss": 0.005694955959916115, "step": 85390 }, { "epoch": 24.240703945500993, "grad_norm": 0.08838040381669998, "learning_rate": 7.576979846721544e-05, "loss": 0.002527649514377117, "step": 85400 }, { "epoch": 24.243542435424356, "grad_norm": 0.9529962539672852, "learning_rate": 7.576695997729208e-05, "loss": 0.022084954380989074, "step": 85410 }, { "epoch": 24.246380925347715, "grad_norm": 0.5134181976318359, "learning_rate": 7.576412148736872e-05, "loss": 0.0014967631548643112, "step": 85420 }, { "epoch": 24.249219415271074, "grad_norm": 0.3662240505218506, "learning_rate": 7.576128299744536e-05, "loss": 0.0077068574726581575, "step": 85430 }, { "epoch": 24.252057905194437, "grad_norm": 0.2916456460952759, "learning_rate": 7.575844450752199e-05, "loss": 0.007889066636562348, "step": 85440 }, { "epoch": 24.254896395117797, "grad_norm": 9.80663776397705, "learning_rate": 7.575560601759865e-05, "loss": 0.012558755278587342, "step": 85450 }, { "epoch": 24.25773488504116, "grad_norm": 4.818694591522217, "learning_rate": 7.575276752767529e-05, "loss": 0.003650948405265808, "step": 85460 }, { "epoch": 24.26057337496452, "grad_norm": 0.26533856987953186, "learning_rate": 7.574992903775192e-05, "loss": 0.0023170849308371544, "step": 85470 }, { "epoch": 24.26341186488788, "grad_norm": 0.26028114557266235, "learning_rate": 7.574709054782856e-05, "loss": 0.002117781899869442, "step": 85480 }, { "epoch": 24.26625035481124, "grad_norm": 5.33561897277832, "learning_rate": 7.57442520579052e-05, "loss": 0.0033349622040987014, "step": 85490 }, { "epoch": 24.2690888447346, "grad_norm": 0.8699515461921692, "learning_rate": 7.574141356798183e-05, "loss": 0.0021961716935038566, "step": 85500 }, { "epoch": 24.2690888447346, "eval_accuracy": 0.9734850893368093, "eval_loss": 0.09723507612943649, "eval_runtime": 32.9066, "eval_samples_per_second": 477.928, "eval_steps_per_second": 7.476, "step": 85500 }, { "epoch": 24.271927334657963, "grad_norm": 3.694206714630127, "learning_rate": 7.573857507805847e-05, "loss": 0.008948960155248643, "step": 85510 }, { "epoch": 24.274765824581323, "grad_norm": 0.7049967050552368, "learning_rate": 7.573573658813512e-05, "loss": 0.006759609282016754, "step": 85520 }, { "epoch": 24.277604314504682, "grad_norm": 0.28003963828086853, "learning_rate": 7.573289809821175e-05, "loss": 0.013377580046653747, "step": 85530 }, { "epoch": 24.280442804428045, "grad_norm": 6.272402286529541, "learning_rate": 7.573005960828839e-05, "loss": 0.010943593084812164, "step": 85540 }, { "epoch": 24.283281294351404, "grad_norm": 0.13266277313232422, "learning_rate": 7.572722111836503e-05, "loss": 0.011967794597148895, "step": 85550 }, { "epoch": 24.286119784274767, "grad_norm": 0.48153308033943176, "learning_rate": 7.572438262844168e-05, "loss": 0.015490886569023133, "step": 85560 }, { "epoch": 24.288958274198126, "grad_norm": 0.1258280724287033, "learning_rate": 7.57215441385183e-05, "loss": 0.003524067625403404, "step": 85570 }, { "epoch": 24.291796764121486, "grad_norm": 0.9483364820480347, "learning_rate": 7.571870564859496e-05, "loss": 0.01631483733654022, "step": 85580 }, { "epoch": 24.29463525404485, "grad_norm": 0.6438968181610107, "learning_rate": 7.57158671586716e-05, "loss": 0.01630740612745285, "step": 85590 }, { "epoch": 24.297473743968208, "grad_norm": 5.591843605041504, "learning_rate": 7.571302866874823e-05, "loss": 0.011011810600757599, "step": 85600 }, { "epoch": 24.30031223389157, "grad_norm": 1.7593024969100952, "learning_rate": 7.571019017882487e-05, "loss": 0.025105413794517518, "step": 85610 }, { "epoch": 24.30315072381493, "grad_norm": 0.24703864753246307, "learning_rate": 7.570735168890151e-05, "loss": 0.007796461880207062, "step": 85620 }, { "epoch": 24.30598921373829, "grad_norm": 1.6252418756484985, "learning_rate": 7.570451319897814e-05, "loss": 0.010629761219024658, "step": 85630 }, { "epoch": 24.308827703661652, "grad_norm": 7.682460784912109, "learning_rate": 7.570167470905478e-05, "loss": 0.013785001635551453, "step": 85640 }, { "epoch": 24.311666193585012, "grad_norm": 7.005798816680908, "learning_rate": 7.569883621913143e-05, "loss": 0.014349192380905151, "step": 85650 }, { "epoch": 24.314504683508375, "grad_norm": 0.9127695560455322, "learning_rate": 7.569599772920806e-05, "loss": 0.009352823346853256, "step": 85660 }, { "epoch": 24.317343173431734, "grad_norm": 6.543940544128418, "learning_rate": 7.56931592392847e-05, "loss": 0.00508253239095211, "step": 85670 }, { "epoch": 24.320181663355093, "grad_norm": 0.1129620149731636, "learning_rate": 7.569032074936135e-05, "loss": 0.0020300883799791334, "step": 85680 }, { "epoch": 24.323020153278456, "grad_norm": 5.334319114685059, "learning_rate": 7.568748225943799e-05, "loss": 0.004731272161006927, "step": 85690 }, { "epoch": 24.325858643201816, "grad_norm": 0.4068819582462311, "learning_rate": 7.568464376951461e-05, "loss": 0.014670644700527192, "step": 85700 }, { "epoch": 24.32869713312518, "grad_norm": 0.048841215670108795, "learning_rate": 7.568180527959126e-05, "loss": 0.010739873349666595, "step": 85710 }, { "epoch": 24.331535623048538, "grad_norm": 0.16416765749454498, "learning_rate": 7.567896678966791e-05, "loss": 0.004410497844219208, "step": 85720 }, { "epoch": 24.3343741129719, "grad_norm": 0.22972124814987183, "learning_rate": 7.567612829974454e-05, "loss": 0.018216148018836975, "step": 85730 }, { "epoch": 24.33721260289526, "grad_norm": 0.7919242978096008, "learning_rate": 7.567328980982118e-05, "loss": 0.002911912277340889, "step": 85740 }, { "epoch": 24.34005109281862, "grad_norm": 0.634665846824646, "learning_rate": 7.567045131989782e-05, "loss": 0.008802688121795655, "step": 85750 }, { "epoch": 24.342889582741982, "grad_norm": 9.69843578338623, "learning_rate": 7.566761282997445e-05, "loss": 0.006585302203893662, "step": 85760 }, { "epoch": 24.34572807266534, "grad_norm": 0.4233599901199341, "learning_rate": 7.566477434005109e-05, "loss": 0.0033801443874835967, "step": 85770 }, { "epoch": 24.348566562588704, "grad_norm": 0.17386914789676666, "learning_rate": 7.566193585012775e-05, "loss": 0.0015261529013514518, "step": 85780 }, { "epoch": 24.351405052512064, "grad_norm": 0.13903822004795074, "learning_rate": 7.565909736020437e-05, "loss": 0.006319597363471985, "step": 85790 }, { "epoch": 24.354243542435423, "grad_norm": 10.305364608764648, "learning_rate": 7.565625887028101e-05, "loss": 0.018925634026527405, "step": 85800 }, { "epoch": 24.357082032358786, "grad_norm": 9.59134292602539, "learning_rate": 7.565342038035766e-05, "loss": 0.030721840262413026, "step": 85810 }, { "epoch": 24.359920522282145, "grad_norm": 0.1118253692984581, "learning_rate": 7.56505818904343e-05, "loss": 0.005049101263284683, "step": 85820 }, { "epoch": 24.362759012205508, "grad_norm": 0.25453293323516846, "learning_rate": 7.564774340051093e-05, "loss": 0.005940008908510208, "step": 85830 }, { "epoch": 24.365597502128868, "grad_norm": 0.07276259362697601, "learning_rate": 7.564490491058757e-05, "loss": 0.0036886125802993775, "step": 85840 }, { "epoch": 24.368435992052227, "grad_norm": 2.0516951084136963, "learning_rate": 7.564206642066422e-05, "loss": 0.0055821996182203295, "step": 85850 }, { "epoch": 24.37127448197559, "grad_norm": 1.0493091344833374, "learning_rate": 7.563922793074085e-05, "loss": 0.0019837718456983565, "step": 85860 }, { "epoch": 24.37411297189895, "grad_norm": 2.155586004257202, "learning_rate": 7.563638944081749e-05, "loss": 0.0024043817073106767, "step": 85870 }, { "epoch": 24.376951461822312, "grad_norm": 1.1936856508255005, "learning_rate": 7.563355095089413e-05, "loss": 0.00969858020544052, "step": 85880 }, { "epoch": 24.37978995174567, "grad_norm": 0.41577020287513733, "learning_rate": 7.563071246097076e-05, "loss": 0.0050261497497558595, "step": 85890 }, { "epoch": 24.38262844166903, "grad_norm": 3.7925469875335693, "learning_rate": 7.56278739710474e-05, "loss": 0.0042712677270174025, "step": 85900 }, { "epoch": 24.385466931592394, "grad_norm": 11.220908164978027, "learning_rate": 7.562503548112404e-05, "loss": 0.007814764976501465, "step": 85910 }, { "epoch": 24.388305421515753, "grad_norm": 0.32751837372779846, "learning_rate": 7.562219699120068e-05, "loss": 0.0032188698649406435, "step": 85920 }, { "epoch": 24.391143911439116, "grad_norm": 0.10071156919002533, "learning_rate": 7.561935850127733e-05, "loss": 0.002522362768650055, "step": 85930 }, { "epoch": 24.393982401362475, "grad_norm": 0.3215309977531433, "learning_rate": 7.561652001135397e-05, "loss": 0.007904096692800521, "step": 85940 }, { "epoch": 24.396820891285834, "grad_norm": 1.207585334777832, "learning_rate": 7.561368152143061e-05, "loss": 0.004986270889639854, "step": 85950 }, { "epoch": 24.399659381209197, "grad_norm": 0.049531225115060806, "learning_rate": 7.561084303150724e-05, "loss": 0.0014478221535682678, "step": 85960 }, { "epoch": 24.402497871132557, "grad_norm": 2.7912118434906006, "learning_rate": 7.560800454158388e-05, "loss": 0.004913611710071564, "step": 85970 }, { "epoch": 24.40533636105592, "grad_norm": 0.41520196199417114, "learning_rate": 7.560516605166053e-05, "loss": 0.0054070338606834415, "step": 85980 }, { "epoch": 24.40817485097928, "grad_norm": 0.5563364028930664, "learning_rate": 7.560232756173716e-05, "loss": 0.018155132234096528, "step": 85990 }, { "epoch": 24.411013340902638, "grad_norm": 0.09771277010440826, "learning_rate": 7.55994890718138e-05, "loss": 0.005348341912031174, "step": 86000 }, { "epoch": 24.411013340902638, "eval_accuracy": 0.9739937686780695, "eval_loss": 0.09140126407146454, "eval_runtime": 32.9476, "eval_samples_per_second": 477.334, "eval_steps_per_second": 7.466, "step": 86000 }, { "epoch": 24.413851830826, "grad_norm": 0.6408570408821106, "learning_rate": 7.559665058189044e-05, "loss": 0.02635324001312256, "step": 86010 }, { "epoch": 24.41669032074936, "grad_norm": 0.04719017073512077, "learning_rate": 7.559381209196707e-05, "loss": 0.012010387331247329, "step": 86020 }, { "epoch": 24.419528810672723, "grad_norm": 0.9467768669128418, "learning_rate": 7.559097360204371e-05, "loss": 0.006073184683918953, "step": 86030 }, { "epoch": 24.422367300596083, "grad_norm": 1.16517972946167, "learning_rate": 7.558813511212035e-05, "loss": 0.008661470562219619, "step": 86040 }, { "epoch": 24.425205790519442, "grad_norm": 3.2752015590667725, "learning_rate": 7.5585296622197e-05, "loss": 0.0049818255007267, "step": 86050 }, { "epoch": 24.428044280442805, "grad_norm": 0.4028700292110443, "learning_rate": 7.558245813227364e-05, "loss": 0.009834858775138854, "step": 86060 }, { "epoch": 24.430882770366164, "grad_norm": 10.536252975463867, "learning_rate": 7.557961964235028e-05, "loss": 0.013364049792289733, "step": 86070 }, { "epoch": 24.433721260289527, "grad_norm": 0.47427505254745483, "learning_rate": 7.557678115242692e-05, "loss": 0.022562147676944734, "step": 86080 }, { "epoch": 24.436559750212886, "grad_norm": 0.12210086733102798, "learning_rate": 7.557394266250355e-05, "loss": 0.013589726388454437, "step": 86090 }, { "epoch": 24.43939824013625, "grad_norm": 0.49628424644470215, "learning_rate": 7.557110417258019e-05, "loss": 0.004454721510410309, "step": 86100 }, { "epoch": 24.44223673005961, "grad_norm": 4.38597297668457, "learning_rate": 7.556826568265683e-05, "loss": 0.017503975331783293, "step": 86110 }, { "epoch": 24.445075219982968, "grad_norm": 4.720366477966309, "learning_rate": 7.556542719273347e-05, "loss": 0.014267253875732421, "step": 86120 }, { "epoch": 24.44791370990633, "grad_norm": 2.899860143661499, "learning_rate": 7.556258870281011e-05, "loss": 0.015398019552230835, "step": 86130 }, { "epoch": 24.45075219982969, "grad_norm": 8.761333465576172, "learning_rate": 7.555975021288675e-05, "loss": 0.01932882219552994, "step": 86140 }, { "epoch": 24.453590689753053, "grad_norm": 8.966391563415527, "learning_rate": 7.555691172296338e-05, "loss": 0.005079405009746551, "step": 86150 }, { "epoch": 24.456429179676412, "grad_norm": 0.26827970147132874, "learning_rate": 7.555407323304002e-05, "loss": 0.005834465473890304, "step": 86160 }, { "epoch": 24.45926766959977, "grad_norm": 1.133062720298767, "learning_rate": 7.555123474311666e-05, "loss": 0.00789586678147316, "step": 86170 }, { "epoch": 24.462106159523135, "grad_norm": 0.0478067472577095, "learning_rate": 7.55483962531933e-05, "loss": 0.012094701081514359, "step": 86180 }, { "epoch": 24.464944649446494, "grad_norm": 0.9537006616592407, "learning_rate": 7.554555776326995e-05, "loss": 0.013540935516357423, "step": 86190 }, { "epoch": 24.467783139369857, "grad_norm": 1.3698433637619019, "learning_rate": 7.554271927334659e-05, "loss": 0.006780289113521576, "step": 86200 }, { "epoch": 24.470621629293216, "grad_norm": 8.682150840759277, "learning_rate": 7.553988078342322e-05, "loss": 0.018687745928764342, "step": 86210 }, { "epoch": 24.473460119216575, "grad_norm": 0.04828153923153877, "learning_rate": 7.553704229349986e-05, "loss": 0.009658598899841308, "step": 86220 }, { "epoch": 24.47629860913994, "grad_norm": 0.7624835968017578, "learning_rate": 7.55342038035765e-05, "loss": 0.007826638221740723, "step": 86230 }, { "epoch": 24.479137099063298, "grad_norm": 0.21243922412395477, "learning_rate": 7.553136531365314e-05, "loss": 0.0050539948046207425, "step": 86240 }, { "epoch": 24.48197558898666, "grad_norm": 0.48611968755722046, "learning_rate": 7.552852682372978e-05, "loss": 0.011003097146749496, "step": 86250 }, { "epoch": 24.48481407891002, "grad_norm": 1.617064356803894, "learning_rate": 7.552568833380642e-05, "loss": 0.03259071111679077, "step": 86260 }, { "epoch": 24.48765256883338, "grad_norm": 0.06952182948589325, "learning_rate": 7.552284984388306e-05, "loss": 0.00531071275472641, "step": 86270 }, { "epoch": 24.490491058756742, "grad_norm": 0.9092909097671509, "learning_rate": 7.552001135395969e-05, "loss": 0.0033127449452877043, "step": 86280 }, { "epoch": 24.4933295486801, "grad_norm": 0.03526105359196663, "learning_rate": 7.551717286403633e-05, "loss": 0.007274515181779862, "step": 86290 }, { "epoch": 24.496168038603464, "grad_norm": 0.5676483511924744, "learning_rate": 7.551433437411297e-05, "loss": 0.01319008469581604, "step": 86300 }, { "epoch": 24.499006528526824, "grad_norm": 0.17656975984573364, "learning_rate": 7.551149588418962e-05, "loss": 0.01271030455827713, "step": 86310 }, { "epoch": 24.501845018450183, "grad_norm": 17.421104431152344, "learning_rate": 7.550865739426626e-05, "loss": 0.02242029011249542, "step": 86320 }, { "epoch": 24.504683508373546, "grad_norm": 4.383487701416016, "learning_rate": 7.55058189043429e-05, "loss": 0.012563468515872955, "step": 86330 }, { "epoch": 24.507521998296905, "grad_norm": 6.804769039154053, "learning_rate": 7.550298041441953e-05, "loss": 0.04043106734752655, "step": 86340 }, { "epoch": 24.510360488220268, "grad_norm": 0.5830639600753784, "learning_rate": 7.550014192449617e-05, "loss": 0.011816391348838806, "step": 86350 }, { "epoch": 24.513198978143627, "grad_norm": 12.93265151977539, "learning_rate": 7.549730343457281e-05, "loss": 0.009117768704891204, "step": 86360 }, { "epoch": 24.516037468066987, "grad_norm": 0.13617335259914398, "learning_rate": 7.549446494464945e-05, "loss": 0.011832229793071747, "step": 86370 }, { "epoch": 24.51887595799035, "grad_norm": 10.370160102844238, "learning_rate": 7.549162645472609e-05, "loss": 0.010516803711652756, "step": 86380 }, { "epoch": 24.52171444791371, "grad_norm": 4.176164627075195, "learning_rate": 7.548878796480273e-05, "loss": 0.004990801215171814, "step": 86390 }, { "epoch": 24.524552937837072, "grad_norm": 1.1447991132736206, "learning_rate": 7.548594947487937e-05, "loss": 0.005484912544488907, "step": 86400 }, { "epoch": 24.52739142776043, "grad_norm": 0.6924852728843689, "learning_rate": 7.5483110984956e-05, "loss": 0.008074317872524262, "step": 86410 }, { "epoch": 24.53022991768379, "grad_norm": 5.722362518310547, "learning_rate": 7.548027249503264e-05, "loss": 0.008575747907161712, "step": 86420 }, { "epoch": 24.533068407607153, "grad_norm": 0.08387243002653122, "learning_rate": 7.547743400510929e-05, "loss": 0.0023767890408635138, "step": 86430 }, { "epoch": 24.535906897530513, "grad_norm": 2.5749258995056152, "learning_rate": 7.547459551518591e-05, "loss": 0.006698532402515412, "step": 86440 }, { "epoch": 24.538745387453876, "grad_norm": 4.278012275695801, "learning_rate": 7.547175702526257e-05, "loss": 0.021793460845947264, "step": 86450 }, { "epoch": 24.541583877377235, "grad_norm": 1.423075795173645, "learning_rate": 7.546891853533921e-05, "loss": 0.011750874668359756, "step": 86460 }, { "epoch": 24.544422367300598, "grad_norm": 0.36262691020965576, "learning_rate": 7.546608004541584e-05, "loss": 0.0057570554316043855, "step": 86470 }, { "epoch": 24.547260857223957, "grad_norm": 0.6418554186820984, "learning_rate": 7.546324155549248e-05, "loss": 0.015154564380645752, "step": 86480 }, { "epoch": 24.550099347147317, "grad_norm": 0.3239717185497284, "learning_rate": 7.546040306556912e-05, "loss": 0.006946538388729095, "step": 86490 }, { "epoch": 24.55293783707068, "grad_norm": 1.393970251083374, "learning_rate": 7.545756457564576e-05, "loss": 0.013685151934623718, "step": 86500 }, { "epoch": 24.55293783707068, "eval_accuracy": 0.9759649011254531, "eval_loss": 0.08706456422805786, "eval_runtime": 35.8056, "eval_samples_per_second": 439.233, "eval_steps_per_second": 6.87, "step": 86500 }, { "epoch": 24.55577632699404, "grad_norm": 3.4574058055877686, "learning_rate": 7.54547260857224e-05, "loss": 0.02135709673166275, "step": 86510 }, { "epoch": 24.558614816917398, "grad_norm": 0.11148232221603394, "learning_rate": 7.545188759579904e-05, "loss": 0.005903488397598267, "step": 86520 }, { "epoch": 24.56145330684076, "grad_norm": 0.20081453025341034, "learning_rate": 7.544904910587569e-05, "loss": 0.014195267856121064, "step": 86530 }, { "epoch": 24.56429179676412, "grad_norm": 0.022058457136154175, "learning_rate": 7.544621061595231e-05, "loss": 0.01331600546836853, "step": 86540 }, { "epoch": 24.567130286687483, "grad_norm": 4.018233776092529, "learning_rate": 7.544337212602895e-05, "loss": 0.009540240466594695, "step": 86550 }, { "epoch": 24.569968776610843, "grad_norm": 3.9388556480407715, "learning_rate": 7.54405336361056e-05, "loss": 0.014902783930301667, "step": 86560 }, { "epoch": 24.572807266534205, "grad_norm": 0.19377146661281586, "learning_rate": 7.543769514618222e-05, "loss": 0.01029449924826622, "step": 86570 }, { "epoch": 24.575645756457565, "grad_norm": 0.48169878125190735, "learning_rate": 7.543485665625888e-05, "loss": 0.0042801350355148315, "step": 86580 }, { "epoch": 24.578484246380924, "grad_norm": 0.17707686126232147, "learning_rate": 7.543201816633552e-05, "loss": 0.006390997022390365, "step": 86590 }, { "epoch": 24.581322736304287, "grad_norm": 0.10965363681316376, "learning_rate": 7.542917967641215e-05, "loss": 0.000650588609278202, "step": 86600 }, { "epoch": 24.584161226227646, "grad_norm": 0.10622919350862503, "learning_rate": 7.542634118648879e-05, "loss": 0.00449899435043335, "step": 86610 }, { "epoch": 24.58699971615101, "grad_norm": 15.669697761535645, "learning_rate": 7.542350269656543e-05, "loss": 0.014814561605453492, "step": 86620 }, { "epoch": 24.58983820607437, "grad_norm": 0.03191252425312996, "learning_rate": 7.542066420664207e-05, "loss": 0.0034497857093811037, "step": 86630 }, { "epoch": 24.592676695997728, "grad_norm": 3.190653085708618, "learning_rate": 7.54178257167187e-05, "loss": 0.0101723313331604, "step": 86640 }, { "epoch": 24.59551518592109, "grad_norm": 0.45216843485832214, "learning_rate": 7.541498722679536e-05, "loss": 0.011951404809951782, "step": 86650 }, { "epoch": 24.59835367584445, "grad_norm": 0.48940715193748474, "learning_rate": 7.5412148736872e-05, "loss": 0.009761112183332444, "step": 86660 }, { "epoch": 24.601192165767813, "grad_norm": 2.720578908920288, "learning_rate": 7.540931024694862e-05, "loss": 0.012603236734867096, "step": 86670 }, { "epoch": 24.604030655691172, "grad_norm": 9.713536262512207, "learning_rate": 7.540647175702527e-05, "loss": 0.019026246666908265, "step": 86680 }, { "epoch": 24.60686914561453, "grad_norm": 9.530990600585938, "learning_rate": 7.540363326710191e-05, "loss": 0.006954858452081681, "step": 86690 }, { "epoch": 24.609707635537895, "grad_norm": 0.07602737843990326, "learning_rate": 7.540079477717853e-05, "loss": 0.017309682071208955, "step": 86700 }, { "epoch": 24.612546125461254, "grad_norm": 10.40809154510498, "learning_rate": 7.539795628725519e-05, "loss": 0.013315209746360778, "step": 86710 }, { "epoch": 24.615384615384617, "grad_norm": 7.767976760864258, "learning_rate": 7.539511779733183e-05, "loss": 0.009416758269071578, "step": 86720 }, { "epoch": 24.618223105307976, "grad_norm": 9.235051155090332, "learning_rate": 7.539227930740846e-05, "loss": 0.01594068706035614, "step": 86730 }, { "epoch": 24.621061595231335, "grad_norm": 2.765253782272339, "learning_rate": 7.53894408174851e-05, "loss": 0.010237958282232285, "step": 86740 }, { "epoch": 24.6239000851547, "grad_norm": 1.3483573198318481, "learning_rate": 7.538660232756174e-05, "loss": 0.01824696213006973, "step": 86750 }, { "epoch": 24.626738575078058, "grad_norm": 0.07410582900047302, "learning_rate": 7.538376383763838e-05, "loss": 0.007808191329240799, "step": 86760 }, { "epoch": 24.62957706500142, "grad_norm": 0.3965435326099396, "learning_rate": 7.538092534771501e-05, "loss": 0.008463595062494278, "step": 86770 }, { "epoch": 24.63241555492478, "grad_norm": 0.295073926448822, "learning_rate": 7.537808685779167e-05, "loss": 0.013459493219852448, "step": 86780 }, { "epoch": 24.63525404484814, "grad_norm": 3.2794878482818604, "learning_rate": 7.537524836786831e-05, "loss": 0.022573918104171753, "step": 86790 }, { "epoch": 24.638092534771502, "grad_norm": 14.55314826965332, "learning_rate": 7.537240987794494e-05, "loss": 0.006751532107591629, "step": 86800 }, { "epoch": 24.64093102469486, "grad_norm": 3.100156784057617, "learning_rate": 7.536957138802158e-05, "loss": 0.006707204878330231, "step": 86810 }, { "epoch": 24.643769514618224, "grad_norm": 2.729118585586548, "learning_rate": 7.536673289809822e-05, "loss": 0.021135397255420685, "step": 86820 }, { "epoch": 24.646608004541584, "grad_norm": 0.25113990902900696, "learning_rate": 7.536389440817485e-05, "loss": 0.002165621891617775, "step": 86830 }, { "epoch": 24.649446494464943, "grad_norm": 0.9848399758338928, "learning_rate": 7.536105591825149e-05, "loss": 0.008683568984270095, "step": 86840 }, { "epoch": 24.652284984388306, "grad_norm": 0.17548789083957672, "learning_rate": 7.535821742832814e-05, "loss": 0.01153913587331772, "step": 86850 }, { "epoch": 24.655123474311665, "grad_norm": 5.069494724273682, "learning_rate": 7.535537893840477e-05, "loss": 0.009331363439559936, "step": 86860 }, { "epoch": 24.657961964235028, "grad_norm": 2.3621506690979004, "learning_rate": 7.535254044848141e-05, "loss": 0.00514492355287075, "step": 86870 }, { "epoch": 24.660800454158387, "grad_norm": 0.4391665458679199, "learning_rate": 7.534970195855805e-05, "loss": 0.011124081164598464, "step": 86880 }, { "epoch": 24.663638944081747, "grad_norm": 0.08878729492425919, "learning_rate": 7.53468634686347e-05, "loss": 0.007803434133529663, "step": 86890 }, { "epoch": 24.66647743400511, "grad_norm": 0.0667199194431305, "learning_rate": 7.534402497871132e-05, "loss": 0.004006920754909516, "step": 86900 }, { "epoch": 24.66931592392847, "grad_norm": 0.027779418975114822, "learning_rate": 7.534118648878798e-05, "loss": 0.020309111475944518, "step": 86910 }, { "epoch": 24.672154413851832, "grad_norm": 1.0376701354980469, "learning_rate": 7.53383479988646e-05, "loss": 0.002126300893723965, "step": 86920 }, { "epoch": 24.67499290377519, "grad_norm": 2.6511569023132324, "learning_rate": 7.533550950894125e-05, "loss": 0.00195627361536026, "step": 86930 }, { "epoch": 24.677831393698554, "grad_norm": 8.901161193847656, "learning_rate": 7.533267101901789e-05, "loss": 0.009313812106847763, "step": 86940 }, { "epoch": 24.680669883621913, "grad_norm": 0.18774433434009552, "learning_rate": 7.532983252909453e-05, "loss": 0.021689322590827943, "step": 86950 }, { "epoch": 24.683508373545273, "grad_norm": 8.20618724822998, "learning_rate": 7.532699403917116e-05, "loss": 0.007482375204563141, "step": 86960 }, { "epoch": 24.686346863468636, "grad_norm": 4.0656256675720215, "learning_rate": 7.53241555492478e-05, "loss": 0.006976133584976197, "step": 86970 }, { "epoch": 24.689185353391995, "grad_norm": 12.2095365524292, "learning_rate": 7.532131705932445e-05, "loss": 0.021067818999290465, "step": 86980 }, { "epoch": 24.692023843315358, "grad_norm": 0.07006686180830002, "learning_rate": 7.531847856940108e-05, "loss": 0.010320131480693818, "step": 86990 }, { "epoch": 24.694862333238717, "grad_norm": 7.398126125335693, "learning_rate": 7.531564007947772e-05, "loss": 0.00798179879784584, "step": 87000 }, { "epoch": 24.694862333238717, "eval_accuracy": 0.9699243339479875, "eval_loss": 0.11560443043708801, "eval_runtime": 36.3356, "eval_samples_per_second": 432.826, "eval_steps_per_second": 6.77, "step": 87000 }, { "epoch": 24.697700823162076, "grad_norm": 3.9571096897125244, "learning_rate": 7.531280158955436e-05, "loss": 0.016849452257156373, "step": 87010 }, { "epoch": 24.70053931308544, "grad_norm": 1.594070553779602, "learning_rate": 7.5309963099631e-05, "loss": 0.026917541027069093, "step": 87020 }, { "epoch": 24.7033778030088, "grad_norm": 10.646675109863281, "learning_rate": 7.530712460970763e-05, "loss": 0.009811930358409882, "step": 87030 }, { "epoch": 24.70621629293216, "grad_norm": 0.4501258432865143, "learning_rate": 7.530428611978427e-05, "loss": 0.006300915777683258, "step": 87040 }, { "epoch": 24.70905478285552, "grad_norm": 10.497018814086914, "learning_rate": 7.530173147885325e-05, "loss": 0.032254308462142944, "step": 87050 }, { "epoch": 24.71189327277888, "grad_norm": 1.539094090461731, "learning_rate": 7.52988929889299e-05, "loss": 0.005339851975440979, "step": 87060 }, { "epoch": 24.714731762702243, "grad_norm": 0.03659192472696304, "learning_rate": 7.529605449900652e-05, "loss": 0.0035965517163276672, "step": 87070 }, { "epoch": 24.717570252625602, "grad_norm": 0.9223887920379639, "learning_rate": 7.529321600908316e-05, "loss": 0.011777596175670623, "step": 87080 }, { "epoch": 24.720408742548965, "grad_norm": 0.06458701938390732, "learning_rate": 7.529037751915982e-05, "loss": 0.0021952107548713685, "step": 87090 }, { "epoch": 24.723247232472325, "grad_norm": 2.3350465297698975, "learning_rate": 7.528753902923645e-05, "loss": 0.004438789933919907, "step": 87100 }, { "epoch": 24.726085722395684, "grad_norm": 0.1840370148420334, "learning_rate": 7.528470053931309e-05, "loss": 0.004221893846988678, "step": 87110 }, { "epoch": 24.728924212319047, "grad_norm": 0.20192715525627136, "learning_rate": 7.528186204938973e-05, "loss": 0.008806905895471572, "step": 87120 }, { "epoch": 24.731762702242406, "grad_norm": 1.9185311794281006, "learning_rate": 7.527902355946637e-05, "loss": 0.0028255799785256385, "step": 87130 }, { "epoch": 24.73460119216577, "grad_norm": 0.6158114671707153, "learning_rate": 7.5276185069543e-05, "loss": 0.016376110911369323, "step": 87140 }, { "epoch": 24.73743968208913, "grad_norm": 17.799938201904297, "learning_rate": 7.527334657961964e-05, "loss": 0.02384234368801117, "step": 87150 }, { "epoch": 24.740278172012488, "grad_norm": 0.5206533670425415, "learning_rate": 7.52705080896963e-05, "loss": 0.004596534371376038, "step": 87160 }, { "epoch": 24.74311666193585, "grad_norm": 0.23070290684700012, "learning_rate": 7.526766959977292e-05, "loss": 0.017424358427524565, "step": 87170 }, { "epoch": 24.74595515185921, "grad_norm": 0.8285106420516968, "learning_rate": 7.526483110984956e-05, "loss": 0.007202371209859848, "step": 87180 }, { "epoch": 24.748793641782573, "grad_norm": 0.06021019071340561, "learning_rate": 7.52619926199262e-05, "loss": 0.004186632484197617, "step": 87190 }, { "epoch": 24.751632131705932, "grad_norm": 1.133433222770691, "learning_rate": 7.525915413000283e-05, "loss": 0.004422216489911079, "step": 87200 }, { "epoch": 24.75447062162929, "grad_norm": 0.5356155633926392, "learning_rate": 7.525631564007948e-05, "loss": 0.010812388360500335, "step": 87210 }, { "epoch": 24.757309111552654, "grad_norm": 5.12717866897583, "learning_rate": 7.525347715015612e-05, "loss": 0.012163816392421723, "step": 87220 }, { "epoch": 24.760147601476014, "grad_norm": 3.869861602783203, "learning_rate": 7.525063866023276e-05, "loss": 0.004036540165543556, "step": 87230 }, { "epoch": 24.762986091399377, "grad_norm": 0.8541666269302368, "learning_rate": 7.52478001703094e-05, "loss": 0.00793260633945465, "step": 87240 }, { "epoch": 24.765824581322736, "grad_norm": 3.968493938446045, "learning_rate": 7.524496168038604e-05, "loss": 0.024329043924808502, "step": 87250 }, { "epoch": 24.768663071246095, "grad_norm": 1.812638759613037, "learning_rate": 7.524212319046268e-05, "loss": 0.017119535803794862, "step": 87260 }, { "epoch": 24.77150156116946, "grad_norm": 12.732528686523438, "learning_rate": 7.523928470053931e-05, "loss": 0.014038312435150146, "step": 87270 }, { "epoch": 24.774340051092818, "grad_norm": 9.65519905090332, "learning_rate": 7.523644621061595e-05, "loss": 0.0076732337474823, "step": 87280 }, { "epoch": 24.77717854101618, "grad_norm": 2.1152451038360596, "learning_rate": 7.52336077206926e-05, "loss": 0.010788124054670334, "step": 87290 }, { "epoch": 24.78001703093954, "grad_norm": 4.0766167640686035, "learning_rate": 7.523076923076923e-05, "loss": 0.005369715392589569, "step": 87300 }, { "epoch": 24.782855520862903, "grad_norm": 1.3789077997207642, "learning_rate": 7.522793074084588e-05, "loss": 0.012975892424583435, "step": 87310 }, { "epoch": 24.785694010786262, "grad_norm": 1.2857592105865479, "learning_rate": 7.522509225092252e-05, "loss": 0.00774562954902649, "step": 87320 }, { "epoch": 24.78853250070962, "grad_norm": 0.1518229991197586, "learning_rate": 7.522225376099914e-05, "loss": 0.017656470835208892, "step": 87330 }, { "epoch": 24.791370990632984, "grad_norm": 7.980907917022705, "learning_rate": 7.521941527107579e-05, "loss": 0.011712975054979324, "step": 87340 }, { "epoch": 24.794209480556344, "grad_norm": 0.18618148565292358, "learning_rate": 7.521657678115243e-05, "loss": 0.008985430002212524, "step": 87350 }, { "epoch": 24.797047970479706, "grad_norm": 0.17472121119499207, "learning_rate": 7.521373829122907e-05, "loss": 0.01674937307834625, "step": 87360 }, { "epoch": 24.799886460403066, "grad_norm": 1.2771965265274048, "learning_rate": 7.521089980130571e-05, "loss": 0.004381605982780456, "step": 87370 }, { "epoch": 24.802724950326425, "grad_norm": 0.16322775185108185, "learning_rate": 7.520806131138235e-05, "loss": 0.009621676057577133, "step": 87380 }, { "epoch": 24.805563440249788, "grad_norm": 0.13559271395206451, "learning_rate": 7.520522282145899e-05, "loss": 0.015726584196090698, "step": 87390 }, { "epoch": 24.808401930173147, "grad_norm": 0.1906222552061081, "learning_rate": 7.520238433153562e-05, "loss": 0.007416890561580658, "step": 87400 }, { "epoch": 24.81124042009651, "grad_norm": 0.0866331085562706, "learning_rate": 7.519954584161226e-05, "loss": 0.001524054817855358, "step": 87410 }, { "epoch": 24.81407891001987, "grad_norm": 1.7412865161895752, "learning_rate": 7.51967073516889e-05, "loss": 0.004613496363162994, "step": 87420 }, { "epoch": 24.81691739994323, "grad_norm": 0.34620413184165955, "learning_rate": 7.519386886176554e-05, "loss": 0.0035087950527668, "step": 87430 }, { "epoch": 24.81975588986659, "grad_norm": 0.4526467025279999, "learning_rate": 7.519103037184219e-05, "loss": 0.00894540473818779, "step": 87440 }, { "epoch": 24.82259437978995, "grad_norm": 5.128594398498535, "learning_rate": 7.518819188191883e-05, "loss": 0.020580627024173737, "step": 87450 }, { "epoch": 24.825432869713314, "grad_norm": 1.5423985719680786, "learning_rate": 7.518535339199546e-05, "loss": 0.004791968315839767, "step": 87460 }, { "epoch": 24.828271359636673, "grad_norm": 0.5146508812904358, "learning_rate": 7.51825149020721e-05, "loss": 0.01723148375749588, "step": 87470 }, { "epoch": 24.831109849560033, "grad_norm": 0.04994138702750206, "learning_rate": 7.517967641214874e-05, "loss": 0.0016427012160420418, "step": 87480 }, { "epoch": 24.833948339483396, "grad_norm": 0.23686924576759338, "learning_rate": 7.517683792222538e-05, "loss": 0.005818529054522514, "step": 87490 }, { "epoch": 24.836786829406755, "grad_norm": 1.4012969732284546, "learning_rate": 7.517399943230202e-05, "loss": 0.007302983105182648, "step": 87500 }, { "epoch": 24.836786829406755, "eval_accuracy": 0.9761556558784257, "eval_loss": 0.09237578511238098, "eval_runtime": 34.6144, "eval_samples_per_second": 454.348, "eval_steps_per_second": 7.107, "step": 87500 }, { "epoch": 24.839625319330118, "grad_norm": 0.7835609912872314, "learning_rate": 7.517116094237866e-05, "loss": 0.011503133922815323, "step": 87510 }, { "epoch": 24.842463809253477, "grad_norm": 5.966135501861572, "learning_rate": 7.51683224524553e-05, "loss": 0.008599453419446946, "step": 87520 }, { "epoch": 24.845302299176836, "grad_norm": 0.1854587197303772, "learning_rate": 7.516548396253193e-05, "loss": 0.019643385708332062, "step": 87530 }, { "epoch": 24.8481407891002, "grad_norm": 0.48283809423446655, "learning_rate": 7.516264547260857e-05, "loss": 0.005117164924740791, "step": 87540 }, { "epoch": 24.85097927902356, "grad_norm": 0.05672990903258324, "learning_rate": 7.515980698268521e-05, "loss": 0.011621752381324768, "step": 87550 }, { "epoch": 24.85381776894692, "grad_norm": 4.306065559387207, "learning_rate": 7.515696849276186e-05, "loss": 0.011923772096633912, "step": 87560 }, { "epoch": 24.85665625887028, "grad_norm": 9.567153930664062, "learning_rate": 7.51541300028385e-05, "loss": 0.021456889808177948, "step": 87570 }, { "epoch": 24.85949474879364, "grad_norm": 10.522123336791992, "learning_rate": 7.515129151291514e-05, "loss": 0.013189193606376649, "step": 87580 }, { "epoch": 24.862333238717003, "grad_norm": 1.0069661140441895, "learning_rate": 7.514845302299177e-05, "loss": 0.0033274915069341658, "step": 87590 }, { "epoch": 24.865171728640362, "grad_norm": 0.15518683195114136, "learning_rate": 7.514561453306841e-05, "loss": 0.014722950756549835, "step": 87600 }, { "epoch": 24.868010218563725, "grad_norm": 9.696077346801758, "learning_rate": 7.514277604314505e-05, "loss": 0.010433254390954971, "step": 87610 }, { "epoch": 24.870848708487085, "grad_norm": 6.802279472351074, "learning_rate": 7.513993755322169e-05, "loss": 0.011865378171205521, "step": 87620 }, { "epoch": 24.873687198410444, "grad_norm": 4.190077304840088, "learning_rate": 7.513709906329833e-05, "loss": 0.016419379413127898, "step": 87630 }, { "epoch": 24.876525688333807, "grad_norm": 7.949484348297119, "learning_rate": 7.513426057337497e-05, "loss": 0.01625964790582657, "step": 87640 }, { "epoch": 24.879364178257166, "grad_norm": 0.40452560782432556, "learning_rate": 7.513142208345161e-05, "loss": 0.006613999605178833, "step": 87650 }, { "epoch": 24.88220266818053, "grad_norm": 0.33537521958351135, "learning_rate": 7.512858359352824e-05, "loss": 0.002374131977558136, "step": 87660 }, { "epoch": 24.88504115810389, "grad_norm": 0.05582400783896446, "learning_rate": 7.512574510360488e-05, "loss": 0.004646296799182892, "step": 87670 }, { "epoch": 24.88787964802725, "grad_norm": 0.10197947174310684, "learning_rate": 7.512290661368152e-05, "loss": 0.010714323818683624, "step": 87680 }, { "epoch": 24.89071813795061, "grad_norm": 0.028952963650226593, "learning_rate": 7.512006812375817e-05, "loss": 0.011170991510152817, "step": 87690 }, { "epoch": 24.89355662787397, "grad_norm": 10.456950187683105, "learning_rate": 7.511722963383481e-05, "loss": 0.007939024269580841, "step": 87700 }, { "epoch": 24.896395117797333, "grad_norm": 7.101510524749756, "learning_rate": 7.511439114391145e-05, "loss": 0.01237301379442215, "step": 87710 }, { "epoch": 24.899233607720692, "grad_norm": 0.03330978751182556, "learning_rate": 7.511155265398808e-05, "loss": 0.0077405616641044615, "step": 87720 }, { "epoch": 24.902072097644055, "grad_norm": 0.13669542968273163, "learning_rate": 7.510871416406472e-05, "loss": 0.01343540996313095, "step": 87730 }, { "epoch": 24.904910587567414, "grad_norm": 0.9004843831062317, "learning_rate": 7.510587567414136e-05, "loss": 0.00557859092950821, "step": 87740 }, { "epoch": 24.907749077490774, "grad_norm": 2.4862706661224365, "learning_rate": 7.5103037184218e-05, "loss": 0.005845819413661957, "step": 87750 }, { "epoch": 24.910587567414137, "grad_norm": 0.20643296837806702, "learning_rate": 7.510019869429464e-05, "loss": 0.00590667687356472, "step": 87760 }, { "epoch": 24.913426057337496, "grad_norm": 2.414794921875, "learning_rate": 7.509736020437128e-05, "loss": 0.002042117528617382, "step": 87770 }, { "epoch": 24.91626454726086, "grad_norm": 0.1686076670885086, "learning_rate": 7.509452171444793e-05, "loss": 0.0034356698393821715, "step": 87780 }, { "epoch": 24.919103037184218, "grad_norm": 0.2552534341812134, "learning_rate": 7.509168322452455e-05, "loss": 0.003187919035553932, "step": 87790 }, { "epoch": 24.921941527107577, "grad_norm": 6.109468460083008, "learning_rate": 7.50888447346012e-05, "loss": 0.0222549170255661, "step": 87800 }, { "epoch": 24.92478001703094, "grad_norm": 0.2182239145040512, "learning_rate": 7.508600624467784e-05, "loss": 0.0020371681079268456, "step": 87810 }, { "epoch": 24.9276185069543, "grad_norm": 0.6013546586036682, "learning_rate": 7.508316775475446e-05, "loss": 0.020118914544582367, "step": 87820 }, { "epoch": 24.930456996877663, "grad_norm": 0.22881437838077545, "learning_rate": 7.508032926483112e-05, "loss": 0.011861808598041534, "step": 87830 }, { "epoch": 24.933295486801022, "grad_norm": 0.08396732062101364, "learning_rate": 7.507749077490776e-05, "loss": 0.004083474352955818, "step": 87840 }, { "epoch": 24.93613397672438, "grad_norm": 3.6119205951690674, "learning_rate": 7.507465228498439e-05, "loss": 0.010880187153816223, "step": 87850 }, { "epoch": 24.938972466647744, "grad_norm": 1.5538374185562134, "learning_rate": 7.507181379506103e-05, "loss": 0.008099214732646942, "step": 87860 }, { "epoch": 24.941810956571103, "grad_norm": 4.776947498321533, "learning_rate": 7.506897530513767e-05, "loss": 0.01773812174797058, "step": 87870 }, { "epoch": 24.944649446494466, "grad_norm": 5.1138129234313965, "learning_rate": 7.506613681521431e-05, "loss": 0.009686540067195892, "step": 87880 }, { "epoch": 24.947487936417826, "grad_norm": 5.3209052085876465, "learning_rate": 7.506329832529095e-05, "loss": 0.00564223900437355, "step": 87890 }, { "epoch": 24.950326426341185, "grad_norm": 0.35971033573150635, "learning_rate": 7.50604598353676e-05, "loss": 0.00454176813364029, "step": 87900 }, { "epoch": 24.953164916264548, "grad_norm": 0.08217697590589523, "learning_rate": 7.505762134544422e-05, "loss": 0.020520010590553285, "step": 87910 }, { "epoch": 24.956003406187907, "grad_norm": 0.9215506911277771, "learning_rate": 7.505478285552086e-05, "loss": 0.011780644208192826, "step": 87920 }, { "epoch": 24.95884189611127, "grad_norm": 0.1610453575849533, "learning_rate": 7.50519443655975e-05, "loss": 0.010268250852823258, "step": 87930 }, { "epoch": 24.96168038603463, "grad_norm": 0.6995009183883667, "learning_rate": 7.504910587567415e-05, "loss": 0.02024802416563034, "step": 87940 }, { "epoch": 24.96451887595799, "grad_norm": 2.319716215133667, "learning_rate": 7.504626738575077e-05, "loss": 0.0062054343521595005, "step": 87950 }, { "epoch": 24.96735736588135, "grad_norm": 2.5772149562835693, "learning_rate": 7.504342889582743e-05, "loss": 0.005675658583641052, "step": 87960 }, { "epoch": 24.97019585580471, "grad_norm": 0.034360941499471664, "learning_rate": 7.504059040590407e-05, "loss": 0.0027786212041974066, "step": 87970 }, { "epoch": 24.973034345728074, "grad_norm": 5.2496185302734375, "learning_rate": 7.50377519159807e-05, "loss": 0.0077677503228187565, "step": 87980 }, { "epoch": 24.975872835651433, "grad_norm": 0.33430224657058716, "learning_rate": 7.503491342605734e-05, "loss": 0.012898695468902589, "step": 87990 }, { "epoch": 24.978711325574793, "grad_norm": 0.0445103719830513, "learning_rate": 7.503207493613398e-05, "loss": 0.01701689511537552, "step": 88000 }, { "epoch": 24.978711325574793, "eval_accuracy": 0.9754562217841928, "eval_loss": 0.08335555344820023, "eval_runtime": 34.8436, "eval_samples_per_second": 451.359, "eval_steps_per_second": 7.06, "step": 88000 }, { "epoch": 24.981549815498155, "grad_norm": 9.931129455566406, "learning_rate": 7.502923644621061e-05, "loss": 0.013030609488487244, "step": 88010 }, { "epoch": 24.984388305421515, "grad_norm": 2.9251387119293213, "learning_rate": 7.502639795628726e-05, "loss": 0.006698767095804215, "step": 88020 }, { "epoch": 24.987226795344878, "grad_norm": 0.12897706031799316, "learning_rate": 7.50235594663639e-05, "loss": 0.006181690096855164, "step": 88030 }, { "epoch": 24.990065285268237, "grad_norm": 0.018919827416539192, "learning_rate": 7.502072097644053e-05, "loss": 0.0023221084848046303, "step": 88040 }, { "epoch": 24.9929037751916, "grad_norm": 8.605438232421875, "learning_rate": 7.501788248651717e-05, "loss": 0.008400126546621322, "step": 88050 }, { "epoch": 24.99574226511496, "grad_norm": 0.07835251837968826, "learning_rate": 7.501504399659382e-05, "loss": 0.010087057948112488, "step": 88060 }, { "epoch": 24.99858075503832, "grad_norm": 0.17573747038841248, "learning_rate": 7.501220550667046e-05, "loss": 0.012451525032520293, "step": 88070 }, { "epoch": 25.00141924496168, "grad_norm": 0.06881272047758102, "learning_rate": 7.500936701674709e-05, "loss": 0.0190571665763855, "step": 88080 }, { "epoch": 25.00425773488504, "grad_norm": 1.672728419303894, "learning_rate": 7.500652852682374e-05, "loss": 0.0058500152081251144, "step": 88090 }, { "epoch": 25.007096224808404, "grad_norm": 0.3750596046447754, "learning_rate": 7.500369003690038e-05, "loss": 0.005219118297100067, "step": 88100 }, { "epoch": 25.009934714731763, "grad_norm": 6.355255603790283, "learning_rate": 7.500085154697701e-05, "loss": 0.01779741942882538, "step": 88110 }, { "epoch": 25.012773204655122, "grad_norm": 0.08187111467123032, "learning_rate": 7.499801305705365e-05, "loss": 0.012395285069942474, "step": 88120 }, { "epoch": 25.015611694578485, "grad_norm": 0.16787870228290558, "learning_rate": 7.499517456713029e-05, "loss": 0.014133422076702118, "step": 88130 }, { "epoch": 25.018450184501845, "grad_norm": 6.1347856521606445, "learning_rate": 7.499233607720692e-05, "loss": 0.0050543338060379025, "step": 88140 }, { "epoch": 25.021288674425207, "grad_norm": 0.25163033604621887, "learning_rate": 7.498949758728356e-05, "loss": 0.010337083041667939, "step": 88150 }, { "epoch": 25.024127164348567, "grad_norm": 1.178110957145691, "learning_rate": 7.498665909736022e-05, "loss": 0.003611401468515396, "step": 88160 }, { "epoch": 25.026965654271926, "grad_norm": 4.9885358810424805, "learning_rate": 7.498382060743684e-05, "loss": 0.0023345576599240305, "step": 88170 }, { "epoch": 25.02980414419529, "grad_norm": 4.427271842956543, "learning_rate": 7.498098211751349e-05, "loss": 0.015090787410736084, "step": 88180 }, { "epoch": 25.03264263411865, "grad_norm": 0.055738095194101334, "learning_rate": 7.497814362759013e-05, "loss": 0.017386102676391603, "step": 88190 }, { "epoch": 25.03548112404201, "grad_norm": 2.905576705932617, "learning_rate": 7.497530513766677e-05, "loss": 0.0127057746052742, "step": 88200 }, { "epoch": 25.03831961396537, "grad_norm": 0.04182777553796768, "learning_rate": 7.49724666477434e-05, "loss": 0.010071522742509841, "step": 88210 }, { "epoch": 25.04115810388873, "grad_norm": 7.442935943603516, "learning_rate": 7.496962815782005e-05, "loss": 0.01192655935883522, "step": 88220 }, { "epoch": 25.043996593812093, "grad_norm": 12.103716850280762, "learning_rate": 7.496678966789669e-05, "loss": 0.015621484816074371, "step": 88230 }, { "epoch": 25.046835083735452, "grad_norm": 1.1626819372177124, "learning_rate": 7.496395117797332e-05, "loss": 0.010397998988628388, "step": 88240 }, { "epoch": 25.049673573658815, "grad_norm": 0.5756862759590149, "learning_rate": 7.496111268804996e-05, "loss": 0.009964820742607117, "step": 88250 }, { "epoch": 25.052512063582174, "grad_norm": 0.5981858968734741, "learning_rate": 7.49582741981266e-05, "loss": 0.007760126143693924, "step": 88260 }, { "epoch": 25.055350553505534, "grad_norm": 0.3978712856769562, "learning_rate": 7.495543570820323e-05, "loss": 0.0028943099081516267, "step": 88270 }, { "epoch": 25.058189043428897, "grad_norm": 0.7095226645469666, "learning_rate": 7.495259721827987e-05, "loss": 0.00821102187037468, "step": 88280 }, { "epoch": 25.061027533352256, "grad_norm": 9.480746269226074, "learning_rate": 7.494975872835653e-05, "loss": 0.013113942742347718, "step": 88290 }, { "epoch": 25.06386602327562, "grad_norm": 0.39881187677383423, "learning_rate": 7.494692023843315e-05, "loss": 0.001146947778761387, "step": 88300 }, { "epoch": 25.066704513198978, "grad_norm": 0.017957117408514023, "learning_rate": 7.49440817485098e-05, "loss": 0.001961051858961582, "step": 88310 }, { "epoch": 25.069543003122337, "grad_norm": 0.308811217546463, "learning_rate": 7.494124325858644e-05, "loss": 0.005096273496747017, "step": 88320 }, { "epoch": 25.0723814930457, "grad_norm": 0.1531006097793579, "learning_rate": 7.493840476866308e-05, "loss": 0.007415460050106048, "step": 88330 }, { "epoch": 25.07521998296906, "grad_norm": 0.5807761549949646, "learning_rate": 7.49355662787397e-05, "loss": 0.00396123193204403, "step": 88340 }, { "epoch": 25.078058472892423, "grad_norm": 1.0949815511703491, "learning_rate": 7.493272778881635e-05, "loss": 0.029714208841323853, "step": 88350 }, { "epoch": 25.080896962815782, "grad_norm": 4.758900165557861, "learning_rate": 7.4929889298893e-05, "loss": 0.005913631618022918, "step": 88360 }, { "epoch": 25.08373545273914, "grad_norm": 0.29923468828201294, "learning_rate": 7.492705080896963e-05, "loss": 0.00536656491458416, "step": 88370 }, { "epoch": 25.086573942662504, "grad_norm": 0.11426595598459244, "learning_rate": 7.492421231904627e-05, "loss": 0.0006685871630907059, "step": 88380 }, { "epoch": 25.089412432585863, "grad_norm": 14.75670337677002, "learning_rate": 7.492137382912291e-05, "loss": 0.012132228910923004, "step": 88390 }, { "epoch": 25.092250922509226, "grad_norm": 0.3809480667114258, "learning_rate": 7.491853533919954e-05, "loss": 0.004427645727992058, "step": 88400 }, { "epoch": 25.095089412432586, "grad_norm": 0.5373806357383728, "learning_rate": 7.491569684927618e-05, "loss": 0.006331613659858704, "step": 88410 }, { "epoch": 25.097927902355945, "grad_norm": 0.24683767557144165, "learning_rate": 7.491285835935284e-05, "loss": 0.006447121500968933, "step": 88420 }, { "epoch": 25.100766392279308, "grad_norm": 9.50399112701416, "learning_rate": 7.491001986942947e-05, "loss": 0.01121443286538124, "step": 88430 }, { "epoch": 25.103604882202667, "grad_norm": 0.38323602080345154, "learning_rate": 7.49071813795061e-05, "loss": 0.006935006380081177, "step": 88440 }, { "epoch": 25.10644337212603, "grad_norm": 0.031743165105581284, "learning_rate": 7.490434288958275e-05, "loss": 0.011472496390342712, "step": 88450 }, { "epoch": 25.10928186204939, "grad_norm": 2.280695915222168, "learning_rate": 7.490150439965939e-05, "loss": 0.008001950383186341, "step": 88460 }, { "epoch": 25.11212035197275, "grad_norm": 1.4241598844528198, "learning_rate": 7.489866590973602e-05, "loss": 0.0028953997418284414, "step": 88470 }, { "epoch": 25.11495884189611, "grad_norm": 0.33574607968330383, "learning_rate": 7.489582741981266e-05, "loss": 0.0023410677909851073, "step": 88480 }, { "epoch": 25.11779733181947, "grad_norm": 0.34760549664497375, "learning_rate": 7.489298892988931e-05, "loss": 0.006047024577856064, "step": 88490 }, { "epoch": 25.120635821742834, "grad_norm": 7.948306560516357, "learning_rate": 7.489015043996594e-05, "loss": 0.013668856024742127, "step": 88500 }, { "epoch": 25.120635821742834, "eval_accuracy": 0.9619762192407961, "eval_loss": 0.1474638283252716, "eval_runtime": 33.8285, "eval_samples_per_second": 464.904, "eval_steps_per_second": 7.272, "step": 88500 }, { "epoch": 25.123474311666193, "grad_norm": 0.34346601366996765, "learning_rate": 7.488731195004258e-05, "loss": 0.012086507678031922, "step": 88510 }, { "epoch": 25.126312801589556, "grad_norm": 1.1539472341537476, "learning_rate": 7.488447346011922e-05, "loss": 0.017246027290821076, "step": 88520 }, { "epoch": 25.129151291512915, "grad_norm": 2.115556001663208, "learning_rate": 7.488163497019585e-05, "loss": 0.00232306607067585, "step": 88530 }, { "epoch": 25.131989781436275, "grad_norm": 0.36488595604896545, "learning_rate": 7.48787964802725e-05, "loss": 0.004482611268758774, "step": 88540 }, { "epoch": 25.134828271359638, "grad_norm": 2.891345262527466, "learning_rate": 7.487595799034913e-05, "loss": 0.004416335374116898, "step": 88550 }, { "epoch": 25.137666761282997, "grad_norm": 0.042882878333330154, "learning_rate": 7.487311950042578e-05, "loss": 0.011962465196847915, "step": 88560 }, { "epoch": 25.14050525120636, "grad_norm": 4.585739612579346, "learning_rate": 7.487028101050242e-05, "loss": 0.018870751559734344, "step": 88570 }, { "epoch": 25.14334374112972, "grad_norm": 0.6428725123405457, "learning_rate": 7.486744252057906e-05, "loss": 0.0077162012457847595, "step": 88580 }, { "epoch": 25.14618223105308, "grad_norm": 5.821713447570801, "learning_rate": 7.48646040306557e-05, "loss": 0.013907374441623687, "step": 88590 }, { "epoch": 25.14902072097644, "grad_norm": 0.7723859548568726, "learning_rate": 7.486176554073233e-05, "loss": 0.003116578981280327, "step": 88600 }, { "epoch": 25.1518592108998, "grad_norm": 0.3836494982242584, "learning_rate": 7.485892705080897e-05, "loss": 0.009475016593933105, "step": 88610 }, { "epoch": 25.154697700823164, "grad_norm": 0.12689273059368134, "learning_rate": 7.485608856088562e-05, "loss": 0.004091928899288178, "step": 88620 }, { "epoch": 25.157536190746523, "grad_norm": 5.022653579711914, "learning_rate": 7.485325007096225e-05, "loss": 0.0025140833109617232, "step": 88630 }, { "epoch": 25.160374680669882, "grad_norm": 0.0697626993060112, "learning_rate": 7.48504115810389e-05, "loss": 0.0034309811890125275, "step": 88640 }, { "epoch": 25.163213170593245, "grad_norm": 0.10173014551401138, "learning_rate": 7.484757309111553e-05, "loss": 0.0017784124240279198, "step": 88650 }, { "epoch": 25.166051660516604, "grad_norm": 0.25699591636657715, "learning_rate": 7.484473460119216e-05, "loss": 0.0015286367386579513, "step": 88660 }, { "epoch": 25.168890150439967, "grad_norm": 0.29642629623413086, "learning_rate": 7.48418961112688e-05, "loss": 0.004366127401590347, "step": 88670 }, { "epoch": 25.171728640363327, "grad_norm": 5.627523899078369, "learning_rate": 7.483905762134545e-05, "loss": 0.023440131545066835, "step": 88680 }, { "epoch": 25.174567130286686, "grad_norm": 1.1175857782363892, "learning_rate": 7.483621913142209e-05, "loss": 0.0044670671224594114, "step": 88690 }, { "epoch": 25.17740562021005, "grad_norm": 6.043957710266113, "learning_rate": 7.483338064149873e-05, "loss": 0.004698056727647781, "step": 88700 }, { "epoch": 25.18024411013341, "grad_norm": 0.26613715291023254, "learning_rate": 7.483054215157537e-05, "loss": 0.013679195940494538, "step": 88710 }, { "epoch": 25.18308260005677, "grad_norm": 0.25003382563591003, "learning_rate": 7.482770366165201e-05, "loss": 0.004292218387126923, "step": 88720 }, { "epoch": 25.18592108998013, "grad_norm": 1.6341605186462402, "learning_rate": 7.482486517172864e-05, "loss": 0.010587939620018005, "step": 88730 }, { "epoch": 25.18875957990349, "grad_norm": 4.346147060394287, "learning_rate": 7.482202668180528e-05, "loss": 0.002901364676654339, "step": 88740 }, { "epoch": 25.191598069826853, "grad_norm": 0.7656651139259338, "learning_rate": 7.481918819188192e-05, "loss": 0.0033268436789512636, "step": 88750 }, { "epoch": 25.194436559750212, "grad_norm": 0.37408724427223206, "learning_rate": 7.481634970195856e-05, "loss": 0.005521881952881813, "step": 88760 }, { "epoch": 25.197275049673575, "grad_norm": 0.084024578332901, "learning_rate": 7.48135112120352e-05, "loss": 0.012909308075904846, "step": 88770 }, { "epoch": 25.200113539596934, "grad_norm": 1.7578648328781128, "learning_rate": 7.481067272211185e-05, "loss": 0.005168053507804871, "step": 88780 }, { "epoch": 25.202952029520294, "grad_norm": 6.937491416931152, "learning_rate": 7.480783423218847e-05, "loss": 0.008936774730682374, "step": 88790 }, { "epoch": 25.205790519443656, "grad_norm": 6.809782981872559, "learning_rate": 7.480499574226511e-05, "loss": 0.010390879213809967, "step": 88800 }, { "epoch": 25.208629009367016, "grad_norm": 2.6956403255462646, "learning_rate": 7.480215725234176e-05, "loss": 0.001871239021420479, "step": 88810 }, { "epoch": 25.21146749929038, "grad_norm": 0.04798933491110802, "learning_rate": 7.47993187624184e-05, "loss": 0.0022146468982100487, "step": 88820 }, { "epoch": 25.214305989213738, "grad_norm": 0.061015255749225616, "learning_rate": 7.479648027249504e-05, "loss": 0.002726881392300129, "step": 88830 }, { "epoch": 25.217144479137097, "grad_norm": 1.0991555452346802, "learning_rate": 7.479364178257168e-05, "loss": 0.006791436672210693, "step": 88840 }, { "epoch": 25.21998296906046, "grad_norm": 3.3832404613494873, "learning_rate": 7.479080329264831e-05, "loss": 0.008105304837226868, "step": 88850 }, { "epoch": 25.22282145898382, "grad_norm": 0.0398436114192009, "learning_rate": 7.478796480272495e-05, "loss": 0.005916780978441239, "step": 88860 }, { "epoch": 25.225659948907182, "grad_norm": 0.7736295461654663, "learning_rate": 7.478512631280159e-05, "loss": 0.007523863017559052, "step": 88870 }, { "epoch": 25.22849843883054, "grad_norm": 0.098592109978199, "learning_rate": 7.478228782287823e-05, "loss": 0.01450311839580536, "step": 88880 }, { "epoch": 25.231336928753905, "grad_norm": 0.040703628212213516, "learning_rate": 7.477944933295487e-05, "loss": 0.0035933084785938264, "step": 88890 }, { "epoch": 25.234175418677264, "grad_norm": 0.15914537012577057, "learning_rate": 7.477661084303152e-05, "loss": 0.004341611266136169, "step": 88900 }, { "epoch": 25.237013908600623, "grad_norm": 0.5689504742622375, "learning_rate": 7.477377235310816e-05, "loss": 0.003740827739238739, "step": 88910 }, { "epoch": 25.239852398523986, "grad_norm": 1.510378360748291, "learning_rate": 7.477093386318478e-05, "loss": 0.005659632384777069, "step": 88920 }, { "epoch": 25.242690888447346, "grad_norm": 8.258662223815918, "learning_rate": 7.476809537326143e-05, "loss": 0.02284492552280426, "step": 88930 }, { "epoch": 25.24552937837071, "grad_norm": 1.382814884185791, "learning_rate": 7.476525688333807e-05, "loss": 0.018458887934684753, "step": 88940 }, { "epoch": 25.248367868294068, "grad_norm": 5.569324016571045, "learning_rate": 7.47624183934147e-05, "loss": 0.006845755130052566, "step": 88950 }, { "epoch": 25.251206358217427, "grad_norm": 3.414463996887207, "learning_rate": 7.475957990349135e-05, "loss": 0.009719160199165345, "step": 88960 }, { "epoch": 25.25404484814079, "grad_norm": 0.06429114192724228, "learning_rate": 7.475674141356799e-05, "loss": 0.012387323379516601, "step": 88970 }, { "epoch": 25.25688333806415, "grad_norm": 0.874298632144928, "learning_rate": 7.475390292364462e-05, "loss": 0.002412092499434948, "step": 88980 }, { "epoch": 25.259721827987512, "grad_norm": 0.8166298270225525, "learning_rate": 7.475106443372126e-05, "loss": 0.024586690962314604, "step": 88990 }, { "epoch": 25.26256031791087, "grad_norm": 14.481818199157715, "learning_rate": 7.47482259437979e-05, "loss": 0.02428887188434601, "step": 89000 }, { "epoch": 25.26256031791087, "eval_accuracy": 0.9753290519488778, "eval_loss": 0.08758728951215744, "eval_runtime": 34.9121, "eval_samples_per_second": 450.474, "eval_steps_per_second": 7.046, "step": 89000 }, { "epoch": 25.26539880783423, "grad_norm": 8.20954704284668, "learning_rate": 7.474538745387454e-05, "loss": 0.006214327365159989, "step": 89010 }, { "epoch": 25.268237297757594, "grad_norm": 0.2667808532714844, "learning_rate": 7.474254896395118e-05, "loss": 0.010198402404785156, "step": 89020 }, { "epoch": 25.271075787680953, "grad_norm": 0.13991011679172516, "learning_rate": 7.473971047402783e-05, "loss": 0.013243408501148224, "step": 89030 }, { "epoch": 25.273914277604316, "grad_norm": 0.8312278985977173, "learning_rate": 7.473687198410447e-05, "loss": 0.0018977493047714234, "step": 89040 }, { "epoch": 25.276752767527675, "grad_norm": 0.2713484764099121, "learning_rate": 7.47340334941811e-05, "loss": 0.006650206446647644, "step": 89050 }, { "epoch": 25.279591257451035, "grad_norm": 0.3860657811164856, "learning_rate": 7.473119500425774e-05, "loss": 0.0075859732925891874, "step": 89060 }, { "epoch": 25.282429747374398, "grad_norm": 0.007391764782369137, "learning_rate": 7.472835651433438e-05, "loss": 0.008018223941326142, "step": 89070 }, { "epoch": 25.285268237297757, "grad_norm": 0.20527750253677368, "learning_rate": 7.472580187340336e-05, "loss": 0.017699411511421202, "step": 89080 }, { "epoch": 25.28810672722112, "grad_norm": 0.09870373457670212, "learning_rate": 7.472296338348e-05, "loss": 0.007139772176742554, "step": 89090 }, { "epoch": 25.29094521714448, "grad_norm": 0.9971146583557129, "learning_rate": 7.472012489355663e-05, "loss": 0.018797481060028078, "step": 89100 }, { "epoch": 25.29378370706784, "grad_norm": 0.04105079919099808, "learning_rate": 7.471728640363327e-05, "loss": 0.012601383030414581, "step": 89110 }, { "epoch": 25.2966221969912, "grad_norm": 0.9899020791053772, "learning_rate": 7.471444791370991e-05, "loss": 0.002791275084018707, "step": 89120 }, { "epoch": 25.29946068691456, "grad_norm": 1.6622728109359741, "learning_rate": 7.471160942378654e-05, "loss": 0.022127172350883482, "step": 89130 }, { "epoch": 25.302299176837924, "grad_norm": 10.056769371032715, "learning_rate": 7.470877093386319e-05, "loss": 0.019124582409858704, "step": 89140 }, { "epoch": 25.305137666761283, "grad_norm": 0.17042095959186554, "learning_rate": 7.470593244393983e-05, "loss": 0.01073194071650505, "step": 89150 }, { "epoch": 25.307976156684642, "grad_norm": 0.3682926297187805, "learning_rate": 7.470309395401646e-05, "loss": 0.006983482837677002, "step": 89160 }, { "epoch": 25.310814646608005, "grad_norm": 0.09030769020318985, "learning_rate": 7.47002554640931e-05, "loss": 0.008091433346271515, "step": 89170 }, { "epoch": 25.313653136531364, "grad_norm": 1.0536514520645142, "learning_rate": 7.469741697416974e-05, "loss": 0.028129559755325318, "step": 89180 }, { "epoch": 25.316491626454727, "grad_norm": 0.24886077642440796, "learning_rate": 7.469457848424639e-05, "loss": 0.004738155007362366, "step": 89190 }, { "epoch": 25.319330116378087, "grad_norm": 2.776676654815674, "learning_rate": 7.469173999432303e-05, "loss": 0.01066562533378601, "step": 89200 }, { "epoch": 25.322168606301446, "grad_norm": 0.05236824229359627, "learning_rate": 7.468890150439967e-05, "loss": 0.011852312088012695, "step": 89210 }, { "epoch": 25.32500709622481, "grad_norm": 1.0052992105484009, "learning_rate": 7.468606301447631e-05, "loss": 0.009817180782556533, "step": 89220 }, { "epoch": 25.327845586148168, "grad_norm": 4.107451915740967, "learning_rate": 7.468322452455294e-05, "loss": 0.014459228515625, "step": 89230 }, { "epoch": 25.33068407607153, "grad_norm": 2.6572747230529785, "learning_rate": 7.468038603462958e-05, "loss": 0.011076144874095917, "step": 89240 }, { "epoch": 25.33352256599489, "grad_norm": 0.9125170707702637, "learning_rate": 7.467754754470622e-05, "loss": 0.019557181000709533, "step": 89250 }, { "epoch": 25.336361055918253, "grad_norm": 15.165132522583008, "learning_rate": 7.467470905478285e-05, "loss": 0.028925609588623048, "step": 89260 }, { "epoch": 25.339199545841613, "grad_norm": 1.3368538618087769, "learning_rate": 7.46718705648595e-05, "loss": 0.007370691001415253, "step": 89270 }, { "epoch": 25.342038035764972, "grad_norm": 24.129772186279297, "learning_rate": 7.466903207493614e-05, "loss": 0.017780700325965883, "step": 89280 }, { "epoch": 25.344876525688335, "grad_norm": 0.19532722234725952, "learning_rate": 7.466619358501277e-05, "loss": 0.0104860857129097, "step": 89290 }, { "epoch": 25.347715015611694, "grad_norm": 8.0968017578125, "learning_rate": 7.466335509508941e-05, "loss": 0.01406552642583847, "step": 89300 }, { "epoch": 25.350553505535057, "grad_norm": 0.17204110324382782, "learning_rate": 7.466051660516606e-05, "loss": 0.0041920609772205355, "step": 89310 }, { "epoch": 25.353391995458416, "grad_norm": 1.0107172727584839, "learning_rate": 7.46576781152427e-05, "loss": 0.012988731265068054, "step": 89320 }, { "epoch": 25.356230485381776, "grad_norm": 2.56569504737854, "learning_rate": 7.465483962531932e-05, "loss": 0.010880158096551896, "step": 89330 }, { "epoch": 25.35906897530514, "grad_norm": 0.3445502519607544, "learning_rate": 7.465200113539598e-05, "loss": 0.00801250860095024, "step": 89340 }, { "epoch": 25.361907465228498, "grad_norm": 0.07670461386442184, "learning_rate": 7.464916264547262e-05, "loss": 0.0036760665476322173, "step": 89350 }, { "epoch": 25.36474595515186, "grad_norm": 5.229856491088867, "learning_rate": 7.464632415554925e-05, "loss": 0.010169997066259383, "step": 89360 }, { "epoch": 25.36758444507522, "grad_norm": 2.0796070098876953, "learning_rate": 7.464348566562589e-05, "loss": 0.006594269722700119, "step": 89370 }, { "epoch": 25.37042293499858, "grad_norm": 0.11833211779594421, "learning_rate": 7.464064717570253e-05, "loss": 0.008965201675891876, "step": 89380 }, { "epoch": 25.373261424921942, "grad_norm": 2.9189958572387695, "learning_rate": 7.463780868577916e-05, "loss": 0.008238789439201356, "step": 89390 }, { "epoch": 25.3760999148453, "grad_norm": 14.72905158996582, "learning_rate": 7.463497019585581e-05, "loss": 0.015299195051193237, "step": 89400 }, { "epoch": 25.378938404768665, "grad_norm": 0.20023587346076965, "learning_rate": 7.463213170593246e-05, "loss": 0.02264712154865265, "step": 89410 }, { "epoch": 25.381776894692024, "grad_norm": 2.067068576812744, "learning_rate": 7.462929321600908e-05, "loss": 0.008425582945346833, "step": 89420 }, { "epoch": 25.384615384615383, "grad_norm": 3.3919179439544678, "learning_rate": 7.462645472608572e-05, "loss": 0.004353434592485428, "step": 89430 }, { "epoch": 25.387453874538746, "grad_norm": 4.077432632446289, "learning_rate": 7.462361623616237e-05, "loss": 0.01260494589805603, "step": 89440 }, { "epoch": 25.390292364462105, "grad_norm": 10.949095726013184, "learning_rate": 7.462077774623901e-05, "loss": 0.01818329393863678, "step": 89450 }, { "epoch": 25.39313085438547, "grad_norm": 2.222620725631714, "learning_rate": 7.461793925631564e-05, "loss": 0.026535722613334655, "step": 89460 }, { "epoch": 25.395969344308828, "grad_norm": 7.772825717926025, "learning_rate": 7.461510076639229e-05, "loss": 0.028572279214859008, "step": 89470 }, { "epoch": 25.398807834232187, "grad_norm": 0.6361588835716248, "learning_rate": 7.461226227646893e-05, "loss": 0.019240787625312804, "step": 89480 }, { "epoch": 25.40164632415555, "grad_norm": 0.41541755199432373, "learning_rate": 7.460942378654556e-05, "loss": 0.010220983624458313, "step": 89490 }, { "epoch": 25.40448481407891, "grad_norm": 1.5935853719711304, "learning_rate": 7.46065852966222e-05, "loss": 0.007900816947221756, "step": 89500 }, { "epoch": 25.40448481407891, "eval_accuracy": 0.9718318814777135, "eval_loss": 0.10468579083681107, "eval_runtime": 34.3205, "eval_samples_per_second": 458.239, "eval_steps_per_second": 7.168, "step": 89500 }, { "epoch": 25.407323304002272, "grad_norm": 0.5211023092269897, "learning_rate": 7.460374680669884e-05, "loss": 0.006472687423229218, "step": 89510 }, { "epoch": 25.41016179392563, "grad_norm": 2.8675215244293213, "learning_rate": 7.460090831677547e-05, "loss": 0.008588509261608123, "step": 89520 }, { "epoch": 25.41300028384899, "grad_norm": 0.15373991429805756, "learning_rate": 7.459806982685211e-05, "loss": 0.008348079025745391, "step": 89530 }, { "epoch": 25.415838773772354, "grad_norm": 0.1768060326576233, "learning_rate": 7.459523133692877e-05, "loss": 0.009678984433412552, "step": 89540 }, { "epoch": 25.418677263695713, "grad_norm": 0.9347082376480103, "learning_rate": 7.45923928470054e-05, "loss": 0.0012735534459352494, "step": 89550 }, { "epoch": 25.421515753619076, "grad_norm": 3.3758621215820312, "learning_rate": 7.458955435708204e-05, "loss": 0.0036798864603042603, "step": 89560 }, { "epoch": 25.424354243542435, "grad_norm": 0.2576419413089752, "learning_rate": 7.458671586715868e-05, "loss": 0.006099139153957367, "step": 89570 }, { "epoch": 25.427192733465795, "grad_norm": 1.527796983718872, "learning_rate": 7.458387737723532e-05, "loss": 0.01099817305803299, "step": 89580 }, { "epoch": 25.430031223389157, "grad_norm": 5.967081069946289, "learning_rate": 7.458103888731195e-05, "loss": 0.0034110452979803085, "step": 89590 }, { "epoch": 25.432869713312517, "grad_norm": 0.01856265962123871, "learning_rate": 7.45782003973886e-05, "loss": 0.009201809763908386, "step": 89600 }, { "epoch": 25.43570820323588, "grad_norm": 0.05427063629031181, "learning_rate": 7.457536190746523e-05, "loss": 0.00935910940170288, "step": 89610 }, { "epoch": 25.43854669315924, "grad_norm": 2.8150973320007324, "learning_rate": 7.457252341754187e-05, "loss": 0.002500970847904682, "step": 89620 }, { "epoch": 25.4413851830826, "grad_norm": 0.10413269698619843, "learning_rate": 7.456968492761851e-05, "loss": 0.014055897295475007, "step": 89630 }, { "epoch": 25.44422367300596, "grad_norm": 0.9635423421859741, "learning_rate": 7.456684643769515e-05, "loss": 0.013131356239318848, "step": 89640 }, { "epoch": 25.44706216292932, "grad_norm": 4.752671718597412, "learning_rate": 7.456400794777178e-05, "loss": 0.009670261293649673, "step": 89650 }, { "epoch": 25.449900652852683, "grad_norm": 0.6108294129371643, "learning_rate": 7.456116945784842e-05, "loss": 0.006192868202924728, "step": 89660 }, { "epoch": 25.452739142776043, "grad_norm": 0.20196227729320526, "learning_rate": 7.455833096792508e-05, "loss": 0.006017037853598595, "step": 89670 }, { "epoch": 25.455577632699406, "grad_norm": 2.5153284072875977, "learning_rate": 7.45554924780017e-05, "loss": 0.0049983255565166475, "step": 89680 }, { "epoch": 25.458416122622765, "grad_norm": 8.107276916503906, "learning_rate": 7.455265398807835e-05, "loss": 0.008491210639476776, "step": 89690 }, { "epoch": 25.461254612546124, "grad_norm": 1.1691640615463257, "learning_rate": 7.454981549815499e-05, "loss": 0.011321494728326798, "step": 89700 }, { "epoch": 25.464093102469487, "grad_norm": 1.6124094724655151, "learning_rate": 7.454697700823163e-05, "loss": 0.007193145900964737, "step": 89710 }, { "epoch": 25.466931592392847, "grad_norm": 0.12145328521728516, "learning_rate": 7.454413851830826e-05, "loss": 0.011361632496118546, "step": 89720 }, { "epoch": 25.46977008231621, "grad_norm": 5.719797611236572, "learning_rate": 7.454130002838491e-05, "loss": 0.003918404132127762, "step": 89730 }, { "epoch": 25.47260857223957, "grad_norm": 4.829936981201172, "learning_rate": 7.453846153846154e-05, "loss": 0.0037638887763023375, "step": 89740 }, { "epoch": 25.475447062162928, "grad_norm": 4.496588230133057, "learning_rate": 7.453562304853818e-05, "loss": 0.00436876192688942, "step": 89750 }, { "epoch": 25.47828555208629, "grad_norm": 5.545335292816162, "learning_rate": 7.453278455861482e-05, "loss": 0.004189334809780121, "step": 89760 }, { "epoch": 25.48112404200965, "grad_norm": 1.6113344430923462, "learning_rate": 7.452994606869146e-05, "loss": 0.0069550670683383945, "step": 89770 }, { "epoch": 25.483962531933013, "grad_norm": 6.148491382598877, "learning_rate": 7.452710757876809e-05, "loss": 0.0070911340415477754, "step": 89780 }, { "epoch": 25.486801021856373, "grad_norm": 2.216336727142334, "learning_rate": 7.452426908884473e-05, "loss": 0.0016270671039819717, "step": 89790 }, { "epoch": 25.489639511779732, "grad_norm": 0.39050740003585815, "learning_rate": 7.452143059892139e-05, "loss": 0.006063970923423767, "step": 89800 }, { "epoch": 25.492478001703095, "grad_norm": 6.07155704498291, "learning_rate": 7.451859210899802e-05, "loss": 0.021789900958538055, "step": 89810 }, { "epoch": 25.495316491626454, "grad_norm": 3.860375165939331, "learning_rate": 7.451575361907466e-05, "loss": 0.023509667813777925, "step": 89820 }, { "epoch": 25.498154981549817, "grad_norm": 9.236740112304688, "learning_rate": 7.45129151291513e-05, "loss": 0.007684427499771118, "step": 89830 }, { "epoch": 25.500993471473176, "grad_norm": 7.683574676513672, "learning_rate": 7.451007663922793e-05, "loss": 0.005167032778263092, "step": 89840 }, { "epoch": 25.503831961396536, "grad_norm": 0.09034428000450134, "learning_rate": 7.450723814930457e-05, "loss": 0.002180076763033867, "step": 89850 }, { "epoch": 25.5066704513199, "grad_norm": 0.5686853528022766, "learning_rate": 7.450439965938121e-05, "loss": 0.005781760066747665, "step": 89860 }, { "epoch": 25.509508941243258, "grad_norm": 0.42421457171440125, "learning_rate": 7.450156116945785e-05, "loss": 0.0013933818787336349, "step": 89870 }, { "epoch": 25.51234743116662, "grad_norm": 1.4137674570083618, "learning_rate": 7.449872267953449e-05, "loss": 0.005943059176206589, "step": 89880 }, { "epoch": 25.51518592108998, "grad_norm": 2.848198413848877, "learning_rate": 7.449588418961113e-05, "loss": 0.00412396639585495, "step": 89890 }, { "epoch": 25.51802441101334, "grad_norm": 3.518526792526245, "learning_rate": 7.449304569968777e-05, "loss": 0.007343976199626923, "step": 89900 }, { "epoch": 25.520862900936702, "grad_norm": 0.7878851294517517, "learning_rate": 7.44902072097644e-05, "loss": 0.007183082401752472, "step": 89910 }, { "epoch": 25.52370139086006, "grad_norm": 1.320884346961975, "learning_rate": 7.448736871984104e-05, "loss": 0.012407417595386504, "step": 89920 }, { "epoch": 25.526539880783425, "grad_norm": 0.379658579826355, "learning_rate": 7.44845302299177e-05, "loss": 0.03151839673519134, "step": 89930 }, { "epoch": 25.529378370706784, "grad_norm": 6.490708351135254, "learning_rate": 7.448169173999433e-05, "loss": 0.021738195419311525, "step": 89940 }, { "epoch": 25.532216860630143, "grad_norm": 3.3521580696105957, "learning_rate": 7.447885325007097e-05, "loss": 0.008082441240549087, "step": 89950 }, { "epoch": 25.535055350553506, "grad_norm": 13.28322982788086, "learning_rate": 7.447601476014761e-05, "loss": 0.012851449847221374, "step": 89960 }, { "epoch": 25.537893840476865, "grad_norm": 0.25009796023368835, "learning_rate": 7.447317627022424e-05, "loss": 0.020899713039398193, "step": 89970 }, { "epoch": 25.54073233040023, "grad_norm": 11.038787841796875, "learning_rate": 7.447033778030088e-05, "loss": 0.02715252637863159, "step": 89980 }, { "epoch": 25.543570820323588, "grad_norm": 7.100213050842285, "learning_rate": 7.446749929037752e-05, "loss": 0.018731169402599335, "step": 89990 }, { "epoch": 25.546409310246947, "grad_norm": 1.319427728652954, "learning_rate": 7.446466080045416e-05, "loss": 0.03679172694683075, "step": 90000 }, { "epoch": 25.546409310246947, "eval_accuracy": 0.9730399949132066, "eval_loss": 0.1059207171201706, "eval_runtime": 38.4147, "eval_samples_per_second": 409.401, "eval_steps_per_second": 6.404, "step": 90000 }, { "epoch": 25.54924780017031, "grad_norm": 0.06629546731710434, "learning_rate": 7.44618223105308e-05, "loss": 0.00289053451269865, "step": 90010 }, { "epoch": 25.55208629009367, "grad_norm": 5.9544358253479, "learning_rate": 7.445898382060744e-05, "loss": 0.004323823750019074, "step": 90020 }, { "epoch": 25.554924780017032, "grad_norm": 7.492887496948242, "learning_rate": 7.445614533068409e-05, "loss": 0.007971149682998658, "step": 90030 }, { "epoch": 25.55776326994039, "grad_norm": 8.679960250854492, "learning_rate": 7.445330684076071e-05, "loss": 0.010010477155447006, "step": 90040 }, { "epoch": 25.56060175986375, "grad_norm": 0.547473132610321, "learning_rate": 7.445046835083735e-05, "loss": 0.016594310104846955, "step": 90050 }, { "epoch": 25.563440249787114, "grad_norm": 7.6033759117126465, "learning_rate": 7.4447629860914e-05, "loss": 0.015636473894119263, "step": 90060 }, { "epoch": 25.566278739710473, "grad_norm": 7.949481964111328, "learning_rate": 7.444479137099064e-05, "loss": 0.007727138698101044, "step": 90070 }, { "epoch": 25.569117229633836, "grad_norm": 14.19365406036377, "learning_rate": 7.444195288106728e-05, "loss": 0.021856357157230378, "step": 90080 }, { "epoch": 25.571955719557195, "grad_norm": 1.2881098985671997, "learning_rate": 7.443911439114392e-05, "loss": 0.004821647703647613, "step": 90090 }, { "epoch": 25.574794209480558, "grad_norm": 0.27400773763656616, "learning_rate": 7.443627590122055e-05, "loss": 0.002646210789680481, "step": 90100 }, { "epoch": 25.577632699403917, "grad_norm": 2.9354419708251953, "learning_rate": 7.443343741129719e-05, "loss": 0.00978771597146988, "step": 90110 }, { "epoch": 25.580471189327277, "grad_norm": 3.8324592113494873, "learning_rate": 7.443059892137383e-05, "loss": 0.0018224112689495086, "step": 90120 }, { "epoch": 25.58330967925064, "grad_norm": 2.26293683052063, "learning_rate": 7.442776043145047e-05, "loss": 0.0022734371945261956, "step": 90130 }, { "epoch": 25.586148169174, "grad_norm": 0.32572561502456665, "learning_rate": 7.442492194152711e-05, "loss": 0.0018168186768889427, "step": 90140 }, { "epoch": 25.588986659097362, "grad_norm": 9.456544876098633, "learning_rate": 7.442208345160375e-05, "loss": 0.013214340806007386, "step": 90150 }, { "epoch": 25.59182514902072, "grad_norm": 0.4530196785926819, "learning_rate": 7.44192449616804e-05, "loss": 0.005967195332050324, "step": 90160 }, { "epoch": 25.59466363894408, "grad_norm": 8.34504508972168, "learning_rate": 7.441640647175702e-05, "loss": 0.00857282504439354, "step": 90170 }, { "epoch": 25.597502128867443, "grad_norm": 1.3959994316101074, "learning_rate": 7.441356798183367e-05, "loss": 0.019687439501285552, "step": 90180 }, { "epoch": 25.600340618790803, "grad_norm": 6.008676528930664, "learning_rate": 7.44107294919103e-05, "loss": 0.008654215931892395, "step": 90190 }, { "epoch": 25.603179108714166, "grad_norm": 10.209891319274902, "learning_rate": 7.440789100198695e-05, "loss": 0.011956464499235153, "step": 90200 }, { "epoch": 25.606017598637525, "grad_norm": 0.4612743854522705, "learning_rate": 7.440505251206359e-05, "loss": 0.007368840277194977, "step": 90210 }, { "epoch": 25.608856088560884, "grad_norm": 2.0282275676727295, "learning_rate": 7.440221402214023e-05, "loss": 0.008639366924762725, "step": 90220 }, { "epoch": 25.611694578484247, "grad_norm": 0.14561258256435394, "learning_rate": 7.439937553221686e-05, "loss": 0.00387871116399765, "step": 90230 }, { "epoch": 25.614533068407606, "grad_norm": 9.446569442749023, "learning_rate": 7.43965370422935e-05, "loss": 0.008237958699464799, "step": 90240 }, { "epoch": 25.61737155833097, "grad_norm": 9.671032905578613, "learning_rate": 7.439369855237014e-05, "loss": 0.034179466962814334, "step": 90250 }, { "epoch": 25.62021004825433, "grad_norm": 11.84526538848877, "learning_rate": 7.439086006244678e-05, "loss": 0.018435278534889223, "step": 90260 }, { "epoch": 25.623048538177688, "grad_norm": 0.08934514969587326, "learning_rate": 7.438802157252342e-05, "loss": 0.019590693712234496, "step": 90270 }, { "epoch": 25.62588702810105, "grad_norm": 1.590696930885315, "learning_rate": 7.438518308260007e-05, "loss": 0.018366165459156036, "step": 90280 }, { "epoch": 25.62872551802441, "grad_norm": 0.27407315373420715, "learning_rate": 7.43823445926767e-05, "loss": 0.015609249472618103, "step": 90290 }, { "epoch": 25.631564007947773, "grad_norm": 4.723912239074707, "learning_rate": 7.437950610275333e-05, "loss": 0.012045377492904663, "step": 90300 }, { "epoch": 25.634402497871132, "grad_norm": 1.457156777381897, "learning_rate": 7.437666761282998e-05, "loss": 0.014488820731639863, "step": 90310 }, { "epoch": 25.637240987794492, "grad_norm": 0.6744175553321838, "learning_rate": 7.437382912290662e-05, "loss": 0.005132757872343063, "step": 90320 }, { "epoch": 25.640079477717855, "grad_norm": 0.3449016511440277, "learning_rate": 7.437099063298326e-05, "loss": 0.011341096460819244, "step": 90330 }, { "epoch": 25.642917967641214, "grad_norm": 20.3939208984375, "learning_rate": 7.43681521430599e-05, "loss": 0.004098481684923172, "step": 90340 }, { "epoch": 25.645756457564577, "grad_norm": 0.09690175205469131, "learning_rate": 7.436531365313654e-05, "loss": 0.02108256220817566, "step": 90350 }, { "epoch": 25.648594947487936, "grad_norm": 1.1698541641235352, "learning_rate": 7.436247516321317e-05, "loss": 0.007732470333576202, "step": 90360 }, { "epoch": 25.651433437411296, "grad_norm": 0.3727177083492279, "learning_rate": 7.435963667328981e-05, "loss": 0.007169266790151596, "step": 90370 }, { "epoch": 25.65427192733466, "grad_norm": 0.4876358211040497, "learning_rate": 7.435679818336645e-05, "loss": 0.026082155108451844, "step": 90380 }, { "epoch": 25.657110417258018, "grad_norm": 13.98908519744873, "learning_rate": 7.43539596934431e-05, "loss": 0.018990057706832885, "step": 90390 }, { "epoch": 25.65994890718138, "grad_norm": 5.643368244171143, "learning_rate": 7.435112120351973e-05, "loss": 0.009845898300409318, "step": 90400 }, { "epoch": 25.66278739710474, "grad_norm": 0.15166756510734558, "learning_rate": 7.434828271359638e-05, "loss": 0.0026114687323570253, "step": 90410 }, { "epoch": 25.6656258870281, "grad_norm": 0.14776280522346497, "learning_rate": 7.434544422367302e-05, "loss": 0.01565372943878174, "step": 90420 }, { "epoch": 25.668464376951462, "grad_norm": 0.3697112500667572, "learning_rate": 7.434260573374965e-05, "loss": 0.003227857127785683, "step": 90430 }, { "epoch": 25.67130286687482, "grad_norm": 0.2735258936882019, "learning_rate": 7.433976724382629e-05, "loss": 0.014258910715579987, "step": 90440 }, { "epoch": 25.674141356798184, "grad_norm": 1.6049648523330688, "learning_rate": 7.433692875390293e-05, "loss": 0.01120239645242691, "step": 90450 }, { "epoch": 25.676979846721544, "grad_norm": 7.8492326736450195, "learning_rate": 7.433409026397956e-05, "loss": 0.010595157742500305, "step": 90460 }, { "epoch": 25.679818336644907, "grad_norm": 9.3235445022583, "learning_rate": 7.433125177405621e-05, "loss": 0.012274020910263061, "step": 90470 }, { "epoch": 25.682656826568266, "grad_norm": 0.5151907801628113, "learning_rate": 7.432841328413285e-05, "loss": 0.008494933694601059, "step": 90480 }, { "epoch": 25.685495316491625, "grad_norm": 0.07204311341047287, "learning_rate": 7.432557479420948e-05, "loss": 0.00476681962609291, "step": 90490 }, { "epoch": 25.688333806414988, "grad_norm": 3.658038377761841, "learning_rate": 7.432273630428612e-05, "loss": 0.0024803126230835914, "step": 90500 }, { "epoch": 25.688333806414988, "eval_accuracy": 0.976918674890316, "eval_loss": 0.08920512348413467, "eval_runtime": 36.1918, "eval_samples_per_second": 434.546, "eval_steps_per_second": 6.797, "step": 90500 }, { "epoch": 25.691172296338348, "grad_norm": 1.596082091331482, "learning_rate": 7.431989781436276e-05, "loss": 0.004657389968633652, "step": 90510 }, { "epoch": 25.69401078626171, "grad_norm": 1.200546383857727, "learning_rate": 7.43170593244394e-05, "loss": 0.011848270148038863, "step": 90520 }, { "epoch": 25.69684927618507, "grad_norm": 0.20451949536800385, "learning_rate": 7.431422083451605e-05, "loss": 0.00390227809548378, "step": 90530 }, { "epoch": 25.69968776610843, "grad_norm": 0.04777665063738823, "learning_rate": 7.431138234459269e-05, "loss": 0.003412488102912903, "step": 90540 }, { "epoch": 25.702526256031792, "grad_norm": 0.22634007036685944, "learning_rate": 7.430854385466931e-05, "loss": 0.020008663833141326, "step": 90550 }, { "epoch": 25.70536474595515, "grad_norm": 4.147204399108887, "learning_rate": 7.430570536474596e-05, "loss": 0.010874567925930024, "step": 90560 }, { "epoch": 25.708203235878514, "grad_norm": 0.46047350764274597, "learning_rate": 7.43028668748226e-05, "loss": 0.007573287189006806, "step": 90570 }, { "epoch": 25.711041725801874, "grad_norm": 0.6512267589569092, "learning_rate": 7.430002838489924e-05, "loss": 0.006233476102352142, "step": 90580 }, { "epoch": 25.713880215725233, "grad_norm": 0.7248436212539673, "learning_rate": 7.429718989497587e-05, "loss": 0.011059671640396118, "step": 90590 }, { "epoch": 25.716718705648596, "grad_norm": 10.983268737792969, "learning_rate": 7.429435140505252e-05, "loss": 0.02045983225107193, "step": 90600 }, { "epoch": 25.719557195571955, "grad_norm": 5.1710309982299805, "learning_rate": 7.429151291512916e-05, "loss": 0.010479378700256347, "step": 90610 }, { "epoch": 25.722395685495318, "grad_norm": 0.10862262547016144, "learning_rate": 7.428867442520579e-05, "loss": 0.007108961790800094, "step": 90620 }, { "epoch": 25.725234175418677, "grad_norm": 2.7657389640808105, "learning_rate": 7.428583593528243e-05, "loss": 0.01564975380897522, "step": 90630 }, { "epoch": 25.728072665342037, "grad_norm": 0.40548062324523926, "learning_rate": 7.428299744535907e-05, "loss": 0.010296119749546051, "step": 90640 }, { "epoch": 25.7309111552654, "grad_norm": 0.2951337397098541, "learning_rate": 7.42801589554357e-05, "loss": 0.03203726708889008, "step": 90650 }, { "epoch": 25.73374964518876, "grad_norm": 9.785198211669922, "learning_rate": 7.427732046551234e-05, "loss": 0.005106996372342109, "step": 90660 }, { "epoch": 25.73658813511212, "grad_norm": 0.34616735577583313, "learning_rate": 7.4274481975589e-05, "loss": 0.0031748920679092406, "step": 90670 }, { "epoch": 25.73942662503548, "grad_norm": 2.4601571559906006, "learning_rate": 7.427164348566563e-05, "loss": 0.009922322630882264, "step": 90680 }, { "epoch": 25.74226511495884, "grad_norm": 17.826318740844727, "learning_rate": 7.426880499574227e-05, "loss": 0.026730209589004517, "step": 90690 }, { "epoch": 25.745103604882203, "grad_norm": 1.4654284715652466, "learning_rate": 7.426596650581891e-05, "loss": 0.0057504050433635715, "step": 90700 }, { "epoch": 25.747942094805563, "grad_norm": 0.123029924929142, "learning_rate": 7.426312801589555e-05, "loss": 0.02197980284690857, "step": 90710 }, { "epoch": 25.750780584728926, "grad_norm": 0.22152920067310333, "learning_rate": 7.426028952597218e-05, "loss": 0.0016427993774414062, "step": 90720 }, { "epoch": 25.753619074652285, "grad_norm": 2.6659295558929443, "learning_rate": 7.425745103604883e-05, "loss": 0.013203345239162445, "step": 90730 }, { "epoch": 25.756457564575644, "grad_norm": 0.23230421543121338, "learning_rate": 7.425461254612547e-05, "loss": 0.002724645100533962, "step": 90740 }, { "epoch": 25.759296054499007, "grad_norm": 5.83518123626709, "learning_rate": 7.42517740562021e-05, "loss": 0.004138267040252686, "step": 90750 }, { "epoch": 25.762134544422366, "grad_norm": 0.09652126580476761, "learning_rate": 7.424893556627874e-05, "loss": 0.017558446526527403, "step": 90760 }, { "epoch": 25.76497303434573, "grad_norm": 1.8642833232879639, "learning_rate": 7.424609707635538e-05, "loss": 0.01959360986948013, "step": 90770 }, { "epoch": 25.76781152426909, "grad_norm": 7.835997581481934, "learning_rate": 7.424325858643201e-05, "loss": 0.03096606731414795, "step": 90780 }, { "epoch": 25.770650014192448, "grad_norm": 12.57453727722168, "learning_rate": 7.424042009650865e-05, "loss": 0.017753252387046815, "step": 90790 }, { "epoch": 25.77348850411581, "grad_norm": 1.8900455236434937, "learning_rate": 7.423758160658531e-05, "loss": 0.031305831670761106, "step": 90800 }, { "epoch": 25.77632699403917, "grad_norm": 8.930179595947266, "learning_rate": 7.423474311666194e-05, "loss": 0.01714027225971222, "step": 90810 }, { "epoch": 25.779165483962533, "grad_norm": 0.751675546169281, "learning_rate": 7.423190462673858e-05, "loss": 0.011656686663627625, "step": 90820 }, { "epoch": 25.782003973885892, "grad_norm": 0.4106620252132416, "learning_rate": 7.422906613681522e-05, "loss": 0.002770135924220085, "step": 90830 }, { "epoch": 25.784842463809255, "grad_norm": 1.0539788007736206, "learning_rate": 7.422622764689186e-05, "loss": 0.005170294642448425, "step": 90840 }, { "epoch": 25.787680953732615, "grad_norm": 0.24699023365974426, "learning_rate": 7.422338915696849e-05, "loss": 0.009753601253032684, "step": 90850 }, { "epoch": 25.790519443655974, "grad_norm": 0.2708549201488495, "learning_rate": 7.422055066704513e-05, "loss": 0.037208306789398196, "step": 90860 }, { "epoch": 25.793357933579337, "grad_norm": 2.35905122756958, "learning_rate": 7.421771217712178e-05, "loss": 0.0037753500044345857, "step": 90870 }, { "epoch": 25.796196423502696, "grad_norm": 1.4087483882904053, "learning_rate": 7.421487368719841e-05, "loss": 0.007854456454515458, "step": 90880 }, { "epoch": 25.79903491342606, "grad_norm": 7.331217288970947, "learning_rate": 7.421203519727505e-05, "loss": 0.010463355481624604, "step": 90890 }, { "epoch": 25.80187340334942, "grad_norm": 0.08100829273462296, "learning_rate": 7.42091967073517e-05, "loss": 0.0042828857898712155, "step": 90900 }, { "epoch": 25.804711893272778, "grad_norm": 0.29085925221443176, "learning_rate": 7.420635821742832e-05, "loss": 0.005879982560873032, "step": 90910 }, { "epoch": 25.80755038319614, "grad_norm": 0.1821000874042511, "learning_rate": 7.420351972750496e-05, "loss": 0.007377101480960846, "step": 90920 }, { "epoch": 25.8103888731195, "grad_norm": 0.3089791536331177, "learning_rate": 7.420068123758162e-05, "loss": 0.006254767626523971, "step": 90930 }, { "epoch": 25.813227363042863, "grad_norm": 1.1321921348571777, "learning_rate": 7.419784274765825e-05, "loss": 0.004498739540576935, "step": 90940 }, { "epoch": 25.816065852966222, "grad_norm": 0.13529618084430695, "learning_rate": 7.419500425773489e-05, "loss": 0.0017082126811146735, "step": 90950 }, { "epoch": 25.81890434288958, "grad_norm": 5.9605255126953125, "learning_rate": 7.419216576781153e-05, "loss": 0.012191721051931382, "step": 90960 }, { "epoch": 25.821742832812944, "grad_norm": 1.975387692451477, "learning_rate": 7.418932727788817e-05, "loss": 0.002340700849890709, "step": 90970 }, { "epoch": 25.824581322736304, "grad_norm": 0.35525795817375183, "learning_rate": 7.41864887879648e-05, "loss": 0.00600910484790802, "step": 90980 }, { "epoch": 25.827419812659667, "grad_norm": 1.1531991958618164, "learning_rate": 7.418365029804144e-05, "loss": 0.008205151557922364, "step": 90990 }, { "epoch": 25.830258302583026, "grad_norm": 0.5567787289619446, "learning_rate": 7.41808118081181e-05, "loss": 0.0035665404051542283, "step": 91000 }, { "epoch": 25.830258302583026, "eval_accuracy": 0.9742481083486997, "eval_loss": 0.09537310898303986, "eval_runtime": 34.4963, "eval_samples_per_second": 455.903, "eval_steps_per_second": 7.131, "step": 91000 }, { "epoch": 25.833096792506385, "grad_norm": 0.626004159450531, "learning_rate": 7.417797331819472e-05, "loss": 0.010678833723068238, "step": 91010 }, { "epoch": 25.835935282429748, "grad_norm": 1.9588618278503418, "learning_rate": 7.417513482827136e-05, "loss": 0.012261223793029786, "step": 91020 }, { "epoch": 25.838773772353107, "grad_norm": 0.2341264933347702, "learning_rate": 7.4172296338348e-05, "loss": 0.0128349170088768, "step": 91030 }, { "epoch": 25.84161226227647, "grad_norm": 2.6454482078552246, "learning_rate": 7.416945784842463e-05, "loss": 0.0044191133230924605, "step": 91040 }, { "epoch": 25.84445075219983, "grad_norm": 6.375576019287109, "learning_rate": 7.416661935850127e-05, "loss": 0.004205168411135674, "step": 91050 }, { "epoch": 25.84728924212319, "grad_norm": 1.8870586156845093, "learning_rate": 7.416378086857793e-05, "loss": 0.004602600634098053, "step": 91060 }, { "epoch": 25.850127732046552, "grad_norm": 0.07884453237056732, "learning_rate": 7.416094237865456e-05, "loss": 0.01457187682390213, "step": 91070 }, { "epoch": 25.85296622196991, "grad_norm": 0.05852966010570526, "learning_rate": 7.41581038887312e-05, "loss": 0.01518678367137909, "step": 91080 }, { "epoch": 25.855804711893274, "grad_norm": 2.661982774734497, "learning_rate": 7.415526539880784e-05, "loss": 0.011443178355693816, "step": 91090 }, { "epoch": 25.858643201816633, "grad_norm": 9.290881156921387, "learning_rate": 7.415242690888448e-05, "loss": 0.0223710760474205, "step": 91100 }, { "epoch": 25.861481691739993, "grad_norm": 0.46637535095214844, "learning_rate": 7.414958841896111e-05, "loss": 0.009091371297836303, "step": 91110 }, { "epoch": 25.864320181663356, "grad_norm": 6.1386847496032715, "learning_rate": 7.414674992903775e-05, "loss": 0.00709676519036293, "step": 91120 }, { "epoch": 25.867158671586715, "grad_norm": 0.20754201710224152, "learning_rate": 7.41439114391144e-05, "loss": 0.012213093042373658, "step": 91130 }, { "epoch": 25.869997161510078, "grad_norm": 0.7994604706764221, "learning_rate": 7.414107294919103e-05, "loss": 0.010436104238033294, "step": 91140 }, { "epoch": 25.872835651433437, "grad_norm": 4.1098222732543945, "learning_rate": 7.413823445926768e-05, "loss": 0.011023641377687455, "step": 91150 }, { "epoch": 25.875674141356797, "grad_norm": 8.16196346282959, "learning_rate": 7.413539596934432e-05, "loss": 0.016523543000221252, "step": 91160 }, { "epoch": 25.87851263128016, "grad_norm": 1.9083107709884644, "learning_rate": 7.413255747942094e-05, "loss": 0.013069486618041993, "step": 91170 }, { "epoch": 25.88135112120352, "grad_norm": 1.0371488332748413, "learning_rate": 7.412971898949759e-05, "loss": 0.006054795160889625, "step": 91180 }, { "epoch": 25.88418961112688, "grad_norm": 4.146736145019531, "learning_rate": 7.412688049957423e-05, "loss": 0.01808626502752304, "step": 91190 }, { "epoch": 25.88702810105024, "grad_norm": 3.6422159671783447, "learning_rate": 7.412404200965087e-05, "loss": 0.015602894127368927, "step": 91200 }, { "epoch": 25.8898665909736, "grad_norm": 5.420840263366699, "learning_rate": 7.412120351972751e-05, "loss": 0.013052111864089966, "step": 91210 }, { "epoch": 25.892705080896963, "grad_norm": 0.10339271277189255, "learning_rate": 7.411836502980415e-05, "loss": 0.006180905178189278, "step": 91220 }, { "epoch": 25.895543570820323, "grad_norm": 1.292170524597168, "learning_rate": 7.411552653988079e-05, "loss": 0.004742559790611267, "step": 91230 }, { "epoch": 25.898382060743685, "grad_norm": 0.06830086559057236, "learning_rate": 7.411268804995742e-05, "loss": 0.0053810164332389835, "step": 91240 }, { "epoch": 25.901220550667045, "grad_norm": 0.4103396236896515, "learning_rate": 7.410984956003406e-05, "loss": 0.0025139987468719483, "step": 91250 }, { "epoch": 25.904059040590404, "grad_norm": 0.15858392417430878, "learning_rate": 7.410701107011072e-05, "loss": 0.004786726832389831, "step": 91260 }, { "epoch": 25.906897530513767, "grad_norm": 0.9127637147903442, "learning_rate": 7.410417258018734e-05, "loss": 0.010462059825658798, "step": 91270 }, { "epoch": 25.909736020437126, "grad_norm": 3.224330425262451, "learning_rate": 7.410133409026399e-05, "loss": 0.017303255200386048, "step": 91280 }, { "epoch": 25.91257451036049, "grad_norm": 1.1234402656555176, "learning_rate": 7.409849560034063e-05, "loss": 0.009633800387382508, "step": 91290 }, { "epoch": 25.91541300028385, "grad_norm": 6.054301738739014, "learning_rate": 7.409565711041726e-05, "loss": 0.01152384877204895, "step": 91300 }, { "epoch": 25.91825149020721, "grad_norm": 1.9088680744171143, "learning_rate": 7.40928186204939e-05, "loss": 0.0028315989300608637, "step": 91310 }, { "epoch": 25.92108998013057, "grad_norm": 5.4804840087890625, "learning_rate": 7.408998013057054e-05, "loss": 0.011763445287942886, "step": 91320 }, { "epoch": 25.92392847005393, "grad_norm": 12.73325252532959, "learning_rate": 7.408714164064718e-05, "loss": 0.028307965397834776, "step": 91330 }, { "epoch": 25.926766959977293, "grad_norm": 1.1643331050872803, "learning_rate": 7.408430315072382e-05, "loss": 0.004331960529088974, "step": 91340 }, { "epoch": 25.929605449900652, "grad_norm": 2.451917886734009, "learning_rate": 7.408146466080046e-05, "loss": 0.01932062804698944, "step": 91350 }, { "epoch": 25.932443939824015, "grad_norm": 8.509427070617676, "learning_rate": 7.40786261708771e-05, "loss": 0.013611596822738648, "step": 91360 }, { "epoch": 25.935282429747375, "grad_norm": 0.22106792032718658, "learning_rate": 7.407578768095373e-05, "loss": 0.018000566959381105, "step": 91370 }, { "epoch": 25.938120919670734, "grad_norm": 0.936858057975769, "learning_rate": 7.407294919103037e-05, "loss": 0.0029943037778139116, "step": 91380 }, { "epoch": 25.940959409594097, "grad_norm": 2.1331872940063477, "learning_rate": 7.407011070110701e-05, "loss": 0.00337049625813961, "step": 91390 }, { "epoch": 25.943797899517456, "grad_norm": 0.10331152379512787, "learning_rate": 7.406727221118366e-05, "loss": 0.010290178656578063, "step": 91400 }, { "epoch": 25.94663638944082, "grad_norm": 0.07448949664831161, "learning_rate": 7.40644337212603e-05, "loss": 0.006349889934062958, "step": 91410 }, { "epoch": 25.94947487936418, "grad_norm": 10.520699501037598, "learning_rate": 7.406159523133694e-05, "loss": 0.0037140190601348878, "step": 91420 }, { "epoch": 25.952313369287538, "grad_norm": 1.2791374921798706, "learning_rate": 7.405875674141357e-05, "loss": 0.006757649034261704, "step": 91430 }, { "epoch": 25.9551518592109, "grad_norm": 2.618232488632202, "learning_rate": 7.405591825149021e-05, "loss": 0.0032900750637054445, "step": 91440 }, { "epoch": 25.95799034913426, "grad_norm": 0.12307963520288467, "learning_rate": 7.405307976156685e-05, "loss": 0.00275792870670557, "step": 91450 }, { "epoch": 25.960828839057623, "grad_norm": 0.5972929000854492, "learning_rate": 7.405024127164349e-05, "loss": 0.010327323526144027, "step": 91460 }, { "epoch": 25.963667328980982, "grad_norm": 0.15014107525348663, "learning_rate": 7.404740278172013e-05, "loss": 0.00307040698826313, "step": 91470 }, { "epoch": 25.96650581890434, "grad_norm": 0.7961591482162476, "learning_rate": 7.404456429179677e-05, "loss": 0.012142488360404968, "step": 91480 }, { "epoch": 25.969344308827704, "grad_norm": 0.853769838809967, "learning_rate": 7.40417258018734e-05, "loss": 0.015189388394355774, "step": 91490 }, { "epoch": 25.972182798751064, "grad_norm": Infinity, "learning_rate": 7.403888731195004e-05, "loss": 0.021388289332389832, "step": 91500 }, { "epoch": 25.972182798751064, "eval_accuracy": 0.9740573535957271, "eval_loss": 0.09595824778079987, "eval_runtime": 36.1205, "eval_samples_per_second": 435.403, "eval_steps_per_second": 6.811, "step": 91500 }, { "epoch": 25.975021288674427, "grad_norm": 1.2391926050186157, "learning_rate": 7.403633267101902e-05, "loss": 0.016084039211273195, "step": 91510 }, { "epoch": 25.977859778597786, "grad_norm": 0.14118437469005585, "learning_rate": 7.403349418109566e-05, "loss": 0.01445084810256958, "step": 91520 }, { "epoch": 25.980698268521145, "grad_norm": 0.6673913598060608, "learning_rate": 7.40306556911723e-05, "loss": 0.011621369421482087, "step": 91530 }, { "epoch": 25.983536758444508, "grad_norm": 0.1806657314300537, "learning_rate": 7.402781720124893e-05, "loss": 0.006336788833141327, "step": 91540 }, { "epoch": 25.986375248367867, "grad_norm": 0.26063916087150574, "learning_rate": 7.402497871132557e-05, "loss": 0.009993743896484376, "step": 91550 }, { "epoch": 25.98921373829123, "grad_norm": 4.961780071258545, "learning_rate": 7.402214022140222e-05, "loss": 0.005147572606801987, "step": 91560 }, { "epoch": 25.99205222821459, "grad_norm": 0.28459757566452026, "learning_rate": 7.401930173147886e-05, "loss": 0.010612759739160538, "step": 91570 }, { "epoch": 25.99489071813795, "grad_norm": 0.056450869888067245, "learning_rate": 7.40164632415555e-05, "loss": 0.017287518084049224, "step": 91580 }, { "epoch": 25.997729208061312, "grad_norm": 1.6212162971496582, "learning_rate": 7.401362475163214e-05, "loss": 0.02125656008720398, "step": 91590 }, { "epoch": 26.00056769798467, "grad_norm": 0.1951427161693573, "learning_rate": 7.401078626170878e-05, "loss": 0.005535628646612167, "step": 91600 }, { "epoch": 26.003406187908034, "grad_norm": 0.13092954456806183, "learning_rate": 7.400794777178541e-05, "loss": 0.010591034591197968, "step": 91610 }, { "epoch": 26.006244677831393, "grad_norm": 2.8049745559692383, "learning_rate": 7.400510928186205e-05, "loss": 0.005990657582879066, "step": 91620 }, { "epoch": 26.009083167754753, "grad_norm": 0.03410916030406952, "learning_rate": 7.400227079193869e-05, "loss": 0.005318024754524231, "step": 91630 }, { "epoch": 26.011921657678116, "grad_norm": 0.8406043648719788, "learning_rate": 7.399943230201533e-05, "loss": 0.0036844901740550997, "step": 91640 }, { "epoch": 26.014760147601475, "grad_norm": 0.7568299770355225, "learning_rate": 7.399659381209197e-05, "loss": 0.009923578798770904, "step": 91650 }, { "epoch": 26.017598637524838, "grad_norm": 0.7906540036201477, "learning_rate": 7.399375532216862e-05, "loss": 0.0050684154033660885, "step": 91660 }, { "epoch": 26.020437127448197, "grad_norm": 0.06842824071645737, "learning_rate": 7.399091683224524e-05, "loss": 0.009675013273954392, "step": 91670 }, { "epoch": 26.02327561737156, "grad_norm": 3.982264995574951, "learning_rate": 7.398807834232188e-05, "loss": 0.00524321123957634, "step": 91680 }, { "epoch": 26.02611410729492, "grad_norm": 0.09640176594257355, "learning_rate": 7.398523985239853e-05, "loss": 0.004994227737188339, "step": 91690 }, { "epoch": 26.02895259721828, "grad_norm": 2.1117396354675293, "learning_rate": 7.398240136247517e-05, "loss": 0.006109175086021423, "step": 91700 }, { "epoch": 26.03179108714164, "grad_norm": 3.2494521141052246, "learning_rate": 7.397956287255181e-05, "loss": 0.004483112320303917, "step": 91710 }, { "epoch": 26.034629577065, "grad_norm": 8.58973217010498, "learning_rate": 7.397672438262845e-05, "loss": 0.0031079897657036782, "step": 91720 }, { "epoch": 26.037468066988364, "grad_norm": 0.759901225566864, "learning_rate": 7.397388589270509e-05, "loss": 0.006570176780223846, "step": 91730 }, { "epoch": 26.040306556911723, "grad_norm": 0.03660787642002106, "learning_rate": 7.397104740278172e-05, "loss": 0.0015486760064959526, "step": 91740 }, { "epoch": 26.043145046835082, "grad_norm": 1.1340198516845703, "learning_rate": 7.396820891285836e-05, "loss": 0.001006012037396431, "step": 91750 }, { "epoch": 26.045983536758445, "grad_norm": 0.12481207400560379, "learning_rate": 7.3965370422935e-05, "loss": 0.001652991957962513, "step": 91760 }, { "epoch": 26.048822026681805, "grad_norm": 0.17681358754634857, "learning_rate": 7.396253193301163e-05, "loss": 0.004864652454853058, "step": 91770 }, { "epoch": 26.051660516605168, "grad_norm": 0.1922876089811325, "learning_rate": 7.395969344308828e-05, "loss": 0.0013655811548233033, "step": 91780 }, { "epoch": 26.054499006528527, "grad_norm": 3.8119001388549805, "learning_rate": 7.395685495316493e-05, "loss": 0.0031180428341031075, "step": 91790 }, { "epoch": 26.057337496451886, "grad_norm": 1.8494467735290527, "learning_rate": 7.395401646324155e-05, "loss": 0.002806270308792591, "step": 91800 }, { "epoch": 26.06017598637525, "grad_norm": 4.280714511871338, "learning_rate": 7.39511779733182e-05, "loss": 0.005010434240102768, "step": 91810 }, { "epoch": 26.06301447629861, "grad_norm": 0.02855149656534195, "learning_rate": 7.394833948339484e-05, "loss": 0.003159220144152641, "step": 91820 }, { "epoch": 26.06585296622197, "grad_norm": 0.12079708278179169, "learning_rate": 7.394550099347148e-05, "loss": 0.00651448592543602, "step": 91830 }, { "epoch": 26.06869145614533, "grad_norm": 10.806979179382324, "learning_rate": 7.394266250354812e-05, "loss": 0.005351019650697708, "step": 91840 }, { "epoch": 26.07152994606869, "grad_norm": 14.130577087402344, "learning_rate": 7.393982401362476e-05, "loss": 0.016328398883342744, "step": 91850 }, { "epoch": 26.074368435992053, "grad_norm": 1.604524850845337, "learning_rate": 7.39369855237014e-05, "loss": 0.005274773389101028, "step": 91860 }, { "epoch": 26.077206925915412, "grad_norm": 0.542428195476532, "learning_rate": 7.393414703377803e-05, "loss": 0.001771467737853527, "step": 91870 }, { "epoch": 26.080045415838775, "grad_norm": 0.4347881078720093, "learning_rate": 7.393130854385467e-05, "loss": 0.010172620415687561, "step": 91880 }, { "epoch": 26.082883905762134, "grad_norm": 0.9384946227073669, "learning_rate": 7.392847005393131e-05, "loss": 0.013081638514995575, "step": 91890 }, { "epoch": 26.085722395685494, "grad_norm": 0.2068076878786087, "learning_rate": 7.392563156400794e-05, "loss": 0.002265900187194347, "step": 91900 }, { "epoch": 26.088560885608857, "grad_norm": 2.70790433883667, "learning_rate": 7.39227930740846e-05, "loss": 0.004055812582373619, "step": 91910 }, { "epoch": 26.091399375532216, "grad_norm": 0.1203959584236145, "learning_rate": 7.391995458416124e-05, "loss": 0.01428346335887909, "step": 91920 }, { "epoch": 26.09423786545558, "grad_norm": 0.022389452904462814, "learning_rate": 7.391711609423786e-05, "loss": 0.012636464834213258, "step": 91930 }, { "epoch": 26.097076355378938, "grad_norm": 9.202261924743652, "learning_rate": 7.39142776043145e-05, "loss": 0.01364661604166031, "step": 91940 }, { "epoch": 26.099914845302298, "grad_norm": 0.42958056926727295, "learning_rate": 7.391143911439115e-05, "loss": 0.018281419575214387, "step": 91950 }, { "epoch": 26.10275333522566, "grad_norm": 0.04878581315279007, "learning_rate": 7.390860062446779e-05, "loss": 0.003880906105041504, "step": 91960 }, { "epoch": 26.10559182514902, "grad_norm": 0.10314712673425674, "learning_rate": 7.390576213454442e-05, "loss": 0.012735025584697723, "step": 91970 }, { "epoch": 26.108430315072383, "grad_norm": 0.1762269139289856, "learning_rate": 7.390292364462107e-05, "loss": 0.01052793711423874, "step": 91980 }, { "epoch": 26.111268804995742, "grad_norm": 0.2103620320558548, "learning_rate": 7.390008515469771e-05, "loss": 0.007240165024995804, "step": 91990 }, { "epoch": 26.1141072949191, "grad_norm": 0.40989357233047485, "learning_rate": 7.389724666477434e-05, "loss": 0.0036813773214817045, "step": 92000 }, { "epoch": 26.1141072949191, "eval_accuracy": 0.979779996184905, "eval_loss": 0.07219325006008148, "eval_runtime": 36.3815, "eval_samples_per_second": 432.28, "eval_steps_per_second": 6.762, "step": 92000 }, { "epoch": 26.116945784842464, "grad_norm": 3.778400421142578, "learning_rate": 7.389440817485098e-05, "loss": 0.006613416969776154, "step": 92010 }, { "epoch": 26.119784274765824, "grad_norm": 5.65266227722168, "learning_rate": 7.389156968492762e-05, "loss": 0.002437240444123745, "step": 92020 }, { "epoch": 26.122622764689186, "grad_norm": 0.06298966705799103, "learning_rate": 7.388873119500425e-05, "loss": 0.0029605535790324213, "step": 92030 }, { "epoch": 26.125461254612546, "grad_norm": 0.1747565120458603, "learning_rate": 7.38858927050809e-05, "loss": 0.00861537754535675, "step": 92040 }, { "epoch": 26.12829974453591, "grad_norm": 1.6659907102584839, "learning_rate": 7.388305421515755e-05, "loss": 0.004911017417907715, "step": 92050 }, { "epoch": 26.131138234459268, "grad_norm": 0.22365887463092804, "learning_rate": 7.388021572523418e-05, "loss": 0.004333820566534996, "step": 92060 }, { "epoch": 26.133976724382627, "grad_norm": 0.16316790878772736, "learning_rate": 7.387737723531082e-05, "loss": 0.004883112758398056, "step": 92070 }, { "epoch": 26.13681521430599, "grad_norm": 1.7224043607711792, "learning_rate": 7.387453874538746e-05, "loss": 0.012025406956672669, "step": 92080 }, { "epoch": 26.13965370422935, "grad_norm": 1.4409387111663818, "learning_rate": 7.38717002554641e-05, "loss": 0.007734479010105133, "step": 92090 }, { "epoch": 26.142492194152712, "grad_norm": 0.39640432596206665, "learning_rate": 7.386886176554073e-05, "loss": 0.002345899865031242, "step": 92100 }, { "epoch": 26.14533068407607, "grad_norm": 4.235286235809326, "learning_rate": 7.386602327561738e-05, "loss": 0.013323460519313813, "step": 92110 }, { "epoch": 26.14816917399943, "grad_norm": 6.540951251983643, "learning_rate": 7.386318478569402e-05, "loss": 0.021131980419158935, "step": 92120 }, { "epoch": 26.151007663922794, "grad_norm": 0.011205131188035011, "learning_rate": 7.386034629577065e-05, "loss": 0.003063059598207474, "step": 92130 }, { "epoch": 26.153846153846153, "grad_norm": 1.3129066228866577, "learning_rate": 7.385750780584729e-05, "loss": 0.008998376876115799, "step": 92140 }, { "epoch": 26.156684643769516, "grad_norm": 0.07025274634361267, "learning_rate": 7.385466931592393e-05, "loss": 0.0008516460657119751, "step": 92150 }, { "epoch": 26.159523133692876, "grad_norm": 0.8274905681610107, "learning_rate": 7.385183082600056e-05, "loss": 0.008098473399877548, "step": 92160 }, { "epoch": 26.162361623616235, "grad_norm": 0.24072498083114624, "learning_rate": 7.38489923360772e-05, "loss": 0.0010915627703070641, "step": 92170 }, { "epoch": 26.165200113539598, "grad_norm": 4.466319561004639, "learning_rate": 7.384615384615386e-05, "loss": 0.006939110159873962, "step": 92180 }, { "epoch": 26.168038603462957, "grad_norm": 2.0063295364379883, "learning_rate": 7.384331535623049e-05, "loss": 0.009781721979379654, "step": 92190 }, { "epoch": 26.17087709338632, "grad_norm": 0.041852474212646484, "learning_rate": 7.384047686630713e-05, "loss": 0.004162976145744323, "step": 92200 }, { "epoch": 26.17371558330968, "grad_norm": 0.16561879217624664, "learning_rate": 7.383763837638377e-05, "loss": 0.004683750495314598, "step": 92210 }, { "epoch": 26.17655407323304, "grad_norm": 0.5053877234458923, "learning_rate": 7.383479988646041e-05, "loss": 0.016119755804538727, "step": 92220 }, { "epoch": 26.1793925631564, "grad_norm": 0.23357565701007843, "learning_rate": 7.383196139653704e-05, "loss": 0.00848400816321373, "step": 92230 }, { "epoch": 26.18223105307976, "grad_norm": 1.9849011898040771, "learning_rate": 7.382912290661369e-05, "loss": 0.028456076979637146, "step": 92240 }, { "epoch": 26.185069543003124, "grad_norm": 4.962953567504883, "learning_rate": 7.382628441669033e-05, "loss": 0.011364394426345825, "step": 92250 }, { "epoch": 26.187908032926483, "grad_norm": 4.8476881980896, "learning_rate": 7.382344592676696e-05, "loss": 0.010311997681856155, "step": 92260 }, { "epoch": 26.190746522849842, "grad_norm": 1.779200553894043, "learning_rate": 7.38206074368436e-05, "loss": 0.006517575681209564, "step": 92270 }, { "epoch": 26.193585012773205, "grad_norm": 1.9734307527542114, "learning_rate": 7.381776894692025e-05, "loss": 0.007188377529382705, "step": 92280 }, { "epoch": 26.196423502696565, "grad_norm": 0.059694647789001465, "learning_rate": 7.381493045699687e-05, "loss": 0.0022750185802578927, "step": 92290 }, { "epoch": 26.199261992619927, "grad_norm": 4.376164436340332, "learning_rate": 7.381209196707351e-05, "loss": 0.008384886384010314, "step": 92300 }, { "epoch": 26.202100482543287, "grad_norm": 0.08954562246799469, "learning_rate": 7.380925347715017e-05, "loss": 0.017320559918880464, "step": 92310 }, { "epoch": 26.204938972466646, "grad_norm": 1.380570650100708, "learning_rate": 7.38064149872268e-05, "loss": 0.006501758098602295, "step": 92320 }, { "epoch": 26.20777746239001, "grad_norm": 3.8025829792022705, "learning_rate": 7.380357649730344e-05, "loss": 0.019759747385978698, "step": 92330 }, { "epoch": 26.21061595231337, "grad_norm": 4.754166126251221, "learning_rate": 7.380073800738008e-05, "loss": 0.021310645341873168, "step": 92340 }, { "epoch": 26.21345444223673, "grad_norm": 0.07094799727201462, "learning_rate": 7.379789951745672e-05, "loss": 0.01162678524851799, "step": 92350 }, { "epoch": 26.21629293216009, "grad_norm": 13.295445442199707, "learning_rate": 7.379506102753335e-05, "loss": 0.006674978882074356, "step": 92360 }, { "epoch": 26.21913142208345, "grad_norm": 1.5520365238189697, "learning_rate": 7.379222253760999e-05, "loss": 0.0033065401017665862, "step": 92370 }, { "epoch": 26.221969912006813, "grad_norm": 1.0812454223632812, "learning_rate": 7.378938404768663e-05, "loss": 0.008079191297292709, "step": 92380 }, { "epoch": 26.224808401930172, "grad_norm": 2.760507345199585, "learning_rate": 7.378654555776327e-05, "loss": 0.004339981079101563, "step": 92390 }, { "epoch": 26.227646891853535, "grad_norm": 0.2586928904056549, "learning_rate": 7.378370706783991e-05, "loss": 0.007437346130609512, "step": 92400 }, { "epoch": 26.230485381776894, "grad_norm": 5.423314094543457, "learning_rate": 7.378086857791656e-05, "loss": 0.012376972287893296, "step": 92410 }, { "epoch": 26.233323871700254, "grad_norm": 8.311347007751465, "learning_rate": 7.377803008799318e-05, "loss": 0.0037691570818424226, "step": 92420 }, { "epoch": 26.236162361623617, "grad_norm": 3.7563493251800537, "learning_rate": 7.377519159806983e-05, "loss": 0.0036594554781913756, "step": 92430 }, { "epoch": 26.239000851546976, "grad_norm": 1.0557137727737427, "learning_rate": 7.377235310814648e-05, "loss": 0.012873601913452149, "step": 92440 }, { "epoch": 26.24183934147034, "grad_norm": 0.4949854016304016, "learning_rate": 7.376951461822311e-05, "loss": 0.0071238458156585695, "step": 92450 }, { "epoch": 26.244677831393698, "grad_norm": 4.170505523681641, "learning_rate": 7.376667612829975e-05, "loss": 0.01686444878578186, "step": 92460 }, { "epoch": 26.24751632131706, "grad_norm": 5.702193260192871, "learning_rate": 7.376383763837639e-05, "loss": 0.006639612466096878, "step": 92470 }, { "epoch": 26.25035481124042, "grad_norm": 1.6724475622177124, "learning_rate": 7.376099914845302e-05, "loss": 0.014659762382507324, "step": 92480 }, { "epoch": 26.25319330116378, "grad_norm": 0.07979373633861542, "learning_rate": 7.375816065852966e-05, "loss": 0.008388154953718186, "step": 92490 }, { "epoch": 26.256031791087143, "grad_norm": 0.6238313913345337, "learning_rate": 7.37553221686063e-05, "loss": 0.004408072680234909, "step": 92500 }, { "epoch": 26.256031791087143, "eval_accuracy": 0.9752018821135626, "eval_loss": 0.08746636658906937, "eval_runtime": 33.2599, "eval_samples_per_second": 472.852, "eval_steps_per_second": 7.396, "step": 92500 }, { "epoch": 26.258870281010502, "grad_norm": 0.17250476777553558, "learning_rate": 7.375248367868294e-05, "loss": 0.006417251378297806, "step": 92510 }, { "epoch": 26.261708770933865, "grad_norm": 0.2411089390516281, "learning_rate": 7.374964518875958e-05, "loss": 0.004690470546483994, "step": 92520 }, { "epoch": 26.264547260857224, "grad_norm": 0.038666993379592896, "learning_rate": 7.374680669883623e-05, "loss": 0.003022298216819763, "step": 92530 }, { "epoch": 26.267385750780583, "grad_norm": 0.16132159531116486, "learning_rate": 7.374396820891287e-05, "loss": 0.0013180451467633247, "step": 92540 }, { "epoch": 26.270224240703946, "grad_norm": 0.33899620175361633, "learning_rate": 7.37411297189895e-05, "loss": 0.003740587830543518, "step": 92550 }, { "epoch": 26.273062730627306, "grad_norm": 0.09298969060182571, "learning_rate": 7.373829122906614e-05, "loss": 0.003589988127350807, "step": 92560 }, { "epoch": 26.27590122055067, "grad_norm": 0.0195973739027977, "learning_rate": 7.373545273914278e-05, "loss": 0.013388068974018097, "step": 92570 }, { "epoch": 26.278739710474028, "grad_norm": 1.3190635442733765, "learning_rate": 7.373261424921942e-05, "loss": 0.0037392914295196534, "step": 92580 }, { "epoch": 26.281578200397387, "grad_norm": 2.2241978645324707, "learning_rate": 7.372977575929606e-05, "loss": 0.010201576352119445, "step": 92590 }, { "epoch": 26.28441669032075, "grad_norm": 2.5979647636413574, "learning_rate": 7.37269372693727e-05, "loss": 0.010032417625188828, "step": 92600 }, { "epoch": 26.28725518024411, "grad_norm": 0.05346444621682167, "learning_rate": 7.372409877944933e-05, "loss": 0.008664514124393462, "step": 92610 }, { "epoch": 26.290093670167472, "grad_norm": 0.2571755647659302, "learning_rate": 7.372126028952597e-05, "loss": 0.0018459845334291459, "step": 92620 }, { "epoch": 26.29293216009083, "grad_norm": 0.5992124676704407, "learning_rate": 7.371842179960261e-05, "loss": 0.002913190424442291, "step": 92630 }, { "epoch": 26.29577065001419, "grad_norm": 0.043511081486940384, "learning_rate": 7.371558330967925e-05, "loss": 0.005229470133781433, "step": 92640 }, { "epoch": 26.298609139937554, "grad_norm": 0.05940861254930496, "learning_rate": 7.37127448197559e-05, "loss": 0.014342689514160156, "step": 92650 }, { "epoch": 26.301447629860913, "grad_norm": 0.884525716304779, "learning_rate": 7.370990632983254e-05, "loss": 0.005033398792147636, "step": 92660 }, { "epoch": 26.304286119784276, "grad_norm": 9.38374137878418, "learning_rate": 7.370706783990918e-05, "loss": 0.021759653091430665, "step": 92670 }, { "epoch": 26.307124609707635, "grad_norm": 0.19269722700119019, "learning_rate": 7.37042293499858e-05, "loss": 0.0014976589009165764, "step": 92680 }, { "epoch": 26.309963099630995, "grad_norm": 0.19822393357753754, "learning_rate": 7.370139086006245e-05, "loss": 0.003928793221712112, "step": 92690 }, { "epoch": 26.312801589554358, "grad_norm": 23.74862289428711, "learning_rate": 7.369855237013909e-05, "loss": 0.015284869074821471, "step": 92700 }, { "epoch": 26.315640079477717, "grad_norm": 0.052190061658620834, "learning_rate": 7.369571388021573e-05, "loss": 0.008253778517246246, "step": 92710 }, { "epoch": 26.31847856940108, "grad_norm": 0.31942346692085266, "learning_rate": 7.369287539029237e-05, "loss": 0.0026949742808938027, "step": 92720 }, { "epoch": 26.32131705932444, "grad_norm": 0.2969001233577728, "learning_rate": 7.369003690036901e-05, "loss": 0.004731913655996322, "step": 92730 }, { "epoch": 26.3241555492478, "grad_norm": 1.6066533327102661, "learning_rate": 7.368719841044564e-05, "loss": 0.010899043828248977, "step": 92740 }, { "epoch": 26.32699403917116, "grad_norm": 7.406820774078369, "learning_rate": 7.368435992052228e-05, "loss": 0.009660941362380982, "step": 92750 }, { "epoch": 26.32983252909452, "grad_norm": 5.182804584503174, "learning_rate": 7.368152143059892e-05, "loss": 0.013084498047828675, "step": 92760 }, { "epoch": 26.332671019017884, "grad_norm": 11.294466972351074, "learning_rate": 7.367868294067556e-05, "loss": 0.010358063876628876, "step": 92770 }, { "epoch": 26.335509508941243, "grad_norm": 0.6472300887107849, "learning_rate": 7.36758444507522e-05, "loss": 0.013010245561599732, "step": 92780 }, { "epoch": 26.338347998864602, "grad_norm": 0.1478501856327057, "learning_rate": 7.367300596082885e-05, "loss": 0.004171881824731827, "step": 92790 }, { "epoch": 26.341186488787965, "grad_norm": 0.38775932788848877, "learning_rate": 7.367016747090549e-05, "loss": 0.008743955194950104, "step": 92800 }, { "epoch": 26.344024978711325, "grad_norm": 1.2332587242126465, "learning_rate": 7.366732898098212e-05, "loss": 0.016776081919670106, "step": 92810 }, { "epoch": 26.346863468634687, "grad_norm": 2.6411659717559814, "learning_rate": 7.366449049105876e-05, "loss": 0.004315183311700821, "step": 92820 }, { "epoch": 26.349701958558047, "grad_norm": 11.147245407104492, "learning_rate": 7.36616520011354e-05, "loss": 0.019908224046230317, "step": 92830 }, { "epoch": 26.352540448481406, "grad_norm": 4.589935302734375, "learning_rate": 7.365881351121204e-05, "loss": 0.006573638319969178, "step": 92840 }, { "epoch": 26.35537893840477, "grad_norm": 0.717535674571991, "learning_rate": 7.365597502128868e-05, "loss": 0.01762666553258896, "step": 92850 }, { "epoch": 26.35821742832813, "grad_norm": 0.21592792868614197, "learning_rate": 7.365313653136532e-05, "loss": 0.001660596765577793, "step": 92860 }, { "epoch": 26.36105591825149, "grad_norm": 5.816927909851074, "learning_rate": 7.365029804144195e-05, "loss": 0.0195779949426651, "step": 92870 }, { "epoch": 26.36389440817485, "grad_norm": 0.043969787657260895, "learning_rate": 7.364745955151859e-05, "loss": 0.030395805835723877, "step": 92880 }, { "epoch": 26.366732898098213, "grad_norm": 0.038862597197294235, "learning_rate": 7.364462106159523e-05, "loss": 0.0011962102726101875, "step": 92890 }, { "epoch": 26.369571388021573, "grad_norm": 6.909491062164307, "learning_rate": 7.364178257167187e-05, "loss": 0.01272435039281845, "step": 92900 }, { "epoch": 26.372409877944932, "grad_norm": 8.356128692626953, "learning_rate": 7.363894408174852e-05, "loss": 0.018346968293190002, "step": 92910 }, { "epoch": 26.375248367868295, "grad_norm": 10.799378395080566, "learning_rate": 7.363610559182516e-05, "loss": 0.009065844118595123, "step": 92920 }, { "epoch": 26.378086857791654, "grad_norm": 9.546369552612305, "learning_rate": 7.36332671019018e-05, "loss": 0.006680983304977417, "step": 92930 }, { "epoch": 26.380925347715017, "grad_norm": 0.5368165373802185, "learning_rate": 7.363042861197843e-05, "loss": 0.021683238446712494, "step": 92940 }, { "epoch": 26.383763837638377, "grad_norm": 0.038491323590278625, "learning_rate": 7.362759012205507e-05, "loss": 0.0027929799631237983, "step": 92950 }, { "epoch": 26.386602327561736, "grad_norm": 0.05424189195036888, "learning_rate": 7.362475163213171e-05, "loss": 0.0013034196570515632, "step": 92960 }, { "epoch": 26.3894408174851, "grad_norm": 0.5322719812393188, "learning_rate": 7.362191314220835e-05, "loss": 0.008274302631616593, "step": 92970 }, { "epoch": 26.392279307408458, "grad_norm": 3.0250020027160645, "learning_rate": 7.361907465228499e-05, "loss": 0.0011677782982587813, "step": 92980 }, { "epoch": 26.39511779733182, "grad_norm": 0.9323439002037048, "learning_rate": 7.361623616236163e-05, "loss": 0.011957509815692902, "step": 92990 }, { "epoch": 26.39795628725518, "grad_norm": 0.37520483136177063, "learning_rate": 7.361339767243826e-05, "loss": 0.008021405339241028, "step": 93000 }, { "epoch": 26.39795628725518, "eval_accuracy": 0.9757741463724805, "eval_loss": 0.09279440343379974, "eval_runtime": 37.0812, "eval_samples_per_second": 424.123, "eval_steps_per_second": 6.634, "step": 93000 }, { "epoch": 26.40079477717854, "grad_norm": 0.04228000342845917, "learning_rate": 7.36105591825149e-05, "loss": 0.029520130157470702, "step": 93010 }, { "epoch": 26.403633267101903, "grad_norm": 0.8986439108848572, "learning_rate": 7.360772069259154e-05, "loss": 0.019593633711338043, "step": 93020 }, { "epoch": 26.406471757025262, "grad_norm": 0.16009442508220673, "learning_rate": 7.360488220266819e-05, "loss": 0.001571657881140709, "step": 93030 }, { "epoch": 26.409310246948625, "grad_norm": 1.6268101930618286, "learning_rate": 7.360204371274483e-05, "loss": 0.0020611224696040153, "step": 93040 }, { "epoch": 26.412148736871984, "grad_norm": 1.5809743404388428, "learning_rate": 7.359920522282147e-05, "loss": 0.004779575765132904, "step": 93050 }, { "epoch": 26.414987226795343, "grad_norm": 0.4864044785499573, "learning_rate": 7.359636673289811e-05, "loss": 0.01068490520119667, "step": 93060 }, { "epoch": 26.417825716718706, "grad_norm": 11.233759880065918, "learning_rate": 7.359352824297474e-05, "loss": 0.04663640260696411, "step": 93070 }, { "epoch": 26.420664206642066, "grad_norm": 0.13635040819644928, "learning_rate": 7.359068975305138e-05, "loss": 0.011202546954154968, "step": 93080 }, { "epoch": 26.42350269656543, "grad_norm": 1.207575798034668, "learning_rate": 7.358785126312802e-05, "loss": 0.025628486275672914, "step": 93090 }, { "epoch": 26.426341186488788, "grad_norm": 6.426862716674805, "learning_rate": 7.358501277320465e-05, "loss": 0.007404165714979172, "step": 93100 }, { "epoch": 26.429179676412147, "grad_norm": 0.33498281240463257, "learning_rate": 7.35821742832813e-05, "loss": 0.022066777944564818, "step": 93110 }, { "epoch": 26.43201816633551, "grad_norm": 2.8254849910736084, "learning_rate": 7.357933579335794e-05, "loss": 0.0042917471379041675, "step": 93120 }, { "epoch": 26.43485665625887, "grad_norm": 1.1739438772201538, "learning_rate": 7.357649730343457e-05, "loss": 0.029477089643478394, "step": 93130 }, { "epoch": 26.437695146182232, "grad_norm": 6.155190467834473, "learning_rate": 7.357365881351121e-05, "loss": 0.008832606673240661, "step": 93140 }, { "epoch": 26.44053363610559, "grad_norm": 8.126018524169922, "learning_rate": 7.357082032358785e-05, "loss": 0.010158158093690871, "step": 93150 }, { "epoch": 26.44337212602895, "grad_norm": 2.996365547180176, "learning_rate": 7.35679818336645e-05, "loss": 0.016564324498176575, "step": 93160 }, { "epoch": 26.446210615952314, "grad_norm": 15.738774299621582, "learning_rate": 7.356514334374114e-05, "loss": 0.005570579320192337, "step": 93170 }, { "epoch": 26.449049105875673, "grad_norm": 0.39430010318756104, "learning_rate": 7.356230485381778e-05, "loss": 0.004599653929471969, "step": 93180 }, { "epoch": 26.451887595799036, "grad_norm": 0.08501777797937393, "learning_rate": 7.355946636389442e-05, "loss": 0.002724158577620983, "step": 93190 }, { "epoch": 26.454726085722395, "grad_norm": 16.640626907348633, "learning_rate": 7.355662787397105e-05, "loss": 0.0105758398771286, "step": 93200 }, { "epoch": 26.457564575645755, "grad_norm": 1.4892446994781494, "learning_rate": 7.355378938404769e-05, "loss": 0.002036173269152641, "step": 93210 }, { "epoch": 26.460403065569118, "grad_norm": 8.729501724243164, "learning_rate": 7.355095089412433e-05, "loss": 0.006166741251945496, "step": 93220 }, { "epoch": 26.463241555492477, "grad_norm": 3.006065845489502, "learning_rate": 7.354811240420096e-05, "loss": 0.016905239224433898, "step": 93230 }, { "epoch": 26.46608004541584, "grad_norm": 0.3908097743988037, "learning_rate": 7.354527391427761e-05, "loss": 0.009960367530584335, "step": 93240 }, { "epoch": 26.4689185353392, "grad_norm": 0.8719068765640259, "learning_rate": 7.354243542435426e-05, "loss": 0.015059533715248107, "step": 93250 }, { "epoch": 26.471757025262562, "grad_norm": 10.414698600769043, "learning_rate": 7.353959693443088e-05, "loss": 0.004803689941763878, "step": 93260 }, { "epoch": 26.47459551518592, "grad_norm": 8.914247512817383, "learning_rate": 7.353675844450752e-05, "loss": 0.004537843912839889, "step": 93270 }, { "epoch": 26.47743400510928, "grad_norm": 10.11979866027832, "learning_rate": 7.353391995458417e-05, "loss": 0.009140965342521668, "step": 93280 }, { "epoch": 26.480272495032644, "grad_norm": 1.1282063722610474, "learning_rate": 7.353108146466081e-05, "loss": 0.016886462271213532, "step": 93290 }, { "epoch": 26.483110984956003, "grad_norm": 1.0387040376663208, "learning_rate": 7.352824297473743e-05, "loss": 0.0042385995388031, "step": 93300 }, { "epoch": 26.485949474879366, "grad_norm": 14.90156078338623, "learning_rate": 7.352540448481409e-05, "loss": 0.010878781974315643, "step": 93310 }, { "epoch": 26.488787964802725, "grad_norm": 0.558928370475769, "learning_rate": 7.352256599489072e-05, "loss": 0.022115546464920043, "step": 93320 }, { "epoch": 26.491626454726084, "grad_norm": 0.288654088973999, "learning_rate": 7.351972750496736e-05, "loss": 0.02551790177822113, "step": 93330 }, { "epoch": 26.494464944649447, "grad_norm": 4.137213706970215, "learning_rate": 7.3516889015044e-05, "loss": 0.016204580664634705, "step": 93340 }, { "epoch": 26.497303434572807, "grad_norm": 0.5755549073219299, "learning_rate": 7.351405052512064e-05, "loss": 0.013886408507823944, "step": 93350 }, { "epoch": 26.50014192449617, "grad_norm": 0.15842501819133759, "learning_rate": 7.351121203519727e-05, "loss": 0.011776229739189148, "step": 93360 }, { "epoch": 26.50298041441953, "grad_norm": 0.15543653070926666, "learning_rate": 7.350837354527392e-05, "loss": 0.006134608760476112, "step": 93370 }, { "epoch": 26.50581890434289, "grad_norm": 0.049740713089704514, "learning_rate": 7.350553505535057e-05, "loss": 0.013569115102291108, "step": 93380 }, { "epoch": 26.50865739426625, "grad_norm": 0.18827922642230988, "learning_rate": 7.35026965654272e-05, "loss": 0.005913239344954491, "step": 93390 }, { "epoch": 26.51149588418961, "grad_norm": 6.1157660484313965, "learning_rate": 7.349985807550384e-05, "loss": 0.026229771971702575, "step": 93400 }, { "epoch": 26.514334374112973, "grad_norm": 0.32527679204940796, "learning_rate": 7.349701958558048e-05, "loss": 0.02571679949760437, "step": 93410 }, { "epoch": 26.517172864036333, "grad_norm": 6.026065826416016, "learning_rate": 7.34941810956571e-05, "loss": 0.004239270836114884, "step": 93420 }, { "epoch": 26.520011353959692, "grad_norm": 0.2399892359972, "learning_rate": 7.349134260573375e-05, "loss": 0.009320127964019775, "step": 93430 }, { "epoch": 26.522849843883055, "grad_norm": 1.8576768636703491, "learning_rate": 7.34885041158104e-05, "loss": 0.0034124955534935, "step": 93440 }, { "epoch": 26.525688333806414, "grad_norm": 0.4383585453033447, "learning_rate": 7.348566562588703e-05, "loss": 0.015893734991550446, "step": 93450 }, { "epoch": 26.528526823729777, "grad_norm": 0.21347609162330627, "learning_rate": 7.348282713596367e-05, "loss": 0.002653723955154419, "step": 93460 }, { "epoch": 26.531365313653136, "grad_norm": 0.21554584801197052, "learning_rate": 7.347998864604031e-05, "loss": 0.006712811440229416, "step": 93470 }, { "epoch": 26.534203803576496, "grad_norm": 11.495932579040527, "learning_rate": 7.347715015611695e-05, "loss": 0.018076400458812713, "step": 93480 }, { "epoch": 26.53704229349986, "grad_norm": 25.666780471801758, "learning_rate": 7.347431166619358e-05, "loss": 0.03996581733226776, "step": 93490 }, { "epoch": 26.539880783423218, "grad_norm": 12.01734447479248, "learning_rate": 7.347147317627022e-05, "loss": 0.03242193758487701, "step": 93500 }, { "epoch": 26.539880783423218, "eval_accuracy": 0.9745660329369873, "eval_loss": 0.09675255417823792, "eval_runtime": 61.4888, "eval_samples_per_second": 255.77, "eval_steps_per_second": 4.001, "step": 93500 }, { "epoch": 26.54271927334658, "grad_norm": 8.68567180633545, "learning_rate": 7.346863468634688e-05, "loss": 0.017679065465927124, "step": 93510 }, { "epoch": 26.54555776326994, "grad_norm": 0.3216482400894165, "learning_rate": 7.34657961964235e-05, "loss": 0.00382707342505455, "step": 93520 }, { "epoch": 26.5483962531933, "grad_norm": 1.6361730098724365, "learning_rate": 7.346295770650015e-05, "loss": 0.016781651973724367, "step": 93530 }, { "epoch": 26.551234743116662, "grad_norm": 0.305652379989624, "learning_rate": 7.346011921657679e-05, "loss": 0.01272982805967331, "step": 93540 }, { "epoch": 26.55407323304002, "grad_norm": 13.862508773803711, "learning_rate": 7.345728072665342e-05, "loss": 0.018196898698806762, "step": 93550 }, { "epoch": 26.556911722963385, "grad_norm": 0.7835137844085693, "learning_rate": 7.345444223673006e-05, "loss": 0.004287457466125489, "step": 93560 }, { "epoch": 26.559750212886744, "grad_norm": 7.103923320770264, "learning_rate": 7.345160374680671e-05, "loss": 0.004404862225055694, "step": 93570 }, { "epoch": 26.562588702810103, "grad_norm": 1.6961346864700317, "learning_rate": 7.344876525688334e-05, "loss": 0.004574844241142273, "step": 93580 }, { "epoch": 26.565427192733466, "grad_norm": 0.359235942363739, "learning_rate": 7.344592676695998e-05, "loss": 0.01844668239355087, "step": 93590 }, { "epoch": 26.568265682656826, "grad_norm": 6.142025470733643, "learning_rate": 7.344308827703662e-05, "loss": 0.00441087856888771, "step": 93600 }, { "epoch": 26.57110417258019, "grad_norm": 1.5472910404205322, "learning_rate": 7.344024978711326e-05, "loss": 0.008280350267887116, "step": 93610 }, { "epoch": 26.573942662503548, "grad_norm": 2.466799020767212, "learning_rate": 7.343741129718989e-05, "loss": 0.0018982525914907455, "step": 93620 }, { "epoch": 26.57678115242691, "grad_norm": 9.54333782196045, "learning_rate": 7.343457280726653e-05, "loss": 0.008162859082221984, "step": 93630 }, { "epoch": 26.57961964235027, "grad_norm": 1.4404504299163818, "learning_rate": 7.343173431734319e-05, "loss": 0.012541727721691131, "step": 93640 }, { "epoch": 26.58245813227363, "grad_norm": 0.08636336028575897, "learning_rate": 7.342889582741982e-05, "loss": 0.01488102674484253, "step": 93650 }, { "epoch": 26.585296622196992, "grad_norm": 4.76480770111084, "learning_rate": 7.342605733749646e-05, "loss": 0.010775667428970338, "step": 93660 }, { "epoch": 26.58813511212035, "grad_norm": 0.08748584240674973, "learning_rate": 7.34232188475731e-05, "loss": 0.0012903502210974693, "step": 93670 }, { "epoch": 26.590973602043714, "grad_norm": 0.0534069649875164, "learning_rate": 7.342038035764973e-05, "loss": 0.002478298917412758, "step": 93680 }, { "epoch": 26.593812091967074, "grad_norm": 0.23160788416862488, "learning_rate": 7.341754186772637e-05, "loss": 0.005024296790361404, "step": 93690 }, { "epoch": 26.596650581890433, "grad_norm": 0.8853673338890076, "learning_rate": 7.341470337780301e-05, "loss": 0.00692291259765625, "step": 93700 }, { "epoch": 26.599489071813796, "grad_norm": 0.11979923397302628, "learning_rate": 7.341186488787965e-05, "loss": 0.0014727935194969177, "step": 93710 }, { "epoch": 26.602327561737155, "grad_norm": 2.0351810455322266, "learning_rate": 7.340902639795629e-05, "loss": 0.0082613006234169, "step": 93720 }, { "epoch": 26.605166051660518, "grad_norm": 1.522212028503418, "learning_rate": 7.340618790803293e-05, "loss": 0.00811540186405182, "step": 93730 }, { "epoch": 26.608004541583878, "grad_norm": 14.096010208129883, "learning_rate": 7.340334941810957e-05, "loss": 0.009686029702425002, "step": 93740 }, { "epoch": 26.610843031507237, "grad_norm": 0.025250032544136047, "learning_rate": 7.34005109281862e-05, "loss": 0.004253275692462921, "step": 93750 }, { "epoch": 26.6136815214306, "grad_norm": 1.1713007688522339, "learning_rate": 7.339767243826284e-05, "loss": 0.007434092462062836, "step": 93760 }, { "epoch": 26.61652001135396, "grad_norm": 3.9488396644592285, "learning_rate": 7.33948339483395e-05, "loss": 0.006273451447486878, "step": 93770 }, { "epoch": 26.619358501277322, "grad_norm": 0.020776871591806412, "learning_rate": 7.339199545841613e-05, "loss": 0.022768929600715637, "step": 93780 }, { "epoch": 26.62219699120068, "grad_norm": 1.454048752784729, "learning_rate": 7.338915696849277e-05, "loss": 0.0037695083767175673, "step": 93790 }, { "epoch": 26.62503548112404, "grad_norm": 0.30290427803993225, "learning_rate": 7.338631847856941e-05, "loss": 0.018882274627685547, "step": 93800 }, { "epoch": 26.627873971047404, "grad_norm": 0.1386343389749527, "learning_rate": 7.338347998864604e-05, "loss": 0.0027641275897622108, "step": 93810 }, { "epoch": 26.630712460970763, "grad_norm": 0.40798479318618774, "learning_rate": 7.338064149872268e-05, "loss": 0.009711472690105439, "step": 93820 }, { "epoch": 26.633550950894126, "grad_norm": 0.1948966532945633, "learning_rate": 7.337780300879932e-05, "loss": 0.0030011631548404693, "step": 93830 }, { "epoch": 26.636389440817485, "grad_norm": 0.2852543592453003, "learning_rate": 7.337496451887596e-05, "loss": 0.005642514303326606, "step": 93840 }, { "epoch": 26.639227930740844, "grad_norm": 0.41545331478118896, "learning_rate": 7.33721260289526e-05, "loss": 0.008796853572130203, "step": 93850 }, { "epoch": 26.642066420664207, "grad_norm": 0.11369207501411438, "learning_rate": 7.336928753902924e-05, "loss": 0.009110226482152938, "step": 93860 }, { "epoch": 26.644904910587567, "grad_norm": 2.1863880157470703, "learning_rate": 7.336644904910588e-05, "loss": 0.01698225289583206, "step": 93870 }, { "epoch": 26.64774340051093, "grad_norm": 1.2305718660354614, "learning_rate": 7.336361055918251e-05, "loss": 0.008395685255527497, "step": 93880 }, { "epoch": 26.65058189043429, "grad_norm": 0.7233121395111084, "learning_rate": 7.336077206925915e-05, "loss": 0.004745637625455856, "step": 93890 }, { "epoch": 26.653420380357648, "grad_norm": 0.16007235646247864, "learning_rate": 7.33579335793358e-05, "loss": 0.006169053912162781, "step": 93900 }, { "epoch": 26.65625887028101, "grad_norm": 2.109121322631836, "learning_rate": 7.335509508941244e-05, "loss": 0.003503977507352829, "step": 93910 }, { "epoch": 26.65909736020437, "grad_norm": 0.3949493169784546, "learning_rate": 7.335225659948908e-05, "loss": 0.011246296018362046, "step": 93920 }, { "epoch": 26.661935850127733, "grad_norm": 0.07763362675905228, "learning_rate": 7.334941810956572e-05, "loss": 0.010795446485280991, "step": 93930 }, { "epoch": 26.664774340051093, "grad_norm": 0.363701194524765, "learning_rate": 7.334657961964235e-05, "loss": 0.004698735475540161, "step": 93940 }, { "epoch": 26.667612829974452, "grad_norm": 0.06449534744024277, "learning_rate": 7.334374112971899e-05, "loss": 0.02081031948328018, "step": 93950 }, { "epoch": 26.670451319897815, "grad_norm": 0.336947500705719, "learning_rate": 7.334090263979563e-05, "loss": 0.011101084947586059, "step": 93960 }, { "epoch": 26.673289809821174, "grad_norm": 0.5915369391441345, "learning_rate": 7.333806414987227e-05, "loss": 0.003601716458797455, "step": 93970 }, { "epoch": 26.676128299744537, "grad_norm": 2.0717453956604004, "learning_rate": 7.333522565994891e-05, "loss": 0.007763583213090897, "step": 93980 }, { "epoch": 26.678966789667896, "grad_norm": 0.5727964043617249, "learning_rate": 7.333238717002555e-05, "loss": 0.010077650845050811, "step": 93990 }, { "epoch": 26.68180527959126, "grad_norm": 0.02604478783905506, "learning_rate": 7.33295486801022e-05, "loss": 0.00428830198943615, "step": 94000 }, { "epoch": 26.68180527959126, "eval_accuracy": 0.9749475424429326, "eval_loss": 0.09798960387706757, "eval_runtime": 33.5427, "eval_samples_per_second": 468.865, "eval_steps_per_second": 7.334, "step": 94000 }, { "epoch": 26.68464376951462, "grad_norm": 3.4521384239196777, "learning_rate": 7.332671019017882e-05, "loss": 0.004387082159519195, "step": 94010 }, { "epoch": 26.687482259437978, "grad_norm": 0.04527798667550087, "learning_rate": 7.332387170025546e-05, "loss": 0.011689522117376328, "step": 94020 }, { "epoch": 26.69032074936134, "grad_norm": 0.06909022480249405, "learning_rate": 7.33210332103321e-05, "loss": 0.0062458798289299015, "step": 94030 }, { "epoch": 26.6931592392847, "grad_norm": 0.5618939995765686, "learning_rate": 7.331819472040875e-05, "loss": 0.00589996762573719, "step": 94040 }, { "epoch": 26.695997729208063, "grad_norm": 0.5312628746032715, "learning_rate": 7.331535623048539e-05, "loss": 0.008764253556728363, "step": 94050 }, { "epoch": 26.698836219131422, "grad_norm": 11.23067855834961, "learning_rate": 7.331251774056203e-05, "loss": 0.0223658949136734, "step": 94060 }, { "epoch": 26.70167470905478, "grad_norm": 2.8426806926727295, "learning_rate": 7.330967925063866e-05, "loss": 0.00798931047320366, "step": 94070 }, { "epoch": 26.704513198978145, "grad_norm": 0.505329966545105, "learning_rate": 7.33068407607153e-05, "loss": 0.005554669350385666, "step": 94080 }, { "epoch": 26.707351688901504, "grad_norm": 6.819518566131592, "learning_rate": 7.330400227079194e-05, "loss": 0.015557478368282317, "step": 94090 }, { "epoch": 26.710190178824867, "grad_norm": 1.5690298080444336, "learning_rate": 7.330116378086858e-05, "loss": 0.02901533842086792, "step": 94100 }, { "epoch": 26.713028668748226, "grad_norm": 3.9745283126831055, "learning_rate": 7.329832529094522e-05, "loss": 0.0032583646476268767, "step": 94110 }, { "epoch": 26.715867158671585, "grad_norm": 2.597682237625122, "learning_rate": 7.329548680102187e-05, "loss": 0.010782717168331147, "step": 94120 }, { "epoch": 26.71870564859495, "grad_norm": 11.90617847442627, "learning_rate": 7.329264831109849e-05, "loss": 0.008693935722112656, "step": 94130 }, { "epoch": 26.721544138518308, "grad_norm": 5.4357829093933105, "learning_rate": 7.328980982117513e-05, "loss": 0.011988414824008942, "step": 94140 }, { "epoch": 26.72438262844167, "grad_norm": 4.777623653411865, "learning_rate": 7.328697133125178e-05, "loss": 0.011448879539966584, "step": 94150 }, { "epoch": 26.72722111836503, "grad_norm": 9.509434700012207, "learning_rate": 7.328413284132842e-05, "loss": 0.008337825536727905, "step": 94160 }, { "epoch": 26.73005960828839, "grad_norm": 0.2998010516166687, "learning_rate": 7.328129435140506e-05, "loss": 0.0077266603708267215, "step": 94170 }, { "epoch": 26.732898098211752, "grad_norm": 0.38704031705856323, "learning_rate": 7.32784558614817e-05, "loss": 0.019797971844673155, "step": 94180 }, { "epoch": 26.73573658813511, "grad_norm": 12.656953811645508, "learning_rate": 7.327561737155834e-05, "loss": 0.01170608401298523, "step": 94190 }, { "epoch": 26.738575078058474, "grad_norm": 0.1109757348895073, "learning_rate": 7.327277888163497e-05, "loss": 0.0015058407559990882, "step": 94200 }, { "epoch": 26.741413567981834, "grad_norm": 0.34378960728645325, "learning_rate": 7.326994039171161e-05, "loss": 0.013615195453166962, "step": 94210 }, { "epoch": 26.744252057905193, "grad_norm": 8.683734893798828, "learning_rate": 7.326710190178825e-05, "loss": 0.013397210836410522, "step": 94220 }, { "epoch": 26.747090547828556, "grad_norm": 9.085851669311523, "learning_rate": 7.326426341186489e-05, "loss": 0.006608855724334717, "step": 94230 }, { "epoch": 26.749929037751915, "grad_norm": 0.25243300199508667, "learning_rate": 7.326142492194153e-05, "loss": 0.009010089933872223, "step": 94240 }, { "epoch": 26.752767527675278, "grad_norm": 1.0410150289535522, "learning_rate": 7.325858643201818e-05, "loss": 0.001588423363864422, "step": 94250 }, { "epoch": 26.755606017598637, "grad_norm": 1.4190027713775635, "learning_rate": 7.32557479420948e-05, "loss": 0.014464949071407319, "step": 94260 }, { "epoch": 26.758444507521997, "grad_norm": 0.11180123686790466, "learning_rate": 7.325290945217144e-05, "loss": 0.0018937349319458007, "step": 94270 }, { "epoch": 26.76128299744536, "grad_norm": 0.1103350818157196, "learning_rate": 7.325007096224809e-05, "loss": 0.01898016631603241, "step": 94280 }, { "epoch": 26.76412148736872, "grad_norm": 8.774584770202637, "learning_rate": 7.324723247232473e-05, "loss": 0.02497519850730896, "step": 94290 }, { "epoch": 26.766959977292082, "grad_norm": 12.941317558288574, "learning_rate": 7.324439398240137e-05, "loss": 0.02186019569635391, "step": 94300 }, { "epoch": 26.76979846721544, "grad_norm": 16.42685317993164, "learning_rate": 7.324155549247801e-05, "loss": 0.03755285739898682, "step": 94310 }, { "epoch": 26.7726369571388, "grad_norm": 0.5576061606407166, "learning_rate": 7.323871700255465e-05, "loss": 0.0013256749138236047, "step": 94320 }, { "epoch": 26.775475447062163, "grad_norm": 1.7726424932479858, "learning_rate": 7.323587851263128e-05, "loss": 0.013489252328872681, "step": 94330 }, { "epoch": 26.778313936985523, "grad_norm": 0.4024863839149475, "learning_rate": 7.323304002270792e-05, "loss": 0.007497423887252807, "step": 94340 }, { "epoch": 26.781152426908886, "grad_norm": 1.9977052211761475, "learning_rate": 7.323020153278456e-05, "loss": 0.004708604514598846, "step": 94350 }, { "epoch": 26.783990916832245, "grad_norm": 1.1593822240829468, "learning_rate": 7.322736304286119e-05, "loss": 0.007992751151323318, "step": 94360 }, { "epoch": 26.786829406755604, "grad_norm": 2.1567845344543457, "learning_rate": 7.322452455293785e-05, "loss": 0.008021972328424453, "step": 94370 }, { "epoch": 26.789667896678967, "grad_norm": 0.6355655193328857, "learning_rate": 7.322168606301449e-05, "loss": 0.003861437737941742, "step": 94380 }, { "epoch": 26.792506386602327, "grad_norm": 0.07638424634933472, "learning_rate": 7.321884757309111e-05, "loss": 0.005824956297874451, "step": 94390 }, { "epoch": 26.79534487652569, "grad_norm": 0.26440489292144775, "learning_rate": 7.321600908316776e-05, "loss": 0.01155632734298706, "step": 94400 }, { "epoch": 26.79818336644905, "grad_norm": 0.011045672930777073, "learning_rate": 7.32131705932444e-05, "loss": 0.003925139829516411, "step": 94410 }, { "epoch": 26.801021856372408, "grad_norm": 0.43670961260795593, "learning_rate": 7.321033210332104e-05, "loss": 0.004569348692893982, "step": 94420 }, { "epoch": 26.80386034629577, "grad_norm": 0.31826251745224, "learning_rate": 7.320749361339767e-05, "loss": 0.002291092649102211, "step": 94430 }, { "epoch": 26.80669883621913, "grad_norm": 2.593918561935425, "learning_rate": 7.320465512347432e-05, "loss": 0.01153557151556015, "step": 94440 }, { "epoch": 26.809537326142493, "grad_norm": 3.7047390937805176, "learning_rate": 7.320181663355096e-05, "loss": 0.0030713409185409548, "step": 94450 }, { "epoch": 26.812375816065853, "grad_norm": 13.05156421661377, "learning_rate": 7.319897814362759e-05, "loss": 0.014699283242225646, "step": 94460 }, { "epoch": 26.815214305989215, "grad_norm": 0.04394020512700081, "learning_rate": 7.319613965370423e-05, "loss": 0.016251036524772645, "step": 94470 }, { "epoch": 26.818052795912575, "grad_norm": 0.09740649908781052, "learning_rate": 7.319330116378087e-05, "loss": 0.005095839500427246, "step": 94480 }, { "epoch": 26.820891285835934, "grad_norm": 0.4117019474506378, "learning_rate": 7.31904626738575e-05, "loss": 0.011678279936313629, "step": 94490 }, { "epoch": 26.823729775759297, "grad_norm": 0.5032704472541809, "learning_rate": 7.318762418393416e-05, "loss": 0.003041370399296284, "step": 94500 }, { "epoch": 26.823729775759297, "eval_accuracy": 0.9781267883258091, "eval_loss": 0.08410006016492844, "eval_runtime": 34.1336, "eval_samples_per_second": 460.748, "eval_steps_per_second": 7.207, "step": 94500 }, { "epoch": 26.826568265682656, "grad_norm": 4.265335559844971, "learning_rate": 7.31847856940108e-05, "loss": 0.002294172905385494, "step": 94510 }, { "epoch": 26.82940675560602, "grad_norm": 0.2087344378232956, "learning_rate": 7.318194720408743e-05, "loss": 0.0034171998500823974, "step": 94520 }, { "epoch": 26.83224524552938, "grad_norm": 0.6375102996826172, "learning_rate": 7.317910871416407e-05, "loss": 0.0194838747382164, "step": 94530 }, { "epoch": 26.835083735452738, "grad_norm": 0.8439090251922607, "learning_rate": 7.317627022424071e-05, "loss": 0.009358112514019013, "step": 94540 }, { "epoch": 26.8379222253761, "grad_norm": 0.09478717297315598, "learning_rate": 7.317343173431735e-05, "loss": 0.010836734622716903, "step": 94550 }, { "epoch": 26.84076071529946, "grad_norm": 0.08857100456953049, "learning_rate": 7.317059324439398e-05, "loss": 0.021503618359565733, "step": 94560 }, { "epoch": 26.843599205222823, "grad_norm": 0.31207603216171265, "learning_rate": 7.316775475447063e-05, "loss": 0.010822094231843948, "step": 94570 }, { "epoch": 26.846437695146182, "grad_norm": 11.526782035827637, "learning_rate": 7.316491626454727e-05, "loss": 0.01022503525018692, "step": 94580 }, { "epoch": 26.84927618506954, "grad_norm": 0.026100007817149162, "learning_rate": 7.31620777746239e-05, "loss": 0.01470533162355423, "step": 94590 }, { "epoch": 26.852114674992904, "grad_norm": 8.93354606628418, "learning_rate": 7.315923928470054e-05, "loss": 0.0038064174354076386, "step": 94600 }, { "epoch": 26.854953164916264, "grad_norm": 4.536352634429932, "learning_rate": 7.315640079477718e-05, "loss": 0.004451452568173408, "step": 94610 }, { "epoch": 26.857791654839627, "grad_norm": 0.41527071595191956, "learning_rate": 7.315356230485381e-05, "loss": 0.0024415027350187303, "step": 94620 }, { "epoch": 26.860630144762986, "grad_norm": 0.051431749016046524, "learning_rate": 7.315072381493045e-05, "loss": 0.009303665906190871, "step": 94630 }, { "epoch": 26.863468634686345, "grad_norm": 1.4410449266433716, "learning_rate": 7.314788532500711e-05, "loss": 0.003644676133990288, "step": 94640 }, { "epoch": 26.86630712460971, "grad_norm": 0.4227358102798462, "learning_rate": 7.314504683508374e-05, "loss": 0.030327796936035156, "step": 94650 }, { "epoch": 26.869145614533068, "grad_norm": 4.336110591888428, "learning_rate": 7.314220834516038e-05, "loss": 0.004834429174661636, "step": 94660 }, { "epoch": 26.87198410445643, "grad_norm": Infinity, "learning_rate": 7.313936985523702e-05, "loss": 0.01770984083414078, "step": 94670 }, { "epoch": 26.87482259437979, "grad_norm": 3.8551957607269287, "learning_rate": 7.3136815214306e-05, "loss": 0.010193520039319993, "step": 94680 }, { "epoch": 26.87766108430315, "grad_norm": 10.593040466308594, "learning_rate": 7.313397672438264e-05, "loss": 0.013084140419960023, "step": 94690 }, { "epoch": 26.880499574226512, "grad_norm": 0.5938636660575867, "learning_rate": 7.313113823445927e-05, "loss": 0.0054472915828227995, "step": 94700 }, { "epoch": 26.88333806414987, "grad_norm": 6.150427341461182, "learning_rate": 7.312829974453591e-05, "loss": 0.018688490986824034, "step": 94710 }, { "epoch": 26.886176554073234, "grad_norm": 12.146310806274414, "learning_rate": 7.312546125461255e-05, "loss": 0.01153932735323906, "step": 94720 }, { "epoch": 26.889015043996594, "grad_norm": 1.4764411449432373, "learning_rate": 7.312262276468919e-05, "loss": 0.015989363193511963, "step": 94730 }, { "epoch": 26.891853533919953, "grad_norm": 0.4598442018032074, "learning_rate": 7.311978427476582e-05, "loss": 0.008534294366836549, "step": 94740 }, { "epoch": 26.894692023843316, "grad_norm": 0.2975207269191742, "learning_rate": 7.311694578484247e-05, "loss": 0.007080565392971039, "step": 94750 }, { "epoch": 26.897530513766675, "grad_norm": 5.777853965759277, "learning_rate": 7.311410729491912e-05, "loss": 0.0074697092175483705, "step": 94760 }, { "epoch": 26.900369003690038, "grad_norm": 0.39511096477508545, "learning_rate": 7.311126880499574e-05, "loss": 0.004882235080003738, "step": 94770 }, { "epoch": 26.903207493613397, "grad_norm": 0.6280848383903503, "learning_rate": 7.310843031507239e-05, "loss": 0.0018717877566814423, "step": 94780 }, { "epoch": 26.906045983536757, "grad_norm": 0.07105320692062378, "learning_rate": 7.310559182514903e-05, "loss": 0.00494973137974739, "step": 94790 }, { "epoch": 26.90888447346012, "grad_norm": 4.209155082702637, "learning_rate": 7.310275333522565e-05, "loss": 0.0029169077053666113, "step": 94800 }, { "epoch": 26.91172296338348, "grad_norm": 0.05443667992949486, "learning_rate": 7.30999148453023e-05, "loss": 0.002174543961882591, "step": 94810 }, { "epoch": 26.914561453306842, "grad_norm": 2.849930763244629, "learning_rate": 7.309707635537895e-05, "loss": 0.016251349449157716, "step": 94820 }, { "epoch": 26.9173999432302, "grad_norm": 9.839822769165039, "learning_rate": 7.309423786545558e-05, "loss": 0.009311595559120178, "step": 94830 }, { "epoch": 26.920238433153564, "grad_norm": 2.1470816135406494, "learning_rate": 7.309139937553222e-05, "loss": 0.008400061726570129, "step": 94840 }, { "epoch": 26.923076923076923, "grad_norm": 9.517404556274414, "learning_rate": 7.308856088560886e-05, "loss": 0.030034577846527098, "step": 94850 }, { "epoch": 26.925915413000283, "grad_norm": 0.0802091658115387, "learning_rate": 7.30857223956855e-05, "loss": 0.00893595963716507, "step": 94860 }, { "epoch": 26.928753902923646, "grad_norm": 0.4208381474018097, "learning_rate": 7.308288390576213e-05, "loss": 0.01131254956126213, "step": 94870 }, { "epoch": 26.931592392847005, "grad_norm": 0.19098296761512756, "learning_rate": 7.308004541583879e-05, "loss": 0.01693856418132782, "step": 94880 }, { "epoch": 26.934430882770368, "grad_norm": 0.2732996940612793, "learning_rate": 7.307720692591543e-05, "loss": 0.010540908575057984, "step": 94890 }, { "epoch": 26.937269372693727, "grad_norm": 9.279861450195312, "learning_rate": 7.307436843599205e-05, "loss": 0.00890711322426796, "step": 94900 }, { "epoch": 26.940107862617086, "grad_norm": 0.7074282169342041, "learning_rate": 7.30715299460687e-05, "loss": 0.0014222733676433562, "step": 94910 }, { "epoch": 26.94294635254045, "grad_norm": 0.7740960717201233, "learning_rate": 7.306869145614534e-05, "loss": 0.024463069438934327, "step": 94920 }, { "epoch": 26.94578484246381, "grad_norm": 0.9101386666297913, "learning_rate": 7.306585296622197e-05, "loss": 0.0018668025732040405, "step": 94930 }, { "epoch": 26.94862333238717, "grad_norm": 0.14248210191726685, "learning_rate": 7.30630144762986e-05, "loss": 0.012463627010583877, "step": 94940 }, { "epoch": 26.95146182231053, "grad_norm": 1.3166074752807617, "learning_rate": 7.306017598637526e-05, "loss": 0.010025148838758468, "step": 94950 }, { "epoch": 26.95430031223389, "grad_norm": 2.1346521377563477, "learning_rate": 7.305733749645189e-05, "loss": 0.0035996384918689727, "step": 94960 }, { "epoch": 26.957138802157253, "grad_norm": 3.770421266555786, "learning_rate": 7.305449900652853e-05, "loss": 0.003893129527568817, "step": 94970 }, { "epoch": 26.959977292080612, "grad_norm": 0.18907922506332397, "learning_rate": 7.305166051660517e-05, "loss": 0.009776365756988526, "step": 94980 }, { "epoch": 26.962815782003975, "grad_norm": 4.208271503448486, "learning_rate": 7.304882202668181e-05, "loss": 0.01744593381881714, "step": 94990 }, { "epoch": 26.965654271927335, "grad_norm": 0.1304244101047516, "learning_rate": 7.304598353675844e-05, "loss": 0.0068020448088645935, "step": 95000 }, { "epoch": 26.965654271927335, "eval_accuracy": 0.9771094296432886, "eval_loss": 0.08922598510980606, "eval_runtime": 33.6147, "eval_samples_per_second": 467.861, "eval_steps_per_second": 7.318, "step": 95000 }, { "epoch": 26.968492761850694, "grad_norm": 0.6081731915473938, "learning_rate": 7.304314504683508e-05, "loss": 0.004764524847269058, "step": 95010 }, { "epoch": 26.971331251774057, "grad_norm": 0.12444649636745453, "learning_rate": 7.304030655691172e-05, "loss": 0.004072895646095276, "step": 95020 }, { "epoch": 26.974169741697416, "grad_norm": 0.059234749525785446, "learning_rate": 7.303746806698837e-05, "loss": 0.008356679975986481, "step": 95030 }, { "epoch": 26.97700823162078, "grad_norm": 3.1815006732940674, "learning_rate": 7.303462957706501e-05, "loss": 0.009719131141901016, "step": 95040 }, { "epoch": 26.97984672154414, "grad_norm": 8.515005111694336, "learning_rate": 7.303179108714165e-05, "loss": 0.01265561580657959, "step": 95050 }, { "epoch": 26.982685211467498, "grad_norm": 0.2253808230161667, "learning_rate": 7.302895259721828e-05, "loss": 0.00699293315410614, "step": 95060 }, { "epoch": 26.98552370139086, "grad_norm": 2.2510838508605957, "learning_rate": 7.302611410729492e-05, "loss": 0.018230715394020082, "step": 95070 }, { "epoch": 26.98836219131422, "grad_norm": 0.8572126626968384, "learning_rate": 7.302327561737157e-05, "loss": 0.005697437003254891, "step": 95080 }, { "epoch": 26.991200681237583, "grad_norm": 1.4221898317337036, "learning_rate": 7.30204371274482e-05, "loss": 0.0017194589599967003, "step": 95090 }, { "epoch": 26.994039171160942, "grad_norm": 12.58106803894043, "learning_rate": 7.301759863752484e-05, "loss": 0.03694300949573517, "step": 95100 }, { "epoch": 26.9968776610843, "grad_norm": 5.24924373626709, "learning_rate": 7.301476014760148e-05, "loss": 0.017975480854511262, "step": 95110 }, { "epoch": 26.999716151007664, "grad_norm": 6.751262187957764, "learning_rate": 7.301192165767811e-05, "loss": 0.004690876603126526, "step": 95120 }, { "epoch": 27.002554640931024, "grad_norm": 3.142519950866699, "learning_rate": 7.300908316775475e-05, "loss": 0.001520065777003765, "step": 95130 }, { "epoch": 27.005393130854387, "grad_norm": 0.061055731028318405, "learning_rate": 7.30062446778314e-05, "loss": 0.003607887774705887, "step": 95140 }, { "epoch": 27.008231620777746, "grad_norm": 0.1506660133600235, "learning_rate": 7.300340618790803e-05, "loss": 0.0007533039897680283, "step": 95150 }, { "epoch": 27.011070110701105, "grad_norm": 0.32523417472839355, "learning_rate": 7.300056769798468e-05, "loss": 0.0012320384383201599, "step": 95160 }, { "epoch": 27.013908600624468, "grad_norm": 0.011167525313794613, "learning_rate": 7.299772920806132e-05, "loss": 0.0020912844687700272, "step": 95170 }, { "epoch": 27.016747090547828, "grad_norm": 0.0864245593547821, "learning_rate": 7.299489071813796e-05, "loss": 0.0022738080471754072, "step": 95180 }, { "epoch": 27.01958558047119, "grad_norm": 6.497503757476807, "learning_rate": 7.299205222821459e-05, "loss": 0.002292226441204548, "step": 95190 }, { "epoch": 27.02242407039455, "grad_norm": 1.2218358516693115, "learning_rate": 7.298921373829123e-05, "loss": 0.0017925074324011802, "step": 95200 }, { "epoch": 27.025262560317913, "grad_norm": 0.02418048307299614, "learning_rate": 7.298637524836787e-05, "loss": 0.0016493571922183037, "step": 95210 }, { "epoch": 27.028101050241272, "grad_norm": 0.48527809977531433, "learning_rate": 7.298353675844451e-05, "loss": 0.0019326457753777505, "step": 95220 }, { "epoch": 27.03093954016463, "grad_norm": 0.04735812172293663, "learning_rate": 7.298069826852115e-05, "loss": 0.01717640310525894, "step": 95230 }, { "epoch": 27.033778030087994, "grad_norm": 0.43390727043151855, "learning_rate": 7.29778597785978e-05, "loss": 0.00532221570611, "step": 95240 }, { "epoch": 27.036616520011354, "grad_norm": 0.4683663249015808, "learning_rate": 7.297502128867442e-05, "loss": 0.008456090837717057, "step": 95250 }, { "epoch": 27.039455009934716, "grad_norm": 0.04981847479939461, "learning_rate": 7.297218279875106e-05, "loss": 0.0027939526364207266, "step": 95260 }, { "epoch": 27.042293499858076, "grad_norm": 0.1438787281513214, "learning_rate": 7.29693443088277e-05, "loss": 0.0018105398863554, "step": 95270 }, { "epoch": 27.045131989781435, "grad_norm": 0.6251360177993774, "learning_rate": 7.296650581890435e-05, "loss": 0.004078377783298492, "step": 95280 }, { "epoch": 27.047970479704798, "grad_norm": 0.9837616086006165, "learning_rate": 7.296366732898099e-05, "loss": 0.004784095287322998, "step": 95290 }, { "epoch": 27.050808969628157, "grad_norm": 1.8969568014144897, "learning_rate": 7.296082883905763e-05, "loss": 0.004830897599458694, "step": 95300 }, { "epoch": 27.05364745955152, "grad_norm": 0.07591401040554047, "learning_rate": 7.295799034913427e-05, "loss": 0.005518239364027977, "step": 95310 }, { "epoch": 27.05648594947488, "grad_norm": 3.028299570083618, "learning_rate": 7.29551518592109e-05, "loss": 0.004913496971130371, "step": 95320 }, { "epoch": 27.05932443939824, "grad_norm": 5.548740863800049, "learning_rate": 7.295231336928754e-05, "loss": 0.0049406267702579495, "step": 95330 }, { "epoch": 27.0621629293216, "grad_norm": 0.31197935342788696, "learning_rate": 7.294947487936418e-05, "loss": 0.00553242489695549, "step": 95340 }, { "epoch": 27.06500141924496, "grad_norm": 1.9237626791000366, "learning_rate": 7.294663638944082e-05, "loss": 0.003160182386636734, "step": 95350 }, { "epoch": 27.067839909168324, "grad_norm": 0.5650404095649719, "learning_rate": 7.294379789951746e-05, "loss": 0.017268012464046478, "step": 95360 }, { "epoch": 27.070678399091683, "grad_norm": 4.443408489227295, "learning_rate": 7.29409594095941e-05, "loss": 0.008371354639530182, "step": 95370 }, { "epoch": 27.073516889015043, "grad_norm": 0.41993728280067444, "learning_rate": 7.293812091967073e-05, "loss": 0.0014042710885405541, "step": 95380 }, { "epoch": 27.076355378938405, "grad_norm": 0.11591331660747528, "learning_rate": 7.293528242974737e-05, "loss": 0.0009815050289034843, "step": 95390 }, { "epoch": 27.079193868861765, "grad_norm": 0.05469759181141853, "learning_rate": 7.293244393982401e-05, "loss": 0.0012808138504624366, "step": 95400 }, { "epoch": 27.082032358785128, "grad_norm": 0.007060602307319641, "learning_rate": 7.292960544990066e-05, "loss": 0.00120452418923378, "step": 95410 }, { "epoch": 27.084870848708487, "grad_norm": 0.021882154047489166, "learning_rate": 7.29267669599773e-05, "loss": 0.004042915254831314, "step": 95420 }, { "epoch": 27.087709338631846, "grad_norm": 0.041335105895996094, "learning_rate": 7.292392847005394e-05, "loss": 0.011628802865743637, "step": 95430 }, { "epoch": 27.09054782855521, "grad_norm": 0.08443986624479294, "learning_rate": 7.292108998013058e-05, "loss": 0.0018487146124243736, "step": 95440 }, { "epoch": 27.09338631847857, "grad_norm": 0.5417053699493408, "learning_rate": 7.291825149020721e-05, "loss": 0.0011501513421535493, "step": 95450 }, { "epoch": 27.09622480840193, "grad_norm": 0.19032758474349976, "learning_rate": 7.291541300028385e-05, "loss": 0.008980826288461686, "step": 95460 }, { "epoch": 27.09906329832529, "grad_norm": 0.4790182411670685, "learning_rate": 7.291257451036049e-05, "loss": 0.002088732272386551, "step": 95470 }, { "epoch": 27.10190178824865, "grad_norm": 1.8154184818267822, "learning_rate": 7.290973602043713e-05, "loss": 0.005860096216201783, "step": 95480 }, { "epoch": 27.104740278172013, "grad_norm": 0.9710548520088196, "learning_rate": 7.290689753051377e-05, "loss": 0.0016810407862067223, "step": 95490 }, { "epoch": 27.107578768095372, "grad_norm": 0.02103779837489128, "learning_rate": 7.290405904059042e-05, "loss": 0.001880667358636856, "step": 95500 }, { "epoch": 27.107578768095372, "eval_accuracy": 0.9785718827494119, "eval_loss": 0.08370623737573624, "eval_runtime": 34.6623, "eval_samples_per_second": 453.721, "eval_steps_per_second": 7.097, "step": 95500 }, { "epoch": 27.110417258018735, "grad_norm": 0.659302294254303, "learning_rate": 7.290122055066704e-05, "loss": 0.0021719736978411674, "step": 95510 }, { "epoch": 27.113255747942095, "grad_norm": 9.88335132598877, "learning_rate": 7.289838206074368e-05, "loss": 0.008195067942142486, "step": 95520 }, { "epoch": 27.116094237865454, "grad_norm": 11.295275688171387, "learning_rate": 7.289554357082033e-05, "loss": 0.007400480657815933, "step": 95530 }, { "epoch": 27.118932727788817, "grad_norm": 2.8026363849639893, "learning_rate": 7.289270508089697e-05, "loss": 0.002499700151383877, "step": 95540 }, { "epoch": 27.121771217712176, "grad_norm": 2.077645778656006, "learning_rate": 7.288986659097361e-05, "loss": 0.005567096173763275, "step": 95550 }, { "epoch": 27.12460970763554, "grad_norm": 0.9897991418838501, "learning_rate": 7.288702810105025e-05, "loss": 0.002854766324162483, "step": 95560 }, { "epoch": 27.1274481975589, "grad_norm": 0.2928079068660736, "learning_rate": 7.288418961112689e-05, "loss": 0.003269396722316742, "step": 95570 }, { "epoch": 27.130286687482258, "grad_norm": 1.4505547285079956, "learning_rate": 7.288135112120352e-05, "loss": 0.013690488040447235, "step": 95580 }, { "epoch": 27.13312517740562, "grad_norm": 2.427708625793457, "learning_rate": 7.287851263128016e-05, "loss": 0.01806209236383438, "step": 95590 }, { "epoch": 27.13596366732898, "grad_norm": 6.36585807800293, "learning_rate": 7.28756741413568e-05, "loss": 0.011223309487104417, "step": 95600 }, { "epoch": 27.138802157252343, "grad_norm": 2.625903844833374, "learning_rate": 7.287283565143343e-05, "loss": 0.006681904941797256, "step": 95610 }, { "epoch": 27.141640647175702, "grad_norm": 0.6938783526420593, "learning_rate": 7.286999716151008e-05, "loss": 0.009985217452049255, "step": 95620 }, { "epoch": 27.144479137099065, "grad_norm": 1.742042899131775, "learning_rate": 7.286715867158673e-05, "loss": 0.009000875055789948, "step": 95630 }, { "epoch": 27.147317627022424, "grad_norm": 0.056310106068849564, "learning_rate": 7.286432018166335e-05, "loss": 0.007384529709815979, "step": 95640 }, { "epoch": 27.150156116945784, "grad_norm": 0.1407613903284073, "learning_rate": 7.286148169174e-05, "loss": 0.020370705425739287, "step": 95650 }, { "epoch": 27.152994606869147, "grad_norm": 0.3541724979877472, "learning_rate": 7.285864320181664e-05, "loss": 0.009230858087539673, "step": 95660 }, { "epoch": 27.155833096792506, "grad_norm": 0.02830605022609234, "learning_rate": 7.285580471189328e-05, "loss": 0.0038279779255390167, "step": 95670 }, { "epoch": 27.15867158671587, "grad_norm": 0.08605963736772537, "learning_rate": 7.285296622196992e-05, "loss": 0.002991901710629463, "step": 95680 }, { "epoch": 27.161510076639228, "grad_norm": 0.03823945298790932, "learning_rate": 7.285012773204656e-05, "loss": 0.005172581598162651, "step": 95690 }, { "epoch": 27.164348566562587, "grad_norm": 0.2861536145210266, "learning_rate": 7.28472892421232e-05, "loss": 0.0013753855600953102, "step": 95700 }, { "epoch": 27.16718705648595, "grad_norm": 0.06355776637792587, "learning_rate": 7.284445075219983e-05, "loss": 0.0011572793126106262, "step": 95710 }, { "epoch": 27.17002554640931, "grad_norm": 1.3275755643844604, "learning_rate": 7.284161226227647e-05, "loss": 0.00105625931173563, "step": 95720 }, { "epoch": 27.172864036332673, "grad_norm": 2.5164825916290283, "learning_rate": 7.283877377235311e-05, "loss": 0.0016612477600574493, "step": 95730 }, { "epoch": 27.175702526256032, "grad_norm": 0.0735091045498848, "learning_rate": 7.283593528242974e-05, "loss": 0.01171310618519783, "step": 95740 }, { "epoch": 27.17854101617939, "grad_norm": 0.35712435841560364, "learning_rate": 7.28330967925064e-05, "loss": 0.0038382068276405336, "step": 95750 }, { "epoch": 27.181379506102754, "grad_norm": 0.19970285892486572, "learning_rate": 7.283025830258304e-05, "loss": 0.005152300000190735, "step": 95760 }, { "epoch": 27.184217996026113, "grad_norm": 16.70682144165039, "learning_rate": 7.282741981265966e-05, "loss": 0.007089538872241974, "step": 95770 }, { "epoch": 27.187056485949476, "grad_norm": 0.0533771850168705, "learning_rate": 7.28245813227363e-05, "loss": 0.022129304707050323, "step": 95780 }, { "epoch": 27.189894975872836, "grad_norm": 0.043277643620967865, "learning_rate": 7.282174283281295e-05, "loss": 0.005180387198925019, "step": 95790 }, { "epoch": 27.192733465796195, "grad_norm": 0.32747653126716614, "learning_rate": 7.281890434288959e-05, "loss": 0.002481214702129364, "step": 95800 }, { "epoch": 27.195571955719558, "grad_norm": 0.6040971279144287, "learning_rate": 7.281606585296623e-05, "loss": 0.014130841195583343, "step": 95810 }, { "epoch": 27.198410445642917, "grad_norm": 1.769221544265747, "learning_rate": 7.281322736304287e-05, "loss": 0.0036196641623973845, "step": 95820 }, { "epoch": 27.20124893556628, "grad_norm": 0.47742876410484314, "learning_rate": 7.281038887311951e-05, "loss": 0.008638271689414978, "step": 95830 }, { "epoch": 27.20408742548964, "grad_norm": 0.8268797397613525, "learning_rate": 7.280755038319614e-05, "loss": 0.01397179365158081, "step": 95840 }, { "epoch": 27.206925915413, "grad_norm": 0.1875763088464737, "learning_rate": 7.280471189327278e-05, "loss": 0.00633937418460846, "step": 95850 }, { "epoch": 27.20976440533636, "grad_norm": 0.1410299688577652, "learning_rate": 7.280187340334942e-05, "loss": 0.0029922068119049073, "step": 95860 }, { "epoch": 27.21260289525972, "grad_norm": 0.3568504750728607, "learning_rate": 7.279903491342605e-05, "loss": 0.006019023060798645, "step": 95870 }, { "epoch": 27.215441385183084, "grad_norm": 0.6447275280952454, "learning_rate": 7.27961964235027e-05, "loss": 0.003779519349336624, "step": 95880 }, { "epoch": 27.218279875106443, "grad_norm": 0.5278103351593018, "learning_rate": 7.279335793357935e-05, "loss": 0.005723432451486587, "step": 95890 }, { "epoch": 27.221118365029803, "grad_norm": 0.05052858218550682, "learning_rate": 7.279051944365598e-05, "loss": 0.0035453565418720246, "step": 95900 }, { "epoch": 27.223956854953165, "grad_norm": 0.45540061593055725, "learning_rate": 7.278768095373262e-05, "loss": 0.01387341320514679, "step": 95910 }, { "epoch": 27.226795344876525, "grad_norm": 15.862494468688965, "learning_rate": 7.278484246380926e-05, "loss": 0.013920006155967713, "step": 95920 }, { "epoch": 27.229633834799888, "grad_norm": 0.8664196729660034, "learning_rate": 7.27820039738859e-05, "loss": 0.009170180559158326, "step": 95930 }, { "epoch": 27.232472324723247, "grad_norm": 3.4543914794921875, "learning_rate": 7.277916548396253e-05, "loss": 0.022152999043464662, "step": 95940 }, { "epoch": 27.235310814646606, "grad_norm": 0.07890743762254715, "learning_rate": 7.277632699403918e-05, "loss": 0.008522035181522369, "step": 95950 }, { "epoch": 27.23814930456997, "grad_norm": 9.85473918914795, "learning_rate": 7.277348850411581e-05, "loss": 0.009672321379184723, "step": 95960 }, { "epoch": 27.24098779449333, "grad_norm": 2.889181613922119, "learning_rate": 7.277065001419245e-05, "loss": 0.008040715008974075, "step": 95970 }, { "epoch": 27.24382628441669, "grad_norm": 6.430579662322998, "learning_rate": 7.276781152426909e-05, "loss": 0.0060943134129047396, "step": 95980 }, { "epoch": 27.24666477434005, "grad_norm": 3.244088649749756, "learning_rate": 7.276497303434573e-05, "loss": 0.0015734130516648292, "step": 95990 }, { "epoch": 27.249503264263414, "grad_norm": 0.9645456671714783, "learning_rate": 7.276213454442236e-05, "loss": 0.010027721524238586, "step": 96000 }, { "epoch": 27.249503264263414, "eval_accuracy": 0.9751382971959052, "eval_loss": 0.09602522850036621, "eval_runtime": 34.8567, "eval_samples_per_second": 451.19, "eval_steps_per_second": 7.057, "step": 96000 }, { "epoch": 27.252341754186773, "grad_norm": 10.547228813171387, "learning_rate": 7.275929605449902e-05, "loss": 0.011031201481819153, "step": 96010 }, { "epoch": 27.255180244110132, "grad_norm": 0.03486720845103264, "learning_rate": 7.275645756457566e-05, "loss": 0.0017884068191051483, "step": 96020 }, { "epoch": 27.258018734033495, "grad_norm": 13.99393367767334, "learning_rate": 7.275361907465229e-05, "loss": 0.016446830332279207, "step": 96030 }, { "epoch": 27.260857223956855, "grad_norm": 0.05621734634041786, "learning_rate": 7.275078058472893e-05, "loss": 0.005709293484687805, "step": 96040 }, { "epoch": 27.263695713880217, "grad_norm": 1.8264456987380981, "learning_rate": 7.274794209480557e-05, "loss": 0.009731321781873702, "step": 96050 }, { "epoch": 27.266534203803577, "grad_norm": 0.29912957549095154, "learning_rate": 7.27451036048822e-05, "loss": 0.003181523084640503, "step": 96060 }, { "epoch": 27.269372693726936, "grad_norm": 2.683715343475342, "learning_rate": 7.274226511495884e-05, "loss": 0.00295831523835659, "step": 96070 }, { "epoch": 27.2722111836503, "grad_norm": 0.1239648163318634, "learning_rate": 7.273942662503549e-05, "loss": 0.004626821726560593, "step": 96080 }, { "epoch": 27.27504967357366, "grad_norm": 8.984890937805176, "learning_rate": 7.273658813511212e-05, "loss": 0.0029262656345963476, "step": 96090 }, { "epoch": 27.27788816349702, "grad_norm": 0.140621155500412, "learning_rate": 7.273374964518876e-05, "loss": 0.017857691645622252, "step": 96100 }, { "epoch": 27.28072665342038, "grad_norm": 0.1530575007200241, "learning_rate": 7.27309111552654e-05, "loss": 0.0018099108710885048, "step": 96110 }, { "epoch": 27.28356514334374, "grad_norm": 0.6121329069137573, "learning_rate": 7.272807266534204e-05, "loss": 0.0014873171225190164, "step": 96120 }, { "epoch": 27.286403633267103, "grad_norm": 2.746446371078491, "learning_rate": 7.272523417541867e-05, "loss": 0.009694429486989975, "step": 96130 }, { "epoch": 27.289242123190462, "grad_norm": 4.560548305511475, "learning_rate": 7.272239568549531e-05, "loss": 0.004727952182292938, "step": 96140 }, { "epoch": 27.292080613113825, "grad_norm": 0.5452786087989807, "learning_rate": 7.271955719557197e-05, "loss": 0.007820501178503036, "step": 96150 }, { "epoch": 27.294919103037184, "grad_norm": 0.5095433592796326, "learning_rate": 7.27167187056486e-05, "loss": 0.004856949299573898, "step": 96160 }, { "epoch": 27.297757592960544, "grad_norm": 6.898279666900635, "learning_rate": 7.271388021572524e-05, "loss": 0.010692077875137328, "step": 96170 }, { "epoch": 27.300596082883906, "grad_norm": 3.6773245334625244, "learning_rate": 7.271104172580188e-05, "loss": 0.02808126211166382, "step": 96180 }, { "epoch": 27.303434572807266, "grad_norm": 0.9612772464752197, "learning_rate": 7.270820323587851e-05, "loss": 0.005396145582199097, "step": 96190 }, { "epoch": 27.30627306273063, "grad_norm": 0.3263259828090668, "learning_rate": 7.270536474595515e-05, "loss": 0.01182601973414421, "step": 96200 }, { "epoch": 27.309111552653988, "grad_norm": 0.8952674865722656, "learning_rate": 7.27025262560318e-05, "loss": 0.0018984602764248848, "step": 96210 }, { "epoch": 27.311950042577347, "grad_norm": 1.731255292892456, "learning_rate": 7.269968776610843e-05, "loss": 0.001396307721734047, "step": 96220 }, { "epoch": 27.31478853250071, "grad_norm": 0.20632019639015198, "learning_rate": 7.269684927618507e-05, "loss": 0.004953015595674515, "step": 96230 }, { "epoch": 27.31762702242407, "grad_norm": 0.8562304973602295, "learning_rate": 7.269401078626171e-05, "loss": 0.001804298534989357, "step": 96240 }, { "epoch": 27.320465512347432, "grad_norm": 0.029065052047371864, "learning_rate": 7.269117229633836e-05, "loss": 0.006143723428249359, "step": 96250 }, { "epoch": 27.323304002270792, "grad_norm": 11.46688461303711, "learning_rate": 7.268833380641498e-05, "loss": 0.028428050875663757, "step": 96260 }, { "epoch": 27.32614249219415, "grad_norm": 0.5348675847053528, "learning_rate": 7.268549531649162e-05, "loss": 0.015438328683376312, "step": 96270 }, { "epoch": 27.328980982117514, "grad_norm": 11.064452171325684, "learning_rate": 7.268265682656828e-05, "loss": 0.015745861828327178, "step": 96280 }, { "epoch": 27.331819472040873, "grad_norm": 0.33625495433807373, "learning_rate": 7.267981833664491e-05, "loss": 0.003426486626267433, "step": 96290 }, { "epoch": 27.334657961964236, "grad_norm": 15.229154586791992, "learning_rate": 7.267697984672155e-05, "loss": 0.007411926984786987, "step": 96300 }, { "epoch": 27.337496451887596, "grad_norm": 3.798114061355591, "learning_rate": 7.267414135679819e-05, "loss": 0.001813364028930664, "step": 96310 }, { "epoch": 27.340334941810955, "grad_norm": 0.05049661174416542, "learning_rate": 7.267130286687482e-05, "loss": 0.012062403559684753, "step": 96320 }, { "epoch": 27.343173431734318, "grad_norm": 7.585714817047119, "learning_rate": 7.266846437695146e-05, "loss": 0.01547311693429947, "step": 96330 }, { "epoch": 27.346011921657677, "grad_norm": 0.04728105291724205, "learning_rate": 7.26656258870281e-05, "loss": 0.01279570758342743, "step": 96340 }, { "epoch": 27.34885041158104, "grad_norm": 0.21177461743354797, "learning_rate": 7.266278739710474e-05, "loss": 0.004313618689775467, "step": 96350 }, { "epoch": 27.3516889015044, "grad_norm": 0.8954150676727295, "learning_rate": 7.265994890718138e-05, "loss": 0.0034503698348999023, "step": 96360 }, { "epoch": 27.35452739142776, "grad_norm": 3.4249613285064697, "learning_rate": 7.265711041725803e-05, "loss": 0.002411019057035446, "step": 96370 }, { "epoch": 27.35736588135112, "grad_norm": 0.6227315664291382, "learning_rate": 7.265427192733467e-05, "loss": 0.017073747515678406, "step": 96380 }, { "epoch": 27.36020437127448, "grad_norm": 0.13112600147724152, "learning_rate": 7.26514334374113e-05, "loss": 0.007861412316560745, "step": 96390 }, { "epoch": 27.363042861197844, "grad_norm": 21.282794952392578, "learning_rate": 7.264859494748794e-05, "loss": 0.02441975474357605, "step": 96400 }, { "epoch": 27.365881351121203, "grad_norm": 3.2690160274505615, "learning_rate": 7.264575645756459e-05, "loss": 0.0052418425679206845, "step": 96410 }, { "epoch": 27.368719841044566, "grad_norm": 0.4909859001636505, "learning_rate": 7.264291796764122e-05, "loss": 0.0020638398826122286, "step": 96420 }, { "epoch": 27.371558330967925, "grad_norm": 6.272863388061523, "learning_rate": 7.264007947771786e-05, "loss": 0.00542963407933712, "step": 96430 }, { "epoch": 27.374396820891285, "grad_norm": 0.02075115405023098, "learning_rate": 7.26372409877945e-05, "loss": 0.003221285715699196, "step": 96440 }, { "epoch": 27.377235310814648, "grad_norm": 13.390822410583496, "learning_rate": 7.263440249787113e-05, "loss": 0.02125626802444458, "step": 96450 }, { "epoch": 27.380073800738007, "grad_norm": 0.17245863378047943, "learning_rate": 7.263156400794777e-05, "loss": 0.002406204119324684, "step": 96460 }, { "epoch": 27.38291229066137, "grad_norm": 0.6518900990486145, "learning_rate": 7.262872551802441e-05, "loss": 0.019400554895401, "step": 96470 }, { "epoch": 27.38575078058473, "grad_norm": 0.04488157853484154, "learning_rate": 7.262588702810105e-05, "loss": 0.009276924282312393, "step": 96480 }, { "epoch": 27.38858927050809, "grad_norm": 13.981741905212402, "learning_rate": 7.26230485381777e-05, "loss": 0.01791495531797409, "step": 96490 }, { "epoch": 27.39142776043145, "grad_norm": 0.04618290811777115, "learning_rate": 7.262021004825434e-05, "loss": 0.005946160852909088, "step": 96500 }, { "epoch": 27.39142776043145, "eval_accuracy": 0.9793984866789598, "eval_loss": 0.08002078533172607, "eval_runtime": 33.8081, "eval_samples_per_second": 465.184, "eval_steps_per_second": 7.276, "step": 96500 }, { "epoch": 27.39426625035481, "grad_norm": 0.05166146531701088, "learning_rate": 7.261737155833098e-05, "loss": 0.014650072157382964, "step": 96510 }, { "epoch": 27.397104740278174, "grad_norm": 12.379776954650879, "learning_rate": 7.26145330684076e-05, "loss": 0.010050324350595474, "step": 96520 }, { "epoch": 27.399943230201533, "grad_norm": 8.017269134521484, "learning_rate": 7.261169457848425e-05, "loss": 0.02458428144454956, "step": 96530 }, { "epoch": 27.402781720124892, "grad_norm": 1.0496536493301392, "learning_rate": 7.260885608856089e-05, "loss": 0.0014097129926085473, "step": 96540 }, { "epoch": 27.405620210048255, "grad_norm": 0.9317585825920105, "learning_rate": 7.260601759863753e-05, "loss": 0.002869780920445919, "step": 96550 }, { "epoch": 27.408458699971614, "grad_norm": 0.546519935131073, "learning_rate": 7.260317910871417e-05, "loss": 0.02560302019119263, "step": 96560 }, { "epoch": 27.411297189894977, "grad_norm": 0.007085329387336969, "learning_rate": 7.260034061879081e-05, "loss": 0.005018869042396545, "step": 96570 }, { "epoch": 27.414135679818337, "grad_norm": 0.03907964378595352, "learning_rate": 7.259750212886744e-05, "loss": 0.0037318810820579527, "step": 96580 }, { "epoch": 27.416974169741696, "grad_norm": 0.6875267028808594, "learning_rate": 7.259466363894408e-05, "loss": 0.008974210172891618, "step": 96590 }, { "epoch": 27.41981265966506, "grad_norm": 12.13978385925293, "learning_rate": 7.259182514902072e-05, "loss": 0.008776363730430604, "step": 96600 }, { "epoch": 27.422651149588418, "grad_norm": 12.679821968078613, "learning_rate": 7.258898665909736e-05, "loss": 0.012477617710828781, "step": 96610 }, { "epoch": 27.42548963951178, "grad_norm": 1.2721775770187378, "learning_rate": 7.2586148169174e-05, "loss": 0.015789541602134704, "step": 96620 }, { "epoch": 27.42832812943514, "grad_norm": 0.888314962387085, "learning_rate": 7.258330967925065e-05, "loss": 0.004226536303758621, "step": 96630 }, { "epoch": 27.4311666193585, "grad_norm": 0.08833857625722885, "learning_rate": 7.258047118932729e-05, "loss": 0.0009563215076923371, "step": 96640 }, { "epoch": 27.434005109281863, "grad_norm": 5.174928665161133, "learning_rate": 7.257763269940392e-05, "loss": 0.011080044507980346, "step": 96650 }, { "epoch": 27.436843599205222, "grad_norm": 0.02764897234737873, "learning_rate": 7.257479420948056e-05, "loss": 0.00425354465842247, "step": 96660 }, { "epoch": 27.439682089128585, "grad_norm": 0.29803451895713806, "learning_rate": 7.25719557195572e-05, "loss": 0.010756665468215942, "step": 96670 }, { "epoch": 27.442520579051944, "grad_norm": 0.6910187005996704, "learning_rate": 7.256911722963384e-05, "loss": 0.0011869786307215691, "step": 96680 }, { "epoch": 27.445359068975304, "grad_norm": 0.4766214191913605, "learning_rate": 7.256627873971048e-05, "loss": 0.006132545322179795, "step": 96690 }, { "epoch": 27.448197558898666, "grad_norm": 0.5879521369934082, "learning_rate": 7.256344024978712e-05, "loss": 0.0014057405292987823, "step": 96700 }, { "epoch": 27.451036048822026, "grad_norm": 6.094236850738525, "learning_rate": 7.256060175986375e-05, "loss": 0.0024663690477609635, "step": 96710 }, { "epoch": 27.45387453874539, "grad_norm": 0.26218903064727783, "learning_rate": 7.255776326994039e-05, "loss": 0.005590567737817765, "step": 96720 }, { "epoch": 27.456713028668748, "grad_norm": 1.1263902187347412, "learning_rate": 7.255492478001703e-05, "loss": 0.007985883951187133, "step": 96730 }, { "epoch": 27.459551518592107, "grad_norm": 6.598442554473877, "learning_rate": 7.255208629009367e-05, "loss": 0.00346999317407608, "step": 96740 }, { "epoch": 27.46239000851547, "grad_norm": 0.006668996997177601, "learning_rate": 7.254924780017032e-05, "loss": 0.003326469287276268, "step": 96750 }, { "epoch": 27.46522849843883, "grad_norm": 0.38971060514450073, "learning_rate": 7.254640931024696e-05, "loss": 0.0037871651351451875, "step": 96760 }, { "epoch": 27.468066988362192, "grad_norm": 0.08523242175579071, "learning_rate": 7.25435708203236e-05, "loss": 0.00449700653553009, "step": 96770 }, { "epoch": 27.47090547828555, "grad_norm": 0.12236186861991882, "learning_rate": 7.254073233040023e-05, "loss": 0.003476206213235855, "step": 96780 }, { "epoch": 27.473743968208915, "grad_norm": 0.03526316210627556, "learning_rate": 7.253789384047687e-05, "loss": 0.0031897757202386857, "step": 96790 }, { "epoch": 27.476582458132274, "grad_norm": 0.2274373173713684, "learning_rate": 7.253505535055351e-05, "loss": 0.0014999045059084892, "step": 96800 }, { "epoch": 27.479420948055633, "grad_norm": 3.196608066558838, "learning_rate": 7.253221686063015e-05, "loss": 0.002839774452149868, "step": 96810 }, { "epoch": 27.482259437978996, "grad_norm": 0.18053150177001953, "learning_rate": 7.252937837070679e-05, "loss": 0.0017186239361763, "step": 96820 }, { "epoch": 27.485097927902356, "grad_norm": 0.6801695227622986, "learning_rate": 7.252653988078343e-05, "loss": 0.0013903185725212097, "step": 96830 }, { "epoch": 27.48793641782572, "grad_norm": 0.42294448614120483, "learning_rate": 7.252370139086006e-05, "loss": 0.005723740160465241, "step": 96840 }, { "epoch": 27.490774907749078, "grad_norm": 3.020655632019043, "learning_rate": 7.25208629009367e-05, "loss": 0.007986024767160416, "step": 96850 }, { "epoch": 27.493613397672437, "grad_norm": 2.8338868618011475, "learning_rate": 7.251802441101334e-05, "loss": 0.006116124615073204, "step": 96860 }, { "epoch": 27.4964518875958, "grad_norm": 0.2951723337173462, "learning_rate": 7.251518592108999e-05, "loss": 0.010047144442796706, "step": 96870 }, { "epoch": 27.49929037751916, "grad_norm": 0.06595952063798904, "learning_rate": 7.251234743116663e-05, "loss": 0.011087631434202194, "step": 96880 }, { "epoch": 27.502128867442522, "grad_norm": 10.771748542785645, "learning_rate": 7.250950894124327e-05, "loss": 0.009040166437625886, "step": 96890 }, { "epoch": 27.50496735736588, "grad_norm": 0.8544277548789978, "learning_rate": 7.25066704513199e-05, "loss": 0.00322343111038208, "step": 96900 }, { "epoch": 27.50780584728924, "grad_norm": 0.10652811080217361, "learning_rate": 7.250383196139654e-05, "loss": 0.001299869827926159, "step": 96910 }, { "epoch": 27.510644337212604, "grad_norm": 2.8159871101379395, "learning_rate": 7.250099347147318e-05, "loss": 0.007146134227514267, "step": 96920 }, { "epoch": 27.513482827135963, "grad_norm": 0.028420766815543175, "learning_rate": 7.249815498154982e-05, "loss": 0.0010560141876339913, "step": 96930 }, { "epoch": 27.516321317059326, "grad_norm": 0.08361313492059708, "learning_rate": 7.249531649162645e-05, "loss": 0.011039716005325318, "step": 96940 }, { "epoch": 27.519159806982685, "grad_norm": 1.4300341606140137, "learning_rate": 7.24924780017031e-05, "loss": 0.010797150433063507, "step": 96950 }, { "epoch": 27.521998296906045, "grad_norm": 10.108821868896484, "learning_rate": 7.248963951177974e-05, "loss": 0.007318485528230667, "step": 96960 }, { "epoch": 27.524836786829407, "grad_norm": 6.83403205871582, "learning_rate": 7.248680102185637e-05, "loss": 0.005521109700202942, "step": 96970 }, { "epoch": 27.527675276752767, "grad_norm": 0.4653545618057251, "learning_rate": 7.248396253193301e-05, "loss": 0.010871192812919617, "step": 96980 }, { "epoch": 27.53051376667613, "grad_norm": 0.6578171253204346, "learning_rate": 7.248112404200965e-05, "loss": 0.004691153019666672, "step": 96990 }, { "epoch": 27.53335225659949, "grad_norm": 0.3413366973400116, "learning_rate": 7.247828555208628e-05, "loss": 0.003320831432938576, "step": 97000 }, { "epoch": 27.53335225659949, "eval_accuracy": 0.9782539581611241, "eval_loss": 0.08192121982574463, "eval_runtime": 38.5956, "eval_samples_per_second": 407.482, "eval_steps_per_second": 6.374, "step": 97000 }, { "epoch": 27.53619074652285, "grad_norm": 0.192887544631958, "learning_rate": 7.247544706216294e-05, "loss": 0.018705017864704132, "step": 97010 }, { "epoch": 27.53902923644621, "grad_norm": 0.010816486552357674, "learning_rate": 7.247260857223958e-05, "loss": 0.003991532698273658, "step": 97020 }, { "epoch": 27.54186772636957, "grad_norm": 0.37644731998443604, "learning_rate": 7.24697700823162e-05, "loss": 0.0043860659003257755, "step": 97030 }, { "epoch": 27.544706216292933, "grad_norm": 0.09121926128864288, "learning_rate": 7.246693159239285e-05, "loss": 0.009319902956485748, "step": 97040 }, { "epoch": 27.547544706216293, "grad_norm": 0.05166967585682869, "learning_rate": 7.246409310246949e-05, "loss": 0.008026671409606934, "step": 97050 }, { "epoch": 27.550383196139652, "grad_norm": 0.40472450852394104, "learning_rate": 7.246125461254613e-05, "loss": 0.001774546131491661, "step": 97060 }, { "epoch": 27.553221686063015, "grad_norm": 1.489362120628357, "learning_rate": 7.245841612262276e-05, "loss": 0.03237197995185852, "step": 97070 }, { "epoch": 27.556060175986374, "grad_norm": 0.22250038385391235, "learning_rate": 7.245557763269941e-05, "loss": 0.029957303404808046, "step": 97080 }, { "epoch": 27.558898665909737, "grad_norm": 0.20749512314796448, "learning_rate": 7.245273914277605e-05, "loss": 0.004442504793405533, "step": 97090 }, { "epoch": 27.561737155833097, "grad_norm": 3.133195638656616, "learning_rate": 7.244990065285268e-05, "loss": 0.005918603390455246, "step": 97100 }, { "epoch": 27.564575645756456, "grad_norm": 7.982100009918213, "learning_rate": 7.244706216292932e-05, "loss": 0.007559237629175186, "step": 97110 }, { "epoch": 27.56741413567982, "grad_norm": 2.5231308937072754, "learning_rate": 7.244422367300597e-05, "loss": 0.0068789184093475345, "step": 97120 }, { "epoch": 27.570252625603178, "grad_norm": 0.10240662842988968, "learning_rate": 7.24413851830826e-05, "loss": 0.006177797168493271, "step": 97130 }, { "epoch": 27.57309111552654, "grad_norm": 0.3517960011959076, "learning_rate": 7.243854669315925e-05, "loss": 0.02077002078294754, "step": 97140 }, { "epoch": 27.5759296054499, "grad_norm": 0.09200505912303925, "learning_rate": 7.243570820323589e-05, "loss": 0.019553470611572265, "step": 97150 }, { "epoch": 27.578768095373263, "grad_norm": 1.3464946746826172, "learning_rate": 7.243286971331252e-05, "loss": 0.013914544880390168, "step": 97160 }, { "epoch": 27.581606585296623, "grad_norm": 1.5558844804763794, "learning_rate": 7.243003122338916e-05, "loss": 0.020598196983337404, "step": 97170 }, { "epoch": 27.584445075219982, "grad_norm": 0.08941595256328583, "learning_rate": 7.24271927334658e-05, "loss": 0.0017735045403242111, "step": 97180 }, { "epoch": 27.587283565143345, "grad_norm": 0.02656121551990509, "learning_rate": 7.242435424354244e-05, "loss": 0.0027299780398607255, "step": 97190 }, { "epoch": 27.590122055066704, "grad_norm": 0.9752245545387268, "learning_rate": 7.242151575361907e-05, "loss": 0.004615262895822525, "step": 97200 }, { "epoch": 27.592960544990063, "grad_norm": 1.7427401542663574, "learning_rate": 7.241867726369572e-05, "loss": 0.008520900458097457, "step": 97210 }, { "epoch": 27.595799034913426, "grad_norm": 0.08357635140419006, "learning_rate": 7.241583877377237e-05, "loss": 0.010135090351104737, "step": 97220 }, { "epoch": 27.598637524836786, "grad_norm": 1.1994341611862183, "learning_rate": 7.2413000283849e-05, "loss": 0.0035308055579662324, "step": 97230 }, { "epoch": 27.60147601476015, "grad_norm": 12.45112419128418, "learning_rate": 7.241016179392563e-05, "loss": 0.013129359483718872, "step": 97240 }, { "epoch": 27.604314504683508, "grad_norm": 0.6853893399238586, "learning_rate": 7.240732330400228e-05, "loss": 0.005615994334220886, "step": 97250 }, { "epoch": 27.60715299460687, "grad_norm": 0.2407887727022171, "learning_rate": 7.24044848140789e-05, "loss": 0.0023573609068989754, "step": 97260 }, { "epoch": 27.60999148453023, "grad_norm": 1.4138224124908447, "learning_rate": 7.240164632415555e-05, "loss": 0.026178985834121704, "step": 97270 }, { "epoch": 27.61282997445359, "grad_norm": 0.8983426094055176, "learning_rate": 7.23988078342322e-05, "loss": 0.00884723588824272, "step": 97280 }, { "epoch": 27.615668464376952, "grad_norm": 0.1151418462395668, "learning_rate": 7.239596934430883e-05, "loss": 0.0104530431330204, "step": 97290 }, { "epoch": 27.61850695430031, "grad_norm": 0.1542503386735916, "learning_rate": 7.239313085438547e-05, "loss": 0.014122171700000763, "step": 97300 }, { "epoch": 27.621345444223675, "grad_norm": 0.10416947305202484, "learning_rate": 7.239029236446211e-05, "loss": 0.0013408999890089034, "step": 97310 }, { "epoch": 27.624183934147034, "grad_norm": 0.4515661299228668, "learning_rate": 7.238745387453875e-05, "loss": 0.017455309629440308, "step": 97320 }, { "epoch": 27.627022424070393, "grad_norm": 3.6904516220092773, "learning_rate": 7.238461538461538e-05, "loss": 0.0022825652733445166, "step": 97330 }, { "epoch": 27.629860913993756, "grad_norm": 4.691474914550781, "learning_rate": 7.238177689469204e-05, "loss": 0.007043027877807617, "step": 97340 }, { "epoch": 27.632699403917115, "grad_norm": 1.6902151107788086, "learning_rate": 7.2379222253761e-05, "loss": 0.0041501600295305256, "step": 97350 }, { "epoch": 27.63553789384048, "grad_norm": 0.12083960324525833, "learning_rate": 7.237638376383764e-05, "loss": 0.005424722284078598, "step": 97360 }, { "epoch": 27.638376383763838, "grad_norm": 1.368170976638794, "learning_rate": 7.237354527391428e-05, "loss": 0.0015940193086862564, "step": 97370 }, { "epoch": 27.641214873687197, "grad_norm": 4.427603244781494, "learning_rate": 7.237070678399091e-05, "loss": 0.004123768210411072, "step": 97380 }, { "epoch": 27.64405336361056, "grad_norm": 0.40506696701049805, "learning_rate": 7.236786829406757e-05, "loss": 0.006930405646562577, "step": 97390 }, { "epoch": 27.64689185353392, "grad_norm": 0.35580891370773315, "learning_rate": 7.236502980414421e-05, "loss": 0.0033085688948631285, "step": 97400 }, { "epoch": 27.649730343457282, "grad_norm": 0.0664389356970787, "learning_rate": 7.236219131422084e-05, "loss": 0.01076638475060463, "step": 97410 }, { "epoch": 27.65256883338064, "grad_norm": 0.9532930850982666, "learning_rate": 7.235935282429748e-05, "loss": 0.0034529320895671845, "step": 97420 }, { "epoch": 27.655407323304, "grad_norm": 1.2071279287338257, "learning_rate": 7.235651433437412e-05, "loss": 0.013354653120040893, "step": 97430 }, { "epoch": 27.658245813227364, "grad_norm": 0.148728609085083, "learning_rate": 7.235367584445075e-05, "loss": 0.011944805085659028, "step": 97440 }, { "epoch": 27.661084303150723, "grad_norm": 5.102467060089111, "learning_rate": 7.235083735452739e-05, "loss": 0.007969412207603454, "step": 97450 }, { "epoch": 27.663922793074086, "grad_norm": 8.773560523986816, "learning_rate": 7.234799886460404e-05, "loss": 0.014997152984142304, "step": 97460 }, { "epoch": 27.666761282997445, "grad_norm": 0.09603577107191086, "learning_rate": 7.234516037468067e-05, "loss": 0.01715960502624512, "step": 97470 }, { "epoch": 27.669599772920805, "grad_norm": 2.83699107170105, "learning_rate": 7.234232188475731e-05, "loss": 0.006029859185218811, "step": 97480 }, { "epoch": 27.672438262844167, "grad_norm": 5.555549621582031, "learning_rate": 7.233948339483395e-05, "loss": 0.008136823773384094, "step": 97490 }, { "epoch": 27.675276752767527, "grad_norm": 0.7954596281051636, "learning_rate": 7.23366449049106e-05, "loss": 0.0017490891739726066, "step": 97500 }, { "epoch": 27.675276752767527, "eval_accuracy": 0.9760284860431105, "eval_loss": 0.09283332526683807, "eval_runtime": 39.1723, "eval_samples_per_second": 401.483, "eval_steps_per_second": 6.28, "step": 97500 }, { "epoch": 27.67811524269089, "grad_norm": 1.892218828201294, "learning_rate": 7.233380641498722e-05, "loss": 0.01595041751861572, "step": 97510 }, { "epoch": 27.68095373261425, "grad_norm": 3.9127347469329834, "learning_rate": 7.233096792506388e-05, "loss": 0.0028047343716025353, "step": 97520 }, { "epoch": 27.68379222253761, "grad_norm": 5.196074485778809, "learning_rate": 7.232812943514052e-05, "loss": 0.011661006510257721, "step": 97530 }, { "epoch": 27.68663071246097, "grad_norm": 0.3320988118648529, "learning_rate": 7.232529094521715e-05, "loss": 0.00513499304652214, "step": 97540 }, { "epoch": 27.68946920238433, "grad_norm": 5.697993278503418, "learning_rate": 7.232245245529379e-05, "loss": 0.00231747142970562, "step": 97550 }, { "epoch": 27.692307692307693, "grad_norm": 0.07812555879354477, "learning_rate": 7.231961396537043e-05, "loss": 0.00493914783000946, "step": 97560 }, { "epoch": 27.695146182231053, "grad_norm": 1.1621365547180176, "learning_rate": 7.231677547544706e-05, "loss": 0.004425373673439026, "step": 97570 }, { "epoch": 27.697984672154412, "grad_norm": 0.3675200045108795, "learning_rate": 7.23139369855237e-05, "loss": 0.002232515811920166, "step": 97580 }, { "epoch": 27.700823162077775, "grad_norm": 0.07100116461515427, "learning_rate": 7.231109849560035e-05, "loss": 0.00609513521194458, "step": 97590 }, { "epoch": 27.703661652001134, "grad_norm": 4.451079845428467, "learning_rate": 7.230826000567698e-05, "loss": 0.021011435985565187, "step": 97600 }, { "epoch": 27.706500141924497, "grad_norm": 0.14940911531448364, "learning_rate": 7.230542151575362e-05, "loss": 0.008579467982053756, "step": 97610 }, { "epoch": 27.709338631847857, "grad_norm": 7.59970760345459, "learning_rate": 7.230258302583026e-05, "loss": 0.015054668486118316, "step": 97620 }, { "epoch": 27.71217712177122, "grad_norm": 1.2204277515411377, "learning_rate": 7.22997445359069e-05, "loss": 0.003108901157975197, "step": 97630 }, { "epoch": 27.71501561169458, "grad_norm": 0.25696390867233276, "learning_rate": 7.229690604598353e-05, "loss": 0.01889011263847351, "step": 97640 }, { "epoch": 27.717854101617938, "grad_norm": 0.0712360143661499, "learning_rate": 7.229406755606017e-05, "loss": 0.002451682835817337, "step": 97650 }, { "epoch": 27.7206925915413, "grad_norm": 0.6327062845230103, "learning_rate": 7.229122906613682e-05, "loss": 0.026120179891586305, "step": 97660 }, { "epoch": 27.72353108146466, "grad_norm": 3.274928331375122, "learning_rate": 7.228839057621346e-05, "loss": 0.004114814847707748, "step": 97670 }, { "epoch": 27.726369571388023, "grad_norm": 0.5407480001449585, "learning_rate": 7.22855520862901e-05, "loss": 0.02035228759050369, "step": 97680 }, { "epoch": 27.729208061311382, "grad_norm": 10.300958633422852, "learning_rate": 7.228271359636674e-05, "loss": 0.01983426809310913, "step": 97690 }, { "epoch": 27.732046551234742, "grad_norm": 0.36424580216407776, "learning_rate": 7.227987510644337e-05, "loss": 0.008119792491197587, "step": 97700 }, { "epoch": 27.734885041158105, "grad_norm": 0.12770092487335205, "learning_rate": 7.227703661652001e-05, "loss": 0.002463865652680397, "step": 97710 }, { "epoch": 27.737723531081464, "grad_norm": 0.2621767520904541, "learning_rate": 7.227419812659666e-05, "loss": 0.002421605214476585, "step": 97720 }, { "epoch": 27.740562021004827, "grad_norm": 0.21816004812717438, "learning_rate": 7.227135963667329e-05, "loss": 0.002832166664302349, "step": 97730 }, { "epoch": 27.743400510928186, "grad_norm": 0.8486698269844055, "learning_rate": 7.226852114674993e-05, "loss": 0.0037271767854690553, "step": 97740 }, { "epoch": 27.746239000851546, "grad_norm": 0.4028666615486145, "learning_rate": 7.226568265682658e-05, "loss": 0.013119104504585265, "step": 97750 }, { "epoch": 27.74907749077491, "grad_norm": 8.798704147338867, "learning_rate": 7.22628441669032e-05, "loss": 0.011658205091953278, "step": 97760 }, { "epoch": 27.751915980698268, "grad_norm": 0.19419363141059875, "learning_rate": 7.226000567697984e-05, "loss": 0.013714560866355896, "step": 97770 }, { "epoch": 27.75475447062163, "grad_norm": 3.210604667663574, "learning_rate": 7.225716718705649e-05, "loss": 0.00460277758538723, "step": 97780 }, { "epoch": 27.75759296054499, "grad_norm": 3.4467852115631104, "learning_rate": 7.225432869713313e-05, "loss": 0.002350696548819542, "step": 97790 }, { "epoch": 27.76043145046835, "grad_norm": 0.12413390725851059, "learning_rate": 7.225149020720977e-05, "loss": 0.02469281554222107, "step": 97800 }, { "epoch": 27.763269940391712, "grad_norm": 1.8002818822860718, "learning_rate": 7.224865171728641e-05, "loss": 0.012320424616336822, "step": 97810 }, { "epoch": 27.76610843031507, "grad_norm": 0.4590282440185547, "learning_rate": 7.224581322736305e-05, "loss": 0.00488179549574852, "step": 97820 }, { "epoch": 27.768946920238434, "grad_norm": 6.746788501739502, "learning_rate": 7.224297473743968e-05, "loss": 0.002985762991011143, "step": 97830 }, { "epoch": 27.771785410161794, "grad_norm": 0.6530354619026184, "learning_rate": 7.224013624751632e-05, "loss": 0.0054592698812484745, "step": 97840 }, { "epoch": 27.774623900085153, "grad_norm": 1.8635096549987793, "learning_rate": 7.223729775759296e-05, "loss": 0.0034407205879688264, "step": 97850 }, { "epoch": 27.777462390008516, "grad_norm": 1.4154552221298218, "learning_rate": 7.22344592676696e-05, "loss": 0.007369782030582428, "step": 97860 }, { "epoch": 27.780300879931875, "grad_norm": 0.4375300109386444, "learning_rate": 7.223162077774624e-05, "loss": 0.001288614794611931, "step": 97870 }, { "epoch": 27.78313936985524, "grad_norm": 2.110095977783203, "learning_rate": 7.222878228782289e-05, "loss": 0.008738376200199127, "step": 97880 }, { "epoch": 27.785977859778598, "grad_norm": 0.1568612903356552, "learning_rate": 7.222594379789951e-05, "loss": 0.001327976956963539, "step": 97890 }, { "epoch": 27.788816349701957, "grad_norm": 0.020799115300178528, "learning_rate": 7.222310530797616e-05, "loss": 0.0025373205542564394, "step": 97900 }, { "epoch": 27.79165483962532, "grad_norm": 0.8713732957839966, "learning_rate": 7.22202668180528e-05, "loss": 0.020708708465099333, "step": 97910 }, { "epoch": 27.79449332954868, "grad_norm": 0.9857670664787292, "learning_rate": 7.221742832812944e-05, "loss": 0.0101993165910244, "step": 97920 }, { "epoch": 27.797331819472042, "grad_norm": 1.0326110124588013, "learning_rate": 7.221458983820608e-05, "loss": 0.008144833892583848, "step": 97930 }, { "epoch": 27.8001703093954, "grad_norm": 3.2439379692077637, "learning_rate": 7.221175134828272e-05, "loss": 0.00883943885564804, "step": 97940 }, { "epoch": 27.80300879931876, "grad_norm": 4.263828277587891, "learning_rate": 7.220891285835936e-05, "loss": 0.003248819708824158, "step": 97950 }, { "epoch": 27.805847289242124, "grad_norm": 0.2934854030609131, "learning_rate": 7.220607436843599e-05, "loss": 0.025409579277038574, "step": 97960 }, { "epoch": 27.808685779165483, "grad_norm": 7.261455059051514, "learning_rate": 7.220323587851263e-05, "loss": 0.018280257284641267, "step": 97970 }, { "epoch": 27.811524269088846, "grad_norm": 1.8416060209274292, "learning_rate": 7.220039738858927e-05, "loss": 0.010193935036659241, "step": 97980 }, { "epoch": 27.814362759012205, "grad_norm": 0.9720316529273987, "learning_rate": 7.219755889866591e-05, "loss": 0.0020320884883403777, "step": 97990 }, { "epoch": 27.817201248935568, "grad_norm": 0.060075193643569946, "learning_rate": 7.219472040874256e-05, "loss": 0.004979802295565605, "step": 98000 }, { "epoch": 27.817201248935568, "eval_accuracy": 0.9774909391492338, "eval_loss": 0.08498906344175339, "eval_runtime": 44.9913, "eval_samples_per_second": 349.556, "eval_steps_per_second": 5.468, "step": 98000 }, { "epoch": 27.820039738858927, "grad_norm": 2.632995843887329, "learning_rate": 7.21918819188192e-05, "loss": 0.009663437306880952, "step": 98010 }, { "epoch": 27.822878228782287, "grad_norm": 3.3976099491119385, "learning_rate": 7.218904342889582e-05, "loss": 0.012192010134458541, "step": 98020 }, { "epoch": 27.82571671870565, "grad_norm": 3.77644944190979, "learning_rate": 7.218620493897247e-05, "loss": 0.023091286420822144, "step": 98030 }, { "epoch": 27.82855520862901, "grad_norm": 2.5411765575408936, "learning_rate": 7.218336644904911e-05, "loss": 0.01259341835975647, "step": 98040 }, { "epoch": 27.83139369855237, "grad_norm": 0.2684828042984009, "learning_rate": 7.218052795912575e-05, "loss": 0.009865278005599975, "step": 98050 }, { "epoch": 27.83423218847573, "grad_norm": 0.26925626397132874, "learning_rate": 7.217768946920239e-05, "loss": 0.004677130654454231, "step": 98060 }, { "epoch": 27.83707067839909, "grad_norm": 1.2511063814163208, "learning_rate": 7.217485097927903e-05, "loss": 0.003606342151761055, "step": 98070 }, { "epoch": 27.839909168322453, "grad_norm": 0.723735511302948, "learning_rate": 7.217201248935567e-05, "loss": 0.005466923862695694, "step": 98080 }, { "epoch": 27.842747658245813, "grad_norm": 6.25101375579834, "learning_rate": 7.21691739994323e-05, "loss": 0.010225855559110642, "step": 98090 }, { "epoch": 27.845586148169176, "grad_norm": 7.042778015136719, "learning_rate": 7.216633550950894e-05, "loss": 0.018143463134765624, "step": 98100 }, { "epoch": 27.848424638092535, "grad_norm": 3.2664709091186523, "learning_rate": 7.216349701958558e-05, "loss": 0.011576950550079346, "step": 98110 }, { "epoch": 27.851263128015894, "grad_norm": 1.805419921875, "learning_rate": 7.216065852966222e-05, "loss": 0.01934765875339508, "step": 98120 }, { "epoch": 27.854101617939257, "grad_norm": 3.199418067932129, "learning_rate": 7.215782003973887e-05, "loss": 0.02524043321609497, "step": 98130 }, { "epoch": 27.856940107862616, "grad_norm": 0.04511936381459236, "learning_rate": 7.215498154981551e-05, "loss": 0.007469873875379562, "step": 98140 }, { "epoch": 27.85977859778598, "grad_norm": 0.37526434659957886, "learning_rate": 7.215214305989214e-05, "loss": 0.0032871343195438386, "step": 98150 }, { "epoch": 27.86261708770934, "grad_norm": 3.103142499923706, "learning_rate": 7.214930456996878e-05, "loss": 0.006270253658294677, "step": 98160 }, { "epoch": 27.865455577632698, "grad_norm": 1.0277327299118042, "learning_rate": 7.214646608004542e-05, "loss": 0.008662254363298417, "step": 98170 }, { "epoch": 27.86829406755606, "grad_norm": 10.598877906799316, "learning_rate": 7.214362759012206e-05, "loss": 0.00756821408867836, "step": 98180 }, { "epoch": 27.87113255747942, "grad_norm": 2.7983434200286865, "learning_rate": 7.21407891001987e-05, "loss": 0.012965750694274903, "step": 98190 }, { "epoch": 27.873971047402783, "grad_norm": 0.1950972080230713, "learning_rate": 7.213795061027534e-05, "loss": 0.004463245719671249, "step": 98200 }, { "epoch": 27.876809537326142, "grad_norm": 0.14695411920547485, "learning_rate": 7.213511212035198e-05, "loss": 0.019544902443885803, "step": 98210 }, { "epoch": 27.8796480272495, "grad_norm": 0.69439297914505, "learning_rate": 7.213227363042861e-05, "loss": 0.0030153270810842512, "step": 98220 }, { "epoch": 27.882486517172865, "grad_norm": 0.8096264600753784, "learning_rate": 7.212943514050525e-05, "loss": 0.006162770837545395, "step": 98230 }, { "epoch": 27.885325007096224, "grad_norm": 0.010020723566412926, "learning_rate": 7.21265966505819e-05, "loss": 0.00940844863653183, "step": 98240 }, { "epoch": 27.888163497019587, "grad_norm": 0.28858253359794617, "learning_rate": 7.212375816065852e-05, "loss": 0.005982121452689171, "step": 98250 }, { "epoch": 27.891001986942946, "grad_norm": 5.12346887588501, "learning_rate": 7.212091967073518e-05, "loss": 0.0029640918597579, "step": 98260 }, { "epoch": 27.893840476866306, "grad_norm": 0.022798318415880203, "learning_rate": 7.211808118081182e-05, "loss": 0.0019254596903920175, "step": 98270 }, { "epoch": 27.89667896678967, "grad_norm": 0.045451994985342026, "learning_rate": 7.211524269088845e-05, "loss": 0.0029431164264678957, "step": 98280 }, { "epoch": 27.899517456713028, "grad_norm": 0.04231337085366249, "learning_rate": 7.211240420096509e-05, "loss": 0.0011370472609996795, "step": 98290 }, { "epoch": 27.90235594663639, "grad_norm": 0.5439503192901611, "learning_rate": 7.210956571104173e-05, "loss": 0.0036114633083343508, "step": 98300 }, { "epoch": 27.90519443655975, "grad_norm": 3.5129566192626953, "learning_rate": 7.210672722111837e-05, "loss": 0.004822839796543121, "step": 98310 }, { "epoch": 27.90803292648311, "grad_norm": 0.018869219347834587, "learning_rate": 7.210388873119501e-05, "loss": 0.006548144668340683, "step": 98320 }, { "epoch": 27.910871416406472, "grad_norm": 1.332864761352539, "learning_rate": 7.210105024127165e-05, "loss": 0.006155834347009659, "step": 98330 }, { "epoch": 27.91370990632983, "grad_norm": 0.020353183150291443, "learning_rate": 7.20982117513483e-05, "loss": 0.007226744294166565, "step": 98340 }, { "epoch": 27.916548396253194, "grad_norm": 3.8499748706817627, "learning_rate": 7.209537326142492e-05, "loss": 0.00741204023361206, "step": 98350 }, { "epoch": 27.919386886176554, "grad_norm": 0.2584708631038666, "learning_rate": 7.209253477150156e-05, "loss": 0.014169147610664368, "step": 98360 }, { "epoch": 27.922225376099917, "grad_norm": 0.20435307919979095, "learning_rate": 7.20896962815782e-05, "loss": 0.010263603925704957, "step": 98370 }, { "epoch": 27.925063866023276, "grad_norm": 0.08942757546901703, "learning_rate": 7.208685779165483e-05, "loss": 0.00311399195343256, "step": 98380 }, { "epoch": 27.927902355946635, "grad_norm": 2.1784043312072754, "learning_rate": 7.208401930173149e-05, "loss": 0.01601067781448364, "step": 98390 }, { "epoch": 27.930740845869998, "grad_norm": 0.2611997127532959, "learning_rate": 7.208118081180813e-05, "loss": 0.006405844539403916, "step": 98400 }, { "epoch": 27.933579335793358, "grad_norm": 0.12544728815555573, "learning_rate": 7.207834232188476e-05, "loss": 0.013924810290336608, "step": 98410 }, { "epoch": 27.93641782571672, "grad_norm": 8.564568519592285, "learning_rate": 7.20755038319614e-05, "loss": 0.012175031751394273, "step": 98420 }, { "epoch": 27.93925631564008, "grad_norm": 1.2398169040679932, "learning_rate": 7.207266534203804e-05, "loss": 0.026965823769569398, "step": 98430 }, { "epoch": 27.94209480556344, "grad_norm": 6.560933589935303, "learning_rate": 7.206982685211468e-05, "loss": 0.017092892527580263, "step": 98440 }, { "epoch": 27.944933295486802, "grad_norm": 0.9185156226158142, "learning_rate": 7.206698836219131e-05, "loss": 0.004038448631763458, "step": 98450 }, { "epoch": 27.94777178541016, "grad_norm": 0.1771397739648819, "learning_rate": 7.206414987226796e-05, "loss": 0.004645472764968872, "step": 98460 }, { "epoch": 27.950610275333524, "grad_norm": 0.5906181335449219, "learning_rate": 7.20613113823446e-05, "loss": 0.0035210974514484406, "step": 98470 }, { "epoch": 27.953448765256883, "grad_norm": 0.12714911997318268, "learning_rate": 7.205847289242123e-05, "loss": 0.004990947991609573, "step": 98480 }, { "epoch": 27.956287255180243, "grad_norm": 1.4492743015289307, "learning_rate": 7.205563440249787e-05, "loss": 0.0016443854197859763, "step": 98490 }, { "epoch": 27.959125745103606, "grad_norm": 0.06717529147863388, "learning_rate": 7.205279591257452e-05, "loss": 0.021219809353351594, "step": 98500 }, { "epoch": 27.959125745103606, "eval_accuracy": 0.977045844725631, "eval_loss": 0.08619817346334457, "eval_runtime": 33.4032, "eval_samples_per_second": 470.823, "eval_steps_per_second": 7.365, "step": 98500 }, { "epoch": 27.961964235026965, "grad_norm": 0.6067889332771301, "learning_rate": 7.204995742265114e-05, "loss": 0.010838344693183899, "step": 98510 }, { "epoch": 27.964802724950328, "grad_norm": 1.7893460988998413, "learning_rate": 7.20471189327278e-05, "loss": 0.005433059111237526, "step": 98520 }, { "epoch": 27.967641214873687, "grad_norm": 0.1730564534664154, "learning_rate": 7.204428044280444e-05, "loss": 0.011169853806495666, "step": 98530 }, { "epoch": 27.970479704797047, "grad_norm": 0.021796556189656258, "learning_rate": 7.204144195288107e-05, "loss": 0.008608918637037277, "step": 98540 }, { "epoch": 27.97331819472041, "grad_norm": 2.911735773086548, "learning_rate": 7.203860346295771e-05, "loss": 0.01040896624326706, "step": 98550 }, { "epoch": 27.97615668464377, "grad_norm": 0.7668499946594238, "learning_rate": 7.203576497303435e-05, "loss": 0.021011742949485778, "step": 98560 }, { "epoch": 27.97899517456713, "grad_norm": 1.976503610610962, "learning_rate": 7.203292648311099e-05, "loss": 0.006416148692369461, "step": 98570 }, { "epoch": 27.98183366449049, "grad_norm": 7.871675968170166, "learning_rate": 7.203008799318762e-05, "loss": 0.003411639481782913, "step": 98580 }, { "epoch": 27.98467215441385, "grad_norm": 0.16208553314208984, "learning_rate": 7.202724950326427e-05, "loss": 0.0009235871955752372, "step": 98590 }, { "epoch": 27.987510644337213, "grad_norm": 0.13714148104190826, "learning_rate": 7.20244110133409e-05, "loss": 0.005801773071289063, "step": 98600 }, { "epoch": 27.990349134260573, "grad_norm": 0.0644344687461853, "learning_rate": 7.202157252341754e-05, "loss": 0.006738351285457611, "step": 98610 }, { "epoch": 27.993187624183935, "grad_norm": 0.3682508170604706, "learning_rate": 7.201873403349419e-05, "loss": 0.003234684467315674, "step": 98620 }, { "epoch": 27.996026114107295, "grad_norm": 0.24948188662528992, "learning_rate": 7.201589554357083e-05, "loss": 0.005937628820538521, "step": 98630 }, { "epoch": 27.998864604030654, "grad_norm": 8.044355392456055, "learning_rate": 7.201305705364745e-05, "loss": 0.00800129771232605, "step": 98640 }, { "epoch": 28.001703093954017, "grad_norm": 0.5187531113624573, "learning_rate": 7.20102185637241e-05, "loss": 0.01480429470539093, "step": 98650 }, { "epoch": 28.004541583877376, "grad_norm": 3.4122753143310547, "learning_rate": 7.200738007380075e-05, "loss": 0.0016600027680397033, "step": 98660 }, { "epoch": 28.00738007380074, "grad_norm": 0.9488581418991089, "learning_rate": 7.200454158387738e-05, "loss": 0.010912813246250153, "step": 98670 }, { "epoch": 28.0102185637241, "grad_norm": 10.785249710083008, "learning_rate": 7.200170309395402e-05, "loss": 0.024662669003009795, "step": 98680 }, { "epoch": 28.013057053647458, "grad_norm": 0.3422929644584656, "learning_rate": 7.199886460403066e-05, "loss": 0.013471642136573791, "step": 98690 }, { "epoch": 28.01589554357082, "grad_norm": 15.171196937561035, "learning_rate": 7.199602611410729e-05, "loss": 0.013747020065784455, "step": 98700 }, { "epoch": 28.01873403349418, "grad_norm": 16.009130477905273, "learning_rate": 7.199318762418393e-05, "loss": 0.010975538194179535, "step": 98710 }, { "epoch": 28.021572523417543, "grad_norm": 7.093069076538086, "learning_rate": 7.199034913426059e-05, "loss": 0.011661844700574875, "step": 98720 }, { "epoch": 28.024411013340902, "grad_norm": 0.04478953406214714, "learning_rate": 7.198751064433721e-05, "loss": 0.005387580394744873, "step": 98730 }, { "epoch": 28.02724950326426, "grad_norm": 0.09838862717151642, "learning_rate": 7.198467215441385e-05, "loss": 0.0005203111097216607, "step": 98740 }, { "epoch": 28.030087993187625, "grad_norm": 0.24161095917224884, "learning_rate": 7.19818336644905e-05, "loss": 0.0050207749009132385, "step": 98750 }, { "epoch": 28.032926483110984, "grad_norm": 0.031230328604578972, "learning_rate": 7.197899517456714e-05, "loss": 0.0079730823636055, "step": 98760 }, { "epoch": 28.035764973034347, "grad_norm": 0.6476194262504578, "learning_rate": 7.197615668464376e-05, "loss": 0.006628254801034928, "step": 98770 }, { "epoch": 28.038603462957706, "grad_norm": 3.030296564102173, "learning_rate": 7.19733181947204e-05, "loss": 0.004305101186037064, "step": 98780 }, { "epoch": 28.04144195288107, "grad_norm": 3.8257689476013184, "learning_rate": 7.197047970479706e-05, "loss": 0.004554927349090576, "step": 98790 }, { "epoch": 28.04428044280443, "grad_norm": 7.122034549713135, "learning_rate": 7.196764121487369e-05, "loss": 0.007962174713611603, "step": 98800 }, { "epoch": 28.047118932727788, "grad_norm": 0.67912757396698, "learning_rate": 7.196480272495033e-05, "loss": 0.012740479409694671, "step": 98810 }, { "epoch": 28.04995742265115, "grad_norm": 1.0922307968139648, "learning_rate": 7.196196423502697e-05, "loss": 0.007108790427446365, "step": 98820 }, { "epoch": 28.05279591257451, "grad_norm": 0.020735491067171097, "learning_rate": 7.19591257451036e-05, "loss": 0.009994381666183471, "step": 98830 }, { "epoch": 28.055634402497873, "grad_norm": 2.780320882797241, "learning_rate": 7.195628725518024e-05, "loss": 0.008089569211006165, "step": 98840 }, { "epoch": 28.058472892421232, "grad_norm": 0.12046290189027786, "learning_rate": 7.19534487652569e-05, "loss": 0.00164228156208992, "step": 98850 }, { "epoch": 28.06131138234459, "grad_norm": 0.19140416383743286, "learning_rate": 7.195061027533352e-05, "loss": 0.00223433505743742, "step": 98860 }, { "epoch": 28.064149872267954, "grad_norm": 7.538446426391602, "learning_rate": 7.194777178541017e-05, "loss": 0.005436063557863235, "step": 98870 }, { "epoch": 28.066988362191314, "grad_norm": 1.3919719457626343, "learning_rate": 7.19449332954868e-05, "loss": 0.01349714696407318, "step": 98880 }, { "epoch": 28.069826852114677, "grad_norm": 1.4338464736938477, "learning_rate": 7.194209480556345e-05, "loss": 0.006491224467754364, "step": 98890 }, { "epoch": 28.072665342038036, "grad_norm": 0.2037169486284256, "learning_rate": 7.193925631564008e-05, "loss": 0.0027789339423179627, "step": 98900 }, { "epoch": 28.075503831961395, "grad_norm": 0.03480837121605873, "learning_rate": 7.193641782571672e-05, "loss": 0.010146568715572356, "step": 98910 }, { "epoch": 28.078342321884758, "grad_norm": 4.374433994293213, "learning_rate": 7.193357933579337e-05, "loss": 0.007920544594526291, "step": 98920 }, { "epoch": 28.081180811808117, "grad_norm": 0.1593402922153473, "learning_rate": 7.193074084587e-05, "loss": 0.0012274650856852532, "step": 98930 }, { "epoch": 28.08401930173148, "grad_norm": 0.49462637305259705, "learning_rate": 7.192790235594664e-05, "loss": 0.002676321752369404, "step": 98940 }, { "epoch": 28.08685779165484, "grad_norm": 2.3837268352508545, "learning_rate": 7.192506386602328e-05, "loss": 0.001686190441250801, "step": 98950 }, { "epoch": 28.0896962815782, "grad_norm": 1.5033870935440063, "learning_rate": 7.192222537609991e-05, "loss": 0.00299566388130188, "step": 98960 }, { "epoch": 28.092534771501562, "grad_norm": 0.32125648856163025, "learning_rate": 7.191938688617655e-05, "loss": 0.006602812558412552, "step": 98970 }, { "epoch": 28.09537326142492, "grad_norm": 0.005469473544508219, "learning_rate": 7.191654839625319e-05, "loss": 0.011294330656528472, "step": 98980 }, { "epoch": 28.098211751348284, "grad_norm": 0.07319587469100952, "learning_rate": 7.191370990632983e-05, "loss": 0.0150419682264328, "step": 98990 }, { "epoch": 28.101050241271643, "grad_norm": 0.09328707307577133, "learning_rate": 7.191087141640648e-05, "loss": 0.0014279305934906006, "step": 99000 }, { "epoch": 28.101050241271643, "eval_accuracy": 0.9792713168436447, "eval_loss": 0.07684072852134705, "eval_runtime": 37.3601, "eval_samples_per_second": 420.958, "eval_steps_per_second": 6.585, "step": 99000 }, { "epoch": 28.103888731195003, "grad_norm": 0.39824068546295166, "learning_rate": 7.190803292648312e-05, "loss": 0.002925644628703594, "step": 99010 }, { "epoch": 28.106727221118366, "grad_norm": 0.03778435289859772, "learning_rate": 7.190519443655976e-05, "loss": 0.0008029906079173088, "step": 99020 }, { "epoch": 28.109565711041725, "grad_norm": 1.2921497821807861, "learning_rate": 7.190235594663639e-05, "loss": 0.0027612267062067986, "step": 99030 }, { "epoch": 28.112404200965088, "grad_norm": 0.4664885103702545, "learning_rate": 7.189951745671303e-05, "loss": 0.006472586840391159, "step": 99040 }, { "epoch": 28.115242690888447, "grad_norm": 0.6399241089820862, "learning_rate": 7.189667896678968e-05, "loss": 0.0014554843306541442, "step": 99050 }, { "epoch": 28.118081180811807, "grad_norm": 0.07075900584459305, "learning_rate": 7.189384047686631e-05, "loss": 0.0020307209342718124, "step": 99060 }, { "epoch": 28.12091967073517, "grad_norm": 0.15147827565670013, "learning_rate": 7.189100198694295e-05, "loss": 0.009658052027225495, "step": 99070 }, { "epoch": 28.12375816065853, "grad_norm": 0.03647730126976967, "learning_rate": 7.18881634970196e-05, "loss": 0.00842028334736824, "step": 99080 }, { "epoch": 28.12659665058189, "grad_norm": 0.5445790886878967, "learning_rate": 7.188532500709622e-05, "loss": 0.00415266752243042, "step": 99090 }, { "epoch": 28.12943514050525, "grad_norm": 0.023248447105288506, "learning_rate": 7.188248651717286e-05, "loss": 0.00502505898475647, "step": 99100 }, { "epoch": 28.13227363042861, "grad_norm": 0.09311160445213318, "learning_rate": 7.18796480272495e-05, "loss": 0.0005749007686972618, "step": 99110 }, { "epoch": 28.135112120351973, "grad_norm": 0.0710529014468193, "learning_rate": 7.187680953732615e-05, "loss": 0.0023481691256165504, "step": 99120 }, { "epoch": 28.137950610275333, "grad_norm": 0.636303186416626, "learning_rate": 7.187397104740279e-05, "loss": 0.00744413286447525, "step": 99130 }, { "epoch": 28.140789100198695, "grad_norm": 5.658812999725342, "learning_rate": 7.187113255747943e-05, "loss": 0.004930984228849411, "step": 99140 }, { "epoch": 28.143627590122055, "grad_norm": 0.14712083339691162, "learning_rate": 7.186829406755607e-05, "loss": 0.013173434138298034, "step": 99150 }, { "epoch": 28.146466080045414, "grad_norm": 1.001529335975647, "learning_rate": 7.18654555776327e-05, "loss": 0.0016558574512600898, "step": 99160 }, { "epoch": 28.149304569968777, "grad_norm": 0.37034928798675537, "learning_rate": 7.186261708770934e-05, "loss": 0.0021742787212133408, "step": 99170 }, { "epoch": 28.152143059892136, "grad_norm": 0.19260162115097046, "learning_rate": 7.185977859778598e-05, "loss": 0.004271704703569412, "step": 99180 }, { "epoch": 28.1549815498155, "grad_norm": 0.2508613169193268, "learning_rate": 7.185694010786262e-05, "loss": 0.012393180280923843, "step": 99190 }, { "epoch": 28.15782003973886, "grad_norm": 1.5295356512069702, "learning_rate": 7.185410161793926e-05, "loss": 0.012424253672361375, "step": 99200 }, { "epoch": 28.16065852966222, "grad_norm": 0.090813547372818, "learning_rate": 7.18512631280159e-05, "loss": 0.0028349969536066054, "step": 99210 }, { "epoch": 28.16349701958558, "grad_norm": 10.514691352844238, "learning_rate": 7.184842463809253e-05, "loss": 0.004020857065916062, "step": 99220 }, { "epoch": 28.16633550950894, "grad_norm": 0.1018192395567894, "learning_rate": 7.184558614816917e-05, "loss": 0.009615316241979598, "step": 99230 }, { "epoch": 28.169173999432303, "grad_norm": 1.5458210706710815, "learning_rate": 7.184274765824581e-05, "loss": 0.005121297389268875, "step": 99240 }, { "epoch": 28.172012489355662, "grad_norm": 0.21362987160682678, "learning_rate": 7.183990916832246e-05, "loss": 0.002326224558055401, "step": 99250 }, { "epoch": 28.174850979279025, "grad_norm": 1.9402625560760498, "learning_rate": 7.18370706783991e-05, "loss": 0.004799947887659073, "step": 99260 }, { "epoch": 28.177689469202384, "grad_norm": 6.231479167938232, "learning_rate": 7.183423218847574e-05, "loss": 0.004884021729230881, "step": 99270 }, { "epoch": 28.180527959125744, "grad_norm": 1.3899283409118652, "learning_rate": 7.183139369855238e-05, "loss": 0.002759569324553013, "step": 99280 }, { "epoch": 28.183366449049107, "grad_norm": 0.20318672060966492, "learning_rate": 7.182855520862901e-05, "loss": 0.008805024623870849, "step": 99290 }, { "epoch": 28.186204938972466, "grad_norm": 9.899087905883789, "learning_rate": 7.182571671870565e-05, "loss": 0.006540262699127197, "step": 99300 }, { "epoch": 28.18904342889583, "grad_norm": 9.43331527709961, "learning_rate": 7.182287822878229e-05, "loss": 0.008572376519441604, "step": 99310 }, { "epoch": 28.19188191881919, "grad_norm": 9.40549087524414, "learning_rate": 7.182003973885893e-05, "loss": 0.030203789472579956, "step": 99320 }, { "epoch": 28.194720408742548, "grad_norm": 8.990379333496094, "learning_rate": 7.181720124893557e-05, "loss": 0.024065528810024262, "step": 99330 }, { "epoch": 28.19755889866591, "grad_norm": 9.05500602722168, "learning_rate": 7.181436275901221e-05, "loss": 0.006160599738359451, "step": 99340 }, { "epoch": 28.20039738858927, "grad_norm": 3.1120100021362305, "learning_rate": 7.181152426908884e-05, "loss": 0.016867542266845705, "step": 99350 }, { "epoch": 28.203235878512633, "grad_norm": 6.227585315704346, "learning_rate": 7.180868577916548e-05, "loss": 0.007275266945362091, "step": 99360 }, { "epoch": 28.206074368435992, "grad_norm": 1.4923298358917236, "learning_rate": 7.180584728924213e-05, "loss": 0.003335319459438324, "step": 99370 }, { "epoch": 28.20891285835935, "grad_norm": 0.31400442123413086, "learning_rate": 7.180300879931877e-05, "loss": 0.01115594208240509, "step": 99380 }, { "epoch": 28.211751348282714, "grad_norm": 0.18326985836029053, "learning_rate": 7.180017030939541e-05, "loss": 0.011911024153232575, "step": 99390 }, { "epoch": 28.214589838206074, "grad_norm": 0.10668357461690903, "learning_rate": 7.179733181947205e-05, "loss": 0.0016158845275640488, "step": 99400 }, { "epoch": 28.217428328129436, "grad_norm": 0.29853978753089905, "learning_rate": 7.179449332954869e-05, "loss": 0.0021814459934830664, "step": 99410 }, { "epoch": 28.220266818052796, "grad_norm": 6.552924633026123, "learning_rate": 7.179165483962532e-05, "loss": 0.0023689007386565207, "step": 99420 }, { "epoch": 28.223105307976155, "grad_norm": 1.244964599609375, "learning_rate": 7.178881634970196e-05, "loss": 0.0044053420424461365, "step": 99430 }, { "epoch": 28.225943797899518, "grad_norm": 2.794093608856201, "learning_rate": 7.17859778597786e-05, "loss": 0.00482596606016159, "step": 99440 }, { "epoch": 28.228782287822877, "grad_norm": 6.594037055969238, "learning_rate": 7.178313936985524e-05, "loss": 0.008460083603858947, "step": 99450 }, { "epoch": 28.23162077774624, "grad_norm": 2.4435548782348633, "learning_rate": 7.178030087993188e-05, "loss": 0.003919018805027008, "step": 99460 }, { "epoch": 28.2344592676696, "grad_norm": 13.864236831665039, "learning_rate": 7.177746239000853e-05, "loss": 0.031040668487548828, "step": 99470 }, { "epoch": 28.23729775759296, "grad_norm": 0.18977677822113037, "learning_rate": 7.177462390008515e-05, "loss": 0.0007792364805936814, "step": 99480 }, { "epoch": 28.240136247516322, "grad_norm": 15.359691619873047, "learning_rate": 7.17717854101618e-05, "loss": 0.006849560141563416, "step": 99490 }, { "epoch": 28.24297473743968, "grad_norm": 3.7404768466949463, "learning_rate": 7.176894692023844e-05, "loss": 0.008462579548358917, "step": 99500 }, { "epoch": 28.24297473743968, "eval_accuracy": 0.9760284860431105, "eval_loss": 0.09178148955106735, "eval_runtime": 33.3926, "eval_samples_per_second": 470.972, "eval_steps_per_second": 7.367, "step": 99500 }, { "epoch": 28.245813227363044, "grad_norm": 0.13487465679645538, "learning_rate": 7.176610843031508e-05, "loss": 0.009955111145973205, "step": 99510 }, { "epoch": 28.248651717286403, "grad_norm": 0.1532452255487442, "learning_rate": 7.176326994039172e-05, "loss": 0.0005965515971183777, "step": 99520 }, { "epoch": 28.251490207209763, "grad_norm": 0.02373301051557064, "learning_rate": 7.176043145046836e-05, "loss": 0.004626193642616272, "step": 99530 }, { "epoch": 28.254328697133126, "grad_norm": 0.02200991101562977, "learning_rate": 7.175759296054499e-05, "loss": 0.0011803390458226203, "step": 99540 }, { "epoch": 28.257167187056485, "grad_norm": 0.36990058422088623, "learning_rate": 7.175475447062163e-05, "loss": 0.01475212424993515, "step": 99550 }, { "epoch": 28.260005676979848, "grad_norm": 0.9962258338928223, "learning_rate": 7.175191598069827e-05, "loss": 0.004870931059122086, "step": 99560 }, { "epoch": 28.262844166903207, "grad_norm": 0.419647753238678, "learning_rate": 7.174907749077491e-05, "loss": 0.002248052321374416, "step": 99570 }, { "epoch": 28.26568265682657, "grad_norm": 6.45016622543335, "learning_rate": 7.174623900085154e-05, "loss": 0.003477710485458374, "step": 99580 }, { "epoch": 28.26852114674993, "grad_norm": 0.52093106508255, "learning_rate": 7.17434005109282e-05, "loss": 0.003936703503131867, "step": 99590 }, { "epoch": 28.27135963667329, "grad_norm": 0.26057907938957214, "learning_rate": 7.174056202100484e-05, "loss": 0.004101785272359848, "step": 99600 }, { "epoch": 28.27419812659665, "grad_norm": 2.4284772872924805, "learning_rate": 7.173772353108146e-05, "loss": 0.007277241349220276, "step": 99610 }, { "epoch": 28.27703661652001, "grad_norm": 0.023208873346447945, "learning_rate": 7.17348850411581e-05, "loss": 0.005625830218195915, "step": 99620 }, { "epoch": 28.279875106443374, "grad_norm": 0.02830580435693264, "learning_rate": 7.173204655123475e-05, "loss": 0.010438821464776992, "step": 99630 }, { "epoch": 28.282713596366733, "grad_norm": 5.906933784484863, "learning_rate": 7.172920806131137e-05, "loss": 0.007761420309543609, "step": 99640 }, { "epoch": 28.285552086290092, "grad_norm": 0.1420840620994568, "learning_rate": 7.172636957138803e-05, "loss": 0.007231409102678299, "step": 99650 }, { "epoch": 28.288390576213455, "grad_norm": 7.795370101928711, "learning_rate": 7.172353108146467e-05, "loss": 0.005458582192659378, "step": 99660 }, { "epoch": 28.291229066136815, "grad_norm": 0.07247145473957062, "learning_rate": 7.17206925915413e-05, "loss": 0.006552186608314514, "step": 99670 }, { "epoch": 28.294067556060178, "grad_norm": 0.16598844528198242, "learning_rate": 7.171785410161794e-05, "loss": 0.010784877836704254, "step": 99680 }, { "epoch": 28.296906045983537, "grad_norm": 1.4105430841445923, "learning_rate": 7.171501561169458e-05, "loss": 0.005340661108493805, "step": 99690 }, { "epoch": 28.299744535906896, "grad_norm": 0.09489722549915314, "learning_rate": 7.171217712177122e-05, "loss": 0.005371788516640663, "step": 99700 }, { "epoch": 28.30258302583026, "grad_norm": 0.1948104053735733, "learning_rate": 7.170933863184785e-05, "loss": 0.0010668443515896796, "step": 99710 }, { "epoch": 28.30542151575362, "grad_norm": 0.1192571297287941, "learning_rate": 7.17065001419245e-05, "loss": 0.0023729858919978144, "step": 99720 }, { "epoch": 28.30826000567698, "grad_norm": 0.07317768037319183, "learning_rate": 7.170366165200115e-05, "loss": 0.005571439862251282, "step": 99730 }, { "epoch": 28.31109849560034, "grad_norm": 0.22084389626979828, "learning_rate": 7.170082316207778e-05, "loss": 0.0026230869814753534, "step": 99740 }, { "epoch": 28.3139369855237, "grad_norm": 0.04947201535105705, "learning_rate": 7.169798467215442e-05, "loss": 0.00799390971660614, "step": 99750 }, { "epoch": 28.316775475447063, "grad_norm": 8.862832069396973, "learning_rate": 7.169514618223106e-05, "loss": 0.014869146049022675, "step": 99760 }, { "epoch": 28.319613965370422, "grad_norm": 0.07936211675405502, "learning_rate": 7.169230769230769e-05, "loss": 0.0047590769827365875, "step": 99770 }, { "epoch": 28.322452455293785, "grad_norm": 0.05143002048134804, "learning_rate": 7.168946920238433e-05, "loss": 0.006295879185199737, "step": 99780 }, { "epoch": 28.325290945217144, "grad_norm": 0.35683876276016235, "learning_rate": 7.168663071246098e-05, "loss": 0.0044185459613800045, "step": 99790 }, { "epoch": 28.328129435140504, "grad_norm": 0.5497774481773376, "learning_rate": 7.168379222253761e-05, "loss": 0.011412338167428971, "step": 99800 }, { "epoch": 28.330967925063867, "grad_norm": 10.286226272583008, "learning_rate": 7.168095373261425e-05, "loss": 0.015380994975566864, "step": 99810 }, { "epoch": 28.333806414987226, "grad_norm": 1.9151215553283691, "learning_rate": 7.167811524269089e-05, "loss": 0.0033800236880779265, "step": 99820 }, { "epoch": 28.33664490491059, "grad_norm": 0.41516485810279846, "learning_rate": 7.167527675276753e-05, "loss": 0.01686961352825165, "step": 99830 }, { "epoch": 28.339483394833948, "grad_norm": 0.25488799810409546, "learning_rate": 7.167243826284416e-05, "loss": 0.00321621298789978, "step": 99840 }, { "epoch": 28.342321884757308, "grad_norm": 0.007995839230716228, "learning_rate": 7.166959977292082e-05, "loss": 0.004924032837152481, "step": 99850 }, { "epoch": 28.34516037468067, "grad_norm": 0.03905520960688591, "learning_rate": 7.166676128299746e-05, "loss": 0.003580489009618759, "step": 99860 }, { "epoch": 28.34799886460403, "grad_norm": 0.5398147702217102, "learning_rate": 7.166392279307409e-05, "loss": 0.01339045912027359, "step": 99870 }, { "epoch": 28.350837354527393, "grad_norm": 5.063945770263672, "learning_rate": 7.166108430315073e-05, "loss": 0.00951528325676918, "step": 99880 }, { "epoch": 28.353675844450752, "grad_norm": 11.559226989746094, "learning_rate": 7.165824581322737e-05, "loss": 0.013809311389923095, "step": 99890 }, { "epoch": 28.35651433437411, "grad_norm": 8.667190551757812, "learning_rate": 7.1655407323304e-05, "loss": 0.004789919033646584, "step": 99900 }, { "epoch": 28.359352824297474, "grad_norm": 9.223078727722168, "learning_rate": 7.165256883338064e-05, "loss": 0.0076169267296791075, "step": 99910 }, { "epoch": 28.362191314220834, "grad_norm": 0.043675921857357025, "learning_rate": 7.164973034345729e-05, "loss": 0.002128135785460472, "step": 99920 }, { "epoch": 28.365029804144196, "grad_norm": 0.31551697850227356, "learning_rate": 7.164689185353392e-05, "loss": 0.011172695457935334, "step": 99930 }, { "epoch": 28.367868294067556, "grad_norm": 1.653792142868042, "learning_rate": 7.164405336361056e-05, "loss": 0.009764029830694198, "step": 99940 }, { "epoch": 28.37070678399092, "grad_norm": 11.963765144348145, "learning_rate": 7.16412148736872e-05, "loss": 0.012000133842229843, "step": 99950 }, { "epoch": 28.373545273914278, "grad_norm": 0.1300283968448639, "learning_rate": 7.163837638376384e-05, "loss": 0.003918551653623581, "step": 99960 }, { "epoch": 28.376383763837637, "grad_norm": 1.2535253763198853, "learning_rate": 7.163553789384047e-05, "loss": 0.0017103837803006173, "step": 99970 }, { "epoch": 28.379222253761, "grad_norm": 1.4552257061004639, "learning_rate": 7.163269940391711e-05, "loss": 0.009121895581483842, "step": 99980 }, { "epoch": 28.38206074368436, "grad_norm": 7.420195579528809, "learning_rate": 7.162986091399377e-05, "loss": 0.029114842414855957, "step": 99990 }, { "epoch": 28.384899233607722, "grad_norm": 8.191390991210938, "learning_rate": 7.16270224240704e-05, "loss": 0.0204701229929924, "step": 100000 }, { "epoch": 28.384899233607722, "eval_accuracy": 0.9767279201373434, "eval_loss": 0.08750419318675995, "eval_runtime": 34.5619, "eval_samples_per_second": 455.039, "eval_steps_per_second": 7.118, "step": 100000 }, { "epoch": 28.38773772353108, "grad_norm": 14.593439102172852, "learning_rate": 7.162418393414704e-05, "loss": 0.020326575636863707, "step": 100010 }, { "epoch": 28.39057621345444, "grad_norm": 0.23052601516246796, "learning_rate": 7.162134544422368e-05, "loss": 0.012265653163194657, "step": 100020 }, { "epoch": 28.393414703377804, "grad_norm": 1.81650972366333, "learning_rate": 7.161850695430031e-05, "loss": 0.0031732410192489625, "step": 100030 }, { "epoch": 28.396253193301163, "grad_norm": 0.6668066382408142, "learning_rate": 7.161566846437695e-05, "loss": 0.0132771298289299, "step": 100040 }, { "epoch": 28.399091683224526, "grad_norm": 0.06932319700717926, "learning_rate": 7.16128299744536e-05, "loss": 0.004440277069807053, "step": 100050 }, { "epoch": 28.401930173147885, "grad_norm": 0.12021110951900482, "learning_rate": 7.160999148453023e-05, "loss": 0.0033875487744808195, "step": 100060 }, { "epoch": 28.404768663071245, "grad_norm": 2.708115339279175, "learning_rate": 7.160715299460687e-05, "loss": 0.0038336038589477537, "step": 100070 }, { "epoch": 28.407607152994608, "grad_norm": 3.333279848098755, "learning_rate": 7.160431450468351e-05, "loss": 0.0035594575107097625, "step": 100080 }, { "epoch": 28.410445642917967, "grad_norm": 0.22882463037967682, "learning_rate": 7.160147601476016e-05, "loss": 0.004982605576515198, "step": 100090 }, { "epoch": 28.41328413284133, "grad_norm": 0.12016742676496506, "learning_rate": 7.159863752483678e-05, "loss": 0.016329528391361238, "step": 100100 }, { "epoch": 28.41612262276469, "grad_norm": 3.9379870891571045, "learning_rate": 7.159579903491342e-05, "loss": 0.012372253090143203, "step": 100110 }, { "epoch": 28.41896111268805, "grad_norm": 1.2523640394210815, "learning_rate": 7.15932443939824e-05, "loss": 0.021855442225933074, "step": 100120 }, { "epoch": 28.42179960261141, "grad_norm": 1.5252548456192017, "learning_rate": 7.159040590405905e-05, "loss": 0.005551298707723617, "step": 100130 }, { "epoch": 28.42463809253477, "grad_norm": 0.398727148771286, "learning_rate": 7.158756741413569e-05, "loss": 0.0009678959846496582, "step": 100140 }, { "epoch": 28.427476582458134, "grad_norm": 1.261574625968933, "learning_rate": 7.158472892421232e-05, "loss": 0.005854340270161629, "step": 100150 }, { "epoch": 28.430315072381493, "grad_norm": 0.027525775134563446, "learning_rate": 7.158189043428896e-05, "loss": 0.000919957458972931, "step": 100160 }, { "epoch": 28.433153562304852, "grad_norm": 3.272564649581909, "learning_rate": 7.157905194436561e-05, "loss": 0.004507201910018921, "step": 100170 }, { "epoch": 28.435992052228215, "grad_norm": 5.561164379119873, "learning_rate": 7.157621345444224e-05, "loss": 0.006631986051797867, "step": 100180 }, { "epoch": 28.438830542151575, "grad_norm": 1.151025652885437, "learning_rate": 7.157337496451888e-05, "loss": 0.00318082794547081, "step": 100190 }, { "epoch": 28.441669032074937, "grad_norm": 0.15758520364761353, "learning_rate": 7.157053647459552e-05, "loss": 0.004598491638898849, "step": 100200 }, { "epoch": 28.444507521998297, "grad_norm": 1.4913239479064941, "learning_rate": 7.156769798467215e-05, "loss": 0.003518460690975189, "step": 100210 }, { "epoch": 28.447346011921656, "grad_norm": 7.859156608581543, "learning_rate": 7.156485949474879e-05, "loss": 0.0036037910729646684, "step": 100220 }, { "epoch": 28.45018450184502, "grad_norm": 3.874732732772827, "learning_rate": 7.156202100482545e-05, "loss": 0.013685479760169983, "step": 100230 }, { "epoch": 28.45302299176838, "grad_norm": 0.09251154959201813, "learning_rate": 7.155918251490207e-05, "loss": 0.004049088060855866, "step": 100240 }, { "epoch": 28.45586148169174, "grad_norm": 9.476046562194824, "learning_rate": 7.155634402497872e-05, "loss": 0.00568506196141243, "step": 100250 }, { "epoch": 28.4586999716151, "grad_norm": 0.05267538130283356, "learning_rate": 7.155350553505536e-05, "loss": 0.012565702199935913, "step": 100260 }, { "epoch": 28.46153846153846, "grad_norm": 0.04005808383226395, "learning_rate": 7.1550667045132e-05, "loss": 0.017858725786209107, "step": 100270 }, { "epoch": 28.464376951461823, "grad_norm": 0.2781580686569214, "learning_rate": 7.154782855520863e-05, "loss": 0.009272876381874084, "step": 100280 }, { "epoch": 28.467215441385182, "grad_norm": 0.35082104802131653, "learning_rate": 7.154499006528527e-05, "loss": 0.007768505811691284, "step": 100290 }, { "epoch": 28.470053931308545, "grad_norm": 3.719534397125244, "learning_rate": 7.154215157536191e-05, "loss": 0.013902543485164643, "step": 100300 }, { "epoch": 28.472892421231904, "grad_norm": 0.29381391406059265, "learning_rate": 7.153931308543855e-05, "loss": 0.0033833958208560944, "step": 100310 }, { "epoch": 28.475730911155264, "grad_norm": 1.021438479423523, "learning_rate": 7.153647459551519e-05, "loss": 0.0031431034207344056, "step": 100320 }, { "epoch": 28.478569401078627, "grad_norm": 2.074313163757324, "learning_rate": 7.153363610559183e-05, "loss": 0.010175687819719314, "step": 100330 }, { "epoch": 28.481407891001986, "grad_norm": 0.9326604604721069, "learning_rate": 7.153079761566846e-05, "loss": 0.014598742127418518, "step": 100340 }, { "epoch": 28.48424638092535, "grad_norm": 0.4040287137031555, "learning_rate": 7.15279591257451e-05, "loss": 0.008074738085269928, "step": 100350 }, { "epoch": 28.487084870848708, "grad_norm": 8.11521053314209, "learning_rate": 7.152512063582174e-05, "loss": 0.012790821492671967, "step": 100360 }, { "epoch": 28.48992336077207, "grad_norm": 0.3473169207572937, "learning_rate": 7.152228214589838e-05, "loss": 0.011178337782621384, "step": 100370 }, { "epoch": 28.49276185069543, "grad_norm": 1.1768358945846558, "learning_rate": 7.151944365597503e-05, "loss": 0.0016227176412940026, "step": 100380 }, { "epoch": 28.49560034061879, "grad_norm": 13.817501068115234, "learning_rate": 7.151660516605167e-05, "loss": 0.01845378875732422, "step": 100390 }, { "epoch": 28.498438830542153, "grad_norm": 0.26393765211105347, "learning_rate": 7.151376667612831e-05, "loss": 0.008610354363918304, "step": 100400 }, { "epoch": 28.501277320465512, "grad_norm": 0.20385129749774933, "learning_rate": 7.151092818620494e-05, "loss": 0.0006546791642904282, "step": 100410 }, { "epoch": 28.504115810388875, "grad_norm": 0.18785835802555084, "learning_rate": 7.150808969628158e-05, "loss": 0.0009200561791658402, "step": 100420 }, { "epoch": 28.506954300312234, "grad_norm": 1.7692313194274902, "learning_rate": 7.150525120635822e-05, "loss": 0.003552760183811188, "step": 100430 }, { "epoch": 28.509792790235593, "grad_norm": 3.6804020404815674, "learning_rate": 7.150241271643486e-05, "loss": 0.00919075906276703, "step": 100440 }, { "epoch": 28.512631280158956, "grad_norm": 1.8511006832122803, "learning_rate": 7.14995742265115e-05, "loss": 0.004269715771079064, "step": 100450 }, { "epoch": 28.515469770082316, "grad_norm": 0.27966877818107605, "learning_rate": 7.149673573658814e-05, "loss": 0.014757883548736573, "step": 100460 }, { "epoch": 28.51830826000568, "grad_norm": 0.2319938838481903, "learning_rate": 7.149389724666477e-05, "loss": 0.0015794627368450164, "step": 100470 }, { "epoch": 28.521146749929038, "grad_norm": 0.03900507465004921, "learning_rate": 7.149105875674141e-05, "loss": 0.0033635832369327545, "step": 100480 }, { "epoch": 28.523985239852397, "grad_norm": 0.7777408361434937, "learning_rate": 7.148822026681805e-05, "loss": 0.0070543564856052395, "step": 100490 }, { "epoch": 28.52682372977576, "grad_norm": 0.01887352578341961, "learning_rate": 7.14853817768947e-05, "loss": 0.0005571307614445686, "step": 100500 }, { "epoch": 28.52682372977576, "eval_accuracy": 0.9776181089845488, "eval_loss": 0.08635498583316803, "eval_runtime": 33.1453, "eval_samples_per_second": 474.486, "eval_steps_per_second": 7.422, "step": 100500 }, { "epoch": 28.52966221969912, "grad_norm": 4.668610095977783, "learning_rate": 7.148254328697134e-05, "loss": 0.017178955674171447, "step": 100510 }, { "epoch": 28.532500709622482, "grad_norm": 3.3150954246520996, "learning_rate": 7.147970479704798e-05, "loss": 0.0019041813910007476, "step": 100520 }, { "epoch": 28.53533919954584, "grad_norm": 1.8489141464233398, "learning_rate": 7.14768663071246e-05, "loss": 0.004713127389550209, "step": 100530 }, { "epoch": 28.5381776894692, "grad_norm": 13.638215065002441, "learning_rate": 7.147402781720125e-05, "loss": 0.014160898327827454, "step": 100540 }, { "epoch": 28.541016179392564, "grad_norm": 10.353297233581543, "learning_rate": 7.147118932727789e-05, "loss": 0.021773865818977355, "step": 100550 }, { "epoch": 28.543854669315923, "grad_norm": 1.189612865447998, "learning_rate": 7.146835083735453e-05, "loss": 0.025927209854125978, "step": 100560 }, { "epoch": 28.546693159239286, "grad_norm": 5.545487403869629, "learning_rate": 7.146551234743117e-05, "loss": 0.006600335985422134, "step": 100570 }, { "epoch": 28.549531649162645, "grad_norm": 0.05802226811647415, "learning_rate": 7.146267385750781e-05, "loss": 0.013143467903137206, "step": 100580 }, { "epoch": 28.552370139086005, "grad_norm": 0.3603195548057556, "learning_rate": 7.145983536758445e-05, "loss": 0.0048817116767168045, "step": 100590 }, { "epoch": 28.555208629009368, "grad_norm": 3.4267940521240234, "learning_rate": 7.145699687766108e-05, "loss": 0.004318711161613464, "step": 100600 }, { "epoch": 28.558047118932727, "grad_norm": 3.738210916519165, "learning_rate": 7.145415838773772e-05, "loss": 0.02279566377401352, "step": 100610 }, { "epoch": 28.56088560885609, "grad_norm": 0.020009715110063553, "learning_rate": 7.145131989781436e-05, "loss": 0.008564041554927826, "step": 100620 }, { "epoch": 28.56372409877945, "grad_norm": 0.8230623006820679, "learning_rate": 7.1448481407891e-05, "loss": 0.004783297330141068, "step": 100630 }, { "epoch": 28.56656258870281, "grad_norm": 6.2263383865356445, "learning_rate": 7.144564291796765e-05, "loss": 0.002353375218808651, "step": 100640 }, { "epoch": 28.56940107862617, "grad_norm": 0.869212806224823, "learning_rate": 7.144280442804429e-05, "loss": 0.0012684499844908715, "step": 100650 }, { "epoch": 28.57223956854953, "grad_norm": 2.2359747886657715, "learning_rate": 7.143996593812092e-05, "loss": 0.007416322082281113, "step": 100660 }, { "epoch": 28.575078058472894, "grad_norm": 0.27182886004447937, "learning_rate": 7.143712744819756e-05, "loss": 0.011250807344913483, "step": 100670 }, { "epoch": 28.577916548396253, "grad_norm": 2.734229803085327, "learning_rate": 7.14342889582742e-05, "loss": 0.005554275214672088, "step": 100680 }, { "epoch": 28.580755038319612, "grad_norm": 1.4229520559310913, "learning_rate": 7.143145046835084e-05, "loss": 0.009299169480800628, "step": 100690 }, { "epoch": 28.583593528242975, "grad_norm": 0.8149111270904541, "learning_rate": 7.142861197842748e-05, "loss": 0.006900467723608017, "step": 100700 }, { "epoch": 28.586432018166335, "grad_norm": 1.1129982471466064, "learning_rate": 7.142577348850412e-05, "loss": 0.011939001083374024, "step": 100710 }, { "epoch": 28.589270508089697, "grad_norm": 21.05967140197754, "learning_rate": 7.142293499858077e-05, "loss": 0.017169708013534547, "step": 100720 }, { "epoch": 28.592108998013057, "grad_norm": 9.524868965148926, "learning_rate": 7.142009650865739e-05, "loss": 0.02077637016773224, "step": 100730 }, { "epoch": 28.594947487936416, "grad_norm": 2.299494504928589, "learning_rate": 7.141725801873403e-05, "loss": 0.009338860213756562, "step": 100740 }, { "epoch": 28.59778597785978, "grad_norm": 15.834598541259766, "learning_rate": 7.141441952881068e-05, "loss": 0.02941901683807373, "step": 100750 }, { "epoch": 28.60062446778314, "grad_norm": 12.181879997253418, "learning_rate": 7.141158103888732e-05, "loss": 0.007035313546657563, "step": 100760 }, { "epoch": 28.6034629577065, "grad_norm": 0.38105857372283936, "learning_rate": 7.140874254896396e-05, "loss": 0.006087318807840347, "step": 100770 }, { "epoch": 28.60630144762986, "grad_norm": 0.03279640153050423, "learning_rate": 7.14059040590406e-05, "loss": 0.014921718835830688, "step": 100780 }, { "epoch": 28.609139937553223, "grad_norm": 3.1654882431030273, "learning_rate": 7.140306556911723e-05, "loss": 0.004564492404460907, "step": 100790 }, { "epoch": 28.611978427476583, "grad_norm": 1.529170036315918, "learning_rate": 7.140022707919387e-05, "loss": 0.0015643257647752762, "step": 100800 }, { "epoch": 28.614816917399942, "grad_norm": 2.255664587020874, "learning_rate": 7.139738858927051e-05, "loss": 0.008463224768638611, "step": 100810 }, { "epoch": 28.617655407323305, "grad_norm": 0.2389989048242569, "learning_rate": 7.139455009934715e-05, "loss": 0.006184495612978935, "step": 100820 }, { "epoch": 28.620493897246664, "grad_norm": 1.5729719400405884, "learning_rate": 7.139171160942379e-05, "loss": 0.006584526598453521, "step": 100830 }, { "epoch": 28.623332387170027, "grad_norm": 0.10649455338716507, "learning_rate": 7.138887311950043e-05, "loss": 0.010198096930980682, "step": 100840 }, { "epoch": 28.626170877093386, "grad_norm": 0.13009221851825714, "learning_rate": 7.138603462957708e-05, "loss": 0.0042096726596355435, "step": 100850 }, { "epoch": 28.629009367016746, "grad_norm": 3.4456090927124023, "learning_rate": 7.13831961396537e-05, "loss": 0.022081714868545533, "step": 100860 }, { "epoch": 28.63184785694011, "grad_norm": 8.137109756469727, "learning_rate": 7.138035764973035e-05, "loss": 0.0030114490538835524, "step": 100870 }, { "epoch": 28.634686346863468, "grad_norm": 0.20731209218502045, "learning_rate": 7.137751915980699e-05, "loss": 0.004587481915950775, "step": 100880 }, { "epoch": 28.63752483678683, "grad_norm": 0.08005807548761368, "learning_rate": 7.137468066988361e-05, "loss": 0.002321994863450527, "step": 100890 }, { "epoch": 28.64036332671019, "grad_norm": 0.06978342682123184, "learning_rate": 7.137184217996027e-05, "loss": 0.018048979341983795, "step": 100900 }, { "epoch": 28.64320181663355, "grad_norm": 0.40133869647979736, "learning_rate": 7.136900369003691e-05, "loss": 0.008585403114557267, "step": 100910 }, { "epoch": 28.646040306556912, "grad_norm": 0.03316863253712654, "learning_rate": 7.136616520011354e-05, "loss": 0.015354153513908387, "step": 100920 }, { "epoch": 28.648878796480272, "grad_norm": 5.483931064605713, "learning_rate": 7.136332671019018e-05, "loss": 0.009842197597026824, "step": 100930 }, { "epoch": 28.651717286403635, "grad_norm": 1.8974372148513794, "learning_rate": 7.136048822026682e-05, "loss": 0.010685420781373977, "step": 100940 }, { "epoch": 28.654555776326994, "grad_norm": 2.2531514167785645, "learning_rate": 7.135764973034346e-05, "loss": 0.00501871407032013, "step": 100950 }, { "epoch": 28.657394266250353, "grad_norm": 2.1099069118499756, "learning_rate": 7.13548112404201e-05, "loss": 0.010700026154518127, "step": 100960 }, { "epoch": 28.660232756173716, "grad_norm": 4.375858783721924, "learning_rate": 7.135197275049675e-05, "loss": 0.03753417134284973, "step": 100970 }, { "epoch": 28.663071246097076, "grad_norm": 0.2647154927253723, "learning_rate": 7.134913426057339e-05, "loss": 0.004317381978034973, "step": 100980 }, { "epoch": 28.66590973602044, "grad_norm": 0.2429247945547104, "learning_rate": 7.134629577065001e-05, "loss": 0.00564587339758873, "step": 100990 }, { "epoch": 28.668748225943798, "grad_norm": 0.938694179058075, "learning_rate": 7.134345728072666e-05, "loss": 0.002023565024137497, "step": 101000 }, { "epoch": 28.668748225943798, "eval_accuracy": 0.9739937686780695, "eval_loss": 0.09527021646499634, "eval_runtime": 40.5939, "eval_samples_per_second": 387.423, "eval_steps_per_second": 6.06, "step": 101000 }, { "epoch": 28.671586715867157, "grad_norm": 0.053940776735544205, "learning_rate": 7.13406187908033e-05, "loss": 0.01459483802318573, "step": 101010 }, { "epoch": 28.67442520579052, "grad_norm": 1.5995012521743774, "learning_rate": 7.133778030087992e-05, "loss": 0.003683196008205414, "step": 101020 }, { "epoch": 28.67726369571388, "grad_norm": 8.050959587097168, "learning_rate": 7.133494181095658e-05, "loss": 0.005612199753522873, "step": 101030 }, { "epoch": 28.680102185637242, "grad_norm": 0.02898932248353958, "learning_rate": 7.133210332103322e-05, "loss": 0.003096621669828892, "step": 101040 }, { "epoch": 28.6829406755606, "grad_norm": 12.085624694824219, "learning_rate": 7.132926483110985e-05, "loss": 0.005219636112451553, "step": 101050 }, { "epoch": 28.68577916548396, "grad_norm": 7.324666500091553, "learning_rate": 7.132642634118649e-05, "loss": 0.009150078892707825, "step": 101060 }, { "epoch": 28.688617655407324, "grad_norm": 0.820233166217804, "learning_rate": 7.132358785126313e-05, "loss": 0.004469205439090729, "step": 101070 }, { "epoch": 28.691456145330683, "grad_norm": 0.22355899214744568, "learning_rate": 7.132074936133977e-05, "loss": 0.029906824231147766, "step": 101080 }, { "epoch": 28.694294635254046, "grad_norm": 1.2345755100250244, "learning_rate": 7.13179108714164e-05, "loss": 0.0017614329233765602, "step": 101090 }, { "epoch": 28.697133125177405, "grad_norm": 0.3571625053882599, "learning_rate": 7.131507238149306e-05, "loss": 0.007394891232252121, "step": 101100 }, { "epoch": 28.699971615100765, "grad_norm": 0.07242067903280258, "learning_rate": 7.13122338915697e-05, "loss": 0.005272742360830307, "step": 101110 }, { "epoch": 28.702810105024128, "grad_norm": 3.593526601791382, "learning_rate": 7.130939540164633e-05, "loss": 0.0022347653284668922, "step": 101120 }, { "epoch": 28.705648594947487, "grad_norm": 0.3810332715511322, "learning_rate": 7.130655691172297e-05, "loss": 0.011276833713054657, "step": 101130 }, { "epoch": 28.70848708487085, "grad_norm": 0.06652132421731949, "learning_rate": 7.130371842179961e-05, "loss": 0.0042766381055116655, "step": 101140 }, { "epoch": 28.71132557479421, "grad_norm": 6.280457019805908, "learning_rate": 7.130087993187624e-05, "loss": 0.020737612247467042, "step": 101150 }, { "epoch": 28.714164064717572, "grad_norm": 0.5311075448989868, "learning_rate": 7.129804144195289e-05, "loss": 0.0067108385264873505, "step": 101160 }, { "epoch": 28.71700255464093, "grad_norm": 7.407909393310547, "learning_rate": 7.129520295202953e-05, "loss": 0.014285434782505036, "step": 101170 }, { "epoch": 28.71984104456429, "grad_norm": 1.3686609268188477, "learning_rate": 7.129236446210616e-05, "loss": 0.003527410700917244, "step": 101180 }, { "epoch": 28.722679534487654, "grad_norm": 2.2411158084869385, "learning_rate": 7.12895259721828e-05, "loss": 0.004616087675094605, "step": 101190 }, { "epoch": 28.725518024411013, "grad_norm": 0.5638934969902039, "learning_rate": 7.128668748225944e-05, "loss": 0.0031273704022169112, "step": 101200 }, { "epoch": 28.728356514334376, "grad_norm": 0.37958937883377075, "learning_rate": 7.128384899233608e-05, "loss": 0.006182232499122619, "step": 101210 }, { "epoch": 28.731195004257735, "grad_norm": 0.12192791700363159, "learning_rate": 7.128101050241271e-05, "loss": 0.0044686682522296906, "step": 101220 }, { "epoch": 28.734033494181094, "grad_norm": 0.04999832436442375, "learning_rate": 7.127817201248937e-05, "loss": 0.006055596470832825, "step": 101230 }, { "epoch": 28.736871984104457, "grad_norm": 1.3990614414215088, "learning_rate": 7.1275333522566e-05, "loss": 0.004922275245189667, "step": 101240 }, { "epoch": 28.739710474027817, "grad_norm": 0.16286161541938782, "learning_rate": 7.127249503264264e-05, "loss": 0.016663452982902525, "step": 101250 }, { "epoch": 28.74254896395118, "grad_norm": 0.029895998537540436, "learning_rate": 7.126965654271928e-05, "loss": 0.012970861792564393, "step": 101260 }, { "epoch": 28.74538745387454, "grad_norm": 0.05637240782380104, "learning_rate": 7.126681805279592e-05, "loss": 0.0017910659313201903, "step": 101270 }, { "epoch": 28.748225943797898, "grad_norm": 12.613818168640137, "learning_rate": 7.126397956287255e-05, "loss": 0.011501072347164154, "step": 101280 }, { "epoch": 28.75106443372126, "grad_norm": 5.295426368713379, "learning_rate": 7.126114107294919e-05, "loss": 0.008532915264368057, "step": 101290 }, { "epoch": 28.75390292364462, "grad_norm": 0.4965521991252899, "learning_rate": 7.125830258302584e-05, "loss": 0.0032832369208335875, "step": 101300 }, { "epoch": 28.756741413567983, "grad_norm": 2.6785190105438232, "learning_rate": 7.125546409310247e-05, "loss": 0.005869368091225624, "step": 101310 }, { "epoch": 28.759579903491343, "grad_norm": 0.11178973317146301, "learning_rate": 7.125262560317911e-05, "loss": 0.00174157302826643, "step": 101320 }, { "epoch": 28.762418393414702, "grad_norm": 2.7437283992767334, "learning_rate": 7.124978711325575e-05, "loss": 0.029703661799430847, "step": 101330 }, { "epoch": 28.765256883338065, "grad_norm": 0.16215945780277252, "learning_rate": 7.124694862333238e-05, "loss": 0.008883269131183624, "step": 101340 }, { "epoch": 28.768095373261424, "grad_norm": 2.967451810836792, "learning_rate": 7.124411013340902e-05, "loss": 0.015254102647304535, "step": 101350 }, { "epoch": 28.770933863184787, "grad_norm": 7.989067554473877, "learning_rate": 7.124127164348568e-05, "loss": 0.005526739358901978, "step": 101360 }, { "epoch": 28.773772353108146, "grad_norm": 0.1572941094636917, "learning_rate": 7.12384331535623e-05, "loss": 0.005023849755525589, "step": 101370 }, { "epoch": 28.776610843031506, "grad_norm": 0.2801760733127594, "learning_rate": 7.123559466363895e-05, "loss": 0.01696062386035919, "step": 101380 }, { "epoch": 28.77944933295487, "grad_norm": 0.09212621301412582, "learning_rate": 7.123275617371559e-05, "loss": 0.0015644386410713195, "step": 101390 }, { "epoch": 28.782287822878228, "grad_norm": 1.8375394344329834, "learning_rate": 7.122991768379223e-05, "loss": 0.006651724874973297, "step": 101400 }, { "epoch": 28.78512631280159, "grad_norm": 1.1243854761123657, "learning_rate": 7.122707919386886e-05, "loss": 0.005872528627514839, "step": 101410 }, { "epoch": 28.78796480272495, "grad_norm": 0.05809410661458969, "learning_rate": 7.12242407039455e-05, "loss": 0.009146924316883086, "step": 101420 }, { "epoch": 28.79080329264831, "grad_norm": 5.073337078094482, "learning_rate": 7.122140221402215e-05, "loss": 0.024688911437988282, "step": 101430 }, { "epoch": 28.793641782571672, "grad_norm": 10.45237922668457, "learning_rate": 7.121856372409878e-05, "loss": 0.01293121576309204, "step": 101440 }, { "epoch": 28.79648027249503, "grad_norm": 1.5726439952850342, "learning_rate": 7.121572523417542e-05, "loss": 0.007134757936000824, "step": 101450 }, { "epoch": 28.799318762418395, "grad_norm": 10.721437454223633, "learning_rate": 7.121288674425206e-05, "loss": 0.009092658013105392, "step": 101460 }, { "epoch": 28.802157252341754, "grad_norm": 0.15472757816314697, "learning_rate": 7.121004825432869e-05, "loss": 0.007055079191923141, "step": 101470 }, { "epoch": 28.804995742265113, "grad_norm": 0.3221628963947296, "learning_rate": 7.120720976440533e-05, "loss": 0.002906768210232258, "step": 101480 }, { "epoch": 28.807834232188476, "grad_norm": 0.598436713218689, "learning_rate": 7.120437127448197e-05, "loss": 0.004531119391322136, "step": 101490 }, { "epoch": 28.810672722111835, "grad_norm": 5.1679887771606445, "learning_rate": 7.120153278455862e-05, "loss": 0.007263277471065521, "step": 101500 }, { "epoch": 28.810672722111835, "eval_accuracy": 0.9760920709607681, "eval_loss": 0.09429065883159637, "eval_runtime": 34.638, "eval_samples_per_second": 454.039, "eval_steps_per_second": 7.102, "step": 101500 }, { "epoch": 28.8135112120352, "grad_norm": 0.23314045369625092, "learning_rate": 7.119869429463526e-05, "loss": 0.010326100885868073, "step": 101510 }, { "epoch": 28.816349701958558, "grad_norm": 0.061449699103832245, "learning_rate": 7.11958558047119e-05, "loss": 0.0019689898937940596, "step": 101520 }, { "epoch": 28.81918819188192, "grad_norm": 0.17196224629878998, "learning_rate": 7.119301731478854e-05, "loss": 0.009500715136528014, "step": 101530 }, { "epoch": 28.82202668180528, "grad_norm": 1.9290777444839478, "learning_rate": 7.119017882486517e-05, "loss": 0.009543681144714355, "step": 101540 }, { "epoch": 28.82486517172864, "grad_norm": 0.12134458869695663, "learning_rate": 7.118734033494181e-05, "loss": 0.005692127719521522, "step": 101550 }, { "epoch": 28.827703661652002, "grad_norm": 6.74662446975708, "learning_rate": 7.118450184501846e-05, "loss": 0.016024252772331236, "step": 101560 }, { "epoch": 28.83054215157536, "grad_norm": 7.745357990264893, "learning_rate": 7.118166335509509e-05, "loss": 0.010141895711421966, "step": 101570 }, { "epoch": 28.833380641498724, "grad_norm": 10.06021499633789, "learning_rate": 7.117882486517173e-05, "loss": 0.006186030805110931, "step": 101580 }, { "epoch": 28.836219131422084, "grad_norm": 1.887127161026001, "learning_rate": 7.117598637524837e-05, "loss": 0.008414709568023681, "step": 101590 }, { "epoch": 28.839057621345443, "grad_norm": 0.013576538302004337, "learning_rate": 7.1173147885325e-05, "loss": 0.01042981892824173, "step": 101600 }, { "epoch": 28.841896111268806, "grad_norm": 1.1570019721984863, "learning_rate": 7.117030939540164e-05, "loss": 0.00882389023900032, "step": 101610 }, { "epoch": 28.844734601192165, "grad_norm": 0.10365495830774307, "learning_rate": 7.116747090547829e-05, "loss": 0.013228864967823028, "step": 101620 }, { "epoch": 28.847573091115528, "grad_norm": 0.17051784694194794, "learning_rate": 7.116463241555493e-05, "loss": 0.001926121674478054, "step": 101630 }, { "epoch": 28.850411581038887, "grad_norm": 0.06419861316680908, "learning_rate": 7.116179392563157e-05, "loss": 0.0028492823243141175, "step": 101640 }, { "epoch": 28.853250070962247, "grad_norm": 0.5821008086204529, "learning_rate": 7.115895543570821e-05, "loss": 0.0051035694777965546, "step": 101650 }, { "epoch": 28.85608856088561, "grad_norm": 0.03569401428103447, "learning_rate": 7.115611694578485e-05, "loss": 0.010847404599189758, "step": 101660 }, { "epoch": 28.85892705080897, "grad_norm": 5.821361541748047, "learning_rate": 7.115327845586148e-05, "loss": 0.0229514017701149, "step": 101670 }, { "epoch": 28.861765540732332, "grad_norm": 1.0860556364059448, "learning_rate": 7.115043996593812e-05, "loss": 0.009710386395454407, "step": 101680 }, { "epoch": 28.86460403065569, "grad_norm": 0.14940841495990753, "learning_rate": 7.114760147601478e-05, "loss": 0.016968542337417604, "step": 101690 }, { "epoch": 28.86744252057905, "grad_norm": 0.1667504608631134, "learning_rate": 7.11447629860914e-05, "loss": 0.0336951494216919, "step": 101700 }, { "epoch": 28.870281010502413, "grad_norm": 0.19103844463825226, "learning_rate": 7.114192449616804e-05, "loss": 0.00388486310839653, "step": 101710 }, { "epoch": 28.873119500425773, "grad_norm": 0.3235238492488861, "learning_rate": 7.113908600624469e-05, "loss": 0.03126699328422546, "step": 101720 }, { "epoch": 28.875957990349136, "grad_norm": 4.208121299743652, "learning_rate": 7.113624751632131e-05, "loss": 0.02488974332809448, "step": 101730 }, { "epoch": 28.878796480272495, "grad_norm": 0.3190845847129822, "learning_rate": 7.113340902639795e-05, "loss": 0.0074510321021080015, "step": 101740 }, { "epoch": 28.881634970195854, "grad_norm": 0.37923476099967957, "learning_rate": 7.11305705364746e-05, "loss": 0.004789967834949493, "step": 101750 }, { "epoch": 28.884473460119217, "grad_norm": 0.047557584941387177, "learning_rate": 7.112773204655124e-05, "loss": 0.012989778816699982, "step": 101760 }, { "epoch": 28.887311950042577, "grad_norm": 3.3514814376831055, "learning_rate": 7.112489355662788e-05, "loss": 0.006993254274129867, "step": 101770 }, { "epoch": 28.89015043996594, "grad_norm": 0.09681878238916397, "learning_rate": 7.112205506670452e-05, "loss": 0.005912154912948608, "step": 101780 }, { "epoch": 28.8929889298893, "grad_norm": 0.6057702302932739, "learning_rate": 7.111921657678116e-05, "loss": 0.006787490099668503, "step": 101790 }, { "epoch": 28.895827419812658, "grad_norm": 1.2969409227371216, "learning_rate": 7.111637808685779e-05, "loss": 0.010072321444749833, "step": 101800 }, { "epoch": 28.89866590973602, "grad_norm": 3.0071630477905273, "learning_rate": 7.111353959693443e-05, "loss": 0.036550205945968625, "step": 101810 }, { "epoch": 28.90150439965938, "grad_norm": 0.8719659447669983, "learning_rate": 7.111070110701107e-05, "loss": 0.004226415231823921, "step": 101820 }, { "epoch": 28.904342889582743, "grad_norm": 0.04191483557224274, "learning_rate": 7.110786261708771e-05, "loss": 0.017452168464660644, "step": 101830 }, { "epoch": 28.907181379506103, "grad_norm": 7.754847049713135, "learning_rate": 7.110502412716436e-05, "loss": 0.004167599976062775, "step": 101840 }, { "epoch": 28.910019869429462, "grad_norm": 0.01792372390627861, "learning_rate": 7.1102185637241e-05, "loss": 0.0042451489716768265, "step": 101850 }, { "epoch": 28.912858359352825, "grad_norm": 1.1712180376052856, "learning_rate": 7.109934714731762e-05, "loss": 0.007308217138051987, "step": 101860 }, { "epoch": 28.915696849276184, "grad_norm": 0.39991676807403564, "learning_rate": 7.109650865739427e-05, "loss": 0.002386322058737278, "step": 101870 }, { "epoch": 28.918535339199547, "grad_norm": 0.07559480518102646, "learning_rate": 7.109367016747091e-05, "loss": 0.030326339602470397, "step": 101880 }, { "epoch": 28.921373829122906, "grad_norm": 0.40767356753349304, "learning_rate": 7.109083167754755e-05, "loss": 0.0014854943379759788, "step": 101890 }, { "epoch": 28.92421231904627, "grad_norm": 0.0073531088419258595, "learning_rate": 7.108799318762419e-05, "loss": 0.006440618634223938, "step": 101900 }, { "epoch": 28.92705080896963, "grad_norm": 0.33045682311058044, "learning_rate": 7.108515469770083e-05, "loss": 0.0024454841390252114, "step": 101910 }, { "epoch": 28.929889298892988, "grad_norm": 5.0154337882995605, "learning_rate": 7.108231620777747e-05, "loss": 0.005689529329538345, "step": 101920 }, { "epoch": 28.93272778881635, "grad_norm": 0.29993438720703125, "learning_rate": 7.10794777178541e-05, "loss": 0.005230653285980225, "step": 101930 }, { "epoch": 28.93556627873971, "grad_norm": 1.5636703968048096, "learning_rate": 7.107663922793074e-05, "loss": 0.008893543481826782, "step": 101940 }, { "epoch": 28.93840476866307, "grad_norm": 7.839364528656006, "learning_rate": 7.107380073800738e-05, "loss": 0.005069836229085922, "step": 101950 }, { "epoch": 28.941243258586432, "grad_norm": 0.014136923477053642, "learning_rate": 7.107096224808402e-05, "loss": 0.003139740973711014, "step": 101960 }, { "epoch": 28.94408174850979, "grad_norm": 3.5689380168914795, "learning_rate": 7.106812375816067e-05, "loss": 0.004564220085740089, "step": 101970 }, { "epoch": 28.946920238433155, "grad_norm": 0.17039412260055542, "learning_rate": 7.106528526823731e-05, "loss": 0.004075460880994797, "step": 101980 }, { "epoch": 28.949758728356514, "grad_norm": 6.96528434753418, "learning_rate": 7.106244677831394e-05, "loss": 0.00682251900434494, "step": 101990 }, { "epoch": 28.952597218279877, "grad_norm": 0.0328461118042469, "learning_rate": 7.105960828839058e-05, "loss": 0.0061874911189079285, "step": 102000 }, { "epoch": 28.952597218279877, "eval_accuracy": 0.9792077319259872, "eval_loss": 0.0766717940568924, "eval_runtime": 34.8692, "eval_samples_per_second": 451.029, "eval_steps_per_second": 7.055, "step": 102000 }, { "epoch": 28.955435708203236, "grad_norm": 0.08056804537773132, "learning_rate": 7.105676979846722e-05, "loss": 0.0019787248224020005, "step": 102010 }, { "epoch": 28.958274198126595, "grad_norm": 0.04963751509785652, "learning_rate": 7.105393130854386e-05, "loss": 0.006129699945449829, "step": 102020 }, { "epoch": 28.96111268804996, "grad_norm": 0.050748560577631, "learning_rate": 7.10510928186205e-05, "loss": 0.007289968430995941, "step": 102030 }, { "epoch": 28.963951177973318, "grad_norm": 0.6646664142608643, "learning_rate": 7.104825432869714e-05, "loss": 0.016764166951179504, "step": 102040 }, { "epoch": 28.96678966789668, "grad_norm": 1.0716813802719116, "learning_rate": 7.104541583877378e-05, "loss": 0.005003368854522705, "step": 102050 }, { "epoch": 28.96962815782004, "grad_norm": 5.546133995056152, "learning_rate": 7.104257734885041e-05, "loss": 0.004633001983165741, "step": 102060 }, { "epoch": 28.9724666477434, "grad_norm": 0.11671075969934464, "learning_rate": 7.103973885892705e-05, "loss": 0.008197021484375, "step": 102070 }, { "epoch": 28.975305137666762, "grad_norm": 0.5917112231254578, "learning_rate": 7.10369003690037e-05, "loss": 0.002373579703271389, "step": 102080 }, { "epoch": 28.97814362759012, "grad_norm": 2.5105490684509277, "learning_rate": 7.103406187908034e-05, "loss": 0.005134443193674088, "step": 102090 }, { "epoch": 28.980982117513484, "grad_norm": 0.04253967106342316, "learning_rate": 7.103122338915698e-05, "loss": 0.014869335293769836, "step": 102100 }, { "epoch": 28.983820607436844, "grad_norm": 2.7728536128997803, "learning_rate": 7.102838489923362e-05, "loss": 0.0013429773971438408, "step": 102110 }, { "epoch": 28.986659097360203, "grad_norm": 0.621212899684906, "learning_rate": 7.102554640931025e-05, "loss": 0.0016776839271187781, "step": 102120 }, { "epoch": 28.989497587283566, "grad_norm": 0.7426930069923401, "learning_rate": 7.102270791938689e-05, "loss": 0.0037782356142997743, "step": 102130 }, { "epoch": 28.992336077206925, "grad_norm": 0.32435911893844604, "learning_rate": 7.101986942946353e-05, "loss": 0.028221020102500917, "step": 102140 }, { "epoch": 28.995174567130288, "grad_norm": 13.37521743774414, "learning_rate": 7.101703093954017e-05, "loss": 0.009029510617256164, "step": 102150 }, { "epoch": 28.998013057053647, "grad_norm": 3.2148728370666504, "learning_rate": 7.101419244961681e-05, "loss": 0.006379666179418564, "step": 102160 }, { "epoch": 29.000851546977007, "grad_norm": 0.15644866228103638, "learning_rate": 7.101135395969345e-05, "loss": 0.005445703864097595, "step": 102170 }, { "epoch": 29.00369003690037, "grad_norm": 0.5927413105964661, "learning_rate": 7.100851546977008e-05, "loss": 0.006464226543903351, "step": 102180 }, { "epoch": 29.00652852682373, "grad_norm": 0.07261708378791809, "learning_rate": 7.100567697984672e-05, "loss": 0.007652934640645981, "step": 102190 }, { "epoch": 29.009367016747092, "grad_norm": 1.029008388519287, "learning_rate": 7.100283848992336e-05, "loss": 0.006443069875240326, "step": 102200 }, { "epoch": 29.01220550667045, "grad_norm": 0.027061859145760536, "learning_rate": 7.1e-05, "loss": 0.013810694217681885, "step": 102210 }, { "epoch": 29.01504399659381, "grad_norm": 0.015477757900953293, "learning_rate": 7.099716151007663e-05, "loss": 0.006912966817617416, "step": 102220 }, { "epoch": 29.017882486517173, "grad_norm": 4.870151519775391, "learning_rate": 7.099432302015329e-05, "loss": 0.0026150880381464957, "step": 102230 }, { "epoch": 29.020720976440533, "grad_norm": 0.14951063692569733, "learning_rate": 7.099148453022993e-05, "loss": 0.006289609521627426, "step": 102240 }, { "epoch": 29.023559466363896, "grad_norm": 1.134745717048645, "learning_rate": 7.098864604030656e-05, "loss": 0.007550932466983795, "step": 102250 }, { "epoch": 29.026397956287255, "grad_norm": 0.020302800461649895, "learning_rate": 7.09858075503832e-05, "loss": 0.00046539362519979475, "step": 102260 }, { "epoch": 29.029236446210614, "grad_norm": 0.11173997819423676, "learning_rate": 7.098296906045984e-05, "loss": 0.000894884206354618, "step": 102270 }, { "epoch": 29.032074936133977, "grad_norm": 0.335809588432312, "learning_rate": 7.098013057053647e-05, "loss": 0.0006922150030732154, "step": 102280 }, { "epoch": 29.034913426057336, "grad_norm": 5.482174873352051, "learning_rate": 7.097729208061312e-05, "loss": 0.003874439001083374, "step": 102290 }, { "epoch": 29.0377519159807, "grad_norm": 6.691117763519287, "learning_rate": 7.097445359068976e-05, "loss": 0.006083516776561737, "step": 102300 }, { "epoch": 29.04059040590406, "grad_norm": 0.03845866769552231, "learning_rate": 7.097161510076639e-05, "loss": 0.00621531493961811, "step": 102310 }, { "epoch": 29.043428895827418, "grad_norm": 0.05516831949353218, "learning_rate": 7.096877661084303e-05, "loss": 0.0035025861114263536, "step": 102320 }, { "epoch": 29.04626738575078, "grad_norm": 5.9516777992248535, "learning_rate": 7.096593812091967e-05, "loss": 0.01230928897857666, "step": 102330 }, { "epoch": 29.04910587567414, "grad_norm": 14.813668251037598, "learning_rate": 7.096309963099632e-05, "loss": 0.018799395859241487, "step": 102340 }, { "epoch": 29.051944365597503, "grad_norm": 0.2695561349391937, "learning_rate": 7.096026114107294e-05, "loss": 0.00420982539653778, "step": 102350 }, { "epoch": 29.054782855520862, "grad_norm": 4.467052936553955, "learning_rate": 7.09574226511496e-05, "loss": 0.01636870950460434, "step": 102360 }, { "epoch": 29.057621345444225, "grad_norm": 0.3870987296104431, "learning_rate": 7.095458416122624e-05, "loss": 0.003549434244632721, "step": 102370 }, { "epoch": 29.060459835367585, "grad_norm": 0.46010902523994446, "learning_rate": 7.095174567130287e-05, "loss": 0.004823384433984756, "step": 102380 }, { "epoch": 29.063298325290944, "grad_norm": 5.2582268714904785, "learning_rate": 7.094890718137951e-05, "loss": 0.006130936741828919, "step": 102390 }, { "epoch": 29.066136815214307, "grad_norm": 0.07010307163000107, "learning_rate": 7.094606869145615e-05, "loss": 0.004003661498427391, "step": 102400 }, { "epoch": 29.068975305137666, "grad_norm": 11.031844139099121, "learning_rate": 7.094323020153278e-05, "loss": 0.01002945601940155, "step": 102410 }, { "epoch": 29.07181379506103, "grad_norm": 12.924904823303223, "learning_rate": 7.094039171160942e-05, "loss": 0.004756616801023484, "step": 102420 }, { "epoch": 29.07465228498439, "grad_norm": 0.024492811411619186, "learning_rate": 7.093755322168607e-05, "loss": 0.004010432213544845, "step": 102430 }, { "epoch": 29.077490774907748, "grad_norm": 11.652758598327637, "learning_rate": 7.09347147317627e-05, "loss": 0.00983874499797821, "step": 102440 }, { "epoch": 29.08032926483111, "grad_norm": 0.03566334396600723, "learning_rate": 7.093187624183934e-05, "loss": 0.0022249264642596246, "step": 102450 }, { "epoch": 29.08316775475447, "grad_norm": 0.009804179891943932, "learning_rate": 7.092903775191598e-05, "loss": 0.001459248922765255, "step": 102460 }, { "epoch": 29.086006244677833, "grad_norm": 2.996426582336426, "learning_rate": 7.092619926199263e-05, "loss": 0.006142298132181168, "step": 102470 }, { "epoch": 29.088844734601192, "grad_norm": 0.10556428134441376, "learning_rate": 7.092336077206925e-05, "loss": 0.025636661052703857, "step": 102480 }, { "epoch": 29.09168322452455, "grad_norm": 0.4737178683280945, "learning_rate": 7.092052228214591e-05, "loss": 0.011525024473667145, "step": 102490 }, { "epoch": 29.094521714447914, "grad_norm": 0.022816753014922142, "learning_rate": 7.091768379222255e-05, "loss": 0.005685494095087051, "step": 102500 }, { "epoch": 29.094521714447914, "eval_accuracy": 0.9790169771730146, "eval_loss": 0.07227671146392822, "eval_runtime": 33.7991, "eval_samples_per_second": 465.308, "eval_steps_per_second": 7.278, "step": 102500 }, { "epoch": 29.097360204371274, "grad_norm": 0.01731047034263611, "learning_rate": 7.091484530229918e-05, "loss": 0.0038013923913240433, "step": 102510 }, { "epoch": 29.100198694294637, "grad_norm": 0.5261936783790588, "learning_rate": 7.091200681237582e-05, "loss": 0.0024498097598552706, "step": 102520 }, { "epoch": 29.103037184217996, "grad_norm": 0.3974960744380951, "learning_rate": 7.090916832245246e-05, "loss": 0.007683080434799194, "step": 102530 }, { "epoch": 29.105875674141355, "grad_norm": 0.35825201869010925, "learning_rate": 7.090632983252909e-05, "loss": 0.0008521744981408119, "step": 102540 }, { "epoch": 29.10871416406472, "grad_norm": 0.13007603585720062, "learning_rate": 7.090349134260573e-05, "loss": 0.005171350762248039, "step": 102550 }, { "epoch": 29.111552653988078, "grad_norm": 8.8914213180542, "learning_rate": 7.090065285268238e-05, "loss": 0.004473544657230377, "step": 102560 }, { "epoch": 29.11439114391144, "grad_norm": 1.9084700345993042, "learning_rate": 7.089781436275901e-05, "loss": 0.00606318898499012, "step": 102570 }, { "epoch": 29.1172296338348, "grad_norm": 0.4334997534751892, "learning_rate": 7.089497587283565e-05, "loss": 0.0017402177676558495, "step": 102580 }, { "epoch": 29.12006812375816, "grad_norm": 0.2294914573431015, "learning_rate": 7.08921373829123e-05, "loss": 0.009240365028381348, "step": 102590 }, { "epoch": 29.122906613681522, "grad_norm": 0.006051816511899233, "learning_rate": 7.088929889298894e-05, "loss": 0.007548283040523529, "step": 102600 }, { "epoch": 29.12574510360488, "grad_norm": 0.0606999546289444, "learning_rate": 7.088646040306556e-05, "loss": 0.01871337890625, "step": 102610 }, { "epoch": 29.128583593528244, "grad_norm": 2.113617420196533, "learning_rate": 7.08836219131422e-05, "loss": 0.002655950002372265, "step": 102620 }, { "epoch": 29.131422083451604, "grad_norm": 1.0896068811416626, "learning_rate": 7.088078342321886e-05, "loss": 0.0013158554211258888, "step": 102630 }, { "epoch": 29.134260573374963, "grad_norm": 0.21763846278190613, "learning_rate": 7.087794493329549e-05, "loss": 0.004308376461267471, "step": 102640 }, { "epoch": 29.137099063298326, "grad_norm": 0.04795778542757034, "learning_rate": 7.087510644337213e-05, "loss": 0.002835904806852341, "step": 102650 }, { "epoch": 29.139937553221685, "grad_norm": 2.509918451309204, "learning_rate": 7.087226795344877e-05, "loss": 0.003853001818060875, "step": 102660 }, { "epoch": 29.142776043145048, "grad_norm": 0.1685977727174759, "learning_rate": 7.08694294635254e-05, "loss": 0.0009495628997683525, "step": 102670 }, { "epoch": 29.145614533068407, "grad_norm": 0.766886293888092, "learning_rate": 7.086659097360204e-05, "loss": 0.0011022070422768593, "step": 102680 }, { "epoch": 29.148453022991767, "grad_norm": 6.475676536560059, "learning_rate": 7.08637524836787e-05, "loss": 0.004429520294070244, "step": 102690 }, { "epoch": 29.15129151291513, "grad_norm": 0.354703426361084, "learning_rate": 7.086091399375532e-05, "loss": 0.00040097013115882876, "step": 102700 }, { "epoch": 29.15413000283849, "grad_norm": 0.10722704231739044, "learning_rate": 7.085807550383196e-05, "loss": 0.008743734657764434, "step": 102710 }, { "epoch": 29.15696849276185, "grad_norm": 0.07849187403917313, "learning_rate": 7.08552370139086e-05, "loss": 0.00544859915971756, "step": 102720 }, { "epoch": 29.15980698268521, "grad_norm": 0.14524637162685394, "learning_rate": 7.085239852398525e-05, "loss": 0.008424367755651474, "step": 102730 }, { "epoch": 29.162645472608574, "grad_norm": 0.09704538434743881, "learning_rate": 7.084956003406188e-05, "loss": 0.0037971146404743193, "step": 102740 }, { "epoch": 29.165483962531933, "grad_norm": 0.6476771235466003, "learning_rate": 7.084672154413852e-05, "loss": 0.009509602934122086, "step": 102750 }, { "epoch": 29.168322452455293, "grad_norm": 0.7927878499031067, "learning_rate": 7.084388305421517e-05, "loss": 0.009265125542879105, "step": 102760 }, { "epoch": 29.171160942378656, "grad_norm": 1.261000633239746, "learning_rate": 7.08410445642918e-05, "loss": 0.0008103368803858757, "step": 102770 }, { "epoch": 29.173999432302015, "grad_norm": 9.624361038208008, "learning_rate": 7.083820607436844e-05, "loss": 0.0054599568247795105, "step": 102780 }, { "epoch": 29.176837922225378, "grad_norm": 0.0648004561662674, "learning_rate": 7.083536758444508e-05, "loss": 0.001725742034614086, "step": 102790 }, { "epoch": 29.179676412148737, "grad_norm": 0.6407598853111267, "learning_rate": 7.083252909452171e-05, "loss": 0.0023335646837949753, "step": 102800 }, { "epoch": 29.182514902072096, "grad_norm": 0.16056415438652039, "learning_rate": 7.082969060459835e-05, "loss": 0.0012657200917601586, "step": 102810 }, { "epoch": 29.18535339199546, "grad_norm": 1.4782663583755493, "learning_rate": 7.082685211467499e-05, "loss": 0.006105136498808861, "step": 102820 }, { "epoch": 29.18819188191882, "grad_norm": 0.07077052444219589, "learning_rate": 7.082401362475163e-05, "loss": 0.0012661622837185859, "step": 102830 }, { "epoch": 29.19103037184218, "grad_norm": 1.6142715215682983, "learning_rate": 7.082117513482828e-05, "loss": 0.008815893530845642, "step": 102840 }, { "epoch": 29.19386886176554, "grad_norm": 0.15770773589611053, "learning_rate": 7.081833664490492e-05, "loss": 0.014140075445175171, "step": 102850 }, { "epoch": 29.1967073516889, "grad_norm": 0.07133042812347412, "learning_rate": 7.081549815498156e-05, "loss": 0.014475718140602112, "step": 102860 }, { "epoch": 29.199545841612263, "grad_norm": 0.8998302817344666, "learning_rate": 7.081265966505819e-05, "loss": 0.005640248954296112, "step": 102870 }, { "epoch": 29.202384331535622, "grad_norm": 0.8757505416870117, "learning_rate": 7.080982117513483e-05, "loss": 0.004390257969498634, "step": 102880 }, { "epoch": 29.205222821458985, "grad_norm": 0.6571523547172546, "learning_rate": 7.080698268521148e-05, "loss": 0.0024113286286592483, "step": 102890 }, { "epoch": 29.208061311382345, "grad_norm": 0.5449387431144714, "learning_rate": 7.080442804428045e-05, "loss": 0.013666708767414094, "step": 102900 }, { "epoch": 29.210899801305704, "grad_norm": 0.571879506111145, "learning_rate": 7.080158955435709e-05, "loss": 0.014597156643867492, "step": 102910 }, { "epoch": 29.213738291229067, "grad_norm": 0.4552755653858185, "learning_rate": 7.079875106443372e-05, "loss": 0.02109955996274948, "step": 102920 }, { "epoch": 29.216576781152426, "grad_norm": 0.43700703978538513, "learning_rate": 7.079591257451036e-05, "loss": 0.0053682573139667514, "step": 102930 }, { "epoch": 29.21941527107579, "grad_norm": 0.24321036040782928, "learning_rate": 7.079307408458701e-05, "loss": 0.0034367814660072327, "step": 102940 }, { "epoch": 29.22225376099915, "grad_norm": 0.13117676973342896, "learning_rate": 7.079023559466364e-05, "loss": 0.007901807129383088, "step": 102950 }, { "epoch": 29.225092250922508, "grad_norm": 10.312271118164062, "learning_rate": 7.078739710474028e-05, "loss": 0.004375645518302917, "step": 102960 }, { "epoch": 29.22793074084587, "grad_norm": 6.883121013641357, "learning_rate": 7.078455861481693e-05, "loss": 0.007875862717628478, "step": 102970 }, { "epoch": 29.23076923076923, "grad_norm": 2.5622525215148926, "learning_rate": 7.078172012489355e-05, "loss": 0.001372130773961544, "step": 102980 }, { "epoch": 29.233607720692593, "grad_norm": 0.08059811592102051, "learning_rate": 7.07788816349702e-05, "loss": 0.004394899308681488, "step": 102990 }, { "epoch": 29.236446210615952, "grad_norm": 1.0022445917129517, "learning_rate": 7.077604314504684e-05, "loss": 0.004058905690908432, "step": 103000 }, { "epoch": 29.236446210615952, "eval_accuracy": 0.9793349017613022, "eval_loss": 0.07529354840517044, "eval_runtime": 33.7799, "eval_samples_per_second": 465.573, "eval_steps_per_second": 7.282, "step": 103000 }, { "epoch": 29.23928470053931, "grad_norm": 0.18722817301750183, "learning_rate": 7.077320465512348e-05, "loss": 0.004021821543574333, "step": 103010 }, { "epoch": 29.242123190462674, "grad_norm": 1.9830344915390015, "learning_rate": 7.077036616520012e-05, "loss": 0.004460936412215233, "step": 103020 }, { "epoch": 29.244961680386034, "grad_norm": 0.9294431209564209, "learning_rate": 7.076752767527676e-05, "loss": 0.0035419344902038576, "step": 103030 }, { "epoch": 29.247800170309397, "grad_norm": 0.11343692243099213, "learning_rate": 7.07646891853534e-05, "loss": 0.003240836411714554, "step": 103040 }, { "epoch": 29.250638660232756, "grad_norm": 0.7251267433166504, "learning_rate": 7.076185069543003e-05, "loss": 0.0019871525466442106, "step": 103050 }, { "epoch": 29.253477150156115, "grad_norm": 0.1308235377073288, "learning_rate": 7.075901220550667e-05, "loss": 0.005168920755386353, "step": 103060 }, { "epoch": 29.256315640079478, "grad_norm": 8.033523559570312, "learning_rate": 7.075617371558331e-05, "loss": 0.0072627395391464235, "step": 103070 }, { "epoch": 29.259154130002837, "grad_norm": 1.1037684679031372, "learning_rate": 7.075333522565995e-05, "loss": 0.003783738613128662, "step": 103080 }, { "epoch": 29.2619926199262, "grad_norm": 0.08481528609991074, "learning_rate": 7.07504967357366e-05, "loss": 0.01669275164604187, "step": 103090 }, { "epoch": 29.26483110984956, "grad_norm": 2.19024395942688, "learning_rate": 7.074765824581324e-05, "loss": 0.00596361979842186, "step": 103100 }, { "epoch": 29.267669599772923, "grad_norm": 1.7565184831619263, "learning_rate": 7.074481975588986e-05, "loss": 0.01975354701280594, "step": 103110 }, { "epoch": 29.270508089696282, "grad_norm": 0.11035668849945068, "learning_rate": 7.07419812659665e-05, "loss": 0.003303154185414314, "step": 103120 }, { "epoch": 29.27334657961964, "grad_norm": 2.933713912963867, "learning_rate": 7.073914277604315e-05, "loss": 0.004727249592542648, "step": 103130 }, { "epoch": 29.276185069543004, "grad_norm": 0.06221223995089531, "learning_rate": 7.073630428611979e-05, "loss": 0.02678188979625702, "step": 103140 }, { "epoch": 29.279023559466363, "grad_norm": 4.544602870941162, "learning_rate": 7.073346579619643e-05, "loss": 0.01661834716796875, "step": 103150 }, { "epoch": 29.281862049389726, "grad_norm": 0.49613770842552185, "learning_rate": 7.073062730627307e-05, "loss": 0.007673544436693191, "step": 103160 }, { "epoch": 29.284700539313086, "grad_norm": 1.1618659496307373, "learning_rate": 7.07277888163497e-05, "loss": 0.004149064049124717, "step": 103170 }, { "epoch": 29.287539029236445, "grad_norm": 4.713666915893555, "learning_rate": 7.072495032642634e-05, "loss": 0.025551941990852357, "step": 103180 }, { "epoch": 29.290377519159808, "grad_norm": 4.189531326293945, "learning_rate": 7.072211183650298e-05, "loss": 0.003262512758374214, "step": 103190 }, { "epoch": 29.293216009083167, "grad_norm": 0.2386840134859085, "learning_rate": 7.071927334657962e-05, "loss": 0.0027745887637138367, "step": 103200 }, { "epoch": 29.29605449900653, "grad_norm": 9.557560920715332, "learning_rate": 7.071643485665626e-05, "loss": 0.019274011254310608, "step": 103210 }, { "epoch": 29.29889298892989, "grad_norm": 0.019039712846279144, "learning_rate": 7.07135963667329e-05, "loss": 0.007492866367101669, "step": 103220 }, { "epoch": 29.30173147885325, "grad_norm": 6.981192588806152, "learning_rate": 7.071075787680955e-05, "loss": 0.009932079166173936, "step": 103230 }, { "epoch": 29.30456996877661, "grad_norm": 0.13508816063404083, "learning_rate": 7.070791938688617e-05, "loss": 0.003403358906507492, "step": 103240 }, { "epoch": 29.30740845869997, "grad_norm": 6.1778459548950195, "learning_rate": 7.070508089696282e-05, "loss": 0.007055351138114929, "step": 103250 }, { "epoch": 29.310246948623334, "grad_norm": 2.8572630882263184, "learning_rate": 7.070224240703946e-05, "loss": 0.006114448606967926, "step": 103260 }, { "epoch": 29.313085438546693, "grad_norm": 9.886648178100586, "learning_rate": 7.06994039171161e-05, "loss": 0.008831842243671418, "step": 103270 }, { "epoch": 29.315923928470053, "grad_norm": 0.1504235863685608, "learning_rate": 7.069656542719274e-05, "loss": 0.005071530491113663, "step": 103280 }, { "epoch": 29.318762418393415, "grad_norm": 0.19676612317562103, "learning_rate": 7.069372693726938e-05, "loss": 0.021581700444221495, "step": 103290 }, { "epoch": 29.321600908316775, "grad_norm": 0.21250270307064056, "learning_rate": 7.069088844734601e-05, "loss": 0.0015125162899494171, "step": 103300 }, { "epoch": 29.324439398240138, "grad_norm": 0.0631900280714035, "learning_rate": 7.068804995742265e-05, "loss": 0.0041439436376094815, "step": 103310 }, { "epoch": 29.327277888163497, "grad_norm": 1.065211296081543, "learning_rate": 7.068521146749929e-05, "loss": 0.0016290927305817604, "step": 103320 }, { "epoch": 29.330116378086856, "grad_norm": 9.665593147277832, "learning_rate": 7.068237297757593e-05, "loss": 0.012089668214321137, "step": 103330 }, { "epoch": 29.33295486801022, "grad_norm": 0.1582736372947693, "learning_rate": 7.067953448765257e-05, "loss": 0.00973098874092102, "step": 103340 }, { "epoch": 29.33579335793358, "grad_norm": 1.0072888135910034, "learning_rate": 7.067669599772922e-05, "loss": 0.0018404146656394004, "step": 103350 }, { "epoch": 29.33863184785694, "grad_norm": 3.4666810035705566, "learning_rate": 7.067385750780586e-05, "loss": 0.009426610171794891, "step": 103360 }, { "epoch": 29.3414703377803, "grad_norm": 5.2840895652771, "learning_rate": 7.067101901788249e-05, "loss": 0.010027870535850525, "step": 103370 }, { "epoch": 29.34430882770366, "grad_norm": 8.551250457763672, "learning_rate": 7.066818052795913e-05, "loss": 0.010856781154870987, "step": 103380 }, { "epoch": 29.347147317627023, "grad_norm": 4.956713676452637, "learning_rate": 7.066534203803577e-05, "loss": 0.0030553072690963747, "step": 103390 }, { "epoch": 29.349985807550382, "grad_norm": 12.655106544494629, "learning_rate": 7.066250354811241e-05, "loss": 0.015293611586093903, "step": 103400 }, { "epoch": 29.352824297473745, "grad_norm": 0.004563560709357262, "learning_rate": 7.065966505818905e-05, "loss": 0.006276577711105347, "step": 103410 }, { "epoch": 29.355662787397105, "grad_norm": 0.8805276155471802, "learning_rate": 7.065682656826569e-05, "loss": 0.001511424407362938, "step": 103420 }, { "epoch": 29.358501277320464, "grad_norm": 0.10281529277563095, "learning_rate": 7.065398807834232e-05, "loss": 0.006677160412073136, "step": 103430 }, { "epoch": 29.361339767243827, "grad_norm": 0.9019641876220703, "learning_rate": 7.065114958841896e-05, "loss": 0.008610501885414124, "step": 103440 }, { "epoch": 29.364178257167186, "grad_norm": 2.6431617736816406, "learning_rate": 7.06483110984956e-05, "loss": 0.029936271905899047, "step": 103450 }, { "epoch": 29.36701674709055, "grad_norm": 0.04111369326710701, "learning_rate": 7.064547260857224e-05, "loss": 0.009259331971406937, "step": 103460 }, { "epoch": 29.36985523701391, "grad_norm": 0.11282511055469513, "learning_rate": 7.064263411864889e-05, "loss": 0.003902393206954002, "step": 103470 }, { "epoch": 29.372693726937268, "grad_norm": 0.7466879487037659, "learning_rate": 7.063979562872553e-05, "loss": 0.009782949090003967, "step": 103480 }, { "epoch": 29.37553221686063, "grad_norm": 2.511017084121704, "learning_rate": 7.063695713880217e-05, "loss": 0.012595964968204499, "step": 103490 }, { "epoch": 29.37837070678399, "grad_norm": 5.983576774597168, "learning_rate": 7.06341186488788e-05, "loss": 0.004464036226272583, "step": 103500 }, { "epoch": 29.37837070678399, "eval_accuracy": 0.9768550899726585, "eval_loss": 0.09286462515592575, "eval_runtime": 34.6645, "eval_samples_per_second": 453.692, "eval_steps_per_second": 7.097, "step": 103500 }, { "epoch": 29.381209196707353, "grad_norm": 11.216988563537598, "learning_rate": 7.063128015895544e-05, "loss": 0.033862724900245667, "step": 103510 }, { "epoch": 29.384047686630712, "grad_norm": 0.12008658051490784, "learning_rate": 7.062844166903208e-05, "loss": 0.026339516043663025, "step": 103520 }, { "epoch": 29.386886176554075, "grad_norm": 0.029495183378458023, "learning_rate": 7.06256031791087e-05, "loss": 0.03241734504699707, "step": 103530 }, { "epoch": 29.389724666477434, "grad_norm": 1.622011661529541, "learning_rate": 7.062276468918536e-05, "loss": 0.012670771777629852, "step": 103540 }, { "epoch": 29.392563156400794, "grad_norm": 1.468421459197998, "learning_rate": 7.0619926199262e-05, "loss": 0.009817813336849213, "step": 103550 }, { "epoch": 29.395401646324157, "grad_norm": 1.7738088369369507, "learning_rate": 7.061708770933863e-05, "loss": 0.010206743329763412, "step": 103560 }, { "epoch": 29.398240136247516, "grad_norm": 0.14673887193202972, "learning_rate": 7.061424921941527e-05, "loss": 0.012163796275854111, "step": 103570 }, { "epoch": 29.40107862617088, "grad_norm": 0.1447172909975052, "learning_rate": 7.061141072949191e-05, "loss": 0.0033224403858184816, "step": 103580 }, { "epoch": 29.403917116094238, "grad_norm": 0.7190161347389221, "learning_rate": 7.060857223956855e-05, "loss": 0.0028880968689918517, "step": 103590 }, { "epoch": 29.406755606017597, "grad_norm": 0.07111704349517822, "learning_rate": 7.06057337496452e-05, "loss": 0.008915935456752778, "step": 103600 }, { "epoch": 29.40959409594096, "grad_norm": 0.928236186504364, "learning_rate": 7.060289525972184e-05, "loss": 0.01855330914258957, "step": 103610 }, { "epoch": 29.41243258586432, "grad_norm": 0.10174515098333359, "learning_rate": 7.060005676979848e-05, "loss": 0.030215197801589967, "step": 103620 }, { "epoch": 29.415271075787683, "grad_norm": 3.7573792934417725, "learning_rate": 7.05972182798751e-05, "loss": 0.007297547906637192, "step": 103630 }, { "epoch": 29.418109565711042, "grad_norm": 0.1066623330116272, "learning_rate": 7.059437978995175e-05, "loss": 0.0018370306119322778, "step": 103640 }, { "epoch": 29.4209480556344, "grad_norm": 0.23169462382793427, "learning_rate": 7.059154130002839e-05, "loss": 0.003007340803742409, "step": 103650 }, { "epoch": 29.423786545557764, "grad_norm": 4.806514263153076, "learning_rate": 7.058870281010502e-05, "loss": 0.002914068289101124, "step": 103660 }, { "epoch": 29.426625035481123, "grad_norm": 5.021941184997559, "learning_rate": 7.058586432018167e-05, "loss": 0.006899186968803405, "step": 103670 }, { "epoch": 29.429463525404486, "grad_norm": 0.888260006904602, "learning_rate": 7.058302583025831e-05, "loss": 0.013815516233444214, "step": 103680 }, { "epoch": 29.432302015327846, "grad_norm": 11.069615364074707, "learning_rate": 7.058018734033494e-05, "loss": 0.010396827012300491, "step": 103690 }, { "epoch": 29.435140505251205, "grad_norm": 0.13415996730327606, "learning_rate": 7.057734885041158e-05, "loss": 0.003909799456596375, "step": 103700 }, { "epoch": 29.437978995174568, "grad_norm": 4.073919773101807, "learning_rate": 7.057451036048822e-05, "loss": 0.008818449079990387, "step": 103710 }, { "epoch": 29.440817485097927, "grad_norm": 0.3331654667854309, "learning_rate": 7.057167187056487e-05, "loss": 0.004777035117149353, "step": 103720 }, { "epoch": 29.44365597502129, "grad_norm": 1.7960302829742432, "learning_rate": 7.05688333806415e-05, "loss": 0.01357143372297287, "step": 103730 }, { "epoch": 29.44649446494465, "grad_norm": 0.21773917973041534, "learning_rate": 7.056599489071815e-05, "loss": 0.017319586873054505, "step": 103740 }, { "epoch": 29.44933295486801, "grad_norm": 2.9844822883605957, "learning_rate": 7.056315640079479e-05, "loss": 0.01165100634098053, "step": 103750 }, { "epoch": 29.45217144479137, "grad_norm": 0.426104873418808, "learning_rate": 7.056031791087142e-05, "loss": 0.015801315009593964, "step": 103760 }, { "epoch": 29.45500993471473, "grad_norm": 0.9602954387664795, "learning_rate": 7.055747942094806e-05, "loss": 0.009628866612911225, "step": 103770 }, { "epoch": 29.457848424638094, "grad_norm": 0.09776807576417923, "learning_rate": 7.05546409310247e-05, "loss": 0.0030048536136746407, "step": 103780 }, { "epoch": 29.460686914561453, "grad_norm": 0.2873948812484741, "learning_rate": 7.055180244110133e-05, "loss": 0.0038507327437400816, "step": 103790 }, { "epoch": 29.463525404484812, "grad_norm": 0.2374514788389206, "learning_rate": 7.054896395117798e-05, "loss": 0.00885273888707161, "step": 103800 }, { "epoch": 29.466363894408175, "grad_norm": 3.758695363998413, "learning_rate": 7.054612546125462e-05, "loss": 0.004356545954942703, "step": 103810 }, { "epoch": 29.469202384331535, "grad_norm": 3.9880428314208984, "learning_rate": 7.054328697133125e-05, "loss": 0.002058972045779228, "step": 103820 }, { "epoch": 29.472040874254898, "grad_norm": 0.5955458879470825, "learning_rate": 7.05404484814079e-05, "loss": 0.01422138512134552, "step": 103830 }, { "epoch": 29.474879364178257, "grad_norm": 0.39462944865226746, "learning_rate": 7.053760999148453e-05, "loss": 0.0062198486179113385, "step": 103840 }, { "epoch": 29.477717854101616, "grad_norm": 0.03203849866986275, "learning_rate": 7.053477150156118e-05, "loss": 0.013858599960803986, "step": 103850 }, { "epoch": 29.48055634402498, "grad_norm": 0.46606898307800293, "learning_rate": 7.05319330116378e-05, "loss": 0.000899435207247734, "step": 103860 }, { "epoch": 29.48339483394834, "grad_norm": 9.989876747131348, "learning_rate": 7.052909452171446e-05, "loss": 0.008823873102664947, "step": 103870 }, { "epoch": 29.4862333238717, "grad_norm": 0.03247122839093208, "learning_rate": 7.05262560317911e-05, "loss": 0.005499240010976791, "step": 103880 }, { "epoch": 29.48907181379506, "grad_norm": 1.796862244606018, "learning_rate": 7.052341754186773e-05, "loss": 0.0008478935807943345, "step": 103890 }, { "epoch": 29.49191030371842, "grad_norm": 0.025143835693597794, "learning_rate": 7.052057905194437e-05, "loss": 0.0019030187278985978, "step": 103900 }, { "epoch": 29.494748793641783, "grad_norm": 1.0880335569381714, "learning_rate": 7.051774056202101e-05, "loss": 0.012386520206928254, "step": 103910 }, { "epoch": 29.497587283565142, "grad_norm": 0.2634827494621277, "learning_rate": 7.051490207209764e-05, "loss": 0.007631619274616241, "step": 103920 }, { "epoch": 29.500425773488505, "grad_norm": 1.4252289533615112, "learning_rate": 7.051206358217428e-05, "loss": 0.019164110720157623, "step": 103930 }, { "epoch": 29.503264263411864, "grad_norm": 0.0055681634694337845, "learning_rate": 7.050922509225094e-05, "loss": 0.011327625811100006, "step": 103940 }, { "epoch": 29.506102753335227, "grad_norm": 0.32038414478302, "learning_rate": 7.050638660232756e-05, "loss": 0.007739594578742981, "step": 103950 }, { "epoch": 29.508941243258587, "grad_norm": 10.765769958496094, "learning_rate": 7.05035481124042e-05, "loss": 0.007653757929801941, "step": 103960 }, { "epoch": 29.511779733181946, "grad_norm": 0.16425219178199768, "learning_rate": 7.050070962248085e-05, "loss": 0.00582299679517746, "step": 103970 }, { "epoch": 29.51461822310531, "grad_norm": 0.01900593750178814, "learning_rate": 7.049787113255749e-05, "loss": 0.002057766169309616, "step": 103980 }, { "epoch": 29.51745671302867, "grad_norm": 0.27938827872276306, "learning_rate": 7.049503264263411e-05, "loss": 0.004652947559952736, "step": 103990 }, { "epoch": 29.52029520295203, "grad_norm": 0.41102135181427, "learning_rate": 7.049219415271077e-05, "loss": 0.01096629500389099, "step": 104000 }, { "epoch": 29.52029520295203, "eval_accuracy": 0.9696699942773574, "eval_loss": 0.11692774295806885, "eval_runtime": 40.3184, "eval_samples_per_second": 390.07, "eval_steps_per_second": 6.101, "step": 104000 }, { "epoch": 29.52313369287539, "grad_norm": 0.4741039574146271, "learning_rate": 7.04893556627874e-05, "loss": 0.017605021595954895, "step": 104010 }, { "epoch": 29.52597218279875, "grad_norm": 12.083264350891113, "learning_rate": 7.048651717286404e-05, "loss": 0.03439448475837707, "step": 104020 }, { "epoch": 29.528810672722113, "grad_norm": 0.2570129930973053, "learning_rate": 7.048367868294068e-05, "loss": 0.0031785499304533005, "step": 104030 }, { "epoch": 29.531649162645472, "grad_norm": 2.634061336517334, "learning_rate": 7.048084019301732e-05, "loss": 0.00900358408689499, "step": 104040 }, { "epoch": 29.534487652568835, "grad_norm": 3.2833728790283203, "learning_rate": 7.047800170309395e-05, "loss": 0.0021716903895139692, "step": 104050 }, { "epoch": 29.537326142492194, "grad_norm": 0.039194803684949875, "learning_rate": 7.047516321317059e-05, "loss": 0.004054096713662148, "step": 104060 }, { "epoch": 29.540164632415554, "grad_norm": 3.9811508655548096, "learning_rate": 7.047232472324725e-05, "loss": 0.008692532032728194, "step": 104070 }, { "epoch": 29.543003122338916, "grad_norm": 1.7783180475234985, "learning_rate": 7.046948623332387e-05, "loss": 0.0022165695205330848, "step": 104080 }, { "epoch": 29.545841612262276, "grad_norm": 0.6104259490966797, "learning_rate": 7.046664774340052e-05, "loss": 0.0028809169307351112, "step": 104090 }, { "epoch": 29.54868010218564, "grad_norm": 1.7129415273666382, "learning_rate": 7.046380925347716e-05, "loss": 0.0024476051330566406, "step": 104100 }, { "epoch": 29.551518592108998, "grad_norm": 0.4522014260292053, "learning_rate": 7.046097076355378e-05, "loss": 0.0046954374760389325, "step": 104110 }, { "epoch": 29.554357082032357, "grad_norm": 0.10867507755756378, "learning_rate": 7.045813227363043e-05, "loss": 0.007964217662811279, "step": 104120 }, { "epoch": 29.55719557195572, "grad_norm": 0.31159549951553345, "learning_rate": 7.045529378370707e-05, "loss": 0.011020883917808533, "step": 104130 }, { "epoch": 29.56003406187908, "grad_norm": 0.24300289154052734, "learning_rate": 7.045245529378371e-05, "loss": 0.0049760833382606505, "step": 104140 }, { "epoch": 29.562872551802442, "grad_norm": 0.5867039561271667, "learning_rate": 7.044961680386035e-05, "loss": 0.009458965808153152, "step": 104150 }, { "epoch": 29.565711041725802, "grad_norm": 7.1302995681762695, "learning_rate": 7.044677831393699e-05, "loss": 0.00648997575044632, "step": 104160 }, { "epoch": 29.56854953164916, "grad_norm": 7.827655792236328, "learning_rate": 7.044393982401363e-05, "loss": 0.0029808782041072846, "step": 104170 }, { "epoch": 29.571388021572524, "grad_norm": 0.1206723302602768, "learning_rate": 7.044110133409026e-05, "loss": 0.00687478631734848, "step": 104180 }, { "epoch": 29.574226511495883, "grad_norm": 0.02733111009001732, "learning_rate": 7.04382628441669e-05, "loss": 0.005626809969544411, "step": 104190 }, { "epoch": 29.577065001419246, "grad_norm": 1.153218388557434, "learning_rate": 7.043542435424356e-05, "loss": 0.0062436796724796295, "step": 104200 }, { "epoch": 29.579903491342606, "grad_norm": 9.907938003540039, "learning_rate": 7.043258586432018e-05, "loss": 0.005605717748403549, "step": 104210 }, { "epoch": 29.582741981265965, "grad_norm": 11.575383186340332, "learning_rate": 7.042974737439683e-05, "loss": 0.014098662137985229, "step": 104220 }, { "epoch": 29.585580471189328, "grad_norm": 9.408905029296875, "learning_rate": 7.042690888447347e-05, "loss": 0.014408381283283233, "step": 104230 }, { "epoch": 29.588418961112687, "grad_norm": 13.9119234085083, "learning_rate": 7.04240703945501e-05, "loss": 0.01890595257282257, "step": 104240 }, { "epoch": 29.59125745103605, "grad_norm": 1.4995061159133911, "learning_rate": 7.042123190462674e-05, "loss": 0.026689592003822326, "step": 104250 }, { "epoch": 29.59409594095941, "grad_norm": 0.4793344736099243, "learning_rate": 7.041839341470338e-05, "loss": 0.013016800582408904, "step": 104260 }, { "epoch": 29.59693443088277, "grad_norm": 0.16791535913944244, "learning_rate": 7.041555492478002e-05, "loss": 0.01818939596414566, "step": 104270 }, { "epoch": 29.59977292080613, "grad_norm": 2.7713959217071533, "learning_rate": 7.041271643485666e-05, "loss": 0.04296115040779114, "step": 104280 }, { "epoch": 29.60261141072949, "grad_norm": 0.13261035084724426, "learning_rate": 7.04098779449333e-05, "loss": 0.011190207302570343, "step": 104290 }, { "epoch": 29.605449900652854, "grad_norm": 10.679628372192383, "learning_rate": 7.040703945500994e-05, "loss": 0.005079788714647293, "step": 104300 }, { "epoch": 29.608288390576213, "grad_norm": 0.4355761706829071, "learning_rate": 7.040420096508657e-05, "loss": 0.00399339348077774, "step": 104310 }, { "epoch": 29.611126880499576, "grad_norm": 0.33626800775527954, "learning_rate": 7.040136247516321e-05, "loss": 0.005849495902657509, "step": 104320 }, { "epoch": 29.613965370422935, "grad_norm": 0.05790066719055176, "learning_rate": 7.039852398523985e-05, "loss": 0.0022392852231860163, "step": 104330 }, { "epoch": 29.616803860346295, "grad_norm": 0.7098363041877747, "learning_rate": 7.03956854953165e-05, "loss": 0.0021981958299875258, "step": 104340 }, { "epoch": 29.619642350269658, "grad_norm": 0.40363550186157227, "learning_rate": 7.039284700539314e-05, "loss": 0.0043246690183877945, "step": 104350 }, { "epoch": 29.622480840193017, "grad_norm": 0.2202400118112564, "learning_rate": 7.039000851546978e-05, "loss": 0.01169324740767479, "step": 104360 }, { "epoch": 29.62531933011638, "grad_norm": 0.12550804018974304, "learning_rate": 7.03871700255464e-05, "loss": 0.003038795851171017, "step": 104370 }, { "epoch": 29.62815782003974, "grad_norm": 0.8417041301727295, "learning_rate": 7.038433153562305e-05, "loss": 0.015857328474521638, "step": 104380 }, { "epoch": 29.6309963099631, "grad_norm": 0.3053523600101471, "learning_rate": 7.038149304569969e-05, "loss": 0.005307696759700775, "step": 104390 }, { "epoch": 29.63383479988646, "grad_norm": 0.13620400428771973, "learning_rate": 7.037865455577633e-05, "loss": 0.010741746425628662, "step": 104400 }, { "epoch": 29.63667328980982, "grad_norm": 6.057865142822266, "learning_rate": 7.037581606585297e-05, "loss": 0.004363071173429489, "step": 104410 }, { "epoch": 29.639511779733184, "grad_norm": 0.055535778403282166, "learning_rate": 7.037297757592961e-05, "loss": 0.004672433435916901, "step": 104420 }, { "epoch": 29.642350269656543, "grad_norm": 0.15063011646270752, "learning_rate": 7.037013908600625e-05, "loss": 0.00786498561501503, "step": 104430 }, { "epoch": 29.645188759579902, "grad_norm": 0.019659265875816345, "learning_rate": 7.036730059608288e-05, "loss": 0.0018452003598213196, "step": 104440 }, { "epoch": 29.648027249503265, "grad_norm": 0.06074156612157822, "learning_rate": 7.036446210615952e-05, "loss": 0.00478348396718502, "step": 104450 }, { "epoch": 29.650865739426624, "grad_norm": 0.22832505404949188, "learning_rate": 7.036162361623616e-05, "loss": 0.013571366667747498, "step": 104460 }, { "epoch": 29.653704229349987, "grad_norm": 2.3611814975738525, "learning_rate": 7.03587851263128e-05, "loss": 0.053974831104278566, "step": 104470 }, { "epoch": 29.656542719273347, "grad_norm": 15.754728317260742, "learning_rate": 7.035594663638945e-05, "loss": 0.026185524463653565, "step": 104480 }, { "epoch": 29.659381209196706, "grad_norm": 0.16022558510303497, "learning_rate": 7.035310814646609e-05, "loss": 0.0016054334118962288, "step": 104490 }, { "epoch": 29.66221969912007, "grad_norm": 0.5218151211738586, "learning_rate": 7.035026965654272e-05, "loss": 0.006125032901763916, "step": 104500 }, { "epoch": 29.66221969912007, "eval_accuracy": 0.9793984866789598, "eval_loss": 0.07866594940423965, "eval_runtime": 37.2607, "eval_samples_per_second": 422.08, "eval_steps_per_second": 6.602, "step": 104500 }, { "epoch": 29.665058189043428, "grad_norm": 0.03097228892147541, "learning_rate": 7.034743116661936e-05, "loss": 0.014207938313484192, "step": 104510 }, { "epoch": 29.66789667896679, "grad_norm": 0.03639503940939903, "learning_rate": 7.0344592676696e-05, "loss": 0.010864658653736115, "step": 104520 }, { "epoch": 29.67073516889015, "grad_norm": 6.166848182678223, "learning_rate": 7.034175418677264e-05, "loss": 0.011114349961280823, "step": 104530 }, { "epoch": 29.67357365881351, "grad_norm": 0.8317022323608398, "learning_rate": 7.033891569684928e-05, "loss": 0.0025549616664648058, "step": 104540 }, { "epoch": 29.676412148736873, "grad_norm": 0.11059737205505371, "learning_rate": 7.033607720692592e-05, "loss": 0.011132447421550751, "step": 104550 }, { "epoch": 29.679250638660232, "grad_norm": 14.669717788696289, "learning_rate": 7.033323871700256e-05, "loss": 0.01383877992630005, "step": 104560 }, { "epoch": 29.682089128583595, "grad_norm": 2.162325620651245, "learning_rate": 7.033040022707919e-05, "loss": 0.02490386962890625, "step": 104570 }, { "epoch": 29.684927618506954, "grad_norm": 0.07277125865221024, "learning_rate": 7.032756173715583e-05, "loss": 0.007985685020685196, "step": 104580 }, { "epoch": 29.687766108430313, "grad_norm": 0.19047331809997559, "learning_rate": 7.032472324723248e-05, "loss": 0.00914025753736496, "step": 104590 }, { "epoch": 29.690604598353676, "grad_norm": 0.0326804481446743, "learning_rate": 7.032188475730912e-05, "loss": 0.0065506711602210995, "step": 104600 }, { "epoch": 29.693443088277036, "grad_norm": 2.223926305770874, "learning_rate": 7.031904626738576e-05, "loss": 0.008251075446605683, "step": 104610 }, { "epoch": 29.6962815782004, "grad_norm": 0.5905383229255676, "learning_rate": 7.03162077774624e-05, "loss": 0.002385716326534748, "step": 104620 }, { "epoch": 29.699120068123758, "grad_norm": 10.321869850158691, "learning_rate": 7.031336928753903e-05, "loss": 0.00955633819103241, "step": 104630 }, { "epoch": 29.701958558047117, "grad_norm": 0.04827513545751572, "learning_rate": 7.031053079761567e-05, "loss": 0.001875384896993637, "step": 104640 }, { "epoch": 29.70479704797048, "grad_norm": 0.3807523846626282, "learning_rate": 7.030769230769231e-05, "loss": 0.001556350663304329, "step": 104650 }, { "epoch": 29.70763553789384, "grad_norm": 10.818826675415039, "learning_rate": 7.030485381776895e-05, "loss": 0.00466485321521759, "step": 104660 }, { "epoch": 29.710474027817202, "grad_norm": 0.4464725852012634, "learning_rate": 7.030201532784559e-05, "loss": 0.00362769216299057, "step": 104670 }, { "epoch": 29.71331251774056, "grad_norm": 11.564745903015137, "learning_rate": 7.029917683792223e-05, "loss": 0.013236090540885925, "step": 104680 }, { "epoch": 29.716151007663925, "grad_norm": 1.0625172853469849, "learning_rate": 7.029633834799888e-05, "loss": 0.006999219954013825, "step": 104690 }, { "epoch": 29.718989497587284, "grad_norm": 0.47960716485977173, "learning_rate": 7.02934998580755e-05, "loss": 0.0008283751085400581, "step": 104700 }, { "epoch": 29.721827987510643, "grad_norm": 2.7839486598968506, "learning_rate": 7.029066136815214e-05, "loss": 0.0035537946969270706, "step": 104710 }, { "epoch": 29.724666477434006, "grad_norm": 0.6846930384635925, "learning_rate": 7.028782287822879e-05, "loss": 0.005527329444885254, "step": 104720 }, { "epoch": 29.727504967357365, "grad_norm": 0.30071941018104553, "learning_rate": 7.028498438830543e-05, "loss": 0.0018810754641890525, "step": 104730 }, { "epoch": 29.73034345728073, "grad_norm": 5.454932689666748, "learning_rate": 7.028214589838207e-05, "loss": 0.003600745648145676, "step": 104740 }, { "epoch": 29.733181947204088, "grad_norm": 12.749947547912598, "learning_rate": 7.027930740845871e-05, "loss": 0.014507630467414856, "step": 104750 }, { "epoch": 29.736020437127447, "grad_norm": 0.32865938544273376, "learning_rate": 7.027646891853534e-05, "loss": 0.004880086332559585, "step": 104760 }, { "epoch": 29.73885892705081, "grad_norm": 0.14995701611042023, "learning_rate": 7.027363042861198e-05, "loss": 0.003620816022157669, "step": 104770 }, { "epoch": 29.74169741697417, "grad_norm": 13.65723705291748, "learning_rate": 7.027079193868862e-05, "loss": 0.02340906411409378, "step": 104780 }, { "epoch": 29.744535906897532, "grad_norm": 12.638481140136719, "learning_rate": 7.026795344876526e-05, "loss": 0.018222993612289427, "step": 104790 }, { "epoch": 29.74737439682089, "grad_norm": 14.852761268615723, "learning_rate": 7.02651149588419e-05, "loss": 0.03909916579723358, "step": 104800 }, { "epoch": 29.75021288674425, "grad_norm": 0.3762814402580261, "learning_rate": 7.026227646891854e-05, "loss": 0.0018516691401600839, "step": 104810 }, { "epoch": 29.753051376667614, "grad_norm": 2.3613524436950684, "learning_rate": 7.025943797899517e-05, "loss": 0.01245633065700531, "step": 104820 }, { "epoch": 29.755889866590973, "grad_norm": 9.531704902648926, "learning_rate": 7.025659948907181e-05, "loss": 0.005646585673093796, "step": 104830 }, { "epoch": 29.758728356514336, "grad_norm": 13.053693771362305, "learning_rate": 7.025376099914846e-05, "loss": 0.01972360610961914, "step": 104840 }, { "epoch": 29.761566846437695, "grad_norm": 0.010592392645776272, "learning_rate": 7.02509225092251e-05, "loss": 0.001736806333065033, "step": 104850 }, { "epoch": 29.764405336361055, "grad_norm": 0.22966904938220978, "learning_rate": 7.024808401930172e-05, "loss": 0.00047459118068218233, "step": 104860 }, { "epoch": 29.767243826284417, "grad_norm": 4.261645317077637, "learning_rate": 7.024524552937838e-05, "loss": 0.003225118666887283, "step": 104870 }, { "epoch": 29.770082316207777, "grad_norm": 7.717243194580078, "learning_rate": 7.024240703945502e-05, "loss": 0.009378612041473389, "step": 104880 }, { "epoch": 29.77292080613114, "grad_norm": 18.060535430908203, "learning_rate": 7.023956854953165e-05, "loss": 0.012392349541187286, "step": 104890 }, { "epoch": 29.7757592960545, "grad_norm": 0.5941178798675537, "learning_rate": 7.023673005960829e-05, "loss": 0.0045360889285802845, "step": 104900 }, { "epoch": 29.77859778597786, "grad_norm": 0.12621711194515228, "learning_rate": 7.023389156968493e-05, "loss": 0.009635479748249054, "step": 104910 }, { "epoch": 29.78143627590122, "grad_norm": 0.6371712684631348, "learning_rate": 7.023105307976157e-05, "loss": 0.0042336761951446535, "step": 104920 }, { "epoch": 29.78427476582458, "grad_norm": 0.09829859435558319, "learning_rate": 7.022821458983821e-05, "loss": 0.008691619336605071, "step": 104930 }, { "epoch": 29.787113255747943, "grad_norm": 0.09240065515041351, "learning_rate": 7.022537609991486e-05, "loss": 0.0010464765131473542, "step": 104940 }, { "epoch": 29.789951745671303, "grad_norm": 0.5223007798194885, "learning_rate": 7.022253760999148e-05, "loss": 0.007033780217170715, "step": 104950 }, { "epoch": 29.792790235594662, "grad_norm": 1.8872053623199463, "learning_rate": 7.021969912006812e-05, "loss": 0.008488944172859192, "step": 104960 }, { "epoch": 29.795628725518025, "grad_norm": 0.42803439497947693, "learning_rate": 7.021686063014477e-05, "loss": 0.011988784372806548, "step": 104970 }, { "epoch": 29.798467215441384, "grad_norm": 0.2282242774963379, "learning_rate": 7.021402214022141e-05, "loss": 0.018305541574954988, "step": 104980 }, { "epoch": 29.801305705364747, "grad_norm": 0.0979098454117775, "learning_rate": 7.021118365029804e-05, "loss": 0.0022762950509786608, "step": 104990 }, { "epoch": 29.804144195288107, "grad_norm": 0.07788702845573425, "learning_rate": 7.020834516037469e-05, "loss": 0.0011026043444871902, "step": 105000 }, { "epoch": 29.804144195288107, "eval_accuracy": 0.9797164112672474, "eval_loss": 0.07634983956813812, "eval_runtime": 36.0926, "eval_samples_per_second": 435.741, "eval_steps_per_second": 6.816, "step": 105000 }, { "epoch": 29.806982685211466, "grad_norm": 0.26526764035224915, "learning_rate": 7.020550667045133e-05, "loss": 0.002020653896033764, "step": 105010 }, { "epoch": 29.80982117513483, "grad_norm": 0.21614046394824982, "learning_rate": 7.020266818052796e-05, "loss": 0.006563082337379456, "step": 105020 }, { "epoch": 29.812659665058188, "grad_norm": 0.030713649466633797, "learning_rate": 7.01998296906046e-05, "loss": 0.009301010519266129, "step": 105030 }, { "epoch": 29.81549815498155, "grad_norm": 2.4495084285736084, "learning_rate": 7.019699120068124e-05, "loss": 0.003066035732626915, "step": 105040 }, { "epoch": 29.81833664490491, "grad_norm": 0.2349637746810913, "learning_rate": 7.019415271075787e-05, "loss": 0.004753347486257553, "step": 105050 }, { "epoch": 29.82117513482827, "grad_norm": 0.203217551112175, "learning_rate": 7.019131422083451e-05, "loss": 0.01954064667224884, "step": 105060 }, { "epoch": 29.824013624751633, "grad_norm": 0.42894691228866577, "learning_rate": 7.018847573091117e-05, "loss": 0.006595294177532196, "step": 105070 }, { "epoch": 29.826852114674992, "grad_norm": 0.3220183253288269, "learning_rate": 7.01856372409878e-05, "loss": 0.015787962079048156, "step": 105080 }, { "epoch": 29.829690604598355, "grad_norm": 0.19603605568408966, "learning_rate": 7.018279875106444e-05, "loss": 0.005059588700532913, "step": 105090 }, { "epoch": 29.832529094521714, "grad_norm": 8.971372604370117, "learning_rate": 7.017996026114108e-05, "loss": 0.01972014307975769, "step": 105100 }, { "epoch": 29.835367584445073, "grad_norm": 5.584841251373291, "learning_rate": 7.017712177121772e-05, "loss": 0.0060475192964077, "step": 105110 }, { "epoch": 29.838206074368436, "grad_norm": 5.1389312744140625, "learning_rate": 7.017428328129435e-05, "loss": 0.019716440141201018, "step": 105120 }, { "epoch": 29.841044564291796, "grad_norm": 0.022648915648460388, "learning_rate": 7.0171444791371e-05, "loss": 0.005696957930922508, "step": 105130 }, { "epoch": 29.84388305421516, "grad_norm": 0.8203586339950562, "learning_rate": 7.016860630144764e-05, "loss": 0.009664452821016311, "step": 105140 }, { "epoch": 29.846721544138518, "grad_norm": 12.464323043823242, "learning_rate": 7.016576781152427e-05, "loss": 0.012049168348312378, "step": 105150 }, { "epoch": 29.84956003406188, "grad_norm": 0.6183509230613708, "learning_rate": 7.016292932160091e-05, "loss": 0.002764243632555008, "step": 105160 }, { "epoch": 29.85239852398524, "grad_norm": 1.3188225030899048, "learning_rate": 7.016009083167755e-05, "loss": 0.005904610827565193, "step": 105170 }, { "epoch": 29.8552370139086, "grad_norm": 0.43102845549583435, "learning_rate": 7.015725234175418e-05, "loss": 0.0034043677151203156, "step": 105180 }, { "epoch": 29.858075503831962, "grad_norm": 11.341339111328125, "learning_rate": 7.015441385183082e-05, "loss": 0.008077405393123627, "step": 105190 }, { "epoch": 29.86091399375532, "grad_norm": 0.23254576325416565, "learning_rate": 7.015157536190748e-05, "loss": 0.0031148724257946013, "step": 105200 }, { "epoch": 29.863752483678685, "grad_norm": 0.20268210768699646, "learning_rate": 7.01487368719841e-05, "loss": 0.0047397613525390625, "step": 105210 }, { "epoch": 29.866590973602044, "grad_norm": 2.843921422958374, "learning_rate": 7.014589838206075e-05, "loss": 0.007148700952529907, "step": 105220 }, { "epoch": 29.869429463525403, "grad_norm": 0.02697058767080307, "learning_rate": 7.014305989213739e-05, "loss": 0.0063497394323349, "step": 105230 }, { "epoch": 29.872267953448766, "grad_norm": 5.931494235992432, "learning_rate": 7.014022140221403e-05, "loss": 0.0072091430425643924, "step": 105240 }, { "epoch": 29.875106443372125, "grad_norm": 0.32165834307670593, "learning_rate": 7.013738291229066e-05, "loss": 0.007886459678411483, "step": 105250 }, { "epoch": 29.87794493329549, "grad_norm": 0.39552590250968933, "learning_rate": 7.01345444223673e-05, "loss": 0.005017712712287903, "step": 105260 }, { "epoch": 29.880783423218848, "grad_norm": 5.754908084869385, "learning_rate": 7.013170593244395e-05, "loss": 0.0075731068849563595, "step": 105270 }, { "epoch": 29.883621913142207, "grad_norm": 8.328323364257812, "learning_rate": 7.012886744252058e-05, "loss": 0.004991422593593598, "step": 105280 }, { "epoch": 29.88646040306557, "grad_norm": 0.24464401602745056, "learning_rate": 7.012602895259722e-05, "loss": 0.0009146898984909058, "step": 105290 }, { "epoch": 29.88929889298893, "grad_norm": 9.141841888427734, "learning_rate": 7.012319046267386e-05, "loss": 0.003516557440161705, "step": 105300 }, { "epoch": 29.892137382912292, "grad_norm": 0.036441948264837265, "learning_rate": 7.012035197275049e-05, "loss": 0.009752649068832397, "step": 105310 }, { "epoch": 29.89497587283565, "grad_norm": 0.947446346282959, "learning_rate": 7.011751348282713e-05, "loss": 0.00723826140165329, "step": 105320 }, { "epoch": 29.89781436275901, "grad_norm": 0.28217047452926636, "learning_rate": 7.011467499290379e-05, "loss": 0.000791272521018982, "step": 105330 }, { "epoch": 29.900652852682374, "grad_norm": 0.10848884284496307, "learning_rate": 7.011183650298042e-05, "loss": 0.002096044272184372, "step": 105340 }, { "epoch": 29.903491342605733, "grad_norm": 2.6259958744049072, "learning_rate": 7.010899801305706e-05, "loss": 0.002365650795400143, "step": 105350 }, { "epoch": 29.906329832529096, "grad_norm": 2.041146993637085, "learning_rate": 7.01061595231337e-05, "loss": 0.008135033398866653, "step": 105360 }, { "epoch": 29.909168322452455, "grad_norm": 0.03861755505204201, "learning_rate": 7.010332103321034e-05, "loss": 0.002494787983596325, "step": 105370 }, { "epoch": 29.912006812375814, "grad_norm": 1.6384340524673462, "learning_rate": 7.010048254328697e-05, "loss": 0.005030780285596848, "step": 105380 }, { "epoch": 29.914845302299177, "grad_norm": 0.24984674155712128, "learning_rate": 7.009764405336361e-05, "loss": 0.01204833686351776, "step": 105390 }, { "epoch": 29.917683792222537, "grad_norm": 4.174016952514648, "learning_rate": 7.009508941243259e-05, "loss": 0.01756483465433121, "step": 105400 }, { "epoch": 29.9205222821459, "grad_norm": 8.825915336608887, "learning_rate": 7.009225092250923e-05, "loss": 0.02463042438030243, "step": 105410 }, { "epoch": 29.92336077206926, "grad_norm": 0.05201965570449829, "learning_rate": 7.008941243258587e-05, "loss": 0.0013614773750305175, "step": 105420 }, { "epoch": 29.92619926199262, "grad_norm": 1.1030659675598145, "learning_rate": 7.00865739426625e-05, "loss": 0.014642062783241271, "step": 105430 }, { "epoch": 29.92903775191598, "grad_norm": 1.4973117113113403, "learning_rate": 7.008373545273914e-05, "loss": 0.00880921259522438, "step": 105440 }, { "epoch": 29.93187624183934, "grad_norm": 0.14143489301204681, "learning_rate": 7.00808969628158e-05, "loss": 0.002325163409113884, "step": 105450 }, { "epoch": 29.934714731762703, "grad_norm": 1.4384751319885254, "learning_rate": 7.007805847289242e-05, "loss": 0.010586122423410416, "step": 105460 }, { "epoch": 29.937553221686063, "grad_norm": 4.771899700164795, "learning_rate": 7.007521998296907e-05, "loss": 0.03004520237445831, "step": 105470 }, { "epoch": 29.940391711609422, "grad_norm": 14.201830863952637, "learning_rate": 7.00723814930457e-05, "loss": 0.027274689078330992, "step": 105480 }, { "epoch": 29.943230201532785, "grad_norm": 0.0545487143099308, "learning_rate": 7.006954300312233e-05, "loss": 0.00874134972691536, "step": 105490 }, { "epoch": 29.946068691456144, "grad_norm": 11.436427116394043, "learning_rate": 7.006670451319898e-05, "loss": 0.010111993551254273, "step": 105500 }, { "epoch": 29.946068691456144, "eval_accuracy": 0.976791505055001, "eval_loss": 0.08885300904512405, "eval_runtime": 41.0626, "eval_samples_per_second": 383.001, "eval_steps_per_second": 5.991, "step": 105500 }, { "epoch": 29.948907181379507, "grad_norm": 0.1975284069776535, "learning_rate": 7.006386602327563e-05, "loss": 0.01339842975139618, "step": 105510 }, { "epoch": 29.951745671302866, "grad_norm": 6.9377827644348145, "learning_rate": 7.006102753335226e-05, "loss": 0.008547855913639069, "step": 105520 }, { "epoch": 29.95458416122623, "grad_norm": 0.342695027589798, "learning_rate": 7.00581890434289e-05, "loss": 0.006942776590585708, "step": 105530 }, { "epoch": 29.95742265114959, "grad_norm": 0.6198069453239441, "learning_rate": 7.005535055350554e-05, "loss": 0.014619725942611694, "step": 105540 }, { "epoch": 29.960261141072948, "grad_norm": 0.043149564415216446, "learning_rate": 7.005251206358218e-05, "loss": 0.003975979238748551, "step": 105550 }, { "epoch": 29.96309963099631, "grad_norm": 0.13954222202301025, "learning_rate": 7.004967357365881e-05, "loss": 0.0050769723951816555, "step": 105560 }, { "epoch": 29.96593812091967, "grad_norm": 0.8098927140235901, "learning_rate": 7.004683508373545e-05, "loss": 0.011373057216405868, "step": 105570 }, { "epoch": 29.968776610843033, "grad_norm": 0.08523678779602051, "learning_rate": 7.00439965938121e-05, "loss": 0.01820022165775299, "step": 105580 }, { "epoch": 29.971615100766392, "grad_norm": 7.3693389892578125, "learning_rate": 7.004115810388873e-05, "loss": 0.006942632794380188, "step": 105590 }, { "epoch": 29.974453590689752, "grad_norm": 0.0762842670083046, "learning_rate": 7.003831961396538e-05, "loss": 0.0019969558343291283, "step": 105600 }, { "epoch": 29.977292080613115, "grad_norm": 0.5044325590133667, "learning_rate": 7.003548112404202e-05, "loss": 0.0022515242919325827, "step": 105610 }, { "epoch": 29.980130570536474, "grad_norm": 0.04925083369016647, "learning_rate": 7.003264263411865e-05, "loss": 0.002902640774846077, "step": 105620 }, { "epoch": 29.982969060459837, "grad_norm": 0.2009287029504776, "learning_rate": 7.002980414419529e-05, "loss": 0.001916479505598545, "step": 105630 }, { "epoch": 29.985807550383196, "grad_norm": 0.13478079438209534, "learning_rate": 7.002696565427193e-05, "loss": 0.008181408792734147, "step": 105640 }, { "epoch": 29.988646040306556, "grad_norm": 0.12612465023994446, "learning_rate": 7.002412716434857e-05, "loss": 0.0008441591635346413, "step": 105650 }, { "epoch": 29.99148453022992, "grad_norm": 0.1700642853975296, "learning_rate": 7.002128867442521e-05, "loss": 0.0060045354068279265, "step": 105660 }, { "epoch": 29.994323020153278, "grad_norm": 8.891749382019043, "learning_rate": 7.001845018450185e-05, "loss": 0.008689849823713302, "step": 105670 }, { "epoch": 29.99716151007664, "grad_norm": 10.88906478881836, "learning_rate": 7.00156116945785e-05, "loss": 0.01955513060092926, "step": 105680 }, { "epoch": 30.0, "grad_norm": 0.024598971009254456, "learning_rate": 7.001277320465512e-05, "loss": 0.002655475027859211, "step": 105690 }, { "epoch": 30.00283848992336, "grad_norm": 0.029611993581056595, "learning_rate": 7.000993471473176e-05, "loss": 0.0014307910576462745, "step": 105700 }, { "epoch": 30.005676979846722, "grad_norm": 0.1840161830186844, "learning_rate": 7.00070962248084e-05, "loss": 0.003181833028793335, "step": 105710 }, { "epoch": 30.00851546977008, "grad_norm": 0.017110604792833328, "learning_rate": 7.000425773488505e-05, "loss": 0.0027003856375813484, "step": 105720 }, { "epoch": 30.011353959693444, "grad_norm": 0.07765510678291321, "learning_rate": 7.000141924496169e-05, "loss": 0.0008284587413072586, "step": 105730 }, { "epoch": 30.014192449616804, "grad_norm": 0.05155980587005615, "learning_rate": 6.999858075503833e-05, "loss": 0.0010170621797442436, "step": 105740 }, { "epoch": 30.017030939540163, "grad_norm": 0.05861717835068703, "learning_rate": 6.999574226511496e-05, "loss": 0.0006217192858457565, "step": 105750 }, { "epoch": 30.019869429463526, "grad_norm": 0.3519217371940613, "learning_rate": 6.99929037751916e-05, "loss": 0.004423163831233978, "step": 105760 }, { "epoch": 30.022707919386885, "grad_norm": 0.17510278522968292, "learning_rate": 6.999006528526824e-05, "loss": 0.0034424055367708204, "step": 105770 }, { "epoch": 30.025546409310248, "grad_norm": 0.6225839853286743, "learning_rate": 6.998722679534488e-05, "loss": 0.0006158031523227691, "step": 105780 }, { "epoch": 30.028384899233608, "grad_norm": 0.05688975378870964, "learning_rate": 6.998438830542152e-05, "loss": 0.004506301134824753, "step": 105790 }, { "epoch": 30.031223389156967, "grad_norm": 10.071413040161133, "learning_rate": 6.998154981549816e-05, "loss": 0.019865140318870544, "step": 105800 }, { "epoch": 30.03406187908033, "grad_norm": 2.560915946960449, "learning_rate": 6.997871132557479e-05, "loss": 0.0055978305637836455, "step": 105810 }, { "epoch": 30.03690036900369, "grad_norm": 9.69041633605957, "learning_rate": 6.997587283565143e-05, "loss": 0.0087028369307518, "step": 105820 }, { "epoch": 30.039738858927052, "grad_norm": 0.03961024060845375, "learning_rate": 6.997303434572807e-05, "loss": 0.006828199326992035, "step": 105830 }, { "epoch": 30.04257734885041, "grad_norm": 1.7301018238067627, "learning_rate": 6.997019585580471e-05, "loss": 0.0022797010838985442, "step": 105840 }, { "epoch": 30.04541583877377, "grad_norm": 10.722440719604492, "learning_rate": 6.996735736588136e-05, "loss": 0.0208003968000412, "step": 105850 }, { "epoch": 30.048254328697134, "grad_norm": 0.010339085012674332, "learning_rate": 6.9964518875958e-05, "loss": 0.017690154910087585, "step": 105860 }, { "epoch": 30.051092818620493, "grad_norm": 0.5591611266136169, "learning_rate": 6.996168038603464e-05, "loss": 0.00461575947701931, "step": 105870 }, { "epoch": 30.053931308543856, "grad_norm": 2.772139549255371, "learning_rate": 6.995884189611127e-05, "loss": 0.00340351015329361, "step": 105880 }, { "epoch": 30.056769798467215, "grad_norm": 0.03154519945383072, "learning_rate": 6.995600340618791e-05, "loss": 0.0028292395174503325, "step": 105890 }, { "epoch": 30.059608288390578, "grad_norm": 0.09353585541248322, "learning_rate": 6.995316491626455e-05, "loss": 0.005626800656318665, "step": 105900 }, { "epoch": 30.062446778313937, "grad_norm": 0.06821340322494507, "learning_rate": 6.995032642634119e-05, "loss": 0.007695724070072174, "step": 105910 }, { "epoch": 30.065285268237297, "grad_norm": 0.033598411828279495, "learning_rate": 6.994748793641783e-05, "loss": 0.014626097679138184, "step": 105920 }, { "epoch": 30.06812375816066, "grad_norm": 0.041249629110097885, "learning_rate": 6.994464944649447e-05, "loss": 0.00434776246547699, "step": 105930 }, { "epoch": 30.07096224808402, "grad_norm": 1.3313554525375366, "learning_rate": 6.99418109565711e-05, "loss": 0.0036745384335517884, "step": 105940 }, { "epoch": 30.07380073800738, "grad_norm": 0.23842979967594147, "learning_rate": 6.993897246664774e-05, "loss": 0.005667246133089066, "step": 105950 }, { "epoch": 30.07663922793074, "grad_norm": 0.039151862263679504, "learning_rate": 6.993613397672438e-05, "loss": 0.0026280654594302176, "step": 105960 }, { "epoch": 30.0794777178541, "grad_norm": 0.034230899065732956, "learning_rate": 6.993329548680103e-05, "loss": 0.0005789715796709061, "step": 105970 }, { "epoch": 30.082316207777463, "grad_norm": 0.26601526141166687, "learning_rate": 6.993045699687767e-05, "loss": 0.004922967404127121, "step": 105980 }, { "epoch": 30.085154697700823, "grad_norm": 0.01713525503873825, "learning_rate": 6.992761850695431e-05, "loss": 0.007632723450660706, "step": 105990 }, { "epoch": 30.087993187624186, "grad_norm": 0.06239868327975273, "learning_rate": 6.992478001703095e-05, "loss": 0.006330890953540802, "step": 106000 }, { "epoch": 30.087993187624186, "eval_accuracy": 0.9799707509378776, "eval_loss": 0.07588774710893631, "eval_runtime": 50.0427, "eval_samples_per_second": 314.271, "eval_steps_per_second": 4.916, "step": 106000 }, { "epoch": 30.090831677547545, "grad_norm": 0.05150999128818512, "learning_rate": 6.992194152710758e-05, "loss": 0.003563813865184784, "step": 106010 }, { "epoch": 30.093670167470904, "grad_norm": 0.008722471073269844, "learning_rate": 6.991910303718422e-05, "loss": 0.002305770292878151, "step": 106020 }, { "epoch": 30.096508657394267, "grad_norm": 0.11602462828159332, "learning_rate": 6.991626454726086e-05, "loss": 0.0009524635970592499, "step": 106030 }, { "epoch": 30.099347147317626, "grad_norm": 1.495321273803711, "learning_rate": 6.991342605733749e-05, "loss": 0.019066745042800905, "step": 106040 }, { "epoch": 30.10218563724099, "grad_norm": 0.24989311397075653, "learning_rate": 6.991058756741414e-05, "loss": 0.005828274786472321, "step": 106050 }, { "epoch": 30.10502412716435, "grad_norm": 1.0468835830688477, "learning_rate": 6.990774907749078e-05, "loss": 0.002725880593061447, "step": 106060 }, { "epoch": 30.107862617087708, "grad_norm": 0.28454041481018066, "learning_rate": 6.990491058756741e-05, "loss": 0.0010823497548699379, "step": 106070 }, { "epoch": 30.11070110701107, "grad_norm": 0.5292192697525024, "learning_rate": 6.990207209764405e-05, "loss": 0.006508550047874451, "step": 106080 }, { "epoch": 30.11353959693443, "grad_norm": 0.08209606260061264, "learning_rate": 6.98992336077207e-05, "loss": 0.006531278789043427, "step": 106090 }, { "epoch": 30.116378086857793, "grad_norm": 9.441426277160645, "learning_rate": 6.989639511779734e-05, "loss": 0.006971447914838791, "step": 106100 }, { "epoch": 30.119216576781152, "grad_norm": 0.7987284064292908, "learning_rate": 6.989355662787398e-05, "loss": 0.0037715066224336623, "step": 106110 }, { "epoch": 30.12205506670451, "grad_norm": 0.044932279735803604, "learning_rate": 6.989071813795062e-05, "loss": 0.01008867397904396, "step": 106120 }, { "epoch": 30.124893556627875, "grad_norm": 9.828948974609375, "learning_rate": 6.988787964802726e-05, "loss": 0.0032803975045681, "step": 106130 }, { "epoch": 30.127732046551234, "grad_norm": 7.225677967071533, "learning_rate": 6.988504115810389e-05, "loss": 0.008944135904312134, "step": 106140 }, { "epoch": 30.130570536474597, "grad_norm": 0.19644777476787567, "learning_rate": 6.988220266818053e-05, "loss": 0.009919747710227966, "step": 106150 }, { "epoch": 30.133409026397956, "grad_norm": 0.20084965229034424, "learning_rate": 6.987936417825717e-05, "loss": 0.008240371197462081, "step": 106160 }, { "epoch": 30.136247516321315, "grad_norm": 0.06719125807285309, "learning_rate": 6.98765256883338e-05, "loss": 0.008950628340244293, "step": 106170 }, { "epoch": 30.13908600624468, "grad_norm": 0.058082763105630875, "learning_rate": 6.987368719841045e-05, "loss": 0.004798870533704758, "step": 106180 }, { "epoch": 30.141924496168038, "grad_norm": 1.0668498277664185, "learning_rate": 6.98708487084871e-05, "loss": 0.023797670006752016, "step": 106190 }, { "epoch": 30.1447629860914, "grad_norm": 1.0734931230545044, "learning_rate": 6.986801021856372e-05, "loss": 0.005035821348428726, "step": 106200 }, { "epoch": 30.14760147601476, "grad_norm": 1.7851450443267822, "learning_rate": 6.986517172864036e-05, "loss": 0.006329485774040222, "step": 106210 }, { "epoch": 30.15043996593812, "grad_norm": 2.5314440727233887, "learning_rate": 6.9862333238717e-05, "loss": 0.005723815038800239, "step": 106220 }, { "epoch": 30.153278455861482, "grad_norm": 0.09133248031139374, "learning_rate": 6.985949474879365e-05, "loss": 0.008039522916078568, "step": 106230 }, { "epoch": 30.15611694578484, "grad_norm": 1.122937798500061, "learning_rate": 6.985665625887027e-05, "loss": 0.0039427332580089566, "step": 106240 }, { "epoch": 30.158955435708204, "grad_norm": 0.2583063244819641, "learning_rate": 6.985381776894693e-05, "loss": 0.010951116681098938, "step": 106250 }, { "epoch": 30.161793925631564, "grad_norm": 12.267629623413086, "learning_rate": 6.985097927902357e-05, "loss": 0.012059054523706435, "step": 106260 }, { "epoch": 30.164632415554923, "grad_norm": 0.5131176114082336, "learning_rate": 6.98481407891002e-05, "loss": 0.006644503772258758, "step": 106270 }, { "epoch": 30.167470905478286, "grad_norm": 0.13666339218616486, "learning_rate": 6.984530229917684e-05, "loss": 0.010691936314105987, "step": 106280 }, { "epoch": 30.170309395401645, "grad_norm": 0.11236665397882462, "learning_rate": 6.984246380925348e-05, "loss": 0.0038580071181058885, "step": 106290 }, { "epoch": 30.173147885325008, "grad_norm": 0.21959036588668823, "learning_rate": 6.983962531933011e-05, "loss": 0.0016616426408290863, "step": 106300 }, { "epoch": 30.175986375248367, "grad_norm": 0.30282488465309143, "learning_rate": 6.983678682940676e-05, "loss": 0.01221366748213768, "step": 106310 }, { "epoch": 30.17882486517173, "grad_norm": 0.9510934948921204, "learning_rate": 6.98339483394834e-05, "loss": 0.01394842267036438, "step": 106320 }, { "epoch": 30.18166335509509, "grad_norm": 0.021009910851716995, "learning_rate": 6.983110984956003e-05, "loss": 0.0011046014726161957, "step": 106330 }, { "epoch": 30.18450184501845, "grad_norm": 0.017298022285103798, "learning_rate": 6.982827135963668e-05, "loss": 0.010340652614831924, "step": 106340 }, { "epoch": 30.187340334941812, "grad_norm": 8.820735931396484, "learning_rate": 6.982543286971332e-05, "loss": 0.0024993667379021645, "step": 106350 }, { "epoch": 30.19017882486517, "grad_norm": 0.0782211497426033, "learning_rate": 6.982259437978996e-05, "loss": 0.004154453054070473, "step": 106360 }, { "epoch": 30.193017314788534, "grad_norm": 3.808377265930176, "learning_rate": 6.981975588986659e-05, "loss": 0.003195016086101532, "step": 106370 }, { "epoch": 30.195855804711893, "grad_norm": 0.13907785713672638, "learning_rate": 6.981691739994324e-05, "loss": 0.01892424374818802, "step": 106380 }, { "epoch": 30.198694294635253, "grad_norm": 11.289539337158203, "learning_rate": 6.981407891001988e-05, "loss": 0.015257959067821503, "step": 106390 }, { "epoch": 30.201532784558616, "grad_norm": 0.1023612767457962, "learning_rate": 6.981124042009651e-05, "loss": 0.014093858003616334, "step": 106400 }, { "epoch": 30.204371274481975, "grad_norm": 0.007787133567035198, "learning_rate": 6.980840193017315e-05, "loss": 0.0008469542488455772, "step": 106410 }, { "epoch": 30.207209764405338, "grad_norm": 5.542105197906494, "learning_rate": 6.980556344024979e-05, "loss": 0.012811873853206635, "step": 106420 }, { "epoch": 30.210048254328697, "grad_norm": 0.021343301981687546, "learning_rate": 6.980272495032642e-05, "loss": 0.00422401875257492, "step": 106430 }, { "epoch": 30.212886744252057, "grad_norm": 2.122499704360962, "learning_rate": 6.979988646040308e-05, "loss": 0.0037278708070516585, "step": 106440 }, { "epoch": 30.21572523417542, "grad_norm": 8.423645973205566, "learning_rate": 6.979704797047972e-05, "loss": 0.012956076860427856, "step": 106450 }, { "epoch": 30.21856372409878, "grad_norm": 1.6086416244506836, "learning_rate": 6.979420948055634e-05, "loss": 0.0014655346050858498, "step": 106460 }, { "epoch": 30.22140221402214, "grad_norm": 1.271066665649414, "learning_rate": 6.979137099063299e-05, "loss": 0.007086391746997833, "step": 106470 }, { "epoch": 30.2242407039455, "grad_norm": 0.8183472752571106, "learning_rate": 6.978853250070963e-05, "loss": 0.02619315981864929, "step": 106480 }, { "epoch": 30.22707919386886, "grad_norm": 0.00751589797437191, "learning_rate": 6.978569401078627e-05, "loss": 0.004820607975125313, "step": 106490 }, { "epoch": 30.229917683792223, "grad_norm": 3.8728318214416504, "learning_rate": 6.97828555208629e-05, "loss": 0.004894532263278961, "step": 106500 }, { "epoch": 30.229917683792223, "eval_accuracy": 0.9774273542315762, "eval_loss": 0.08227147907018661, "eval_runtime": 33.95, "eval_samples_per_second": 463.24, "eval_steps_per_second": 7.246, "step": 106500 }, { "epoch": 30.232756173715583, "grad_norm": 0.3702247738838196, "learning_rate": 6.978001703093955e-05, "loss": 0.0017437173053622245, "step": 106510 }, { "epoch": 30.235594663638945, "grad_norm": 5.486508846282959, "learning_rate": 6.977717854101619e-05, "loss": 0.010826426744461059, "step": 106520 }, { "epoch": 30.238433153562305, "grad_norm": 0.204158216714859, "learning_rate": 6.977434005109282e-05, "loss": 0.0016418471932411194, "step": 106530 }, { "epoch": 30.241271643485664, "grad_norm": 0.05974847823381424, "learning_rate": 6.977150156116946e-05, "loss": 0.009307356178760528, "step": 106540 }, { "epoch": 30.244110133409027, "grad_norm": 0.17712222039699554, "learning_rate": 6.97686630712461e-05, "loss": 0.006134046614170075, "step": 106550 }, { "epoch": 30.246948623332386, "grad_norm": 3.2121641635894775, "learning_rate": 6.976582458132273e-05, "loss": 0.0069960102438926695, "step": 106560 }, { "epoch": 30.24978711325575, "grad_norm": 0.5961222648620605, "learning_rate": 6.976298609139937e-05, "loss": 0.004086894541978836, "step": 106570 }, { "epoch": 30.25262560317911, "grad_norm": 0.4347838759422302, "learning_rate": 6.976014760147603e-05, "loss": 0.0037392821162939073, "step": 106580 }, { "epoch": 30.255464093102468, "grad_norm": 0.1365523785352707, "learning_rate": 6.975730911155266e-05, "loss": 0.0019605264067649843, "step": 106590 }, { "epoch": 30.25830258302583, "grad_norm": 0.15986816585063934, "learning_rate": 6.97544706216293e-05, "loss": 0.0004956463351845741, "step": 106600 }, { "epoch": 30.26114107294919, "grad_norm": 0.4919562041759491, "learning_rate": 6.975163213170594e-05, "loss": 0.0010944755747914314, "step": 106610 }, { "epoch": 30.263979562872553, "grad_norm": 0.008542114868760109, "learning_rate": 6.974879364178258e-05, "loss": 0.002453404292464256, "step": 106620 }, { "epoch": 30.266818052795912, "grad_norm": 0.3675309717655182, "learning_rate": 6.974595515185921e-05, "loss": 0.011640360951423645, "step": 106630 }, { "epoch": 30.26965654271927, "grad_norm": 0.5969154238700867, "learning_rate": 6.974311666193586e-05, "loss": 0.0024069031700491906, "step": 106640 }, { "epoch": 30.272495032642635, "grad_norm": 3.987888813018799, "learning_rate": 6.974027817201249e-05, "loss": 0.005454409122467041, "step": 106650 }, { "epoch": 30.275333522565994, "grad_norm": 4.3792033195495605, "learning_rate": 6.973743968208913e-05, "loss": 0.007985111325979233, "step": 106660 }, { "epoch": 30.278172012489357, "grad_norm": 0.3984444737434387, "learning_rate": 6.973460119216577e-05, "loss": 0.0027647480368614198, "step": 106670 }, { "epoch": 30.281010502412716, "grad_norm": 0.2548582851886749, "learning_rate": 6.973176270224241e-05, "loss": 0.0006993908435106277, "step": 106680 }, { "epoch": 30.283848992336075, "grad_norm": 0.506186842918396, "learning_rate": 6.972892421231904e-05, "loss": 0.0007074801251292229, "step": 106690 }, { "epoch": 30.28668748225944, "grad_norm": 12.808219909667969, "learning_rate": 6.972608572239568e-05, "loss": 0.007655033469200134, "step": 106700 }, { "epoch": 30.289525972182798, "grad_norm": 4.150643348693848, "learning_rate": 6.972324723247234e-05, "loss": 0.007675600051879883, "step": 106710 }, { "epoch": 30.29236446210616, "grad_norm": 1.1586244106292725, "learning_rate": 6.972040874254897e-05, "loss": 0.00559418685734272, "step": 106720 }, { "epoch": 30.29520295202952, "grad_norm": 4.960024356842041, "learning_rate": 6.971757025262561e-05, "loss": 0.014095625281333924, "step": 106730 }, { "epoch": 30.298041441952883, "grad_norm": 0.1537298858165741, "learning_rate": 6.971473176270225e-05, "loss": 0.004706782847642898, "step": 106740 }, { "epoch": 30.300879931876242, "grad_norm": 2.306123971939087, "learning_rate": 6.971189327277888e-05, "loss": 0.007489168643951416, "step": 106750 }, { "epoch": 30.3037184217996, "grad_norm": 0.5289342999458313, "learning_rate": 6.970905478285552e-05, "loss": 0.0025857722386717795, "step": 106760 }, { "epoch": 30.306556911722964, "grad_norm": 0.7038741707801819, "learning_rate": 6.970621629293216e-05, "loss": 0.003010045364499092, "step": 106770 }, { "epoch": 30.309395401646324, "grad_norm": 0.6938719749450684, "learning_rate": 6.97033778030088e-05, "loss": 0.01315535455942154, "step": 106780 }, { "epoch": 30.312233891569687, "grad_norm": 0.8704838156700134, "learning_rate": 6.970053931308544e-05, "loss": 0.01124773770570755, "step": 106790 }, { "epoch": 30.315072381493046, "grad_norm": 3.930603504180908, "learning_rate": 6.969770082316208e-05, "loss": 0.00966494232416153, "step": 106800 }, { "epoch": 30.317910871416405, "grad_norm": 0.025471696630120277, "learning_rate": 6.969486233323872e-05, "loss": 0.008960571885108948, "step": 106810 }, { "epoch": 30.320749361339768, "grad_norm": 2.4394404888153076, "learning_rate": 6.969202384331535e-05, "loss": 0.020686131715774537, "step": 106820 }, { "epoch": 30.323587851263127, "grad_norm": 0.687146782875061, "learning_rate": 6.9689185353392e-05, "loss": 0.0140766903758049, "step": 106830 }, { "epoch": 30.32642634118649, "grad_norm": 7.023665428161621, "learning_rate": 6.968634686346865e-05, "loss": 0.009798501431941987, "step": 106840 }, { "epoch": 30.32926483110985, "grad_norm": 4.326694011688232, "learning_rate": 6.968350837354528e-05, "loss": 0.0037689611315727236, "step": 106850 }, { "epoch": 30.33210332103321, "grad_norm": 15.643959045410156, "learning_rate": 6.968066988362192e-05, "loss": 0.009677016735076904, "step": 106860 }, { "epoch": 30.334941810956572, "grad_norm": 0.46826446056365967, "learning_rate": 6.967783139369856e-05, "loss": 0.0052745997905731205, "step": 106870 }, { "epoch": 30.33778030087993, "grad_norm": 0.33792057633399963, "learning_rate": 6.967499290377519e-05, "loss": 0.01710291802883148, "step": 106880 }, { "epoch": 30.340618790803294, "grad_norm": 6.246099948883057, "learning_rate": 6.967215441385183e-05, "loss": 0.003993334621191025, "step": 106890 }, { "epoch": 30.343457280726653, "grad_norm": 0.043562501668930054, "learning_rate": 6.966931592392847e-05, "loss": 0.00793798640370369, "step": 106900 }, { "epoch": 30.346295770650013, "grad_norm": 0.057571083307266235, "learning_rate": 6.966647743400511e-05, "loss": 0.012824052572250366, "step": 106910 }, { "epoch": 30.349134260573376, "grad_norm": 3.8386728763580322, "learning_rate": 6.966363894408175e-05, "loss": 0.01392972469329834, "step": 106920 }, { "epoch": 30.351972750496735, "grad_norm": 0.13169583678245544, "learning_rate": 6.96608004541584e-05, "loss": 0.004092078655958176, "step": 106930 }, { "epoch": 30.354811240420098, "grad_norm": 0.3890060782432556, "learning_rate": 6.965796196423504e-05, "loss": 0.0008203258737921714, "step": 106940 }, { "epoch": 30.357649730343457, "grad_norm": 0.08006663620471954, "learning_rate": 6.965512347431166e-05, "loss": 0.0021565625444054603, "step": 106950 }, { "epoch": 30.360488220266816, "grad_norm": 6.508913993835449, "learning_rate": 6.96522849843883e-05, "loss": 0.01450411081314087, "step": 106960 }, { "epoch": 30.36332671019018, "grad_norm": 0.4528236985206604, "learning_rate": 6.964944649446495e-05, "loss": 0.0077635422348976135, "step": 106970 }, { "epoch": 30.36616520011354, "grad_norm": 1.6359766721725464, "learning_rate": 6.964660800454159e-05, "loss": 0.0045054048299789425, "step": 106980 }, { "epoch": 30.3690036900369, "grad_norm": 0.1753338873386383, "learning_rate": 6.964376951461823e-05, "loss": 0.0020952258259058, "step": 106990 }, { "epoch": 30.37184217996026, "grad_norm": 8.836193084716797, "learning_rate": 6.964093102469487e-05, "loss": 0.00985419899225235, "step": 107000 }, { "epoch": 30.37184217996026, "eval_accuracy": 0.9793984866789598, "eval_loss": 0.07859137654304504, "eval_runtime": 33.2696, "eval_samples_per_second": 472.714, "eval_steps_per_second": 7.394, "step": 107000 }, { "epoch": 30.37468066988362, "grad_norm": 0.2508452832698822, "learning_rate": 6.96380925347715e-05, "loss": 0.013552029430866242, "step": 107010 }, { "epoch": 30.377519159806983, "grad_norm": 9.532111167907715, "learning_rate": 6.963525404484814e-05, "loss": 0.014036425948143005, "step": 107020 }, { "epoch": 30.380357649730342, "grad_norm": 0.03661639988422394, "learning_rate": 6.963241555492478e-05, "loss": 0.012157070636749267, "step": 107030 }, { "epoch": 30.383196139653705, "grad_norm": 0.2900063395500183, "learning_rate": 6.962957706500142e-05, "loss": 0.0034712061285972597, "step": 107040 }, { "epoch": 30.386034629577065, "grad_norm": 4.048138618469238, "learning_rate": 6.962673857507806e-05, "loss": 0.007642611116170883, "step": 107050 }, { "epoch": 30.388873119500424, "grad_norm": 8.514728546142578, "learning_rate": 6.96239000851547e-05, "loss": 0.012257188558578491, "step": 107060 }, { "epoch": 30.391711609423787, "grad_norm": 1.4823447465896606, "learning_rate": 6.962106159523135e-05, "loss": 0.012845970690250397, "step": 107070 }, { "epoch": 30.394550099347146, "grad_norm": 14.457612991333008, "learning_rate": 6.961822310530797e-05, "loss": 0.013372284173965455, "step": 107080 }, { "epoch": 30.39738858927051, "grad_norm": 0.4343775510787964, "learning_rate": 6.961538461538462e-05, "loss": 0.0017523681744933128, "step": 107090 }, { "epoch": 30.40022707919387, "grad_norm": 1.5974109172821045, "learning_rate": 6.961254612546126e-05, "loss": 0.004767395555973053, "step": 107100 }, { "epoch": 30.40306556911723, "grad_norm": 0.09775742143392563, "learning_rate": 6.96097076355379e-05, "loss": 0.0011869631707668304, "step": 107110 }, { "epoch": 30.40590405904059, "grad_norm": 0.4624710977077484, "learning_rate": 6.960686914561454e-05, "loss": 0.0014107979834079743, "step": 107120 }, { "epoch": 30.40874254896395, "grad_norm": 0.07530441135168076, "learning_rate": 6.960403065569118e-05, "loss": 0.0012116551399230957, "step": 107130 }, { "epoch": 30.411581038887313, "grad_norm": 0.6519750356674194, "learning_rate": 6.960119216576781e-05, "loss": 0.004637713730335236, "step": 107140 }, { "epoch": 30.414419528810672, "grad_norm": 0.5188882350921631, "learning_rate": 6.959835367584445e-05, "loss": 0.0012626087293028832, "step": 107150 }, { "epoch": 30.417258018734035, "grad_norm": 1.5533466339111328, "learning_rate": 6.959551518592109e-05, "loss": 0.002967786230146885, "step": 107160 }, { "epoch": 30.420096508657394, "grad_norm": 0.10911139845848083, "learning_rate": 6.959267669599773e-05, "loss": 0.0014434268698096276, "step": 107170 }, { "epoch": 30.422934998580754, "grad_norm": 0.021685000509023666, "learning_rate": 6.958983820607437e-05, "loss": 0.005216620862483978, "step": 107180 }, { "epoch": 30.425773488504117, "grad_norm": 0.2778770625591278, "learning_rate": 6.958699971615102e-05, "loss": 0.002741062268614769, "step": 107190 }, { "epoch": 30.428611978427476, "grad_norm": 0.3250596225261688, "learning_rate": 6.958416122622766e-05, "loss": 0.0036105319857597353, "step": 107200 }, { "epoch": 30.43145046835084, "grad_norm": 5.79102087020874, "learning_rate": 6.958132273630428e-05, "loss": 0.002946893684566021, "step": 107210 }, { "epoch": 30.434288958274198, "grad_norm": 0.29823559522628784, "learning_rate": 6.957848424638093e-05, "loss": 0.006180089339613915, "step": 107220 }, { "epoch": 30.437127448197558, "grad_norm": 0.06558950990438461, "learning_rate": 6.957564575645757e-05, "loss": 0.0029355233535170557, "step": 107230 }, { "epoch": 30.43996593812092, "grad_norm": 0.38492152094841003, "learning_rate": 6.957280726653421e-05, "loss": 0.009316299855709077, "step": 107240 }, { "epoch": 30.44280442804428, "grad_norm": 4.429741859436035, "learning_rate": 6.956996877661085e-05, "loss": 0.007248310744762421, "step": 107250 }, { "epoch": 30.445642917967643, "grad_norm": 0.29801812767982483, "learning_rate": 6.956713028668749e-05, "loss": 0.0018342763185501098, "step": 107260 }, { "epoch": 30.448481407891002, "grad_norm": 1.049345850944519, "learning_rate": 6.956429179676412e-05, "loss": 0.0023627391085028647, "step": 107270 }, { "epoch": 30.45131989781436, "grad_norm": 0.011576322838664055, "learning_rate": 6.956145330684076e-05, "loss": 0.0025567237287759783, "step": 107280 }, { "epoch": 30.454158387737724, "grad_norm": 4.311575412750244, "learning_rate": 6.95586148169174e-05, "loss": 0.004655152931809426, "step": 107290 }, { "epoch": 30.456996877661084, "grad_norm": 2.9919936656951904, "learning_rate": 6.955577632699404e-05, "loss": 0.013831986486911774, "step": 107300 }, { "epoch": 30.459835367584446, "grad_norm": 0.026728995144367218, "learning_rate": 6.955293783707069e-05, "loss": 0.009760627150535583, "step": 107310 }, { "epoch": 30.462673857507806, "grad_norm": 0.24411021173000336, "learning_rate": 6.955009934714733e-05, "loss": 0.0008522501215338707, "step": 107320 }, { "epoch": 30.465512347431165, "grad_norm": 0.10732957720756531, "learning_rate": 6.954726085722397e-05, "loss": 0.011399731040000916, "step": 107330 }, { "epoch": 30.468350837354528, "grad_norm": 0.07201115787029266, "learning_rate": 6.95444223673006e-05, "loss": 0.006180825084447861, "step": 107340 }, { "epoch": 30.471189327277887, "grad_norm": 1.4205970764160156, "learning_rate": 6.954158387737724e-05, "loss": 0.0016565414145588875, "step": 107350 }, { "epoch": 30.47402781720125, "grad_norm": 0.08344289660453796, "learning_rate": 6.953874538745388e-05, "loss": 0.00695580467581749, "step": 107360 }, { "epoch": 30.47686630712461, "grad_norm": 0.19820131361484528, "learning_rate": 6.95359068975305e-05, "loss": 0.0017263928428292274, "step": 107370 }, { "epoch": 30.47970479704797, "grad_norm": 0.26969489455223083, "learning_rate": 6.953306840760716e-05, "loss": 0.001418069563806057, "step": 107380 }, { "epoch": 30.48254328697133, "grad_norm": 0.24978168308734894, "learning_rate": 6.95302299176838e-05, "loss": 0.002162408083677292, "step": 107390 }, { "epoch": 30.48538177689469, "grad_norm": 0.9687621593475342, "learning_rate": 6.952739142776043e-05, "loss": 0.0037429310381412507, "step": 107400 }, { "epoch": 30.488220266818054, "grad_norm": 0.05145876482129097, "learning_rate": 6.952455293783707e-05, "loss": 0.013428725302219391, "step": 107410 }, { "epoch": 30.491058756741413, "grad_norm": 4.346484661102295, "learning_rate": 6.952171444791371e-05, "loss": 0.009682483971118927, "step": 107420 }, { "epoch": 30.493897246664773, "grad_norm": 9.953840255737305, "learning_rate": 6.951887595799035e-05, "loss": 0.005867573618888855, "step": 107430 }, { "epoch": 30.496735736588136, "grad_norm": 0.020336592569947243, "learning_rate": 6.9516037468067e-05, "loss": 0.023401209712028505, "step": 107440 }, { "epoch": 30.499574226511495, "grad_norm": 2.9002153873443604, "learning_rate": 6.951319897814364e-05, "loss": 0.003905762732028961, "step": 107450 }, { "epoch": 30.502412716434858, "grad_norm": 2.389639377593994, "learning_rate": 6.951036048822028e-05, "loss": 0.006286025792360306, "step": 107460 }, { "epoch": 30.505251206358217, "grad_norm": 0.07482670992612839, "learning_rate": 6.95075219982969e-05, "loss": 0.0032881833612918854, "step": 107470 }, { "epoch": 30.50808969628158, "grad_norm": 1.3567136526107788, "learning_rate": 6.950468350837355e-05, "loss": 0.001485673524439335, "step": 107480 }, { "epoch": 30.51092818620494, "grad_norm": 8.324963569641113, "learning_rate": 6.950184501845019e-05, "loss": 0.015989518165588378, "step": 107490 }, { "epoch": 30.5137666761283, "grad_norm": 3.4397714138031006, "learning_rate": 6.949900652852682e-05, "loss": 0.003710074722766876, "step": 107500 }, { "epoch": 30.5137666761283, "eval_accuracy": 0.9795892414319324, "eval_loss": 0.07525604963302612, "eval_runtime": 37.5096, "eval_samples_per_second": 419.279, "eval_steps_per_second": 6.558, "step": 107500 }, { "epoch": 30.51660516605166, "grad_norm": 0.23641683161258698, "learning_rate": 6.949616803860347e-05, "loss": 0.012397095561027527, "step": 107510 }, { "epoch": 30.51944365597502, "grad_norm": 1.0739985704421997, "learning_rate": 6.949332954868011e-05, "loss": 0.003933754563331604, "step": 107520 }, { "epoch": 30.522282145898384, "grad_norm": 1.3159370422363281, "learning_rate": 6.949049105875674e-05, "loss": 0.011560305953025818, "step": 107530 }, { "epoch": 30.525120635821743, "grad_norm": 0.23531506955623627, "learning_rate": 6.948765256883338e-05, "loss": 0.012322460860013961, "step": 107540 }, { "epoch": 30.527959125745102, "grad_norm": 2.090839385986328, "learning_rate": 6.948481407891002e-05, "loss": 0.0047388307750225065, "step": 107550 }, { "epoch": 30.530797615668465, "grad_norm": 1.0427722930908203, "learning_rate": 6.948197558898667e-05, "loss": 0.0022786540910601618, "step": 107560 }, { "epoch": 30.533636105591825, "grad_norm": 7.226040363311768, "learning_rate": 6.947913709906329e-05, "loss": 0.009245666861534118, "step": 107570 }, { "epoch": 30.536474595515188, "grad_norm": 0.14986011385917664, "learning_rate": 6.947629860913995e-05, "loss": 0.010126468539237977, "step": 107580 }, { "epoch": 30.539313085438547, "grad_norm": 2.2406885623931885, "learning_rate": 6.947346011921658e-05, "loss": 0.0049380861222743985, "step": 107590 }, { "epoch": 30.542151575361906, "grad_norm": 0.11954472213983536, "learning_rate": 6.947062162929322e-05, "loss": 0.017338749766349793, "step": 107600 }, { "epoch": 30.54499006528527, "grad_norm": 0.6688138246536255, "learning_rate": 6.946778313936986e-05, "loss": 0.014412981271743775, "step": 107610 }, { "epoch": 30.54782855520863, "grad_norm": 10.971508979797363, "learning_rate": 6.94649446494465e-05, "loss": 0.012951719760894775, "step": 107620 }, { "epoch": 30.55066704513199, "grad_norm": 0.6616188287734985, "learning_rate": 6.946210615952313e-05, "loss": 0.005254191905260086, "step": 107630 }, { "epoch": 30.55350553505535, "grad_norm": 7.703917503356934, "learning_rate": 6.945926766959978e-05, "loss": 0.002844984270632267, "step": 107640 }, { "epoch": 30.55634402497871, "grad_norm": 0.5119197964668274, "learning_rate": 6.945642917967642e-05, "loss": 0.0044668219983577725, "step": 107650 }, { "epoch": 30.559182514902073, "grad_norm": 6.359673500061035, "learning_rate": 6.945359068975305e-05, "loss": 0.010740150511264802, "step": 107660 }, { "epoch": 30.562021004825432, "grad_norm": 0.072955422103405, "learning_rate": 6.94507521998297e-05, "loss": 0.0020030202344059945, "step": 107670 }, { "epoch": 30.564859494748795, "grad_norm": 8.942790985107422, "learning_rate": 6.944791370990633e-05, "loss": 0.008657935261726379, "step": 107680 }, { "epoch": 30.567697984672154, "grad_norm": 0.18281416594982147, "learning_rate": 6.944507521998296e-05, "loss": 0.005740272998809815, "step": 107690 }, { "epoch": 30.570536474595514, "grad_norm": 0.1502779871225357, "learning_rate": 6.94422367300596e-05, "loss": 0.0024468742311000826, "step": 107700 }, { "epoch": 30.573374964518877, "grad_norm": 3.3667311668395996, "learning_rate": 6.943939824013626e-05, "loss": 0.011748193204402924, "step": 107710 }, { "epoch": 30.576213454442236, "grad_norm": 13.547590255737305, "learning_rate": 6.943655975021289e-05, "loss": 0.0069601103663444516, "step": 107720 }, { "epoch": 30.5790519443656, "grad_norm": 0.25972679257392883, "learning_rate": 6.943372126028953e-05, "loss": 0.005829716846346855, "step": 107730 }, { "epoch": 30.581890434288958, "grad_norm": 0.1592096984386444, "learning_rate": 6.943088277036617e-05, "loss": 0.005496920645236969, "step": 107740 }, { "epoch": 30.584728924212317, "grad_norm": 0.16010285913944244, "learning_rate": 6.942804428044281e-05, "loss": 0.0041168928146362305, "step": 107750 }, { "epoch": 30.58756741413568, "grad_norm": 0.45397934317588806, "learning_rate": 6.942520579051944e-05, "loss": 0.00680830255150795, "step": 107760 }, { "epoch": 30.59040590405904, "grad_norm": 0.3911172151565552, "learning_rate": 6.94223673005961e-05, "loss": 0.005551011115312576, "step": 107770 }, { "epoch": 30.593244393982403, "grad_norm": 0.2747761905193329, "learning_rate": 6.941952881067273e-05, "loss": 0.008719279617071151, "step": 107780 }, { "epoch": 30.596082883905762, "grad_norm": 0.2951708734035492, "learning_rate": 6.941669032074936e-05, "loss": 0.02112719416618347, "step": 107790 }, { "epoch": 30.59892137382912, "grad_norm": 1.813321828842163, "learning_rate": 6.9413851830826e-05, "loss": 0.004771574586629868, "step": 107800 }, { "epoch": 30.601759863752484, "grad_norm": 0.9633409380912781, "learning_rate": 6.941101334090265e-05, "loss": 0.014348353445529937, "step": 107810 }, { "epoch": 30.604598353675843, "grad_norm": 0.8773604035377502, "learning_rate": 6.940817485097927e-05, "loss": 0.0028191011399030684, "step": 107820 }, { "epoch": 30.607436843599206, "grad_norm": 0.3878115117549896, "learning_rate": 6.940533636105591e-05, "loss": 0.0015360703691840172, "step": 107830 }, { "epoch": 30.610275333522566, "grad_norm": 0.2230089157819748, "learning_rate": 6.940249787113257e-05, "loss": 0.026551839709281922, "step": 107840 }, { "epoch": 30.61311382344593, "grad_norm": 0.6443318128585815, "learning_rate": 6.93996593812092e-05, "loss": 0.0034631386399269102, "step": 107850 }, { "epoch": 30.615952313369288, "grad_norm": 0.0493696928024292, "learning_rate": 6.939682089128584e-05, "loss": 0.0061296563595533374, "step": 107860 }, { "epoch": 30.618790803292647, "grad_norm": 1.5160272121429443, "learning_rate": 6.939398240136248e-05, "loss": 0.009260007739067077, "step": 107870 }, { "epoch": 30.62162929321601, "grad_norm": 0.12052606791257858, "learning_rate": 6.939114391143912e-05, "loss": 0.006757406890392304, "step": 107880 }, { "epoch": 30.62446778313937, "grad_norm": 7.316381931304932, "learning_rate": 6.938830542151575e-05, "loss": 0.0047642853111028675, "step": 107890 }, { "epoch": 30.627306273062732, "grad_norm": 0.4488105773925781, "learning_rate": 6.938546693159239e-05, "loss": 0.005969647318124771, "step": 107900 }, { "epoch": 30.63014476298609, "grad_norm": 0.4250623285770416, "learning_rate": 6.938262844166905e-05, "loss": 0.005646659806370735, "step": 107910 }, { "epoch": 30.63298325290945, "grad_norm": 0.03811980038881302, "learning_rate": 6.937978995174567e-05, "loss": 0.0009760810062289238, "step": 107920 }, { "epoch": 30.635821742832814, "grad_norm": 0.031956348568201065, "learning_rate": 6.937695146182231e-05, "loss": 0.021445736289024353, "step": 107930 }, { "epoch": 30.638660232756173, "grad_norm": 0.4700480103492737, "learning_rate": 6.937411297189896e-05, "loss": 0.0015636486932635307, "step": 107940 }, { "epoch": 30.641498722679536, "grad_norm": 0.2748659551143646, "learning_rate": 6.937127448197558e-05, "loss": 0.004171665757894516, "step": 107950 }, { "epoch": 30.644337212602895, "grad_norm": 0.6833020448684692, "learning_rate": 6.936843599205223e-05, "loss": 0.017458462715148927, "step": 107960 }, { "epoch": 30.647175702526255, "grad_norm": 6.472978115081787, "learning_rate": 6.936559750212888e-05, "loss": 0.0031718060374259947, "step": 107970 }, { "epoch": 30.650014192449618, "grad_norm": 0.21620352566242218, "learning_rate": 6.936275901220551e-05, "loss": 0.0024871423840522764, "step": 107980 }, { "epoch": 30.652852682372977, "grad_norm": 0.28120744228363037, "learning_rate": 6.935992052228215e-05, "loss": 0.0027242844924330713, "step": 107990 }, { "epoch": 30.65569117229634, "grad_norm": 0.5924471020698547, "learning_rate": 6.935708203235879e-05, "loss": 0.0007509872317314148, "step": 108000 }, { "epoch": 30.65569117229634, "eval_accuracy": 0.9779996184904941, "eval_loss": 0.07975208014249802, "eval_runtime": 37.3037, "eval_samples_per_second": 421.594, "eval_steps_per_second": 6.595, "step": 108000 }, { "epoch": 30.6585296622197, "grad_norm": 0.018931128084659576, "learning_rate": 6.935424354243543e-05, "loss": 0.0037925288081169127, "step": 108010 }, { "epoch": 30.66136815214306, "grad_norm": 1.5574383735656738, "learning_rate": 6.935140505251206e-05, "loss": 0.0015428559854626656, "step": 108020 }, { "epoch": 30.66420664206642, "grad_norm": 0.03110641799867153, "learning_rate": 6.93485665625887e-05, "loss": 0.0037709534168243407, "step": 108030 }, { "epoch": 30.66704513198978, "grad_norm": 0.9474026560783386, "learning_rate": 6.934572807266536e-05, "loss": 0.006162228062748909, "step": 108040 }, { "epoch": 30.669883621913144, "grad_norm": 8.797964096069336, "learning_rate": 6.934288958274198e-05, "loss": 0.00494425967335701, "step": 108050 }, { "epoch": 30.672722111836503, "grad_norm": 0.10535237193107605, "learning_rate": 6.934005109281863e-05, "loss": 0.0164783775806427, "step": 108060 }, { "epoch": 30.675560601759862, "grad_norm": 1.1595720052719116, "learning_rate": 6.933721260289527e-05, "loss": 0.011751831322908402, "step": 108070 }, { "epoch": 30.678399091683225, "grad_norm": 0.27942121028900146, "learning_rate": 6.93343741129719e-05, "loss": 0.009819858521223069, "step": 108080 }, { "epoch": 30.681237581606585, "grad_norm": 1.435105323791504, "learning_rate": 6.933153562304854e-05, "loss": 0.001850496232509613, "step": 108090 }, { "epoch": 30.684076071529947, "grad_norm": 0.7482427954673767, "learning_rate": 6.932869713312518e-05, "loss": 0.004522161185741424, "step": 108100 }, { "epoch": 30.686914561453307, "grad_norm": 0.6540265679359436, "learning_rate": 6.932585864320182e-05, "loss": 0.004065690562129021, "step": 108110 }, { "epoch": 30.689753051376666, "grad_norm": 0.14624956250190735, "learning_rate": 6.932302015327846e-05, "loss": 0.009493355453014374, "step": 108120 }, { "epoch": 30.69259154130003, "grad_norm": 0.5431278347969055, "learning_rate": 6.93201816633551e-05, "loss": 0.021827049553394318, "step": 108130 }, { "epoch": 30.69543003122339, "grad_norm": 0.06207168847322464, "learning_rate": 6.931734317343174e-05, "loss": 0.0018446702510118485, "step": 108140 }, { "epoch": 30.69826852114675, "grad_norm": 0.9280859231948853, "learning_rate": 6.931450468350837e-05, "loss": 0.002419883199036121, "step": 108150 }, { "epoch": 30.70110701107011, "grad_norm": 0.47097107768058777, "learning_rate": 6.931166619358501e-05, "loss": 0.0022803129628300666, "step": 108160 }, { "epoch": 30.70394550099347, "grad_norm": 0.7749335169792175, "learning_rate": 6.930882770366167e-05, "loss": 0.0118465393781662, "step": 108170 }, { "epoch": 30.706783990916833, "grad_norm": 0.10718490928411484, "learning_rate": 6.93059892137383e-05, "loss": 0.01206122487783432, "step": 108180 }, { "epoch": 30.709622480840192, "grad_norm": 5.714625358581543, "learning_rate": 6.930315072381494e-05, "loss": 0.013557769358158112, "step": 108190 }, { "epoch": 30.712460970763555, "grad_norm": 8.24272632598877, "learning_rate": 6.930031223389158e-05, "loss": 0.014035208523273468, "step": 108200 }, { "epoch": 30.715299460686914, "grad_norm": 0.37876397371292114, "learning_rate": 6.92974737439682e-05, "loss": 0.009621980041265488, "step": 108210 }, { "epoch": 30.718137950610274, "grad_norm": 11.64427375793457, "learning_rate": 6.929463525404485e-05, "loss": 0.026225820183753967, "step": 108220 }, { "epoch": 30.720976440533637, "grad_norm": 1.118370532989502, "learning_rate": 6.929179676412149e-05, "loss": 0.002909292094409466, "step": 108230 }, { "epoch": 30.723814930456996, "grad_norm": 3.020066738128662, "learning_rate": 6.928895827419813e-05, "loss": 0.013750138878822326, "step": 108240 }, { "epoch": 30.72665342038036, "grad_norm": 0.08844844996929169, "learning_rate": 6.928611978427477e-05, "loss": 0.0024586312472820284, "step": 108250 }, { "epoch": 30.729491910303718, "grad_norm": 0.6481117606163025, "learning_rate": 6.928328129435141e-05, "loss": 0.005306126177310943, "step": 108260 }, { "epoch": 30.732330400227077, "grad_norm": 0.06444388628005981, "learning_rate": 6.928044280442805e-05, "loss": 0.008626962453126908, "step": 108270 }, { "epoch": 30.73516889015044, "grad_norm": 0.5858505964279175, "learning_rate": 6.927760431450468e-05, "loss": 0.0011006200686097146, "step": 108280 }, { "epoch": 30.7380073800738, "grad_norm": 0.5385359525680542, "learning_rate": 6.927476582458132e-05, "loss": 0.0018259074538946151, "step": 108290 }, { "epoch": 30.740845869997163, "grad_norm": 0.032164283096790314, "learning_rate": 6.927192733465796e-05, "loss": 0.006496167182922364, "step": 108300 }, { "epoch": 30.743684359920522, "grad_norm": 0.31094130873680115, "learning_rate": 6.92690888447346e-05, "loss": 0.0065031498670578, "step": 108310 }, { "epoch": 30.746522849843885, "grad_norm": 0.006170004140585661, "learning_rate": 6.926625035481125e-05, "loss": 0.0010889772325754165, "step": 108320 }, { "epoch": 30.749361339767244, "grad_norm": 4.115619659423828, "learning_rate": 6.926341186488789e-05, "loss": 0.0013675007969141007, "step": 108330 }, { "epoch": 30.752199829690603, "grad_norm": 0.35531243681907654, "learning_rate": 6.926057337496452e-05, "loss": 0.00294010266661644, "step": 108340 }, { "epoch": 30.755038319613966, "grad_norm": 0.014607315883040428, "learning_rate": 6.925773488504116e-05, "loss": 0.0030873442068696023, "step": 108350 }, { "epoch": 30.757876809537326, "grad_norm": 0.11161608248949051, "learning_rate": 6.92548963951178e-05, "loss": 0.009470687061548234, "step": 108360 }, { "epoch": 30.76071529946069, "grad_norm": 1.2855242490768433, "learning_rate": 6.925205790519444e-05, "loss": 0.005928444117307663, "step": 108370 }, { "epoch": 30.763553789384048, "grad_norm": 8.517634391784668, "learning_rate": 6.924921941527108e-05, "loss": 0.0031967416405677797, "step": 108380 }, { "epoch": 30.766392279307407, "grad_norm": 0.9256747961044312, "learning_rate": 6.924638092534772e-05, "loss": 0.0006654625758528709, "step": 108390 }, { "epoch": 30.76923076923077, "grad_norm": 0.7435775399208069, "learning_rate": 6.924354243542436e-05, "loss": 0.0034090764820575714, "step": 108400 }, { "epoch": 30.77206925915413, "grad_norm": 0.03763056546449661, "learning_rate": 6.924070394550099e-05, "loss": 0.0009192680940032005, "step": 108410 }, { "epoch": 30.774907749077492, "grad_norm": 0.059776708483695984, "learning_rate": 6.923814930456997e-05, "loss": 0.014356769621372223, "step": 108420 }, { "epoch": 30.77774623900085, "grad_norm": 2.246271848678589, "learning_rate": 6.923531081464661e-05, "loss": 0.0035866878926753996, "step": 108430 }, { "epoch": 30.78058472892421, "grad_norm": 0.3374806344509125, "learning_rate": 6.923247232472326e-05, "loss": 0.0031007349491119383, "step": 108440 }, { "epoch": 30.783423218847574, "grad_norm": 9.104666709899902, "learning_rate": 6.922963383479988e-05, "loss": 0.003489021584391594, "step": 108450 }, { "epoch": 30.786261708770933, "grad_norm": 0.23354220390319824, "learning_rate": 6.922679534487652e-05, "loss": 0.00159030444920063, "step": 108460 }, { "epoch": 30.789100198694296, "grad_norm": 0.02888496033847332, "learning_rate": 6.922395685495317e-05, "loss": 0.0006195120513439178, "step": 108470 }, { "epoch": 30.791938688617655, "grad_norm": 3.364622116088867, "learning_rate": 6.922111836502981e-05, "loss": 0.0016037831082940102, "step": 108480 }, { "epoch": 30.794777178541015, "grad_norm": 0.06178366392850876, "learning_rate": 6.921827987510645e-05, "loss": 0.0008469242602586746, "step": 108490 }, { "epoch": 30.797615668464378, "grad_norm": 0.051293011754751205, "learning_rate": 6.921544138518309e-05, "loss": 0.005991822481155396, "step": 108500 }, { "epoch": 30.797615668464378, "eval_accuracy": 0.9801615056908501, "eval_loss": 0.07503755390644073, "eval_runtime": 33.8836, "eval_samples_per_second": 464.148, "eval_steps_per_second": 7.26, "step": 108500 }, { "epoch": 30.800454158387737, "grad_norm": 0.0051955352537333965, "learning_rate": 6.921260289525973e-05, "loss": 0.016278031468391418, "step": 108510 }, { "epoch": 30.8032926483111, "grad_norm": 1.7619706392288208, "learning_rate": 6.920976440533636e-05, "loss": 0.001720694825053215, "step": 108520 }, { "epoch": 30.80613113823446, "grad_norm": 1.2075951099395752, "learning_rate": 6.9206925915413e-05, "loss": 0.002347819693386555, "step": 108530 }, { "epoch": 30.80896962815782, "grad_norm": 5.609406471252441, "learning_rate": 6.920408742548964e-05, "loss": 0.011848895251750946, "step": 108540 }, { "epoch": 30.81180811808118, "grad_norm": 0.6706169247627258, "learning_rate": 6.920124893556628e-05, "loss": 0.004371052235364914, "step": 108550 }, { "epoch": 30.81464660800454, "grad_norm": 3.916738986968994, "learning_rate": 6.919841044564292e-05, "loss": 0.004040157794952393, "step": 108560 }, { "epoch": 30.817485097927904, "grad_norm": 2.787436008453369, "learning_rate": 6.919557195571957e-05, "loss": 0.006536125391721726, "step": 108570 }, { "epoch": 30.820323587851263, "grad_norm": 0.20616716146469116, "learning_rate": 6.91927334657962e-05, "loss": 0.01685407608747482, "step": 108580 }, { "epoch": 30.823162077774622, "grad_norm": 0.5529012680053711, "learning_rate": 6.918989497587284e-05, "loss": 0.011098890006542206, "step": 108590 }, { "epoch": 30.826000567697985, "grad_norm": 0.25611555576324463, "learning_rate": 6.918705648594948e-05, "loss": 0.011644576489925385, "step": 108600 }, { "epoch": 30.828839057621344, "grad_norm": 0.6763485670089722, "learning_rate": 6.918421799602612e-05, "loss": 0.0018392238765954972, "step": 108610 }, { "epoch": 30.831677547544707, "grad_norm": 1.0611716508865356, "learning_rate": 6.918137950610276e-05, "loss": 0.003977135568857193, "step": 108620 }, { "epoch": 30.834516037468067, "grad_norm": 0.1290370225906372, "learning_rate": 6.91785410161794e-05, "loss": 0.004806114733219147, "step": 108630 }, { "epoch": 30.837354527391426, "grad_norm": 0.27851754426956177, "learning_rate": 6.917570252625604e-05, "loss": 0.02119053453207016, "step": 108640 }, { "epoch": 30.84019301731479, "grad_norm": 23.189977645874023, "learning_rate": 6.917286403633267e-05, "loss": 0.01776282340288162, "step": 108650 }, { "epoch": 30.84303150723815, "grad_norm": 4.145711421966553, "learning_rate": 6.917002554640931e-05, "loss": 0.017257942259311675, "step": 108660 }, { "epoch": 30.84586999716151, "grad_norm": 1.3441448211669922, "learning_rate": 6.916718705648595e-05, "loss": 0.007483571022748947, "step": 108670 }, { "epoch": 30.84870848708487, "grad_norm": 1.403166651725769, "learning_rate": 6.916434856656258e-05, "loss": 0.010715021938085555, "step": 108680 }, { "epoch": 30.851546977008233, "grad_norm": 4.665699481964111, "learning_rate": 6.916151007663924e-05, "loss": 0.007256000488996506, "step": 108690 }, { "epoch": 30.854385466931593, "grad_norm": 2.835566997528076, "learning_rate": 6.915867158671588e-05, "loss": 0.013278736174106598, "step": 108700 }, { "epoch": 30.857223956854952, "grad_norm": 0.3482981324195862, "learning_rate": 6.91558330967925e-05, "loss": 0.003210350126028061, "step": 108710 }, { "epoch": 30.860062446778315, "grad_norm": 0.16713804006576538, "learning_rate": 6.915299460686915e-05, "loss": 0.0045403853058815, "step": 108720 }, { "epoch": 30.862900936701674, "grad_norm": 1.4939804077148438, "learning_rate": 6.915015611694579e-05, "loss": 0.0035046562552452087, "step": 108730 }, { "epoch": 30.865739426625037, "grad_norm": 0.843696653842926, "learning_rate": 6.914731762702243e-05, "loss": 0.006421273946762085, "step": 108740 }, { "epoch": 30.868577916548396, "grad_norm": 0.1773897111415863, "learning_rate": 6.914447913709907e-05, "loss": 0.012150131165981293, "step": 108750 }, { "epoch": 30.871416406471756, "grad_norm": 13.401080131530762, "learning_rate": 6.914164064717571e-05, "loss": 0.011283870786428452, "step": 108760 }, { "epoch": 30.87425489639512, "grad_norm": 0.1270187646150589, "learning_rate": 6.913880215725235e-05, "loss": 0.009788621962070466, "step": 108770 }, { "epoch": 30.877093386318478, "grad_norm": 3.2920923233032227, "learning_rate": 6.913596366732898e-05, "loss": 0.012835235893726349, "step": 108780 }, { "epoch": 30.87993187624184, "grad_norm": 1.2606042623519897, "learning_rate": 6.913312517740562e-05, "loss": 0.021090054512023927, "step": 108790 }, { "epoch": 30.8827703661652, "grad_norm": 0.21886342763900757, "learning_rate": 6.913028668748226e-05, "loss": 0.006083443015813828, "step": 108800 }, { "epoch": 30.88560885608856, "grad_norm": 3.2993578910827637, "learning_rate": 6.912744819755889e-05, "loss": 0.0035469137132167817, "step": 108810 }, { "epoch": 30.888447346011922, "grad_norm": 0.768591046333313, "learning_rate": 6.912460970763555e-05, "loss": 0.00258859358727932, "step": 108820 }, { "epoch": 30.89128583593528, "grad_norm": 10.090304374694824, "learning_rate": 6.912177121771219e-05, "loss": 0.008553179353475571, "step": 108830 }, { "epoch": 30.894124325858645, "grad_norm": 0.7707882523536682, "learning_rate": 6.911893272778882e-05, "loss": 0.004205245524644852, "step": 108840 }, { "epoch": 30.896962815782004, "grad_norm": 0.3494410216808319, "learning_rate": 6.911609423786546e-05, "loss": 0.003217540681362152, "step": 108850 }, { "epoch": 30.899801305705363, "grad_norm": 0.5055429339408875, "learning_rate": 6.91132557479421e-05, "loss": 0.003849566727876663, "step": 108860 }, { "epoch": 30.902639795628726, "grad_norm": 0.5354936122894287, "learning_rate": 6.911041725801874e-05, "loss": 0.008175268769264221, "step": 108870 }, { "epoch": 30.905478285552086, "grad_norm": 0.6796396970748901, "learning_rate": 6.910757876809537e-05, "loss": 0.0018556466326117515, "step": 108880 }, { "epoch": 30.90831677547545, "grad_norm": 0.1501392126083374, "learning_rate": 6.910474027817202e-05, "loss": 0.0005577052012085915, "step": 108890 }, { "epoch": 30.911155265398808, "grad_norm": 13.566251754760742, "learning_rate": 6.910190178824866e-05, "loss": 0.017585664987564087, "step": 108900 }, { "epoch": 30.913993755322167, "grad_norm": 0.4438706338405609, "learning_rate": 6.909906329832529e-05, "loss": 0.009445513784885406, "step": 108910 }, { "epoch": 30.91683224524553, "grad_norm": 2.123636484146118, "learning_rate": 6.909622480840193e-05, "loss": 0.0025211695581674578, "step": 108920 }, { "epoch": 30.91967073516889, "grad_norm": 6.616878509521484, "learning_rate": 6.909338631847857e-05, "loss": 0.006300760060548782, "step": 108930 }, { "epoch": 30.922509225092252, "grad_norm": 0.05496203526854515, "learning_rate": 6.90905478285552e-05, "loss": 0.005288013443350792, "step": 108940 }, { "epoch": 30.92534771501561, "grad_norm": 0.3469291627407074, "learning_rate": 6.908770933863186e-05, "loss": 0.0019156357273459434, "step": 108950 }, { "epoch": 30.92818620493897, "grad_norm": 0.1333954930305481, "learning_rate": 6.90848708487085e-05, "loss": 0.005966836959123612, "step": 108960 }, { "epoch": 30.931024694862334, "grad_norm": 0.046174850314855576, "learning_rate": 6.908203235878513e-05, "loss": 0.0006417496129870415, "step": 108970 }, { "epoch": 30.933863184785693, "grad_norm": 0.13653521239757538, "learning_rate": 6.907919386886177e-05, "loss": 0.00268581323325634, "step": 108980 }, { "epoch": 30.936701674709056, "grad_norm": 0.2843410074710846, "learning_rate": 6.907635537893841e-05, "loss": 0.02254500538110733, "step": 108990 }, { "epoch": 30.939540164632415, "grad_norm": 0.19084322452545166, "learning_rate": 6.907351688901505e-05, "loss": 0.0021833179518580435, "step": 109000 }, { "epoch": 30.939540164632415, "eval_accuracy": 0.9754562217841928, "eval_loss": 0.09870609641075134, "eval_runtime": 37.8596, "eval_samples_per_second": 415.403, "eval_steps_per_second": 6.498, "step": 109000 }, { "epoch": 30.942378654555775, "grad_norm": 9.940494537353516, "learning_rate": 6.907067839909168e-05, "loss": 0.023348306119441987, "step": 109010 }, { "epoch": 30.945217144479138, "grad_norm": 0.40241920948028564, "learning_rate": 6.906783990916833e-05, "loss": 0.00753706693649292, "step": 109020 }, { "epoch": 30.948055634402497, "grad_norm": 0.2005467414855957, "learning_rate": 6.906500141924497e-05, "loss": 0.003384283185005188, "step": 109030 }, { "epoch": 30.95089412432586, "grad_norm": 0.06576107442378998, "learning_rate": 6.90621629293216e-05, "loss": 0.005423664301633835, "step": 109040 }, { "epoch": 30.95373261424922, "grad_norm": 5.1738972663879395, "learning_rate": 6.905932443939824e-05, "loss": 0.002138357795774937, "step": 109050 }, { "epoch": 30.956571104172582, "grad_norm": 0.0725049376487732, "learning_rate": 6.905648594947488e-05, "loss": 0.006853576004505158, "step": 109060 }, { "epoch": 30.95940959409594, "grad_norm": 0.11232155561447144, "learning_rate": 6.905364745955151e-05, "loss": 0.00946674644947052, "step": 109070 }, { "epoch": 30.9622480840193, "grad_norm": 0.11753447353839874, "learning_rate": 6.905080896962815e-05, "loss": 0.004704110324382782, "step": 109080 }, { "epoch": 30.965086573942664, "grad_norm": 4.697356700897217, "learning_rate": 6.904797047970481e-05, "loss": 0.005336402356624604, "step": 109090 }, { "epoch": 30.967925063866023, "grad_norm": 2.4119155406951904, "learning_rate": 6.904513198978144e-05, "loss": 0.0012200405821204186, "step": 109100 }, { "epoch": 30.970763553789386, "grad_norm": 9.856350898742676, "learning_rate": 6.904229349985808e-05, "loss": 0.010966842621564865, "step": 109110 }, { "epoch": 30.973602043712745, "grad_norm": 0.15150731801986694, "learning_rate": 6.903945500993472e-05, "loss": 0.004723504185676575, "step": 109120 }, { "epoch": 30.976440533636104, "grad_norm": 7.058999061584473, "learning_rate": 6.903661652001136e-05, "loss": 0.008852184563875199, "step": 109130 }, { "epoch": 30.979279023559467, "grad_norm": 5.044285774230957, "learning_rate": 6.903377803008799e-05, "loss": 0.007142987102270126, "step": 109140 }, { "epoch": 30.982117513482827, "grad_norm": 5.120532989501953, "learning_rate": 6.903093954016464e-05, "loss": 0.015495629608631134, "step": 109150 }, { "epoch": 30.98495600340619, "grad_norm": 0.5924476385116577, "learning_rate": 6.902810105024129e-05, "loss": 0.013055524230003357, "step": 109160 }, { "epoch": 30.98779449332955, "grad_norm": 1.8624985218048096, "learning_rate": 6.902526256031791e-05, "loss": 0.005524621158838272, "step": 109170 }, { "epoch": 30.990632983252908, "grad_norm": 1.3329607248306274, "learning_rate": 6.902242407039455e-05, "loss": 0.0021551916375756263, "step": 109180 }, { "epoch": 30.99347147317627, "grad_norm": 8.528079986572266, "learning_rate": 6.90195855804712e-05, "loss": 0.004179446771740913, "step": 109190 }, { "epoch": 30.99630996309963, "grad_norm": 0.05008833482861519, "learning_rate": 6.901674709054782e-05, "loss": 0.004381439834833145, "step": 109200 }, { "epoch": 30.999148453022993, "grad_norm": 0.16804540157318115, "learning_rate": 6.901390860062446e-05, "loss": 0.0005899921059608459, "step": 109210 }, { "epoch": 31.001986942946353, "grad_norm": 0.12668229639530182, "learning_rate": 6.901107011070112e-05, "loss": 0.0006059461273252964, "step": 109220 }, { "epoch": 31.004825432869712, "grad_norm": 0.061036527156829834, "learning_rate": 6.900823162077775e-05, "loss": 0.002416827902197838, "step": 109230 }, { "epoch": 31.007663922793075, "grad_norm": 0.017848990857601166, "learning_rate": 6.900539313085439e-05, "loss": 0.006847367435693741, "step": 109240 }, { "epoch": 31.010502412716434, "grad_norm": 0.017299039289355278, "learning_rate": 6.900255464093103e-05, "loss": 0.007240422070026398, "step": 109250 }, { "epoch": 31.013340902639797, "grad_norm": 1.0237042903900146, "learning_rate": 6.899971615100767e-05, "loss": 0.011809718608856202, "step": 109260 }, { "epoch": 31.016179392563156, "grad_norm": 2.9141194820404053, "learning_rate": 6.89968776610843e-05, "loss": 0.004341079294681549, "step": 109270 }, { "epoch": 31.019017882486516, "grad_norm": 0.10049811750650406, "learning_rate": 6.899403917116094e-05, "loss": 0.003938692063093186, "step": 109280 }, { "epoch": 31.02185637240988, "grad_norm": 1.697452187538147, "learning_rate": 6.899120068123758e-05, "loss": 0.0024065757170319557, "step": 109290 }, { "epoch": 31.024694862333238, "grad_norm": 0.38384759426116943, "learning_rate": 6.898836219131422e-05, "loss": 0.011996664851903916, "step": 109300 }, { "epoch": 31.0275333522566, "grad_norm": 0.47233307361602783, "learning_rate": 6.898552370139086e-05, "loss": 0.00401681661605835, "step": 109310 }, { "epoch": 31.03037184217996, "grad_norm": 0.16332842409610748, "learning_rate": 6.89826852114675e-05, "loss": 0.0037011750042438506, "step": 109320 }, { "epoch": 31.03321033210332, "grad_norm": 0.2671343684196472, "learning_rate": 6.897984672154413e-05, "loss": 0.010512886941432953, "step": 109330 }, { "epoch": 31.036048822026682, "grad_norm": 0.04335208237171173, "learning_rate": 6.897700823162078e-05, "loss": 0.0021683948114514353, "step": 109340 }, { "epoch": 31.03888731195004, "grad_norm": 3.3529248237609863, "learning_rate": 6.897416974169743e-05, "loss": 0.0018401958048343658, "step": 109350 }, { "epoch": 31.041725801873405, "grad_norm": 0.4839143455028534, "learning_rate": 6.897133125177406e-05, "loss": 0.0034644387662410734, "step": 109360 }, { "epoch": 31.044564291796764, "grad_norm": 0.02560669369995594, "learning_rate": 6.89684927618507e-05, "loss": 0.02387857884168625, "step": 109370 }, { "epoch": 31.047402781720123, "grad_norm": 10.526697158813477, "learning_rate": 6.896565427192734e-05, "loss": 0.004608814418315887, "step": 109380 }, { "epoch": 31.050241271643486, "grad_norm": 7.562223434448242, "learning_rate": 6.896281578200397e-05, "loss": 0.005469638854265213, "step": 109390 }, { "epoch": 31.053079761566845, "grad_norm": 0.5527409911155701, "learning_rate": 6.895997729208061e-05, "loss": 0.002963198907673359, "step": 109400 }, { "epoch": 31.05591825149021, "grad_norm": 12.267217636108398, "learning_rate": 6.895713880215725e-05, "loss": 0.01034160926938057, "step": 109410 }, { "epoch": 31.058756741413568, "grad_norm": 0.012510541826486588, "learning_rate": 6.895430031223389e-05, "loss": 0.005216305702924728, "step": 109420 }, { "epoch": 31.061595231336927, "grad_norm": 0.07905686646699905, "learning_rate": 6.895146182231053e-05, "loss": 0.004976677149534226, "step": 109430 }, { "epoch": 31.06443372126029, "grad_norm": 0.1799657940864563, "learning_rate": 6.894862333238718e-05, "loss": 0.003760189563035965, "step": 109440 }, { "epoch": 31.06727221118365, "grad_norm": 2.031839609146118, "learning_rate": 6.894578484246382e-05, "loss": 0.02107417434453964, "step": 109450 }, { "epoch": 31.070110701107012, "grad_norm": 0.32211583852767944, "learning_rate": 6.894294635254044e-05, "loss": 0.010341110825538635, "step": 109460 }, { "epoch": 31.07294919103037, "grad_norm": 9.915963172912598, "learning_rate": 6.894010786261709e-05, "loss": 0.005789018422365189, "step": 109470 }, { "epoch": 31.075787680953734, "grad_norm": 0.3127898871898651, "learning_rate": 6.893726937269374e-05, "loss": 0.009811395406723022, "step": 109480 }, { "epoch": 31.078626170877094, "grad_norm": 13.452810287475586, "learning_rate": 6.893443088277037e-05, "loss": 0.01691042184829712, "step": 109490 }, { "epoch": 31.081464660800453, "grad_norm": 11.313800811767578, "learning_rate": 6.893159239284701e-05, "loss": 0.028371462225914003, "step": 109500 }, { "epoch": 31.081464660800453, "eval_accuracy": 0.9641381064411522, "eval_loss": 0.1513548344373703, "eval_runtime": 35.7388, "eval_samples_per_second": 440.054, "eval_steps_per_second": 6.883, "step": 109500 }, { "epoch": 31.084303150723816, "grad_norm": 0.2988686263561249, "learning_rate": 6.892875390292365e-05, "loss": 0.021642419695854186, "step": 109510 }, { "epoch": 31.087141640647175, "grad_norm": 0.046858225017786026, "learning_rate": 6.892591541300028e-05, "loss": 0.008884438127279282, "step": 109520 }, { "epoch": 31.089980130570538, "grad_norm": 0.2563813328742981, "learning_rate": 6.892307692307692e-05, "loss": 0.0060292273759841915, "step": 109530 }, { "epoch": 31.092818620493897, "grad_norm": 4.315605640411377, "learning_rate": 6.892023843315356e-05, "loss": 0.008783783018589019, "step": 109540 }, { "epoch": 31.095657110417257, "grad_norm": 0.12882687151432037, "learning_rate": 6.89173999432302e-05, "loss": 0.008572359383106232, "step": 109550 }, { "epoch": 31.09849560034062, "grad_norm": 5.575016498565674, "learning_rate": 6.891456145330685e-05, "loss": 0.006530748307704925, "step": 109560 }, { "epoch": 31.10133409026398, "grad_norm": 2.4421069622039795, "learning_rate": 6.891172296338349e-05, "loss": 0.018341691792011262, "step": 109570 }, { "epoch": 31.104172580187342, "grad_norm": 1.7246856689453125, "learning_rate": 6.890888447346013e-05, "loss": 0.02073686271905899, "step": 109580 }, { "epoch": 31.1070110701107, "grad_norm": 0.04072080925107002, "learning_rate": 6.890604598353676e-05, "loss": 0.007897714525461197, "step": 109590 }, { "epoch": 31.10984956003406, "grad_norm": 0.0277627632021904, "learning_rate": 6.89032074936134e-05, "loss": 0.004130616411566734, "step": 109600 }, { "epoch": 31.112688049957423, "grad_norm": 0.35136282444000244, "learning_rate": 6.890036900369004e-05, "loss": 0.015590246021747588, "step": 109610 }, { "epoch": 31.115526539880783, "grad_norm": 4.696053981781006, "learning_rate": 6.889753051376668e-05, "loss": 0.024378713965415955, "step": 109620 }, { "epoch": 31.118365029804146, "grad_norm": 11.01807689666748, "learning_rate": 6.889469202384332e-05, "loss": 0.010085536539554596, "step": 109630 }, { "epoch": 31.121203519727505, "grad_norm": 0.03502332791686058, "learning_rate": 6.889185353391996e-05, "loss": 0.004782991856336594, "step": 109640 }, { "epoch": 31.124042009650864, "grad_norm": 0.4681687355041504, "learning_rate": 6.888901504399659e-05, "loss": 0.02481200695037842, "step": 109650 }, { "epoch": 31.126880499574227, "grad_norm": 14.779802322387695, "learning_rate": 6.888617655407323e-05, "loss": 0.013231851160526276, "step": 109660 }, { "epoch": 31.129718989497587, "grad_norm": 1.9270581007003784, "learning_rate": 6.888333806414987e-05, "loss": 0.007096486538648606, "step": 109670 }, { "epoch": 31.13255747942095, "grad_norm": 0.10003042221069336, "learning_rate": 6.888049957422651e-05, "loss": 0.0019540801644325257, "step": 109680 }, { "epoch": 31.13539596934431, "grad_norm": 0.02300691418349743, "learning_rate": 6.887766108430316e-05, "loss": 0.016792482137680052, "step": 109690 }, { "epoch": 31.138234459267668, "grad_norm": 1.0505192279815674, "learning_rate": 6.88748225943798e-05, "loss": 0.018463405966758727, "step": 109700 }, { "epoch": 31.14107294919103, "grad_norm": 7.424200057983398, "learning_rate": 6.887198410445644e-05, "loss": 0.013392281532287598, "step": 109710 }, { "epoch": 31.14391143911439, "grad_norm": 0.029171336442232132, "learning_rate": 6.886914561453307e-05, "loss": 0.0013604097068309784, "step": 109720 }, { "epoch": 31.146749929037753, "grad_norm": 0.013673871755599976, "learning_rate": 6.886630712460971e-05, "loss": 0.0015854639932513237, "step": 109730 }, { "epoch": 31.149588418961113, "grad_norm": 0.15398047864437103, "learning_rate": 6.886346863468635e-05, "loss": 0.005296848714351654, "step": 109740 }, { "epoch": 31.152426908884472, "grad_norm": 0.2321297824382782, "learning_rate": 6.886063014476299e-05, "loss": 0.0020369529724121095, "step": 109750 }, { "epoch": 31.155265398807835, "grad_norm": 0.1351226270198822, "learning_rate": 6.885779165483963e-05, "loss": 0.023468878865242005, "step": 109760 }, { "epoch": 31.158103888731194, "grad_norm": 0.6900603175163269, "learning_rate": 6.885495316491627e-05, "loss": 0.0009498601779341697, "step": 109770 }, { "epoch": 31.160942378654557, "grad_norm": 0.3155710697174072, "learning_rate": 6.88521146749929e-05, "loss": 0.00245947428047657, "step": 109780 }, { "epoch": 31.163780868577916, "grad_norm": 0.0897998958826065, "learning_rate": 6.884927618506954e-05, "loss": 0.004901527613401413, "step": 109790 }, { "epoch": 31.166619358501276, "grad_norm": 1.327096939086914, "learning_rate": 6.884643769514618e-05, "loss": 0.010461710393428802, "step": 109800 }, { "epoch": 31.16945784842464, "grad_norm": 1.6871525049209595, "learning_rate": 6.884359920522283e-05, "loss": 0.005492468923330307, "step": 109810 }, { "epoch": 31.172296338347998, "grad_norm": 0.2973088324069977, "learning_rate": 6.884076071529947e-05, "loss": 0.013548652827739715, "step": 109820 }, { "epoch": 31.17513482827136, "grad_norm": 0.3699015974998474, "learning_rate": 6.883792222537611e-05, "loss": 0.000653131864964962, "step": 109830 }, { "epoch": 31.17797331819472, "grad_norm": 0.09296751022338867, "learning_rate": 6.883508373545275e-05, "loss": 0.021385486423969268, "step": 109840 }, { "epoch": 31.18081180811808, "grad_norm": 0.08847342431545258, "learning_rate": 6.883224524552938e-05, "loss": 0.0031149575486779215, "step": 109850 }, { "epoch": 31.183650298041442, "grad_norm": 0.04327888414263725, "learning_rate": 6.882940675560602e-05, "loss": 0.000564432330429554, "step": 109860 }, { "epoch": 31.1864887879648, "grad_norm": 0.2731773257255554, "learning_rate": 6.882656826568266e-05, "loss": 0.0018766345456242561, "step": 109870 }, { "epoch": 31.189327277888165, "grad_norm": 0.0649159848690033, "learning_rate": 6.88237297757593e-05, "loss": 0.012916894257068634, "step": 109880 }, { "epoch": 31.192165767811524, "grad_norm": 0.9247746467590332, "learning_rate": 6.882089128583594e-05, "loss": 0.002699973247945309, "step": 109890 }, { "epoch": 31.195004257734887, "grad_norm": 0.4448305070400238, "learning_rate": 6.881805279591258e-05, "loss": 0.0023522069677710533, "step": 109900 }, { "epoch": 31.197842747658246, "grad_norm": 0.5304642915725708, "learning_rate": 6.881521430598921e-05, "loss": 0.0030606821179389953, "step": 109910 }, { "epoch": 31.200681237581605, "grad_norm": 0.3419625759124756, "learning_rate": 6.881237581606585e-05, "loss": 0.013526612520217895, "step": 109920 }, { "epoch": 31.20351972750497, "grad_norm": 0.30689969658851624, "learning_rate": 6.88095373261425e-05, "loss": 0.0023218072950839997, "step": 109930 }, { "epoch": 31.206358217428328, "grad_norm": 9.06937026977539, "learning_rate": 6.880669883621914e-05, "loss": 0.004963326081633568, "step": 109940 }, { "epoch": 31.20919670735169, "grad_norm": 0.3411327600479126, "learning_rate": 6.880386034629578e-05, "loss": 0.003288961946964264, "step": 109950 }, { "epoch": 31.21203519727505, "grad_norm": 0.23771502077579498, "learning_rate": 6.880102185637242e-05, "loss": 0.0068480193614959715, "step": 109960 }, { "epoch": 31.21487368719841, "grad_norm": 11.986871719360352, "learning_rate": 6.879818336644906e-05, "loss": 0.0051754888147115706, "step": 109970 }, { "epoch": 31.217712177121772, "grad_norm": 0.686471700668335, "learning_rate": 6.879534487652569e-05, "loss": 0.004353117197751999, "step": 109980 }, { "epoch": 31.22055066704513, "grad_norm": 0.9910867214202881, "learning_rate": 6.879250638660233e-05, "loss": 0.0016613226383924485, "step": 109990 }, { "epoch": 31.223389156968494, "grad_norm": 0.16537384688854218, "learning_rate": 6.878966789667897e-05, "loss": 0.0010543152689933776, "step": 110000 }, { "epoch": 31.223389156968494, "eval_accuracy": 0.9799707509378776, "eval_loss": 0.07210727035999298, "eval_runtime": 37.8234, "eval_samples_per_second": 415.801, "eval_steps_per_second": 6.504, "step": 110000 }, { "epoch": 31.226227646891854, "grad_norm": 0.026608632877469063, "learning_rate": 6.87868294067556e-05, "loss": 0.0008706633001565933, "step": 110010 }, { "epoch": 31.229066136815213, "grad_norm": 1.0672883987426758, "learning_rate": 6.878399091683225e-05, "loss": 0.001624923199415207, "step": 110020 }, { "epoch": 31.231904626738576, "grad_norm": 0.021938323974609375, "learning_rate": 6.87811524269089e-05, "loss": 0.0004857627674937248, "step": 110030 }, { "epoch": 31.234743116661935, "grad_norm": 0.46502625942230225, "learning_rate": 6.877831393698552e-05, "loss": 0.004696225002408028, "step": 110040 }, { "epoch": 31.237581606585298, "grad_norm": 1.5468593835830688, "learning_rate": 6.877547544706216e-05, "loss": 0.0018336635082960128, "step": 110050 }, { "epoch": 31.240420096508657, "grad_norm": 0.028233686462044716, "learning_rate": 6.87726369571388e-05, "loss": 0.0010898087173700332, "step": 110060 }, { "epoch": 31.243258586432017, "grad_norm": 0.34023168683052063, "learning_rate": 6.876979846721545e-05, "loss": 0.006666634231805801, "step": 110070 }, { "epoch": 31.24609707635538, "grad_norm": 4.9193434715271, "learning_rate": 6.876695997729209e-05, "loss": 0.0033955663442611695, "step": 110080 }, { "epoch": 31.24893556627874, "grad_norm": 3.187549591064453, "learning_rate": 6.876412148736873e-05, "loss": 0.0013257237151265144, "step": 110090 }, { "epoch": 31.251774056202102, "grad_norm": 5.063961029052734, "learning_rate": 6.876128299744537e-05, "loss": 0.0032634936273097993, "step": 110100 }, { "epoch": 31.25461254612546, "grad_norm": 0.11462600529193878, "learning_rate": 6.8758444507522e-05, "loss": 0.005429989844560623, "step": 110110 }, { "epoch": 31.25745103604882, "grad_norm": 0.2462206333875656, "learning_rate": 6.875560601759864e-05, "loss": 0.0011326028034090996, "step": 110120 }, { "epoch": 31.260289525972183, "grad_norm": 0.1082332506775856, "learning_rate": 6.875276752767528e-05, "loss": 0.008225914090871811, "step": 110130 }, { "epoch": 31.263128015895543, "grad_norm": 0.39465633034706116, "learning_rate": 6.874992903775191e-05, "loss": 0.001136239804327488, "step": 110140 }, { "epoch": 31.265966505818906, "grad_norm": 0.047707654535770416, "learning_rate": 6.874709054782856e-05, "loss": 0.0036076001822948458, "step": 110150 }, { "epoch": 31.268804995742265, "grad_norm": 0.07308752834796906, "learning_rate": 6.87442520579052e-05, "loss": 0.0012331156060099602, "step": 110160 }, { "epoch": 31.271643485665624, "grad_norm": 0.18951310217380524, "learning_rate": 6.874141356798183e-05, "loss": 0.004703918471932411, "step": 110170 }, { "epoch": 31.274481975588987, "grad_norm": 3.2542953491210938, "learning_rate": 6.873857507805847e-05, "loss": 0.002170596271753311, "step": 110180 }, { "epoch": 31.277320465512346, "grad_norm": 0.036146629601716995, "learning_rate": 6.873573658813512e-05, "loss": 0.0009807946160435676, "step": 110190 }, { "epoch": 31.28015895543571, "grad_norm": 0.7454248666763306, "learning_rate": 6.873289809821176e-05, "loss": 0.0013190396130084991, "step": 110200 }, { "epoch": 31.28299744535907, "grad_norm": 0.3340536653995514, "learning_rate": 6.873005960828839e-05, "loss": 0.0069990381598472595, "step": 110210 }, { "epoch": 31.285835935282428, "grad_norm": 0.017678160220384598, "learning_rate": 6.872722111836504e-05, "loss": 0.006103499978780747, "step": 110220 }, { "epoch": 31.28867442520579, "grad_norm": 0.004596114624291658, "learning_rate": 6.872438262844167e-05, "loss": 0.0005209960043430328, "step": 110230 }, { "epoch": 31.29151291512915, "grad_norm": 0.37339261174201965, "learning_rate": 6.872154413851831e-05, "loss": 0.002099849097430706, "step": 110240 }, { "epoch": 31.294351405052513, "grad_norm": 1.8850312232971191, "learning_rate": 6.871870564859495e-05, "loss": 0.014542514085769653, "step": 110250 }, { "epoch": 31.297189894975872, "grad_norm": 0.04967155307531357, "learning_rate": 6.871586715867159e-05, "loss": 0.004037985205650329, "step": 110260 }, { "epoch": 31.300028384899235, "grad_norm": 0.06643661111593246, "learning_rate": 6.871302866874822e-05, "loss": 0.0056188274174928665, "step": 110270 }, { "epoch": 31.302866874822595, "grad_norm": 0.2627907395362854, "learning_rate": 6.871019017882488e-05, "loss": 0.008196885138750077, "step": 110280 }, { "epoch": 31.305705364745954, "grad_norm": 5.0574445724487305, "learning_rate": 6.870735168890152e-05, "loss": 0.001587606966495514, "step": 110290 }, { "epoch": 31.308543854669317, "grad_norm": 1.6990537643432617, "learning_rate": 6.870451319897814e-05, "loss": 0.003079248778522015, "step": 110300 }, { "epoch": 31.311382344592676, "grad_norm": 0.016181500628590584, "learning_rate": 6.870167470905479e-05, "loss": 0.0012942334637045861, "step": 110310 }, { "epoch": 31.31422083451604, "grad_norm": 0.054735757410526276, "learning_rate": 6.869883621913143e-05, "loss": 0.0016829561442136764, "step": 110320 }, { "epoch": 31.3170593244394, "grad_norm": 0.07430920749902725, "learning_rate": 6.869599772920805e-05, "loss": 0.0005777206271886826, "step": 110330 }, { "epoch": 31.319897814362758, "grad_norm": 0.0255576241761446, "learning_rate": 6.86931592392847e-05, "loss": 0.007065671682357788, "step": 110340 }, { "epoch": 31.32273630428612, "grad_norm": 1.748060941696167, "learning_rate": 6.869032074936135e-05, "loss": 0.0027185771614313126, "step": 110350 }, { "epoch": 31.32557479420948, "grad_norm": 1.7736647129058838, "learning_rate": 6.868748225943798e-05, "loss": 0.00147741436958313, "step": 110360 }, { "epoch": 31.328413284132843, "grad_norm": 0.5256888270378113, "learning_rate": 6.868464376951462e-05, "loss": 0.0009526925161480904, "step": 110370 }, { "epoch": 31.331251774056202, "grad_norm": 0.04150933772325516, "learning_rate": 6.868180527959126e-05, "loss": 0.0012520572170615196, "step": 110380 }, { "epoch": 31.33409026397956, "grad_norm": 1.259807825088501, "learning_rate": 6.86789667896679e-05, "loss": 0.003445548564195633, "step": 110390 }, { "epoch": 31.336928753902924, "grad_norm": 0.024712737649679184, "learning_rate": 6.867612829974453e-05, "loss": 0.0007450640201568604, "step": 110400 }, { "epoch": 31.339767243826284, "grad_norm": 1.9132165908813477, "learning_rate": 6.867328980982117e-05, "loss": 0.006825102865695954, "step": 110410 }, { "epoch": 31.342605733749647, "grad_norm": 0.2823460102081299, "learning_rate": 6.867045131989783e-05, "loss": 0.016162168979644776, "step": 110420 }, { "epoch": 31.345444223673006, "grad_norm": 0.7884186506271362, "learning_rate": 6.866761282997445e-05, "loss": 0.0035040974617004395, "step": 110430 }, { "epoch": 31.348282713596365, "grad_norm": 0.08391150087118149, "learning_rate": 6.86647743400511e-05, "loss": 0.005183306336402893, "step": 110440 }, { "epoch": 31.351121203519728, "grad_norm": 13.423528671264648, "learning_rate": 6.866193585012774e-05, "loss": 0.03309859037399292, "step": 110450 }, { "epoch": 31.353959693443088, "grad_norm": 12.147981643676758, "learning_rate": 6.865909736020437e-05, "loss": 0.004046395421028137, "step": 110460 }, { "epoch": 31.35679818336645, "grad_norm": 0.05061406269669533, "learning_rate": 6.865625887028101e-05, "loss": 0.005337664857506752, "step": 110470 }, { "epoch": 31.35963667328981, "grad_norm": 0.27121084928512573, "learning_rate": 6.865342038035766e-05, "loss": 0.0018667567521333695, "step": 110480 }, { "epoch": 31.36247516321317, "grad_norm": 0.14181476831436157, "learning_rate": 6.865058189043429e-05, "loss": 0.0031008047983050345, "step": 110490 }, { "epoch": 31.365313653136532, "grad_norm": 0.34339573979377747, "learning_rate": 6.864774340051093e-05, "loss": 0.010095790773630143, "step": 110500 }, { "epoch": 31.365313653136532, "eval_accuracy": 0.9809881096203981, "eval_loss": 0.07121887803077698, "eval_runtime": 34.5838, "eval_samples_per_second": 454.75, "eval_steps_per_second": 7.113, "step": 110500 }, { "epoch": 31.36815214305989, "grad_norm": 0.10148178040981293, "learning_rate": 6.864490491058757e-05, "loss": 0.0010139962658286094, "step": 110510 }, { "epoch": 31.370990632983254, "grad_norm": 0.3538142442703247, "learning_rate": 6.864235026965654e-05, "loss": 0.012743707001209258, "step": 110520 }, { "epoch": 31.373829122906614, "grad_norm": 0.20735348761081696, "learning_rate": 6.86395117797332e-05, "loss": 0.0044346295297145845, "step": 110530 }, { "epoch": 31.376667612829973, "grad_norm": 0.7269258499145508, "learning_rate": 6.863667328980982e-05, "loss": 0.0011805566027760506, "step": 110540 }, { "epoch": 31.379506102753336, "grad_norm": 7.950889587402344, "learning_rate": 6.863383479988646e-05, "loss": 0.004986066371202469, "step": 110550 }, { "epoch": 31.382344592676695, "grad_norm": 2.179025888442993, "learning_rate": 6.86309963099631e-05, "loss": 0.005115795508027076, "step": 110560 }, { "epoch": 31.385183082600058, "grad_norm": 0.1330290138721466, "learning_rate": 6.862815782003975e-05, "loss": 0.003808712586760521, "step": 110570 }, { "epoch": 31.388021572523417, "grad_norm": 0.18411630392074585, "learning_rate": 6.862531933011637e-05, "loss": 0.0019894812256097795, "step": 110580 }, { "epoch": 31.390860062446777, "grad_norm": 0.29706671833992004, "learning_rate": 6.862248084019301e-05, "loss": 0.0060018360614776615, "step": 110590 }, { "epoch": 31.39369855237014, "grad_norm": 1.4245715141296387, "learning_rate": 6.861964235026967e-05, "loss": 0.00525846965610981, "step": 110600 }, { "epoch": 31.3965370422935, "grad_norm": 4.389762878417969, "learning_rate": 6.86168038603463e-05, "loss": 0.0061382360756397246, "step": 110610 }, { "epoch": 31.39937553221686, "grad_norm": 0.015724072232842445, "learning_rate": 6.861396537042294e-05, "loss": 0.005449182540178299, "step": 110620 }, { "epoch": 31.40221402214022, "grad_norm": 0.46780869364738464, "learning_rate": 6.861112688049958e-05, "loss": 0.0021314822137355805, "step": 110630 }, { "epoch": 31.405052512063584, "grad_norm": 0.8261268734931946, "learning_rate": 6.860828839057621e-05, "loss": 0.010704531520605087, "step": 110640 }, { "epoch": 31.407891001986943, "grad_norm": 0.7176552414894104, "learning_rate": 6.860544990065285e-05, "loss": 0.012783223390579223, "step": 110650 }, { "epoch": 31.410729491910303, "grad_norm": 0.09593373537063599, "learning_rate": 6.86026114107295e-05, "loss": 0.018745028972625734, "step": 110660 }, { "epoch": 31.413567981833665, "grad_norm": 7.330688953399658, "learning_rate": 6.859977292080613e-05, "loss": 0.014092136919498444, "step": 110670 }, { "epoch": 31.416406471757025, "grad_norm": 0.008313361555337906, "learning_rate": 6.859693443088277e-05, "loss": 0.00201076976954937, "step": 110680 }, { "epoch": 31.419244961680388, "grad_norm": 2.8427164554595947, "learning_rate": 6.859409594095942e-05, "loss": 0.003682321310043335, "step": 110690 }, { "epoch": 31.422083451603747, "grad_norm": 0.8326147198677063, "learning_rate": 6.859125745103606e-05, "loss": 0.005683421716094017, "step": 110700 }, { "epoch": 31.424921941527106, "grad_norm": 4.706357955932617, "learning_rate": 6.858841896111268e-05, "loss": 0.008719277381896973, "step": 110710 }, { "epoch": 31.42776043145047, "grad_norm": 11.62217903137207, "learning_rate": 6.858558047118933e-05, "loss": 0.013668470084667206, "step": 110720 }, { "epoch": 31.43059892137383, "grad_norm": 4.365249156951904, "learning_rate": 6.858274198126598e-05, "loss": 0.021844978630542754, "step": 110730 }, { "epoch": 31.43343741129719, "grad_norm": 7.007303714752197, "learning_rate": 6.857990349134261e-05, "loss": 0.002145897224545479, "step": 110740 }, { "epoch": 31.43627590122055, "grad_norm": 5.930025577545166, "learning_rate": 6.857706500141925e-05, "loss": 0.004048363119363785, "step": 110750 }, { "epoch": 31.43911439114391, "grad_norm": 6.474993705749512, "learning_rate": 6.857422651149589e-05, "loss": 0.009289970993995667, "step": 110760 }, { "epoch": 31.441952881067273, "grad_norm": 0.1553315967321396, "learning_rate": 6.857138802157252e-05, "loss": 0.001924046315252781, "step": 110770 }, { "epoch": 31.444791370990632, "grad_norm": 0.1164921373128891, "learning_rate": 6.856854953164916e-05, "loss": 0.004657508805394173, "step": 110780 }, { "epoch": 31.447629860913995, "grad_norm": 1.5011332035064697, "learning_rate": 6.85657110417258e-05, "loss": 0.014498165249824524, "step": 110790 }, { "epoch": 31.450468350837355, "grad_norm": 1.2398691177368164, "learning_rate": 6.856287255180244e-05, "loss": 0.0019002798944711685, "step": 110800 }, { "epoch": 31.453306840760714, "grad_norm": 0.19809995591640472, "learning_rate": 6.856003406187908e-05, "loss": 0.018311037123203276, "step": 110810 }, { "epoch": 31.456145330684077, "grad_norm": 4.439850807189941, "learning_rate": 6.855719557195573e-05, "loss": 0.0033529069274663924, "step": 110820 }, { "epoch": 31.458983820607436, "grad_norm": 1.9044972658157349, "learning_rate": 6.855435708203237e-05, "loss": 0.01605898141860962, "step": 110830 }, { "epoch": 31.4618223105308, "grad_norm": 0.10623032599687576, "learning_rate": 6.8551518592109e-05, "loss": 0.0032541751861572266, "step": 110840 }, { "epoch": 31.46466080045416, "grad_norm": 0.0980861708521843, "learning_rate": 6.854868010218564e-05, "loss": 0.007955242693424226, "step": 110850 }, { "epoch": 31.467499290377518, "grad_norm": 0.02421102300286293, "learning_rate": 6.854584161226229e-05, "loss": 0.007505740970373154, "step": 110860 }, { "epoch": 31.47033778030088, "grad_norm": 1.3965702056884766, "learning_rate": 6.854300312233892e-05, "loss": 0.006751044094562531, "step": 110870 }, { "epoch": 31.47317627022424, "grad_norm": 0.12384036928415298, "learning_rate": 6.854016463241556e-05, "loss": 0.0022736703976988792, "step": 110880 }, { "epoch": 31.476014760147603, "grad_norm": 11.257307052612305, "learning_rate": 6.85373261424922e-05, "loss": 0.013403666019439698, "step": 110890 }, { "epoch": 31.478853250070962, "grad_norm": 1.5627498626708984, "learning_rate": 6.853448765256883e-05, "loss": 0.0013321209698915482, "step": 110900 }, { "epoch": 31.48169173999432, "grad_norm": 0.9195828437805176, "learning_rate": 6.853164916264547e-05, "loss": 0.0017260583117604256, "step": 110910 }, { "epoch": 31.484530229917684, "grad_norm": 0.36274605989456177, "learning_rate": 6.852881067272211e-05, "loss": 0.00612756498157978, "step": 110920 }, { "epoch": 31.487368719841044, "grad_norm": 0.048454541712999344, "learning_rate": 6.852597218279875e-05, "loss": 0.0058830704540014265, "step": 110930 }, { "epoch": 31.490207209764407, "grad_norm": 8.038537979125977, "learning_rate": 6.85231336928754e-05, "loss": 0.016672386229038237, "step": 110940 }, { "epoch": 31.493045699687766, "grad_norm": 0.041774455457925797, "learning_rate": 6.852029520295204e-05, "loss": 0.0022445790469646453, "step": 110950 }, { "epoch": 31.495884189611125, "grad_norm": 0.7809493541717529, "learning_rate": 6.851745671302868e-05, "loss": 0.0007353324443101883, "step": 110960 }, { "epoch": 31.498722679534488, "grad_norm": 10.30208683013916, "learning_rate": 6.85146182231053e-05, "loss": 0.01774216294288635, "step": 110970 }, { "epoch": 31.501561169457847, "grad_norm": 0.10225125402212143, "learning_rate": 6.851177973318195e-05, "loss": 0.008976614475250244, "step": 110980 }, { "epoch": 31.50439965938121, "grad_norm": 0.45895594358444214, "learning_rate": 6.850894124325859e-05, "loss": 0.01064496859908104, "step": 110990 }, { "epoch": 31.50723814930457, "grad_norm": 0.20798568427562714, "learning_rate": 6.850610275333523e-05, "loss": 0.011443892121315002, "step": 111000 }, { "epoch": 31.50723814930457, "eval_accuracy": 0.9780632034081516, "eval_loss": 0.08346070349216461, "eval_runtime": 37.6006, "eval_samples_per_second": 418.264, "eval_steps_per_second": 6.542, "step": 111000 }, { "epoch": 31.510076639227933, "grad_norm": 0.7893832325935364, "learning_rate": 6.850326426341187e-05, "loss": 0.01516708880662918, "step": 111010 }, { "epoch": 31.512915129151292, "grad_norm": 17.3980770111084, "learning_rate": 6.850042577348851e-05, "loss": 0.014112183451652527, "step": 111020 }, { "epoch": 31.51575361907465, "grad_norm": 2.765277624130249, "learning_rate": 6.849758728356514e-05, "loss": 0.004672259464859963, "step": 111030 }, { "epoch": 31.518592108998014, "grad_norm": 0.12295620143413544, "learning_rate": 6.849474879364178e-05, "loss": 0.0038588516414165496, "step": 111040 }, { "epoch": 31.521430598921373, "grad_norm": 0.2321615070104599, "learning_rate": 6.849191030371842e-05, "loss": 0.005476983264088631, "step": 111050 }, { "epoch": 31.524269088844733, "grad_norm": 0.4660363793373108, "learning_rate": 6.848907181379506e-05, "loss": 0.0035879772156476976, "step": 111060 }, { "epoch": 31.527107578768096, "grad_norm": 0.03201242908835411, "learning_rate": 6.84862333238717e-05, "loss": 0.01611921638250351, "step": 111070 }, { "epoch": 31.529946068691455, "grad_norm": 0.163895845413208, "learning_rate": 6.848339483394835e-05, "loss": 0.004127808660268783, "step": 111080 }, { "epoch": 31.532784558614818, "grad_norm": 1.4810476303100586, "learning_rate": 6.848055634402498e-05, "loss": 0.002919791452586651, "step": 111090 }, { "epoch": 31.535623048538177, "grad_norm": 0.3224228024482727, "learning_rate": 6.847771785410162e-05, "loss": 0.0030068907886743545, "step": 111100 }, { "epoch": 31.53846153846154, "grad_norm": 0.0061888121999800205, "learning_rate": 6.847487936417826e-05, "loss": 0.0038178306072950362, "step": 111110 }, { "epoch": 31.5413000283849, "grad_norm": 0.056714944541454315, "learning_rate": 6.84720408742549e-05, "loss": 0.006853241473436356, "step": 111120 }, { "epoch": 31.54413851830826, "grad_norm": 2.5598835945129395, "learning_rate": 6.846920238433154e-05, "loss": 0.014121437072753906, "step": 111130 }, { "epoch": 31.54697700823162, "grad_norm": 2.542787551879883, "learning_rate": 6.846636389440818e-05, "loss": 0.0140104740858078, "step": 111140 }, { "epoch": 31.54981549815498, "grad_norm": 0.08625393360853195, "learning_rate": 6.846352540448482e-05, "loss": 0.00845133364200592, "step": 111150 }, { "epoch": 31.552653988078344, "grad_norm": 7.459018707275391, "learning_rate": 6.846068691456145e-05, "loss": 0.006429338455200195, "step": 111160 }, { "epoch": 31.555492478001703, "grad_norm": 8.560524940490723, "learning_rate": 6.845784842463809e-05, "loss": 0.011125846952199935, "step": 111170 }, { "epoch": 31.558330967925063, "grad_norm": 0.7408723831176758, "learning_rate": 6.845500993471473e-05, "loss": 0.006588120013475418, "step": 111180 }, { "epoch": 31.561169457848425, "grad_norm": 0.8012625575065613, "learning_rate": 6.845217144479138e-05, "loss": 0.0014671545475721359, "step": 111190 }, { "epoch": 31.564007947771785, "grad_norm": 3.6847410202026367, "learning_rate": 6.844933295486802e-05, "loss": 0.00986478552222252, "step": 111200 }, { "epoch": 31.566846437695148, "grad_norm": 0.09522539377212524, "learning_rate": 6.844649446494466e-05, "loss": 0.035245728492736814, "step": 111210 }, { "epoch": 31.569684927618507, "grad_norm": 1.135860562324524, "learning_rate": 6.844365597502129e-05, "loss": 0.0026248805224895477, "step": 111220 }, { "epoch": 31.572523417541866, "grad_norm": 0.8624247312545776, "learning_rate": 6.844081748509793e-05, "loss": 0.007376720011234283, "step": 111230 }, { "epoch": 31.57536190746523, "grad_norm": 0.0483066700398922, "learning_rate": 6.843797899517457e-05, "loss": 0.005604828149080277, "step": 111240 }, { "epoch": 31.57820039738859, "grad_norm": 1.299328327178955, "learning_rate": 6.843514050525121e-05, "loss": 0.00205338466912508, "step": 111250 }, { "epoch": 31.58103888731195, "grad_norm": 0.2163316309452057, "learning_rate": 6.843230201532785e-05, "loss": 0.0015761034563183784, "step": 111260 }, { "epoch": 31.58387737723531, "grad_norm": 3.2132601737976074, "learning_rate": 6.842946352540449e-05, "loss": 0.02271636724472046, "step": 111270 }, { "epoch": 31.58671586715867, "grad_norm": 5.0923848152160645, "learning_rate": 6.842662503548113e-05, "loss": 0.0143254354596138, "step": 111280 }, { "epoch": 31.589554357082033, "grad_norm": 0.31920626759529114, "learning_rate": 6.842378654555776e-05, "loss": 0.006438520550727844, "step": 111290 }, { "epoch": 31.592392847005392, "grad_norm": 0.712855339050293, "learning_rate": 6.84209480556344e-05, "loss": 0.006547770649194718, "step": 111300 }, { "epoch": 31.595231336928755, "grad_norm": 0.013940670527517796, "learning_rate": 6.841810956571104e-05, "loss": 0.013484680652618408, "step": 111310 }, { "epoch": 31.598069826852115, "grad_norm": 1.0725181102752686, "learning_rate": 6.841527107578767e-05, "loss": 0.009908188134431839, "step": 111320 }, { "epoch": 31.600908316775474, "grad_norm": 0.01533547230064869, "learning_rate": 6.841243258586433e-05, "loss": 0.006462614983320236, "step": 111330 }, { "epoch": 31.603746806698837, "grad_norm": 0.06673911213874817, "learning_rate": 6.840959409594097e-05, "loss": 0.0021257445216178892, "step": 111340 }, { "epoch": 31.606585296622196, "grad_norm": 1.5282500982284546, "learning_rate": 6.84067556060176e-05, "loss": 0.003496837243437767, "step": 111350 }, { "epoch": 31.60942378654556, "grad_norm": 0.4541719853878021, "learning_rate": 6.840391711609424e-05, "loss": 0.008748266845941544, "step": 111360 }, { "epoch": 31.61226227646892, "grad_norm": 5.611433506011963, "learning_rate": 6.840107862617088e-05, "loss": 0.007057390362024307, "step": 111370 }, { "epoch": 31.615100766392278, "grad_norm": 0.35493117570877075, "learning_rate": 6.839824013624752e-05, "loss": 0.020226606726646425, "step": 111380 }, { "epoch": 31.61793925631564, "grad_norm": 0.07666556537151337, "learning_rate": 6.839540164632416e-05, "loss": 0.005522971600294113, "step": 111390 }, { "epoch": 31.620777746239, "grad_norm": 5.555471420288086, "learning_rate": 6.83925631564008e-05, "loss": 0.003882547840476036, "step": 111400 }, { "epoch": 31.623616236162363, "grad_norm": 13.454217910766602, "learning_rate": 6.838972466647745e-05, "loss": 0.004348168149590492, "step": 111410 }, { "epoch": 31.626454726085722, "grad_norm": 2.611283540725708, "learning_rate": 6.838688617655407e-05, "loss": 0.011864610016345978, "step": 111420 }, { "epoch": 31.62929321600908, "grad_norm": 0.11803017556667328, "learning_rate": 6.838404768663071e-05, "loss": 0.0016474464908242225, "step": 111430 }, { "epoch": 31.632131705932444, "grad_norm": 4.568045616149902, "learning_rate": 6.838120919670736e-05, "loss": 0.005575326085090637, "step": 111440 }, { "epoch": 31.634970195855804, "grad_norm": 0.02473355270922184, "learning_rate": 6.837837070678398e-05, "loss": 0.010312335193157196, "step": 111450 }, { "epoch": 31.637808685779166, "grad_norm": 2.8843064308166504, "learning_rate": 6.837553221686064e-05, "loss": 0.001862199231982231, "step": 111460 }, { "epoch": 31.640647175702526, "grad_norm": 0.048356082290410995, "learning_rate": 6.837269372693728e-05, "loss": 0.003676772490143776, "step": 111470 }, { "epoch": 31.64348566562589, "grad_norm": 0.6042098999023438, "learning_rate": 6.836985523701391e-05, "loss": 0.0051002025604248045, "step": 111480 }, { "epoch": 31.646324155549248, "grad_norm": 0.35737067461013794, "learning_rate": 6.836701674709055e-05, "loss": 0.0039050646126270296, "step": 111490 }, { "epoch": 31.649162645472607, "grad_norm": 0.13520079851150513, "learning_rate": 6.836417825716719e-05, "loss": 0.01906677335500717, "step": 111500 }, { "epoch": 31.649162645472607, "eval_accuracy": 0.9771094296432886, "eval_loss": 0.08679317682981491, "eval_runtime": 36.4749, "eval_samples_per_second": 431.173, "eval_steps_per_second": 6.744, "step": 111500 }, { "epoch": 31.65200113539597, "grad_norm": 0.12331493943929672, "learning_rate": 6.836133976724383e-05, "loss": 0.017106497287750246, "step": 111510 }, { "epoch": 31.65483962531933, "grad_norm": 3.107395887374878, "learning_rate": 6.835850127732046e-05, "loss": 0.006228804215788841, "step": 111520 }, { "epoch": 31.657678115242692, "grad_norm": 0.07832195609807968, "learning_rate": 6.835566278739711e-05, "loss": 0.010472393780946731, "step": 111530 }, { "epoch": 31.660516605166052, "grad_norm": 1.4896985292434692, "learning_rate": 6.835282429747376e-05, "loss": 0.003530469909310341, "step": 111540 }, { "epoch": 31.66335509508941, "grad_norm": 3.002685308456421, "learning_rate": 6.834998580755038e-05, "loss": 0.018684367835521697, "step": 111550 }, { "epoch": 31.666193585012774, "grad_norm": 0.02774527482688427, "learning_rate": 6.834714731762702e-05, "loss": 0.019742511212825775, "step": 111560 }, { "epoch": 31.669032074936133, "grad_norm": 0.12324922531843185, "learning_rate": 6.834430882770367e-05, "loss": 0.03224378824234009, "step": 111570 }, { "epoch": 31.671870564859496, "grad_norm": 10.114863395690918, "learning_rate": 6.83414703377803e-05, "loss": 0.011897037923336028, "step": 111580 }, { "epoch": 31.674709054782856, "grad_norm": 1.550437569618225, "learning_rate": 6.833863184785695e-05, "loss": 0.01022840365767479, "step": 111590 }, { "epoch": 31.677547544706215, "grad_norm": 0.10978815704584122, "learning_rate": 6.833579335793359e-05, "loss": 0.012474798411130906, "step": 111600 }, { "epoch": 31.680386034629578, "grad_norm": 2.0425446033477783, "learning_rate": 6.833295486801022e-05, "loss": 0.03648076355457306, "step": 111610 }, { "epoch": 31.683224524552937, "grad_norm": 3.3033971786499023, "learning_rate": 6.833011637808686e-05, "loss": 0.0061614990234375, "step": 111620 }, { "epoch": 31.6860630144763, "grad_norm": 9.80512809753418, "learning_rate": 6.83272778881635e-05, "loss": 0.01026354283094406, "step": 111630 }, { "epoch": 31.68890150439966, "grad_norm": 0.3256895840167999, "learning_rate": 6.832443939824014e-05, "loss": 0.004477899894118309, "step": 111640 }, { "epoch": 31.69173999432302, "grad_norm": 0.7830937504768372, "learning_rate": 6.832160090831677e-05, "loss": 0.0033670470118522646, "step": 111650 }, { "epoch": 31.69457848424638, "grad_norm": 0.9213022589683533, "learning_rate": 6.831876241839343e-05, "loss": 0.003763742744922638, "step": 111660 }, { "epoch": 31.69741697416974, "grad_norm": 0.06471579521894455, "learning_rate": 6.831592392847007e-05, "loss": 0.007257795333862305, "step": 111670 }, { "epoch": 31.700255464093104, "grad_norm": 2.073945999145508, "learning_rate": 6.83130854385467e-05, "loss": 0.0026820965111255647, "step": 111680 }, { "epoch": 31.703093954016463, "grad_norm": 0.30254635214805603, "learning_rate": 6.831024694862334e-05, "loss": 0.0016274290159344674, "step": 111690 }, { "epoch": 31.705932443939822, "grad_norm": 0.03385793790221214, "learning_rate": 6.830740845869998e-05, "loss": 0.013766662776470184, "step": 111700 }, { "epoch": 31.708770933863185, "grad_norm": 0.29931575059890747, "learning_rate": 6.83045699687766e-05, "loss": 0.0007529221475124359, "step": 111710 }, { "epoch": 31.711609423786545, "grad_norm": 12.894917488098145, "learning_rate": 6.830173147885325e-05, "loss": 0.021864059567451476, "step": 111720 }, { "epoch": 31.714447913709908, "grad_norm": 12.70197582244873, "learning_rate": 6.82988929889299e-05, "loss": 0.016255487501621247, "step": 111730 }, { "epoch": 31.717286403633267, "grad_norm": 0.14956440031528473, "learning_rate": 6.829605449900653e-05, "loss": 0.00418318547308445, "step": 111740 }, { "epoch": 31.720124893556626, "grad_norm": 3.348362922668457, "learning_rate": 6.829321600908317e-05, "loss": 0.02002493441104889, "step": 111750 }, { "epoch": 31.72296338347999, "grad_norm": 6.393068790435791, "learning_rate": 6.829037751915981e-05, "loss": 0.005480251833796501, "step": 111760 }, { "epoch": 31.72580187340335, "grad_norm": 6.691692352294922, "learning_rate": 6.828753902923645e-05, "loss": 0.0125237375497818, "step": 111770 }, { "epoch": 31.72864036332671, "grad_norm": 0.7343763113021851, "learning_rate": 6.828470053931308e-05, "loss": 0.0064181797206401825, "step": 111780 }, { "epoch": 31.73147885325007, "grad_norm": 0.1299266219139099, "learning_rate": 6.828186204938974e-05, "loss": 0.009853550791740417, "step": 111790 }, { "epoch": 31.73431734317343, "grad_norm": 0.08752453327178955, "learning_rate": 6.827902355946638e-05, "loss": 0.006200600042939186, "step": 111800 }, { "epoch": 31.737155833096793, "grad_norm": 2.213582754135132, "learning_rate": 6.8276185069543e-05, "loss": 0.009037727117538452, "step": 111810 }, { "epoch": 31.739994323020152, "grad_norm": 0.668047308921814, "learning_rate": 6.827334657961965e-05, "loss": 0.002954813838005066, "step": 111820 }, { "epoch": 31.742832812943515, "grad_norm": 0.5972014665603638, "learning_rate": 6.827050808969629e-05, "loss": 0.00831105262041092, "step": 111830 }, { "epoch": 31.745671302866874, "grad_norm": 3.5902633666992188, "learning_rate": 6.826766959977292e-05, "loss": 0.00661296546459198, "step": 111840 }, { "epoch": 31.748509792790237, "grad_norm": 0.0447242371737957, "learning_rate": 6.826483110984956e-05, "loss": 0.014372140169143677, "step": 111850 }, { "epoch": 31.751348282713597, "grad_norm": 4.43621301651001, "learning_rate": 6.826199261992621e-05, "loss": 0.014533643424510957, "step": 111860 }, { "epoch": 31.754186772636956, "grad_norm": 1.166634440422058, "learning_rate": 6.825915413000284e-05, "loss": 0.004484548419713974, "step": 111870 }, { "epoch": 31.75702526256032, "grad_norm": 0.2383296936750412, "learning_rate": 6.825631564007948e-05, "loss": 0.00732434093952179, "step": 111880 }, { "epoch": 31.759863752483678, "grad_norm": 5.1628289222717285, "learning_rate": 6.825347715015612e-05, "loss": 0.002337944135069847, "step": 111890 }, { "epoch": 31.76270224240704, "grad_norm": 0.11741632223129272, "learning_rate": 6.825063866023276e-05, "loss": 0.0033892493695020674, "step": 111900 }, { "epoch": 31.7655407323304, "grad_norm": 7.4330644607543945, "learning_rate": 6.824780017030939e-05, "loss": 0.004455066472291947, "step": 111910 }, { "epoch": 31.76837922225376, "grad_norm": 3.470581293106079, "learning_rate": 6.824496168038603e-05, "loss": 0.004526837915182114, "step": 111920 }, { "epoch": 31.771217712177123, "grad_norm": 0.37346428632736206, "learning_rate": 6.824212319046267e-05, "loss": 0.0016699377447366714, "step": 111930 }, { "epoch": 31.774056202100482, "grad_norm": 1.6267069578170776, "learning_rate": 6.823928470053932e-05, "loss": 0.0016365833580493927, "step": 111940 }, { "epoch": 31.776894692023845, "grad_norm": 0.03259873762726784, "learning_rate": 6.823644621061596e-05, "loss": 0.009490133076906205, "step": 111950 }, { "epoch": 31.779733181947204, "grad_norm": 0.14269231259822845, "learning_rate": 6.82336077206926e-05, "loss": 0.00366348959505558, "step": 111960 }, { "epoch": 31.782571671870564, "grad_norm": 0.2500723600387573, "learning_rate": 6.823076923076923e-05, "loss": 0.006182758882641792, "step": 111970 }, { "epoch": 31.785410161793926, "grad_norm": 1.1872628927230835, "learning_rate": 6.822793074084587e-05, "loss": 0.0037215795367956162, "step": 111980 }, { "epoch": 31.788248651717286, "grad_norm": 0.06820143014192581, "learning_rate": 6.822509225092252e-05, "loss": 0.001197039522230625, "step": 111990 }, { "epoch": 31.79108714164065, "grad_norm": 1.0262269973754883, "learning_rate": 6.822225376099915e-05, "loss": 0.0018609631806612015, "step": 112000 }, { "epoch": 31.79108714164065, "eval_accuracy": 0.9808609397850829, "eval_loss": 0.07514362782239914, "eval_runtime": 36.0634, "eval_samples_per_second": 436.093, "eval_steps_per_second": 6.821, "step": 112000 }, { "epoch": 31.793925631564008, "grad_norm": 6.675536155700684, "learning_rate": 6.821941527107579e-05, "loss": 0.004072707891464233, "step": 112010 }, { "epoch": 31.796764121487367, "grad_norm": 0.07049150764942169, "learning_rate": 6.821657678115243e-05, "loss": 0.021766826510429382, "step": 112020 }, { "epoch": 31.79960261141073, "grad_norm": 0.16084975004196167, "learning_rate": 6.821373829122906e-05, "loss": 0.001972678117454052, "step": 112030 }, { "epoch": 31.80244110133409, "grad_norm": 0.39600682258605957, "learning_rate": 6.82108998013057e-05, "loss": 0.0018569577485322952, "step": 112040 }, { "epoch": 31.805279591257452, "grad_norm": 0.6629458069801331, "learning_rate": 6.820806131138234e-05, "loss": 0.0021632442250847815, "step": 112050 }, { "epoch": 31.80811808118081, "grad_norm": 0.4313388168811798, "learning_rate": 6.820522282145899e-05, "loss": 0.010695400834083556, "step": 112060 }, { "epoch": 31.81095657110417, "grad_norm": 0.303666889667511, "learning_rate": 6.820238433153563e-05, "loss": 0.021585629880428316, "step": 112070 }, { "epoch": 31.813795061027534, "grad_norm": 1.898411512374878, "learning_rate": 6.819954584161227e-05, "loss": 0.0059521738439798355, "step": 112080 }, { "epoch": 31.816633550950893, "grad_norm": 0.7556636929512024, "learning_rate": 6.819670735168891e-05, "loss": 0.004391541332006454, "step": 112090 }, { "epoch": 31.819472040874256, "grad_norm": 2.0127689838409424, "learning_rate": 6.819386886176554e-05, "loss": 0.010012400895357132, "step": 112100 }, { "epoch": 31.822310530797616, "grad_norm": 14.981685638427734, "learning_rate": 6.819103037184218e-05, "loss": 0.027894759178161622, "step": 112110 }, { "epoch": 31.825149020720975, "grad_norm": 12.902413368225098, "learning_rate": 6.818819188191882e-05, "loss": 0.010683482885360718, "step": 112120 }, { "epoch": 31.827987510644338, "grad_norm": 4.189155578613281, "learning_rate": 6.818535339199546e-05, "loss": 0.011329040676355363, "step": 112130 }, { "epoch": 31.830826000567697, "grad_norm": 0.3171860873699188, "learning_rate": 6.81825149020721e-05, "loss": 0.00404188297688961, "step": 112140 }, { "epoch": 31.83366449049106, "grad_norm": 11.82907485961914, "learning_rate": 6.817967641214874e-05, "loss": 0.007795780897140503, "step": 112150 }, { "epoch": 31.83650298041442, "grad_norm": 15.915590286254883, "learning_rate": 6.817683792222537e-05, "loss": 0.011885396391153335, "step": 112160 }, { "epoch": 31.83934147033778, "grad_norm": 0.17144086956977844, "learning_rate": 6.817399943230201e-05, "loss": 0.023567602038383484, "step": 112170 }, { "epoch": 31.84217996026114, "grad_norm": 0.016746660694479942, "learning_rate": 6.817116094237865e-05, "loss": 0.0015491539612412452, "step": 112180 }, { "epoch": 31.8450184501845, "grad_norm": 0.27861472964286804, "learning_rate": 6.81683224524553e-05, "loss": 0.009381115436553955, "step": 112190 }, { "epoch": 31.847856940107864, "grad_norm": 1.532272458076477, "learning_rate": 6.816548396253194e-05, "loss": 0.004289066046476364, "step": 112200 }, { "epoch": 31.850695430031223, "grad_norm": 10.970272064208984, "learning_rate": 6.816264547260858e-05, "loss": 0.018634818494319916, "step": 112210 }, { "epoch": 31.853533919954586, "grad_norm": 0.10058299452066422, "learning_rate": 6.815980698268522e-05, "loss": 0.002865607291460037, "step": 112220 }, { "epoch": 31.856372409877945, "grad_norm": 0.07757601141929626, "learning_rate": 6.815696849276185e-05, "loss": 0.010062752664089203, "step": 112230 }, { "epoch": 31.859210899801305, "grad_norm": 0.06829041987657547, "learning_rate": 6.815413000283849e-05, "loss": 0.0007981564849615097, "step": 112240 }, { "epoch": 31.862049389724667, "grad_norm": 2.63499116897583, "learning_rate": 6.815129151291513e-05, "loss": 0.006070542335510254, "step": 112250 }, { "epoch": 31.864887879648027, "grad_norm": 9.307660102844238, "learning_rate": 6.814845302299177e-05, "loss": 0.006106268614530563, "step": 112260 }, { "epoch": 31.86772636957139, "grad_norm": 0.0822724848985672, "learning_rate": 6.814561453306841e-05, "loss": 0.009550497680902482, "step": 112270 }, { "epoch": 31.87056485949475, "grad_norm": 0.03372730687260628, "learning_rate": 6.814277604314505e-05, "loss": 0.004713936895132065, "step": 112280 }, { "epoch": 31.87340334941811, "grad_norm": 0.5194056034088135, "learning_rate": 6.813993755322168e-05, "loss": 0.0009140351787209511, "step": 112290 }, { "epoch": 31.87624183934147, "grad_norm": 0.35418954491615295, "learning_rate": 6.813709906329832e-05, "loss": 0.00520714521408081, "step": 112300 }, { "epoch": 31.87908032926483, "grad_norm": 0.15515466034412384, "learning_rate": 6.813426057337497e-05, "loss": 0.002607612684369087, "step": 112310 }, { "epoch": 31.881918819188193, "grad_norm": 0.5631701350212097, "learning_rate": 6.813142208345161e-05, "loss": 0.0063105359673500065, "step": 112320 }, { "epoch": 31.884757309111553, "grad_norm": 0.5389623045921326, "learning_rate": 6.812858359352825e-05, "loss": 0.012971943616867066, "step": 112330 }, { "epoch": 31.887595799034912, "grad_norm": 1.9934179782867432, "learning_rate": 6.812574510360489e-05, "loss": 0.006767873466014862, "step": 112340 }, { "epoch": 31.890434288958275, "grad_norm": 0.16046391427516937, "learning_rate": 6.812290661368153e-05, "loss": 0.003534939885139465, "step": 112350 }, { "epoch": 31.893272778881634, "grad_norm": 2.5258965492248535, "learning_rate": 6.812006812375816e-05, "loss": 0.01579393744468689, "step": 112360 }, { "epoch": 31.896111268804997, "grad_norm": 0.3318716883659363, "learning_rate": 6.81172296338348e-05, "loss": 0.011149276793003083, "step": 112370 }, { "epoch": 31.898949758728357, "grad_norm": 1.6795010566711426, "learning_rate": 6.811439114391144e-05, "loss": 0.011619828641414642, "step": 112380 }, { "epoch": 31.901788248651716, "grad_norm": 7.138566017150879, "learning_rate": 6.811155265398808e-05, "loss": 0.003174944221973419, "step": 112390 }, { "epoch": 31.90462673857508, "grad_norm": 0.20555469393730164, "learning_rate": 6.810871416406472e-05, "loss": 0.012025538086891174, "step": 112400 }, { "epoch": 31.907465228498438, "grad_norm": 0.02328181266784668, "learning_rate": 6.810587567414137e-05, "loss": 0.012419378757476807, "step": 112410 }, { "epoch": 31.9103037184218, "grad_norm": 1.1935399770736694, "learning_rate": 6.8103037184218e-05, "loss": 0.005185045301914215, "step": 112420 }, { "epoch": 31.91314220834516, "grad_norm": 0.13613803684711456, "learning_rate": 6.810019869429463e-05, "loss": 0.016016100347042084, "step": 112430 }, { "epoch": 31.91598069826852, "grad_norm": 0.24506540596485138, "learning_rate": 6.809736020437128e-05, "loss": 0.01870710551738739, "step": 112440 }, { "epoch": 31.918819188191883, "grad_norm": 0.7991397380828857, "learning_rate": 6.809452171444792e-05, "loss": 0.020408816635608673, "step": 112450 }, { "epoch": 31.921657678115242, "grad_norm": 1.0899919271469116, "learning_rate": 6.809168322452456e-05, "loss": 0.001395563967525959, "step": 112460 }, { "epoch": 31.924496168038605, "grad_norm": 0.900557816028595, "learning_rate": 6.80888447346012e-05, "loss": 0.0016548115760087966, "step": 112470 }, { "epoch": 31.927334657961964, "grad_norm": 0.40434470772743225, "learning_rate": 6.808600624467784e-05, "loss": 0.023502573370933533, "step": 112480 }, { "epoch": 31.930173147885323, "grad_norm": 3.900493860244751, "learning_rate": 6.808316775475447e-05, "loss": 0.004679206013679505, "step": 112490 }, { "epoch": 31.933011637808686, "grad_norm": 0.10173594951629639, "learning_rate": 6.808032926483111e-05, "loss": 0.01435564160346985, "step": 112500 }, { "epoch": 31.933011637808686, "eval_accuracy": 0.9785082978317543, "eval_loss": 0.0838349387049675, "eval_runtime": 37.1885, "eval_samples_per_second": 422.9, "eval_steps_per_second": 6.615, "step": 112500 }, { "epoch": 31.935850127732046, "grad_norm": 0.13702452182769775, "learning_rate": 6.807749077490775e-05, "loss": 0.011638298630714417, "step": 112510 }, { "epoch": 31.93868861765541, "grad_norm": 0.41872265934944153, "learning_rate": 6.80746522849844e-05, "loss": 0.004603913426399231, "step": 112520 }, { "epoch": 31.941527107578768, "grad_norm": 0.017561934888362885, "learning_rate": 6.807181379506104e-05, "loss": 0.0017622316256165505, "step": 112530 }, { "epoch": 31.944365597502127, "grad_norm": 0.05497167259454727, "learning_rate": 6.806897530513768e-05, "loss": 0.009433518350124358, "step": 112540 }, { "epoch": 31.94720408742549, "grad_norm": 0.08188667893409729, "learning_rate": 6.80661368152143e-05, "loss": 0.023478280007839202, "step": 112550 }, { "epoch": 31.95004257734885, "grad_norm": 0.90416419506073, "learning_rate": 6.806329832529095e-05, "loss": 0.0043086234480142595, "step": 112560 }, { "epoch": 31.952881067272212, "grad_norm": 0.12357954680919647, "learning_rate": 6.806045983536759e-05, "loss": 0.002241789922118187, "step": 112570 }, { "epoch": 31.95571955719557, "grad_norm": 0.05403192713856697, "learning_rate": 6.805762134544423e-05, "loss": 0.008343557268381119, "step": 112580 }, { "epoch": 31.958558047118935, "grad_norm": 0.38246336579322815, "learning_rate": 6.805478285552087e-05, "loss": 0.002622745931148529, "step": 112590 }, { "epoch": 31.961396537042294, "grad_norm": 0.4961470365524292, "learning_rate": 6.805194436559751e-05, "loss": 0.0055367894470691684, "step": 112600 }, { "epoch": 31.964235026965653, "grad_norm": 1.1522431373596191, "learning_rate": 6.804910587567415e-05, "loss": 0.004209998995065689, "step": 112610 }, { "epoch": 31.967073516889016, "grad_norm": 0.7693986892700195, "learning_rate": 6.804626738575078e-05, "loss": 0.024302327632904054, "step": 112620 }, { "epoch": 31.969912006812375, "grad_norm": 0.0575907863676548, "learning_rate": 6.804342889582742e-05, "loss": 0.0042587719857692715, "step": 112630 }, { "epoch": 31.972750496735735, "grad_norm": 0.709811806678772, "learning_rate": 6.804059040590406e-05, "loss": 0.005559299141168594, "step": 112640 }, { "epoch": 31.975588986659098, "grad_norm": 0.21934708952903748, "learning_rate": 6.803775191598069e-05, "loss": 0.0033092275261878967, "step": 112650 }, { "epoch": 31.978427476582457, "grad_norm": 3.124892234802246, "learning_rate": 6.803491342605735e-05, "loss": 0.013441681861877441, "step": 112660 }, { "epoch": 31.98126596650582, "grad_norm": 0.3489237427711487, "learning_rate": 6.803207493613399e-05, "loss": 0.0017520710825920105, "step": 112670 }, { "epoch": 31.98410445642918, "grad_norm": 0.07279693335294724, "learning_rate": 6.802923644621061e-05, "loss": 0.0019017815589904784, "step": 112680 }, { "epoch": 31.986942946352542, "grad_norm": 4.986855983734131, "learning_rate": 6.802639795628726e-05, "loss": 0.009699252992868423, "step": 112690 }, { "epoch": 31.9897814362759, "grad_norm": 0.8552514910697937, "learning_rate": 6.80235594663639e-05, "loss": 0.0013594955205917358, "step": 112700 }, { "epoch": 31.99261992619926, "grad_norm": 0.2465766966342926, "learning_rate": 6.802072097644054e-05, "loss": 0.007643173635005951, "step": 112710 }, { "epoch": 31.995458416122624, "grad_norm": 0.3139595687389374, "learning_rate": 6.801788248651718e-05, "loss": 0.0022508585825562477, "step": 112720 }, { "epoch": 31.998296906045983, "grad_norm": 0.11571436375379562, "learning_rate": 6.801504399659382e-05, "loss": 0.0018386764451861382, "step": 112730 }, { "epoch": 32.00113539596934, "grad_norm": 14.523801803588867, "learning_rate": 6.801220550667046e-05, "loss": 0.010720564424991608, "step": 112740 }, { "epoch": 32.003973885892705, "grad_norm": 0.08339695632457733, "learning_rate": 6.800936701674709e-05, "loss": 0.005773204192519188, "step": 112750 }, { "epoch": 32.00681237581607, "grad_norm": 0.49585914611816406, "learning_rate": 6.800652852682373e-05, "loss": 0.002934390492737293, "step": 112760 }, { "epoch": 32.009650865739424, "grad_norm": 6.308122158050537, "learning_rate": 6.800369003690037e-05, "loss": 0.012361048907041549, "step": 112770 }, { "epoch": 32.01248935566279, "grad_norm": 0.05189976468682289, "learning_rate": 6.8000851546977e-05, "loss": 0.010759448260068893, "step": 112780 }, { "epoch": 32.01532784558615, "grad_norm": 0.6120983958244324, "learning_rate": 6.799801305705366e-05, "loss": 0.0016174245625734328, "step": 112790 }, { "epoch": 32.018166335509505, "grad_norm": 0.2008655071258545, "learning_rate": 6.79951745671303e-05, "loss": 0.008450514823198318, "step": 112800 }, { "epoch": 32.02100482543287, "grad_norm": 2.7622952461242676, "learning_rate": 6.799233607720693e-05, "loss": 0.001770658604800701, "step": 112810 }, { "epoch": 32.02384331535623, "grad_norm": 0.13511395454406738, "learning_rate": 6.798949758728357e-05, "loss": 0.0028279177844524384, "step": 112820 }, { "epoch": 32.026681805279594, "grad_norm": 2.697314500808716, "learning_rate": 6.798665909736021e-05, "loss": 0.0024139901623129846, "step": 112830 }, { "epoch": 32.02952029520295, "grad_norm": 0.44643762707710266, "learning_rate": 6.798382060743685e-05, "loss": 0.005474664270877838, "step": 112840 }, { "epoch": 32.03235878512631, "grad_norm": 1.4649831056594849, "learning_rate": 6.798098211751348e-05, "loss": 0.027172040939331055, "step": 112850 }, { "epoch": 32.035197275049676, "grad_norm": 0.20213913917541504, "learning_rate": 6.797814362759013e-05, "loss": 0.010975870490074157, "step": 112860 }, { "epoch": 32.03803576497303, "grad_norm": 1.06629478931427, "learning_rate": 6.797530513766676e-05, "loss": 0.010942411422729493, "step": 112870 }, { "epoch": 32.040874254896394, "grad_norm": 1.7403236627578735, "learning_rate": 6.79724666477434e-05, "loss": 0.0013751378282904625, "step": 112880 }, { "epoch": 32.04371274481976, "grad_norm": 0.7667296528816223, "learning_rate": 6.796962815782004e-05, "loss": 0.0011279931291937827, "step": 112890 }, { "epoch": 32.04655123474312, "grad_norm": 0.017409706488251686, "learning_rate": 6.796678966789668e-05, "loss": 0.004016145318746567, "step": 112900 }, { "epoch": 32.049389724666476, "grad_norm": 0.10059306025505066, "learning_rate": 6.796395117797331e-05, "loss": 0.0009714977815747261, "step": 112910 }, { "epoch": 32.05222821458984, "grad_norm": 0.32355889678001404, "learning_rate": 6.796111268804997e-05, "loss": 0.004030934721231461, "step": 112920 }, { "epoch": 32.0550667045132, "grad_norm": 0.059355150908231735, "learning_rate": 6.795827419812661e-05, "loss": 0.00072732362896204, "step": 112930 }, { "epoch": 32.05790519443656, "grad_norm": 0.6371223330497742, "learning_rate": 6.795543570820324e-05, "loss": 0.0020486114546656607, "step": 112940 }, { "epoch": 32.06074368435992, "grad_norm": 0.29325070977211, "learning_rate": 6.795259721827988e-05, "loss": 0.010332415997982024, "step": 112950 }, { "epoch": 32.06358217428328, "grad_norm": 9.62952709197998, "learning_rate": 6.794975872835652e-05, "loss": 0.011058630049228668, "step": 112960 }, { "epoch": 32.06642066420664, "grad_norm": 0.09125205129384995, "learning_rate": 6.794692023843315e-05, "loss": 0.0029261546209454537, "step": 112970 }, { "epoch": 32.06925915413, "grad_norm": 0.48098236322402954, "learning_rate": 6.794408174850979e-05, "loss": 0.0043171849101781845, "step": 112980 }, { "epoch": 32.072097644053365, "grad_norm": 1.2242636680603027, "learning_rate": 6.794124325858644e-05, "loss": 0.002479686215519905, "step": 112990 }, { "epoch": 32.07493613397673, "grad_norm": 0.18309716880321503, "learning_rate": 6.793840476866307e-05, "loss": 0.00302668996155262, "step": 113000 }, { "epoch": 32.07493613397673, "eval_accuracy": 0.9811788643733707, "eval_loss": 0.06751129776239395, "eval_runtime": 38.5503, "eval_samples_per_second": 407.96, "eval_steps_per_second": 6.381, "step": 113000 }, { "epoch": 32.07777462390008, "grad_norm": 0.07326049357652664, "learning_rate": 6.793556627873971e-05, "loss": 0.0010475683957338333, "step": 113010 }, { "epoch": 32.080613113823446, "grad_norm": 0.46126437187194824, "learning_rate": 6.793272778881635e-05, "loss": 0.0008481340482831001, "step": 113020 }, { "epoch": 32.08345160374681, "grad_norm": 0.0714845061302185, "learning_rate": 6.7929889298893e-05, "loss": 0.001501747779548168, "step": 113030 }, { "epoch": 32.086290093670165, "grad_norm": 4.939540386199951, "learning_rate": 6.792705080896962e-05, "loss": 0.004543003439903259, "step": 113040 }, { "epoch": 32.08912858359353, "grad_norm": 2.982614278793335, "learning_rate": 6.792421231904626e-05, "loss": 0.0025597913190722466, "step": 113050 }, { "epoch": 32.09196707351689, "grad_norm": 0.0860133245587349, "learning_rate": 6.792137382912292e-05, "loss": 0.0015674296766519547, "step": 113060 }, { "epoch": 32.09480556344025, "grad_norm": 0.2629123628139496, "learning_rate": 6.791853533919955e-05, "loss": 0.00261008832603693, "step": 113070 }, { "epoch": 32.09764405336361, "grad_norm": 8.052972793579102, "learning_rate": 6.791569684927619e-05, "loss": 0.003162672370672226, "step": 113080 }, { "epoch": 32.10048254328697, "grad_norm": 3.102416515350342, "learning_rate": 6.791285835935283e-05, "loss": 0.003938512504100799, "step": 113090 }, { "epoch": 32.103321033210335, "grad_norm": 3.1913468837738037, "learning_rate": 6.791001986942946e-05, "loss": 0.0020044164732098578, "step": 113100 }, { "epoch": 32.10615952313369, "grad_norm": 1.704526662826538, "learning_rate": 6.79071813795061e-05, "loss": 0.009151145815849304, "step": 113110 }, { "epoch": 32.108998013057054, "grad_norm": 0.19059035181999207, "learning_rate": 6.790434288958275e-05, "loss": 0.0019147604703903197, "step": 113120 }, { "epoch": 32.11183650298042, "grad_norm": 0.0703873261809349, "learning_rate": 6.790150439965938e-05, "loss": 0.0022937200963497164, "step": 113130 }, { "epoch": 32.11467499290377, "grad_norm": 1.4885607957839966, "learning_rate": 6.789866590973602e-05, "loss": 0.01008826494216919, "step": 113140 }, { "epoch": 32.117513482827135, "grad_norm": 2.172741651535034, "learning_rate": 6.789582741981266e-05, "loss": 0.0022097470238804815, "step": 113150 }, { "epoch": 32.1203519727505, "grad_norm": 0.24997277557849884, "learning_rate": 6.78929889298893e-05, "loss": 0.0019743314012885095, "step": 113160 }, { "epoch": 32.123190462673854, "grad_norm": 1.0191253423690796, "learning_rate": 6.789015043996593e-05, "loss": 0.009312212467193604, "step": 113170 }, { "epoch": 32.12602895259722, "grad_norm": 0.06626996397972107, "learning_rate": 6.788731195004258e-05, "loss": 0.0015799306333065034, "step": 113180 }, { "epoch": 32.12886744252058, "grad_norm": 0.0389140248298645, "learning_rate": 6.788447346011923e-05, "loss": 0.0012815555557608605, "step": 113190 }, { "epoch": 32.13170593244394, "grad_norm": 0.004632581956684589, "learning_rate": 6.788163497019586e-05, "loss": 0.001934184692800045, "step": 113200 }, { "epoch": 32.1345444223673, "grad_norm": 0.47306349873542786, "learning_rate": 6.78787964802725e-05, "loss": 0.0008109265938401223, "step": 113210 }, { "epoch": 32.13738291229066, "grad_norm": 0.12202760577201843, "learning_rate": 6.787595799034914e-05, "loss": 0.002163127064704895, "step": 113220 }, { "epoch": 32.140221402214024, "grad_norm": 2.3933534622192383, "learning_rate": 6.787311950042577e-05, "loss": 0.001205168478190899, "step": 113230 }, { "epoch": 32.14305989213738, "grad_norm": 8.350204467773438, "learning_rate": 6.787028101050241e-05, "loss": 0.0028204619884490967, "step": 113240 }, { "epoch": 32.14589838206074, "grad_norm": 4.227628231048584, "learning_rate": 6.786744252057905e-05, "loss": 0.0018836671486496925, "step": 113250 }, { "epoch": 32.148736871984106, "grad_norm": 0.04652860760688782, "learning_rate": 6.786460403065569e-05, "loss": 0.0006655693054199219, "step": 113260 }, { "epoch": 32.15157536190747, "grad_norm": 3.737379789352417, "learning_rate": 6.786176554073233e-05, "loss": 0.002638237737119198, "step": 113270 }, { "epoch": 32.154413851830824, "grad_norm": 0.966570258140564, "learning_rate": 6.785892705080898e-05, "loss": 0.0011252839118242263, "step": 113280 }, { "epoch": 32.15725234175419, "grad_norm": 1.2164398431777954, "learning_rate": 6.785608856088562e-05, "loss": 0.01300824135541916, "step": 113290 }, { "epoch": 32.16009083167755, "grad_norm": 0.17227624356746674, "learning_rate": 6.785325007096224e-05, "loss": 0.00208548866212368, "step": 113300 }, { "epoch": 32.162929321600906, "grad_norm": 1.635103702545166, "learning_rate": 6.785041158103889e-05, "loss": 0.003455328196287155, "step": 113310 }, { "epoch": 32.16576781152427, "grad_norm": 0.023046696558594704, "learning_rate": 6.784757309111554e-05, "loss": 0.0024990182369947433, "step": 113320 }, { "epoch": 32.16860630144763, "grad_norm": 0.040312159806489944, "learning_rate": 6.784473460119217e-05, "loss": 0.0029676394537091256, "step": 113330 }, { "epoch": 32.17144479137099, "grad_norm": 0.013987556099891663, "learning_rate": 6.784189611126881e-05, "loss": 0.012713950872421265, "step": 113340 }, { "epoch": 32.17428328129435, "grad_norm": 0.14148584008216858, "learning_rate": 6.783905762134545e-05, "loss": 0.003788084909319878, "step": 113350 }, { "epoch": 32.17712177121771, "grad_norm": 0.03348509594798088, "learning_rate": 6.783621913142208e-05, "loss": 0.0020942777395248414, "step": 113360 }, { "epoch": 32.179960261141076, "grad_norm": 0.10452992469072342, "learning_rate": 6.783338064149872e-05, "loss": 0.002750825695693493, "step": 113370 }, { "epoch": 32.18279875106443, "grad_norm": 3.314361095428467, "learning_rate": 6.783054215157536e-05, "loss": 0.012247095257043839, "step": 113380 }, { "epoch": 32.185637240987795, "grad_norm": 0.14826585352420807, "learning_rate": 6.7827703661652e-05, "loss": 0.003682929649949074, "step": 113390 }, { "epoch": 32.18847573091116, "grad_norm": 0.8735467791557312, "learning_rate": 6.782486517172864e-05, "loss": 0.005335051566362381, "step": 113400 }, { "epoch": 32.19131422083451, "grad_norm": 0.10717332363128662, "learning_rate": 6.782202668180529e-05, "loss": 0.004156583175063133, "step": 113410 }, { "epoch": 32.194152710757876, "grad_norm": 0.7201699018478394, "learning_rate": 6.781918819188193e-05, "loss": 0.00549987331032753, "step": 113420 }, { "epoch": 32.19699120068124, "grad_norm": 10.19570255279541, "learning_rate": 6.781634970195856e-05, "loss": 0.010771410167217254, "step": 113430 }, { "epoch": 32.199829690604595, "grad_norm": 0.04747678339481354, "learning_rate": 6.78135112120352e-05, "loss": 0.0031986676156520845, "step": 113440 }, { "epoch": 32.20266818052796, "grad_norm": 1.113993763923645, "learning_rate": 6.781067272211184e-05, "loss": 0.010002873092889785, "step": 113450 }, { "epoch": 32.20550667045132, "grad_norm": 0.7131061553955078, "learning_rate": 6.780783423218848e-05, "loss": 0.005947320908308029, "step": 113460 }, { "epoch": 32.208345160374684, "grad_norm": 5.845598220825195, "learning_rate": 6.780499574226512e-05, "loss": 0.0033040568232536316, "step": 113470 }, { "epoch": 32.21118365029804, "grad_norm": 0.7549793720245361, "learning_rate": 6.780215725234176e-05, "loss": 0.0041345726698637005, "step": 113480 }, { "epoch": 32.2140221402214, "grad_norm": 2.954411268234253, "learning_rate": 6.779931876241839e-05, "loss": 0.0014928335323929787, "step": 113490 }, { "epoch": 32.216860630144765, "grad_norm": 7.998927116394043, "learning_rate": 6.779648027249503e-05, "loss": 0.00569138415157795, "step": 113500 }, { "epoch": 32.216860630144765, "eval_accuracy": 0.9820054683029186, "eval_loss": 0.07017359137535095, "eval_runtime": 45.33, "eval_samples_per_second": 346.945, "eval_steps_per_second": 5.427, "step": 113500 }, { "epoch": 32.21969912006812, "grad_norm": 0.9416159391403198, "learning_rate": 6.779364178257167e-05, "loss": 0.0013344213366508484, "step": 113510 }, { "epoch": 32.222537609991484, "grad_norm": 0.35054293274879456, "learning_rate": 6.779080329264831e-05, "loss": 0.0043958287686109545, "step": 113520 }, { "epoch": 32.22537609991485, "grad_norm": 2.231074810028076, "learning_rate": 6.778796480272496e-05, "loss": 0.00907873809337616, "step": 113530 }, { "epoch": 32.2282145898382, "grad_norm": 0.16974981129169464, "learning_rate": 6.77851263128016e-05, "loss": 0.004245173931121826, "step": 113540 }, { "epoch": 32.231053079761566, "grad_norm": 0.0577772855758667, "learning_rate": 6.778228782287824e-05, "loss": 0.005842685699462891, "step": 113550 }, { "epoch": 32.23389156968493, "grad_norm": 0.06934240460395813, "learning_rate": 6.777944933295487e-05, "loss": 0.007682345807552338, "step": 113560 }, { "epoch": 32.23673005960829, "grad_norm": 0.1693786084651947, "learning_rate": 6.777661084303151e-05, "loss": 0.0030367569997906685, "step": 113570 }, { "epoch": 32.23956854953165, "grad_norm": 0.20482240617275238, "learning_rate": 6.777377235310815e-05, "loss": 0.01222735270857811, "step": 113580 }, { "epoch": 32.24240703945501, "grad_norm": 0.6188503503799438, "learning_rate": 6.777093386318479e-05, "loss": 0.0021142831072211267, "step": 113590 }, { "epoch": 32.24524552937837, "grad_norm": 0.081989586353302, "learning_rate": 6.776809537326143e-05, "loss": 0.00046812053769826887, "step": 113600 }, { "epoch": 32.24808401930173, "grad_norm": 0.23025017976760864, "learning_rate": 6.776525688333807e-05, "loss": 0.003464978188276291, "step": 113610 }, { "epoch": 32.25092250922509, "grad_norm": 0.07550424337387085, "learning_rate": 6.77624183934147e-05, "loss": 0.007591759413480758, "step": 113620 }, { "epoch": 32.253760999148454, "grad_norm": 0.08636195212602615, "learning_rate": 6.775957990349134e-05, "loss": 0.0011756030842661857, "step": 113630 }, { "epoch": 32.25659948907182, "grad_norm": 0.062687948346138, "learning_rate": 6.775674141356798e-05, "loss": 0.0023677857592701913, "step": 113640 }, { "epoch": 32.25943797899517, "grad_norm": 2.974059581756592, "learning_rate": 6.775390292364463e-05, "loss": 0.008598271757364273, "step": 113650 }, { "epoch": 32.262276468918536, "grad_norm": 0.04739794507622719, "learning_rate": 6.775106443372127e-05, "loss": 0.001512908935546875, "step": 113660 }, { "epoch": 32.2651149588419, "grad_norm": 11.224630355834961, "learning_rate": 6.774822594379791e-05, "loss": 0.013849005103111267, "step": 113670 }, { "epoch": 32.267953448765255, "grad_norm": 0.78826504945755, "learning_rate": 6.774538745387455e-05, "loss": 0.002270679920911789, "step": 113680 }, { "epoch": 32.27079193868862, "grad_norm": 4.613251686096191, "learning_rate": 6.774254896395118e-05, "loss": 0.0021920947358012198, "step": 113690 }, { "epoch": 32.27363042861198, "grad_norm": 0.08019803464412689, "learning_rate": 6.773971047402782e-05, "loss": 0.0012122567743062973, "step": 113700 }, { "epoch": 32.276468918535336, "grad_norm": 0.5686661601066589, "learning_rate": 6.773687198410446e-05, "loss": 0.005924680083990097, "step": 113710 }, { "epoch": 32.2793074084587, "grad_norm": 4.028510570526123, "learning_rate": 6.77340334941811e-05, "loss": 0.011122354865074157, "step": 113720 }, { "epoch": 32.28214589838206, "grad_norm": 0.3244401812553406, "learning_rate": 6.773119500425774e-05, "loss": 0.004088877886533737, "step": 113730 }, { "epoch": 32.284984388305425, "grad_norm": 0.10172554105520248, "learning_rate": 6.772835651433438e-05, "loss": 0.003782095015048981, "step": 113740 }, { "epoch": 32.28782287822878, "grad_norm": 2.1995151042938232, "learning_rate": 6.772551802441101e-05, "loss": 0.0045441586524248125, "step": 113750 }, { "epoch": 32.29066136815214, "grad_norm": 0.02667686715722084, "learning_rate": 6.772267953448765e-05, "loss": 0.0008935999125242233, "step": 113760 }, { "epoch": 32.293499858075506, "grad_norm": 2.190990447998047, "learning_rate": 6.77198410445643e-05, "loss": 0.0051227472722530365, "step": 113770 }, { "epoch": 32.29633834799886, "grad_norm": 0.12946809828281403, "learning_rate": 6.771700255464094e-05, "loss": 0.00521881952881813, "step": 113780 }, { "epoch": 32.299176837922225, "grad_norm": 1.1040929555892944, "learning_rate": 6.771416406471758e-05, "loss": 0.017396734654903413, "step": 113790 }, { "epoch": 32.30201532784559, "grad_norm": 0.9229865670204163, "learning_rate": 6.771132557479422e-05, "loss": 0.0008361911401152611, "step": 113800 }, { "epoch": 32.304853817768944, "grad_norm": 2.6649880409240723, "learning_rate": 6.770848708487085e-05, "loss": 0.002042866498231888, "step": 113810 }, { "epoch": 32.30769230769231, "grad_norm": 7.156626224517822, "learning_rate": 6.770564859494749e-05, "loss": 0.009526080638170242, "step": 113820 }, { "epoch": 32.31053079761567, "grad_norm": 7.879796981811523, "learning_rate": 6.770281010502413e-05, "loss": 0.010286284238100052, "step": 113830 }, { "epoch": 32.31336928753903, "grad_norm": 6.841385841369629, "learning_rate": 6.769997161510077e-05, "loss": 0.005596169829368591, "step": 113840 }, { "epoch": 32.31620777746239, "grad_norm": 7.091487407684326, "learning_rate": 6.769713312517741e-05, "loss": 0.005100179463624954, "step": 113850 }, { "epoch": 32.31904626738575, "grad_norm": 0.04734085127711296, "learning_rate": 6.769429463525405e-05, "loss": 0.018936926126480104, "step": 113860 }, { "epoch": 32.321884757309114, "grad_norm": 0.7027969360351562, "learning_rate": 6.76914561453307e-05, "loss": 0.01067001074552536, "step": 113870 }, { "epoch": 32.32472324723247, "grad_norm": 0.2991061806678772, "learning_rate": 6.768861765540732e-05, "loss": 0.006674773991107941, "step": 113880 }, { "epoch": 32.32756173715583, "grad_norm": 7.534132480621338, "learning_rate": 6.768577916548396e-05, "loss": 0.005333800241351128, "step": 113890 }, { "epoch": 32.330400227079195, "grad_norm": 3.50868558883667, "learning_rate": 6.76829406755606e-05, "loss": 0.006148515269160271, "step": 113900 }, { "epoch": 32.33323871700255, "grad_norm": 5.1778154373168945, "learning_rate": 6.768010218563723e-05, "loss": 0.010675667226314545, "step": 113910 }, { "epoch": 32.336077206925914, "grad_norm": 0.19260387122631073, "learning_rate": 6.767726369571389e-05, "loss": 0.004810651391744613, "step": 113920 }, { "epoch": 32.33891569684928, "grad_norm": 12.806052207946777, "learning_rate": 6.767442520579053e-05, "loss": 0.02164035439491272, "step": 113930 }, { "epoch": 32.34175418677264, "grad_norm": 7.921098232269287, "learning_rate": 6.767158671586716e-05, "loss": 0.006641998887062073, "step": 113940 }, { "epoch": 32.344592676695996, "grad_norm": 0.32752513885498047, "learning_rate": 6.76687482259438e-05, "loss": 0.005580586940050125, "step": 113950 }, { "epoch": 32.34743116661936, "grad_norm": 1.4735490083694458, "learning_rate": 6.766590973602044e-05, "loss": 0.00248092170804739, "step": 113960 }, { "epoch": 32.35026965654272, "grad_norm": 3.923022508621216, "learning_rate": 6.766307124609708e-05, "loss": 0.003896508365869522, "step": 113970 }, { "epoch": 32.35310814646608, "grad_norm": 0.1496472805738449, "learning_rate": 6.766023275617371e-05, "loss": 0.0008547229692339898, "step": 113980 }, { "epoch": 32.35594663638944, "grad_norm": 4.06162691116333, "learning_rate": 6.765739426625036e-05, "loss": 0.00665922686457634, "step": 113990 }, { "epoch": 32.3587851263128, "grad_norm": 2.834336280822754, "learning_rate": 6.7654555776327e-05, "loss": 0.01586110144853592, "step": 114000 }, { "epoch": 32.3587851263128, "eval_accuracy": 0.9774273542315762, "eval_loss": 0.08712475001811981, "eval_runtime": 37.7876, "eval_samples_per_second": 416.195, "eval_steps_per_second": 6.51, "step": 114000 }, { "epoch": 32.36162361623616, "grad_norm": 7.560249328613281, "learning_rate": 6.765171728640363e-05, "loss": 0.009403932839632034, "step": 114010 }, { "epoch": 32.36446210615952, "grad_norm": 11.953810691833496, "learning_rate": 6.764916264547261e-05, "loss": 0.04003007709980011, "step": 114020 }, { "epoch": 32.367300596082885, "grad_norm": 0.23096127808094025, "learning_rate": 6.764632415554924e-05, "loss": 0.006514255702495575, "step": 114030 }, { "epoch": 32.37013908600625, "grad_norm": 0.5377674102783203, "learning_rate": 6.76434856656259e-05, "loss": 0.0014079341664910317, "step": 114040 }, { "epoch": 32.3729775759296, "grad_norm": 4.96317195892334, "learning_rate": 6.764064717570254e-05, "loss": 0.004779626056551934, "step": 114050 }, { "epoch": 32.375816065852966, "grad_norm": 0.02350626513361931, "learning_rate": 6.763780868577917e-05, "loss": 0.00471135824918747, "step": 114060 }, { "epoch": 32.37865455577633, "grad_norm": 0.599720299243927, "learning_rate": 6.76349701958558e-05, "loss": 0.004146309942007065, "step": 114070 }, { "epoch": 32.381493045699685, "grad_norm": 16.489200592041016, "learning_rate": 6.763213170593245e-05, "loss": 0.020003560185432433, "step": 114080 }, { "epoch": 32.38433153562305, "grad_norm": 0.08096812665462494, "learning_rate": 6.762929321600908e-05, "loss": 0.005378960445523262, "step": 114090 }, { "epoch": 32.38717002554641, "grad_norm": 2.644455671310425, "learning_rate": 6.762645472608573e-05, "loss": 0.003600980341434479, "step": 114100 }, { "epoch": 32.39000851546977, "grad_norm": 1.2839454412460327, "learning_rate": 6.762361623616237e-05, "loss": 0.0032270073890686033, "step": 114110 }, { "epoch": 32.39284700539313, "grad_norm": 0.40738600492477417, "learning_rate": 6.7620777746239e-05, "loss": 0.005680116266012192, "step": 114120 }, { "epoch": 32.39568549531649, "grad_norm": 0.3113716244697571, "learning_rate": 6.761793925631564e-05, "loss": 0.0053767658770084385, "step": 114130 }, { "epoch": 32.398523985239855, "grad_norm": 0.03353322669863701, "learning_rate": 6.761510076639228e-05, "loss": 0.005913880467414856, "step": 114140 }, { "epoch": 32.40136247516321, "grad_norm": 0.546964168548584, "learning_rate": 6.761226227646892e-05, "loss": 0.011770550906658173, "step": 114150 }, { "epoch": 32.404200965086574, "grad_norm": 11.095614433288574, "learning_rate": 6.760942378654555e-05, "loss": 0.015455204248428344, "step": 114160 }, { "epoch": 32.40703945500994, "grad_norm": 0.10472125560045242, "learning_rate": 6.76065852966222e-05, "loss": 0.0009333027526736259, "step": 114170 }, { "epoch": 32.40987794493329, "grad_norm": 0.675320565700531, "learning_rate": 6.760374680669885e-05, "loss": 0.01317942440509796, "step": 114180 }, { "epoch": 32.412716434856655, "grad_norm": 0.14482325315475464, "learning_rate": 6.760090831677548e-05, "loss": 0.005757142975926399, "step": 114190 }, { "epoch": 32.41555492478002, "grad_norm": 9.954489707946777, "learning_rate": 6.759806982685212e-05, "loss": 0.00840342864394188, "step": 114200 }, { "epoch": 32.41839341470338, "grad_norm": 0.048980485647916794, "learning_rate": 6.759523133692876e-05, "loss": 0.01210416853427887, "step": 114210 }, { "epoch": 32.42123190462674, "grad_norm": 0.27393272519111633, "learning_rate": 6.759239284700539e-05, "loss": 0.005969209223985672, "step": 114220 }, { "epoch": 32.4240703945501, "grad_norm": 0.7051840424537659, "learning_rate": 6.758955435708204e-05, "loss": 0.016082359850406645, "step": 114230 }, { "epoch": 32.42690888447346, "grad_norm": 0.4361644387245178, "learning_rate": 6.758671586715868e-05, "loss": 0.0025932040065526963, "step": 114240 }, { "epoch": 32.42974737439682, "grad_norm": 5.031918525695801, "learning_rate": 6.758387737723531e-05, "loss": 0.008626267313957214, "step": 114250 }, { "epoch": 32.43258586432018, "grad_norm": 0.17241919040679932, "learning_rate": 6.758103888731195e-05, "loss": 0.012232260406017303, "step": 114260 }, { "epoch": 32.435424354243544, "grad_norm": 0.30716750025749207, "learning_rate": 6.75782003973886e-05, "loss": 0.01365923285484314, "step": 114270 }, { "epoch": 32.4382628441669, "grad_norm": 0.05446196347475052, "learning_rate": 6.757536190746523e-05, "loss": 0.001996135152876377, "step": 114280 }, { "epoch": 32.44110133409026, "grad_norm": 5.380481243133545, "learning_rate": 6.757252341754186e-05, "loss": 0.0026045218110084534, "step": 114290 }, { "epoch": 32.443939824013626, "grad_norm": 0.3682308793067932, "learning_rate": 6.756968492761852e-05, "loss": 0.001360434480011463, "step": 114300 }, { "epoch": 32.44677831393699, "grad_norm": 0.02981436252593994, "learning_rate": 6.756684643769516e-05, "loss": 0.01003475785255432, "step": 114310 }, { "epoch": 32.449616803860344, "grad_norm": 0.4948399066925049, "learning_rate": 6.756400794777179e-05, "loss": 0.009991858154535294, "step": 114320 }, { "epoch": 32.45245529378371, "grad_norm": 10.741972923278809, "learning_rate": 6.756116945784843e-05, "loss": 0.016574576497077942, "step": 114330 }, { "epoch": 32.45529378370707, "grad_norm": 3.1867520809173584, "learning_rate": 6.755833096792507e-05, "loss": 0.0031610999256372454, "step": 114340 }, { "epoch": 32.458132273630426, "grad_norm": 0.06286652386188507, "learning_rate": 6.75554924780017e-05, "loss": 0.004063401371240616, "step": 114350 }, { "epoch": 32.46097076355379, "grad_norm": 0.04529953747987747, "learning_rate": 6.755265398807834e-05, "loss": 0.001109796203672886, "step": 114360 }, { "epoch": 32.46380925347715, "grad_norm": 0.06832098215818405, "learning_rate": 6.7549815498155e-05, "loss": 0.0029121644794940947, "step": 114370 }, { "epoch": 32.46664774340051, "grad_norm": 0.19736576080322266, "learning_rate": 6.754697700823162e-05, "loss": 0.0053702622652053835, "step": 114380 }, { "epoch": 32.46948623332387, "grad_norm": 0.16143587231636047, "learning_rate": 6.754413851830826e-05, "loss": 0.0004287492483854294, "step": 114390 }, { "epoch": 32.47232472324723, "grad_norm": 0.09874866157770157, "learning_rate": 6.75413000283849e-05, "loss": 0.0031410571187734603, "step": 114400 }, { "epoch": 32.475163213170596, "grad_norm": 0.1389043927192688, "learning_rate": 6.753846153846155e-05, "loss": 0.0033970147371292113, "step": 114410 }, { "epoch": 32.47800170309395, "grad_norm": 4.991461753845215, "learning_rate": 6.753562304853817e-05, "loss": 0.006587091833353043, "step": 114420 }, { "epoch": 32.480840193017315, "grad_norm": 0.42774611711502075, "learning_rate": 6.753278455861483e-05, "loss": 0.00048552565276622773, "step": 114430 }, { "epoch": 32.48367868294068, "grad_norm": 0.21969740092754364, "learning_rate": 6.752994606869147e-05, "loss": 0.005809883773326874, "step": 114440 }, { "epoch": 32.48651717286403, "grad_norm": 0.18078318238258362, "learning_rate": 6.75271075787681e-05, "loss": 0.00479080006480217, "step": 114450 }, { "epoch": 32.489355662787396, "grad_norm": 6.329003810882568, "learning_rate": 6.752426908884474e-05, "loss": 0.00772140622138977, "step": 114460 }, { "epoch": 32.49219415271076, "grad_norm": 0.6858393549919128, "learning_rate": 6.752143059892138e-05, "loss": 0.006368814408779145, "step": 114470 }, { "epoch": 32.49503264263412, "grad_norm": 0.019492102786898613, "learning_rate": 6.751859210899801e-05, "loss": 0.0014190260320901872, "step": 114480 }, { "epoch": 32.49787113255748, "grad_norm": 0.04729132726788521, "learning_rate": 6.751575361907465e-05, "loss": 0.0007154002785682678, "step": 114490 }, { "epoch": 32.50070962248084, "grad_norm": 0.0974271148443222, "learning_rate": 6.75129151291513e-05, "loss": 0.0020119067281484603, "step": 114500 }, { "epoch": 32.50070962248084, "eval_accuracy": 0.9799707509378776, "eval_loss": 0.08007471263408661, "eval_runtime": 37.639, "eval_samples_per_second": 417.837, "eval_steps_per_second": 6.536, "step": 114500 }, { "epoch": 32.503548112404204, "grad_norm": 17.988264083862305, "learning_rate": 6.751007663922793e-05, "loss": 0.01303163766860962, "step": 114510 }, { "epoch": 32.50638660232756, "grad_norm": 0.24098849296569824, "learning_rate": 6.750723814930457e-05, "loss": 0.002260972745716572, "step": 114520 }, { "epoch": 32.50922509225092, "grad_norm": 1.994267463684082, "learning_rate": 6.750439965938121e-05, "loss": 0.00435103327035904, "step": 114530 }, { "epoch": 32.512063582174285, "grad_norm": 0.032066743820905685, "learning_rate": 6.750156116945786e-05, "loss": 0.007737505435943604, "step": 114540 }, { "epoch": 32.51490207209764, "grad_norm": 0.050340645015239716, "learning_rate": 6.749872267953448e-05, "loss": 0.0028028307482600213, "step": 114550 }, { "epoch": 32.517740562021004, "grad_norm": 0.2155630886554718, "learning_rate": 6.749588418961113e-05, "loss": 0.0020587248727679253, "step": 114560 }, { "epoch": 32.52057905194437, "grad_norm": 0.2656296491622925, "learning_rate": 6.749304569968777e-05, "loss": 0.0039482396095991135, "step": 114570 }, { "epoch": 32.52341754186773, "grad_norm": 8.929291725158691, "learning_rate": 6.749020720976441e-05, "loss": 0.005150778219103813, "step": 114580 }, { "epoch": 32.526256031791085, "grad_norm": 0.10331594944000244, "learning_rate": 6.748736871984105e-05, "loss": 0.03145190477371216, "step": 114590 }, { "epoch": 32.52909452171445, "grad_norm": 2.036040782928467, "learning_rate": 6.748453022991769e-05, "loss": 0.010507086664438248, "step": 114600 }, { "epoch": 32.53193301163781, "grad_norm": 0.10751274228096008, "learning_rate": 6.748169173999432e-05, "loss": 0.018629036843776703, "step": 114610 }, { "epoch": 32.53477150156117, "grad_norm": 0.09477870166301727, "learning_rate": 6.747885325007096e-05, "loss": 0.0023122908547520637, "step": 114620 }, { "epoch": 32.53760999148453, "grad_norm": 15.652482986450195, "learning_rate": 6.747601476014762e-05, "loss": 0.023128366470336913, "step": 114630 }, { "epoch": 32.54044848140789, "grad_norm": 1.5583592653274536, "learning_rate": 6.747317627022424e-05, "loss": 0.0015567641705274582, "step": 114640 }, { "epoch": 32.54328697133125, "grad_norm": 0.054387226700782776, "learning_rate": 6.747033778030088e-05, "loss": 0.00186182651668787, "step": 114650 }, { "epoch": 32.54612546125461, "grad_norm": 0.09402450919151306, "learning_rate": 6.746749929037753e-05, "loss": 0.00370519682765007, "step": 114660 }, { "epoch": 32.548963951177974, "grad_norm": 0.03613465651869774, "learning_rate": 6.746466080045417e-05, "loss": 0.0028425376862287522, "step": 114670 }, { "epoch": 32.55180244110134, "grad_norm": 2.491123676300049, "learning_rate": 6.74618223105308e-05, "loss": 0.0022749330848455427, "step": 114680 }, { "epoch": 32.55464093102469, "grad_norm": 0.06355983018875122, "learning_rate": 6.745898382060744e-05, "loss": 0.008654017746448518, "step": 114690 }, { "epoch": 32.557479420948056, "grad_norm": 0.23915013670921326, "learning_rate": 6.745614533068408e-05, "loss": 0.009348556399345398, "step": 114700 }, { "epoch": 32.56031791087142, "grad_norm": 0.021596450358629227, "learning_rate": 6.745330684076072e-05, "loss": 0.004156605154275894, "step": 114710 }, { "epoch": 32.563156400794774, "grad_norm": 6.17777681350708, "learning_rate": 6.745046835083736e-05, "loss": 0.0040039196610450745, "step": 114720 }, { "epoch": 32.56599489071814, "grad_norm": 0.37953636050224304, "learning_rate": 6.7447629860914e-05, "loss": 0.018315052986145018, "step": 114730 }, { "epoch": 32.5688333806415, "grad_norm": 1.4359852075576782, "learning_rate": 6.744479137099063e-05, "loss": 0.005865984410047531, "step": 114740 }, { "epoch": 32.571671870564856, "grad_norm": 14.182202339172363, "learning_rate": 6.744195288106727e-05, "loss": 0.013124102354049682, "step": 114750 }, { "epoch": 32.57451036048822, "grad_norm": 1.5095113515853882, "learning_rate": 6.743911439114391e-05, "loss": 0.0033541005104780197, "step": 114760 }, { "epoch": 32.57734885041158, "grad_norm": 0.058014076203107834, "learning_rate": 6.743627590122055e-05, "loss": 0.01043219342827797, "step": 114770 }, { "epoch": 32.580187340334945, "grad_norm": 0.058495886623859406, "learning_rate": 6.74334374112972e-05, "loss": 0.003884213790297508, "step": 114780 }, { "epoch": 32.5830258302583, "grad_norm": 0.36106768250465393, "learning_rate": 6.743059892137384e-05, "loss": 0.005227448418736458, "step": 114790 }, { "epoch": 32.58586432018166, "grad_norm": 8.617347717285156, "learning_rate": 6.742776043145046e-05, "loss": 0.007249203324317932, "step": 114800 }, { "epoch": 32.588702810105026, "grad_norm": 0.03707880154252052, "learning_rate": 6.74249219415271e-05, "loss": 0.030651959776878356, "step": 114810 }, { "epoch": 32.59154130002838, "grad_norm": 0.07854291051626205, "learning_rate": 6.742208345160375e-05, "loss": 0.0005783036351203919, "step": 114820 }, { "epoch": 32.594379789951745, "grad_norm": 5.614202976226807, "learning_rate": 6.741924496168039e-05, "loss": 0.003442440927028656, "step": 114830 }, { "epoch": 32.59721827987511, "grad_norm": 0.011544346809387207, "learning_rate": 6.741640647175703e-05, "loss": 0.0005873667076230049, "step": 114840 }, { "epoch": 32.60005676979847, "grad_norm": 0.055698610842227936, "learning_rate": 6.741356798183367e-05, "loss": 0.0005435694009065628, "step": 114850 }, { "epoch": 32.60289525972183, "grad_norm": 3.0731241703033447, "learning_rate": 6.741072949191031e-05, "loss": 0.003245591372251511, "step": 114860 }, { "epoch": 32.60573374964519, "grad_norm": 0.04284783825278282, "learning_rate": 6.740789100198694e-05, "loss": 0.003738018870353699, "step": 114870 }, { "epoch": 32.60857223956855, "grad_norm": 0.022851327434182167, "learning_rate": 6.740505251206358e-05, "loss": 0.0006885919719934464, "step": 114880 }, { "epoch": 32.61141072949191, "grad_norm": 0.04896998032927513, "learning_rate": 6.740221402214022e-05, "loss": 0.005958343297243119, "step": 114890 }, { "epoch": 32.61424921941527, "grad_norm": 10.379579544067383, "learning_rate": 6.739937553221686e-05, "loss": 0.007520644366741181, "step": 114900 }, { "epoch": 32.617087709338634, "grad_norm": 7.4861321449279785, "learning_rate": 6.73965370422935e-05, "loss": 0.007841396331787109, "step": 114910 }, { "epoch": 32.61992619926199, "grad_norm": 0.026555828750133514, "learning_rate": 6.739369855237015e-05, "loss": 0.01271587461233139, "step": 114920 }, { "epoch": 32.62276468918535, "grad_norm": 0.08677726238965988, "learning_rate": 6.739086006244677e-05, "loss": 0.015784946084022523, "step": 114930 }, { "epoch": 32.625603179108715, "grad_norm": 0.2982737123966217, "learning_rate": 6.738802157252342e-05, "loss": 0.00266190804541111, "step": 114940 }, { "epoch": 32.62844166903208, "grad_norm": 0.2331753373146057, "learning_rate": 6.738518308260006e-05, "loss": 0.003536364808678627, "step": 114950 }, { "epoch": 32.631280158955434, "grad_norm": 10.072357177734375, "learning_rate": 6.73823445926767e-05, "loss": 0.005135820806026458, "step": 114960 }, { "epoch": 32.6341186488788, "grad_norm": 0.47259655594825745, "learning_rate": 6.737950610275334e-05, "loss": 0.022447675466537476, "step": 114970 }, { "epoch": 32.63695713880216, "grad_norm": 0.08672060817480087, "learning_rate": 6.737666761282998e-05, "loss": 0.0012648273259401321, "step": 114980 }, { "epoch": 32.639795628725516, "grad_norm": 0.01614515110850334, "learning_rate": 6.737382912290662e-05, "loss": 0.00042419228702783585, "step": 114990 }, { "epoch": 32.64263411864888, "grad_norm": 0.36748558282852173, "learning_rate": 6.737099063298325e-05, "loss": 0.0033564042299985886, "step": 115000 }, { "epoch": 32.64263411864888, "eval_accuracy": 0.9817511286322884, "eval_loss": 0.06988764554262161, "eval_runtime": 37.0429, "eval_samples_per_second": 424.562, "eval_steps_per_second": 6.641, "step": 115000 }, { "epoch": 32.64547260857224, "grad_norm": 0.5987921357154846, "learning_rate": 6.736815214305989e-05, "loss": 0.0008588923141360283, "step": 115010 }, { "epoch": 32.6483110984956, "grad_norm": 1.7520004510879517, "learning_rate": 6.736531365313653e-05, "loss": 0.0024900803342461585, "step": 115020 }, { "epoch": 32.65114958841896, "grad_norm": 1.197582721710205, "learning_rate": 6.736247516321318e-05, "loss": 0.002209388092160225, "step": 115030 }, { "epoch": 32.65398807834232, "grad_norm": 7.724530220031738, "learning_rate": 6.735963667328982e-05, "loss": 0.0073419734835624695, "step": 115040 }, { "epoch": 32.656826568265686, "grad_norm": 0.05772159993648529, "learning_rate": 6.735679818336646e-05, "loss": 0.0024494901299476624, "step": 115050 }, { "epoch": 32.65966505818904, "grad_norm": 1.225870132446289, "learning_rate": 6.735395969344309e-05, "loss": 0.0062365088611841205, "step": 115060 }, { "epoch": 32.662503548112404, "grad_norm": 2.3368053436279297, "learning_rate": 6.735112120351973e-05, "loss": 0.002470892108976841, "step": 115070 }, { "epoch": 32.66534203803577, "grad_norm": 1.8568211793899536, "learning_rate": 6.734828271359637e-05, "loss": 0.001349279284477234, "step": 115080 }, { "epoch": 32.66818052795912, "grad_norm": 1.4141619205474854, "learning_rate": 6.734544422367301e-05, "loss": 0.0016955973580479622, "step": 115090 }, { "epoch": 32.671019017882486, "grad_norm": 0.008772218599915504, "learning_rate": 6.734260573374965e-05, "loss": 0.005917279422283173, "step": 115100 }, { "epoch": 32.67385750780585, "grad_norm": 0.4783889651298523, "learning_rate": 6.733976724382629e-05, "loss": 0.01444573700428009, "step": 115110 }, { "epoch": 32.676695997729205, "grad_norm": 7.1426496505737305, "learning_rate": 6.733692875390293e-05, "loss": 0.017557233572006226, "step": 115120 }, { "epoch": 32.67953448765257, "grad_norm": 0.15905162692070007, "learning_rate": 6.733409026397956e-05, "loss": 0.0060407426208257675, "step": 115130 }, { "epoch": 32.68237297757593, "grad_norm": 0.0361337848007679, "learning_rate": 6.73312517740562e-05, "loss": 0.004079294204711914, "step": 115140 }, { "epoch": 32.68521146749929, "grad_norm": 0.029840795323252678, "learning_rate": 6.732841328413284e-05, "loss": 0.007979363948106766, "step": 115150 }, { "epoch": 32.68804995742265, "grad_norm": 0.6522260308265686, "learning_rate": 6.732557479420947e-05, "loss": 0.008920988440513611, "step": 115160 }, { "epoch": 32.69088844734601, "grad_norm": 2.1792171001434326, "learning_rate": 6.732273630428613e-05, "loss": 0.0035223431885242464, "step": 115170 }, { "epoch": 32.693726937269375, "grad_norm": 7.528707027435303, "learning_rate": 6.731989781436277e-05, "loss": 0.00883939564228058, "step": 115180 }, { "epoch": 32.69656542719273, "grad_norm": 14.118414878845215, "learning_rate": 6.73170593244394e-05, "loss": 0.012726087868213654, "step": 115190 }, { "epoch": 32.69940391711609, "grad_norm": 2.223851203918457, "learning_rate": 6.731422083451604e-05, "loss": 0.0022952212020754814, "step": 115200 }, { "epoch": 32.702242407039456, "grad_norm": 0.9885993599891663, "learning_rate": 6.731138234459268e-05, "loss": 0.0038086555898189543, "step": 115210 }, { "epoch": 32.70508089696281, "grad_norm": 0.10827240347862244, "learning_rate": 6.730854385466932e-05, "loss": 0.0005593894049525261, "step": 115220 }, { "epoch": 32.707919386886175, "grad_norm": 15.818256378173828, "learning_rate": 6.730570536474596e-05, "loss": 0.02335064858198166, "step": 115230 }, { "epoch": 32.71075787680954, "grad_norm": 0.8417431116104126, "learning_rate": 6.73028668748226e-05, "loss": 0.017483727633953096, "step": 115240 }, { "epoch": 32.7135963667329, "grad_norm": 0.5840616822242737, "learning_rate": 6.730002838489924e-05, "loss": 0.011471319198608398, "step": 115250 }, { "epoch": 32.71643485665626, "grad_norm": 1.7906863689422607, "learning_rate": 6.729718989497587e-05, "loss": 0.0030981972813606262, "step": 115260 }, { "epoch": 32.71927334657962, "grad_norm": 1.1738306283950806, "learning_rate": 6.729435140505251e-05, "loss": 0.018564595282077788, "step": 115270 }, { "epoch": 32.72211183650298, "grad_norm": 1.5727635622024536, "learning_rate": 6.729151291512916e-05, "loss": 0.029869663715362548, "step": 115280 }, { "epoch": 32.72495032642634, "grad_norm": 8.593466758728027, "learning_rate": 6.728867442520578e-05, "loss": 0.010429519414901733, "step": 115290 }, { "epoch": 32.7277888163497, "grad_norm": 1.1088589429855347, "learning_rate": 6.728583593528244e-05, "loss": 0.013384000957012176, "step": 115300 }, { "epoch": 32.730627306273064, "grad_norm": 0.6185814142227173, "learning_rate": 6.728299744535908e-05, "loss": 0.010928134620189666, "step": 115310 }, { "epoch": 32.73346579619643, "grad_norm": 0.5119034647941589, "learning_rate": 6.728015895543571e-05, "loss": 0.010070094466209411, "step": 115320 }, { "epoch": 32.73630428611978, "grad_norm": 9.965852737426758, "learning_rate": 6.727732046551235e-05, "loss": 0.0048790894448757175, "step": 115330 }, { "epoch": 32.739142776043145, "grad_norm": 0.16092820465564728, "learning_rate": 6.727448197558899e-05, "loss": 0.005509594455361366, "step": 115340 }, { "epoch": 32.74198126596651, "grad_norm": 10.854374885559082, "learning_rate": 6.727164348566563e-05, "loss": 0.010521016269922256, "step": 115350 }, { "epoch": 32.744819755889864, "grad_norm": 2.095468282699585, "learning_rate": 6.726880499574226e-05, "loss": 0.008881182968616485, "step": 115360 }, { "epoch": 32.74765824581323, "grad_norm": 2.1168315410614014, "learning_rate": 6.726596650581891e-05, "loss": 0.002045045979321003, "step": 115370 }, { "epoch": 32.75049673573659, "grad_norm": 2.7574262619018555, "learning_rate": 6.726312801589556e-05, "loss": 0.005743959173560143, "step": 115380 }, { "epoch": 32.753335225659946, "grad_norm": 1.5483633279800415, "learning_rate": 6.726028952597218e-05, "loss": 0.0032996051013469696, "step": 115390 }, { "epoch": 32.75617371558331, "grad_norm": 0.7235838770866394, "learning_rate": 6.725745103604882e-05, "loss": 0.00510307140648365, "step": 115400 }, { "epoch": 32.75901220550667, "grad_norm": 0.03441060334444046, "learning_rate": 6.725461254612547e-05, "loss": 0.006230661645531654, "step": 115410 }, { "epoch": 32.761850695430034, "grad_norm": 0.016389377415180206, "learning_rate": 6.72517740562021e-05, "loss": 0.003398095816373825, "step": 115420 }, { "epoch": 32.76468918535339, "grad_norm": 9.060834884643555, "learning_rate": 6.724893556627875e-05, "loss": 0.004841665178537369, "step": 115430 }, { "epoch": 32.76752767527675, "grad_norm": 2.3897271156311035, "learning_rate": 6.724609707635539e-05, "loss": 0.004336610808968544, "step": 115440 }, { "epoch": 32.770366165200116, "grad_norm": 0.317351371049881, "learning_rate": 6.724325858643202e-05, "loss": 0.0011380070820450783, "step": 115450 }, { "epoch": 32.77320465512347, "grad_norm": 0.03888026252388954, "learning_rate": 6.724042009650866e-05, "loss": 0.006226158514618873, "step": 115460 }, { "epoch": 32.776043145046835, "grad_norm": 0.1306099146604538, "learning_rate": 6.72375816065853e-05, "loss": 0.008636984229087829, "step": 115470 }, { "epoch": 32.7788816349702, "grad_norm": 0.06315547227859497, "learning_rate": 6.723474311666194e-05, "loss": 0.0008447887375950813, "step": 115480 }, { "epoch": 32.78172012489355, "grad_norm": 0.22278690338134766, "learning_rate": 6.723190462673857e-05, "loss": 0.010799533128738404, "step": 115490 }, { "epoch": 32.784558614816916, "grad_norm": 0.009275462478399277, "learning_rate": 6.722906613681522e-05, "loss": 0.0027032503858208656, "step": 115500 }, { "epoch": 32.784558614816916, "eval_accuracy": 0.9818782984676034, "eval_loss": 0.06736987084150314, "eval_runtime": 34.8997, "eval_samples_per_second": 450.634, "eval_steps_per_second": 7.049, "step": 115500 }, { "epoch": 32.78739710474028, "grad_norm": 0.8604802489280701, "learning_rate": 6.722622764689185e-05, "loss": 0.005117473006248474, "step": 115510 }, { "epoch": 32.79023559466364, "grad_norm": 0.05745973065495491, "learning_rate": 6.72233891569685e-05, "loss": 0.009720875322818756, "step": 115520 }, { "epoch": 32.793074084587, "grad_norm": 0.1751333475112915, "learning_rate": 6.722055066704514e-05, "loss": 0.0015902178362011909, "step": 115530 }, { "epoch": 32.79591257451036, "grad_norm": 2.5215442180633545, "learning_rate": 6.721771217712178e-05, "loss": 0.002424665354192257, "step": 115540 }, { "epoch": 32.79875106443372, "grad_norm": 5.956653594970703, "learning_rate": 6.72148736871984e-05, "loss": 0.001900552585721016, "step": 115550 }, { "epoch": 32.80158955435708, "grad_norm": 0.37581831216812134, "learning_rate": 6.721203519727506e-05, "loss": 0.0034672558307647706, "step": 115560 }, { "epoch": 32.80442804428044, "grad_norm": 12.57548713684082, "learning_rate": 6.72091967073517e-05, "loss": 0.02423868775367737, "step": 115570 }, { "epoch": 32.807266534203805, "grad_norm": 0.005635580513626337, "learning_rate": 6.720635821742833e-05, "loss": 0.0062651924788951876, "step": 115580 }, { "epoch": 32.81010502412717, "grad_norm": 0.9730538129806519, "learning_rate": 6.720351972750497e-05, "loss": 0.0015264956280589103, "step": 115590 }, { "epoch": 32.812943514050524, "grad_norm": 9.407282829284668, "learning_rate": 6.720068123758161e-05, "loss": 0.012949973344802856, "step": 115600 }, { "epoch": 32.81578200397389, "grad_norm": 0.154144287109375, "learning_rate": 6.719784274765825e-05, "loss": 0.001374381221830845, "step": 115610 }, { "epoch": 32.81862049389725, "grad_norm": 0.10285119712352753, "learning_rate": 6.719500425773488e-05, "loss": 0.0016664970666170121, "step": 115620 }, { "epoch": 32.821458983820605, "grad_norm": 4.853670120239258, "learning_rate": 6.719216576781154e-05, "loss": 0.0022271597757935525, "step": 115630 }, { "epoch": 32.82429747374397, "grad_norm": 0.10711920261383057, "learning_rate": 6.718932727788816e-05, "loss": 0.003030586428940296, "step": 115640 }, { "epoch": 32.82713596366733, "grad_norm": 7.919442176818848, "learning_rate": 6.71864887879648e-05, "loss": 0.010530199855566025, "step": 115650 }, { "epoch": 32.82997445359069, "grad_norm": 0.2596246302127838, "learning_rate": 6.718365029804145e-05, "loss": 0.0020994305610656737, "step": 115660 }, { "epoch": 32.83281294351405, "grad_norm": 0.3671135902404785, "learning_rate": 6.718081180811809e-05, "loss": 0.018359410762786865, "step": 115670 }, { "epoch": 32.83565143343741, "grad_norm": 0.12671414017677307, "learning_rate": 6.717797331819472e-05, "loss": 0.009848378598690033, "step": 115680 }, { "epoch": 32.838489923360775, "grad_norm": 0.07302480936050415, "learning_rate": 6.717513482827136e-05, "loss": 0.008232537657022476, "step": 115690 }, { "epoch": 32.84132841328413, "grad_norm": 0.08686067163944244, "learning_rate": 6.717229633834801e-05, "loss": 0.0075740814208984375, "step": 115700 }, { "epoch": 32.844166903207494, "grad_norm": 8.358445167541504, "learning_rate": 6.716945784842464e-05, "loss": 0.0033353008329868317, "step": 115710 }, { "epoch": 32.84700539313086, "grad_norm": 1.3210904598236084, "learning_rate": 6.716661935850128e-05, "loss": 0.005610007047653198, "step": 115720 }, { "epoch": 32.84984388305421, "grad_norm": 0.026645544916391373, "learning_rate": 6.716378086857792e-05, "loss": 0.0014412131160497666, "step": 115730 }, { "epoch": 32.852682372977576, "grad_norm": 0.07111597061157227, "learning_rate": 6.716094237865455e-05, "loss": 0.002173510193824768, "step": 115740 }, { "epoch": 32.85552086290094, "grad_norm": 0.18615636229515076, "learning_rate": 6.715810388873119e-05, "loss": 0.0023820785805583, "step": 115750 }, { "epoch": 32.858359352824294, "grad_norm": 1.092203974723816, "learning_rate": 6.715526539880785e-05, "loss": 0.0037251044064760207, "step": 115760 }, { "epoch": 32.86119784274766, "grad_norm": 0.25380462408065796, "learning_rate": 6.715242690888447e-05, "loss": 0.0028632620349526407, "step": 115770 }, { "epoch": 32.86403633267102, "grad_norm": 4.7834858894348145, "learning_rate": 6.714958841896112e-05, "loss": 0.002264612726867199, "step": 115780 }, { "epoch": 32.86687482259438, "grad_norm": 0.017612561583518982, "learning_rate": 6.714674992903776e-05, "loss": 0.003397315740585327, "step": 115790 }, { "epoch": 32.86971331251774, "grad_norm": 0.20209135115146637, "learning_rate": 6.71439114391144e-05, "loss": 0.001487847790122032, "step": 115800 }, { "epoch": 32.8725518024411, "grad_norm": 0.07265114039182663, "learning_rate": 6.714107294919103e-05, "loss": 0.004842830076813698, "step": 115810 }, { "epoch": 32.875390292364465, "grad_norm": 0.07286660373210907, "learning_rate": 6.713823445926767e-05, "loss": 0.002341262623667717, "step": 115820 }, { "epoch": 32.87822878228782, "grad_norm": 3.5491795539855957, "learning_rate": 6.713539596934432e-05, "loss": 0.004468339309096336, "step": 115830 }, { "epoch": 32.88106727221118, "grad_norm": 2.12357234954834, "learning_rate": 6.713255747942095e-05, "loss": 0.001622641272842884, "step": 115840 }, { "epoch": 32.883905762134546, "grad_norm": 0.825917661190033, "learning_rate": 6.712971898949759e-05, "loss": 0.002963782101869583, "step": 115850 }, { "epoch": 32.8867442520579, "grad_norm": 9.74405288696289, "learning_rate": 6.712688049957423e-05, "loss": 0.004125053063035011, "step": 115860 }, { "epoch": 32.889582741981265, "grad_norm": 7.872115612030029, "learning_rate": 6.712404200965086e-05, "loss": 0.00821732059121132, "step": 115870 }, { "epoch": 32.89242123190463, "grad_norm": 0.024337299168109894, "learning_rate": 6.71212035197275e-05, "loss": 0.009269344806671142, "step": 115880 }, { "epoch": 32.89525972182799, "grad_norm": 0.276040643453598, "learning_rate": 6.711836502980414e-05, "loss": 0.01774805784225464, "step": 115890 }, { "epoch": 32.898098211751346, "grad_norm": 7.250348091125488, "learning_rate": 6.711552653988079e-05, "loss": 0.00894710123538971, "step": 115900 }, { "epoch": 32.90093670167471, "grad_norm": 4.94080114364624, "learning_rate": 6.711268804995743e-05, "loss": 0.0022767091169953345, "step": 115910 }, { "epoch": 32.90377519159807, "grad_norm": 7.283491134643555, "learning_rate": 6.710984956003407e-05, "loss": 0.0021829167380928993, "step": 115920 }, { "epoch": 32.90661368152143, "grad_norm": 0.2967607080936432, "learning_rate": 6.710701107011071e-05, "loss": 0.01130576804280281, "step": 115930 }, { "epoch": 32.90945217144479, "grad_norm": 9.006250381469727, "learning_rate": 6.710417258018734e-05, "loss": 0.008502615988254547, "step": 115940 }, { "epoch": 32.912290661368154, "grad_norm": 1.8420523405075073, "learning_rate": 6.710133409026398e-05, "loss": 0.0009386820718646049, "step": 115950 }, { "epoch": 32.91512915129151, "grad_norm": 0.3669402301311493, "learning_rate": 6.709849560034063e-05, "loss": 0.0032589659094810486, "step": 115960 }, { "epoch": 32.91796764121487, "grad_norm": 0.06491213291883469, "learning_rate": 6.709565711041726e-05, "loss": 0.0068365782499313354, "step": 115970 }, { "epoch": 32.920806131138235, "grad_norm": 0.6106870174407959, "learning_rate": 6.70928186204939e-05, "loss": 0.00959177166223526, "step": 115980 }, { "epoch": 32.9236446210616, "grad_norm": 2.9874281883239746, "learning_rate": 6.708998013057054e-05, "loss": 0.0022723421454429625, "step": 115990 }, { "epoch": 32.926483110984954, "grad_norm": 3.5597472190856934, "learning_rate": 6.708714164064717e-05, "loss": 0.01044917106628418, "step": 116000 }, { "epoch": 32.926483110984954, "eval_accuracy": 0.9788262224200419, "eval_loss": 0.08267034590244293, "eval_runtime": 39.4164, "eval_samples_per_second": 398.996, "eval_steps_per_second": 6.241, "step": 116000 }, { "epoch": 32.92932160090832, "grad_norm": 0.7365328073501587, "learning_rate": 6.708430315072381e-05, "loss": 0.0017657797783613206, "step": 116010 }, { "epoch": 32.93216009083168, "grad_norm": 9.164934158325195, "learning_rate": 6.708146466080045e-05, "loss": 0.007038326561450958, "step": 116020 }, { "epoch": 32.934998580755035, "grad_norm": 9.201010704040527, "learning_rate": 6.70786261708771e-05, "loss": 0.012917309999465942, "step": 116030 }, { "epoch": 32.9378370706784, "grad_norm": 0.43749189376831055, "learning_rate": 6.707578768095374e-05, "loss": 0.006228803843259812, "step": 116040 }, { "epoch": 32.94067556060176, "grad_norm": 4.600038528442383, "learning_rate": 6.707294919103038e-05, "loss": 0.002172117680311203, "step": 116050 }, { "epoch": 32.943514050525124, "grad_norm": 0.4795561730861664, "learning_rate": 6.707011070110702e-05, "loss": 0.011324767768383027, "step": 116060 }, { "epoch": 32.94635254044848, "grad_norm": 2.651142120361328, "learning_rate": 6.706727221118365e-05, "loss": 0.008716784417629242, "step": 116070 }, { "epoch": 32.94919103037184, "grad_norm": 0.5946139097213745, "learning_rate": 6.706443372126029e-05, "loss": 0.004989980161190033, "step": 116080 }, { "epoch": 32.952029520295206, "grad_norm": 0.1492352932691574, "learning_rate": 6.706159523133693e-05, "loss": 0.0031155625358223913, "step": 116090 }, { "epoch": 32.95486801021856, "grad_norm": 5.738687992095947, "learning_rate": 6.705875674141357e-05, "loss": 0.01354798823595047, "step": 116100 }, { "epoch": 32.957706500141924, "grad_norm": 9.639321327209473, "learning_rate": 6.705591825149021e-05, "loss": 0.008230944722890854, "step": 116110 }, { "epoch": 32.96054499006529, "grad_norm": 0.1811612844467163, "learning_rate": 6.705307976156685e-05, "loss": 0.005621297657489777, "step": 116120 }, { "epoch": 32.96338347998864, "grad_norm": 0.5774857401847839, "learning_rate": 6.705024127164348e-05, "loss": 0.007992690801620484, "step": 116130 }, { "epoch": 32.966221969912006, "grad_norm": 0.1640755832195282, "learning_rate": 6.704740278172012e-05, "loss": 0.0021892812103033064, "step": 116140 }, { "epoch": 32.96906045983537, "grad_norm": 0.04836976155638695, "learning_rate": 6.704456429179677e-05, "loss": 0.011316446214914322, "step": 116150 }, { "epoch": 32.97189894975873, "grad_norm": 0.04663780331611633, "learning_rate": 6.70417258018734e-05, "loss": 0.014330577850341798, "step": 116160 }, { "epoch": 32.97473743968209, "grad_norm": 0.022855086252093315, "learning_rate": 6.703888731195005e-05, "loss": 0.011835461854934693, "step": 116170 }, { "epoch": 32.97757592960545, "grad_norm": 1.789455533027649, "learning_rate": 6.703604882202669e-05, "loss": 0.005349169671535492, "step": 116180 }, { "epoch": 32.98041441952881, "grad_norm": 10.215633392333984, "learning_rate": 6.703321033210333e-05, "loss": 0.012733514606952667, "step": 116190 }, { "epoch": 32.98325290945217, "grad_norm": 2.7822208404541016, "learning_rate": 6.703037184217996e-05, "loss": 0.00851227417588234, "step": 116200 }, { "epoch": 32.98609139937553, "grad_norm": 1.3343533277511597, "learning_rate": 6.70275333522566e-05, "loss": 0.004502113908529282, "step": 116210 }, { "epoch": 32.988929889298895, "grad_norm": 0.9471992254257202, "learning_rate": 6.702469486233324e-05, "loss": 0.002028048038482666, "step": 116220 }, { "epoch": 32.99176837922225, "grad_norm": 0.02737145684659481, "learning_rate": 6.702185637240988e-05, "loss": 0.0022799227386713027, "step": 116230 }, { "epoch": 32.99460686914561, "grad_norm": 2.3910951614379883, "learning_rate": 6.701901788248652e-05, "loss": 0.00944119393825531, "step": 116240 }, { "epoch": 32.997445359068976, "grad_norm": 1.8649864196777344, "learning_rate": 6.701617939256317e-05, "loss": 0.0011688621714711189, "step": 116250 }, { "epoch": 33.00028384899234, "grad_norm": 0.30151933431625366, "learning_rate": 6.701334090263979e-05, "loss": 0.0028031138703227044, "step": 116260 }, { "epoch": 33.003122338915695, "grad_norm": 0.04772806167602539, "learning_rate": 6.701050241271643e-05, "loss": 0.006588833779096604, "step": 116270 }, { "epoch": 33.00596082883906, "grad_norm": 0.04940832778811455, "learning_rate": 6.700766392279308e-05, "loss": 0.001451434940099716, "step": 116280 }, { "epoch": 33.00879931876242, "grad_norm": 0.12613864243030548, "learning_rate": 6.700482543286972e-05, "loss": 0.00789545252919197, "step": 116290 }, { "epoch": 33.01163780868578, "grad_norm": 3.2078168392181396, "learning_rate": 6.700198694294636e-05, "loss": 0.001331864856183529, "step": 116300 }, { "epoch": 33.01447629860914, "grad_norm": 1.3390696048736572, "learning_rate": 6.6999148453023e-05, "loss": 0.003948202356696129, "step": 116310 }, { "epoch": 33.0173147885325, "grad_norm": 0.38363736867904663, "learning_rate": 6.699630996309964e-05, "loss": 0.0049702949821949, "step": 116320 }, { "epoch": 33.02015327845586, "grad_norm": 0.05639426037669182, "learning_rate": 6.699347147317627e-05, "loss": 0.0020933594554662705, "step": 116330 }, { "epoch": 33.02299176837922, "grad_norm": 0.02982463128864765, "learning_rate": 6.699063298325291e-05, "loss": 0.005966927856206894, "step": 116340 }, { "epoch": 33.025830258302584, "grad_norm": 0.17889104783535004, "learning_rate": 6.698779449332955e-05, "loss": 0.003529367595911026, "step": 116350 }, { "epoch": 33.02866874822595, "grad_norm": 2.4252471923828125, "learning_rate": 6.69849560034062e-05, "loss": 0.0013346079736948012, "step": 116360 }, { "epoch": 33.0315072381493, "grad_norm": 0.10620643943548203, "learning_rate": 6.698211751348283e-05, "loss": 0.0014808306470513344, "step": 116370 }, { "epoch": 33.034345728072665, "grad_norm": 0.026580004021525383, "learning_rate": 6.697927902355948e-05, "loss": 0.00461781993508339, "step": 116380 }, { "epoch": 33.03718421799603, "grad_norm": 1.101383090019226, "learning_rate": 6.69764405336361e-05, "loss": 0.004486881196498871, "step": 116390 }, { "epoch": 33.040022707919384, "grad_norm": 4.052335739135742, "learning_rate": 6.697360204371275e-05, "loss": 0.001631484180688858, "step": 116400 }, { "epoch": 33.04286119784275, "grad_norm": 1.1677566766738892, "learning_rate": 6.697076355378939e-05, "loss": 0.0013265511021018027, "step": 116410 }, { "epoch": 33.04569968776611, "grad_norm": 1.2540374994277954, "learning_rate": 6.696792506386603e-05, "loss": 0.0009229317307472229, "step": 116420 }, { "epoch": 33.04853817768947, "grad_norm": 1.549575686454773, "learning_rate": 6.696508657394267e-05, "loss": 0.0020175084471702577, "step": 116430 }, { "epoch": 33.05137666761283, "grad_norm": 0.4531232714653015, "learning_rate": 6.696224808401931e-05, "loss": 0.000685400702059269, "step": 116440 }, { "epoch": 33.05421515753619, "grad_norm": 0.35570189356803894, "learning_rate": 6.695940959409594e-05, "loss": 0.003602328151464462, "step": 116450 }, { "epoch": 33.057053647459554, "grad_norm": 0.13056319952011108, "learning_rate": 6.695657110417258e-05, "loss": 0.0011389149352908135, "step": 116460 }, { "epoch": 33.05989213738291, "grad_norm": 0.057958927005529404, "learning_rate": 6.695373261424922e-05, "loss": 0.0014943141490221024, "step": 116470 }, { "epoch": 33.06273062730627, "grad_norm": 0.11711937934160233, "learning_rate": 6.695089412432586e-05, "loss": 0.000851454958319664, "step": 116480 }, { "epoch": 33.065569117229636, "grad_norm": 0.48561540246009827, "learning_rate": 6.694805563440249e-05, "loss": 0.0032224122434854506, "step": 116490 }, { "epoch": 33.06840760715299, "grad_norm": 0.31075406074523926, "learning_rate": 6.694521714447915e-05, "loss": 0.006114789843559265, "step": 116500 }, { "epoch": 33.06840760715299, "eval_accuracy": 0.9806066001144529, "eval_loss": 0.07545190304517746, "eval_runtime": 36.4154, "eval_samples_per_second": 431.878, "eval_steps_per_second": 6.755, "step": 116500 }, { "epoch": 33.071246097076354, "grad_norm": 0.011628352105617523, "learning_rate": 6.694237865455579e-05, "loss": 0.011433450877666474, "step": 116510 }, { "epoch": 33.07408458699972, "grad_norm": 0.16553795337677002, "learning_rate": 6.693954016463241e-05, "loss": 0.0009460266679525376, "step": 116520 }, { "epoch": 33.07692307692308, "grad_norm": 0.1565055549144745, "learning_rate": 6.693670167470906e-05, "loss": 0.0014659754931926727, "step": 116530 }, { "epoch": 33.079761566846436, "grad_norm": 1.9614278078079224, "learning_rate": 6.69338631847857e-05, "loss": 0.001235012523829937, "step": 116540 }, { "epoch": 33.0826000567698, "grad_norm": 0.0336444117128849, "learning_rate": 6.693102469486233e-05, "loss": 0.00135517418384552, "step": 116550 }, { "epoch": 33.08543854669316, "grad_norm": 0.013185838237404823, "learning_rate": 6.692818620493898e-05, "loss": 0.0005711127072572708, "step": 116560 }, { "epoch": 33.08827703661652, "grad_norm": 0.036254510283470154, "learning_rate": 6.692534771501562e-05, "loss": 0.003907288610935211, "step": 116570 }, { "epoch": 33.09111552653988, "grad_norm": 0.07210094481706619, "learning_rate": 6.692250922509225e-05, "loss": 0.006119371205568313, "step": 116580 }, { "epoch": 33.09395401646324, "grad_norm": 0.8094743490219116, "learning_rate": 6.691967073516889e-05, "loss": 0.004016109555959701, "step": 116590 }, { "epoch": 33.0967925063866, "grad_norm": 0.3563868999481201, "learning_rate": 6.691683224524553e-05, "loss": 0.00904526486992836, "step": 116600 }, { "epoch": 33.09963099630996, "grad_norm": 0.9565075635910034, "learning_rate": 6.691399375532217e-05, "loss": 0.0015688346698880195, "step": 116610 }, { "epoch": 33.102469486233325, "grad_norm": 5.021378517150879, "learning_rate": 6.69111552653988e-05, "loss": 0.005596960335969925, "step": 116620 }, { "epoch": 33.10530797615669, "grad_norm": 0.20138885080814362, "learning_rate": 6.690831677547546e-05, "loss": 0.0099314846098423, "step": 116630 }, { "epoch": 33.10814646608004, "grad_norm": 0.4211396276950836, "learning_rate": 6.69054782855521e-05, "loss": 0.006709654629230499, "step": 116640 }, { "epoch": 33.110984956003406, "grad_norm": 9.913997650146484, "learning_rate": 6.690263979562873e-05, "loss": 0.006997358798980713, "step": 116650 }, { "epoch": 33.11382344592677, "grad_norm": 0.21106532216072083, "learning_rate": 6.689980130570537e-05, "loss": 0.009315518289804458, "step": 116660 }, { "epoch": 33.116661935850125, "grad_norm": 2.573094606399536, "learning_rate": 6.689696281578201e-05, "loss": 0.004550368338823318, "step": 116670 }, { "epoch": 33.11950042577349, "grad_norm": 2.169468879699707, "learning_rate": 6.689412432585864e-05, "loss": 0.010161110758781433, "step": 116680 }, { "epoch": 33.12233891569685, "grad_norm": 2.7326865196228027, "learning_rate": 6.689128583593528e-05, "loss": 0.021151092648506165, "step": 116690 }, { "epoch": 33.12517740562021, "grad_norm": 0.2831386625766754, "learning_rate": 6.688844734601193e-05, "loss": 0.0026957079768180846, "step": 116700 }, { "epoch": 33.12801589554357, "grad_norm": 0.07883128523826599, "learning_rate": 6.688560885608856e-05, "loss": 0.0010104503482580185, "step": 116710 }, { "epoch": 33.13085438546693, "grad_norm": 0.04013151675462723, "learning_rate": 6.68827703661652e-05, "loss": 0.00045544467866420746, "step": 116720 }, { "epoch": 33.133692875390295, "grad_norm": 0.026153378188610077, "learning_rate": 6.687993187624184e-05, "loss": 0.002191595546901226, "step": 116730 }, { "epoch": 33.13653136531365, "grad_norm": 0.028091011568903923, "learning_rate": 6.687709338631848e-05, "loss": 0.004006192833185196, "step": 116740 }, { "epoch": 33.139369855237014, "grad_norm": 0.6106548309326172, "learning_rate": 6.687425489639511e-05, "loss": 0.003902819752693176, "step": 116750 }, { "epoch": 33.14220834516038, "grad_norm": 0.2084183692932129, "learning_rate": 6.687141640647177e-05, "loss": 0.014672598242759705, "step": 116760 }, { "epoch": 33.14504683508373, "grad_norm": 4.2750067710876465, "learning_rate": 6.686857791654841e-05, "loss": 0.007639256864786148, "step": 116770 }, { "epoch": 33.147885325007096, "grad_norm": 0.09132722020149231, "learning_rate": 6.686573942662504e-05, "loss": 0.012508408725261688, "step": 116780 }, { "epoch": 33.15072381493046, "grad_norm": 0.053322166204452515, "learning_rate": 6.686290093670168e-05, "loss": 0.014283393323421479, "step": 116790 }, { "epoch": 33.15356230485382, "grad_norm": 2.297430992126465, "learning_rate": 6.686006244677832e-05, "loss": 0.007989005744457245, "step": 116800 }, { "epoch": 33.15640079477718, "grad_norm": 0.022376790642738342, "learning_rate": 6.685722395685495e-05, "loss": 0.009492124617099761, "step": 116810 }, { "epoch": 33.15923928470054, "grad_norm": 0.1506757289171219, "learning_rate": 6.685438546693159e-05, "loss": 0.008976607024669648, "step": 116820 }, { "epoch": 33.1620777746239, "grad_norm": 1.2446775436401367, "learning_rate": 6.685154697700824e-05, "loss": 0.0021873177960515023, "step": 116830 }, { "epoch": 33.16491626454726, "grad_norm": 0.03482142835855484, "learning_rate": 6.684870848708487e-05, "loss": 0.0003721404820680618, "step": 116840 }, { "epoch": 33.16775475447062, "grad_norm": 2.229429006576538, "learning_rate": 6.684586999716151e-05, "loss": 0.003446166589856148, "step": 116850 }, { "epoch": 33.170593244393984, "grad_norm": 0.0728578045964241, "learning_rate": 6.684303150723815e-05, "loss": 0.0034969646483659746, "step": 116860 }, { "epoch": 33.17343173431734, "grad_norm": 0.06457675993442535, "learning_rate": 6.68401930173148e-05, "loss": 0.0021480562165379522, "step": 116870 }, { "epoch": 33.1762702242407, "grad_norm": 0.3300822377204895, "learning_rate": 6.683735452739142e-05, "loss": 0.0006129974499344826, "step": 116880 }, { "epoch": 33.179108714164066, "grad_norm": 0.03377392143011093, "learning_rate": 6.683451603746808e-05, "loss": 0.0019664719700813295, "step": 116890 }, { "epoch": 33.18194720408743, "grad_norm": 3.646103858947754, "learning_rate": 6.683167754754472e-05, "loss": 0.006631339341402054, "step": 116900 }, { "epoch": 33.184785694010785, "grad_norm": 0.01401121448725462, "learning_rate": 6.682883905762135e-05, "loss": 0.004051449149847031, "step": 116910 }, { "epoch": 33.18762418393415, "grad_norm": 9.631101608276367, "learning_rate": 6.682600056769799e-05, "loss": 0.01231047660112381, "step": 116920 }, { "epoch": 33.19046267385751, "grad_norm": 2.204819440841675, "learning_rate": 6.682316207777463e-05, "loss": 0.0049948573112487795, "step": 116930 }, { "epoch": 33.193301163780866, "grad_norm": 0.06630460917949677, "learning_rate": 6.682032358785126e-05, "loss": 0.0043741147965192795, "step": 116940 }, { "epoch": 33.19613965370423, "grad_norm": 0.40099433064460754, "learning_rate": 6.68174850979279e-05, "loss": 0.002163928560912609, "step": 116950 }, { "epoch": 33.19897814362759, "grad_norm": 0.4979151785373688, "learning_rate": 6.681464660800455e-05, "loss": 0.0008824186399579049, "step": 116960 }, { "epoch": 33.20181663355095, "grad_norm": 0.46067163348197937, "learning_rate": 6.681180811808118e-05, "loss": 0.001842484064400196, "step": 116970 }, { "epoch": 33.20465512347431, "grad_norm": 1.6530888080596924, "learning_rate": 6.680896962815782e-05, "loss": 0.0020790264010429383, "step": 116980 }, { "epoch": 33.20749361339767, "grad_norm": 2.3059728145599365, "learning_rate": 6.680613113823446e-05, "loss": 0.002476835064589977, "step": 116990 }, { "epoch": 33.210332103321036, "grad_norm": 0.03144270181655884, "learning_rate": 6.68032926483111e-05, "loss": 0.01923192888498306, "step": 117000 }, { "epoch": 33.210332103321036, "eval_accuracy": 0.9785718827494119, "eval_loss": 0.08626900613307953, "eval_runtime": 36.2451, "eval_samples_per_second": 433.906, "eval_steps_per_second": 6.787, "step": 117000 }, { "epoch": 33.21317059324439, "grad_norm": 0.07916983217000961, "learning_rate": 6.680045415838773e-05, "loss": 0.014544449746608734, "step": 117010 }, { "epoch": 33.216009083167755, "grad_norm": 5.655854225158691, "learning_rate": 6.679761566846438e-05, "loss": 0.008870673179626466, "step": 117020 }, { "epoch": 33.21884757309112, "grad_norm": 3.0102782249450684, "learning_rate": 6.679477717854103e-05, "loss": 0.011139096319675445, "step": 117030 }, { "epoch": 33.221686063014474, "grad_norm": 7.15107536315918, "learning_rate": 6.679193868861766e-05, "loss": 0.008346974104642867, "step": 117040 }, { "epoch": 33.22452455293784, "grad_norm": 4.582878589630127, "learning_rate": 6.67891001986943e-05, "loss": 0.012753713130950927, "step": 117050 }, { "epoch": 33.2273630428612, "grad_norm": 0.016860440373420715, "learning_rate": 6.678626170877094e-05, "loss": 0.004232428222894669, "step": 117060 }, { "epoch": 33.230201532784555, "grad_norm": 0.40730273723602295, "learning_rate": 6.678342321884757e-05, "loss": 0.006754094362258911, "step": 117070 }, { "epoch": 33.23304002270792, "grad_norm": 0.17766165733337402, "learning_rate": 6.678058472892421e-05, "loss": 0.003537414222955704, "step": 117080 }, { "epoch": 33.23587851263128, "grad_norm": 0.31607070565223694, "learning_rate": 6.677774623900086e-05, "loss": 0.006954492628574371, "step": 117090 }, { "epoch": 33.238717002554644, "grad_norm": 0.551232635974884, "learning_rate": 6.677490774907749e-05, "loss": 0.0012095684185624122, "step": 117100 }, { "epoch": 33.241555492478, "grad_norm": 0.5044060349464417, "learning_rate": 6.677206925915413e-05, "loss": 0.00250462107360363, "step": 117110 }, { "epoch": 33.24439398240136, "grad_norm": 0.4205917418003082, "learning_rate": 6.676923076923078e-05, "loss": 0.010872340947389602, "step": 117120 }, { "epoch": 33.247232472324725, "grad_norm": 0.01135061215609312, "learning_rate": 6.676639227930742e-05, "loss": 0.0018391426652669906, "step": 117130 }, { "epoch": 33.25007096224808, "grad_norm": 1.3225314617156982, "learning_rate": 6.676355378938404e-05, "loss": 0.022842241823673247, "step": 117140 }, { "epoch": 33.252909452171444, "grad_norm": 6.6407341957092285, "learning_rate": 6.676071529946069e-05, "loss": 0.009613636881113052, "step": 117150 }, { "epoch": 33.25574794209481, "grad_norm": 0.6793514490127563, "learning_rate": 6.675787680953734e-05, "loss": 0.004643005132675171, "step": 117160 }, { "epoch": 33.25858643201816, "grad_norm": 0.1942180097103119, "learning_rate": 6.675503831961397e-05, "loss": 0.00495639331638813, "step": 117170 }, { "epoch": 33.261424921941526, "grad_norm": 0.22809430956840515, "learning_rate": 6.675219982969061e-05, "loss": 0.004553357511758805, "step": 117180 }, { "epoch": 33.26426341186489, "grad_norm": 0.2268037050962448, "learning_rate": 6.674936133976725e-05, "loss": 0.0012424984946846963, "step": 117190 }, { "epoch": 33.26710190178825, "grad_norm": 0.19155314564704895, "learning_rate": 6.674652284984388e-05, "loss": 0.007326685637235641, "step": 117200 }, { "epoch": 33.26994039171161, "grad_norm": 6.030980110168457, "learning_rate": 6.674368435992052e-05, "loss": 0.01263304054737091, "step": 117210 }, { "epoch": 33.27277888163497, "grad_norm": 2.5544450283050537, "learning_rate": 6.674084586999716e-05, "loss": 0.002410713396966457, "step": 117220 }, { "epoch": 33.27561737155833, "grad_norm": 0.7603204846382141, "learning_rate": 6.67380073800738e-05, "loss": 0.0021179305389523507, "step": 117230 }, { "epoch": 33.27845586148169, "grad_norm": 7.9641499519348145, "learning_rate": 6.673516889015044e-05, "loss": 0.002895362675189972, "step": 117240 }, { "epoch": 33.28129435140505, "grad_norm": 2.930880308151245, "learning_rate": 6.673233040022709e-05, "loss": 0.002329271286725998, "step": 117250 }, { "epoch": 33.284132841328415, "grad_norm": 6.073428630828857, "learning_rate": 6.672949191030373e-05, "loss": 0.010914608836174011, "step": 117260 }, { "epoch": 33.28697133125178, "grad_norm": 8.424337387084961, "learning_rate": 6.672665342038036e-05, "loss": 0.005303645879030228, "step": 117270 }, { "epoch": 33.28980982117513, "grad_norm": 2.2002532482147217, "learning_rate": 6.6723814930457e-05, "loss": 0.006519553065299988, "step": 117280 }, { "epoch": 33.292648311098496, "grad_norm": 0.10872282087802887, "learning_rate": 6.672097644053364e-05, "loss": 0.0032998844981193542, "step": 117290 }, { "epoch": 33.29548680102186, "grad_norm": 0.08938205987215042, "learning_rate": 6.671813795061028e-05, "loss": 0.01423729956150055, "step": 117300 }, { "epoch": 33.298325290945215, "grad_norm": 0.28041577339172363, "learning_rate": 6.671529946068692e-05, "loss": 0.009578115493059158, "step": 117310 }, { "epoch": 33.30116378086858, "grad_norm": 0.029245467856526375, "learning_rate": 6.671246097076356e-05, "loss": 0.005345798283815384, "step": 117320 }, { "epoch": 33.30400227079194, "grad_norm": 0.6194859743118286, "learning_rate": 6.670962248084019e-05, "loss": 0.006991929560899735, "step": 117330 }, { "epoch": 33.306840760715296, "grad_norm": 0.2384757399559021, "learning_rate": 6.670678399091683e-05, "loss": 0.002694045566022396, "step": 117340 }, { "epoch": 33.30967925063866, "grad_norm": 0.33606505393981934, "learning_rate": 6.670394550099347e-05, "loss": 0.0012093557044863701, "step": 117350 }, { "epoch": 33.31251774056202, "grad_norm": 0.2155752331018448, "learning_rate": 6.670110701107011e-05, "loss": 0.0009657267481088638, "step": 117360 }, { "epoch": 33.315356230485385, "grad_norm": 0.19226467609405518, "learning_rate": 6.669826852114676e-05, "loss": 0.0011476745828986167, "step": 117370 }, { "epoch": 33.31819472040874, "grad_norm": 0.023866897448897362, "learning_rate": 6.66954300312234e-05, "loss": 0.0068305589258670805, "step": 117380 }, { "epoch": 33.321033210332104, "grad_norm": 1.834012508392334, "learning_rate": 6.669259154130002e-05, "loss": 0.006413164734840393, "step": 117390 }, { "epoch": 33.32387170025547, "grad_norm": 3.7063136100769043, "learning_rate": 6.668975305137667e-05, "loss": 0.008515474200248719, "step": 117400 }, { "epoch": 33.32671019017882, "grad_norm": 2.73311710357666, "learning_rate": 6.668691456145331e-05, "loss": 0.008138413727283477, "step": 117410 }, { "epoch": 33.329548680102185, "grad_norm": 0.004466993268579245, "learning_rate": 6.668407607152995e-05, "loss": 0.0009000509977340698, "step": 117420 }, { "epoch": 33.33238717002555, "grad_norm": 0.7722700238227844, "learning_rate": 6.668123758160659e-05, "loss": 0.003061119094491005, "step": 117430 }, { "epoch": 33.335225659948904, "grad_norm": 0.27519282698631287, "learning_rate": 6.667839909168323e-05, "loss": 0.0018402960151433945, "step": 117440 }, { "epoch": 33.33806414987227, "grad_norm": 1.0144996643066406, "learning_rate": 6.667556060175987e-05, "loss": 0.001968810521066189, "step": 117450 }, { "epoch": 33.34090263979563, "grad_norm": 4.0912652015686035, "learning_rate": 6.66727221118365e-05, "loss": 0.011178255081176758, "step": 117460 }, { "epoch": 33.34374112971899, "grad_norm": 0.2905208170413971, "learning_rate": 6.666988362191314e-05, "loss": 0.00034719947725534437, "step": 117470 }, { "epoch": 33.34657961964235, "grad_norm": 0.04273831844329834, "learning_rate": 6.666704513198978e-05, "loss": 0.004416188225150108, "step": 117480 }, { "epoch": 33.34941810956571, "grad_norm": 0.10347925871610641, "learning_rate": 6.666420664206642e-05, "loss": 0.002524291351437569, "step": 117490 }, { "epoch": 33.352256599489074, "grad_norm": 0.045379944145679474, "learning_rate": 6.666136815214307e-05, "loss": 0.0009293155744671822, "step": 117500 }, { "epoch": 33.352256599489074, "eval_accuracy": 0.9790805620906721, "eval_loss": 0.07892116159200668, "eval_runtime": 34.4162, "eval_samples_per_second": 456.965, "eval_steps_per_second": 7.148, "step": 117500 }, { "epoch": 33.35509508941243, "grad_norm": 1.198503017425537, "learning_rate": 6.665852966221971e-05, "loss": 0.004590630158782005, "step": 117510 }, { "epoch": 33.35793357933579, "grad_norm": 2.3214449882507324, "learning_rate": 6.665569117229634e-05, "loss": 0.0013369815424084663, "step": 117520 }, { "epoch": 33.360772069259156, "grad_norm": 1.073341965675354, "learning_rate": 6.665285268237298e-05, "loss": 0.007204031944274903, "step": 117530 }, { "epoch": 33.36361055918251, "grad_norm": 0.21417948603630066, "learning_rate": 6.665001419244962e-05, "loss": 0.00046111308038234713, "step": 117540 }, { "epoch": 33.366449049105874, "grad_norm": 0.050856757909059525, "learning_rate": 6.664717570252626e-05, "loss": 0.0003948058933019638, "step": 117550 }, { "epoch": 33.36928753902924, "grad_norm": 0.14497187733650208, "learning_rate": 6.66443372126029e-05, "loss": 0.0014201357960700988, "step": 117560 }, { "epoch": 33.3721260289526, "grad_norm": 11.770901679992676, "learning_rate": 6.664149872267954e-05, "loss": 0.005146569013595581, "step": 117570 }, { "epoch": 33.374964518875956, "grad_norm": 0.15110671520233154, "learning_rate": 6.663866023275618e-05, "loss": 0.0016857722774147987, "step": 117580 }, { "epoch": 33.37780300879932, "grad_norm": 5.133335113525391, "learning_rate": 6.663582174283281e-05, "loss": 0.0018069176003336906, "step": 117590 }, { "epoch": 33.38064149872268, "grad_norm": 0.10142680257558823, "learning_rate": 6.663298325290945e-05, "loss": 0.003570644184947014, "step": 117600 }, { "epoch": 33.38347998864604, "grad_norm": 0.23053288459777832, "learning_rate": 6.66301447629861e-05, "loss": 0.0024633027613162996, "step": 117610 }, { "epoch": 33.3863184785694, "grad_norm": 0.3783843219280243, "learning_rate": 6.662730627306272e-05, "loss": 0.00299628134816885, "step": 117620 }, { "epoch": 33.38915696849276, "grad_norm": 0.06814775615930557, "learning_rate": 6.662446778313938e-05, "loss": 0.009946314990520478, "step": 117630 }, { "epoch": 33.391995458416126, "grad_norm": 6.568148612976074, "learning_rate": 6.662162929321602e-05, "loss": 0.004145854711532592, "step": 117640 }, { "epoch": 33.39483394833948, "grad_norm": 0.2048298716545105, "learning_rate": 6.661879080329265e-05, "loss": 0.0062776684761047365, "step": 117650 }, { "epoch": 33.397672438262845, "grad_norm": 0.03226618841290474, "learning_rate": 6.661595231336929e-05, "loss": 0.0021555095911026, "step": 117660 }, { "epoch": 33.40051092818621, "grad_norm": 0.09949576109647751, "learning_rate": 6.661311382344593e-05, "loss": 0.003998170793056488, "step": 117670 }, { "epoch": 33.40334941810956, "grad_norm": 1.7366881370544434, "learning_rate": 6.661027533352257e-05, "loss": 0.011891967058181763, "step": 117680 }, { "epoch": 33.406187908032926, "grad_norm": 0.4373701214790344, "learning_rate": 6.660743684359921e-05, "loss": 0.0016133423894643783, "step": 117690 }, { "epoch": 33.40902639795629, "grad_norm": 0.02602618746459484, "learning_rate": 6.660459835367585e-05, "loss": 0.005409995466470719, "step": 117700 }, { "epoch": 33.411864887879645, "grad_norm": 0.030193405225872993, "learning_rate": 6.66017598637525e-05, "loss": 0.008112281560897827, "step": 117710 }, { "epoch": 33.41470337780301, "grad_norm": 0.06328485161066055, "learning_rate": 6.659892137382912e-05, "loss": 0.0055146940052509304, "step": 117720 }, { "epoch": 33.41754186772637, "grad_norm": 0.05033858120441437, "learning_rate": 6.659608288390576e-05, "loss": 0.0018517594784498216, "step": 117730 }, { "epoch": 33.420380357649734, "grad_norm": 14.081085205078125, "learning_rate": 6.65932443939824e-05, "loss": 0.005934026837348938, "step": 117740 }, { "epoch": 33.42321884757309, "grad_norm": 0.1360110193490982, "learning_rate": 6.659040590405903e-05, "loss": 0.0028015140444040297, "step": 117750 }, { "epoch": 33.42605733749645, "grad_norm": 0.010982898063957691, "learning_rate": 6.658756741413569e-05, "loss": 0.007183463871479034, "step": 117760 }, { "epoch": 33.428895827419815, "grad_norm": 0.07916972786188126, "learning_rate": 6.658472892421233e-05, "loss": 0.012677665054798126, "step": 117770 }, { "epoch": 33.43173431734317, "grad_norm": 0.8646460771560669, "learning_rate": 6.658189043428896e-05, "loss": 0.0012692421674728393, "step": 117780 }, { "epoch": 33.434572807266534, "grad_norm": 0.02190527506172657, "learning_rate": 6.65790519443656e-05, "loss": 0.0015092354267835617, "step": 117790 }, { "epoch": 33.4374112971899, "grad_norm": 0.20412811636924744, "learning_rate": 6.657621345444224e-05, "loss": 0.0007627921178936959, "step": 117800 }, { "epoch": 33.44024978711325, "grad_norm": 2.291775941848755, "learning_rate": 6.657337496451888e-05, "loss": 0.002638016827404499, "step": 117810 }, { "epoch": 33.443088277036615, "grad_norm": 0.09806045144796371, "learning_rate": 6.657053647459551e-05, "loss": 0.0019210385158658027, "step": 117820 }, { "epoch": 33.44592676695998, "grad_norm": 7.6052961349487305, "learning_rate": 6.656769798467216e-05, "loss": 0.008160994946956634, "step": 117830 }, { "epoch": 33.44876525688334, "grad_norm": 0.017530998215079308, "learning_rate": 6.65648594947488e-05, "loss": 0.0026067856699228285, "step": 117840 }, { "epoch": 33.4516037468067, "grad_norm": 8.660244941711426, "learning_rate": 6.656202100482543e-05, "loss": 0.03319918513298035, "step": 117850 }, { "epoch": 33.45444223673006, "grad_norm": 0.7897076606750488, "learning_rate": 6.655918251490207e-05, "loss": 0.017993128299713133, "step": 117860 }, { "epoch": 33.45728072665342, "grad_norm": 4.393404006958008, "learning_rate": 6.655634402497872e-05, "loss": 0.003013812564313412, "step": 117870 }, { "epoch": 33.46011921657678, "grad_norm": 0.023161631077528, "learning_rate": 6.655350553505534e-05, "loss": 0.002432249113917351, "step": 117880 }, { "epoch": 33.46295770650014, "grad_norm": 0.8820413947105408, "learning_rate": 6.6550667045132e-05, "loss": 0.0010740747675299644, "step": 117890 }, { "epoch": 33.465796196423504, "grad_norm": 0.39331719279289246, "learning_rate": 6.654782855520864e-05, "loss": 0.007527497410774231, "step": 117900 }, { "epoch": 33.46863468634686, "grad_norm": 2.3141660690307617, "learning_rate": 6.654499006528527e-05, "loss": 0.0035012029111385345, "step": 117910 }, { "epoch": 33.47147317627022, "grad_norm": 0.013123479671776295, "learning_rate": 6.654215157536191e-05, "loss": 0.005382116138935089, "step": 117920 }, { "epoch": 33.474311666193586, "grad_norm": 0.17607815563678741, "learning_rate": 6.653931308543855e-05, "loss": 0.002184981666505337, "step": 117930 }, { "epoch": 33.47715015611695, "grad_norm": 0.017658047378063202, "learning_rate": 6.653647459551519e-05, "loss": 0.0025842418894171716, "step": 117940 }, { "epoch": 33.479988646040304, "grad_norm": 0.12143804877996445, "learning_rate": 6.653363610559182e-05, "loss": 0.003834286704659462, "step": 117950 }, { "epoch": 33.48282713596367, "grad_norm": 0.24489594995975494, "learning_rate": 6.653079761566847e-05, "loss": 0.004366081207990646, "step": 117960 }, { "epoch": 33.48566562588703, "grad_norm": 0.9088349342346191, "learning_rate": 6.652795912574512e-05, "loss": 0.0032115451991558074, "step": 117970 }, { "epoch": 33.488504115810386, "grad_norm": 0.9062942862510681, "learning_rate": 6.652512063582174e-05, "loss": 0.006619678437709808, "step": 117980 }, { "epoch": 33.49134260573375, "grad_norm": 0.2212306559085846, "learning_rate": 6.652228214589839e-05, "loss": 0.0009771810844540596, "step": 117990 }, { "epoch": 33.49418109565711, "grad_norm": 0.05995320528745651, "learning_rate": 6.651944365597503e-05, "loss": 0.008037105947732926, "step": 118000 }, { "epoch": 33.49418109565711, "eval_accuracy": 0.9784447129140967, "eval_loss": 0.08692049235105515, "eval_runtime": 38.2204, "eval_samples_per_second": 411.482, "eval_steps_per_second": 6.436, "step": 118000 }, { "epoch": 33.497019585580475, "grad_norm": 0.3609340786933899, "learning_rate": 6.651660516605165e-05, "loss": 0.0012656964361667633, "step": 118010 }, { "epoch": 33.49985807550383, "grad_norm": 0.276705801486969, "learning_rate": 6.651376667612831e-05, "loss": 0.0012000342831015587, "step": 118020 }, { "epoch": 33.50269656542719, "grad_norm": 0.07945383340120316, "learning_rate": 6.651092818620495e-05, "loss": 0.0021678853780031206, "step": 118030 }, { "epoch": 33.505535055350556, "grad_norm": 0.026106635108590126, "learning_rate": 6.650808969628158e-05, "loss": 0.005395721644163132, "step": 118040 }, { "epoch": 33.50837354527391, "grad_norm": 0.4319443702697754, "learning_rate": 6.650525120635822e-05, "loss": 0.0035046979784965513, "step": 118050 }, { "epoch": 33.511212035197275, "grad_norm": 0.03334973379969597, "learning_rate": 6.650241271643486e-05, "loss": 0.0025594783946871758, "step": 118060 }, { "epoch": 33.51405052512064, "grad_norm": 0.008389974012970924, "learning_rate": 6.64995742265115e-05, "loss": 0.0017676768824458123, "step": 118070 }, { "epoch": 33.51688901504399, "grad_norm": 1.1781275272369385, "learning_rate": 6.649673573658813e-05, "loss": 0.004908459633588791, "step": 118080 }, { "epoch": 33.519727504967356, "grad_norm": 5.800688743591309, "learning_rate": 6.649389724666479e-05, "loss": 0.0037035726010799406, "step": 118090 }, { "epoch": 33.52256599489072, "grad_norm": 0.08477868884801865, "learning_rate": 6.649105875674143e-05, "loss": 0.0015859246253967286, "step": 118100 }, { "epoch": 33.52540448481408, "grad_norm": 0.05745018273591995, "learning_rate": 6.648822026681805e-05, "loss": 0.014223317801952361, "step": 118110 }, { "epoch": 33.52824297473744, "grad_norm": 2.0510003566741943, "learning_rate": 6.64853817768947e-05, "loss": 0.004439353197813034, "step": 118120 }, { "epoch": 33.5310814646608, "grad_norm": 2.6129238605499268, "learning_rate": 6.648254328697134e-05, "loss": 0.006052439659833908, "step": 118130 }, { "epoch": 33.533919954584164, "grad_norm": 0.04002993926405907, "learning_rate": 6.647970479704797e-05, "loss": 0.0014126300811767578, "step": 118140 }, { "epoch": 33.53675844450752, "grad_norm": 0.16344551742076874, "learning_rate": 6.64768663071246e-05, "loss": 0.0016123808920383454, "step": 118150 }, { "epoch": 33.53959693443088, "grad_norm": 0.364570289850235, "learning_rate": 6.647402781720126e-05, "loss": 0.0021337166428565977, "step": 118160 }, { "epoch": 33.542435424354245, "grad_norm": 1.0394712686538696, "learning_rate": 6.647118932727789e-05, "loss": 0.005087512359023094, "step": 118170 }, { "epoch": 33.5452739142776, "grad_norm": 0.01823657751083374, "learning_rate": 6.646835083735453e-05, "loss": 0.0019560620188713075, "step": 118180 }, { "epoch": 33.548112404200964, "grad_norm": 0.02288013882935047, "learning_rate": 6.646551234743117e-05, "loss": 0.007053039222955704, "step": 118190 }, { "epoch": 33.55095089412433, "grad_norm": 0.03476344048976898, "learning_rate": 6.646267385750781e-05, "loss": 0.008354153484106064, "step": 118200 }, { "epoch": 33.55378938404769, "grad_norm": 0.33585211634635925, "learning_rate": 6.64601192165768e-05, "loss": 0.010534433275461197, "step": 118210 }, { "epoch": 33.556627873971046, "grad_norm": 1.1691902875900269, "learning_rate": 6.645728072665342e-05, "loss": 0.01783905029296875, "step": 118220 }, { "epoch": 33.55946636389441, "grad_norm": 2.8973798751831055, "learning_rate": 6.645444223673006e-05, "loss": 0.00404040589928627, "step": 118230 }, { "epoch": 33.56230485381777, "grad_norm": 0.5181781649589539, "learning_rate": 6.64516037468067e-05, "loss": 0.0026810297742486, "step": 118240 }, { "epoch": 33.56514334374113, "grad_norm": 0.6277620792388916, "learning_rate": 6.644876525688335e-05, "loss": 0.016150730848312377, "step": 118250 }, { "epoch": 33.56798183366449, "grad_norm": 0.07933603972196579, "learning_rate": 6.644592676695997e-05, "loss": 0.007038997113704681, "step": 118260 }, { "epoch": 33.57082032358785, "grad_norm": 0.004311082419008017, "learning_rate": 6.644308827703663e-05, "loss": 0.009483999013900757, "step": 118270 }, { "epoch": 33.57365881351121, "grad_norm": 0.10496167093515396, "learning_rate": 6.644024978711326e-05, "loss": 0.0022281168028712274, "step": 118280 }, { "epoch": 33.57649730343457, "grad_norm": 0.08653490245342255, "learning_rate": 6.64374112971899e-05, "loss": 0.0050222337245941166, "step": 118290 }, { "epoch": 33.579335793357934, "grad_norm": 1.1714611053466797, "learning_rate": 6.643457280726654e-05, "loss": 0.03476744294166565, "step": 118300 }, { "epoch": 33.5821742832813, "grad_norm": 13.39780044555664, "learning_rate": 6.643173431734318e-05, "loss": 0.0158488467335701, "step": 118310 }, { "epoch": 33.58501277320465, "grad_norm": 8.61961555480957, "learning_rate": 6.642889582741981e-05, "loss": 0.013498353958129882, "step": 118320 }, { "epoch": 33.587851263128016, "grad_norm": 14.276915550231934, "learning_rate": 6.642605733749645e-05, "loss": 0.010148833692073821, "step": 118330 }, { "epoch": 33.59068975305138, "grad_norm": 3.608036994934082, "learning_rate": 6.64232188475731e-05, "loss": 0.005728677660226822, "step": 118340 }, { "epoch": 33.593528242974735, "grad_norm": 1.5705616474151611, "learning_rate": 6.642038035764973e-05, "loss": 0.006653338670730591, "step": 118350 }, { "epoch": 33.5963667328981, "grad_norm": 3.98262095451355, "learning_rate": 6.641754186772637e-05, "loss": 0.003652578592300415, "step": 118360 }, { "epoch": 33.59920522282146, "grad_norm": 0.16051772236824036, "learning_rate": 6.641470337780301e-05, "loss": 0.0021467098966240885, "step": 118370 }, { "epoch": 33.602043712744816, "grad_norm": 13.43254280090332, "learning_rate": 6.641186488787964e-05, "loss": 0.019127455353736878, "step": 118380 }, { "epoch": 33.60488220266818, "grad_norm": 0.7751497626304626, "learning_rate": 6.640902639795628e-05, "loss": 0.0032516863197088242, "step": 118390 }, { "epoch": 33.60772069259154, "grad_norm": 0.0940680205821991, "learning_rate": 6.640618790803294e-05, "loss": 0.0018132146447896957, "step": 118400 }, { "epoch": 33.610559182514905, "grad_norm": 0.12415971606969833, "learning_rate": 6.640334941810957e-05, "loss": 0.009208150207996368, "step": 118410 }, { "epoch": 33.61339767243826, "grad_norm": 10.271712303161621, "learning_rate": 6.640051092818621e-05, "loss": 0.006243344023823738, "step": 118420 }, { "epoch": 33.61623616236162, "grad_norm": 0.569420576095581, "learning_rate": 6.639767243826285e-05, "loss": 0.012868756055831909, "step": 118430 }, { "epoch": 33.619074652284986, "grad_norm": 1.4828004837036133, "learning_rate": 6.639483394833949e-05, "loss": 0.0016078762710094452, "step": 118440 }, { "epoch": 33.62191314220834, "grad_norm": 3.1917545795440674, "learning_rate": 6.639199545841612e-05, "loss": 0.003562162816524506, "step": 118450 }, { "epoch": 33.624751632131705, "grad_norm": 6.84275484085083, "learning_rate": 6.638915696849276e-05, "loss": 0.012487512081861496, "step": 118460 }, { "epoch": 33.62759012205507, "grad_norm": 3.7584469318389893, "learning_rate": 6.638631847856941e-05, "loss": 0.009606659412384033, "step": 118470 }, { "epoch": 33.63042861197843, "grad_norm": 1.0004552602767944, "learning_rate": 6.638347998864604e-05, "loss": 0.0029518431052565574, "step": 118480 }, { "epoch": 33.63326710190179, "grad_norm": 0.040329139679670334, "learning_rate": 6.638064149872268e-05, "loss": 0.0007754964753985405, "step": 118490 }, { "epoch": 33.63610559182515, "grad_norm": 0.19457368552684784, "learning_rate": 6.637780300879933e-05, "loss": 0.004225339740514755, "step": 118500 }, { "epoch": 33.63610559182515, "eval_accuracy": 0.9803522604438227, "eval_loss": 0.07666290551424026, "eval_runtime": 35.8185, "eval_samples_per_second": 439.075, "eval_steps_per_second": 6.868, "step": 118500 }, { "epoch": 33.63894408174851, "grad_norm": 1.3859169483184814, "learning_rate": 6.637496451887595e-05, "loss": 0.001255500316619873, "step": 118510 }, { "epoch": 33.64178257167187, "grad_norm": 0.4287114143371582, "learning_rate": 6.63721260289526e-05, "loss": 0.0008315984159708023, "step": 118520 }, { "epoch": 33.64462106159523, "grad_norm": 0.036777544766664505, "learning_rate": 6.636928753902924e-05, "loss": 0.0008493203669786454, "step": 118530 }, { "epoch": 33.647459551518594, "grad_norm": 7.291935920715332, "learning_rate": 6.636644904910588e-05, "loss": 0.005969047546386719, "step": 118540 }, { "epoch": 33.65029804144195, "grad_norm": 0.4791955351829529, "learning_rate": 6.636361055918252e-05, "loss": 0.0024512162432074546, "step": 118550 }, { "epoch": 33.65313653136531, "grad_norm": 3.8679327964782715, "learning_rate": 6.636105591825149e-05, "loss": 0.010388413816690445, "step": 118560 }, { "epoch": 33.655975021288675, "grad_norm": 0.2430776208639145, "learning_rate": 6.635821742832813e-05, "loss": 0.0010616108775138855, "step": 118570 }, { "epoch": 33.65881351121204, "grad_norm": 0.7193125486373901, "learning_rate": 6.635537893840477e-05, "loss": 0.006386704742908478, "step": 118580 }, { "epoch": 33.661652001135394, "grad_norm": 0.07508734613656998, "learning_rate": 6.635254044848141e-05, "loss": 0.007941914349794387, "step": 118590 }, { "epoch": 33.66449049105876, "grad_norm": 0.8665478229522705, "learning_rate": 6.634970195855805e-05, "loss": 0.007821758091449738, "step": 118600 }, { "epoch": 33.66732898098212, "grad_norm": 0.10758692771196365, "learning_rate": 6.634686346863469e-05, "loss": 0.00424496978521347, "step": 118610 }, { "epoch": 33.670167470905476, "grad_norm": 0.11636397987604141, "learning_rate": 6.634402497871133e-05, "loss": 0.0035523537546396254, "step": 118620 }, { "epoch": 33.67300596082884, "grad_norm": 0.09883661568164825, "learning_rate": 6.634118648878796e-05, "loss": 0.0015558969229459763, "step": 118630 }, { "epoch": 33.6758444507522, "grad_norm": 6.018462657928467, "learning_rate": 6.63383479988646e-05, "loss": 0.007527086138725281, "step": 118640 }, { "epoch": 33.67868294067556, "grad_norm": 1.8031398057937622, "learning_rate": 6.633550950894126e-05, "loss": 0.0011917954310774804, "step": 118650 }, { "epoch": 33.68152143059892, "grad_norm": 0.049998778849840164, "learning_rate": 6.633267101901789e-05, "loss": 0.00157499723136425, "step": 118660 }, { "epoch": 33.68435992052228, "grad_norm": 3.096578598022461, "learning_rate": 6.632983252909453e-05, "loss": 0.0030185343697667123, "step": 118670 }, { "epoch": 33.687198410445646, "grad_norm": 13.34900951385498, "learning_rate": 6.632699403917117e-05, "loss": 0.0056820400059223175, "step": 118680 }, { "epoch": 33.690036900369, "grad_norm": 2.642449378967285, "learning_rate": 6.63241555492478e-05, "loss": 0.007593331485986709, "step": 118690 }, { "epoch": 33.692875390292365, "grad_norm": 1.4758727550506592, "learning_rate": 6.632131705932444e-05, "loss": 0.022231687605381013, "step": 118700 }, { "epoch": 33.69571388021573, "grad_norm": 0.4311659634113312, "learning_rate": 6.631847856940108e-05, "loss": 0.0012797903269529342, "step": 118710 }, { "epoch": 33.69855237013908, "grad_norm": 4.610264778137207, "learning_rate": 6.631564007947772e-05, "loss": 0.001315905898809433, "step": 118720 }, { "epoch": 33.701390860062446, "grad_norm": 1.7779473066329956, "learning_rate": 6.631280158955436e-05, "loss": 0.011694267392158508, "step": 118730 }, { "epoch": 33.70422934998581, "grad_norm": 0.04996763542294502, "learning_rate": 6.6309963099631e-05, "loss": 0.01332467347383499, "step": 118740 }, { "epoch": 33.70706783990917, "grad_norm": 0.4224510192871094, "learning_rate": 6.630712460970764e-05, "loss": 0.006651736050844193, "step": 118750 }, { "epoch": 33.70990632983253, "grad_norm": 0.023404259234666824, "learning_rate": 6.630428611978427e-05, "loss": 0.001605621725320816, "step": 118760 }, { "epoch": 33.71274481975589, "grad_norm": 7.382232189178467, "learning_rate": 6.630144762986091e-05, "loss": 0.016952945291996, "step": 118770 }, { "epoch": 33.71558330967925, "grad_norm": 0.200791135430336, "learning_rate": 6.629860913993755e-05, "loss": 0.00969424694776535, "step": 118780 }, { "epoch": 33.71842179960261, "grad_norm": 0.0712704136967659, "learning_rate": 6.62957706500142e-05, "loss": 0.005389175564050675, "step": 118790 }, { "epoch": 33.72126028952597, "grad_norm": 0.1269202083349228, "learning_rate": 6.629293216009084e-05, "loss": 0.02118697017431259, "step": 118800 }, { "epoch": 33.724098779449335, "grad_norm": 6.885107517242432, "learning_rate": 6.629009367016748e-05, "loss": 0.0065097369253635405, "step": 118810 }, { "epoch": 33.72693726937269, "grad_norm": 0.12850742042064667, "learning_rate": 6.62872551802441e-05, "loss": 0.0007113125175237656, "step": 118820 }, { "epoch": 33.729775759296054, "grad_norm": 15.037755966186523, "learning_rate": 6.628441669032075e-05, "loss": 0.031044411659240722, "step": 118830 }, { "epoch": 33.73261424921942, "grad_norm": 0.46731501817703247, "learning_rate": 6.628157820039739e-05, "loss": 0.002363436296582222, "step": 118840 }, { "epoch": 33.73545273914278, "grad_norm": 0.06534188240766525, "learning_rate": 6.627873971047403e-05, "loss": 0.00816275030374527, "step": 118850 }, { "epoch": 33.738291229066135, "grad_norm": 0.013150214217603207, "learning_rate": 6.627590122055067e-05, "loss": 0.006211091950535774, "step": 118860 }, { "epoch": 33.7411297189895, "grad_norm": 10.937414169311523, "learning_rate": 6.627306273062731e-05, "loss": 0.005030135065317154, "step": 118870 }, { "epoch": 33.74396820891286, "grad_norm": 2.4176084995269775, "learning_rate": 6.627022424070395e-05, "loss": 0.005538659915328026, "step": 118880 }, { "epoch": 33.74680669883622, "grad_norm": 0.9869166612625122, "learning_rate": 6.626738575078058e-05, "loss": 0.0026492074131965635, "step": 118890 }, { "epoch": 33.74964518875958, "grad_norm": 0.7623404264450073, "learning_rate": 6.626454726085722e-05, "loss": 0.00045075397938489916, "step": 118900 }, { "epoch": 33.75248367868294, "grad_norm": 0.23107317090034485, "learning_rate": 6.626170877093387e-05, "loss": 0.002417679317295551, "step": 118910 }, { "epoch": 33.7553221686063, "grad_norm": 11.41956615447998, "learning_rate": 6.625887028101051e-05, "loss": 0.003642793744802475, "step": 118920 }, { "epoch": 33.75816065852966, "grad_norm": 0.032227322459220886, "learning_rate": 6.625603179108715e-05, "loss": 0.007828684151172638, "step": 118930 }, { "epoch": 33.760999148453024, "grad_norm": 0.5535428524017334, "learning_rate": 6.625319330116379e-05, "loss": 0.0038871653378009796, "step": 118940 }, { "epoch": 33.76383763837639, "grad_norm": 0.39419543743133545, "learning_rate": 6.625035481124042e-05, "loss": 0.002927979454398155, "step": 118950 }, { "epoch": 33.76667612829974, "grad_norm": 0.08489658683538437, "learning_rate": 6.624751632131706e-05, "loss": 0.0030015163123607636, "step": 118960 }, { "epoch": 33.769514618223106, "grad_norm": 0.02509884350001812, "learning_rate": 6.62446778313937e-05, "loss": 0.0013624263927340507, "step": 118970 }, { "epoch": 33.77235310814647, "grad_norm": 0.021605757996439934, "learning_rate": 6.624183934147034e-05, "loss": 0.004505070298910141, "step": 118980 }, { "epoch": 33.775191598069824, "grad_norm": 0.5725984573364258, "learning_rate": 6.623900085154698e-05, "loss": 0.008202524483203888, "step": 118990 }, { "epoch": 33.77803008799319, "grad_norm": 1.3558719158172607, "learning_rate": 6.623616236162362e-05, "loss": 0.002666151523590088, "step": 119000 }, { "epoch": 33.77803008799319, "eval_accuracy": 0.9763464106313983, "eval_loss": 0.0875706821680069, "eval_runtime": 34.7363, "eval_samples_per_second": 452.754, "eval_steps_per_second": 7.082, "step": 119000 }, { "epoch": 33.78086857791655, "grad_norm": 0.04850904271006584, "learning_rate": 6.623332387170027e-05, "loss": 0.0017119746655225754, "step": 119010 }, { "epoch": 33.783707067839906, "grad_norm": 1.162245750427246, "learning_rate": 6.62304853817769e-05, "loss": 0.011249242722988129, "step": 119020 }, { "epoch": 33.78654555776327, "grad_norm": 7.2815375328063965, "learning_rate": 6.622764689185353e-05, "loss": 0.019598662853240967, "step": 119030 }, { "epoch": 33.78938404768663, "grad_norm": 0.6860941052436829, "learning_rate": 6.622480840193018e-05, "loss": 0.007290070503950119, "step": 119040 }, { "epoch": 33.792222537609995, "grad_norm": 0.1087602898478508, "learning_rate": 6.622196991200682e-05, "loss": 0.012071063369512558, "step": 119050 }, { "epoch": 33.79506102753335, "grad_norm": 0.09066743403673172, "learning_rate": 6.621913142208346e-05, "loss": 0.0015530752018094063, "step": 119060 }, { "epoch": 33.79789951745671, "grad_norm": 1.2571715116500854, "learning_rate": 6.62162929321601e-05, "loss": 0.004276954755187035, "step": 119070 }, { "epoch": 33.800738007380076, "grad_norm": 0.01090940460562706, "learning_rate": 6.621345444223673e-05, "loss": 0.015084661543369293, "step": 119080 }, { "epoch": 33.80357649730343, "grad_norm": 1.0248197317123413, "learning_rate": 6.621061595231337e-05, "loss": 0.001319378986954689, "step": 119090 }, { "epoch": 33.806414987226795, "grad_norm": 0.013530848547816277, "learning_rate": 6.620777746239001e-05, "loss": 0.008121532946825027, "step": 119100 }, { "epoch": 33.80925347715016, "grad_norm": 0.525258481502533, "learning_rate": 6.620493897246665e-05, "loss": 0.004001867026090622, "step": 119110 }, { "epoch": 33.81209196707351, "grad_norm": 1.8852320909500122, "learning_rate": 6.62021004825433e-05, "loss": 0.004378589987754822, "step": 119120 }, { "epoch": 33.814930456996876, "grad_norm": 1.681583285331726, "learning_rate": 6.619926199261994e-05, "loss": 0.0013621883466839791, "step": 119130 }, { "epoch": 33.81776894692024, "grad_norm": 0.2494502067565918, "learning_rate": 6.619642350269656e-05, "loss": 0.0022167669609189033, "step": 119140 }, { "epoch": 33.8206074368436, "grad_norm": 1.3858872652053833, "learning_rate": 6.61935850127732e-05, "loss": 0.0014360764995217322, "step": 119150 }, { "epoch": 33.82344592676696, "grad_norm": 0.39681869745254517, "learning_rate": 6.619074652284985e-05, "loss": 0.0018471159040927888, "step": 119160 }, { "epoch": 33.82628441669032, "grad_norm": 0.48821699619293213, "learning_rate": 6.618790803292649e-05, "loss": 0.01022171527147293, "step": 119170 }, { "epoch": 33.829122906613684, "grad_norm": 0.1327144056558609, "learning_rate": 6.618506954300313e-05, "loss": 0.004054941982030868, "step": 119180 }, { "epoch": 33.83196139653704, "grad_norm": 0.18390455842018127, "learning_rate": 6.618223105307977e-05, "loss": 0.0018940184265375138, "step": 119190 }, { "epoch": 33.8347998864604, "grad_norm": 0.24113690853118896, "learning_rate": 6.617939256315641e-05, "loss": 0.002934148348867893, "step": 119200 }, { "epoch": 33.837638376383765, "grad_norm": 7.9720377922058105, "learning_rate": 6.617655407323304e-05, "loss": 0.01119447946548462, "step": 119210 }, { "epoch": 33.84047686630713, "grad_norm": 0.043407026678323746, "learning_rate": 6.617371558330968e-05, "loss": 0.008815149962902068, "step": 119220 }, { "epoch": 33.843315356230484, "grad_norm": 0.09008646756410599, "learning_rate": 6.617087709338632e-05, "loss": 0.0019442299380898476, "step": 119230 }, { "epoch": 33.84615384615385, "grad_norm": 0.16115760803222656, "learning_rate": 6.616803860346295e-05, "loss": 0.001485440693795681, "step": 119240 }, { "epoch": 33.84899233607721, "grad_norm": 0.32348576188087463, "learning_rate": 6.61652001135396e-05, "loss": 0.0050655737519264225, "step": 119250 }, { "epoch": 33.851830826000565, "grad_norm": 0.851874589920044, "learning_rate": 6.616236162361625e-05, "loss": 0.003209203854203224, "step": 119260 }, { "epoch": 33.85466931592393, "grad_norm": 0.13697245717048645, "learning_rate": 6.615952313369287e-05, "loss": 0.0010792644694447517, "step": 119270 }, { "epoch": 33.85750780584729, "grad_norm": 0.2222518026828766, "learning_rate": 6.615668464376952e-05, "loss": 0.0007402995601296425, "step": 119280 }, { "epoch": 33.86034629577065, "grad_norm": 0.9138836860656738, "learning_rate": 6.615384615384616e-05, "loss": 0.002678207494318485, "step": 119290 }, { "epoch": 33.86318478569401, "grad_norm": 0.04973456636071205, "learning_rate": 6.61510076639228e-05, "loss": 0.0031643785536289216, "step": 119300 }, { "epoch": 33.86602327561737, "grad_norm": 0.42368024587631226, "learning_rate": 6.614816917399943e-05, "loss": 0.0014313757419586182, "step": 119310 }, { "epoch": 33.868861765540736, "grad_norm": 8.622706413269043, "learning_rate": 6.614533068407608e-05, "loss": 0.005269835889339447, "step": 119320 }, { "epoch": 33.87170025546409, "grad_norm": 0.5438757538795471, "learning_rate": 6.614249219415272e-05, "loss": 0.0009073009714484215, "step": 119330 }, { "epoch": 33.874538745387454, "grad_norm": 2.1112308502197266, "learning_rate": 6.613965370422935e-05, "loss": 0.0011424612253904342, "step": 119340 }, { "epoch": 33.87737723531082, "grad_norm": 2.731435775756836, "learning_rate": 6.613681521430599e-05, "loss": 0.0017558922991156578, "step": 119350 }, { "epoch": 33.88021572523417, "grad_norm": 0.03108225017786026, "learning_rate": 6.613397672438263e-05, "loss": 0.00630977675318718, "step": 119360 }, { "epoch": 33.883054215157536, "grad_norm": 0.5562329292297363, "learning_rate": 6.613113823445926e-05, "loss": 0.0010226070880889893, "step": 119370 }, { "epoch": 33.8858927050809, "grad_norm": 0.26860758662223816, "learning_rate": 6.612829974453592e-05, "loss": 0.0038079410791397096, "step": 119380 }, { "epoch": 33.888731195004254, "grad_norm": 8.17755126953125, "learning_rate": 6.612546125461256e-05, "loss": 0.006123463809490204, "step": 119390 }, { "epoch": 33.89156968492762, "grad_norm": 0.36804690957069397, "learning_rate": 6.612262276468918e-05, "loss": 0.016346862912178038, "step": 119400 }, { "epoch": 33.89440817485098, "grad_norm": 0.0063311317935585976, "learning_rate": 6.611978427476583e-05, "loss": 0.005041595548391342, "step": 119410 }, { "epoch": 33.89724666477434, "grad_norm": 0.5479374527931213, "learning_rate": 6.611694578484247e-05, "loss": 0.0035877954214811325, "step": 119420 }, { "epoch": 33.9000851546977, "grad_norm": 6.059084415435791, "learning_rate": 6.611410729491911e-05, "loss": 0.004062448441982269, "step": 119430 }, { "epoch": 33.90292364462106, "grad_norm": 0.06644263863563538, "learning_rate": 6.611126880499574e-05, "loss": 0.0018143318593502045, "step": 119440 }, { "epoch": 33.905762134544425, "grad_norm": 0.023669075220823288, "learning_rate": 6.610843031507239e-05, "loss": 0.005579018220305443, "step": 119450 }, { "epoch": 33.90860062446778, "grad_norm": 6.388303279876709, "learning_rate": 6.610559182514903e-05, "loss": 0.01029946655035019, "step": 119460 }, { "epoch": 33.91143911439114, "grad_norm": 2.9134891033172607, "learning_rate": 6.610275333522566e-05, "loss": 0.0025755004957318307, "step": 119470 }, { "epoch": 33.914277604314506, "grad_norm": 10.44554615020752, "learning_rate": 6.60999148453023e-05, "loss": 0.029320186376571654, "step": 119480 }, { "epoch": 33.91711609423786, "grad_norm": 0.09177708625793457, "learning_rate": 6.609707635537894e-05, "loss": 0.0016703419387340546, "step": 119490 }, { "epoch": 33.919954584161225, "grad_norm": 0.027280744165182114, "learning_rate": 6.609423786545557e-05, "loss": 0.004225380718708038, "step": 119500 }, { "epoch": 33.919954584161225, "eval_accuracy": 0.9741209385133847, "eval_loss": 0.09636817127466202, "eval_runtime": 33.9918, "eval_samples_per_second": 462.67, "eval_steps_per_second": 7.237, "step": 119500 }, { "epoch": 33.92279307408459, "grad_norm": 0.3260224759578705, "learning_rate": 6.609139937553221e-05, "loss": 0.02140115350484848, "step": 119510 }, { "epoch": 33.92563156400795, "grad_norm": 5.181106090545654, "learning_rate": 6.608856088560887e-05, "loss": 0.0033278390765190125, "step": 119520 }, { "epoch": 33.928470053931306, "grad_norm": 0.1682506650686264, "learning_rate": 6.60857223956855e-05, "loss": 0.00421578399837017, "step": 119530 }, { "epoch": 33.93130854385467, "grad_norm": 0.08261603116989136, "learning_rate": 6.608288390576214e-05, "loss": 0.00779276192188263, "step": 119540 }, { "epoch": 33.93414703377803, "grad_norm": 0.39014047384262085, "learning_rate": 6.608004541583878e-05, "loss": 0.009470651298761368, "step": 119550 }, { "epoch": 33.93698552370139, "grad_norm": 4.099247455596924, "learning_rate": 6.607720692591542e-05, "loss": 0.005302262678742409, "step": 119560 }, { "epoch": 33.93982401362475, "grad_norm": 0.5568833351135254, "learning_rate": 6.607436843599205e-05, "loss": 0.00346563421189785, "step": 119570 }, { "epoch": 33.942662503548114, "grad_norm": 0.07312905043363571, "learning_rate": 6.60715299460687e-05, "loss": 0.002348898723721504, "step": 119580 }, { "epoch": 33.94550099347148, "grad_norm": 1.6768970489501953, "learning_rate": 6.606869145614534e-05, "loss": 0.003786543756723404, "step": 119590 }, { "epoch": 33.94833948339483, "grad_norm": 1.130746603012085, "learning_rate": 6.606585296622197e-05, "loss": 0.006312453746795654, "step": 119600 }, { "epoch": 33.951177973318195, "grad_norm": 0.18490491807460785, "learning_rate": 6.606301447629861e-05, "loss": 0.0017824687063694, "step": 119610 }, { "epoch": 33.95401646324156, "grad_norm": 0.4618832767009735, "learning_rate": 6.606017598637525e-05, "loss": 0.0013403864577412605, "step": 119620 }, { "epoch": 33.956854953164914, "grad_norm": 1.4754183292388916, "learning_rate": 6.605733749645188e-05, "loss": 0.006254643201828003, "step": 119630 }, { "epoch": 33.95969344308828, "grad_norm": 7.132420539855957, "learning_rate": 6.605449900652852e-05, "loss": 0.014975133538246154, "step": 119640 }, { "epoch": 33.96253193301164, "grad_norm": 0.3404487073421478, "learning_rate": 6.605166051660518e-05, "loss": 0.008425812423229217, "step": 119650 }, { "epoch": 33.965370422934996, "grad_norm": 0.038631319999694824, "learning_rate": 6.60488220266818e-05, "loss": 0.0050784960389137265, "step": 119660 }, { "epoch": 33.96820891285836, "grad_norm": 1.859325647354126, "learning_rate": 6.604598353675845e-05, "loss": 0.006181416660547256, "step": 119670 }, { "epoch": 33.97104740278172, "grad_norm": 0.27655819058418274, "learning_rate": 6.604314504683509e-05, "loss": 0.00709584578871727, "step": 119680 }, { "epoch": 33.973885892705084, "grad_norm": 0.811234176158905, "learning_rate": 6.604030655691173e-05, "loss": 0.0019970208406448363, "step": 119690 }, { "epoch": 33.97672438262844, "grad_norm": 0.019688501954078674, "learning_rate": 6.603746806698836e-05, "loss": 0.0006953397765755654, "step": 119700 }, { "epoch": 33.9795628725518, "grad_norm": 0.005969308782368898, "learning_rate": 6.6034629577065e-05, "loss": 0.0032757848501205446, "step": 119710 }, { "epoch": 33.982401362475166, "grad_norm": 0.8173145055770874, "learning_rate": 6.603179108714165e-05, "loss": 0.009622532129287719, "step": 119720 }, { "epoch": 33.98523985239852, "grad_norm": 0.19474156200885773, "learning_rate": 6.602895259721828e-05, "loss": 0.008303099870681762, "step": 119730 }, { "epoch": 33.988078342321884, "grad_norm": 0.03996090218424797, "learning_rate": 6.602611410729492e-05, "loss": 0.006288936734199524, "step": 119740 }, { "epoch": 33.99091683224525, "grad_norm": 0.5759553909301758, "learning_rate": 6.602327561737156e-05, "loss": 0.007497886568307877, "step": 119750 }, { "epoch": 33.9937553221686, "grad_norm": 5.683635234832764, "learning_rate": 6.602043712744819e-05, "loss": 0.014990220963954925, "step": 119760 }, { "epoch": 33.996593812091966, "grad_norm": 0.4353529214859009, "learning_rate": 6.601759863752483e-05, "loss": 0.002090528979897499, "step": 119770 }, { "epoch": 33.99943230201533, "grad_norm": 0.27974453568458557, "learning_rate": 6.601476014760149e-05, "loss": 0.0019552808254957197, "step": 119780 }, { "epoch": 34.00227079193869, "grad_norm": 3.3392703533172607, "learning_rate": 6.601192165767812e-05, "loss": 0.006052836775779724, "step": 119790 }, { "epoch": 34.00510928186205, "grad_norm": 2.7418909072875977, "learning_rate": 6.600908316775476e-05, "loss": 0.0018298696726560594, "step": 119800 }, { "epoch": 34.00794777178541, "grad_norm": 0.19380129873752594, "learning_rate": 6.60062446778314e-05, "loss": 0.0005892358720302581, "step": 119810 }, { "epoch": 34.01078626170877, "grad_norm": 3.7807111740112305, "learning_rate": 6.600340618790804e-05, "loss": 0.002618926018476486, "step": 119820 }, { "epoch": 34.01362475163213, "grad_norm": 2.0485785007476807, "learning_rate": 6.600056769798467e-05, "loss": 0.003525380417704582, "step": 119830 }, { "epoch": 34.01646324155549, "grad_norm": 0.06404231488704681, "learning_rate": 6.599772920806131e-05, "loss": 0.0005224734544754028, "step": 119840 }, { "epoch": 34.019301731478855, "grad_norm": 0.3024646043777466, "learning_rate": 6.599489071813796e-05, "loss": 0.0005599843338131904, "step": 119850 }, { "epoch": 34.02214022140221, "grad_norm": 0.2744917571544647, "learning_rate": 6.599205222821459e-05, "loss": 0.005253379791975021, "step": 119860 }, { "epoch": 34.02497871132557, "grad_norm": 2.892648935317993, "learning_rate": 6.598921373829123e-05, "loss": 0.0008800547569990158, "step": 119870 }, { "epoch": 34.027817201248936, "grad_norm": 0.03578922152519226, "learning_rate": 6.598637524836788e-05, "loss": 0.012422124296426773, "step": 119880 }, { "epoch": 34.0306556911723, "grad_norm": 0.007651380728930235, "learning_rate": 6.59835367584445e-05, "loss": 0.0013491224497556686, "step": 119890 }, { "epoch": 34.033494181095655, "grad_norm": 0.0266469307243824, "learning_rate": 6.598069826852114e-05, "loss": 0.0017360581085085868, "step": 119900 }, { "epoch": 34.03633267101902, "grad_norm": 10.729642868041992, "learning_rate": 6.597785977859779e-05, "loss": 0.005020387098193168, "step": 119910 }, { "epoch": 34.03917116094238, "grad_norm": 0.20933684706687927, "learning_rate": 6.597502128867443e-05, "loss": 0.0006433498114347458, "step": 119920 }, { "epoch": 34.04200965086574, "grad_norm": 0.03772079572081566, "learning_rate": 6.597218279875107e-05, "loss": 0.0022040437906980515, "step": 119930 }, { "epoch": 34.0448481407891, "grad_norm": 0.3104272186756134, "learning_rate": 6.596934430882771e-05, "loss": 0.0013599636033177376, "step": 119940 }, { "epoch": 34.04768663071246, "grad_norm": 14.608319282531738, "learning_rate": 6.596650581890435e-05, "loss": 0.009510137885808945, "step": 119950 }, { "epoch": 34.050525120635825, "grad_norm": 0.0312742255628109, "learning_rate": 6.596366732898098e-05, "loss": 0.003946512565016746, "step": 119960 }, { "epoch": 34.05336361055918, "grad_norm": 0.33535581827163696, "learning_rate": 6.596082883905762e-05, "loss": 0.004642674326896667, "step": 119970 }, { "epoch": 34.056202100482544, "grad_norm": 0.1469077467918396, "learning_rate": 6.595799034913426e-05, "loss": 0.004567412286996841, "step": 119980 }, { "epoch": 34.05904059040591, "grad_norm": 0.04110793024301529, "learning_rate": 6.59551518592109e-05, "loss": 0.0028673209249973296, "step": 119990 }, { "epoch": 34.06187908032926, "grad_norm": 3.982940673828125, "learning_rate": 6.595231336928754e-05, "loss": 0.004414904862642288, "step": 120000 }, { "epoch": 34.06187908032926, "eval_accuracy": 0.9806701850321103, "eval_loss": 0.0710265263915062, "eval_runtime": 34.3055, "eval_samples_per_second": 458.439, "eval_steps_per_second": 7.171, "step": 120000 }, { "epoch": 34.064717570252625, "grad_norm": 8.664295196533203, "learning_rate": 6.594947487936419e-05, "loss": 0.006534130126237869, "step": 120010 }, { "epoch": 34.06755606017599, "grad_norm": 5.211541175842285, "learning_rate": 6.594663638944081e-05, "loss": 0.0041853789240121845, "step": 120020 }, { "epoch": 34.070394550099344, "grad_norm": 1.1147971153259277, "learning_rate": 6.594379789951746e-05, "loss": 0.0023598313331604005, "step": 120030 }, { "epoch": 34.07323304002271, "grad_norm": 0.48543789982795715, "learning_rate": 6.59409594095941e-05, "loss": 0.002595251612365246, "step": 120040 }, { "epoch": 34.07607152994607, "grad_norm": 1.719138264656067, "learning_rate": 6.593812091967074e-05, "loss": 0.001478576846420765, "step": 120050 }, { "epoch": 34.07891001986943, "grad_norm": 0.4581162929534912, "learning_rate": 6.593528242974738e-05, "loss": 0.0014006281271576881, "step": 120060 }, { "epoch": 34.08174850979279, "grad_norm": 0.21042893826961517, "learning_rate": 6.593244393982402e-05, "loss": 0.002723981998860836, "step": 120070 }, { "epoch": 34.08458699971615, "grad_norm": 2.370666980743408, "learning_rate": 6.592960544990065e-05, "loss": 0.0017228145152330399, "step": 120080 }, { "epoch": 34.087425489639514, "grad_norm": 16.563718795776367, "learning_rate": 6.592676695997729e-05, "loss": 0.005303649976849556, "step": 120090 }, { "epoch": 34.09026397956287, "grad_norm": 0.1121537834405899, "learning_rate": 6.592392847005393e-05, "loss": 0.0042193055152893065, "step": 120100 }, { "epoch": 34.09310246948623, "grad_norm": 0.05780801549553871, "learning_rate": 6.592108998013057e-05, "loss": 0.0007723132148385048, "step": 120110 }, { "epoch": 34.095940959409596, "grad_norm": 10.186444282531738, "learning_rate": 6.591825149020721e-05, "loss": 0.006486070156097412, "step": 120120 }, { "epoch": 34.09877944933295, "grad_norm": 7.746127605438232, "learning_rate": 6.591541300028386e-05, "loss": 0.013898824155330659, "step": 120130 }, { "epoch": 34.101617939256315, "grad_norm": 9.631966590881348, "learning_rate": 6.59125745103605e-05, "loss": 0.0055408008396625515, "step": 120140 }, { "epoch": 34.10445642917968, "grad_norm": 0.08059514313936234, "learning_rate": 6.590973602043712e-05, "loss": 0.007794471830129624, "step": 120150 }, { "epoch": 34.10729491910304, "grad_norm": 0.05566677823662758, "learning_rate": 6.590689753051377e-05, "loss": 0.009602095186710357, "step": 120160 }, { "epoch": 34.110133409026396, "grad_norm": 6.911024570465088, "learning_rate": 6.590405904059041e-05, "loss": 0.009761782735586167, "step": 120170 }, { "epoch": 34.11297189894976, "grad_norm": 0.18805408477783203, "learning_rate": 6.590122055066705e-05, "loss": 0.020956018567085268, "step": 120180 }, { "epoch": 34.11581038887312, "grad_norm": 1.0862871408462524, "learning_rate": 6.589838206074369e-05, "loss": 0.013195376098155975, "step": 120190 }, { "epoch": 34.11864887879648, "grad_norm": 0.38532140851020813, "learning_rate": 6.589554357082033e-05, "loss": 0.0041417405009269714, "step": 120200 }, { "epoch": 34.12148736871984, "grad_norm": 0.12961508333683014, "learning_rate": 6.589270508089696e-05, "loss": 0.004855966567993164, "step": 120210 }, { "epoch": 34.1243258586432, "grad_norm": 1.4283761978149414, "learning_rate": 6.58898665909736e-05, "loss": 0.008941882848739624, "step": 120220 }, { "epoch": 34.12716434856656, "grad_norm": 0.26361361145973206, "learning_rate": 6.588702810105024e-05, "loss": 0.01830787807703018, "step": 120230 }, { "epoch": 34.13000283848992, "grad_norm": 0.8549072742462158, "learning_rate": 6.588418961112688e-05, "loss": 0.04620328843593598, "step": 120240 }, { "epoch": 34.132841328413285, "grad_norm": 0.23609595000743866, "learning_rate": 6.588135112120353e-05, "loss": 0.0050118230283260345, "step": 120250 }, { "epoch": 34.13567981833665, "grad_norm": 0.09931660443544388, "learning_rate": 6.587851263128017e-05, "loss": 0.005431956052780152, "step": 120260 }, { "epoch": 34.138518308260004, "grad_norm": 0.06324197351932526, "learning_rate": 6.587567414135681e-05, "loss": 0.01559511423110962, "step": 120270 }, { "epoch": 34.14135679818337, "grad_norm": 0.024333300068974495, "learning_rate": 6.587283565143344e-05, "loss": 0.003150699660181999, "step": 120280 }, { "epoch": 34.14419528810673, "grad_norm": 0.0262185987085104, "learning_rate": 6.586999716151008e-05, "loss": 0.007974471151828765, "step": 120290 }, { "epoch": 34.147033778030085, "grad_norm": 0.03815938159823418, "learning_rate": 6.586715867158672e-05, "loss": 0.002371375635266304, "step": 120300 }, { "epoch": 34.14987226795345, "grad_norm": 0.6584746241569519, "learning_rate": 6.586432018166336e-05, "loss": 0.003098090551793575, "step": 120310 }, { "epoch": 34.15271075787681, "grad_norm": 2.023371458053589, "learning_rate": 6.586148169174e-05, "loss": 0.002083117701113224, "step": 120320 }, { "epoch": 34.15554924780017, "grad_norm": 7.84483528137207, "learning_rate": 6.585864320181664e-05, "loss": 0.052892130613327024, "step": 120330 }, { "epoch": 34.15838773772353, "grad_norm": 0.003792363218963146, "learning_rate": 6.585580471189327e-05, "loss": 0.0034030135720968246, "step": 120340 }, { "epoch": 34.16122622764689, "grad_norm": 0.06758051365613937, "learning_rate": 6.585296622196991e-05, "loss": 0.0022671077400445937, "step": 120350 }, { "epoch": 34.164064717570255, "grad_norm": 0.14652559161186218, "learning_rate": 6.585012773204655e-05, "loss": 0.010776925086975097, "step": 120360 }, { "epoch": 34.16690320749361, "grad_norm": 5.790385723114014, "learning_rate": 6.58472892421232e-05, "loss": 0.012346293032169341, "step": 120370 }, { "epoch": 34.169741697416974, "grad_norm": 0.11210482567548752, "learning_rate": 6.584445075219984e-05, "loss": 0.003098754771053791, "step": 120380 }, { "epoch": 34.17258018734034, "grad_norm": 6.648543357849121, "learning_rate": 6.584161226227648e-05, "loss": 0.007065550982952118, "step": 120390 }, { "epoch": 34.17541867726369, "grad_norm": 0.04792241007089615, "learning_rate": 6.583877377235312e-05, "loss": 0.0012268826365470886, "step": 120400 }, { "epoch": 34.178257167187056, "grad_norm": 0.0925612822175026, "learning_rate": 6.583593528242975e-05, "loss": 0.0010269394144415856, "step": 120410 }, { "epoch": 34.18109565711042, "grad_norm": 1.5966662168502808, "learning_rate": 6.583309679250639e-05, "loss": 0.0014116814360022544, "step": 120420 }, { "epoch": 34.18393414703378, "grad_norm": 10.422369003295898, "learning_rate": 6.583025830258303e-05, "loss": 0.006739802658557892, "step": 120430 }, { "epoch": 34.18677263695714, "grad_norm": 2.3707544803619385, "learning_rate": 6.582741981265966e-05, "loss": 0.001744193769991398, "step": 120440 }, { "epoch": 34.1896111268805, "grad_norm": 0.009535347111523151, "learning_rate": 6.582458132273631e-05, "loss": 0.0031828828155994414, "step": 120450 }, { "epoch": 34.19244961680386, "grad_norm": 0.01628679595887661, "learning_rate": 6.582174283281295e-05, "loss": 0.0007433464750647544, "step": 120460 }, { "epoch": 34.19528810672722, "grad_norm": 0.15618611872196198, "learning_rate": 6.581890434288958e-05, "loss": 0.00042880438268184663, "step": 120470 }, { "epoch": 34.19812659665058, "grad_norm": 0.024930279701948166, "learning_rate": 6.581606585296622e-05, "loss": 0.0005561929196119308, "step": 120480 }, { "epoch": 34.200965086573945, "grad_norm": 4.3107757568359375, "learning_rate": 6.581322736304286e-05, "loss": 0.00599360242486, "step": 120490 }, { "epoch": 34.2038035764973, "grad_norm": 0.04137492924928665, "learning_rate": 6.58103888731195e-05, "loss": 0.0011291923001408577, "step": 120500 }, { "epoch": 34.2038035764973, "eval_accuracy": 0.9807973548674255, "eval_loss": 0.07474569976329803, "eval_runtime": 33.8362, "eval_samples_per_second": 464.798, "eval_steps_per_second": 7.27, "step": 120500 }, { "epoch": 34.20664206642066, "grad_norm": 12.216864585876465, "learning_rate": 6.580755038319615e-05, "loss": 0.004547149315476418, "step": 120510 }, { "epoch": 34.209480556344026, "grad_norm": 0.09973646700382233, "learning_rate": 6.580471189327279e-05, "loss": 0.004509694501757621, "step": 120520 }, { "epoch": 34.21231904626739, "grad_norm": 0.8820620775222778, "learning_rate": 6.580187340334943e-05, "loss": 0.004440927505493164, "step": 120530 }, { "epoch": 34.215157536190745, "grad_norm": 0.0950508564710617, "learning_rate": 6.579903491342606e-05, "loss": 0.012411713600158691, "step": 120540 }, { "epoch": 34.21799602611411, "grad_norm": 0.11170898377895355, "learning_rate": 6.57961964235027e-05, "loss": 0.006239572167396545, "step": 120550 }, { "epoch": 34.22083451603747, "grad_norm": 2.7603108882904053, "learning_rate": 6.579335793357934e-05, "loss": 0.0011956077069044113, "step": 120560 }, { "epoch": 34.223673005960826, "grad_norm": 1.3359439373016357, "learning_rate": 6.579051944365597e-05, "loss": 0.003363964706659317, "step": 120570 }, { "epoch": 34.22651149588419, "grad_norm": 0.07373345643281937, "learning_rate": 6.578768095373262e-05, "loss": 0.0008915688842535019, "step": 120580 }, { "epoch": 34.22934998580755, "grad_norm": 0.056678660213947296, "learning_rate": 6.578484246380926e-05, "loss": 0.01931237429380417, "step": 120590 }, { "epoch": 34.23218847573091, "grad_norm": 0.9676885008811951, "learning_rate": 6.578200397388589e-05, "loss": 0.004407708346843719, "step": 120600 }, { "epoch": 34.23502696565427, "grad_norm": 0.3782937824726105, "learning_rate": 6.577916548396253e-05, "loss": 0.0018465492874383926, "step": 120610 }, { "epoch": 34.237865455577634, "grad_norm": 0.0908208042383194, "learning_rate": 6.577632699403917e-05, "loss": 0.006952321529388428, "step": 120620 }, { "epoch": 34.240703945501, "grad_norm": 5.2981977462768555, "learning_rate": 6.577348850411582e-05, "loss": 0.015754860639572144, "step": 120630 }, { "epoch": 34.24354243542435, "grad_norm": 0.10721073299646378, "learning_rate": 6.577065001419244e-05, "loss": 0.0012903986498713493, "step": 120640 }, { "epoch": 34.246380925347715, "grad_norm": 0.2137644737958908, "learning_rate": 6.57678115242691e-05, "loss": 0.005928069353103638, "step": 120650 }, { "epoch": 34.24921941527108, "grad_norm": 0.7724074721336365, "learning_rate": 6.576497303434574e-05, "loss": 0.0012690704315900803, "step": 120660 }, { "epoch": 34.252057905194434, "grad_norm": 0.027761392295360565, "learning_rate": 6.576213454442237e-05, "loss": 0.002806895226240158, "step": 120670 }, { "epoch": 34.2548963951178, "grad_norm": 0.12600377202033997, "learning_rate": 6.575929605449901e-05, "loss": 0.0008753834292292595, "step": 120680 }, { "epoch": 34.25773488504116, "grad_norm": 0.5736311078071594, "learning_rate": 6.575645756457565e-05, "loss": 0.007129979133605957, "step": 120690 }, { "epoch": 34.260573374964515, "grad_norm": 0.2692219018936157, "learning_rate": 6.575361907465228e-05, "loss": 0.003993768990039825, "step": 120700 }, { "epoch": 34.26341186488788, "grad_norm": 1.110845923423767, "learning_rate": 6.575078058472893e-05, "loss": 0.0005883926525712014, "step": 120710 }, { "epoch": 34.26625035481124, "grad_norm": 0.8097335696220398, "learning_rate": 6.574794209480557e-05, "loss": 0.0009864853695034982, "step": 120720 }, { "epoch": 34.269088844734604, "grad_norm": 0.02222394570708275, "learning_rate": 6.57451036048822e-05, "loss": 0.0013472201302647592, "step": 120730 }, { "epoch": 34.27192733465796, "grad_norm": 0.020931746810674667, "learning_rate": 6.574226511495884e-05, "loss": 0.009032388031482697, "step": 120740 }, { "epoch": 34.27476582458132, "grad_norm": 10.5999174118042, "learning_rate": 6.573942662503549e-05, "loss": 0.005458477884531021, "step": 120750 }, { "epoch": 34.277604314504686, "grad_norm": 0.24001438915729523, "learning_rate": 6.573658813511213e-05, "loss": 0.006288004666566848, "step": 120760 }, { "epoch": 34.28044280442804, "grad_norm": 3.5703155994415283, "learning_rate": 6.573374964518875e-05, "loss": 0.004214295744895935, "step": 120770 }, { "epoch": 34.283281294351404, "grad_norm": 2.6797502040863037, "learning_rate": 6.573091115526541e-05, "loss": 0.004411026835441589, "step": 120780 }, { "epoch": 34.28611978427477, "grad_norm": 0.014363267458975315, "learning_rate": 6.572807266534205e-05, "loss": 0.0008180493488907814, "step": 120790 }, { "epoch": 34.28895827419813, "grad_norm": 0.42332765460014343, "learning_rate": 6.572523417541868e-05, "loss": 0.0013417446985840797, "step": 120800 }, { "epoch": 34.291796764121486, "grad_norm": 0.39159607887268066, "learning_rate": 6.572239568549532e-05, "loss": 0.0012770935893058777, "step": 120810 }, { "epoch": 34.29463525404485, "grad_norm": 11.337034225463867, "learning_rate": 6.571955719557196e-05, "loss": 0.013191723823547363, "step": 120820 }, { "epoch": 34.29747374396821, "grad_norm": 17.24374008178711, "learning_rate": 6.571671870564859e-05, "loss": 0.015360915660858154, "step": 120830 }, { "epoch": 34.30031223389157, "grad_norm": 0.135091170668602, "learning_rate": 6.571388021572523e-05, "loss": 0.01540045440196991, "step": 120840 }, { "epoch": 34.30315072381493, "grad_norm": 0.10803650319576263, "learning_rate": 6.571104172580189e-05, "loss": 0.005015735328197479, "step": 120850 }, { "epoch": 34.30598921373829, "grad_norm": 0.0490671843290329, "learning_rate": 6.570820323587851e-05, "loss": 0.0009883033111691476, "step": 120860 }, { "epoch": 34.30882770366165, "grad_norm": 0.12627293169498444, "learning_rate": 6.570536474595515e-05, "loss": 0.002101302333176136, "step": 120870 }, { "epoch": 34.31166619358501, "grad_norm": 2.674340009689331, "learning_rate": 6.57025262560318e-05, "loss": 0.0028935901820659636, "step": 120880 }, { "epoch": 34.314504683508375, "grad_norm": 9.02028751373291, "learning_rate": 6.569968776610844e-05, "loss": 0.018889468908309937, "step": 120890 }, { "epoch": 34.31734317343174, "grad_norm": 1.0077141523361206, "learning_rate": 6.569684927618507e-05, "loss": 0.004995648190379143, "step": 120900 }, { "epoch": 34.32018166335509, "grad_norm": 2.736100912094116, "learning_rate": 6.569401078626172e-05, "loss": 0.0191461443901062, "step": 120910 }, { "epoch": 34.323020153278456, "grad_norm": 0.6042355298995972, "learning_rate": 6.569117229633835e-05, "loss": 0.0012513522058725357, "step": 120920 }, { "epoch": 34.32585864320182, "grad_norm": 0.10971114039421082, "learning_rate": 6.568833380641499e-05, "loss": 0.0028620591387152673, "step": 120930 }, { "epoch": 34.328697133125175, "grad_norm": 0.09406564384698868, "learning_rate": 6.568549531649163e-05, "loss": 0.0015230385586619377, "step": 120940 }, { "epoch": 34.33153562304854, "grad_norm": 0.05219341441988945, "learning_rate": 6.568265682656827e-05, "loss": 0.002076907455921173, "step": 120950 }, { "epoch": 34.3343741129719, "grad_norm": 0.07376644760370255, "learning_rate": 6.56798183366449e-05, "loss": 0.01225736066699028, "step": 120960 }, { "epoch": 34.33721260289526, "grad_norm": 0.22495974600315094, "learning_rate": 6.567697984672154e-05, "loss": 0.0036752205342054367, "step": 120970 }, { "epoch": 34.34005109281862, "grad_norm": 2.7323482036590576, "learning_rate": 6.56741413567982e-05, "loss": 0.010143934190273285, "step": 120980 }, { "epoch": 34.34288958274198, "grad_norm": 3.412902593612671, "learning_rate": 6.567130286687482e-05, "loss": 0.005004578083753586, "step": 120990 }, { "epoch": 34.345728072665345, "grad_norm": 5.6697306632995605, "learning_rate": 6.566846437695147e-05, "loss": 0.019697943329811098, "step": 121000 }, { "epoch": 34.345728072665345, "eval_accuracy": 0.9757105614548229, "eval_loss": 0.09650394320487976, "eval_runtime": 34.0518, "eval_samples_per_second": 461.855, "eval_steps_per_second": 7.224, "step": 121000 }, { "epoch": 34.3485665625887, "grad_norm": 1.0126115083694458, "learning_rate": 6.566562588702811e-05, "loss": 0.005374939367175102, "step": 121010 }, { "epoch": 34.351405052512064, "grad_norm": 3.1466472148895264, "learning_rate": 6.566278739710473e-05, "loss": 0.002569963224232197, "step": 121020 }, { "epoch": 34.35424354243543, "grad_norm": 14.012662887573242, "learning_rate": 6.565994890718138e-05, "loss": 0.009390918910503388, "step": 121030 }, { "epoch": 34.35708203235878, "grad_norm": 10.098669052124023, "learning_rate": 6.565711041725802e-05, "loss": 0.007510758936405182, "step": 121040 }, { "epoch": 34.359920522282145, "grad_norm": 4.098156452178955, "learning_rate": 6.565427192733466e-05, "loss": 0.0037875279784202577, "step": 121050 }, { "epoch": 34.36275901220551, "grad_norm": 13.006037712097168, "learning_rate": 6.56514334374113e-05, "loss": 0.016244634985923767, "step": 121060 }, { "epoch": 34.365597502128864, "grad_norm": 2.092751979827881, "learning_rate": 6.564859494748794e-05, "loss": 0.003017153963446617, "step": 121070 }, { "epoch": 34.36843599205223, "grad_norm": 0.7494295835494995, "learning_rate": 6.564575645756458e-05, "loss": 0.0022613221779465677, "step": 121080 }, { "epoch": 34.37127448197559, "grad_norm": 0.02512420527637005, "learning_rate": 6.564291796764121e-05, "loss": 0.004491118341684341, "step": 121090 }, { "epoch": 34.37411297189895, "grad_norm": 2.6017441749572754, "learning_rate": 6.564007947771785e-05, "loss": 0.0026943162083625793, "step": 121100 }, { "epoch": 34.37695146182231, "grad_norm": 3.179140329360962, "learning_rate": 6.563724098779451e-05, "loss": 0.0017863884568214417, "step": 121110 }, { "epoch": 34.37978995174567, "grad_norm": 0.08171575516462326, "learning_rate": 6.563440249787113e-05, "loss": 0.0010490210726857186, "step": 121120 }, { "epoch": 34.382628441669034, "grad_norm": 5.473512172698975, "learning_rate": 6.563156400794778e-05, "loss": 0.0037100322544574736, "step": 121130 }, { "epoch": 34.38546693159239, "grad_norm": 15.310783386230469, "learning_rate": 6.562872551802442e-05, "loss": 0.029602518677711485, "step": 121140 }, { "epoch": 34.38830542151575, "grad_norm": 0.01725846529006958, "learning_rate": 6.562588702810105e-05, "loss": 0.0033577784895896913, "step": 121150 }, { "epoch": 34.391143911439116, "grad_norm": 0.07651416212320328, "learning_rate": 6.562304853817769e-05, "loss": 0.002404669485986233, "step": 121160 }, { "epoch": 34.39398240136248, "grad_norm": 0.4696718156337738, "learning_rate": 6.562021004825433e-05, "loss": 0.002069702185690403, "step": 121170 }, { "epoch": 34.396820891285834, "grad_norm": 0.030339429154992104, "learning_rate": 6.561737155833097e-05, "loss": 0.0010023757815361023, "step": 121180 }, { "epoch": 34.3996593812092, "grad_norm": 3.485079765319824, "learning_rate": 6.561453306840761e-05, "loss": 0.004249865934252739, "step": 121190 }, { "epoch": 34.40249787113256, "grad_norm": 7.812013626098633, "learning_rate": 6.561169457848425e-05, "loss": 0.0056616999208927155, "step": 121200 }, { "epoch": 34.405336361055916, "grad_norm": 4.964232921600342, "learning_rate": 6.56088560885609e-05, "loss": 0.0023743642494082453, "step": 121210 }, { "epoch": 34.40817485097928, "grad_norm": 0.16711153090000153, "learning_rate": 6.560601759863752e-05, "loss": 0.011415702104568482, "step": 121220 }, { "epoch": 34.41101334090264, "grad_norm": 0.026932410895824432, "learning_rate": 6.560317910871416e-05, "loss": 0.0030535975471138953, "step": 121230 }, { "epoch": 34.413851830826, "grad_norm": 0.01786528341472149, "learning_rate": 6.56003406187908e-05, "loss": 0.0034457042813301085, "step": 121240 }, { "epoch": 34.41669032074936, "grad_norm": 0.58486008644104, "learning_rate": 6.559750212886745e-05, "loss": 0.005692251026630402, "step": 121250 }, { "epoch": 34.41952881067272, "grad_norm": 0.6077947020530701, "learning_rate": 6.559466363894409e-05, "loss": 0.0007563373073935509, "step": 121260 }, { "epoch": 34.422367300596086, "grad_norm": 4.167333602905273, "learning_rate": 6.559182514902073e-05, "loss": 0.003024040907621384, "step": 121270 }, { "epoch": 34.42520579051944, "grad_norm": 0.2842555642127991, "learning_rate": 6.558898665909736e-05, "loss": 0.003351614996790886, "step": 121280 }, { "epoch": 34.428044280442805, "grad_norm": 0.15120616555213928, "learning_rate": 6.5586148169174e-05, "loss": 0.0021059650927782057, "step": 121290 }, { "epoch": 34.43088277036617, "grad_norm": 0.008970403112471104, "learning_rate": 6.558330967925064e-05, "loss": 0.0007127897813916206, "step": 121300 }, { "epoch": 34.43372126028952, "grad_norm": 2.7567062377929688, "learning_rate": 6.558047118932728e-05, "loss": 0.005402503907680512, "step": 121310 }, { "epoch": 34.436559750212886, "grad_norm": 14.49254322052002, "learning_rate": 6.557763269940392e-05, "loss": 0.007865450531244277, "step": 121320 }, { "epoch": 34.43939824013625, "grad_norm": 0.10969557613134384, "learning_rate": 6.557479420948056e-05, "loss": 0.021596525609493256, "step": 121330 }, { "epoch": 34.442236730059605, "grad_norm": 0.632413387298584, "learning_rate": 6.55719557195572e-05, "loss": 0.004316645860671997, "step": 121340 }, { "epoch": 34.44507521998297, "grad_norm": 0.017330147325992584, "learning_rate": 6.556911722963383e-05, "loss": 0.002198258601129055, "step": 121350 }, { "epoch": 34.44791370990633, "grad_norm": 0.32907411456108093, "learning_rate": 6.556627873971047e-05, "loss": 0.002543767727911472, "step": 121360 }, { "epoch": 34.450752199829694, "grad_norm": 9.323118209838867, "learning_rate": 6.556344024978712e-05, "loss": 0.0058202814310789105, "step": 121370 }, { "epoch": 34.45359068975305, "grad_norm": 0.13264183700084686, "learning_rate": 6.556060175986376e-05, "loss": 0.001440284214913845, "step": 121380 }, { "epoch": 34.45642917967641, "grad_norm": 1.3752105236053467, "learning_rate": 6.55577632699404e-05, "loss": 0.010577797889709473, "step": 121390 }, { "epoch": 34.459267669599775, "grad_norm": 4.658768177032471, "learning_rate": 6.555492478001704e-05, "loss": 0.004559796303510666, "step": 121400 }, { "epoch": 34.46210615952313, "grad_norm": 0.35245320200920105, "learning_rate": 6.555208629009367e-05, "loss": 0.005998966842889785, "step": 121410 }, { "epoch": 34.464944649446494, "grad_norm": 3.0574147701263428, "learning_rate": 6.554924780017031e-05, "loss": 0.002145152352750301, "step": 121420 }, { "epoch": 34.46778313936986, "grad_norm": 0.34621474146842957, "learning_rate": 6.554669315923929e-05, "loss": 0.019093729555606842, "step": 121430 }, { "epoch": 34.47062162929321, "grad_norm": 0.5751723051071167, "learning_rate": 6.554385466931593e-05, "loss": 0.005522572994232177, "step": 121440 }, { "epoch": 34.473460119216575, "grad_norm": 0.05783211812376976, "learning_rate": 6.554101617939257e-05, "loss": 0.0004425913095474243, "step": 121450 }, { "epoch": 34.47629860913994, "grad_norm": 0.19486850500106812, "learning_rate": 6.55381776894692e-05, "loss": 0.015825290977954865, "step": 121460 }, { "epoch": 34.4791370990633, "grad_norm": 0.14067736268043518, "learning_rate": 6.553533919954584e-05, "loss": 0.002304179221391678, "step": 121470 }, { "epoch": 34.48197558898666, "grad_norm": 0.2167961448431015, "learning_rate": 6.553250070962248e-05, "loss": 0.004994480311870575, "step": 121480 }, { "epoch": 34.48481407891002, "grad_norm": 1.3459758758544922, "learning_rate": 6.552966221969912e-05, "loss": 0.001983615383505821, "step": 121490 }, { "epoch": 34.48765256883338, "grad_norm": 13.075315475463867, "learning_rate": 6.552682372977576e-05, "loss": 0.011016988009214402, "step": 121500 }, { "epoch": 34.48765256883338, "eval_accuracy": 0.9795892414319324, "eval_loss": 0.08307802677154541, "eval_runtime": 37.3811, "eval_samples_per_second": 420.721, "eval_steps_per_second": 6.581, "step": 121500 }, { "epoch": 34.49049105875674, "grad_norm": 10.21458911895752, "learning_rate": 6.55239852398524e-05, "loss": 0.007546155154705048, "step": 121510 }, { "epoch": 34.4933295486801, "grad_norm": 2.5022525787353516, "learning_rate": 6.552114674992905e-05, "loss": 0.011616096645593644, "step": 121520 }, { "epoch": 34.496168038603464, "grad_norm": 9.985952377319336, "learning_rate": 6.551830826000568e-05, "loss": 0.004208522289991379, "step": 121530 }, { "epoch": 34.49900652852683, "grad_norm": 0.871397078037262, "learning_rate": 6.551546977008232e-05, "loss": 0.004797196388244629, "step": 121540 }, { "epoch": 34.50184501845018, "grad_norm": 0.038609325885772705, "learning_rate": 6.551263128015896e-05, "loss": 0.005483441054821014, "step": 121550 }, { "epoch": 34.504683508373546, "grad_norm": 0.22192123532295227, "learning_rate": 6.55097927902356e-05, "loss": 0.03334164917469025, "step": 121560 }, { "epoch": 34.50752199829691, "grad_norm": 0.9827811121940613, "learning_rate": 6.550695430031224e-05, "loss": 0.0164742186665535, "step": 121570 }, { "epoch": 34.510360488220265, "grad_norm": 2.3919854164123535, "learning_rate": 6.550411581038888e-05, "loss": 0.005571752041578293, "step": 121580 }, { "epoch": 34.51319897814363, "grad_norm": 9.566225051879883, "learning_rate": 6.550127732046551e-05, "loss": 0.024892017245292664, "step": 121590 }, { "epoch": 34.51603746806699, "grad_norm": 1.9857887029647827, "learning_rate": 6.549843883054215e-05, "loss": 0.008524072915315628, "step": 121600 }, { "epoch": 34.518875957990346, "grad_norm": 2.702085256576538, "learning_rate": 6.549560034061879e-05, "loss": 0.01628555655479431, "step": 121610 }, { "epoch": 34.52171444791371, "grad_norm": 0.09185004979372025, "learning_rate": 6.549276185069543e-05, "loss": 0.006300994753837585, "step": 121620 }, { "epoch": 34.52455293783707, "grad_norm": 0.065005362033844, "learning_rate": 6.548992336077208e-05, "loss": 0.007944876700639725, "step": 121630 }, { "epoch": 34.527391427760435, "grad_norm": 0.6355201601982117, "learning_rate": 6.548708487084872e-05, "loss": 0.004487662389874458, "step": 121640 }, { "epoch": 34.53022991768379, "grad_norm": 0.5922333002090454, "learning_rate": 6.548424638092536e-05, "loss": 0.006768350303173065, "step": 121650 }, { "epoch": 34.53306840760715, "grad_norm": 0.21683019399642944, "learning_rate": 6.548140789100199e-05, "loss": 0.006931056082248688, "step": 121660 }, { "epoch": 34.535906897530516, "grad_norm": 0.1115192100405693, "learning_rate": 6.547856940107863e-05, "loss": 0.012893748283386231, "step": 121670 }, { "epoch": 34.53874538745387, "grad_norm": 0.336562842130661, "learning_rate": 6.547573091115527e-05, "loss": 0.006259485334157944, "step": 121680 }, { "epoch": 34.541583877377235, "grad_norm": 7.569923400878906, "learning_rate": 6.547289242123191e-05, "loss": 0.005293838679790497, "step": 121690 }, { "epoch": 34.5444223673006, "grad_norm": 0.08628959208726883, "learning_rate": 6.547005393130855e-05, "loss": 0.005834162980318069, "step": 121700 }, { "epoch": 34.547260857223954, "grad_norm": 0.10599163174629211, "learning_rate": 6.546721544138519e-05, "loss": 0.009105783700942994, "step": 121710 }, { "epoch": 34.55009934714732, "grad_norm": 0.9066378474235535, "learning_rate": 6.546437695146182e-05, "loss": 0.013465219736099243, "step": 121720 }, { "epoch": 34.55293783707068, "grad_norm": 4.239067077636719, "learning_rate": 6.546153846153846e-05, "loss": 0.0032295994460582733, "step": 121730 }, { "epoch": 34.55577632699404, "grad_norm": 2.357401132583618, "learning_rate": 6.54586999716151e-05, "loss": 0.0016834085807204246, "step": 121740 }, { "epoch": 34.5586148169174, "grad_norm": 0.5335928201675415, "learning_rate": 6.545586148169174e-05, "loss": 0.012992380559444428, "step": 121750 }, { "epoch": 34.56145330684076, "grad_norm": 0.42786404490470886, "learning_rate": 6.545302299176839e-05, "loss": 0.0038946978747844696, "step": 121760 }, { "epoch": 34.564291796764124, "grad_norm": 0.14118963479995728, "learning_rate": 6.545018450184503e-05, "loss": 0.002773720771074295, "step": 121770 }, { "epoch": 34.56713028668748, "grad_norm": 2.5257198810577393, "learning_rate": 6.544734601192166e-05, "loss": 0.004955326020717621, "step": 121780 }, { "epoch": 34.56996877661084, "grad_norm": 0.03475048393011093, "learning_rate": 6.54445075219983e-05, "loss": 0.02089465260505676, "step": 121790 }, { "epoch": 34.572807266534205, "grad_norm": 0.02836805209517479, "learning_rate": 6.544166903207494e-05, "loss": 0.0017255166545510291, "step": 121800 }, { "epoch": 34.57564575645756, "grad_norm": 16.76558494567871, "learning_rate": 6.543883054215158e-05, "loss": 0.010348150879144669, "step": 121810 }, { "epoch": 34.578484246380924, "grad_norm": 0.2200988531112671, "learning_rate": 6.543599205222822e-05, "loss": 0.008636921644210815, "step": 121820 }, { "epoch": 34.58132273630429, "grad_norm": 0.24941736459732056, "learning_rate": 6.543315356230486e-05, "loss": 0.010071459412574767, "step": 121830 }, { "epoch": 34.58416122622765, "grad_norm": 11.740715026855469, "learning_rate": 6.54303150723815e-05, "loss": 0.01961822807788849, "step": 121840 }, { "epoch": 34.586999716151006, "grad_norm": 0.3134099245071411, "learning_rate": 6.542747658245813e-05, "loss": 0.006567877531051636, "step": 121850 }, { "epoch": 34.58983820607437, "grad_norm": 0.07159314304590225, "learning_rate": 6.542463809253477e-05, "loss": 0.001722918450832367, "step": 121860 }, { "epoch": 34.59267669599773, "grad_norm": 0.03851757198572159, "learning_rate": 6.542179960261141e-05, "loss": 0.004692503064870834, "step": 121870 }, { "epoch": 34.59551518592109, "grad_norm": 4.418304443359375, "learning_rate": 6.541896111268806e-05, "loss": 0.002869945764541626, "step": 121880 }, { "epoch": 34.59835367584445, "grad_norm": 0.13194134831428528, "learning_rate": 6.54161226227647e-05, "loss": 0.014493288099765777, "step": 121890 }, { "epoch": 34.60119216576781, "grad_norm": 0.026239335536956787, "learning_rate": 6.541328413284134e-05, "loss": 0.013114538788795472, "step": 121900 }, { "epoch": 34.604030655691176, "grad_norm": 2.371185302734375, "learning_rate": 6.541044564291797e-05, "loss": 0.0046601701527833935, "step": 121910 }, { "epoch": 34.60686914561453, "grad_norm": 0.5931879878044128, "learning_rate": 6.540760715299461e-05, "loss": 0.0009013133123517037, "step": 121920 }, { "epoch": 34.609707635537895, "grad_norm": 0.782575249671936, "learning_rate": 6.540476866307125e-05, "loss": 0.0014301033690571785, "step": 121930 }, { "epoch": 34.61254612546126, "grad_norm": 0.5368093848228455, "learning_rate": 6.540193017314789e-05, "loss": 0.0006373902782797813, "step": 121940 }, { "epoch": 34.61538461538461, "grad_norm": 0.34258419275283813, "learning_rate": 6.539909168322452e-05, "loss": 0.0054230794310569765, "step": 121950 }, { "epoch": 34.618223105307976, "grad_norm": 0.04904958978295326, "learning_rate": 6.539625319330117e-05, "loss": 0.0010701771825551987, "step": 121960 }, { "epoch": 34.62106159523134, "grad_norm": 1.5681754350662231, "learning_rate": 6.539341470337781e-05, "loss": 0.0008468529209494591, "step": 121970 }, { "epoch": 34.623900085154695, "grad_norm": 0.0404791496694088, "learning_rate": 6.539057621345444e-05, "loss": 0.001527385413646698, "step": 121980 }, { "epoch": 34.62673857507806, "grad_norm": 0.09911773353815079, "learning_rate": 6.538773772353108e-05, "loss": 0.013050371408462524, "step": 121990 }, { "epoch": 34.62957706500142, "grad_norm": 0.4889812469482422, "learning_rate": 6.538489923360772e-05, "loss": 0.003219247609376907, "step": 122000 }, { "epoch": 34.62957706500142, "eval_accuracy": 0.9772365994786036, "eval_loss": 0.09409791976213455, "eval_runtime": 36.2177, "eval_samples_per_second": 434.235, "eval_steps_per_second": 6.792, "step": 122000 }, { "epoch": 34.63241555492478, "grad_norm": 0.014425955712795258, "learning_rate": 6.538206074368435e-05, "loss": 0.0021192913874983786, "step": 122010 }, { "epoch": 34.63525404484814, "grad_norm": 1.281370759010315, "learning_rate": 6.537922225376101e-05, "loss": 0.007016663998365402, "step": 122020 }, { "epoch": 34.6380925347715, "grad_norm": 0.8618423342704773, "learning_rate": 6.537638376383765e-05, "loss": 0.008458619564771652, "step": 122030 }, { "epoch": 34.640931024694865, "grad_norm": 4.540952682495117, "learning_rate": 6.537354527391428e-05, "loss": 0.003393801674246788, "step": 122040 }, { "epoch": 34.64376951461822, "grad_norm": 0.5108017325401306, "learning_rate": 6.537070678399092e-05, "loss": 0.0076453924179077145, "step": 122050 }, { "epoch": 34.646608004541584, "grad_norm": 0.15601994097232819, "learning_rate": 6.536786829406756e-05, "loss": 0.0051150180399417875, "step": 122060 }, { "epoch": 34.64944649446495, "grad_norm": 0.15214397013187408, "learning_rate": 6.53650298041442e-05, "loss": 0.0012855183333158493, "step": 122070 }, { "epoch": 34.6522849843883, "grad_norm": 9.572919845581055, "learning_rate": 6.536219131422083e-05, "loss": 0.004869310930371284, "step": 122080 }, { "epoch": 34.655123474311665, "grad_norm": 5.249383926391602, "learning_rate": 6.535935282429748e-05, "loss": 0.018235479295253754, "step": 122090 }, { "epoch": 34.65796196423503, "grad_norm": 1.2119641304016113, "learning_rate": 6.535651433437412e-05, "loss": 0.00740322694182396, "step": 122100 }, { "epoch": 34.66080045415839, "grad_norm": 4.1656813621521, "learning_rate": 6.535367584445075e-05, "loss": 0.012357363849878312, "step": 122110 }, { "epoch": 34.66363894408175, "grad_norm": 0.25186097621917725, "learning_rate": 6.53508373545274e-05, "loss": 0.007507012039422989, "step": 122120 }, { "epoch": 34.66647743400511, "grad_norm": 4.207736492156982, "learning_rate": 6.534799886460404e-05, "loss": 0.00486699566245079, "step": 122130 }, { "epoch": 34.66931592392847, "grad_norm": 0.03466227650642395, "learning_rate": 6.534516037468066e-05, "loss": 0.001579996943473816, "step": 122140 }, { "epoch": 34.67215441385183, "grad_norm": 1.0843229293823242, "learning_rate": 6.53423218847573e-05, "loss": 0.0009604837745428085, "step": 122150 }, { "epoch": 34.67499290377519, "grad_norm": 5.203357219696045, "learning_rate": 6.533948339483396e-05, "loss": 0.002628064714372158, "step": 122160 }, { "epoch": 34.677831393698554, "grad_norm": 0.8902357220649719, "learning_rate": 6.533664490491059e-05, "loss": 0.0033511802554130553, "step": 122170 }, { "epoch": 34.68066988362191, "grad_norm": 0.010610664263367653, "learning_rate": 6.533380641498723e-05, "loss": 0.0011605240404605865, "step": 122180 }, { "epoch": 34.68350837354527, "grad_norm": 0.09965512901544571, "learning_rate": 6.533096792506387e-05, "loss": 0.006383158266544342, "step": 122190 }, { "epoch": 34.686346863468636, "grad_norm": 0.7923334240913391, "learning_rate": 6.532812943514051e-05, "loss": 0.006361933052539825, "step": 122200 }, { "epoch": 34.689185353392, "grad_norm": 0.2702206075191498, "learning_rate": 6.532529094521714e-05, "loss": 0.001541166752576828, "step": 122210 }, { "epoch": 34.692023843315354, "grad_norm": 0.15047922730445862, "learning_rate": 6.53224524552938e-05, "loss": 0.0014695154502987861, "step": 122220 }, { "epoch": 34.69486233323872, "grad_norm": 1.4697608947753906, "learning_rate": 6.531961396537044e-05, "loss": 0.003005795180797577, "step": 122230 }, { "epoch": 34.69770082316208, "grad_norm": 0.36808204650878906, "learning_rate": 6.531677547544706e-05, "loss": 0.0025530491024255753, "step": 122240 }, { "epoch": 34.700539313085436, "grad_norm": 0.3209376931190491, "learning_rate": 6.53139369855237e-05, "loss": 0.005331715196371078, "step": 122250 }, { "epoch": 34.7033778030088, "grad_norm": 0.23613156378269196, "learning_rate": 6.531109849560035e-05, "loss": 0.0014438318088650704, "step": 122260 }, { "epoch": 34.70621629293216, "grad_norm": 0.09373326599597931, "learning_rate": 6.530826000567697e-05, "loss": 0.0017262117937207222, "step": 122270 }, { "epoch": 34.70905478285552, "grad_norm": 1.9929554462432861, "learning_rate": 6.530542151575362e-05, "loss": 0.005877168476581573, "step": 122280 }, { "epoch": 34.71189327277888, "grad_norm": 0.1288769394159317, "learning_rate": 6.530258302583027e-05, "loss": 0.011479905247688294, "step": 122290 }, { "epoch": 34.71473176270224, "grad_norm": 0.6436294317245483, "learning_rate": 6.52997445359069e-05, "loss": 0.0016009407117962837, "step": 122300 }, { "epoch": 34.717570252625606, "grad_norm": 1.3221981525421143, "learning_rate": 6.529690604598354e-05, "loss": 0.0067739695310592655, "step": 122310 }, { "epoch": 34.72040874254896, "grad_norm": 0.8000048398971558, "learning_rate": 6.529406755606018e-05, "loss": 0.008165231347084046, "step": 122320 }, { "epoch": 34.723247232472325, "grad_norm": 0.051594968885183334, "learning_rate": 6.529122906613682e-05, "loss": 0.016006141901016235, "step": 122330 }, { "epoch": 34.72608572239569, "grad_norm": 2.6181650161743164, "learning_rate": 6.528839057621345e-05, "loss": 0.0026988165453076364, "step": 122340 }, { "epoch": 34.72892421231904, "grad_norm": 10.61365795135498, "learning_rate": 6.528555208629009e-05, "loss": 0.01342524290084839, "step": 122350 }, { "epoch": 34.731762702242406, "grad_norm": 0.035788919776678085, "learning_rate": 6.528271359636675e-05, "loss": 0.005187216773629188, "step": 122360 }, { "epoch": 34.73460119216577, "grad_norm": 0.2273540049791336, "learning_rate": 6.527987510644337e-05, "loss": 0.00918940156698227, "step": 122370 }, { "epoch": 34.73743968208913, "grad_norm": 0.07889752089977264, "learning_rate": 6.527703661652002e-05, "loss": 0.007723254710435867, "step": 122380 }, { "epoch": 34.74027817201249, "grad_norm": 1.8169149160385132, "learning_rate": 6.527419812659666e-05, "loss": 0.009465964138507843, "step": 122390 }, { "epoch": 34.74311666193585, "grad_norm": 0.19394253194332123, "learning_rate": 6.527135963667328e-05, "loss": 0.0416136771440506, "step": 122400 }, { "epoch": 34.745955151859214, "grad_norm": 0.20521309971809387, "learning_rate": 6.526852114674993e-05, "loss": 0.012162991613149644, "step": 122410 }, { "epoch": 34.74879364178257, "grad_norm": 0.14953184127807617, "learning_rate": 6.526568265682658e-05, "loss": 0.003669684007763863, "step": 122420 }, { "epoch": 34.75163213170593, "grad_norm": 0.3710046410560608, "learning_rate": 6.526284416690321e-05, "loss": 0.013011385500431061, "step": 122430 }, { "epoch": 34.754470621629295, "grad_norm": 0.33915814757347107, "learning_rate": 6.526000567697985e-05, "loss": 0.0038836974650621413, "step": 122440 }, { "epoch": 34.75730911155265, "grad_norm": 0.27698084712028503, "learning_rate": 6.525716718705649e-05, "loss": 0.007708054780960083, "step": 122450 }, { "epoch": 34.760147601476014, "grad_norm": 11.410043716430664, "learning_rate": 6.525432869713313e-05, "loss": 0.01593768149614334, "step": 122460 }, { "epoch": 34.76298609139938, "grad_norm": 7.053443431854248, "learning_rate": 6.525149020720976e-05, "loss": 0.003275322541594505, "step": 122470 }, { "epoch": 34.76582458132274, "grad_norm": 0.14001865684986115, "learning_rate": 6.52486517172864e-05, "loss": 0.0042711589485406876, "step": 122480 }, { "epoch": 34.768663071246095, "grad_norm": 11.014032363891602, "learning_rate": 6.524581322736306e-05, "loss": 0.01135389655828476, "step": 122490 }, { "epoch": 34.77150156116946, "grad_norm": 0.7579474449157715, "learning_rate": 6.524297473743969e-05, "loss": 0.0013331610709428788, "step": 122500 }, { "epoch": 34.77150156116946, "eval_accuracy": 0.9817511286322884, "eval_loss": 0.07391149550676346, "eval_runtime": 38.3883, "eval_samples_per_second": 409.682, "eval_steps_per_second": 6.408, "step": 122500 }, { "epoch": 34.77434005109282, "grad_norm": 0.15292173624038696, "learning_rate": 6.524013624751633e-05, "loss": 0.0028070930391550066, "step": 122510 }, { "epoch": 34.77717854101618, "grad_norm": 0.04930102825164795, "learning_rate": 6.523729775759297e-05, "loss": 0.00022975802421569823, "step": 122520 }, { "epoch": 34.78001703093954, "grad_norm": 0.21324211359024048, "learning_rate": 6.52344592676696e-05, "loss": 0.0005834188312292099, "step": 122530 }, { "epoch": 34.7828555208629, "grad_norm": 0.8098063468933105, "learning_rate": 6.523162077774624e-05, "loss": 0.010402503609657287, "step": 122540 }, { "epoch": 34.78569401078626, "grad_norm": 0.17342565953731537, "learning_rate": 6.522878228782288e-05, "loss": 0.002386544272303581, "step": 122550 }, { "epoch": 34.78853250070962, "grad_norm": 0.5520505905151367, "learning_rate": 6.522594379789952e-05, "loss": 0.0015814045444130897, "step": 122560 }, { "epoch": 34.791370990632984, "grad_norm": 0.10885947942733765, "learning_rate": 6.522310530797616e-05, "loss": 0.016569581627845765, "step": 122570 }, { "epoch": 34.79420948055635, "grad_norm": 8.966185569763184, "learning_rate": 6.52202668180528e-05, "loss": 0.01676609516143799, "step": 122580 }, { "epoch": 34.7970479704797, "grad_norm": 0.6929372549057007, "learning_rate": 6.521742832812944e-05, "loss": 0.0012503448873758317, "step": 122590 }, { "epoch": 34.799886460403066, "grad_norm": 1.5317367315292358, "learning_rate": 6.521458983820607e-05, "loss": 0.0032865080982446672, "step": 122600 }, { "epoch": 34.80272495032643, "grad_norm": 0.31811439990997314, "learning_rate": 6.521175134828271e-05, "loss": 0.0008571038022637367, "step": 122610 }, { "epoch": 34.805563440249784, "grad_norm": 0.10783814638853073, "learning_rate": 6.520891285835935e-05, "loss": 0.002834249101579189, "step": 122620 }, { "epoch": 34.80840193017315, "grad_norm": 0.10075381398200989, "learning_rate": 6.5206074368436e-05, "loss": 0.007060011476278305, "step": 122630 }, { "epoch": 34.81124042009651, "grad_norm": 0.11699476093053818, "learning_rate": 6.520323587851264e-05, "loss": 0.006471367180347442, "step": 122640 }, { "epoch": 34.814078910019866, "grad_norm": 1.4807755947113037, "learning_rate": 6.520039738858928e-05, "loss": 0.0015289900824427606, "step": 122650 }, { "epoch": 34.81691739994323, "grad_norm": 0.1574181467294693, "learning_rate": 6.51975588986659e-05, "loss": 0.0123018816113472, "step": 122660 }, { "epoch": 34.81975588986659, "grad_norm": 0.6166355013847351, "learning_rate": 6.519472040874255e-05, "loss": 0.002204606682062149, "step": 122670 }, { "epoch": 34.822594379789955, "grad_norm": 0.682329535484314, "learning_rate": 6.519188191881919e-05, "loss": 0.0007020270451903344, "step": 122680 }, { "epoch": 34.82543286971331, "grad_norm": 0.18771083652973175, "learning_rate": 6.518904342889583e-05, "loss": 0.0018735038116574287, "step": 122690 }, { "epoch": 34.82827135963667, "grad_norm": 0.034794799983501434, "learning_rate": 6.518620493897247e-05, "loss": 0.001110372692346573, "step": 122700 }, { "epoch": 34.831109849560036, "grad_norm": 0.3922780454158783, "learning_rate": 6.518336644904911e-05, "loss": 0.0014934269711375237, "step": 122710 }, { "epoch": 34.83394833948339, "grad_norm": 1.1881606578826904, "learning_rate": 6.518052795912574e-05, "loss": 0.0021876346319913862, "step": 122720 }, { "epoch": 34.836786829406755, "grad_norm": 0.15866932272911072, "learning_rate": 6.517768946920238e-05, "loss": 0.006699728220701218, "step": 122730 }, { "epoch": 34.83962531933012, "grad_norm": 0.1461409032344818, "learning_rate": 6.517485097927902e-05, "loss": 0.014073586463928223, "step": 122740 }, { "epoch": 34.84246380925348, "grad_norm": 0.10644885897636414, "learning_rate": 6.517201248935567e-05, "loss": 0.006301476806402207, "step": 122750 }, { "epoch": 34.845302299176836, "grad_norm": 0.5357956290245056, "learning_rate": 6.51691739994323e-05, "loss": 0.0025371678173542024, "step": 122760 }, { "epoch": 34.8481407891002, "grad_norm": 2.279330253601074, "learning_rate": 6.516633550950895e-05, "loss": 0.002277919091284275, "step": 122770 }, { "epoch": 34.85097927902356, "grad_norm": 5.931685447692871, "learning_rate": 6.516349701958559e-05, "loss": 0.005177435278892517, "step": 122780 }, { "epoch": 34.85381776894692, "grad_norm": 0.22154854238033295, "learning_rate": 6.516065852966222e-05, "loss": 0.0020461158826947214, "step": 122790 }, { "epoch": 34.85665625887028, "grad_norm": 1.4689184427261353, "learning_rate": 6.515782003973886e-05, "loss": 0.004458785057067871, "step": 122800 }, { "epoch": 34.859494748793644, "grad_norm": 13.57446002960205, "learning_rate": 6.51549815498155e-05, "loss": 0.028621208667755128, "step": 122810 }, { "epoch": 34.862333238717, "grad_norm": 0.7531858086585999, "learning_rate": 6.515214305989214e-05, "loss": 0.007773163914680481, "step": 122820 }, { "epoch": 34.86517172864036, "grad_norm": 0.23772414028644562, "learning_rate": 6.514930456996878e-05, "loss": 0.005782382935285569, "step": 122830 }, { "epoch": 34.868010218563725, "grad_norm": 0.16198234260082245, "learning_rate": 6.514646608004542e-05, "loss": 0.0022294767200946807, "step": 122840 }, { "epoch": 34.87084870848709, "grad_norm": 0.19853557646274567, "learning_rate": 6.514362759012205e-05, "loss": 0.002318981848657131, "step": 122850 }, { "epoch": 34.873687198410444, "grad_norm": 0.2553081810474396, "learning_rate": 6.51407891001987e-05, "loss": 0.00657733827829361, "step": 122860 }, { "epoch": 34.87652568833381, "grad_norm": 0.07123234868049622, "learning_rate": 6.513795061027533e-05, "loss": 0.0018893258646130561, "step": 122870 }, { "epoch": 34.87936417825717, "grad_norm": 7.538267135620117, "learning_rate": 6.513511212035198e-05, "loss": 0.005703549087047577, "step": 122880 }, { "epoch": 34.882202668180526, "grad_norm": 0.04146289825439453, "learning_rate": 6.513227363042862e-05, "loss": 0.003437281399965286, "step": 122890 }, { "epoch": 34.88504115810389, "grad_norm": 0.09085894376039505, "learning_rate": 6.512943514050526e-05, "loss": 0.00788739174604416, "step": 122900 }, { "epoch": 34.88787964802725, "grad_norm": 1.1942429542541504, "learning_rate": 6.51265966505819e-05, "loss": 0.006122658774256706, "step": 122910 }, { "epoch": 34.89071813795061, "grad_norm": 0.052602618932724, "learning_rate": 6.512375816065853e-05, "loss": 0.00036046113818883897, "step": 122920 }, { "epoch": 34.89355662787397, "grad_norm": 0.060550857335329056, "learning_rate": 6.512091967073517e-05, "loss": 0.0030851785093545915, "step": 122930 }, { "epoch": 34.89639511779733, "grad_norm": 0.03363935276865959, "learning_rate": 6.511808118081181e-05, "loss": 0.0018647611141204834, "step": 122940 }, { "epoch": 34.899233607720696, "grad_norm": 0.15196493268013, "learning_rate": 6.511524269088844e-05, "loss": 0.004386322945356369, "step": 122950 }, { "epoch": 34.90207209764405, "grad_norm": 0.06520487368106842, "learning_rate": 6.51124042009651e-05, "loss": 0.0015680057927966117, "step": 122960 }, { "epoch": 34.904910587567414, "grad_norm": 0.011880912818014622, "learning_rate": 6.510956571104173e-05, "loss": 0.0025007745251059534, "step": 122970 }, { "epoch": 34.90774907749078, "grad_norm": 0.013382472097873688, "learning_rate": 6.510672722111836e-05, "loss": 0.0008995525538921356, "step": 122980 }, { "epoch": 34.91058756741413, "grad_norm": 0.640387237071991, "learning_rate": 6.5103888731195e-05, "loss": 0.000486336275935173, "step": 122990 }, { "epoch": 34.913426057337496, "grad_norm": 12.399213790893555, "learning_rate": 6.510105024127165e-05, "loss": 0.007293181121349334, "step": 123000 }, { "epoch": 34.913426057337496, "eval_accuracy": 0.9796528263495898, "eval_loss": 0.07607545703649521, "eval_runtime": 36.7871, "eval_samples_per_second": 427.514, "eval_steps_per_second": 6.687, "step": 123000 }, { "epoch": 34.91626454726086, "grad_norm": 0.18807850778102875, "learning_rate": 6.509821175134829e-05, "loss": 0.0007898412644863128, "step": 123010 }, { "epoch": 34.919103037184215, "grad_norm": 0.4516342878341675, "learning_rate": 6.509537326142493e-05, "loss": 0.0005842907354235649, "step": 123020 }, { "epoch": 34.92194152710758, "grad_norm": 0.2679778039455414, "learning_rate": 6.509253477150157e-05, "loss": 0.002417266555130482, "step": 123030 }, { "epoch": 34.92478001703094, "grad_norm": 0.10616938769817352, "learning_rate": 6.508969628157821e-05, "loss": 0.0018425952643156053, "step": 123040 }, { "epoch": 34.9276185069543, "grad_norm": 0.009424840100109577, "learning_rate": 6.508685779165484e-05, "loss": 0.001775861531496048, "step": 123050 }, { "epoch": 34.93045699687766, "grad_norm": 1.9731292724609375, "learning_rate": 6.508401930173148e-05, "loss": 0.02884286344051361, "step": 123060 }, { "epoch": 34.93329548680102, "grad_norm": 0.5605277419090271, "learning_rate": 6.508118081180812e-05, "loss": 0.0093752421438694, "step": 123070 }, { "epoch": 34.936133976724385, "grad_norm": 0.13792361319065094, "learning_rate": 6.507834232188475e-05, "loss": 0.008134254813194275, "step": 123080 }, { "epoch": 34.93897246664774, "grad_norm": 0.16005973517894745, "learning_rate": 6.50755038319614e-05, "loss": 0.002764469012618065, "step": 123090 }, { "epoch": 34.9418109565711, "grad_norm": 0.04637905955314636, "learning_rate": 6.507266534203805e-05, "loss": 0.0049994345754384995, "step": 123100 }, { "epoch": 34.944649446494466, "grad_norm": 6.464202404022217, "learning_rate": 6.506982685211467e-05, "loss": 0.0024586906656622887, "step": 123110 }, { "epoch": 34.94748793641783, "grad_norm": 0.005788188893347979, "learning_rate": 6.506698836219131e-05, "loss": 0.00893217921257019, "step": 123120 }, { "epoch": 34.950326426341185, "grad_norm": 0.18921402096748352, "learning_rate": 6.506414987226796e-05, "loss": 0.005322911590337753, "step": 123130 }, { "epoch": 34.95316491626455, "grad_norm": 0.24600842595100403, "learning_rate": 6.50613113823446e-05, "loss": 0.002672726660966873, "step": 123140 }, { "epoch": 34.95600340618791, "grad_norm": 5.515442371368408, "learning_rate": 6.505847289242124e-05, "loss": 0.003938392922282219, "step": 123150 }, { "epoch": 34.95884189611127, "grad_norm": 0.3108740746974945, "learning_rate": 6.505563440249788e-05, "loss": 0.004232876747846603, "step": 123160 }, { "epoch": 34.96168038603463, "grad_norm": 0.4928527772426605, "learning_rate": 6.505279591257452e-05, "loss": 0.003989210724830628, "step": 123170 }, { "epoch": 34.96451887595799, "grad_norm": 0.1743142306804657, "learning_rate": 6.504995742265115e-05, "loss": 0.0018789689987897874, "step": 123180 }, { "epoch": 34.96735736588135, "grad_norm": 0.10056214034557343, "learning_rate": 6.504711893272779e-05, "loss": 0.012625573575496674, "step": 123190 }, { "epoch": 34.97019585580471, "grad_norm": 0.3220556974411011, "learning_rate": 6.504428044280443e-05, "loss": 0.0022247254848480223, "step": 123200 }, { "epoch": 34.973034345728074, "grad_norm": 0.20200663805007935, "learning_rate": 6.504144195288106e-05, "loss": 0.007335352897644043, "step": 123210 }, { "epoch": 34.97587283565144, "grad_norm": 6.61941385269165, "learning_rate": 6.503860346295771e-05, "loss": 0.02345573902130127, "step": 123220 }, { "epoch": 34.97871132557479, "grad_norm": 0.12208070605993271, "learning_rate": 6.503576497303436e-05, "loss": 0.0028047084808349608, "step": 123230 }, { "epoch": 34.981549815498155, "grad_norm": 0.003833126276731491, "learning_rate": 6.503292648311098e-05, "loss": 0.01412026435136795, "step": 123240 }, { "epoch": 34.98438830542152, "grad_norm": 0.1621592789888382, "learning_rate": 6.503008799318763e-05, "loss": 0.0021712716668844225, "step": 123250 }, { "epoch": 34.987226795344874, "grad_norm": 2.048734664916992, "learning_rate": 6.502724950326427e-05, "loss": 0.011965331435203553, "step": 123260 }, { "epoch": 34.99006528526824, "grad_norm": 2.6311991214752197, "learning_rate": 6.502441101334091e-05, "loss": 0.006735803186893463, "step": 123270 }, { "epoch": 34.9929037751916, "grad_norm": 0.2660761773586273, "learning_rate": 6.502157252341754e-05, "loss": 0.00036423783749341967, "step": 123280 }, { "epoch": 34.995742265114956, "grad_norm": 2.695145845413208, "learning_rate": 6.501873403349419e-05, "loss": 0.0057644002139568325, "step": 123290 }, { "epoch": 34.99858075503832, "grad_norm": 0.6370438933372498, "learning_rate": 6.501589554357083e-05, "loss": 0.000436880998313427, "step": 123300 }, { "epoch": 35.00141924496168, "grad_norm": 0.049516189843416214, "learning_rate": 6.501305705364746e-05, "loss": 0.0010453773662447929, "step": 123310 }, { "epoch": 35.004257734885044, "grad_norm": 0.05307980999350548, "learning_rate": 6.50102185637241e-05, "loss": 0.0015102460980415343, "step": 123320 }, { "epoch": 35.0070962248084, "grad_norm": 0.021721815690398216, "learning_rate": 6.500738007380074e-05, "loss": 0.0015125978738069535, "step": 123330 }, { "epoch": 35.00993471473176, "grad_norm": 3.8435049057006836, "learning_rate": 6.500454158387737e-05, "loss": 0.001754285953938961, "step": 123340 }, { "epoch": 35.012773204655126, "grad_norm": 0.42079415917396545, "learning_rate": 6.500170309395403e-05, "loss": 0.0010932808741927146, "step": 123350 }, { "epoch": 35.01561169457848, "grad_norm": 0.09296359866857529, "learning_rate": 6.499886460403067e-05, "loss": 0.0004558200016617775, "step": 123360 }, { "epoch": 35.018450184501845, "grad_norm": 0.023139141499996185, "learning_rate": 6.49960261141073e-05, "loss": 0.0005970321595668793, "step": 123370 }, { "epoch": 35.02128867442521, "grad_norm": 0.28540363907814026, "learning_rate": 6.499318762418394e-05, "loss": 0.0007187850773334503, "step": 123380 }, { "epoch": 35.02412716434856, "grad_norm": 0.29550930857658386, "learning_rate": 6.499034913426058e-05, "loss": 0.0007444368675351143, "step": 123390 }, { "epoch": 35.026965654271926, "grad_norm": 14.752996444702148, "learning_rate": 6.498751064433722e-05, "loss": 0.010375263541936875, "step": 123400 }, { "epoch": 35.02980414419529, "grad_norm": 0.01710566319525242, "learning_rate": 6.498467215441385e-05, "loss": 0.0012709982693195343, "step": 123410 }, { "epoch": 35.03264263411865, "grad_norm": 0.013226098380982876, "learning_rate": 6.49818336644905e-05, "loss": 0.04055868983268738, "step": 123420 }, { "epoch": 35.03548112404201, "grad_norm": 0.009719572961330414, "learning_rate": 6.497899517456714e-05, "loss": 0.0006836378946900368, "step": 123430 }, { "epoch": 35.03831961396537, "grad_norm": 8.580182075500488, "learning_rate": 6.497615668464377e-05, "loss": 0.003999722748994827, "step": 123440 }, { "epoch": 35.04115810388873, "grad_norm": 1.2014278173446655, "learning_rate": 6.497331819472041e-05, "loss": 0.011301187425851822, "step": 123450 }, { "epoch": 35.04399659381209, "grad_norm": 6.077699661254883, "learning_rate": 6.497047970479705e-05, "loss": 0.008063750714063645, "step": 123460 }, { "epoch": 35.04683508373545, "grad_norm": 0.03590520843863487, "learning_rate": 6.496764121487368e-05, "loss": 0.004330066964030266, "step": 123470 }, { "epoch": 35.049673573658815, "grad_norm": 0.08007744699716568, "learning_rate": 6.496480272495032e-05, "loss": 0.0025504743680357933, "step": 123480 }, { "epoch": 35.05251206358217, "grad_norm": 0.62007737159729, "learning_rate": 6.496196423502698e-05, "loss": 0.004441060870885849, "step": 123490 }, { "epoch": 35.055350553505534, "grad_norm": 0.2899480164051056, "learning_rate": 6.49591257451036e-05, "loss": 0.0015163199976086616, "step": 123500 }, { "epoch": 35.055350553505534, "eval_accuracy": 0.9790169771730146, "eval_loss": 0.08398086577653885, "eval_runtime": 38.2593, "eval_samples_per_second": 411.063, "eval_steps_per_second": 6.43, "step": 123500 }, { "epoch": 35.0581890434289, "grad_norm": 0.14958742260932922, "learning_rate": 6.495628725518025e-05, "loss": 0.002635873854160309, "step": 123510 }, { "epoch": 35.06102753335226, "grad_norm": 11.139522552490234, "learning_rate": 6.495344876525689e-05, "loss": 0.011503055691719055, "step": 123520 }, { "epoch": 35.063866023275615, "grad_norm": 0.0314050018787384, "learning_rate": 6.495061027533353e-05, "loss": 0.001224864087998867, "step": 123530 }, { "epoch": 35.06670451319898, "grad_norm": 0.41283831000328064, "learning_rate": 6.494777178541016e-05, "loss": 0.0044818691909313205, "step": 123540 }, { "epoch": 35.06954300312234, "grad_norm": 13.768477439880371, "learning_rate": 6.494493329548681e-05, "loss": 0.0127620130777359, "step": 123550 }, { "epoch": 35.0723814930457, "grad_norm": 6.903811931610107, "learning_rate": 6.494209480556344e-05, "loss": 0.0041892193257808685, "step": 123560 }, { "epoch": 35.07521998296906, "grad_norm": 0.1377629190683365, "learning_rate": 6.493954016463242e-05, "loss": 0.013849377632141113, "step": 123570 }, { "epoch": 35.07805847289242, "grad_norm": 0.022815650328993797, "learning_rate": 6.493670167470906e-05, "loss": 0.0024609871208667753, "step": 123580 }, { "epoch": 35.080896962815785, "grad_norm": 0.650122880935669, "learning_rate": 6.493386318478569e-05, "loss": 0.023203952610492705, "step": 123590 }, { "epoch": 35.08373545273914, "grad_norm": 0.15257388353347778, "learning_rate": 6.493102469486234e-05, "loss": 0.0009814295917749405, "step": 123600 }, { "epoch": 35.086573942662504, "grad_norm": 7.189149856567383, "learning_rate": 6.492818620493897e-05, "loss": 0.016376031935214995, "step": 123610 }, { "epoch": 35.08941243258587, "grad_norm": 0.01726926676928997, "learning_rate": 6.492534771501561e-05, "loss": 0.005174913629889488, "step": 123620 }, { "epoch": 35.09225092250922, "grad_norm": 0.48672306537628174, "learning_rate": 6.492250922509226e-05, "loss": 0.005384146422147751, "step": 123630 }, { "epoch": 35.095089412432586, "grad_norm": 0.22220493853092194, "learning_rate": 6.49196707351689e-05, "loss": 0.007276126742362976, "step": 123640 }, { "epoch": 35.09792790235595, "grad_norm": 0.05245715752243996, "learning_rate": 6.491683224524552e-05, "loss": 0.00022797323763370513, "step": 123650 }, { "epoch": 35.100766392279304, "grad_norm": 0.12831860780715942, "learning_rate": 6.491399375532217e-05, "loss": 0.0012242598459124565, "step": 123660 }, { "epoch": 35.10360488220267, "grad_norm": 0.29633215069770813, "learning_rate": 6.491115526539882e-05, "loss": 0.000760449655354023, "step": 123670 }, { "epoch": 35.10644337212603, "grad_norm": 1.9251534938812256, "learning_rate": 6.490831677547545e-05, "loss": 0.003622104227542877, "step": 123680 }, { "epoch": 35.10928186204939, "grad_norm": 0.070241279900074, "learning_rate": 6.490547828555209e-05, "loss": 0.01020214930176735, "step": 123690 }, { "epoch": 35.11212035197275, "grad_norm": 0.7982892990112305, "learning_rate": 6.490263979562873e-05, "loss": 0.015201959013938903, "step": 123700 }, { "epoch": 35.11495884189611, "grad_norm": 0.009328081272542477, "learning_rate": 6.489980130570536e-05, "loss": 0.011221584677696229, "step": 123710 }, { "epoch": 35.117797331819474, "grad_norm": 0.339357852935791, "learning_rate": 6.4896962815782e-05, "loss": 0.001848510280251503, "step": 123720 }, { "epoch": 35.12063582174283, "grad_norm": 2.109921932220459, "learning_rate": 6.489412432585866e-05, "loss": 0.02281137853860855, "step": 123730 }, { "epoch": 35.12347431166619, "grad_norm": 0.004188202787190676, "learning_rate": 6.489128583593528e-05, "loss": 0.0006410703063011169, "step": 123740 }, { "epoch": 35.126312801589556, "grad_norm": 0.04830348864197731, "learning_rate": 6.488844734601192e-05, "loss": 0.005313502624630928, "step": 123750 }, { "epoch": 35.12915129151291, "grad_norm": 0.02076367288827896, "learning_rate": 6.488560885608857e-05, "loss": 0.006242383271455765, "step": 123760 }, { "epoch": 35.131989781436275, "grad_norm": 0.13393062353134155, "learning_rate": 6.488277036616521e-05, "loss": 0.0009056819602847099, "step": 123770 }, { "epoch": 35.13482827135964, "grad_norm": 0.05692385509610176, "learning_rate": 6.487993187624184e-05, "loss": 0.011040911078453064, "step": 123780 }, { "epoch": 35.137666761283, "grad_norm": 0.6177388429641724, "learning_rate": 6.487709338631848e-05, "loss": 0.0020878192037343977, "step": 123790 }, { "epoch": 35.140505251206356, "grad_norm": 0.06693873554468155, "learning_rate": 6.487425489639513e-05, "loss": 0.0028885383158922195, "step": 123800 }, { "epoch": 35.14334374112972, "grad_norm": 0.16910810768604279, "learning_rate": 6.487141640647176e-05, "loss": 0.005925703793764114, "step": 123810 }, { "epoch": 35.14618223105308, "grad_norm": 1.5228075981140137, "learning_rate": 6.48685779165484e-05, "loss": 0.005644802376627922, "step": 123820 }, { "epoch": 35.14902072097644, "grad_norm": 0.49124622344970703, "learning_rate": 6.486573942662504e-05, "loss": 0.006875814497470855, "step": 123830 }, { "epoch": 35.1518592108998, "grad_norm": 0.015348139218986034, "learning_rate": 6.486290093670167e-05, "loss": 0.008440633118152619, "step": 123840 }, { "epoch": 35.154697700823164, "grad_norm": 0.3425465226173401, "learning_rate": 6.486006244677831e-05, "loss": 0.004538555815815926, "step": 123850 }, { "epoch": 35.15753619074652, "grad_norm": 3.7228200435638428, "learning_rate": 6.485722395685495e-05, "loss": 0.008059795200824737, "step": 123860 }, { "epoch": 35.16037468066988, "grad_norm": 0.04328148439526558, "learning_rate": 6.48543854669316e-05, "loss": 0.0014240425080060959, "step": 123870 }, { "epoch": 35.163213170593245, "grad_norm": 0.007446335628628731, "learning_rate": 6.485154697700824e-05, "loss": 0.0033112399280071257, "step": 123880 }, { "epoch": 35.16605166051661, "grad_norm": 0.024084554985165596, "learning_rate": 6.484870848708488e-05, "loss": 0.0011993620544672012, "step": 123890 }, { "epoch": 35.168890150439964, "grad_norm": 0.6375259757041931, "learning_rate": 6.484586999716152e-05, "loss": 0.004378324002027511, "step": 123900 }, { "epoch": 35.17172864036333, "grad_norm": 0.09935866296291351, "learning_rate": 6.484303150723815e-05, "loss": 0.0010412482544779778, "step": 123910 }, { "epoch": 35.17456713028669, "grad_norm": 0.06936363875865936, "learning_rate": 6.484019301731479e-05, "loss": 0.0014139214530587197, "step": 123920 }, { "epoch": 35.177405620210045, "grad_norm": 0.34663790464401245, "learning_rate": 6.483735452739144e-05, "loss": 0.004851949587464333, "step": 123930 }, { "epoch": 35.18024411013341, "grad_norm": 1.1992849111557007, "learning_rate": 6.483451603746807e-05, "loss": 0.0030792169272899628, "step": 123940 }, { "epoch": 35.18308260005677, "grad_norm": 0.6974629759788513, "learning_rate": 6.483167754754471e-05, "loss": 0.000932309590280056, "step": 123950 }, { "epoch": 35.185921089980134, "grad_norm": 1.9948182106018066, "learning_rate": 6.482883905762135e-05, "loss": 0.024185468256473542, "step": 123960 }, { "epoch": 35.18875957990349, "grad_norm": 2.224121332168579, "learning_rate": 6.482600056769798e-05, "loss": 0.004386747628450394, "step": 123970 }, { "epoch": 35.19159806982685, "grad_norm": 0.04608380049467087, "learning_rate": 6.482316207777462e-05, "loss": 0.002337481640279293, "step": 123980 }, { "epoch": 35.194436559750216, "grad_norm": 0.01172657124698162, "learning_rate": 6.482032358785126e-05, "loss": 0.004720882326364517, "step": 123990 }, { "epoch": 35.19727504967357, "grad_norm": 7.966491222381592, "learning_rate": 6.48174850979279e-05, "loss": 0.005904601514339447, "step": 124000 }, { "epoch": 35.19727504967357, "eval_accuracy": 0.9767279201373434, "eval_loss": 0.09493730962276459, "eval_runtime": 33.3429, "eval_samples_per_second": 471.675, "eval_steps_per_second": 7.378, "step": 124000 }, { "epoch": 35.200113539596934, "grad_norm": 0.5382258296012878, "learning_rate": 6.481464660800455e-05, "loss": 0.029444620013237, "step": 124010 }, { "epoch": 35.2029520295203, "grad_norm": 0.6053229570388794, "learning_rate": 6.481180811808119e-05, "loss": 0.0038594935089349748, "step": 124020 }, { "epoch": 35.20579051944365, "grad_norm": 0.11745072156190872, "learning_rate": 6.480896962815783e-05, "loss": 0.0015139099210500716, "step": 124030 }, { "epoch": 35.208629009367016, "grad_norm": 0.012818113900721073, "learning_rate": 6.480613113823446e-05, "loss": 0.0012288911268115044, "step": 124040 }, { "epoch": 35.21146749929038, "grad_norm": 0.008590176701545715, "learning_rate": 6.48032926483111e-05, "loss": 0.0008869459852576256, "step": 124050 }, { "epoch": 35.21430598921374, "grad_norm": 4.432307243347168, "learning_rate": 6.480045415838774e-05, "loss": 0.003976985439658165, "step": 124060 }, { "epoch": 35.2171444791371, "grad_norm": 0.5116140842437744, "learning_rate": 6.479761566846438e-05, "loss": 0.0022502655163407327, "step": 124070 }, { "epoch": 35.21998296906046, "grad_norm": 3.2094454765319824, "learning_rate": 6.479477717854102e-05, "loss": 0.0066271178424358364, "step": 124080 }, { "epoch": 35.22282145898382, "grad_norm": 1.250599980354309, "learning_rate": 6.479193868861766e-05, "loss": 0.003456437960267067, "step": 124090 }, { "epoch": 35.22565994890718, "grad_norm": 0.09412109851837158, "learning_rate": 6.478910019869429e-05, "loss": 0.005153971910476685, "step": 124100 }, { "epoch": 35.22849843883054, "grad_norm": 0.028143858537077904, "learning_rate": 6.478626170877093e-05, "loss": 0.002704550698399544, "step": 124110 }, { "epoch": 35.231336928753905, "grad_norm": 0.0760527029633522, "learning_rate": 6.478342321884757e-05, "loss": 0.004979163408279419, "step": 124120 }, { "epoch": 35.23417541867726, "grad_norm": 0.26068612933158875, "learning_rate": 6.478058472892422e-05, "loss": 0.0058493237942457196, "step": 124130 }, { "epoch": 35.23701390860062, "grad_norm": 0.2730172276496887, "learning_rate": 6.477774623900086e-05, "loss": 0.00713442787528038, "step": 124140 }, { "epoch": 35.239852398523986, "grad_norm": 0.3479820787906647, "learning_rate": 6.47749077490775e-05, "loss": 0.007586299628019333, "step": 124150 }, { "epoch": 35.24269088844735, "grad_norm": 0.25075456500053406, "learning_rate": 6.477206925915414e-05, "loss": 0.009383343905210496, "step": 124160 }, { "epoch": 35.245529378370705, "grad_norm": 2.2304179668426514, "learning_rate": 6.476923076923077e-05, "loss": 0.012132878601551055, "step": 124170 }, { "epoch": 35.24836786829407, "grad_norm": 3.1994800567626953, "learning_rate": 6.476639227930741e-05, "loss": 0.020666366815567015, "step": 124180 }, { "epoch": 35.25120635821743, "grad_norm": 8.556618690490723, "learning_rate": 6.476355378938405e-05, "loss": 0.010692907869815827, "step": 124190 }, { "epoch": 35.254044848140786, "grad_norm": 0.1160670667886734, "learning_rate": 6.476071529946069e-05, "loss": 0.013519051671028137, "step": 124200 }, { "epoch": 35.25688333806415, "grad_norm": 7.519136905670166, "learning_rate": 6.475787680953733e-05, "loss": 0.010418834537267685, "step": 124210 }, { "epoch": 35.25972182798751, "grad_norm": 0.2545138895511627, "learning_rate": 6.475503831961397e-05, "loss": 0.009040071070194245, "step": 124220 }, { "epoch": 35.26256031791087, "grad_norm": 0.9234897494316101, "learning_rate": 6.47521998296906e-05, "loss": 0.0006136469542980194, "step": 124230 }, { "epoch": 35.26539880783423, "grad_norm": 0.04678085446357727, "learning_rate": 6.474936133976724e-05, "loss": 0.002965590730309486, "step": 124240 }, { "epoch": 35.268237297757594, "grad_norm": 0.6596891283988953, "learning_rate": 6.474652284984388e-05, "loss": 0.0009629972279071808, "step": 124250 }, { "epoch": 35.27107578768096, "grad_norm": 3.6739883422851562, "learning_rate": 6.474368435992053e-05, "loss": 0.007140924036502838, "step": 124260 }, { "epoch": 35.27391427760431, "grad_norm": 0.01605798304080963, "learning_rate": 6.474084586999717e-05, "loss": 0.005642064660787582, "step": 124270 }, { "epoch": 35.276752767527675, "grad_norm": 0.03146585822105408, "learning_rate": 6.473800738007381e-05, "loss": 0.0013707360252738, "step": 124280 }, { "epoch": 35.27959125745104, "grad_norm": 1.233693242073059, "learning_rate": 6.473516889015045e-05, "loss": 0.0009679760783910752, "step": 124290 }, { "epoch": 35.282429747374394, "grad_norm": 0.02957056649029255, "learning_rate": 6.473233040022708e-05, "loss": 0.00043645836412906645, "step": 124300 }, { "epoch": 35.28526823729776, "grad_norm": 0.2324937880039215, "learning_rate": 6.472949191030372e-05, "loss": 0.01030074954032898, "step": 124310 }, { "epoch": 35.28810672722112, "grad_norm": 0.010628834366798401, "learning_rate": 6.472665342038036e-05, "loss": 0.02133994847536087, "step": 124320 }, { "epoch": 35.29094521714448, "grad_norm": 1.508739709854126, "learning_rate": 6.4723814930457e-05, "loss": 0.023463344573974608, "step": 124330 }, { "epoch": 35.29378370706784, "grad_norm": 0.16910964250564575, "learning_rate": 6.472097644053364e-05, "loss": 0.0029903683811426164, "step": 124340 }, { "epoch": 35.2966221969912, "grad_norm": 7.550039768218994, "learning_rate": 6.471813795061028e-05, "loss": 0.0244795560836792, "step": 124350 }, { "epoch": 35.299460686914564, "grad_norm": 0.6013655662536621, "learning_rate": 6.471529946068691e-05, "loss": 0.00819055512547493, "step": 124360 }, { "epoch": 35.30229917683792, "grad_norm": 0.24071133136749268, "learning_rate": 6.471246097076355e-05, "loss": 0.004899998754262924, "step": 124370 }, { "epoch": 35.30513766676128, "grad_norm": 2.398905038833618, "learning_rate": 6.47096224808402e-05, "loss": 0.00930977687239647, "step": 124380 }, { "epoch": 35.307976156684646, "grad_norm": 0.027568424120545387, "learning_rate": 6.470678399091684e-05, "loss": 0.002627124823629856, "step": 124390 }, { "epoch": 35.310814646608, "grad_norm": 10.352180480957031, "learning_rate": 6.470394550099348e-05, "loss": 0.032127848267555235, "step": 124400 }, { "epoch": 35.313653136531364, "grad_norm": 0.09132587909698486, "learning_rate": 6.470110701107012e-05, "loss": 0.01727146953344345, "step": 124410 }, { "epoch": 35.31649162645473, "grad_norm": 0.6462915539741516, "learning_rate": 6.469826852114676e-05, "loss": 0.0018666421994566918, "step": 124420 }, { "epoch": 35.31933011637809, "grad_norm": 0.0174026470631361, "learning_rate": 6.469543003122339e-05, "loss": 0.0021919967606663705, "step": 124430 }, { "epoch": 35.322168606301446, "grad_norm": 2.06538987159729, "learning_rate": 6.469259154130003e-05, "loss": 0.005044910311698914, "step": 124440 }, { "epoch": 35.32500709622481, "grad_norm": 0.21077683568000793, "learning_rate": 6.468975305137667e-05, "loss": 0.0009031282737851143, "step": 124450 }, { "epoch": 35.32784558614817, "grad_norm": 0.08587754517793655, "learning_rate": 6.46869145614533e-05, "loss": 0.0018555898219347, "step": 124460 }, { "epoch": 35.33068407607153, "grad_norm": 0.4089818596839905, "learning_rate": 6.468407607152995e-05, "loss": 0.0030036384239792826, "step": 124470 }, { "epoch": 35.33352256599489, "grad_norm": 10.95134162902832, "learning_rate": 6.46812375816066e-05, "loss": 0.004241514950990677, "step": 124480 }, { "epoch": 35.33636105591825, "grad_norm": 0.25443941354751587, "learning_rate": 6.467839909168322e-05, "loss": 0.006456106901168823, "step": 124490 }, { "epoch": 35.33919954584161, "grad_norm": 0.17487551271915436, "learning_rate": 6.467556060175986e-05, "loss": 0.0007046220824122428, "step": 124500 }, { "epoch": 35.33919954584161, "eval_accuracy": 0.9791441470083296, "eval_loss": 0.07836875319480896, "eval_runtime": 33.6879, "eval_samples_per_second": 466.844, "eval_steps_per_second": 7.302, "step": 124500 }, { "epoch": 35.34203803576497, "grad_norm": 0.01482563279569149, "learning_rate": 6.46727221118365e-05, "loss": 0.0029656274244189264, "step": 124510 }, { "epoch": 35.344876525688335, "grad_norm": 0.03944365680217743, "learning_rate": 6.466988362191315e-05, "loss": 0.0006907861679792404, "step": 124520 }, { "epoch": 35.3477150156117, "grad_norm": 0.14088264107704163, "learning_rate": 6.466704513198979e-05, "loss": 0.002962801791727543, "step": 124530 }, { "epoch": 35.35055350553505, "grad_norm": 0.012436851859092712, "learning_rate": 6.466420664206643e-05, "loss": 0.0010235317051410675, "step": 124540 }, { "epoch": 35.353391995458416, "grad_norm": 0.36923062801361084, "learning_rate": 6.466136815214306e-05, "loss": 0.0206649050116539, "step": 124550 }, { "epoch": 35.35623048538178, "grad_norm": 0.05332881212234497, "learning_rate": 6.46585296622197e-05, "loss": 0.021599264442920686, "step": 124560 }, { "epoch": 35.359068975305135, "grad_norm": 0.03424537554383278, "learning_rate": 6.465569117229634e-05, "loss": 0.001566331833600998, "step": 124570 }, { "epoch": 35.3619074652285, "grad_norm": 0.019553272053599358, "learning_rate": 6.465285268237298e-05, "loss": 0.004275157302618027, "step": 124580 }, { "epoch": 35.36474595515186, "grad_norm": 0.09959264099597931, "learning_rate": 6.465001419244961e-05, "loss": 0.002325216308236122, "step": 124590 }, { "epoch": 35.36758444507522, "grad_norm": 0.368420273065567, "learning_rate": 6.464717570252627e-05, "loss": 0.0007521886378526688, "step": 124600 }, { "epoch": 35.37042293499858, "grad_norm": 0.242654487490654, "learning_rate": 6.46443372126029e-05, "loss": 0.0015836114063858986, "step": 124610 }, { "epoch": 35.37326142492194, "grad_norm": 0.01213917788118124, "learning_rate": 6.464149872267953e-05, "loss": 0.003270465135574341, "step": 124620 }, { "epoch": 35.376099914845305, "grad_norm": 0.44703391194343567, "learning_rate": 6.463866023275618e-05, "loss": 0.0006649676710367203, "step": 124630 }, { "epoch": 35.37893840476866, "grad_norm": 0.025034936144948006, "learning_rate": 6.463582174283282e-05, "loss": 0.004452743381261825, "step": 124640 }, { "epoch": 35.381776894692024, "grad_norm": 0.1391424834728241, "learning_rate": 6.463298325290944e-05, "loss": 0.0015278710052371025, "step": 124650 }, { "epoch": 35.38461538461539, "grad_norm": 2.6031577587127686, "learning_rate": 6.463014476298609e-05, "loss": 0.0038967899978160857, "step": 124660 }, { "epoch": 35.38745387453874, "grad_norm": 2.5494842529296875, "learning_rate": 6.462730627306274e-05, "loss": 0.005812613666057587, "step": 124670 }, { "epoch": 35.390292364462105, "grad_norm": 0.224797323346138, "learning_rate": 6.462446778313937e-05, "loss": 0.004894940555095673, "step": 124680 }, { "epoch": 35.39313085438547, "grad_norm": 8.064824104309082, "learning_rate": 6.462162929321601e-05, "loss": 0.003720247745513916, "step": 124690 }, { "epoch": 35.395969344308824, "grad_norm": 4.688438415527344, "learning_rate": 6.461879080329265e-05, "loss": 0.018343672156333923, "step": 124700 }, { "epoch": 35.39880783423219, "grad_norm": 2.1348609924316406, "learning_rate": 6.461595231336929e-05, "loss": 0.009096228331327439, "step": 124710 }, { "epoch": 35.40164632415555, "grad_norm": 0.19060809910297394, "learning_rate": 6.461311382344592e-05, "loss": 0.00965447872877121, "step": 124720 }, { "epoch": 35.40448481407891, "grad_norm": 0.35582417249679565, "learning_rate": 6.461027533352258e-05, "loss": 0.0006343049928545952, "step": 124730 }, { "epoch": 35.40732330400227, "grad_norm": 1.188257098197937, "learning_rate": 6.460743684359922e-05, "loss": 0.001591971144080162, "step": 124740 }, { "epoch": 35.41016179392563, "grad_norm": 8.136590957641602, "learning_rate": 6.460459835367585e-05, "loss": 0.005231785029172898, "step": 124750 }, { "epoch": 35.413000283848994, "grad_norm": 1.7803564071655273, "learning_rate": 6.460175986375249e-05, "loss": 0.002509509213268757, "step": 124760 }, { "epoch": 35.41583877377235, "grad_norm": 1.029913306236267, "learning_rate": 6.459892137382913e-05, "loss": 0.004324093833565712, "step": 124770 }, { "epoch": 35.41867726369571, "grad_norm": 11.195435523986816, "learning_rate": 6.459608288390576e-05, "loss": 0.004136242717504501, "step": 124780 }, { "epoch": 35.421515753619076, "grad_norm": 0.037287402898073196, "learning_rate": 6.45932443939824e-05, "loss": 0.012977346777915955, "step": 124790 }, { "epoch": 35.42435424354244, "grad_norm": 0.760664701461792, "learning_rate": 6.459040590405905e-05, "loss": 0.00368540957570076, "step": 124800 }, { "epoch": 35.427192733465795, "grad_norm": 0.3611898720264435, "learning_rate": 6.458756741413568e-05, "loss": 0.00047428589314222334, "step": 124810 }, { "epoch": 35.43003122338916, "grad_norm": 1.2219957113265991, "learning_rate": 6.458472892421232e-05, "loss": 0.003266918286681175, "step": 124820 }, { "epoch": 35.43286971331252, "grad_norm": 0.0079959100112319, "learning_rate": 6.458189043428896e-05, "loss": 0.002091800235211849, "step": 124830 }, { "epoch": 35.435708203235876, "grad_norm": 0.2601609230041504, "learning_rate": 6.45790519443656e-05, "loss": 0.0032780393958091736, "step": 124840 }, { "epoch": 35.43854669315924, "grad_norm": 4.35189151763916, "learning_rate": 6.457621345444223e-05, "loss": 0.0020841026678681375, "step": 124850 }, { "epoch": 35.4413851830826, "grad_norm": 5.1205525398254395, "learning_rate": 6.457337496451889e-05, "loss": 0.005609207600355148, "step": 124860 }, { "epoch": 35.44422367300596, "grad_norm": 0.08320329338312149, "learning_rate": 6.457053647459553e-05, "loss": 0.00202340092509985, "step": 124870 }, { "epoch": 35.44706216292932, "grad_norm": 12.296496391296387, "learning_rate": 6.456769798467216e-05, "loss": 0.010545090585947037, "step": 124880 }, { "epoch": 35.44990065285268, "grad_norm": 2.0327696800231934, "learning_rate": 6.45648594947488e-05, "loss": 0.002438398636877537, "step": 124890 }, { "epoch": 35.452739142776046, "grad_norm": 0.8804358839988708, "learning_rate": 6.456202100482544e-05, "loss": 0.004004732891917229, "step": 124900 }, { "epoch": 35.4555776326994, "grad_norm": 12.139473915100098, "learning_rate": 6.455918251490207e-05, "loss": 0.008508222550153733, "step": 124910 }, { "epoch": 35.458416122622765, "grad_norm": 0.13746222853660583, "learning_rate": 6.455634402497871e-05, "loss": 0.0004667576402425766, "step": 124920 }, { "epoch": 35.46125461254613, "grad_norm": 0.3221026659011841, "learning_rate": 6.455350553505536e-05, "loss": 0.0047178402543067936, "step": 124930 }, { "epoch": 35.464093102469484, "grad_norm": 0.0892428606748581, "learning_rate": 6.455066704513199e-05, "loss": 0.005122344195842743, "step": 124940 }, { "epoch": 35.46693159239285, "grad_norm": 0.5592729449272156, "learning_rate": 6.454782855520863e-05, "loss": 0.01064927652478218, "step": 124950 }, { "epoch": 35.46977008231621, "grad_norm": 0.09213431924581528, "learning_rate": 6.454499006528527e-05, "loss": 0.0052684381604194645, "step": 124960 }, { "epoch": 35.472608572239565, "grad_norm": 17.18667221069336, "learning_rate": 6.454215157536191e-05, "loss": 0.01169949844479561, "step": 124970 }, { "epoch": 35.47544706216293, "grad_norm": 0.30528536438941956, "learning_rate": 6.453931308543854e-05, "loss": 0.0006896050646901131, "step": 124980 }, { "epoch": 35.47828555208629, "grad_norm": 5.922438144683838, "learning_rate": 6.453647459551518e-05, "loss": 0.0038772013038396837, "step": 124990 }, { "epoch": 35.481124042009654, "grad_norm": 11.341552734375, "learning_rate": 6.453363610559184e-05, "loss": 0.007522968947887421, "step": 125000 }, { "epoch": 35.481124042009654, "eval_accuracy": 0.9801615056908501, "eval_loss": 0.07548976689577103, "eval_runtime": 37.1391, "eval_samples_per_second": 423.462, "eval_steps_per_second": 6.624, "step": 125000 }, { "epoch": 35.48396253193301, "grad_norm": 0.1546505242586136, "learning_rate": 6.453079761566847e-05, "loss": 0.003577769547700882, "step": 125010 }, { "epoch": 35.48680102185637, "grad_norm": 5.485890865325928, "learning_rate": 6.452795912574511e-05, "loss": 0.0058425508439540865, "step": 125020 }, { "epoch": 35.489639511779735, "grad_norm": 0.19853073358535767, "learning_rate": 6.452512063582175e-05, "loss": 0.0021618677303195, "step": 125030 }, { "epoch": 35.49247800170309, "grad_norm": 0.04161115735769272, "learning_rate": 6.452228214589838e-05, "loss": 0.0019450429826974869, "step": 125040 }, { "epoch": 35.495316491626454, "grad_norm": 0.1721782237291336, "learning_rate": 6.451944365597502e-05, "loss": 0.0027763418853282928, "step": 125050 }, { "epoch": 35.49815498154982, "grad_norm": 0.05827612802386284, "learning_rate": 6.451660516605167e-05, "loss": 0.0028866950422525408, "step": 125060 }, { "epoch": 35.50099347147318, "grad_norm": 0.08994437754154205, "learning_rate": 6.45137666761283e-05, "loss": 0.0005375655367970466, "step": 125070 }, { "epoch": 35.503831961396536, "grad_norm": 0.13831044733524323, "learning_rate": 6.451092818620494e-05, "loss": 0.0007477277889847755, "step": 125080 }, { "epoch": 35.5066704513199, "grad_norm": 1.9346476793289185, "learning_rate": 6.450808969628158e-05, "loss": 0.014847670495510102, "step": 125090 }, { "epoch": 35.50950894124326, "grad_norm": 0.08758946508169174, "learning_rate": 6.450525120635823e-05, "loss": 0.00262789037078619, "step": 125100 }, { "epoch": 35.51234743116662, "grad_norm": 0.09864947944879532, "learning_rate": 6.450241271643485e-05, "loss": 0.004297556728124619, "step": 125110 }, { "epoch": 35.51518592108998, "grad_norm": 0.1401648074388504, "learning_rate": 6.44995742265115e-05, "loss": 0.005194512754678726, "step": 125120 }, { "epoch": 35.51802441101334, "grad_norm": 4.637607097625732, "learning_rate": 6.449673573658815e-05, "loss": 0.014008691906929016, "step": 125130 }, { "epoch": 35.5208629009367, "grad_norm": 0.10561330616474152, "learning_rate": 6.449389724666478e-05, "loss": 0.001397874765098095, "step": 125140 }, { "epoch": 35.52370139086006, "grad_norm": 0.9672861695289612, "learning_rate": 6.449105875674142e-05, "loss": 0.0028969617560505867, "step": 125150 }, { "epoch": 35.526539880783425, "grad_norm": 2.2642765045166016, "learning_rate": 6.448822026681806e-05, "loss": 0.0007223721593618393, "step": 125160 }, { "epoch": 35.52937837070679, "grad_norm": 12.761719703674316, "learning_rate": 6.448538177689469e-05, "loss": 0.008127892762422562, "step": 125170 }, { "epoch": 35.53221686063014, "grad_norm": 3.1665775775909424, "learning_rate": 6.448254328697133e-05, "loss": 0.007235568761825561, "step": 125180 }, { "epoch": 35.535055350553506, "grad_norm": 0.19820532202720642, "learning_rate": 6.447970479704797e-05, "loss": 0.003495055437088013, "step": 125190 }, { "epoch": 35.53789384047687, "grad_norm": 0.9472000002861023, "learning_rate": 6.447686630712461e-05, "loss": 0.003105410560965538, "step": 125200 }, { "epoch": 35.540732330400225, "grad_norm": 9.241530418395996, "learning_rate": 6.447402781720125e-05, "loss": 0.007741802930831909, "step": 125210 }, { "epoch": 35.54357082032359, "grad_norm": 0.2086513340473175, "learning_rate": 6.44711893272779e-05, "loss": 0.003484625369310379, "step": 125220 }, { "epoch": 35.54640931024695, "grad_norm": 1.2896981239318848, "learning_rate": 6.446835083735454e-05, "loss": 0.003943552076816559, "step": 125230 }, { "epoch": 35.549247800170306, "grad_norm": 5.362439155578613, "learning_rate": 6.446551234743116e-05, "loss": 0.0021337078884243964, "step": 125240 }, { "epoch": 35.55208629009367, "grad_norm": 0.06840218603610992, "learning_rate": 6.44626738575078e-05, "loss": 0.0023602819070219993, "step": 125250 }, { "epoch": 35.55492478001703, "grad_norm": 0.0075789401307702065, "learning_rate": 6.445983536758445e-05, "loss": 0.005373644828796387, "step": 125260 }, { "epoch": 35.557763269940395, "grad_norm": 1.348443627357483, "learning_rate": 6.445699687766109e-05, "loss": 0.01099305972456932, "step": 125270 }, { "epoch": 35.56060175986375, "grad_norm": 4.289938926696777, "learning_rate": 6.445415838773773e-05, "loss": 0.00820753425359726, "step": 125280 }, { "epoch": 35.563440249787114, "grad_norm": 7.131179332733154, "learning_rate": 6.445131989781437e-05, "loss": 0.013246053457260131, "step": 125290 }, { "epoch": 35.56627873971048, "grad_norm": 0.48435017466545105, "learning_rate": 6.4448481407891e-05, "loss": 0.020599089562892914, "step": 125300 }, { "epoch": 35.56911722963383, "grad_norm": 0.1065853089094162, "learning_rate": 6.444564291796764e-05, "loss": 0.003444811701774597, "step": 125310 }, { "epoch": 35.571955719557195, "grad_norm": 0.10572010278701782, "learning_rate": 6.444280442804428e-05, "loss": 0.003503207489848137, "step": 125320 }, { "epoch": 35.57479420948056, "grad_norm": 0.31061437726020813, "learning_rate": 6.443996593812092e-05, "loss": 0.00283317007124424, "step": 125330 }, { "epoch": 35.577632699403914, "grad_norm": 7.612624168395996, "learning_rate": 6.443712744819756e-05, "loss": 0.005484159663319587, "step": 125340 }, { "epoch": 35.58047118932728, "grad_norm": 0.13799726963043213, "learning_rate": 6.44342889582742e-05, "loss": 0.002902453579008579, "step": 125350 }, { "epoch": 35.58330967925064, "grad_norm": 0.05516832694411278, "learning_rate": 6.443145046835085e-05, "loss": 0.009533137083053589, "step": 125360 }, { "epoch": 35.586148169174, "grad_norm": 2.0618348121643066, "learning_rate": 6.442861197842747e-05, "loss": 0.0040850814431905745, "step": 125370 }, { "epoch": 35.58898665909736, "grad_norm": 1.514143705368042, "learning_rate": 6.442577348850412e-05, "loss": 0.009955834597349167, "step": 125380 }, { "epoch": 35.59182514902072, "grad_norm": 0.24537764489650726, "learning_rate": 6.442293499858076e-05, "loss": 0.009663808345794677, "step": 125390 }, { "epoch": 35.594663638944084, "grad_norm": 0.09945521503686905, "learning_rate": 6.44200965086574e-05, "loss": 0.0023943202570080755, "step": 125400 }, { "epoch": 35.59750212886744, "grad_norm": 0.7495584487915039, "learning_rate": 6.441725801873404e-05, "loss": 0.0010842407122254373, "step": 125410 }, { "epoch": 35.6003406187908, "grad_norm": 0.928406834602356, "learning_rate": 6.441441952881068e-05, "loss": 0.0037149228155612946, "step": 125420 }, { "epoch": 35.603179108714166, "grad_norm": 0.3239251673221588, "learning_rate": 6.441158103888731e-05, "loss": 0.0066689826548099514, "step": 125430 }, { "epoch": 35.60601759863752, "grad_norm": 0.16033035516738892, "learning_rate": 6.440874254896395e-05, "loss": 0.00915396586060524, "step": 125440 }, { "epoch": 35.608856088560884, "grad_norm": Infinity, "learning_rate": 6.440590405904059e-05, "loss": 0.029920083284378052, "step": 125450 }, { "epoch": 35.61169457848425, "grad_norm": 0.14550283551216125, "learning_rate": 6.440334941810957e-05, "loss": 0.0009986910969018935, "step": 125460 }, { "epoch": 35.61453306840761, "grad_norm": 3.104323387145996, "learning_rate": 6.440051092818621e-05, "loss": 0.004250047355890274, "step": 125470 }, { "epoch": 35.617371558330966, "grad_norm": 0.07243557274341583, "learning_rate": 6.439767243826284e-05, "loss": 0.006592791527509689, "step": 125480 }, { "epoch": 35.62021004825433, "grad_norm": 0.010062283836305141, "learning_rate": 6.439483394833948e-05, "loss": 0.003779260441660881, "step": 125490 }, { "epoch": 35.62304853817769, "grad_norm": 8.123311996459961, "learning_rate": 6.439199545841612e-05, "loss": 0.0063145443797111515, "step": 125500 }, { "epoch": 35.62304853817769, "eval_accuracy": 0.9788898073376995, "eval_loss": 0.08037439733743668, "eval_runtime": 36.6855, "eval_samples_per_second": 428.698, "eval_steps_per_second": 6.706, "step": 125500 }, { "epoch": 35.62588702810105, "grad_norm": 0.02908613719046116, "learning_rate": 6.438915696849277e-05, "loss": 0.004178018122911453, "step": 125510 }, { "epoch": 35.62872551802441, "grad_norm": 0.5323150753974915, "learning_rate": 6.438631847856941e-05, "loss": 0.012352840602397918, "step": 125520 }, { "epoch": 35.63156400794777, "grad_norm": 0.054894544184207916, "learning_rate": 6.438347998864605e-05, "loss": 0.0016318585723638535, "step": 125530 }, { "epoch": 35.634402497871136, "grad_norm": 0.4623955488204956, "learning_rate": 6.438064149872268e-05, "loss": 0.0033130340278148653, "step": 125540 }, { "epoch": 35.63724098779449, "grad_norm": 0.22197331488132477, "learning_rate": 6.437780300879932e-05, "loss": 0.0030490878969430925, "step": 125550 }, { "epoch": 35.640079477717855, "grad_norm": 2.0847456455230713, "learning_rate": 6.437496451887596e-05, "loss": 0.0008412908762693405, "step": 125560 }, { "epoch": 35.64291796764122, "grad_norm": 0.19993826746940613, "learning_rate": 6.43721260289526e-05, "loss": 0.0011321842670440675, "step": 125570 }, { "epoch": 35.64575645756457, "grad_norm": 0.05693400278687477, "learning_rate": 6.436928753902924e-05, "loss": 0.008497275412082672, "step": 125580 }, { "epoch": 35.648594947487936, "grad_norm": 0.039090223610401154, "learning_rate": 6.436644904910588e-05, "loss": 0.009034518897533417, "step": 125590 }, { "epoch": 35.6514334374113, "grad_norm": 0.12480933219194412, "learning_rate": 6.436361055918252e-05, "loss": 0.007761605083942413, "step": 125600 }, { "epoch": 35.654271927334655, "grad_norm": 0.45686590671539307, "learning_rate": 6.436077206925915e-05, "loss": 0.00810842663049698, "step": 125610 }, { "epoch": 35.65711041725802, "grad_norm": 0.9164472818374634, "learning_rate": 6.43579335793358e-05, "loss": 0.004058118164539337, "step": 125620 }, { "epoch": 35.65994890718138, "grad_norm": 8.489727973937988, "learning_rate": 6.435509508941243e-05, "loss": 0.003299133852124214, "step": 125630 }, { "epoch": 35.66278739710474, "grad_norm": 0.6031113266944885, "learning_rate": 6.435225659948908e-05, "loss": 0.0009000243619084359, "step": 125640 }, { "epoch": 35.6656258870281, "grad_norm": 0.18828704953193665, "learning_rate": 6.434941810956572e-05, "loss": 0.002638954855501652, "step": 125650 }, { "epoch": 35.66846437695146, "grad_norm": 0.4188239574432373, "learning_rate": 6.434657961964236e-05, "loss": 0.0011456305161118507, "step": 125660 }, { "epoch": 35.671302866874825, "grad_norm": 9.469788551330566, "learning_rate": 6.434374112971899e-05, "loss": 0.004500041529536247, "step": 125670 }, { "epoch": 35.67414135679818, "grad_norm": 0.006330656353384256, "learning_rate": 6.434090263979563e-05, "loss": 0.003460119292140007, "step": 125680 }, { "epoch": 35.676979846721544, "grad_norm": 0.030840201303362846, "learning_rate": 6.433806414987227e-05, "loss": 0.0009338021278381348, "step": 125690 }, { "epoch": 35.67981833664491, "grad_norm": 0.05676133558154106, "learning_rate": 6.433522565994891e-05, "loss": 0.014948925375938416, "step": 125700 }, { "epoch": 35.68265682656826, "grad_norm": 2.3752028942108154, "learning_rate": 6.433238717002555e-05, "loss": 0.007323306053876877, "step": 125710 }, { "epoch": 35.685495316491625, "grad_norm": 0.0837702676653862, "learning_rate": 6.43295486801022e-05, "loss": 0.003096137382090092, "step": 125720 }, { "epoch": 35.68833380641499, "grad_norm": 1.3037681579589844, "learning_rate": 6.432671019017884e-05, "loss": 0.0022177580744028092, "step": 125730 }, { "epoch": 35.69117229633835, "grad_norm": 0.01357006561011076, "learning_rate": 6.432387170025546e-05, "loss": 0.0017501767724752426, "step": 125740 }, { "epoch": 35.69401078626171, "grad_norm": 0.12490779161453247, "learning_rate": 6.43210332103321e-05, "loss": 0.005509719997644424, "step": 125750 }, { "epoch": 35.69684927618507, "grad_norm": 9.208721160888672, "learning_rate": 6.431819472040875e-05, "loss": 0.003952761739492416, "step": 125760 }, { "epoch": 35.69968776610843, "grad_norm": 0.26799482107162476, "learning_rate": 6.431535623048537e-05, "loss": 0.0030146770179271696, "step": 125770 }, { "epoch": 35.70252625603179, "grad_norm": 0.010251465253531933, "learning_rate": 6.431251774056203e-05, "loss": 0.004486842080950737, "step": 125780 }, { "epoch": 35.70536474595515, "grad_norm": 4.28292989730835, "learning_rate": 6.430967925063867e-05, "loss": 0.005466730147600174, "step": 125790 }, { "epoch": 35.708203235878514, "grad_norm": 2.8842878341674805, "learning_rate": 6.43068407607153e-05, "loss": 0.0009996920824050903, "step": 125800 }, { "epoch": 35.71104172580187, "grad_norm": 19.278776168823242, "learning_rate": 6.430400227079194e-05, "loss": 0.013706119358539581, "step": 125810 }, { "epoch": 35.71388021572523, "grad_norm": 7.576572895050049, "learning_rate": 6.430116378086858e-05, "loss": 0.01652529537677765, "step": 125820 }, { "epoch": 35.716718705648596, "grad_norm": 0.18748129904270172, "learning_rate": 6.429832529094522e-05, "loss": 0.0128255695104599, "step": 125830 }, { "epoch": 35.71955719557196, "grad_norm": 0.11030813306570053, "learning_rate": 6.429548680102186e-05, "loss": 0.0015662644058465959, "step": 125840 }, { "epoch": 35.722395685495314, "grad_norm": 1.0282937288284302, "learning_rate": 6.42926483110985e-05, "loss": 0.0020008202642202376, "step": 125850 }, { "epoch": 35.72523417541868, "grad_norm": 10.008965492248535, "learning_rate": 6.428980982117515e-05, "loss": 0.003995196148753166, "step": 125860 }, { "epoch": 35.72807266534204, "grad_norm": 0.08083171397447586, "learning_rate": 6.428697133125177e-05, "loss": 0.0011394811794161797, "step": 125870 }, { "epoch": 35.730911155265396, "grad_norm": 2.058654308319092, "learning_rate": 6.428413284132842e-05, "loss": 0.0007203789427876472, "step": 125880 }, { "epoch": 35.73374964518876, "grad_norm": 0.009362260811030865, "learning_rate": 6.428129435140506e-05, "loss": 0.0013596866279840468, "step": 125890 }, { "epoch": 35.73658813511212, "grad_norm": 0.14614805579185486, "learning_rate": 6.427845586148168e-05, "loss": 0.002778252400457859, "step": 125900 }, { "epoch": 35.739426625035485, "grad_norm": 0.29549190402030945, "learning_rate": 6.427561737155834e-05, "loss": 0.002221069484949112, "step": 125910 }, { "epoch": 35.74226511495884, "grad_norm": 0.006155951879918575, "learning_rate": 6.427277888163498e-05, "loss": 0.0007173389196395874, "step": 125920 }, { "epoch": 35.7451036048822, "grad_norm": 0.008569830097258091, "learning_rate": 6.426994039171161e-05, "loss": 0.0017225177958607673, "step": 125930 }, { "epoch": 35.747942094805566, "grad_norm": 0.3479291498661041, "learning_rate": 6.426710190178825e-05, "loss": 0.0034058019518852234, "step": 125940 }, { "epoch": 35.75078058472892, "grad_norm": 0.11283716559410095, "learning_rate": 6.426426341186489e-05, "loss": 0.002229613997042179, "step": 125950 }, { "epoch": 35.753619074652285, "grad_norm": 0.6770791411399841, "learning_rate": 6.426142492194153e-05, "loss": 0.008133172988891602, "step": 125960 }, { "epoch": 35.75645756457565, "grad_norm": 0.23278291523456573, "learning_rate": 6.425858643201816e-05, "loss": 0.001558043621480465, "step": 125970 }, { "epoch": 35.759296054499, "grad_norm": 0.2419378012418747, "learning_rate": 6.425574794209482e-05, "loss": 0.010837803781032562, "step": 125980 }, { "epoch": 35.762134544422366, "grad_norm": 0.030212467536330223, "learning_rate": 6.425290945217146e-05, "loss": 0.008951932191848755, "step": 125990 }, { "epoch": 35.76497303434573, "grad_norm": 0.13098318874835968, "learning_rate": 6.425007096224808e-05, "loss": 0.0027817396447062494, "step": 126000 }, { "epoch": 35.76497303434573, "eval_accuracy": 0.979779996184905, "eval_loss": 0.07499147206544876, "eval_runtime": 34.9183, "eval_samples_per_second": 450.394, "eval_steps_per_second": 7.045, "step": 126000 }, { "epoch": 35.76781152426909, "grad_norm": 0.1974615603685379, "learning_rate": 6.424723247232473e-05, "loss": 0.004388582706451416, "step": 126010 }, { "epoch": 35.77065001419245, "grad_norm": 0.19195204973220825, "learning_rate": 6.424439398240137e-05, "loss": 0.0012548938393592834, "step": 126020 }, { "epoch": 35.77348850411581, "grad_norm": 0.04455172270536423, "learning_rate": 6.4241555492478e-05, "loss": 0.001027856208384037, "step": 126030 }, { "epoch": 35.776326994039174, "grad_norm": 0.06004544347524643, "learning_rate": 6.423871700255465e-05, "loss": 0.0043566271662712095, "step": 126040 }, { "epoch": 35.77916548396253, "grad_norm": 0.37165939807891846, "learning_rate": 6.423587851263129e-05, "loss": 0.004809189587831497, "step": 126050 }, { "epoch": 35.78200397388589, "grad_norm": 0.2554490864276886, "learning_rate": 6.423304002270792e-05, "loss": 0.0038470469415187837, "step": 126060 }, { "epoch": 35.784842463809255, "grad_norm": 8.376189231872559, "learning_rate": 6.423020153278456e-05, "loss": 0.022783182561397552, "step": 126070 }, { "epoch": 35.78768095373261, "grad_norm": 0.08394208550453186, "learning_rate": 6.42273630428612e-05, "loss": 0.007924771308898926, "step": 126080 }, { "epoch": 35.790519443655974, "grad_norm": 0.11531038582324982, "learning_rate": 6.422452455293784e-05, "loss": 0.00912972018122673, "step": 126090 }, { "epoch": 35.79335793357934, "grad_norm": 0.07834915071725845, "learning_rate": 6.422168606301447e-05, "loss": 0.0037667885422706603, "step": 126100 }, { "epoch": 35.7961964235027, "grad_norm": 0.024952877312898636, "learning_rate": 6.421884757309113e-05, "loss": 0.023769474029541014, "step": 126110 }, { "epoch": 35.799034913426055, "grad_norm": 0.17108899354934692, "learning_rate": 6.421600908316777e-05, "loss": 0.005484998971223831, "step": 126120 }, { "epoch": 35.80187340334942, "grad_norm": 12.99482536315918, "learning_rate": 6.42131705932444e-05, "loss": 0.010708672553300857, "step": 126130 }, { "epoch": 35.80471189327278, "grad_norm": 0.014017485082149506, "learning_rate": 6.421033210332104e-05, "loss": 0.014888206124305725, "step": 126140 }, { "epoch": 35.80755038319614, "grad_norm": 5.986766338348389, "learning_rate": 6.420749361339768e-05, "loss": 0.006130870431661606, "step": 126150 }, { "epoch": 35.8103888731195, "grad_norm": 0.11356528848409653, "learning_rate": 6.42046551234743e-05, "loss": 0.0007083244621753692, "step": 126160 }, { "epoch": 35.81322736304286, "grad_norm": 0.07231836020946503, "learning_rate": 6.420181663355095e-05, "loss": 0.001573168858885765, "step": 126170 }, { "epoch": 35.81606585296622, "grad_norm": 5.330711841583252, "learning_rate": 6.41989781436276e-05, "loss": 0.011733056604862213, "step": 126180 }, { "epoch": 35.81890434288958, "grad_norm": 0.24282442033290863, "learning_rate": 6.419613965370423e-05, "loss": 0.0006642764434218407, "step": 126190 }, { "epoch": 35.821742832812944, "grad_norm": 0.10239093005657196, "learning_rate": 6.419330116378087e-05, "loss": 0.009602467715740203, "step": 126200 }, { "epoch": 35.82458132273631, "grad_norm": 0.03134394437074661, "learning_rate": 6.419046267385751e-05, "loss": 0.003907680511474609, "step": 126210 }, { "epoch": 35.82741981265966, "grad_norm": 4.663364887237549, "learning_rate": 6.418762418393415e-05, "loss": 0.005031538754701614, "step": 126220 }, { "epoch": 35.830258302583026, "grad_norm": 0.10226548463106155, "learning_rate": 6.418478569401078e-05, "loss": 0.028547585010528564, "step": 126230 }, { "epoch": 35.83309679250639, "grad_norm": 0.46286511421203613, "learning_rate": 6.418194720408744e-05, "loss": 0.007617927342653275, "step": 126240 }, { "epoch": 35.835935282429745, "grad_norm": 0.44268763065338135, "learning_rate": 6.417910871416406e-05, "loss": 0.0033056817948818205, "step": 126250 }, { "epoch": 35.83877377235311, "grad_norm": 0.9528306722640991, "learning_rate": 6.41762702242407e-05, "loss": 0.000663500651717186, "step": 126260 }, { "epoch": 35.84161226227647, "grad_norm": 6.738482475280762, "learning_rate": 6.417343173431735e-05, "loss": 0.006849660724401474, "step": 126270 }, { "epoch": 35.84445075219983, "grad_norm": 0.255598247051239, "learning_rate": 6.417059324439399e-05, "loss": 0.0029690539464354514, "step": 126280 }, { "epoch": 35.84728924212319, "grad_norm": 0.38464123010635376, "learning_rate": 6.416775475447062e-05, "loss": 0.00423191674053669, "step": 126290 }, { "epoch": 35.85012773204655, "grad_norm": 0.3596348166465759, "learning_rate": 6.416491626454726e-05, "loss": 0.006248601526021957, "step": 126300 }, { "epoch": 35.852966221969915, "grad_norm": 0.35500892996788025, "learning_rate": 6.416207777462391e-05, "loss": 0.0024967465549707414, "step": 126310 }, { "epoch": 35.85580471189327, "grad_norm": 0.22798015177249908, "learning_rate": 6.415923928470054e-05, "loss": 0.001821071468293667, "step": 126320 }, { "epoch": 35.85864320181663, "grad_norm": 0.9581059217453003, "learning_rate": 6.415640079477718e-05, "loss": 0.003005058504641056, "step": 126330 }, { "epoch": 35.861481691739996, "grad_norm": 1.1322474479675293, "learning_rate": 6.415356230485382e-05, "loss": 0.01052318960428238, "step": 126340 }, { "epoch": 35.86432018166335, "grad_norm": 2.1096580028533936, "learning_rate": 6.415072381493045e-05, "loss": 0.005480524897575378, "step": 126350 }, { "epoch": 35.867158671586715, "grad_norm": 0.42646750807762146, "learning_rate": 6.414788532500709e-05, "loss": 0.003954295814037323, "step": 126360 }, { "epoch": 35.86999716151008, "grad_norm": 0.24624672532081604, "learning_rate": 6.414504683508373e-05, "loss": 0.00348053053021431, "step": 126370 }, { "epoch": 35.87283565143344, "grad_norm": 6.72012996673584, "learning_rate": 6.414220834516038e-05, "loss": 0.004575472325086594, "step": 126380 }, { "epoch": 35.8756741413568, "grad_norm": 1.0294519662857056, "learning_rate": 6.413936985523702e-05, "loss": 0.0022103024646639826, "step": 126390 }, { "epoch": 35.87851263128016, "grad_norm": 1.31969153881073, "learning_rate": 6.413653136531366e-05, "loss": 0.0038317009806632996, "step": 126400 }, { "epoch": 35.88135112120352, "grad_norm": 0.7554044723510742, "learning_rate": 6.41336928753903e-05, "loss": 0.005875607579946518, "step": 126410 }, { "epoch": 35.88418961112688, "grad_norm": 0.03356730565428734, "learning_rate": 6.413085438546693e-05, "loss": 0.00042801648378372193, "step": 126420 }, { "epoch": 35.88702810105024, "grad_norm": 5.565672874450684, "learning_rate": 6.412801589554357e-05, "loss": 0.0032983459532260895, "step": 126430 }, { "epoch": 35.889866590973604, "grad_norm": 0.1636205017566681, "learning_rate": 6.412517740562022e-05, "loss": 0.0009458376094698906, "step": 126440 }, { "epoch": 35.89270508089696, "grad_norm": 0.2895934283733368, "learning_rate": 6.412233891569685e-05, "loss": 0.002242652140557766, "step": 126450 }, { "epoch": 35.89554357082032, "grad_norm": 0.03643418475985527, "learning_rate": 6.411950042577349e-05, "loss": 0.0009454803541302681, "step": 126460 }, { "epoch": 35.898382060743685, "grad_norm": 0.14770086109638214, "learning_rate": 6.411666193585013e-05, "loss": 0.0008733894675970078, "step": 126470 }, { "epoch": 35.90122055066705, "grad_norm": 0.021099943667650223, "learning_rate": 6.411382344592676e-05, "loss": 0.0011212017387151718, "step": 126480 }, { "epoch": 35.904059040590404, "grad_norm": 0.8914493918418884, "learning_rate": 6.41109849560034e-05, "loss": 0.0011905822902917862, "step": 126490 }, { "epoch": 35.90689753051377, "grad_norm": 0.08477215468883514, "learning_rate": 6.410814646608004e-05, "loss": 0.0009529890492558479, "step": 126500 }, { "epoch": 35.90689753051377, "eval_accuracy": 0.9804158453614803, "eval_loss": 0.07777450978755951, "eval_runtime": 33.2275, "eval_samples_per_second": 473.313, "eval_steps_per_second": 7.404, "step": 126500 }, { "epoch": 35.90973602043713, "grad_norm": 0.020101141184568405, "learning_rate": 6.410530797615669e-05, "loss": 0.0003218440338969231, "step": 126510 }, { "epoch": 35.912574510360486, "grad_norm": 2.5312442779541016, "learning_rate": 6.410246948623333e-05, "loss": 0.003913724049925804, "step": 126520 }, { "epoch": 35.91541300028385, "grad_norm": 5.713993549346924, "learning_rate": 6.409963099630997e-05, "loss": 0.01074879765510559, "step": 126530 }, { "epoch": 35.91825149020721, "grad_norm": 9.832294464111328, "learning_rate": 6.409679250638661e-05, "loss": 0.004627268761396408, "step": 126540 }, { "epoch": 35.92108998013057, "grad_norm": 0.5263088941574097, "learning_rate": 6.409395401646324e-05, "loss": 0.007270976901054382, "step": 126550 }, { "epoch": 35.92392847005393, "grad_norm": 0.11096016317605972, "learning_rate": 6.409111552653988e-05, "loss": 0.0008166065439581871, "step": 126560 }, { "epoch": 35.92676695997729, "grad_norm": 0.1212499588727951, "learning_rate": 6.408827703661653e-05, "loss": 0.002044415473937988, "step": 126570 }, { "epoch": 35.929605449900656, "grad_norm": 0.2650720477104187, "learning_rate": 6.408543854669316e-05, "loss": 0.010066443681716919, "step": 126580 }, { "epoch": 35.93244393982401, "grad_norm": 0.3688288927078247, "learning_rate": 6.40826000567698e-05, "loss": 0.011094893515110015, "step": 126590 }, { "epoch": 35.935282429747375, "grad_norm": 0.33001357316970825, "learning_rate": 6.407976156684644e-05, "loss": 0.001900527998805046, "step": 126600 }, { "epoch": 35.93812091967074, "grad_norm": 0.023929398506879807, "learning_rate": 6.407692307692307e-05, "loss": 0.007791773229837417, "step": 126610 }, { "epoch": 35.94095940959409, "grad_norm": 2.1931071281433105, "learning_rate": 6.407408458699971e-05, "loss": 0.005575893819332123, "step": 126620 }, { "epoch": 35.943797899517456, "grad_norm": 1.7376747131347656, "learning_rate": 6.407124609707636e-05, "loss": 0.003783286735415459, "step": 126630 }, { "epoch": 35.94663638944082, "grad_norm": 0.04150514304637909, "learning_rate": 6.4068407607153e-05, "loss": 0.0049328915774822235, "step": 126640 }, { "epoch": 35.949474879364175, "grad_norm": 5.8543853759765625, "learning_rate": 6.406556911722964e-05, "loss": 0.012392321228981018, "step": 126650 }, { "epoch": 35.95231336928754, "grad_norm": 0.04192863777279854, "learning_rate": 6.406273062730628e-05, "loss": 0.010057692229747773, "step": 126660 }, { "epoch": 35.9551518592109, "grad_norm": 1.3077338933944702, "learning_rate": 6.405989213738292e-05, "loss": 0.0041047722101211544, "step": 126670 }, { "epoch": 35.95799034913426, "grad_norm": 0.21521136164665222, "learning_rate": 6.405705364745955e-05, "loss": 0.0034645922482013703, "step": 126680 }, { "epoch": 35.96082883905762, "grad_norm": 2.045409679412842, "learning_rate": 6.405421515753619e-05, "loss": 0.005302001908421517, "step": 126690 }, { "epoch": 35.96366732898098, "grad_norm": 11.878539085388184, "learning_rate": 6.405137666761283e-05, "loss": 0.010253901779651641, "step": 126700 }, { "epoch": 35.966505818904345, "grad_norm": 0.01844116486608982, "learning_rate": 6.404853817768947e-05, "loss": 0.006229083985090256, "step": 126710 }, { "epoch": 35.9693443088277, "grad_norm": 0.6845427751541138, "learning_rate": 6.404569968776611e-05, "loss": 0.005139093473553657, "step": 126720 }, { "epoch": 35.972182798751064, "grad_norm": 15.114039421081543, "learning_rate": 6.404286119784276e-05, "loss": 0.011887136101722717, "step": 126730 }, { "epoch": 35.97502128867443, "grad_norm": 3.1495022773742676, "learning_rate": 6.404002270791938e-05, "loss": 0.00386645644903183, "step": 126740 }, { "epoch": 35.97785977859779, "grad_norm": 2.588853120803833, "learning_rate": 6.403718421799602e-05, "loss": 0.004959242418408394, "step": 126750 }, { "epoch": 35.980698268521145, "grad_norm": 0.9089516997337341, "learning_rate": 6.403434572807267e-05, "loss": 0.0008921625092625618, "step": 126760 }, { "epoch": 35.98353675844451, "grad_norm": 0.604783296585083, "learning_rate": 6.403150723814931e-05, "loss": 0.0044844277203083035, "step": 126770 }, { "epoch": 35.98637524836787, "grad_norm": 0.04588114842772484, "learning_rate": 6.402866874822595e-05, "loss": 0.0005900749936699867, "step": 126780 }, { "epoch": 35.98921373829123, "grad_norm": 0.22676116228103638, "learning_rate": 6.402583025830259e-05, "loss": 0.002245096676051617, "step": 126790 }, { "epoch": 35.99205222821459, "grad_norm": 0.2573414444923401, "learning_rate": 6.402299176837923e-05, "loss": 0.0048059321939945224, "step": 126800 }, { "epoch": 35.99489071813795, "grad_norm": 0.3299231231212616, "learning_rate": 6.402015327845586e-05, "loss": 0.0008507020771503449, "step": 126810 }, { "epoch": 35.99772920806131, "grad_norm": 0.4754306674003601, "learning_rate": 6.40173147885325e-05, "loss": 0.008337423950433732, "step": 126820 }, { "epoch": 36.00056769798467, "grad_norm": 2.9649672508239746, "learning_rate": 6.401447629860914e-05, "loss": 0.013025817275047303, "step": 126830 }, { "epoch": 36.003406187908034, "grad_norm": 0.518805980682373, "learning_rate": 6.401163780868578e-05, "loss": 0.0009451458230614662, "step": 126840 }, { "epoch": 36.0062446778314, "grad_norm": 0.13823214173316956, "learning_rate": 6.400879931876243e-05, "loss": 0.004217361658811569, "step": 126850 }, { "epoch": 36.00908316775475, "grad_norm": 0.652084231376648, "learning_rate": 6.400596082883907e-05, "loss": 0.004219701886177063, "step": 126860 }, { "epoch": 36.011921657678116, "grad_norm": 0.36720722913742065, "learning_rate": 6.40031223389157e-05, "loss": 0.005573670566082001, "step": 126870 }, { "epoch": 36.01476014760148, "grad_norm": 0.11409512162208557, "learning_rate": 6.400028384899234e-05, "loss": 0.0010180821642279624, "step": 126880 }, { "epoch": 36.017598637524834, "grad_norm": 0.15623745322227478, "learning_rate": 6.399744535906898e-05, "loss": 0.0007325485348701477, "step": 126890 }, { "epoch": 36.0204371274482, "grad_norm": 0.1690439134836197, "learning_rate": 6.399460686914562e-05, "loss": 0.0015122126787900925, "step": 126900 }, { "epoch": 36.02327561737156, "grad_norm": 0.019635461270809174, "learning_rate": 6.399176837922226e-05, "loss": 0.0003349404782056808, "step": 126910 }, { "epoch": 36.026114107294916, "grad_norm": 0.9492222666740417, "learning_rate": 6.39889298892989e-05, "loss": 0.0005435949191451072, "step": 126920 }, { "epoch": 36.02895259721828, "grad_norm": 0.697754442691803, "learning_rate": 6.398609139937554e-05, "loss": 0.001267392374575138, "step": 126930 }, { "epoch": 36.03179108714164, "grad_norm": 0.017301077023148537, "learning_rate": 6.398325290945217e-05, "loss": 0.0005528261885046959, "step": 126940 }, { "epoch": 36.034629577065004, "grad_norm": 0.31308114528656006, "learning_rate": 6.398041441952881e-05, "loss": 0.0011047016829252242, "step": 126950 }, { "epoch": 36.03746806698836, "grad_norm": 0.005904946010559797, "learning_rate": 6.397757592960545e-05, "loss": 0.0030305493623018266, "step": 126960 }, { "epoch": 36.04030655691172, "grad_norm": 0.9885268807411194, "learning_rate": 6.39747374396821e-05, "loss": 0.001652628555893898, "step": 126970 }, { "epoch": 36.043145046835086, "grad_norm": 16.705610275268555, "learning_rate": 6.397189894975874e-05, "loss": 0.011378312110900879, "step": 126980 }, { "epoch": 36.04598353675844, "grad_norm": 2.1547346115112305, "learning_rate": 6.396906045983538e-05, "loss": 0.0024523379281163214, "step": 126990 }, { "epoch": 36.048822026681805, "grad_norm": 0.1016552671790123, "learning_rate": 6.3966221969912e-05, "loss": 0.009268662333488465, "step": 127000 }, { "epoch": 36.048822026681805, "eval_accuracy": 0.9738030139250969, "eval_loss": 0.09906970709562302, "eval_runtime": 35.9065, "eval_samples_per_second": 437.999, "eval_steps_per_second": 6.851, "step": 127000 }, { "epoch": 36.05166051660517, "grad_norm": 0.2820765972137451, "learning_rate": 6.396338347998865e-05, "loss": 0.016393522918224334, "step": 127010 }, { "epoch": 36.05449900652852, "grad_norm": 2.7495627403259277, "learning_rate": 6.396054499006529e-05, "loss": 0.01049189493060112, "step": 127020 }, { "epoch": 36.057337496451886, "grad_norm": 0.390097051858902, "learning_rate": 6.395770650014193e-05, "loss": 0.005693427100777626, "step": 127030 }, { "epoch": 36.06017598637525, "grad_norm": 0.05687711387872696, "learning_rate": 6.395486801021857e-05, "loss": 0.001409386843442917, "step": 127040 }, { "epoch": 36.06301447629861, "grad_norm": 0.4701940715312958, "learning_rate": 6.395202952029521e-05, "loss": 0.003020583651959896, "step": 127050 }, { "epoch": 36.06585296622197, "grad_norm": 0.10214724391698837, "learning_rate": 6.394919103037185e-05, "loss": 0.0019799862056970596, "step": 127060 }, { "epoch": 36.06869145614533, "grad_norm": 0.06895028799772263, "learning_rate": 6.394635254044848e-05, "loss": 0.0017231103032827378, "step": 127070 }, { "epoch": 36.071529946068694, "grad_norm": 0.02214064449071884, "learning_rate": 6.394351405052512e-05, "loss": 0.01474883258342743, "step": 127080 }, { "epoch": 36.07436843599205, "grad_norm": 0.20377866923809052, "learning_rate": 6.394067556060176e-05, "loss": 0.0024515453726053236, "step": 127090 }, { "epoch": 36.07720692591541, "grad_norm": 0.139778271317482, "learning_rate": 6.393783707067839e-05, "loss": 0.004090193659067154, "step": 127100 }, { "epoch": 36.080045415838775, "grad_norm": 0.43055957555770874, "learning_rate": 6.393499858075505e-05, "loss": 0.011294840276241303, "step": 127110 }, { "epoch": 36.08288390576214, "grad_norm": 0.6875730752944946, "learning_rate": 6.393216009083169e-05, "loss": 0.0016857178881764412, "step": 127120 }, { "epoch": 36.085722395685494, "grad_norm": 0.06960291415452957, "learning_rate": 6.392932160090832e-05, "loss": 0.012731529772281647, "step": 127130 }, { "epoch": 36.08856088560886, "grad_norm": 0.062136538326740265, "learning_rate": 6.392648311098496e-05, "loss": 0.009442612528800964, "step": 127140 }, { "epoch": 36.09139937553222, "grad_norm": 8.336195945739746, "learning_rate": 6.39236446210616e-05, "loss": 0.0094203919172287, "step": 127150 }, { "epoch": 36.094237865455575, "grad_norm": 2.465573787689209, "learning_rate": 6.392080613113824e-05, "loss": 0.0026004062965512276, "step": 127160 }, { "epoch": 36.09707635537894, "grad_norm": 0.4660519063472748, "learning_rate": 6.391796764121488e-05, "loss": 0.005797655880451202, "step": 127170 }, { "epoch": 36.0999148453023, "grad_norm": 0.2230096012353897, "learning_rate": 6.391512915129152e-05, "loss": 0.012200331687927246, "step": 127180 }, { "epoch": 36.10275333522566, "grad_norm": 4.177309513092041, "learning_rate": 6.391229066136815e-05, "loss": 0.005138414725661278, "step": 127190 }, { "epoch": 36.10559182514902, "grad_norm": 0.20545440912246704, "learning_rate": 6.390945217144479e-05, "loss": 0.004606728255748749, "step": 127200 }, { "epoch": 36.10843031507238, "grad_norm": 8.825156211853027, "learning_rate": 6.390661368152143e-05, "loss": 0.006867963820695877, "step": 127210 }, { "epoch": 36.111268804995746, "grad_norm": 0.09048261493444443, "learning_rate": 6.390377519159807e-05, "loss": 0.0005892287939786911, "step": 127220 }, { "epoch": 36.1141072949191, "grad_norm": 10.132667541503906, "learning_rate": 6.39009367016747e-05, "loss": 0.006582017242908478, "step": 127230 }, { "epoch": 36.116945784842464, "grad_norm": 0.0191486943513155, "learning_rate": 6.389809821175136e-05, "loss": 0.0029057059437036514, "step": 127240 }, { "epoch": 36.11978427476583, "grad_norm": 0.1511474847793579, "learning_rate": 6.3895259721828e-05, "loss": 0.002964484505355358, "step": 127250 }, { "epoch": 36.12262276468918, "grad_norm": 0.9869546294212341, "learning_rate": 6.389242123190463e-05, "loss": 0.008739395439624787, "step": 127260 }, { "epoch": 36.125461254612546, "grad_norm": 0.02229364775121212, "learning_rate": 6.388958274198127e-05, "loss": 0.004914174228906632, "step": 127270 }, { "epoch": 36.12829974453591, "grad_norm": 5.860229015350342, "learning_rate": 6.388674425205791e-05, "loss": 0.002263772115111351, "step": 127280 }, { "epoch": 36.131138234459264, "grad_norm": 0.1046142727136612, "learning_rate": 6.388390576213454e-05, "loss": 0.014858955144882202, "step": 127290 }, { "epoch": 36.13397672438263, "grad_norm": 7.349294185638428, "learning_rate": 6.388106727221118e-05, "loss": 0.0034845493733882903, "step": 127300 }, { "epoch": 36.13681521430599, "grad_norm": 16.137126922607422, "learning_rate": 6.387822878228783e-05, "loss": 0.009681769460439683, "step": 127310 }, { "epoch": 36.13965370422935, "grad_norm": 0.03756166622042656, "learning_rate": 6.387539029236446e-05, "loss": 0.0030086612328886985, "step": 127320 }, { "epoch": 36.14249219415271, "grad_norm": 8.740139961242676, "learning_rate": 6.38725518024411e-05, "loss": 0.004065710306167603, "step": 127330 }, { "epoch": 36.14533068407607, "grad_norm": 1.351664662361145, "learning_rate": 6.386971331251774e-05, "loss": 0.011854997277259827, "step": 127340 }, { "epoch": 36.148169173999435, "grad_norm": 2.494436025619507, "learning_rate": 6.386687482259439e-05, "loss": 0.0024578170850872993, "step": 127350 }, { "epoch": 36.15100766392279, "grad_norm": 9.676220893859863, "learning_rate": 6.386403633267101e-05, "loss": 0.0037799105048179626, "step": 127360 }, { "epoch": 36.15384615384615, "grad_norm": 0.2886539399623871, "learning_rate": 6.386119784274767e-05, "loss": 0.0007285818457603455, "step": 127370 }, { "epoch": 36.156684643769516, "grad_norm": 0.051569465547800064, "learning_rate": 6.385835935282431e-05, "loss": 0.005106816440820694, "step": 127380 }, { "epoch": 36.15952313369287, "grad_norm": 4.232329368591309, "learning_rate": 6.385552086290094e-05, "loss": 0.0016671273857355117, "step": 127390 }, { "epoch": 36.162361623616235, "grad_norm": 0.049145087599754333, "learning_rate": 6.385268237297758e-05, "loss": 0.0007192822173237801, "step": 127400 }, { "epoch": 36.1652001135396, "grad_norm": 0.2503319978713989, "learning_rate": 6.384984388305422e-05, "loss": 0.0009589372202754021, "step": 127410 }, { "epoch": 36.16803860346296, "grad_norm": 0.3948206305503845, "learning_rate": 6.384700539313085e-05, "loss": 0.0016093211248517036, "step": 127420 }, { "epoch": 36.170877093386316, "grad_norm": 0.3234567642211914, "learning_rate": 6.384416690320749e-05, "loss": 0.006043791025876999, "step": 127430 }, { "epoch": 36.17371558330968, "grad_norm": 0.025327149778604507, "learning_rate": 6.384132841328414e-05, "loss": 0.0020017795264720916, "step": 127440 }, { "epoch": 36.17655407323304, "grad_norm": 0.4793083667755127, "learning_rate": 6.383848992336077e-05, "loss": 0.0005039902403950691, "step": 127450 }, { "epoch": 36.1793925631564, "grad_norm": 0.05211372673511505, "learning_rate": 6.383565143343741e-05, "loss": 0.009728705883026123, "step": 127460 }, { "epoch": 36.18223105307976, "grad_norm": 0.02718464285135269, "learning_rate": 6.383281294351405e-05, "loss": 0.0020835869014263152, "step": 127470 }, { "epoch": 36.185069543003124, "grad_norm": 1.0258837938308716, "learning_rate": 6.38299744535907e-05, "loss": 0.0020437249913811684, "step": 127480 }, { "epoch": 36.18790803292649, "grad_norm": 11.836182594299316, "learning_rate": 6.382713596366732e-05, "loss": 0.0026019880548119544, "step": 127490 }, { "epoch": 36.19074652284984, "grad_norm": 0.2912672460079193, "learning_rate": 6.382429747374397e-05, "loss": 0.0005424732342362403, "step": 127500 }, { "epoch": 36.19074652284984, "eval_accuracy": 0.9792077319259872, "eval_loss": 0.07721260190010071, "eval_runtime": 33.0249, "eval_samples_per_second": 476.216, "eval_steps_per_second": 7.449, "step": 127500 }, { "epoch": 36.193585012773205, "grad_norm": 0.03580710291862488, "learning_rate": 6.382145898382062e-05, "loss": 0.002097636088728905, "step": 127510 }, { "epoch": 36.19642350269657, "grad_norm": 0.2307368367910385, "learning_rate": 6.381862049389725e-05, "loss": 0.0020559467375278473, "step": 127520 }, { "epoch": 36.199261992619924, "grad_norm": 0.35809728503227234, "learning_rate": 6.381578200397389e-05, "loss": 0.005946951359510422, "step": 127530 }, { "epoch": 36.20210048254329, "grad_norm": 1.3233470916748047, "learning_rate": 6.381294351405053e-05, "loss": 0.02633269429206848, "step": 127540 }, { "epoch": 36.20493897246665, "grad_norm": 0.029896030202507973, "learning_rate": 6.381010502412716e-05, "loss": 0.007535512745380402, "step": 127550 }, { "epoch": 36.207777462390005, "grad_norm": 3.6118295192718506, "learning_rate": 6.38072665342038e-05, "loss": 0.011308150738477707, "step": 127560 }, { "epoch": 36.21061595231337, "grad_norm": 7.300992965698242, "learning_rate": 6.380442804428046e-05, "loss": 0.018350376188755034, "step": 127570 }, { "epoch": 36.21345444223673, "grad_norm": 7.677474021911621, "learning_rate": 6.380158955435708e-05, "loss": 0.006647807359695434, "step": 127580 }, { "epoch": 36.216292932160094, "grad_norm": 9.251263618469238, "learning_rate": 6.379875106443372e-05, "loss": 0.008930154144763947, "step": 127590 }, { "epoch": 36.21913142208345, "grad_norm": 0.8378251194953918, "learning_rate": 6.379591257451037e-05, "loss": 0.01058925986289978, "step": 127600 }, { "epoch": 36.22196991200681, "grad_norm": 2.4268667697906494, "learning_rate": 6.379307408458701e-05, "loss": 0.01294480413198471, "step": 127610 }, { "epoch": 36.224808401930176, "grad_norm": 5.022036552429199, "learning_rate": 6.379023559466363e-05, "loss": 0.0029441937804222107, "step": 127620 }, { "epoch": 36.22764689185353, "grad_norm": 0.4763413667678833, "learning_rate": 6.378739710474028e-05, "loss": 0.0021481389179825784, "step": 127630 }, { "epoch": 36.230485381776894, "grad_norm": 1.1024677753448486, "learning_rate": 6.378455861481693e-05, "loss": 0.018889884650707244, "step": 127640 }, { "epoch": 36.23332387170026, "grad_norm": 0.6786662936210632, "learning_rate": 6.378172012489356e-05, "loss": 0.0041947998106479645, "step": 127650 }, { "epoch": 36.23616236162361, "grad_norm": 1.6401348114013672, "learning_rate": 6.37788816349702e-05, "loss": 0.0015390027314424516, "step": 127660 }, { "epoch": 36.239000851546976, "grad_norm": 0.01559839490801096, "learning_rate": 6.377604314504684e-05, "loss": 0.0018112853169441224, "step": 127670 }, { "epoch": 36.24183934147034, "grad_norm": 0.4458465278148651, "learning_rate": 6.377320465512347e-05, "loss": 0.004853838682174682, "step": 127680 }, { "epoch": 36.2446778313937, "grad_norm": 2.9447479248046875, "learning_rate": 6.377036616520011e-05, "loss": 0.0033736944198608397, "step": 127690 }, { "epoch": 36.24751632131706, "grad_norm": 0.13243135809898376, "learning_rate": 6.376752767527675e-05, "loss": 0.0007044639438390732, "step": 127700 }, { "epoch": 36.25035481124042, "grad_norm": 0.24969357252120972, "learning_rate": 6.37646891853534e-05, "loss": 0.0009594820439815521, "step": 127710 }, { "epoch": 36.25319330116378, "grad_norm": 0.020666098222136497, "learning_rate": 6.376185069543003e-05, "loss": 0.0032269008457660674, "step": 127720 }, { "epoch": 36.25603179108714, "grad_norm": 3.6767656803131104, "learning_rate": 6.375901220550668e-05, "loss": 0.008910276740789414, "step": 127730 }, { "epoch": 36.2588702810105, "grad_norm": 0.18335220217704773, "learning_rate": 6.375617371558332e-05, "loss": 0.0007700035348534584, "step": 127740 }, { "epoch": 36.261708770933865, "grad_norm": 0.537831723690033, "learning_rate": 6.375333522565995e-05, "loss": 0.006096630543470383, "step": 127750 }, { "epoch": 36.26454726085722, "grad_norm": 1.867469310760498, "learning_rate": 6.375049673573659e-05, "loss": 0.0015664435923099518, "step": 127760 }, { "epoch": 36.26738575078058, "grad_norm": 0.021206095814704895, "learning_rate": 6.374765824581324e-05, "loss": 0.004634418338537216, "step": 127770 }, { "epoch": 36.270224240703946, "grad_norm": 4.765797138214111, "learning_rate": 6.374481975588987e-05, "loss": 0.0025973623618483544, "step": 127780 }, { "epoch": 36.27306273062731, "grad_norm": 0.5014821887016296, "learning_rate": 6.374198126596651e-05, "loss": 0.00393572747707367, "step": 127790 }, { "epoch": 36.275901220550665, "grad_norm": 0.18928174674510956, "learning_rate": 6.373914277604315e-05, "loss": 0.001676810160279274, "step": 127800 }, { "epoch": 36.27873971047403, "grad_norm": 0.004621317610144615, "learning_rate": 6.373630428611978e-05, "loss": 0.01581505686044693, "step": 127810 }, { "epoch": 36.28157820039739, "grad_norm": 0.01397619303315878, "learning_rate": 6.373346579619642e-05, "loss": 0.0017767561599612235, "step": 127820 }, { "epoch": 36.28441669032075, "grad_norm": 0.21677768230438232, "learning_rate": 6.373062730627306e-05, "loss": 0.008725470304489136, "step": 127830 }, { "epoch": 36.28725518024411, "grad_norm": 0.017049316316843033, "learning_rate": 6.37277888163497e-05, "loss": 0.00843718945980072, "step": 127840 }, { "epoch": 36.29009367016747, "grad_norm": 0.05011492595076561, "learning_rate": 6.372495032642635e-05, "loss": 0.0010581284761428833, "step": 127850 }, { "epoch": 36.29293216009083, "grad_norm": 2.2155346870422363, "learning_rate": 6.372211183650299e-05, "loss": 0.006527785211801529, "step": 127860 }, { "epoch": 36.29577065001419, "grad_norm": 0.026516983285546303, "learning_rate": 6.371927334657963e-05, "loss": 0.00593523196876049, "step": 127870 }, { "epoch": 36.298609139937554, "grad_norm": 0.020759522914886475, "learning_rate": 6.371643485665626e-05, "loss": 0.0007839288562536239, "step": 127880 }, { "epoch": 36.30144762986092, "grad_norm": 0.04300856962800026, "learning_rate": 6.37135963667329e-05, "loss": 0.0033973604440689087, "step": 127890 }, { "epoch": 36.30428611978427, "grad_norm": 0.23770634829998016, "learning_rate": 6.371075787680955e-05, "loss": 0.0010123107582330703, "step": 127900 }, { "epoch": 36.307124609707635, "grad_norm": 0.016483308747410774, "learning_rate": 6.370791938688618e-05, "loss": 0.0018373733386397363, "step": 127910 }, { "epoch": 36.309963099631, "grad_norm": 0.3022102117538452, "learning_rate": 6.370508089696282e-05, "loss": 0.006942813843488693, "step": 127920 }, { "epoch": 36.312801589554354, "grad_norm": 1.0582555532455444, "learning_rate": 6.370224240703946e-05, "loss": 0.0010986870154738426, "step": 127930 }, { "epoch": 36.31564007947772, "grad_norm": 6.193361759185791, "learning_rate": 6.369940391711609e-05, "loss": 0.007307472825050354, "step": 127940 }, { "epoch": 36.31847856940108, "grad_norm": 0.2924259603023529, "learning_rate": 6.369656542719273e-05, "loss": 0.003460599109530449, "step": 127950 }, { "epoch": 36.32131705932444, "grad_norm": 4.700315952301025, "learning_rate": 6.369372693726937e-05, "loss": 0.003620915114879608, "step": 127960 }, { "epoch": 36.3241555492478, "grad_norm": 13.353503227233887, "learning_rate": 6.369088844734602e-05, "loss": 0.03636890649795532, "step": 127970 }, { "epoch": 36.32699403917116, "grad_norm": 0.018702995032072067, "learning_rate": 6.368804995742266e-05, "loss": 0.0023743543773889542, "step": 127980 }, { "epoch": 36.329832529094524, "grad_norm": 0.21083615720272064, "learning_rate": 6.36852114674993e-05, "loss": 0.0033444549888372423, "step": 127990 }, { "epoch": 36.33267101901788, "grad_norm": 4.288078784942627, "learning_rate": 6.368237297757594e-05, "loss": 0.00825321152806282, "step": 128000 }, { "epoch": 36.33267101901788, "eval_accuracy": 0.97990716602022, "eval_loss": 0.08138560503721237, "eval_runtime": 33.0706, "eval_samples_per_second": 475.558, "eval_steps_per_second": 7.439, "step": 128000 }, { "epoch": 36.33550950894124, "grad_norm": 0.0740441381931305, "learning_rate": 6.367953448765257e-05, "loss": 0.015925508737564088, "step": 128010 }, { "epoch": 36.338347998864606, "grad_norm": 0.6311216950416565, "learning_rate": 6.367669599772921e-05, "loss": 0.013429877161979676, "step": 128020 }, { "epoch": 36.34118648878796, "grad_norm": 0.051421862095594406, "learning_rate": 6.367385750780585e-05, "loss": 0.0070877641439437864, "step": 128030 }, { "epoch": 36.344024978711325, "grad_norm": 2.205678701400757, "learning_rate": 6.367101901788249e-05, "loss": 0.0022994505241513254, "step": 128040 }, { "epoch": 36.34686346863469, "grad_norm": 0.07418135553598404, "learning_rate": 6.366818052795913e-05, "loss": 0.0014950808137655259, "step": 128050 }, { "epoch": 36.34970195855805, "grad_norm": 0.14250826835632324, "learning_rate": 6.366534203803577e-05, "loss": 0.00496947281062603, "step": 128060 }, { "epoch": 36.352540448481406, "grad_norm": 0.023561576381325722, "learning_rate": 6.36625035481124e-05, "loss": 0.003190728649497032, "step": 128070 }, { "epoch": 36.35537893840477, "grad_norm": 0.20144614577293396, "learning_rate": 6.365966505818904e-05, "loss": 0.0011895393952727317, "step": 128080 }, { "epoch": 36.35821742832813, "grad_norm": 0.6179481744766235, "learning_rate": 6.365682656826568e-05, "loss": 0.0033345699310302736, "step": 128090 }, { "epoch": 36.36105591825149, "grad_norm": 0.3809681534767151, "learning_rate": 6.365398807834233e-05, "loss": 0.0030340585857629777, "step": 128100 }, { "epoch": 36.36389440817485, "grad_norm": 0.011726233176887035, "learning_rate": 6.365114958841897e-05, "loss": 0.0057945244014263155, "step": 128110 }, { "epoch": 36.36673289809821, "grad_norm": 8.852559089660645, "learning_rate": 6.364831109849561e-05, "loss": 0.01153874397277832, "step": 128120 }, { "epoch": 36.36957138802157, "grad_norm": 0.29766154289245605, "learning_rate": 6.364547260857224e-05, "loss": 0.00395512580871582, "step": 128130 }, { "epoch": 36.37240987794493, "grad_norm": 0.6903640031814575, "learning_rate": 6.364263411864888e-05, "loss": 0.014457818865776063, "step": 128140 }, { "epoch": 36.375248367868295, "grad_norm": 2.654893159866333, "learning_rate": 6.363979562872552e-05, "loss": 0.0014224853366613388, "step": 128150 }, { "epoch": 36.37808685779166, "grad_norm": 2.1615538597106934, "learning_rate": 6.363695713880216e-05, "loss": 0.0043528154492378235, "step": 128160 }, { "epoch": 36.380925347715014, "grad_norm": 0.7065211534500122, "learning_rate": 6.36341186488788e-05, "loss": 0.006006817892193794, "step": 128170 }, { "epoch": 36.38376383763838, "grad_norm": 0.08335983753204346, "learning_rate": 6.363128015895544e-05, "loss": 0.0022925751283764837, "step": 128180 }, { "epoch": 36.38660232756174, "grad_norm": 0.7360016107559204, "learning_rate": 6.362844166903208e-05, "loss": 0.002090408466756344, "step": 128190 }, { "epoch": 36.389440817485095, "grad_norm": 10.921599388122559, "learning_rate": 6.362560317910871e-05, "loss": 0.006712330132722854, "step": 128200 }, { "epoch": 36.39227930740846, "grad_norm": 3.544099807739258, "learning_rate": 6.362276468918535e-05, "loss": 0.008136117458343506, "step": 128210 }, { "epoch": 36.39511779733182, "grad_norm": 0.02402784302830696, "learning_rate": 6.3619926199262e-05, "loss": 0.001851295679807663, "step": 128220 }, { "epoch": 36.39795628725518, "grad_norm": 0.019308915361762047, "learning_rate": 6.361708770933862e-05, "loss": 0.005727361142635346, "step": 128230 }, { "epoch": 36.40079477717854, "grad_norm": 0.022248676046729088, "learning_rate": 6.361424921941528e-05, "loss": 0.000758613832294941, "step": 128240 }, { "epoch": 36.4036332671019, "grad_norm": 0.011006774380803108, "learning_rate": 6.361141072949192e-05, "loss": 0.0010382026433944703, "step": 128250 }, { "epoch": 36.406471757025265, "grad_norm": 0.06531082838773727, "learning_rate": 6.360857223956855e-05, "loss": 0.0011497905477881431, "step": 128260 }, { "epoch": 36.40931024694862, "grad_norm": 0.029194088652729988, "learning_rate": 6.360573374964519e-05, "loss": 0.0020659532397985458, "step": 128270 }, { "epoch": 36.412148736871984, "grad_norm": 0.7702168822288513, "learning_rate": 6.360289525972183e-05, "loss": 0.002642243541777134, "step": 128280 }, { "epoch": 36.41498722679535, "grad_norm": 0.018096087500452995, "learning_rate": 6.360005676979847e-05, "loss": 0.003162376955151558, "step": 128290 }, { "epoch": 36.4178257167187, "grad_norm": 0.1240096241235733, "learning_rate": 6.359721827987511e-05, "loss": 0.006188776344060898, "step": 128300 }, { "epoch": 36.420664206642066, "grad_norm": 0.8782771229743958, "learning_rate": 6.359437978995175e-05, "loss": 0.008145396411418915, "step": 128310 }, { "epoch": 36.42350269656543, "grad_norm": 2.9634907245635986, "learning_rate": 6.35915413000284e-05, "loss": 0.004529254510998726, "step": 128320 }, { "epoch": 36.42634118648879, "grad_norm": 1.265704870223999, "learning_rate": 6.358870281010502e-05, "loss": 0.010949037224054336, "step": 128330 }, { "epoch": 36.42917967641215, "grad_norm": 0.07179161161184311, "learning_rate": 6.358586432018166e-05, "loss": 0.003952974826097489, "step": 128340 }, { "epoch": 36.43201816633551, "grad_norm": 0.24810265004634857, "learning_rate": 6.35830258302583e-05, "loss": 0.0012488944455981254, "step": 128350 }, { "epoch": 36.43485665625887, "grad_norm": 0.9431495666503906, "learning_rate": 6.358018734033493e-05, "loss": 0.0016367265954613686, "step": 128360 }, { "epoch": 36.43769514618223, "grad_norm": 5.563615798950195, "learning_rate": 6.357734885041159e-05, "loss": 0.0017283158376812935, "step": 128370 }, { "epoch": 36.44053363610559, "grad_norm": 0.061207786202430725, "learning_rate": 6.357451036048823e-05, "loss": 0.008279362320899963, "step": 128380 }, { "epoch": 36.443372126028954, "grad_norm": 7.209055423736572, "learning_rate": 6.357167187056486e-05, "loss": 0.004300671070814133, "step": 128390 }, { "epoch": 36.44621061595231, "grad_norm": 0.003988747950643301, "learning_rate": 6.35688333806415e-05, "loss": 0.0019764244556427003, "step": 128400 }, { "epoch": 36.44904910587567, "grad_norm": 0.05412541702389717, "learning_rate": 6.356599489071814e-05, "loss": 0.0029214479029178618, "step": 128410 }, { "epoch": 36.451887595799036, "grad_norm": 0.03418227657675743, "learning_rate": 6.356315640079478e-05, "loss": 0.0018004434183239936, "step": 128420 }, { "epoch": 36.4547260857224, "grad_norm": 2.882899284362793, "learning_rate": 6.356031791087141e-05, "loss": 0.0019497206434607507, "step": 128430 }, { "epoch": 36.457564575645755, "grad_norm": 0.10749418288469315, "learning_rate": 6.355747942094806e-05, "loss": 0.008823716640472412, "step": 128440 }, { "epoch": 36.46040306556912, "grad_norm": 0.16396351158618927, "learning_rate": 6.35546409310247e-05, "loss": 0.006528340280056, "step": 128450 }, { "epoch": 36.46324155549248, "grad_norm": 1.842810869216919, "learning_rate": 6.355180244110133e-05, "loss": 0.0016402468085289002, "step": 128460 }, { "epoch": 36.466080045415836, "grad_norm": 0.3928082287311554, "learning_rate": 6.354896395117798e-05, "loss": 0.002145974338054657, "step": 128470 }, { "epoch": 36.4689185353392, "grad_norm": 0.341444730758667, "learning_rate": 6.354612546125462e-05, "loss": 0.011654725670814515, "step": 128480 }, { "epoch": 36.47175702526256, "grad_norm": 0.2058708667755127, "learning_rate": 6.354328697133124e-05, "loss": 0.0009709961712360382, "step": 128490 }, { "epoch": 36.47459551518592, "grad_norm": 4.08779239654541, "learning_rate": 6.35404484814079e-05, "loss": 0.02015383541584015, "step": 128500 }, { "epoch": 36.47459551518592, "eval_accuracy": 0.9804158453614803, "eval_loss": 0.07820949703454971, "eval_runtime": 33.8352, "eval_samples_per_second": 464.811, "eval_steps_per_second": 7.271, "step": 128500 }, { "epoch": 36.47743400510928, "grad_norm": 5.589162349700928, "learning_rate": 6.353760999148454e-05, "loss": 0.0026429006829857826, "step": 128510 }, { "epoch": 36.480272495032644, "grad_norm": 0.30108344554901123, "learning_rate": 6.353477150156117e-05, "loss": 0.0007613258436322212, "step": 128520 }, { "epoch": 36.48311098495601, "grad_norm": 8.021981239318848, "learning_rate": 6.353193301163781e-05, "loss": 0.017763482034206392, "step": 128530 }, { "epoch": 36.48594947487936, "grad_norm": 0.15369409322738647, "learning_rate": 6.352909452171445e-05, "loss": 0.00550374761223793, "step": 128540 }, { "epoch": 36.488787964802725, "grad_norm": 9.195015907287598, "learning_rate": 6.352625603179109e-05, "loss": 0.0029052380472421647, "step": 128550 }, { "epoch": 36.49162645472609, "grad_norm": 0.13452789187431335, "learning_rate": 6.352341754186772e-05, "loss": 0.00905410796403885, "step": 128560 }, { "epoch": 36.494464944649444, "grad_norm": 0.40401455760002136, "learning_rate": 6.352057905194438e-05, "loss": 0.0006917098537087441, "step": 128570 }, { "epoch": 36.49730343457281, "grad_norm": 0.006946524139493704, "learning_rate": 6.351774056202102e-05, "loss": 0.002637997269630432, "step": 128580 }, { "epoch": 36.50014192449617, "grad_norm": 0.020139873027801514, "learning_rate": 6.351490207209764e-05, "loss": 0.0020865218713879584, "step": 128590 }, { "epoch": 36.502980414419525, "grad_norm": 0.03285873681306839, "learning_rate": 6.351206358217429e-05, "loss": 0.011544883251190186, "step": 128600 }, { "epoch": 36.50581890434289, "grad_norm": 6.444767475128174, "learning_rate": 6.350922509225093e-05, "loss": 0.01731460839509964, "step": 128610 }, { "epoch": 36.50865739426625, "grad_norm": 0.16594725847244263, "learning_rate": 6.350638660232756e-05, "loss": 0.014406923949718476, "step": 128620 }, { "epoch": 36.511495884189614, "grad_norm": 0.5143117904663086, "learning_rate": 6.35035481124042e-05, "loss": 0.0015452036634087563, "step": 128630 }, { "epoch": 36.51433437411297, "grad_norm": 0.03939913958311081, "learning_rate": 6.350070962248085e-05, "loss": 0.005951287969946861, "step": 128640 }, { "epoch": 36.51717286403633, "grad_norm": 0.011578623205423355, "learning_rate": 6.349787113255748e-05, "loss": 0.001238206773996353, "step": 128650 }, { "epoch": 36.520011353959696, "grad_norm": 0.13961099088191986, "learning_rate": 6.349503264263412e-05, "loss": 0.0018642691895365715, "step": 128660 }, { "epoch": 36.52284984388305, "grad_norm": 9.90263843536377, "learning_rate": 6.349219415271076e-05, "loss": 0.009278625249862671, "step": 128670 }, { "epoch": 36.525688333806414, "grad_norm": 0.44503191113471985, "learning_rate": 6.34893556627874e-05, "loss": 0.0018455931916832925, "step": 128680 }, { "epoch": 36.52852682372978, "grad_norm": 0.04685082286596298, "learning_rate": 6.348651717286403e-05, "loss": 0.016522473096847533, "step": 128690 }, { "epoch": 36.53136531365314, "grad_norm": 8.86299991607666, "learning_rate": 6.348367868294069e-05, "loss": 0.0039052672684192656, "step": 128700 }, { "epoch": 36.534203803576496, "grad_norm": 0.6099891066551208, "learning_rate": 6.348084019301733e-05, "loss": 0.011082348227500916, "step": 128710 }, { "epoch": 36.53704229349986, "grad_norm": 2.4588377475738525, "learning_rate": 6.347800170309396e-05, "loss": 0.0026957148686051367, "step": 128720 }, { "epoch": 36.53988078342322, "grad_norm": 0.01603422872722149, "learning_rate": 6.34751632131706e-05, "loss": 0.0050222743302583694, "step": 128730 }, { "epoch": 36.54271927334658, "grad_norm": 0.029701031744480133, "learning_rate": 6.347232472324724e-05, "loss": 0.0018855631351470947, "step": 128740 }, { "epoch": 36.54555776326994, "grad_norm": 3.3636531829833984, "learning_rate": 6.346948623332387e-05, "loss": 0.0017828941345214844, "step": 128750 }, { "epoch": 36.5483962531933, "grad_norm": 0.12435000389814377, "learning_rate": 6.346664774340051e-05, "loss": 0.004118817299604416, "step": 128760 }, { "epoch": 36.55123474311666, "grad_norm": 0.012346392497420311, "learning_rate": 6.346380925347716e-05, "loss": 0.00262050274759531, "step": 128770 }, { "epoch": 36.55407323304002, "grad_norm": 1.127586841583252, "learning_rate": 6.346097076355379e-05, "loss": 0.0011174457147717475, "step": 128780 }, { "epoch": 36.556911722963385, "grad_norm": 0.019489482045173645, "learning_rate": 6.345813227363043e-05, "loss": 0.000708208605647087, "step": 128790 }, { "epoch": 36.55975021288675, "grad_norm": 1.864284873008728, "learning_rate": 6.345529378370707e-05, "loss": 0.000744316540658474, "step": 128800 }, { "epoch": 36.5625887028101, "grad_norm": 0.31024107336997986, "learning_rate": 6.345245529378371e-05, "loss": 0.0003105493262410164, "step": 128810 }, { "epoch": 36.565427192733466, "grad_norm": 1.2115134000778198, "learning_rate": 6.344961680386034e-05, "loss": 0.0008138775825500488, "step": 128820 }, { "epoch": 36.56826568265683, "grad_norm": 0.33679020404815674, "learning_rate": 6.344677831393698e-05, "loss": 0.0031870104372501372, "step": 128830 }, { "epoch": 36.571104172580185, "grad_norm": 0.21569180488586426, "learning_rate": 6.344393982401364e-05, "loss": 0.0007367886602878571, "step": 128840 }, { "epoch": 36.57394266250355, "grad_norm": 0.06326588988304138, "learning_rate": 6.344110133409027e-05, "loss": 0.003513256087899208, "step": 128850 }, { "epoch": 36.57678115242691, "grad_norm": 0.9483078718185425, "learning_rate": 6.343826284416691e-05, "loss": 0.0006481915712356568, "step": 128860 }, { "epoch": 36.579619642350266, "grad_norm": 0.060423802584409714, "learning_rate": 6.343542435424355e-05, "loss": 0.0005477694794535637, "step": 128870 }, { "epoch": 36.58245813227363, "grad_norm": 0.018697666004300117, "learning_rate": 6.343258586432018e-05, "loss": 0.003423592448234558, "step": 128880 }, { "epoch": 36.58529662219699, "grad_norm": 0.08778506517410278, "learning_rate": 6.342974737439682e-05, "loss": 0.00043826550245285034, "step": 128890 }, { "epoch": 36.588135112120355, "grad_norm": 0.13765589892864227, "learning_rate": 6.342690888447347e-05, "loss": 0.0018407845869660377, "step": 128900 }, { "epoch": 36.59097360204371, "grad_norm": 0.05568287894129753, "learning_rate": 6.34240703945501e-05, "loss": 0.0015858078375458717, "step": 128910 }, { "epoch": 36.593812091967074, "grad_norm": 0.011881754733622074, "learning_rate": 6.342123190462674e-05, "loss": 0.0033213503658771513, "step": 128920 }, { "epoch": 36.59665058189044, "grad_norm": 6.056994915008545, "learning_rate": 6.341839341470338e-05, "loss": 0.008078022301197052, "step": 128930 }, { "epoch": 36.59948907181379, "grad_norm": 0.0991639718413353, "learning_rate": 6.341555492478003e-05, "loss": 0.00790184959769249, "step": 128940 }, { "epoch": 36.602327561737155, "grad_norm": 0.36321359872817993, "learning_rate": 6.341271643485665e-05, "loss": 0.005641751363873482, "step": 128950 }, { "epoch": 36.60516605166052, "grad_norm": 0.06299863010644913, "learning_rate": 6.34098779449333e-05, "loss": 0.0025318751111626627, "step": 128960 }, { "epoch": 36.608004541583874, "grad_norm": 7.753971099853516, "learning_rate": 6.340703945500994e-05, "loss": 0.02277286648750305, "step": 128970 }, { "epoch": 36.61084303150724, "grad_norm": 13.954591751098633, "learning_rate": 6.340420096508658e-05, "loss": 0.009152688086032867, "step": 128980 }, { "epoch": 36.6136815214306, "grad_norm": 0.3857493996620178, "learning_rate": 6.340136247516322e-05, "loss": 0.0019907459616661074, "step": 128990 }, { "epoch": 36.61652001135396, "grad_norm": 0.0790044292807579, "learning_rate": 6.339852398523986e-05, "loss": 0.002251184917986393, "step": 129000 }, { "epoch": 36.61652001135396, "eval_accuracy": 0.9787626375023845, "eval_loss": 0.08676465600728989, "eval_runtime": 35.7801, "eval_samples_per_second": 439.546, "eval_steps_per_second": 6.875, "step": 129000 }, { "epoch": 36.61935850127732, "grad_norm": 2.0541813373565674, "learning_rate": 6.339568549531649e-05, "loss": 0.003450358286499977, "step": 129010 }, { "epoch": 36.62219699120068, "grad_norm": 10.607779502868652, "learning_rate": 6.339284700539313e-05, "loss": 0.008392401039600372, "step": 129020 }, { "epoch": 36.625035481124044, "grad_norm": 0.4827028810977936, "learning_rate": 6.339000851546977e-05, "loss": 0.01964891850948334, "step": 129030 }, { "epoch": 36.6278739710474, "grad_norm": 0.28665804862976074, "learning_rate": 6.338717002554641e-05, "loss": 0.001608036458492279, "step": 129040 }, { "epoch": 36.63071246097076, "grad_norm": 0.03570004180073738, "learning_rate": 6.338433153562305e-05, "loss": 0.0006259029731154441, "step": 129050 }, { "epoch": 36.633550950894126, "grad_norm": 0.034186676144599915, "learning_rate": 6.33814930456997e-05, "loss": 0.0003450706601142883, "step": 129060 }, { "epoch": 36.63638944081748, "grad_norm": 0.3043781816959381, "learning_rate": 6.337865455577632e-05, "loss": 0.016274470090866088, "step": 129070 }, { "epoch": 36.639227930740844, "grad_norm": 0.21124449372291565, "learning_rate": 6.337581606585296e-05, "loss": 0.006103143095970154, "step": 129080 }, { "epoch": 36.64206642066421, "grad_norm": 0.13228654861450195, "learning_rate": 6.33729775759296e-05, "loss": 0.0006897186860442162, "step": 129090 }, { "epoch": 36.64490491058757, "grad_norm": 0.3044595420360565, "learning_rate": 6.337013908600625e-05, "loss": 0.0011497622355818749, "step": 129100 }, { "epoch": 36.647743400510926, "grad_norm": 0.6836537718772888, "learning_rate": 6.336730059608289e-05, "loss": 0.0008182326331734657, "step": 129110 }, { "epoch": 36.65058189043429, "grad_norm": 5.294212341308594, "learning_rate": 6.336446210615953e-05, "loss": 0.0027227198705077173, "step": 129120 }, { "epoch": 36.65342038035765, "grad_norm": 0.04857298359274864, "learning_rate": 6.336162361623617e-05, "loss": 0.004412461817264557, "step": 129130 }, { "epoch": 36.65625887028101, "grad_norm": 2.5375545024871826, "learning_rate": 6.33587851263128e-05, "loss": 0.003926819562911988, "step": 129140 }, { "epoch": 36.65909736020437, "grad_norm": 0.13671818375587463, "learning_rate": 6.335594663638944e-05, "loss": 0.0073725394904613495, "step": 129150 }, { "epoch": 36.66193585012773, "grad_norm": 0.7730458378791809, "learning_rate": 6.335310814646608e-05, "loss": 0.009233669936656952, "step": 129160 }, { "epoch": 36.664774340051096, "grad_norm": 0.01889481209218502, "learning_rate": 6.335026965654272e-05, "loss": 0.005228839069604874, "step": 129170 }, { "epoch": 36.66761282997445, "grad_norm": 2.99668550491333, "learning_rate": 6.334743116661936e-05, "loss": 0.00709795206785202, "step": 129180 }, { "epoch": 36.670451319897815, "grad_norm": 13.389737129211426, "learning_rate": 6.3344592676696e-05, "loss": 0.027199560403823854, "step": 129190 }, { "epoch": 36.67328980982118, "grad_norm": 0.027286341413855553, "learning_rate": 6.334175418677263e-05, "loss": 0.028968390822410584, "step": 129200 }, { "epoch": 36.67612829974453, "grad_norm": 0.04089828580617905, "learning_rate": 6.333891569684927e-05, "loss": 0.000560896098613739, "step": 129210 }, { "epoch": 36.678966789667896, "grad_norm": 0.2848278284072876, "learning_rate": 6.333607720692592e-05, "loss": 0.015086063742637634, "step": 129220 }, { "epoch": 36.68180527959126, "grad_norm": 0.048004359006881714, "learning_rate": 6.333323871700256e-05, "loss": 0.008826318383216857, "step": 129230 }, { "epoch": 36.684643769514615, "grad_norm": 11.412628173828125, "learning_rate": 6.33304002270792e-05, "loss": 0.00877254456281662, "step": 129240 }, { "epoch": 36.68748225943798, "grad_norm": 0.22681915760040283, "learning_rate": 6.332756173715584e-05, "loss": 0.004846936836838722, "step": 129250 }, { "epoch": 36.69032074936134, "grad_norm": 0.08591228723526001, "learning_rate": 6.332472324723248e-05, "loss": 0.0020131107419729235, "step": 129260 }, { "epoch": 36.693159239284704, "grad_norm": 0.022930998355150223, "learning_rate": 6.332188475730911e-05, "loss": 0.0029015200212597846, "step": 129270 }, { "epoch": 36.69599772920806, "grad_norm": 0.6547598242759705, "learning_rate": 6.331904626738575e-05, "loss": 0.00476367324590683, "step": 129280 }, { "epoch": 36.69883621913142, "grad_norm": 0.5390777587890625, "learning_rate": 6.331620777746239e-05, "loss": 0.005596251413226128, "step": 129290 }, { "epoch": 36.701674709054785, "grad_norm": 0.013815740123391151, "learning_rate": 6.331336928753903e-05, "loss": 0.0013929719105362892, "step": 129300 }, { "epoch": 36.70451319897814, "grad_norm": 0.07918163388967514, "learning_rate": 6.331053079761567e-05, "loss": 0.0008067034184932708, "step": 129310 }, { "epoch": 36.707351688901504, "grad_norm": 0.040782127529382706, "learning_rate": 6.330769230769232e-05, "loss": 0.013977065682411194, "step": 129320 }, { "epoch": 36.71019017882487, "grad_norm": 0.04645824432373047, "learning_rate": 6.330485381776894e-05, "loss": 0.0048590943217277525, "step": 129330 }, { "epoch": 36.71302866874822, "grad_norm": 0.4852018654346466, "learning_rate": 6.330201532784559e-05, "loss": 0.0007517043501138687, "step": 129340 }, { "epoch": 36.715867158671585, "grad_norm": 0.1452348530292511, "learning_rate": 6.329917683792223e-05, "loss": 0.0024736938998103143, "step": 129350 }, { "epoch": 36.71870564859495, "grad_norm": 7.587156295776367, "learning_rate": 6.329633834799887e-05, "loss": 0.004673169553279876, "step": 129360 }, { "epoch": 36.72154413851831, "grad_norm": 0.13316726684570312, "learning_rate": 6.329349985807551e-05, "loss": 0.011074727773666382, "step": 129370 }, { "epoch": 36.72438262844167, "grad_norm": 0.07752780616283417, "learning_rate": 6.329066136815215e-05, "loss": 0.00383182056248188, "step": 129380 }, { "epoch": 36.72722111836503, "grad_norm": 0.03867577388882637, "learning_rate": 6.328782287822879e-05, "loss": 0.00888003185391426, "step": 129390 }, { "epoch": 36.73005960828839, "grad_norm": 0.024995556101202965, "learning_rate": 6.328498438830542e-05, "loss": 0.001491091400384903, "step": 129400 }, { "epoch": 36.73289809821175, "grad_norm": 9.607197761535645, "learning_rate": 6.328214589838206e-05, "loss": 0.004474963992834091, "step": 129410 }, { "epoch": 36.73573658813511, "grad_norm": 9.887733459472656, "learning_rate": 6.32793074084587e-05, "loss": 0.005158013105392456, "step": 129420 }, { "epoch": 36.738575078058474, "grad_norm": 12.164817810058594, "learning_rate": 6.327646891853534e-05, "loss": 0.019280169904232026, "step": 129430 }, { "epoch": 36.74141356798184, "grad_norm": 0.5355894565582275, "learning_rate": 6.327363042861199e-05, "loss": 0.004494190216064453, "step": 129440 }, { "epoch": 36.74425205790519, "grad_norm": 0.062375105917453766, "learning_rate": 6.327079193868863e-05, "loss": 0.0016140773892402648, "step": 129450 }, { "epoch": 36.747090547828556, "grad_norm": 8.29430866241455, "learning_rate": 6.326795344876525e-05, "loss": 0.008461484313011169, "step": 129460 }, { "epoch": 36.74992903775192, "grad_norm": 0.152756929397583, "learning_rate": 6.32651149588419e-05, "loss": 0.0012998009100556374, "step": 129470 }, { "epoch": 36.752767527675275, "grad_norm": 0.5996139645576477, "learning_rate": 6.326227646891854e-05, "loss": 0.005210794508457184, "step": 129480 }, { "epoch": 36.75560601759864, "grad_norm": 13.770894050598145, "learning_rate": 6.325943797899518e-05, "loss": 0.010718227922916412, "step": 129490 }, { "epoch": 36.758444507522, "grad_norm": 0.5974063873291016, "learning_rate": 6.325659948907182e-05, "loss": 0.011442132294178009, "step": 129500 }, { "epoch": 36.758444507522, "eval_accuracy": 0.9791441470083296, "eval_loss": 0.08250121027231216, "eval_runtime": 35.1287, "eval_samples_per_second": 447.697, "eval_steps_per_second": 7.003, "step": 129500 }, { "epoch": 36.761282997445356, "grad_norm": 5.8094305992126465, "learning_rate": 6.325376099914846e-05, "loss": 0.0017808539792895316, "step": 129510 }, { "epoch": 36.76412148736872, "grad_norm": 0.013359332457184792, "learning_rate": 6.32509225092251e-05, "loss": 0.0019900668412446976, "step": 129520 }, { "epoch": 36.76695997729208, "grad_norm": 0.5706868171691895, "learning_rate": 6.324808401930173e-05, "loss": 0.0026134109124541284, "step": 129530 }, { "epoch": 36.769798467215445, "grad_norm": 0.12721939384937286, "learning_rate": 6.324524552937837e-05, "loss": 0.007015623152256012, "step": 129540 }, { "epoch": 36.7726369571388, "grad_norm": 1.6283936500549316, "learning_rate": 6.324240703945501e-05, "loss": 0.006452570855617523, "step": 129550 }, { "epoch": 36.77547544706216, "grad_norm": 2.8502397537231445, "learning_rate": 6.3239852398524e-05, "loss": 0.01969865709543228, "step": 129560 }, { "epoch": 36.778313936985526, "grad_norm": 9.011114120483398, "learning_rate": 6.323701390860063e-05, "loss": 0.006659458577632904, "step": 129570 }, { "epoch": 36.78115242690888, "grad_norm": 0.5757278800010681, "learning_rate": 6.323417541867726e-05, "loss": 0.000776919536292553, "step": 129580 }, { "epoch": 36.783990916832245, "grad_norm": 13.4680814743042, "learning_rate": 6.32313369287539e-05, "loss": 0.006024082005023956, "step": 129590 }, { "epoch": 36.78682940675561, "grad_norm": 0.08804531395435333, "learning_rate": 6.322849843883055e-05, "loss": 0.00346464216709137, "step": 129600 }, { "epoch": 36.789667896678964, "grad_norm": 1.608965516090393, "learning_rate": 6.322565994890719e-05, "loss": 0.02095048874616623, "step": 129610 }, { "epoch": 36.79250638660233, "grad_norm": 0.20679964125156403, "learning_rate": 6.322282145898383e-05, "loss": 0.006688623130321503, "step": 129620 }, { "epoch": 36.79534487652569, "grad_norm": 1.3910844326019287, "learning_rate": 6.321998296906047e-05, "loss": 0.012509118020534515, "step": 129630 }, { "epoch": 36.79818336644905, "grad_norm": 0.040059398859739304, "learning_rate": 6.32171444791371e-05, "loss": 0.007021756470203399, "step": 129640 }, { "epoch": 36.80102185637241, "grad_norm": 0.19977468252182007, "learning_rate": 6.321430598921374e-05, "loss": 0.007352250069379807, "step": 129650 }, { "epoch": 36.80386034629577, "grad_norm": 0.06561482697725296, "learning_rate": 6.321146749929038e-05, "loss": 0.0018477601930499076, "step": 129660 }, { "epoch": 36.806698836219134, "grad_norm": 0.31066200137138367, "learning_rate": 6.320862900936702e-05, "loss": 0.0013190945610404015, "step": 129670 }, { "epoch": 36.80953732614249, "grad_norm": 0.8765757083892822, "learning_rate": 6.320579051944366e-05, "loss": 0.004520407691597938, "step": 129680 }, { "epoch": 36.81237581606585, "grad_norm": 0.0565640851855278, "learning_rate": 6.32029520295203e-05, "loss": 0.002983830124139786, "step": 129690 }, { "epoch": 36.815214305989215, "grad_norm": 3.350592851638794, "learning_rate": 6.320011353959695e-05, "loss": 0.01339464783668518, "step": 129700 }, { "epoch": 36.81805279591257, "grad_norm": 0.6113952994346619, "learning_rate": 6.319727504967357e-05, "loss": 0.01191931962966919, "step": 129710 }, { "epoch": 36.820891285835934, "grad_norm": 0.04059747979044914, "learning_rate": 6.319443655975021e-05, "loss": 0.0008980678394436836, "step": 129720 }, { "epoch": 36.8237297757593, "grad_norm": 0.3991071283817291, "learning_rate": 6.319159806982686e-05, "loss": 0.003927946835756302, "step": 129730 }, { "epoch": 36.82656826568266, "grad_norm": 0.011499575339257717, "learning_rate": 6.318875957990348e-05, "loss": 0.0021372390910983086, "step": 129740 }, { "epoch": 36.829406755606016, "grad_norm": 0.009257896803319454, "learning_rate": 6.318592108998014e-05, "loss": 0.0030471270903944967, "step": 129750 }, { "epoch": 36.83224524552938, "grad_norm": 0.04495961591601372, "learning_rate": 6.318308260005678e-05, "loss": 0.0024018798023462294, "step": 129760 }, { "epoch": 36.83508373545274, "grad_norm": 1.632062315940857, "learning_rate": 6.318024411013341e-05, "loss": 0.01095319390296936, "step": 129770 }, { "epoch": 36.8379222253761, "grad_norm": 9.663044929504395, "learning_rate": 6.317740562021005e-05, "loss": 0.006088586151599884, "step": 129780 }, { "epoch": 36.84076071529946, "grad_norm": 0.0606767013669014, "learning_rate": 6.317456713028669e-05, "loss": 0.008363372087478638, "step": 129790 }, { "epoch": 36.84359920522282, "grad_norm": 0.11402411758899689, "learning_rate": 6.317172864036333e-05, "loss": 0.015436290204524994, "step": 129800 }, { "epoch": 36.84643769514618, "grad_norm": 0.20201240479946136, "learning_rate": 6.316889015043997e-05, "loss": 0.00127561055123806, "step": 129810 }, { "epoch": 36.84927618506954, "grad_norm": 0.018268918618559837, "learning_rate": 6.316605166051662e-05, "loss": 0.005718234181404114, "step": 129820 }, { "epoch": 36.852114674992904, "grad_norm": 1.721305012702942, "learning_rate": 6.316321317059324e-05, "loss": 0.007927251607179641, "step": 129830 }, { "epoch": 36.85495316491627, "grad_norm": 0.08772625774145126, "learning_rate": 6.316037468066988e-05, "loss": 0.0022249015048146246, "step": 129840 }, { "epoch": 36.85779165483962, "grad_norm": 0.04761454835534096, "learning_rate": 6.315753619074653e-05, "loss": 0.0029584193602204324, "step": 129850 }, { "epoch": 36.860630144762986, "grad_norm": 0.04814695194363594, "learning_rate": 6.315469770082317e-05, "loss": 0.0025397367775440217, "step": 129860 }, { "epoch": 36.86346863468635, "grad_norm": 0.038463614881038666, "learning_rate": 6.31518592108998e-05, "loss": 0.0022182030603289603, "step": 129870 }, { "epoch": 36.866307124609705, "grad_norm": 4.698470115661621, "learning_rate": 6.314902072097645e-05, "loss": 0.0025389248505234717, "step": 129880 }, { "epoch": 36.86914561453307, "grad_norm": 4.208362579345703, "learning_rate": 6.314618223105309e-05, "loss": 0.0030696963891386985, "step": 129890 }, { "epoch": 36.87198410445643, "grad_norm": 0.12766622006893158, "learning_rate": 6.314334374112972e-05, "loss": 0.0009986337274312973, "step": 129900 }, { "epoch": 36.87482259437979, "grad_norm": 0.13423876464366913, "learning_rate": 6.314050525120636e-05, "loss": 0.002624015510082245, "step": 129910 }, { "epoch": 36.87766108430315, "grad_norm": 0.3797364830970764, "learning_rate": 6.3137666761283e-05, "loss": 0.012287545204162597, "step": 129920 }, { "epoch": 36.88049957422651, "grad_norm": 6.133577346801758, "learning_rate": 6.313482827135963e-05, "loss": 0.0034635972231626512, "step": 129930 }, { "epoch": 36.883338064149875, "grad_norm": 3.2305848598480225, "learning_rate": 6.313198978143627e-05, "loss": 0.002403916046023369, "step": 129940 }, { "epoch": 36.88617655407323, "grad_norm": 3.1066110134124756, "learning_rate": 6.312915129151293e-05, "loss": 0.007316003739833832, "step": 129950 }, { "epoch": 36.889015043996594, "grad_norm": 0.10718487948179245, "learning_rate": 6.312631280158955e-05, "loss": 0.00277200173586607, "step": 129960 }, { "epoch": 36.89185353391996, "grad_norm": 0.09907248616218567, "learning_rate": 6.31234743116662e-05, "loss": 0.004041555523872376, "step": 129970 }, { "epoch": 36.89469202384331, "grad_norm": 0.020410960540175438, "learning_rate": 6.312063582174284e-05, "loss": 0.003070317581295967, "step": 129980 }, { "epoch": 36.897530513766675, "grad_norm": 0.4777059257030487, "learning_rate": 6.311779733181948e-05, "loss": 0.003304058313369751, "step": 129990 }, { "epoch": 36.90036900369004, "grad_norm": 3.5799248218536377, "learning_rate": 6.31149588418961e-05, "loss": 0.007366971671581268, "step": 130000 }, { "epoch": 36.90036900369004, "eval_accuracy": 0.9769822598079736, "eval_loss": 0.09072548151016235, "eval_runtime": 34.1175, "eval_samples_per_second": 460.965, "eval_steps_per_second": 7.21, "step": 130000 }, { "epoch": 36.9032074936134, "grad_norm": 0.495332807302475, "learning_rate": 6.311212035197276e-05, "loss": 0.0015956435352563858, "step": 130010 }, { "epoch": 36.90604598353676, "grad_norm": 0.07162602990865707, "learning_rate": 6.31092818620494e-05, "loss": 0.007039084285497666, "step": 130020 }, { "epoch": 36.90888447346012, "grad_norm": 0.01974036730825901, "learning_rate": 6.310644337212603e-05, "loss": 0.012395786494016648, "step": 130030 }, { "epoch": 36.91172296338348, "grad_norm": 1.6120952367782593, "learning_rate": 6.310360488220267e-05, "loss": 0.0007413361221551895, "step": 130040 }, { "epoch": 36.91456145330684, "grad_norm": 1.934460163116455, "learning_rate": 6.310076639227931e-05, "loss": 0.0019701750949025153, "step": 130050 }, { "epoch": 36.9173999432302, "grad_norm": 0.07068641483783722, "learning_rate": 6.309792790235594e-05, "loss": 0.0030051197856664656, "step": 130060 }, { "epoch": 36.920238433153564, "grad_norm": 0.40681082010269165, "learning_rate": 6.309508941243258e-05, "loss": 0.001262691617012024, "step": 130070 }, { "epoch": 36.92307692307692, "grad_norm": 0.05574515461921692, "learning_rate": 6.309225092250924e-05, "loss": 0.0009129580110311508, "step": 130080 }, { "epoch": 36.92591541300028, "grad_norm": 0.036723408848047256, "learning_rate": 6.308941243258586e-05, "loss": 0.01094631403684616, "step": 130090 }, { "epoch": 36.928753902923646, "grad_norm": 3.069185256958008, "learning_rate": 6.30865739426625e-05, "loss": 0.0013963276520371436, "step": 130100 }, { "epoch": 36.93159239284701, "grad_norm": 0.07636164128780365, "learning_rate": 6.308373545273915e-05, "loss": 0.005034613609313965, "step": 130110 }, { "epoch": 36.934430882770364, "grad_norm": 0.10870613157749176, "learning_rate": 6.308089696281579e-05, "loss": 0.004268135875463486, "step": 130120 }, { "epoch": 36.93726937269373, "grad_norm": 5.3034892082214355, "learning_rate": 6.307805847289242e-05, "loss": 0.0019975250586867334, "step": 130130 }, { "epoch": 36.94010786261709, "grad_norm": 2.8443896770477295, "learning_rate": 6.307521998296906e-05, "loss": 0.02029956877231598, "step": 130140 }, { "epoch": 36.942946352540446, "grad_norm": 0.07382318377494812, "learning_rate": 6.307238149304571e-05, "loss": 0.0017638113349676132, "step": 130150 }, { "epoch": 36.94578484246381, "grad_norm": 1.6783512830734253, "learning_rate": 6.306954300312234e-05, "loss": 0.0033314324915409087, "step": 130160 }, { "epoch": 36.94862333238717, "grad_norm": 0.6108130812644958, "learning_rate": 6.306670451319898e-05, "loss": 0.005053582787513733, "step": 130170 }, { "epoch": 36.95146182231053, "grad_norm": 0.11258241534233093, "learning_rate": 6.306386602327562e-05, "loss": 0.000786965899169445, "step": 130180 }, { "epoch": 36.95430031223389, "grad_norm": 0.525161623954773, "learning_rate": 6.306102753335225e-05, "loss": 0.0008694630116224289, "step": 130190 }, { "epoch": 36.95713880215725, "grad_norm": 0.24390898644924164, "learning_rate": 6.305818904342889e-05, "loss": 0.007368716597557068, "step": 130200 }, { "epoch": 36.959977292080616, "grad_norm": 0.014644741080701351, "learning_rate": 6.305535055350555e-05, "loss": 0.0032337047159671783, "step": 130210 }, { "epoch": 36.96281578200397, "grad_norm": 0.4670841097831726, "learning_rate": 6.305251206358218e-05, "loss": 0.001866704784333706, "step": 130220 }, { "epoch": 36.965654271927335, "grad_norm": 1.6744729280471802, "learning_rate": 6.304967357365882e-05, "loss": 0.00498042069375515, "step": 130230 }, { "epoch": 36.9684927618507, "grad_norm": 0.607171356678009, "learning_rate": 6.304683508373546e-05, "loss": 0.019280943274497985, "step": 130240 }, { "epoch": 36.97133125177405, "grad_norm": 0.0426727719604969, "learning_rate": 6.30439965938121e-05, "loss": 0.0005332037806510925, "step": 130250 }, { "epoch": 36.974169741697416, "grad_norm": 0.012115971185266972, "learning_rate": 6.304115810388873e-05, "loss": 0.0013802312314510345, "step": 130260 }, { "epoch": 36.97700823162078, "grad_norm": 4.159261703491211, "learning_rate": 6.303831961396537e-05, "loss": 0.0017375662922859193, "step": 130270 }, { "epoch": 36.97984672154414, "grad_norm": 0.5530707836151123, "learning_rate": 6.303548112404202e-05, "loss": 0.001022731140255928, "step": 130280 }, { "epoch": 36.9826852114675, "grad_norm": 1.1282517910003662, "learning_rate": 6.303264263411865e-05, "loss": 0.002628704160451889, "step": 130290 }, { "epoch": 36.98552370139086, "grad_norm": 0.14899055659770966, "learning_rate": 6.302980414419529e-05, "loss": 0.0011402800679206847, "step": 130300 }, { "epoch": 36.98836219131422, "grad_norm": 0.07190070301294327, "learning_rate": 6.302696565427193e-05, "loss": 0.0027583647519350053, "step": 130310 }, { "epoch": 36.99120068123758, "grad_norm": 0.14404425024986267, "learning_rate": 6.302412716434856e-05, "loss": 0.0002224501222372055, "step": 130320 }, { "epoch": 36.99403917116094, "grad_norm": 0.08350404351949692, "learning_rate": 6.30212886744252e-05, "loss": 0.0011549703776836395, "step": 130330 }, { "epoch": 36.996877661084305, "grad_norm": 0.4000566303730011, "learning_rate": 6.301845018450184e-05, "loss": 0.005573854595422745, "step": 130340 }, { "epoch": 36.99971615100766, "grad_norm": 0.5143399238586426, "learning_rate": 6.301561169457849e-05, "loss": 0.007256266474723816, "step": 130350 }, { "epoch": 37.002554640931024, "grad_norm": 0.03565188869833946, "learning_rate": 6.301277320465513e-05, "loss": 0.01050936132669449, "step": 130360 }, { "epoch": 37.00539313085439, "grad_norm": 0.17887035012245178, "learning_rate": 6.300993471473177e-05, "loss": 0.0015922296792268754, "step": 130370 }, { "epoch": 37.00823162077775, "grad_norm": 0.3897233009338379, "learning_rate": 6.300709622480841e-05, "loss": 0.0009696636348962784, "step": 130380 }, { "epoch": 37.011070110701105, "grad_norm": 0.09438728541135788, "learning_rate": 6.300425773488504e-05, "loss": 0.0079301618039608, "step": 130390 }, { "epoch": 37.01390860062447, "grad_norm": 0.15651796758174896, "learning_rate": 6.300141924496168e-05, "loss": 0.002548413723707199, "step": 130400 }, { "epoch": 37.01674709054783, "grad_norm": 0.1547599583864212, "learning_rate": 6.299858075503833e-05, "loss": 0.0025703204795718194, "step": 130410 }, { "epoch": 37.01958558047119, "grad_norm": 0.02767334319651127, "learning_rate": 6.299574226511496e-05, "loss": 0.0005826853215694427, "step": 130420 }, { "epoch": 37.02242407039455, "grad_norm": 2.376499652862549, "learning_rate": 6.29929037751916e-05, "loss": 0.003778344392776489, "step": 130430 }, { "epoch": 37.02526256031791, "grad_norm": 0.17291903495788574, "learning_rate": 6.299006528526824e-05, "loss": 0.0055218666791915895, "step": 130440 }, { "epoch": 37.02810105024127, "grad_norm": 0.10353511571884155, "learning_rate": 6.298722679534487e-05, "loss": 0.0005039390176534653, "step": 130450 }, { "epoch": 37.03093954016463, "grad_norm": 0.20854203402996063, "learning_rate": 6.298438830542151e-05, "loss": 0.0004312729462981224, "step": 130460 }, { "epoch": 37.033778030087994, "grad_norm": 0.033303264528512955, "learning_rate": 6.298154981549816e-05, "loss": 0.00035378970205783843, "step": 130470 }, { "epoch": 37.03661652001136, "grad_norm": 0.015487562865018845, "learning_rate": 6.29787113255748e-05, "loss": 0.001147863268852234, "step": 130480 }, { "epoch": 37.03945500993471, "grad_norm": 1.8851537704467773, "learning_rate": 6.297587283565144e-05, "loss": 0.0029305962845683096, "step": 130490 }, { "epoch": 37.042293499858076, "grad_norm": 0.13751362264156342, "learning_rate": 6.297303434572808e-05, "loss": 0.010264120995998383, "step": 130500 }, { "epoch": 37.042293499858076, "eval_accuracy": 0.9797164112672474, "eval_loss": 0.07934577763080597, "eval_runtime": 36.205, "eval_samples_per_second": 434.387, "eval_steps_per_second": 6.795, "step": 130500 }, { "epoch": 37.04513198978144, "grad_norm": 0.5436487197875977, "learning_rate": 6.297019585580472e-05, "loss": 0.001325763575732708, "step": 130510 }, { "epoch": 37.047970479704794, "grad_norm": 0.0925031453371048, "learning_rate": 6.296735736588135e-05, "loss": 0.004970481246709823, "step": 130520 }, { "epoch": 37.05080896962816, "grad_norm": 16.567846298217773, "learning_rate": 6.296451887595799e-05, "loss": 0.010370297729969025, "step": 130530 }, { "epoch": 37.05364745955152, "grad_norm": 0.045772675424814224, "learning_rate": 6.296168038603463e-05, "loss": 0.0006351975724101066, "step": 130540 }, { "epoch": 37.056485949474876, "grad_norm": 0.03827929124236107, "learning_rate": 6.295884189611127e-05, "loss": 0.003038960136473179, "step": 130550 }, { "epoch": 37.05932443939824, "grad_norm": 0.0856766402721405, "learning_rate": 6.295600340618791e-05, "loss": 0.0005665712058544159, "step": 130560 }, { "epoch": 37.0621629293216, "grad_norm": 0.0784933865070343, "learning_rate": 6.295316491626456e-05, "loss": 0.000658712349832058, "step": 130570 }, { "epoch": 37.065001419244965, "grad_norm": 0.11612975597381592, "learning_rate": 6.295032642634118e-05, "loss": 0.001969816908240318, "step": 130580 }, { "epoch": 37.06783990916832, "grad_norm": 0.07763160765171051, "learning_rate": 6.294748793641782e-05, "loss": 0.00457119420170784, "step": 130590 }, { "epoch": 37.07067839909168, "grad_norm": 0.006960602942854166, "learning_rate": 6.294464944649447e-05, "loss": 0.002505576424300671, "step": 130600 }, { "epoch": 37.073516889015046, "grad_norm": 0.12465833127498627, "learning_rate": 6.294181095657111e-05, "loss": 0.006238972768187523, "step": 130610 }, { "epoch": 37.0763553789384, "grad_norm": 0.20366549491882324, "learning_rate": 6.293897246664775e-05, "loss": 0.002705113962292671, "step": 130620 }, { "epoch": 37.079193868861765, "grad_norm": 0.04659942910075188, "learning_rate": 6.293613397672439e-05, "loss": 0.0024423394352197647, "step": 130630 }, { "epoch": 37.08203235878513, "grad_norm": 0.11202909797430038, "learning_rate": 6.293329548680103e-05, "loss": 0.0017619254067540169, "step": 130640 }, { "epoch": 37.08487084870849, "grad_norm": 0.0591554269194603, "learning_rate": 6.293045699687766e-05, "loss": 0.002140757627785206, "step": 130650 }, { "epoch": 37.087709338631846, "grad_norm": 0.09326966106891632, "learning_rate": 6.29276185069543e-05, "loss": 0.0029433706775307657, "step": 130660 }, { "epoch": 37.09054782855521, "grad_norm": 0.05217878893017769, "learning_rate": 6.292478001703094e-05, "loss": 0.002000485546886921, "step": 130670 }, { "epoch": 37.09338631847857, "grad_norm": 5.54909610748291, "learning_rate": 6.292194152710758e-05, "loss": 0.004016679525375366, "step": 130680 }, { "epoch": 37.09622480840193, "grad_norm": 0.19651298224925995, "learning_rate": 6.291910303718422e-05, "loss": 0.0011386515572667122, "step": 130690 }, { "epoch": 37.09906329832529, "grad_norm": 0.542424201965332, "learning_rate": 6.291626454726087e-05, "loss": 0.0035616066306829453, "step": 130700 }, { "epoch": 37.101901788248654, "grad_norm": 0.05161445960402489, "learning_rate": 6.29134260573375e-05, "loss": 0.0022176476195454597, "step": 130710 }, { "epoch": 37.10474027817201, "grad_norm": 0.9387657046318054, "learning_rate": 6.291058756741414e-05, "loss": 0.0018437311053276062, "step": 130720 }, { "epoch": 37.10757876809537, "grad_norm": 0.01742202788591385, "learning_rate": 6.290774907749078e-05, "loss": 0.0005846815183758736, "step": 130730 }, { "epoch": 37.110417258018735, "grad_norm": 0.13496190309524536, "learning_rate": 6.290491058756742e-05, "loss": 0.004101266711950302, "step": 130740 }, { "epoch": 37.1132557479421, "grad_norm": 1.0926427841186523, "learning_rate": 6.290207209764406e-05, "loss": 0.0009459976106882095, "step": 130750 }, { "epoch": 37.116094237865454, "grad_norm": 0.07232535630464554, "learning_rate": 6.28992336077207e-05, "loss": 0.0013535305857658385, "step": 130760 }, { "epoch": 37.11893272778882, "grad_norm": 0.02872641012072563, "learning_rate": 6.289639511779733e-05, "loss": 0.0008435329422354698, "step": 130770 }, { "epoch": 37.12177121771218, "grad_norm": 0.21747711300849915, "learning_rate": 6.289355662787397e-05, "loss": 0.0005777012556791306, "step": 130780 }, { "epoch": 37.124609707635535, "grad_norm": 0.0940374955534935, "learning_rate": 6.289071813795061e-05, "loss": 0.0003605099394917488, "step": 130790 }, { "epoch": 37.1274481975589, "grad_norm": 0.026141926646232605, "learning_rate": 6.288787964802725e-05, "loss": 0.003108092024922371, "step": 130800 }, { "epoch": 37.13028668748226, "grad_norm": 3.262558698654175, "learning_rate": 6.28850411581039e-05, "loss": 0.0007231101393699646, "step": 130810 }, { "epoch": 37.13312517740562, "grad_norm": 0.06969907879829407, "learning_rate": 6.288220266818054e-05, "loss": 0.0009005244821310044, "step": 130820 }, { "epoch": 37.13596366732898, "grad_norm": 0.02396879345178604, "learning_rate": 6.287936417825718e-05, "loss": 0.0012381860986351966, "step": 130830 }, { "epoch": 37.13880215725234, "grad_norm": 4.568332672119141, "learning_rate": 6.28765256883338e-05, "loss": 0.001168198138475418, "step": 130840 }, { "epoch": 37.141640647175706, "grad_norm": 0.8320554494857788, "learning_rate": 6.287368719841045e-05, "loss": 0.007207748293876648, "step": 130850 }, { "epoch": 37.14447913709906, "grad_norm": 0.015546812675893307, "learning_rate": 6.287084870848709e-05, "loss": 0.0032789766788482665, "step": 130860 }, { "epoch": 37.147317627022424, "grad_norm": 0.9319203495979309, "learning_rate": 6.286801021856372e-05, "loss": 0.00807638093829155, "step": 130870 }, { "epoch": 37.15015611694579, "grad_norm": 0.142912358045578, "learning_rate": 6.286517172864037e-05, "loss": 0.007482582330703735, "step": 130880 }, { "epoch": 37.15299460686914, "grad_norm": 11.67249584197998, "learning_rate": 6.286233323871701e-05, "loss": 0.012260080873966217, "step": 130890 }, { "epoch": 37.155833096792506, "grad_norm": 0.31491392850875854, "learning_rate": 6.285949474879364e-05, "loss": 0.003604312241077423, "step": 130900 }, { "epoch": 37.15867158671587, "grad_norm": 0.039744701236486435, "learning_rate": 6.285665625887028e-05, "loss": 0.0047291085124015805, "step": 130910 }, { "epoch": 37.161510076639225, "grad_norm": 0.2616907060146332, "learning_rate": 6.285381776894692e-05, "loss": 0.0062477365136146545, "step": 130920 }, { "epoch": 37.16434856656259, "grad_norm": 0.0247627142816782, "learning_rate": 6.285097927902356e-05, "loss": 0.023077738285064698, "step": 130930 }, { "epoch": 37.16718705648595, "grad_norm": 0.13013166189193726, "learning_rate": 6.28481407891002e-05, "loss": 0.00205341000109911, "step": 130940 }, { "epoch": 37.17002554640931, "grad_norm": 0.7449332475662231, "learning_rate": 6.284530229917685e-05, "loss": 0.0013566138222813606, "step": 130950 }, { "epoch": 37.17286403633267, "grad_norm": 0.04737893491983414, "learning_rate": 6.284246380925349e-05, "loss": 0.00797140821814537, "step": 130960 }, { "epoch": 37.17570252625603, "grad_norm": 14.250223159790039, "learning_rate": 6.283962531933012e-05, "loss": 0.007671293616294861, "step": 130970 }, { "epoch": 37.178541016179395, "grad_norm": 0.07738187909126282, "learning_rate": 6.283678682940676e-05, "loss": 0.0011433620005846024, "step": 130980 }, { "epoch": 37.18137950610275, "grad_norm": 0.23149025440216064, "learning_rate": 6.28339483394834e-05, "loss": 0.009113563597202301, "step": 130990 }, { "epoch": 37.18421799602611, "grad_norm": 4.381702899932861, "learning_rate": 6.283110984956003e-05, "loss": 0.0019603515043854713, "step": 131000 }, { "epoch": 37.18421799602611, "eval_accuracy": 0.9786354676670693, "eval_loss": 0.0829874724149704, "eval_runtime": 34.7864, "eval_samples_per_second": 452.102, "eval_steps_per_second": 7.072, "step": 131000 }, { "epoch": 37.187056485949476, "grad_norm": 0.04384449124336243, "learning_rate": 6.282827135963668e-05, "loss": 0.0012413946911692618, "step": 131010 }, { "epoch": 37.18989497587283, "grad_norm": 0.08517933636903763, "learning_rate": 6.282543286971332e-05, "loss": 0.000405624695122242, "step": 131020 }, { "epoch": 37.192733465796195, "grad_norm": 0.18715707957744598, "learning_rate": 6.282259437978995e-05, "loss": 0.0010187793523073196, "step": 131030 }, { "epoch": 37.19557195571956, "grad_norm": 0.4261843264102936, "learning_rate": 6.281975588986659e-05, "loss": 0.0007637517526745796, "step": 131040 }, { "epoch": 37.19841044564292, "grad_norm": 0.0062805223278701305, "learning_rate": 6.281691739994323e-05, "loss": 0.0006418770179152488, "step": 131050 }, { "epoch": 37.20124893556628, "grad_norm": 15.578452110290527, "learning_rate": 6.281407891001987e-05, "loss": 0.01383000910282135, "step": 131060 }, { "epoch": 37.20408742548964, "grad_norm": 0.027805117890238762, "learning_rate": 6.28112404200965e-05, "loss": 0.000822167843580246, "step": 131070 }, { "epoch": 37.206925915413, "grad_norm": 0.37275591492652893, "learning_rate": 6.280840193017316e-05, "loss": 0.004335812479257584, "step": 131080 }, { "epoch": 37.20976440533636, "grad_norm": 0.012719431892037392, "learning_rate": 6.28055634402498e-05, "loss": 0.0034785117954015733, "step": 131090 }, { "epoch": 37.21260289525972, "grad_norm": 0.030120402574539185, "learning_rate": 6.280272495032643e-05, "loss": 0.002583489939570427, "step": 131100 }, { "epoch": 37.215441385183084, "grad_norm": 0.32446274161338806, "learning_rate": 6.279988646040307e-05, "loss": 0.0024514071643352508, "step": 131110 }, { "epoch": 37.21827987510645, "grad_norm": 0.17226263880729675, "learning_rate": 6.279704797047971e-05, "loss": 0.00035102684050798417, "step": 131120 }, { "epoch": 37.2211183650298, "grad_norm": 0.4999212920665741, "learning_rate": 6.279420948055634e-05, "loss": 0.0005907762795686721, "step": 131130 }, { "epoch": 37.223956854953165, "grad_norm": 0.1351025551557541, "learning_rate": 6.279137099063299e-05, "loss": 0.00031042266637086867, "step": 131140 }, { "epoch": 37.22679534487653, "grad_norm": 0.13956178724765778, "learning_rate": 6.278853250070963e-05, "loss": 0.0011418793350458146, "step": 131150 }, { "epoch": 37.229633834799884, "grad_norm": 0.028245583176612854, "learning_rate": 6.278569401078626e-05, "loss": 0.0012458989396691323, "step": 131160 }, { "epoch": 37.23247232472325, "grad_norm": 0.019653156399726868, "learning_rate": 6.27828555208629e-05, "loss": 0.0010325387120246887, "step": 131170 }, { "epoch": 37.23531081464661, "grad_norm": 0.4924030601978302, "learning_rate": 6.278001703093954e-05, "loss": 0.003894422575831413, "step": 131180 }, { "epoch": 37.238149304569966, "grad_norm": 12.381702423095703, "learning_rate": 6.277717854101619e-05, "loss": 0.005947641655802726, "step": 131190 }, { "epoch": 37.24098779449333, "grad_norm": 0.008245304226875305, "learning_rate": 6.277434005109281e-05, "loss": 0.003245215117931366, "step": 131200 }, { "epoch": 37.24382628441669, "grad_norm": 0.01223819237202406, "learning_rate": 6.277150156116947e-05, "loss": 0.0006265681236982346, "step": 131210 }, { "epoch": 37.246664774340054, "grad_norm": 0.6965548992156982, "learning_rate": 6.276866307124611e-05, "loss": 0.001017705537378788, "step": 131220 }, { "epoch": 37.24950326426341, "grad_norm": 12.765945434570312, "learning_rate": 6.276582458132274e-05, "loss": 0.008354406058788299, "step": 131230 }, { "epoch": 37.25234175418677, "grad_norm": 0.35837188363075256, "learning_rate": 6.276298609139938e-05, "loss": 0.0009949659928679467, "step": 131240 }, { "epoch": 37.255180244110136, "grad_norm": 0.14322729408740997, "learning_rate": 6.276014760147602e-05, "loss": 0.006317359209060669, "step": 131250 }, { "epoch": 37.25801873403349, "grad_norm": 0.017405783757567406, "learning_rate": 6.275730911155265e-05, "loss": 0.0001573428511619568, "step": 131260 }, { "epoch": 37.260857223956855, "grad_norm": 0.8701154589653015, "learning_rate": 6.275447062162929e-05, "loss": 0.0008926255628466606, "step": 131270 }, { "epoch": 37.26369571388022, "grad_norm": 15.172795295715332, "learning_rate": 6.275163213170594e-05, "loss": 0.012093129754066467, "step": 131280 }, { "epoch": 37.26653420380357, "grad_norm": 0.3955385684967041, "learning_rate": 6.274879364178257e-05, "loss": 0.0030311569571495056, "step": 131290 }, { "epoch": 37.269372693726936, "grad_norm": 1.6020816564559937, "learning_rate": 6.274595515185921e-05, "loss": 0.00067979134619236, "step": 131300 }, { "epoch": 37.2722111836503, "grad_norm": 0.41632336378097534, "learning_rate": 6.274311666193585e-05, "loss": 0.0007792893797159195, "step": 131310 }, { "epoch": 37.27504967357366, "grad_norm": 2.4674339294433594, "learning_rate": 6.27402781720125e-05, "loss": 0.0015234081074595451, "step": 131320 }, { "epoch": 37.27788816349702, "grad_norm": 0.16857388615608215, "learning_rate": 6.273743968208912e-05, "loss": 0.0010001000016927719, "step": 131330 }, { "epoch": 37.28072665342038, "grad_norm": 0.05094946548342705, "learning_rate": 6.273460119216578e-05, "loss": 0.0017626741901040077, "step": 131340 }, { "epoch": 37.28356514334374, "grad_norm": 0.1665640026330948, "learning_rate": 6.273176270224242e-05, "loss": 0.002163083851337433, "step": 131350 }, { "epoch": 37.2864036332671, "grad_norm": 10.727806091308594, "learning_rate": 6.272892421231905e-05, "loss": 0.009709398448467254, "step": 131360 }, { "epoch": 37.28924212319046, "grad_norm": 0.568162202835083, "learning_rate": 6.272608572239569e-05, "loss": 0.00046267341822385787, "step": 131370 }, { "epoch": 37.292080613113825, "grad_norm": 0.04713902622461319, "learning_rate": 6.272324723247233e-05, "loss": 0.000482884980738163, "step": 131380 }, { "epoch": 37.29491910303718, "grad_norm": 0.39645591378211975, "learning_rate": 6.272040874254896e-05, "loss": 0.0006507135927677155, "step": 131390 }, { "epoch": 37.297757592960544, "grad_norm": 2.4270050525665283, "learning_rate": 6.27175702526256e-05, "loss": 0.002511032857000828, "step": 131400 }, { "epoch": 37.30059608288391, "grad_norm": 1.9134560823440552, "learning_rate": 6.271473176270225e-05, "loss": 0.0008561849594116211, "step": 131410 }, { "epoch": 37.30343457280727, "grad_norm": 11.172774314880371, "learning_rate": 6.271189327277888e-05, "loss": 0.002869749441742897, "step": 131420 }, { "epoch": 37.306273062730625, "grad_norm": 0.5242908596992493, "learning_rate": 6.270905478285552e-05, "loss": 0.00524035394191742, "step": 131430 }, { "epoch": 37.30911155265399, "grad_norm": 0.11237017065286636, "learning_rate": 6.270621629293217e-05, "loss": 0.0005290541797876358, "step": 131440 }, { "epoch": 37.31195004257735, "grad_norm": 1.0630091428756714, "learning_rate": 6.27033778030088e-05, "loss": 0.005932009220123291, "step": 131450 }, { "epoch": 37.31478853250071, "grad_norm": 3.0232484340667725, "learning_rate": 6.270053931308543e-05, "loss": 0.0015615561977028846, "step": 131460 }, { "epoch": 37.31762702242407, "grad_norm": 0.013162405230104923, "learning_rate": 6.269770082316208e-05, "loss": 0.023350977897644044, "step": 131470 }, { "epoch": 37.32046551234743, "grad_norm": 6.692939281463623, "learning_rate": 6.269486233323873e-05, "loss": 0.014251315593719482, "step": 131480 }, { "epoch": 37.323304002270795, "grad_norm": 0.3343401253223419, "learning_rate": 6.269202384331536e-05, "loss": 0.029580169916152955, "step": 131490 }, { "epoch": 37.32614249219415, "grad_norm": 0.5124855637550354, "learning_rate": 6.2689185353392e-05, "loss": 0.005843814462423325, "step": 131500 }, { "epoch": 37.32614249219415, "eval_accuracy": 0.9785718827494119, "eval_loss": 0.08569622784852982, "eval_runtime": 34.5012, "eval_samples_per_second": 455.839, "eval_steps_per_second": 7.13, "step": 131500 }, { "epoch": 37.328980982117514, "grad_norm": 0.18642698228359222, "learning_rate": 6.268634686346864e-05, "loss": 0.01355011761188507, "step": 131510 }, { "epoch": 37.33181947204088, "grad_norm": 0.03845125064253807, "learning_rate": 6.268350837354527e-05, "loss": 0.00815618857741356, "step": 131520 }, { "epoch": 37.33465796196423, "grad_norm": 0.04650903865695, "learning_rate": 6.268066988362191e-05, "loss": 0.002743852138519287, "step": 131530 }, { "epoch": 37.337496451887596, "grad_norm": 1.3742616176605225, "learning_rate": 6.267783139369857e-05, "loss": 0.011228834092617036, "step": 131540 }, { "epoch": 37.34033494181096, "grad_norm": 0.030468612909317017, "learning_rate": 6.26749929037752e-05, "loss": 0.0038243561983108522, "step": 131550 }, { "epoch": 37.343173431734314, "grad_norm": 14.241581916809082, "learning_rate": 6.267215441385183e-05, "loss": 0.013960903882980347, "step": 131560 }, { "epoch": 37.34601192165768, "grad_norm": 0.06322269886732101, "learning_rate": 6.266931592392848e-05, "loss": 0.0015088429674506188, "step": 131570 }, { "epoch": 37.34885041158104, "grad_norm": 2.7771830558776855, "learning_rate": 6.266647743400512e-05, "loss": 0.009151041507720947, "step": 131580 }, { "epoch": 37.3516889015044, "grad_norm": 0.025711189955472946, "learning_rate": 6.266363894408175e-05, "loss": 0.0022791847586631775, "step": 131590 }, { "epoch": 37.35452739142776, "grad_norm": 0.34682196378707886, "learning_rate": 6.266080045415839e-05, "loss": 0.002257917448878288, "step": 131600 }, { "epoch": 37.35736588135112, "grad_norm": 0.11488881707191467, "learning_rate": 6.265796196423503e-05, "loss": 0.0018339093774557113, "step": 131610 }, { "epoch": 37.360204371274484, "grad_norm": 1.6958856582641602, "learning_rate": 6.265512347431167e-05, "loss": 0.008909785747528076, "step": 131620 }, { "epoch": 37.36304286119784, "grad_norm": 13.349750518798828, "learning_rate": 6.265228498438831e-05, "loss": 0.009765972197055817, "step": 131630 }, { "epoch": 37.3658813511212, "grad_norm": 0.07682351022958755, "learning_rate": 6.264944649446495e-05, "loss": 0.001010763645172119, "step": 131640 }, { "epoch": 37.368719841044566, "grad_norm": 6.904287815093994, "learning_rate": 6.264660800454158e-05, "loss": 0.0030965238809585573, "step": 131650 }, { "epoch": 37.37155833096792, "grad_norm": 0.015940485522150993, "learning_rate": 6.264376951461822e-05, "loss": 0.006984136998653412, "step": 131660 }, { "epoch": 37.374396820891285, "grad_norm": 0.029293091967701912, "learning_rate": 6.264093102469486e-05, "loss": 0.0046793721616268154, "step": 131670 }, { "epoch": 37.37723531081465, "grad_norm": 0.017297711223363876, "learning_rate": 6.26380925347715e-05, "loss": 0.001597493141889572, "step": 131680 }, { "epoch": 37.38007380073801, "grad_norm": 0.025356778874993324, "learning_rate": 6.263525404484815e-05, "loss": 0.0008576566353440285, "step": 131690 }, { "epoch": 37.382912290661366, "grad_norm": 0.09493120014667511, "learning_rate": 6.263241555492479e-05, "loss": 0.0017482573166489601, "step": 131700 }, { "epoch": 37.38575078058473, "grad_norm": 0.41366007924079895, "learning_rate": 6.262957706500141e-05, "loss": 0.00417923629283905, "step": 131710 }, { "epoch": 37.38858927050809, "grad_norm": 1.1677772998809814, "learning_rate": 6.262673857507806e-05, "loss": 0.0015405738726258279, "step": 131720 }, { "epoch": 37.39142776043145, "grad_norm": Infinity, "learning_rate": 6.26239000851547e-05, "loss": 0.01807974874973297, "step": 131730 }, { "epoch": 37.39426625035481, "grad_norm": 0.8965014219284058, "learning_rate": 6.262134544422368e-05, "loss": 0.0062203556299209595, "step": 131740 }, { "epoch": 37.39710474027817, "grad_norm": 0.7147607207298279, "learning_rate": 6.261850695430032e-05, "loss": 0.0016111565753817558, "step": 131750 }, { "epoch": 37.39994323020153, "grad_norm": 0.7133706212043762, "learning_rate": 6.261566846437695e-05, "loss": 0.0022686542943120003, "step": 131760 }, { "epoch": 37.40278172012489, "grad_norm": 0.5262242555618286, "learning_rate": 6.261282997445359e-05, "loss": 0.0018552681431174278, "step": 131770 }, { "epoch": 37.405620210048255, "grad_norm": 0.23618988692760468, "learning_rate": 6.260999148453023e-05, "loss": 0.005871419608592987, "step": 131780 }, { "epoch": 37.40845869997162, "grad_norm": 0.3499266505241394, "learning_rate": 6.260715299460687e-05, "loss": 0.0010514486581087112, "step": 131790 }, { "epoch": 37.411297189894974, "grad_norm": 0.6901007890701294, "learning_rate": 6.260431450468351e-05, "loss": 0.0020312132313847543, "step": 131800 }, { "epoch": 37.41413567981834, "grad_norm": 0.5520873665809631, "learning_rate": 6.260147601476015e-05, "loss": 0.0011084552854299546, "step": 131810 }, { "epoch": 37.4169741697417, "grad_norm": 0.21334832906723022, "learning_rate": 6.25986375248368e-05, "loss": 0.0009336832910776139, "step": 131820 }, { "epoch": 37.419812659665055, "grad_norm": 1.1492555141448975, "learning_rate": 6.259579903491342e-05, "loss": 0.006822513788938523, "step": 131830 }, { "epoch": 37.42265114958842, "grad_norm": 0.4720112085342407, "learning_rate": 6.259296054499006e-05, "loss": 0.01473546028137207, "step": 131840 }, { "epoch": 37.42548963951178, "grad_norm": 0.25246724486351013, "learning_rate": 6.25901220550667e-05, "loss": 0.0045895446091890335, "step": 131850 }, { "epoch": 37.428328129435144, "grad_norm": 0.19371247291564941, "learning_rate": 6.258728356514335e-05, "loss": 0.011937977373600006, "step": 131860 }, { "epoch": 37.4311666193585, "grad_norm": 0.028041362762451172, "learning_rate": 6.258444507521999e-05, "loss": 0.01903127133846283, "step": 131870 }, { "epoch": 37.43400510928186, "grad_norm": 0.11545368283987045, "learning_rate": 6.258160658529663e-05, "loss": 0.01750996708869934, "step": 131880 }, { "epoch": 37.436843599205226, "grad_norm": 0.8385586142539978, "learning_rate": 6.257876809537326e-05, "loss": 0.006769111752510071, "step": 131890 }, { "epoch": 37.43968208912858, "grad_norm": 3.121156930923462, "learning_rate": 6.25759296054499e-05, "loss": 0.007170031219720841, "step": 131900 }, { "epoch": 37.442520579051944, "grad_norm": 0.4871852397918701, "learning_rate": 6.257309111552654e-05, "loss": 0.0021689215674996374, "step": 131910 }, { "epoch": 37.44535906897531, "grad_norm": 0.09697066992521286, "learning_rate": 6.257025262560318e-05, "loss": 0.005223707109689712, "step": 131920 }, { "epoch": 37.44819755889866, "grad_norm": 1.3223563432693481, "learning_rate": 6.256741413567982e-05, "loss": 0.0014988098293542862, "step": 131930 }, { "epoch": 37.451036048822026, "grad_norm": 0.025689151138067245, "learning_rate": 6.256457564575646e-05, "loss": 0.004551114886999131, "step": 131940 }, { "epoch": 37.45387453874539, "grad_norm": 6.247311592102051, "learning_rate": 6.25617371558331e-05, "loss": 0.0032116610556840895, "step": 131950 }, { "epoch": 37.45671302866875, "grad_norm": 0.1441066563129425, "learning_rate": 6.255889866590973e-05, "loss": 0.005299047008156777, "step": 131960 }, { "epoch": 37.45955151859211, "grad_norm": 0.042093127965927124, "learning_rate": 6.255606017598637e-05, "loss": 0.003163234144449234, "step": 131970 }, { "epoch": 37.46239000851547, "grad_norm": 0.21018186211585999, "learning_rate": 6.255322168606302e-05, "loss": 0.006353365629911423, "step": 131980 }, { "epoch": 37.46522849843883, "grad_norm": 4.016611099243164, "learning_rate": 6.255038319613966e-05, "loss": 0.009195314347743988, "step": 131990 }, { "epoch": 37.46806698836219, "grad_norm": 0.057149071246385574, "learning_rate": 6.25475447062163e-05, "loss": 0.0032910700887441636, "step": 132000 }, { "epoch": 37.46806698836219, "eval_accuracy": 0.9806701850321103, "eval_loss": 0.07488439977169037, "eval_runtime": 37.1392, "eval_samples_per_second": 423.461, "eval_steps_per_second": 6.624, "step": 132000 }, { "epoch": 37.47090547828555, "grad_norm": 9.393506050109863, "learning_rate": 6.254470621629294e-05, "loss": 0.0058468498289585115, "step": 132010 }, { "epoch": 37.473743968208915, "grad_norm": 0.10360367596149445, "learning_rate": 6.254186772636957e-05, "loss": 0.0012080317363142967, "step": 132020 }, { "epoch": 37.47658245813227, "grad_norm": 0.990619421005249, "learning_rate": 6.253902923644621e-05, "loss": 0.002209263667464256, "step": 132030 }, { "epoch": 37.47942094805563, "grad_norm": 0.3998298943042755, "learning_rate": 6.253619074652285e-05, "loss": 0.00328754261136055, "step": 132040 }, { "epoch": 37.482259437978996, "grad_norm": 3.8529086112976074, "learning_rate": 6.253335225659949e-05, "loss": 0.0034778013825416563, "step": 132050 }, { "epoch": 37.48509792790236, "grad_norm": 0.029815973713994026, "learning_rate": 6.253051376667613e-05, "loss": 0.007480131089687347, "step": 132060 }, { "epoch": 37.487936417825715, "grad_norm": 0.007709455210715532, "learning_rate": 6.252767527675278e-05, "loss": 0.0008846187964081764, "step": 132070 }, { "epoch": 37.49077490774908, "grad_norm": 3.991856575012207, "learning_rate": 6.252483678682942e-05, "loss": 0.0014164542779326438, "step": 132080 }, { "epoch": 37.49361339767244, "grad_norm": 0.08022431284189224, "learning_rate": 6.252199829690604e-05, "loss": 0.009386245161294937, "step": 132090 }, { "epoch": 37.496451887595796, "grad_norm": 3.4186813831329346, "learning_rate": 6.251915980698269e-05, "loss": 0.0033248983323574065, "step": 132100 }, { "epoch": 37.49929037751916, "grad_norm": 0.23392556607723236, "learning_rate": 6.251632131705933e-05, "loss": 0.011766350269317627, "step": 132110 }, { "epoch": 37.50212886744252, "grad_norm": 0.08468025177717209, "learning_rate": 6.251348282713597e-05, "loss": 0.0020154591649770735, "step": 132120 }, { "epoch": 37.50496735736588, "grad_norm": 0.31960368156433105, "learning_rate": 6.251064433721261e-05, "loss": 0.00702473297715187, "step": 132130 }, { "epoch": 37.50780584728924, "grad_norm": 4.447606563568115, "learning_rate": 6.250780584728925e-05, "loss": 0.004164911061525345, "step": 132140 }, { "epoch": 37.510644337212604, "grad_norm": 0.918534517288208, "learning_rate": 6.250496735736588e-05, "loss": 0.004645970463752746, "step": 132150 }, { "epoch": 37.51348282713597, "grad_norm": 0.04695545509457588, "learning_rate": 6.250212886744252e-05, "loss": 0.005762960761785507, "step": 132160 }, { "epoch": 37.51632131705932, "grad_norm": 4.07764196395874, "learning_rate": 6.249929037751916e-05, "loss": 0.005524593964219093, "step": 132170 }, { "epoch": 37.519159806982685, "grad_norm": 0.40854865312576294, "learning_rate": 6.24964518875958e-05, "loss": 0.009440862387418748, "step": 132180 }, { "epoch": 37.52199829690605, "grad_norm": 0.5160701870918274, "learning_rate": 6.249361339767244e-05, "loss": 0.045733267068862916, "step": 132190 }, { "epoch": 37.524836786829404, "grad_norm": 0.8461453318595886, "learning_rate": 6.249077490774909e-05, "loss": 0.03637783229351044, "step": 132200 }, { "epoch": 37.52767527675277, "grad_norm": 5.108628273010254, "learning_rate": 6.248793641782573e-05, "loss": 0.007229646295309066, "step": 132210 }, { "epoch": 37.53051376667613, "grad_norm": 0.5947097539901733, "learning_rate": 6.248509792790235e-05, "loss": 0.006560420989990235, "step": 132220 }, { "epoch": 37.533352256599485, "grad_norm": 0.0650116354227066, "learning_rate": 6.2482259437979e-05, "loss": 0.015530064702033997, "step": 132230 }, { "epoch": 37.53619074652285, "grad_norm": 0.02070966549217701, "learning_rate": 6.247942094805564e-05, "loss": 0.0038744695484638214, "step": 132240 }, { "epoch": 37.53902923644621, "grad_norm": 0.03247702121734619, "learning_rate": 6.247658245813227e-05, "loss": 0.0006168417632579804, "step": 132250 }, { "epoch": 37.541867726369574, "grad_norm": 4.131563663482666, "learning_rate": 6.247374396820892e-05, "loss": 0.0017185624688863753, "step": 132260 }, { "epoch": 37.54470621629293, "grad_norm": 0.0839415118098259, "learning_rate": 6.247090547828556e-05, "loss": 0.0004288149997591972, "step": 132270 }, { "epoch": 37.54754470621629, "grad_norm": 2.969935417175293, "learning_rate": 6.246806698836219e-05, "loss": 0.0026723800227046015, "step": 132280 }, { "epoch": 37.550383196139656, "grad_norm": 0.2918894290924072, "learning_rate": 6.246522849843883e-05, "loss": 0.002668191306293011, "step": 132290 }, { "epoch": 37.55322168606301, "grad_norm": 0.15662643313407898, "learning_rate": 6.246239000851547e-05, "loss": 0.002856089361011982, "step": 132300 }, { "epoch": 37.556060175986374, "grad_norm": 0.409084677696228, "learning_rate": 6.245955151859211e-05, "loss": 0.0029591819271445273, "step": 132310 }, { "epoch": 37.55889866590974, "grad_norm": 0.03312131017446518, "learning_rate": 6.245671302866876e-05, "loss": 0.01686297804117203, "step": 132320 }, { "epoch": 37.5617371558331, "grad_norm": 0.09247519820928574, "learning_rate": 6.24538745387454e-05, "loss": 0.003712984174489975, "step": 132330 }, { "epoch": 37.564575645756456, "grad_norm": 2.958920955657959, "learning_rate": 6.245103604882204e-05, "loss": 0.0020295102149248123, "step": 132340 }, { "epoch": 37.56741413567982, "grad_norm": 0.007139845751225948, "learning_rate": 6.244819755889867e-05, "loss": 0.003042415715754032, "step": 132350 }, { "epoch": 37.57025262560318, "grad_norm": 0.039309486746788025, "learning_rate": 6.244535906897531e-05, "loss": 0.001244613714516163, "step": 132360 }, { "epoch": 37.57309111552654, "grad_norm": 0.013589759357273579, "learning_rate": 6.244252057905195e-05, "loss": 0.0020809264853596686, "step": 132370 }, { "epoch": 37.5759296054499, "grad_norm": 0.19994892179965973, "learning_rate": 6.243968208912858e-05, "loss": 0.006995407491922378, "step": 132380 }, { "epoch": 37.57876809537326, "grad_norm": 0.08902416378259659, "learning_rate": 6.243684359920523e-05, "loss": 0.001115877367556095, "step": 132390 }, { "epoch": 37.58160658529662, "grad_norm": 0.11099760979413986, "learning_rate": 6.243400510928187e-05, "loss": 0.00048536546528339387, "step": 132400 }, { "epoch": 37.58444507521998, "grad_norm": 13.865909576416016, "learning_rate": 6.24311666193585e-05, "loss": 0.015973445773124696, "step": 132410 }, { "epoch": 37.587283565143345, "grad_norm": 0.07091575860977173, "learning_rate": 6.242832812943514e-05, "loss": 0.0008450346067547798, "step": 132420 }, { "epoch": 37.59012205506671, "grad_norm": 0.035923317074775696, "learning_rate": 6.242548963951178e-05, "loss": 0.004483270645141602, "step": 132430 }, { "epoch": 37.59296054499006, "grad_norm": 0.154133602976799, "learning_rate": 6.242265114958842e-05, "loss": 0.003423905372619629, "step": 132440 }, { "epoch": 37.595799034913426, "grad_norm": 0.008636659011244774, "learning_rate": 6.241981265966505e-05, "loss": 0.0027626527473330496, "step": 132450 }, { "epoch": 37.59863752483679, "grad_norm": 1.3674335479736328, "learning_rate": 6.241697416974171e-05, "loss": 0.00507555678486824, "step": 132460 }, { "epoch": 37.601476014760145, "grad_norm": 0.00906659197062254, "learning_rate": 6.241413567981834e-05, "loss": 0.0015340231359004975, "step": 132470 }, { "epoch": 37.60431450468351, "grad_norm": 11.018266677856445, "learning_rate": 6.241129718989498e-05, "loss": 0.006852371990680695, "step": 132480 }, { "epoch": 37.60715299460687, "grad_norm": 0.3015180230140686, "learning_rate": 6.240845869997162e-05, "loss": 0.0011654941365122796, "step": 132490 }, { "epoch": 37.60999148453023, "grad_norm": 0.024959003552794456, "learning_rate": 6.240562021004826e-05, "loss": 0.005316335335373878, "step": 132500 }, { "epoch": 37.60999148453023, "eval_accuracy": 0.977872448655179, "eval_loss": 0.08323009312152863, "eval_runtime": 34.0282, "eval_samples_per_second": 462.176, "eval_steps_per_second": 7.229, "step": 132500 }, { "epoch": 37.61282997445359, "grad_norm": 11.925795555114746, "learning_rate": 6.240278172012489e-05, "loss": 0.005222405865788459, "step": 132510 }, { "epoch": 37.61566846437695, "grad_norm": 1.8755886554718018, "learning_rate": 6.239994323020154e-05, "loss": 0.0009478667750954628, "step": 132520 }, { "epoch": 37.618506954300315, "grad_norm": 0.22157573699951172, "learning_rate": 6.239710474027818e-05, "loss": 0.01349925845861435, "step": 132530 }, { "epoch": 37.62134544422367, "grad_norm": 0.020507294684648514, "learning_rate": 6.239426625035481e-05, "loss": 0.0025297470390796663, "step": 132540 }, { "epoch": 37.624183934147034, "grad_norm": 0.9470978379249573, "learning_rate": 6.239142776043145e-05, "loss": 0.004777979105710983, "step": 132550 }, { "epoch": 37.6270224240704, "grad_norm": 0.13411569595336914, "learning_rate": 6.23885892705081e-05, "loss": 0.000809524953365326, "step": 132560 }, { "epoch": 37.62986091399375, "grad_norm": 0.0652242973446846, "learning_rate": 6.238575078058474e-05, "loss": 0.008592379093170167, "step": 132570 }, { "epoch": 37.632699403917115, "grad_norm": 0.09252923727035522, "learning_rate": 6.238291229066136e-05, "loss": 0.010446204245090485, "step": 132580 }, { "epoch": 37.63553789384048, "grad_norm": 0.11322763562202454, "learning_rate": 6.238007380073802e-05, "loss": 0.016847823560237885, "step": 132590 }, { "epoch": 37.63837638376384, "grad_norm": 0.018403680995106697, "learning_rate": 6.237723531081465e-05, "loss": 0.0015451090410351753, "step": 132600 }, { "epoch": 37.6412148736872, "grad_norm": 0.024959443137049675, "learning_rate": 6.237439682089129e-05, "loss": 0.003244943544268608, "step": 132610 }, { "epoch": 37.64405336361056, "grad_norm": 0.013751671649515629, "learning_rate": 6.237155833096793e-05, "loss": 0.00031202305108308793, "step": 132620 }, { "epoch": 37.64689185353392, "grad_norm": 0.16464385390281677, "learning_rate": 6.236871984104457e-05, "loss": 0.003019985929131508, "step": 132630 }, { "epoch": 37.64973034345728, "grad_norm": 0.3664259612560272, "learning_rate": 6.23658813511212e-05, "loss": 0.013075025379657745, "step": 132640 }, { "epoch": 37.65256883338064, "grad_norm": 0.02757844887673855, "learning_rate": 6.236304286119785e-05, "loss": 0.01843457967042923, "step": 132650 }, { "epoch": 37.655407323304004, "grad_norm": 0.1463843584060669, "learning_rate": 6.23602043712745e-05, "loss": 0.0003273520618677139, "step": 132660 }, { "epoch": 37.65824581322736, "grad_norm": 0.0178656168282032, "learning_rate": 6.235736588135112e-05, "loss": 0.003154643997550011, "step": 132670 }, { "epoch": 37.66108430315072, "grad_norm": 0.061337705701589584, "learning_rate": 6.235452739142776e-05, "loss": 0.0009010620415210724, "step": 132680 }, { "epoch": 37.663922793074086, "grad_norm": 7.729755878448486, "learning_rate": 6.23516889015044e-05, "loss": 0.008543618023395538, "step": 132690 }, { "epoch": 37.66676128299745, "grad_norm": 0.13166049122810364, "learning_rate": 6.234885041158103e-05, "loss": 0.010759660601615905, "step": 132700 }, { "epoch": 37.669599772920805, "grad_norm": 0.11178802698850632, "learning_rate": 6.234601192165767e-05, "loss": 0.0019131191074848175, "step": 132710 }, { "epoch": 37.67243826284417, "grad_norm": 0.8807064890861511, "learning_rate": 6.234317343173433e-05, "loss": 0.014370298385620118, "step": 132720 }, { "epoch": 37.67527675276753, "grad_norm": 4.006770133972168, "learning_rate": 6.234033494181096e-05, "loss": 0.004410082101821899, "step": 132730 }, { "epoch": 37.678115242690886, "grad_norm": 2.8381383419036865, "learning_rate": 6.23374964518876e-05, "loss": 0.013017717003822326, "step": 132740 }, { "epoch": 37.68095373261425, "grad_norm": 0.014047305099666119, "learning_rate": 6.233465796196424e-05, "loss": 0.006865832209587097, "step": 132750 }, { "epoch": 37.68379222253761, "grad_norm": 16.08782196044922, "learning_rate": 6.233181947204088e-05, "loss": 0.005019863694906234, "step": 132760 }, { "epoch": 37.68663071246097, "grad_norm": 3.807816743850708, "learning_rate": 6.232898098211751e-05, "loss": 0.00572056919336319, "step": 132770 }, { "epoch": 37.68946920238433, "grad_norm": 15.326094627380371, "learning_rate": 6.232614249219415e-05, "loss": 0.009024516493082047, "step": 132780 }, { "epoch": 37.69230769230769, "grad_norm": 0.4821649193763733, "learning_rate": 6.23233040022708e-05, "loss": 0.0029329033568501472, "step": 132790 }, { "epoch": 37.695146182231056, "grad_norm": 0.4758719801902771, "learning_rate": 6.232046551234743e-05, "loss": 0.02008920758962631, "step": 132800 }, { "epoch": 37.69798467215441, "grad_norm": 0.5905307531356812, "learning_rate": 6.231762702242407e-05, "loss": 0.0016674350947141648, "step": 132810 }, { "epoch": 37.700823162077775, "grad_norm": 2.0555994510650635, "learning_rate": 6.231478853250072e-05, "loss": 0.012948335707187652, "step": 132820 }, { "epoch": 37.70366165200114, "grad_norm": 0.1365179419517517, "learning_rate": 6.231195004257734e-05, "loss": 0.002909749187529087, "step": 132830 }, { "epoch": 37.706500141924494, "grad_norm": 1.9209333658218384, "learning_rate": 6.230911155265398e-05, "loss": 0.0026003854349255563, "step": 132840 }, { "epoch": 37.70933863184786, "grad_norm": 3.4197113513946533, "learning_rate": 6.230627306273064e-05, "loss": 0.004794976115226746, "step": 132850 }, { "epoch": 37.71217712177122, "grad_norm": 0.026431556791067123, "learning_rate": 6.230343457280727e-05, "loss": 0.009415353089571, "step": 132860 }, { "epoch": 37.715015611694575, "grad_norm": 5.012399196624756, "learning_rate": 6.230059608288391e-05, "loss": 0.008179936558008194, "step": 132870 }, { "epoch": 37.71785410161794, "grad_norm": 0.46812358498573303, "learning_rate": 6.229775759296055e-05, "loss": 0.006689081341028214, "step": 132880 }, { "epoch": 37.7206925915413, "grad_norm": 1.0265915393829346, "learning_rate": 6.229491910303719e-05, "loss": 0.0019595509395003317, "step": 132890 }, { "epoch": 37.723531081464664, "grad_norm": 0.03917098417878151, "learning_rate": 6.229208061311382e-05, "loss": 0.000604858435690403, "step": 132900 }, { "epoch": 37.72636957138802, "grad_norm": 0.32813021540641785, "learning_rate": 6.228924212319046e-05, "loss": 0.00782158449292183, "step": 132910 }, { "epoch": 37.72920806131138, "grad_norm": 0.7935514450073242, "learning_rate": 6.228640363326712e-05, "loss": 0.0022536534816026688, "step": 132920 }, { "epoch": 37.732046551234745, "grad_norm": 0.05673050507903099, "learning_rate": 6.228356514334374e-05, "loss": 0.0006954127922654152, "step": 132930 }, { "epoch": 37.7348850411581, "grad_norm": 0.013324640691280365, "learning_rate": 6.228072665342038e-05, "loss": 0.001873055286705494, "step": 132940 }, { "epoch": 37.737723531081464, "grad_norm": 0.7251259684562683, "learning_rate": 6.227788816349703e-05, "loss": 0.002297728694975376, "step": 132950 }, { "epoch": 37.74056202100483, "grad_norm": 1.3434944152832031, "learning_rate": 6.227504967357365e-05, "loss": 0.002633211761713028, "step": 132960 }, { "epoch": 37.74340051092818, "grad_norm": 0.4320158064365387, "learning_rate": 6.22722111836503e-05, "loss": 0.003345775604248047, "step": 132970 }, { "epoch": 37.746239000851546, "grad_norm": 0.35049009323120117, "learning_rate": 6.226937269372694e-05, "loss": 0.004780816286802292, "step": 132980 }, { "epoch": 37.74907749077491, "grad_norm": 0.12251453846693039, "learning_rate": 6.226653420380358e-05, "loss": 0.008681702613830566, "step": 132990 }, { "epoch": 37.75191598069827, "grad_norm": 2.9735779762268066, "learning_rate": 6.226369571388022e-05, "loss": 0.001429823599755764, "step": 133000 }, { "epoch": 37.75191598069827, "eval_accuracy": 0.9785082978317543, "eval_loss": 0.08352191001176834, "eval_runtime": 34.2627, "eval_samples_per_second": 459.012, "eval_steps_per_second": 7.18, "step": 133000 }, { "epoch": 37.75475447062163, "grad_norm": 0.01758180372416973, "learning_rate": 6.226085722395686e-05, "loss": 0.001404157653450966, "step": 133010 }, { "epoch": 37.75759296054499, "grad_norm": 0.004431148525327444, "learning_rate": 6.22580187340335e-05, "loss": 0.004907131940126419, "step": 133020 }, { "epoch": 37.76043145046835, "grad_norm": 2.890383720397949, "learning_rate": 6.225518024411013e-05, "loss": 0.0015389345586299895, "step": 133030 }, { "epoch": 37.76326994039171, "grad_norm": 0.08852294832468033, "learning_rate": 6.225234175418677e-05, "loss": 0.021188892424106598, "step": 133040 }, { "epoch": 37.76610843031507, "grad_norm": 0.019190387800335884, "learning_rate": 6.224950326426343e-05, "loss": 0.007525346428155899, "step": 133050 }, { "epoch": 37.768946920238434, "grad_norm": 0.01970144920051098, "learning_rate": 6.224666477434005e-05, "loss": 0.0005426708608865738, "step": 133060 }, { "epoch": 37.7717854101618, "grad_norm": 1.217386245727539, "learning_rate": 6.22438262844167e-05, "loss": 0.019490762054920195, "step": 133070 }, { "epoch": 37.77462390008515, "grad_norm": 4.960572719573975, "learning_rate": 6.224098779449334e-05, "loss": 0.005102075636386871, "step": 133080 }, { "epoch": 37.777462390008516, "grad_norm": 5.90848445892334, "learning_rate": 6.223814930456996e-05, "loss": 0.0021918699145317077, "step": 133090 }, { "epoch": 37.78030087993188, "grad_norm": 0.46149131655693054, "learning_rate": 6.22353108146466e-05, "loss": 0.018592439591884613, "step": 133100 }, { "epoch": 37.783139369855235, "grad_norm": 0.008733590133488178, "learning_rate": 6.223247232472325e-05, "loss": 0.0016873937100172042, "step": 133110 }, { "epoch": 37.7859778597786, "grad_norm": 2.9504897594451904, "learning_rate": 6.222963383479989e-05, "loss": 0.007587664574384689, "step": 133120 }, { "epoch": 37.78881634970196, "grad_norm": 0.08639904111623764, "learning_rate": 6.222679534487653e-05, "loss": 0.0009514743462204934, "step": 133130 }, { "epoch": 37.791654839625316, "grad_norm": 0.34700870513916016, "learning_rate": 6.222395685495317e-05, "loss": 0.020826500654220582, "step": 133140 }, { "epoch": 37.79449332954868, "grad_norm": 0.23260974884033203, "learning_rate": 6.222111836502981e-05, "loss": 0.003767666593194008, "step": 133150 }, { "epoch": 37.79733181947204, "grad_norm": 0.029371228069067, "learning_rate": 6.221827987510644e-05, "loss": 0.0031464770436286926, "step": 133160 }, { "epoch": 37.800170309395405, "grad_norm": 0.18348360061645508, "learning_rate": 6.221544138518308e-05, "loss": 0.0008411165326833725, "step": 133170 }, { "epoch": 37.80300879931876, "grad_norm": 0.01485811360180378, "learning_rate": 6.221260289525972e-05, "loss": 0.0015023043379187584, "step": 133180 }, { "epoch": 37.805847289242124, "grad_norm": 5.684395790100098, "learning_rate": 6.220976440533637e-05, "loss": 0.00233599916100502, "step": 133190 }, { "epoch": 37.80868577916549, "grad_norm": 0.08390551060438156, "learning_rate": 6.2206925915413e-05, "loss": 0.0006034296005964279, "step": 133200 }, { "epoch": 37.81152426908884, "grad_norm": 0.404975950717926, "learning_rate": 6.220408742548965e-05, "loss": 0.001760183647274971, "step": 133210 }, { "epoch": 37.814362759012205, "grad_norm": 0.11850272864103317, "learning_rate": 6.220124893556628e-05, "loss": 0.002190476842224598, "step": 133220 }, { "epoch": 37.81720124893557, "grad_norm": 0.6407985687255859, "learning_rate": 6.219841044564292e-05, "loss": 0.000772799551486969, "step": 133230 }, { "epoch": 37.820039738858924, "grad_norm": 0.4516345262527466, "learning_rate": 6.219557195571956e-05, "loss": 0.00654645636677742, "step": 133240 }, { "epoch": 37.82287822878229, "grad_norm": 0.010953585617244244, "learning_rate": 6.21927334657962e-05, "loss": 0.004600908979773522, "step": 133250 }, { "epoch": 37.82571671870565, "grad_norm": 12.955880165100098, "learning_rate": 6.218989497587284e-05, "loss": 0.006201768666505814, "step": 133260 }, { "epoch": 37.82855520862901, "grad_norm": 0.028795376420021057, "learning_rate": 6.218705648594948e-05, "loss": 0.0010286759585142135, "step": 133270 }, { "epoch": 37.83139369855237, "grad_norm": 0.1679365038871765, "learning_rate": 6.218421799602612e-05, "loss": 0.0006201526150107384, "step": 133280 }, { "epoch": 37.83423218847573, "grad_norm": 0.3170352280139923, "learning_rate": 6.218137950610275e-05, "loss": 0.0010532068088650703, "step": 133290 }, { "epoch": 37.837070678399094, "grad_norm": 3.160130262374878, "learning_rate": 6.217854101617939e-05, "loss": 0.00526677705347538, "step": 133300 }, { "epoch": 37.83990916832245, "grad_norm": 0.29657065868377686, "learning_rate": 6.217570252625603e-05, "loss": 0.007025278359651566, "step": 133310 }, { "epoch": 37.84274765824581, "grad_norm": 10.628646850585938, "learning_rate": 6.217286403633268e-05, "loss": 0.007872811704874038, "step": 133320 }, { "epoch": 37.845586148169176, "grad_norm": 1.1374701261520386, "learning_rate": 6.217002554640932e-05, "loss": 0.006926741451025009, "step": 133330 }, { "epoch": 37.84842463809253, "grad_norm": 0.4159615635871887, "learning_rate": 6.216718705648596e-05, "loss": 0.013576948642730713, "step": 133340 }, { "epoch": 37.851263128015894, "grad_norm": 0.1556038111448288, "learning_rate": 6.216434856656259e-05, "loss": 0.011589869856834412, "step": 133350 }, { "epoch": 37.85410161793926, "grad_norm": 0.45922616124153137, "learning_rate": 6.216151007663923e-05, "loss": 0.00915343090891838, "step": 133360 }, { "epoch": 37.85694010786262, "grad_norm": 3.46323823928833, "learning_rate": 6.215867158671587e-05, "loss": 0.013847443461418151, "step": 133370 }, { "epoch": 37.859778597785976, "grad_norm": 0.12507346272468567, "learning_rate": 6.215583309679251e-05, "loss": 0.0030009886249899864, "step": 133380 }, { "epoch": 37.86261708770934, "grad_norm": 0.15971004962921143, "learning_rate": 6.215299460686915e-05, "loss": 0.0033774681389331816, "step": 133390 }, { "epoch": 37.8654555776327, "grad_norm": 0.018746526911854744, "learning_rate": 6.21501561169458e-05, "loss": 0.0006022907793521882, "step": 133400 }, { "epoch": 37.86829406755606, "grad_norm": 0.14656415581703186, "learning_rate": 6.214731762702242e-05, "loss": 0.008496624231338502, "step": 133410 }, { "epoch": 37.87113255747942, "grad_norm": 0.4204341173171997, "learning_rate": 6.214447913709906e-05, "loss": 0.015572643280029297, "step": 133420 }, { "epoch": 37.87397104740278, "grad_norm": 3.557738780975342, "learning_rate": 6.21416406471757e-05, "loss": 0.010625657439231873, "step": 133430 }, { "epoch": 37.876809537326146, "grad_norm": 0.0603630505502224, "learning_rate": 6.213880215725235e-05, "loss": 0.0009436272084712982, "step": 133440 }, { "epoch": 37.8796480272495, "grad_norm": 0.013316819444298744, "learning_rate": 6.213596366732899e-05, "loss": 0.0013118326663970948, "step": 133450 }, { "epoch": 37.882486517172865, "grad_norm": 0.16642142832279205, "learning_rate": 6.213312517740563e-05, "loss": 0.003584205359220505, "step": 133460 }, { "epoch": 37.88532500709623, "grad_norm": 1.0646674633026123, "learning_rate": 6.213028668748227e-05, "loss": 0.00677122175693512, "step": 133470 }, { "epoch": 37.88816349701958, "grad_norm": 0.07929782569408417, "learning_rate": 6.21274481975589e-05, "loss": 0.0007186515256762504, "step": 133480 }, { "epoch": 37.891001986942946, "grad_norm": 0.2734053432941437, "learning_rate": 6.212460970763554e-05, "loss": 0.003353336825966835, "step": 133490 }, { "epoch": 37.89384047686631, "grad_norm": 0.015667583793401718, "learning_rate": 6.212177121771218e-05, "loss": 0.005912724882364273, "step": 133500 }, { "epoch": 37.89384047686631, "eval_accuracy": 0.9806701850321103, "eval_loss": 0.0728323757648468, "eval_runtime": 35.8847, "eval_samples_per_second": 438.265, "eval_steps_per_second": 6.855, "step": 133500 }, { "epoch": 37.896678966789665, "grad_norm": 0.7623921632766724, "learning_rate": 6.211893272778881e-05, "loss": 0.0048240523785352705, "step": 133510 }, { "epoch": 37.89951745671303, "grad_norm": 6.479828357696533, "learning_rate": 6.211609423786546e-05, "loss": 0.005462398380041122, "step": 133520 }, { "epoch": 37.90235594663639, "grad_norm": 0.06578011065721512, "learning_rate": 6.21132557479421e-05, "loss": 0.01988785117864609, "step": 133530 }, { "epoch": 37.90519443655975, "grad_norm": 0.28915002942085266, "learning_rate": 6.211041725801873e-05, "loss": 0.011807148903608322, "step": 133540 }, { "epoch": 37.90803292648311, "grad_norm": 0.13223469257354736, "learning_rate": 6.210757876809537e-05, "loss": 0.010090868175029754, "step": 133550 }, { "epoch": 37.91087141640647, "grad_norm": 1.0641918182373047, "learning_rate": 6.210474027817201e-05, "loss": 0.00784827247262001, "step": 133560 }, { "epoch": 37.913709906329835, "grad_norm": 0.01878693886101246, "learning_rate": 6.210190178824866e-05, "loss": 0.004500758647918701, "step": 133570 }, { "epoch": 37.91654839625319, "grad_norm": 0.11864493042230606, "learning_rate": 6.209906329832528e-05, "loss": 0.012052728235721588, "step": 133580 }, { "epoch": 37.919386886176554, "grad_norm": 0.20167142152786255, "learning_rate": 6.209622480840194e-05, "loss": 0.004166138172149658, "step": 133590 }, { "epoch": 37.92222537609992, "grad_norm": 0.09240973740816116, "learning_rate": 6.209338631847858e-05, "loss": 0.0034779652953147886, "step": 133600 }, { "epoch": 37.92506386602327, "grad_norm": 0.43006765842437744, "learning_rate": 6.209054782855521e-05, "loss": 0.007181794196367264, "step": 133610 }, { "epoch": 37.927902355946635, "grad_norm": 5.598141670227051, "learning_rate": 6.208770933863185e-05, "loss": 0.0017601538449525834, "step": 133620 }, { "epoch": 37.93074084587, "grad_norm": 0.09927999973297119, "learning_rate": 6.208487084870849e-05, "loss": 0.0008703134953975677, "step": 133630 }, { "epoch": 37.93357933579336, "grad_norm": 0.1601196825504303, "learning_rate": 6.208203235878512e-05, "loss": 0.00384872630238533, "step": 133640 }, { "epoch": 37.93641782571672, "grad_norm": 0.03693526238203049, "learning_rate": 6.207919386886177e-05, "loss": 0.004108286648988724, "step": 133650 }, { "epoch": 37.93925631564008, "grad_norm": 0.19288627803325653, "learning_rate": 6.207635537893841e-05, "loss": 0.004169532656669616, "step": 133660 }, { "epoch": 37.94209480556344, "grad_norm": 0.7092088460922241, "learning_rate": 6.207351688901504e-05, "loss": 0.010420390963554382, "step": 133670 }, { "epoch": 37.9449332954868, "grad_norm": 0.17573139071464539, "learning_rate": 6.207067839909168e-05, "loss": 0.00048105865716934203, "step": 133680 }, { "epoch": 37.94777178541016, "grad_norm": 0.39503028988838196, "learning_rate": 6.206783990916833e-05, "loss": 0.004580898210406303, "step": 133690 }, { "epoch": 37.950610275333524, "grad_norm": 0.25896045565605164, "learning_rate": 6.206500141924497e-05, "loss": 0.004926422983407975, "step": 133700 }, { "epoch": 37.95344876525688, "grad_norm": 2.4578018188476562, "learning_rate": 6.20621629293216e-05, "loss": 0.0029869560152292252, "step": 133710 }, { "epoch": 37.95628725518024, "grad_norm": 0.0502304770052433, "learning_rate": 6.205932443939825e-05, "loss": 0.00618220753967762, "step": 133720 }, { "epoch": 37.959125745103606, "grad_norm": 0.16630424559116364, "learning_rate": 6.205648594947489e-05, "loss": 0.006726567447185516, "step": 133730 }, { "epoch": 37.96196423502697, "grad_norm": 0.04972125217318535, "learning_rate": 6.205364745955152e-05, "loss": 0.003979324549436569, "step": 133740 }, { "epoch": 37.964802724950324, "grad_norm": 0.06669221073389053, "learning_rate": 6.205080896962816e-05, "loss": 0.00627133697271347, "step": 133750 }, { "epoch": 37.96764121487369, "grad_norm": 0.05081760510802269, "learning_rate": 6.20479704797048e-05, "loss": 0.0005233563482761383, "step": 133760 }, { "epoch": 37.97047970479705, "grad_norm": 0.0988716408610344, "learning_rate": 6.204513198978143e-05, "loss": 0.001973474770784378, "step": 133770 }, { "epoch": 37.973318194720406, "grad_norm": 0.25183871388435364, "learning_rate": 6.204229349985807e-05, "loss": 0.001125209964811802, "step": 133780 }, { "epoch": 37.97615668464377, "grad_norm": 0.08365386724472046, "learning_rate": 6.203945500993473e-05, "loss": 0.0002557002007961273, "step": 133790 }, { "epoch": 37.97899517456713, "grad_norm": 3.237290620803833, "learning_rate": 6.203661652001135e-05, "loss": 0.0018150700256228448, "step": 133800 }, { "epoch": 37.981833664490495, "grad_norm": 0.037344787269830704, "learning_rate": 6.2033778030088e-05, "loss": 0.007084726542234421, "step": 133810 }, { "epoch": 37.98467215441385, "grad_norm": 0.08470568805932999, "learning_rate": 6.203093954016464e-05, "loss": 0.0040331616997718815, "step": 133820 }, { "epoch": 37.98751064433721, "grad_norm": 0.009448590688407421, "learning_rate": 6.202810105024128e-05, "loss": 0.0061150029301643375, "step": 133830 }, { "epoch": 37.990349134260576, "grad_norm": 1.639615535736084, "learning_rate": 6.20252625603179e-05, "loss": 0.0016608953475952149, "step": 133840 }, { "epoch": 37.99318762418393, "grad_norm": 0.09501732885837555, "learning_rate": 6.202242407039456e-05, "loss": 0.006733503937721252, "step": 133850 }, { "epoch": 37.996026114107295, "grad_norm": 0.14965805411338806, "learning_rate": 6.20195855804712e-05, "loss": 0.005885728821158409, "step": 133860 }, { "epoch": 37.99886460403066, "grad_norm": 0.0549590066075325, "learning_rate": 6.201674709054783e-05, "loss": 0.00476120114326477, "step": 133870 }, { "epoch": 38.00170309395401, "grad_norm": 0.016147568821907043, "learning_rate": 6.201390860062447e-05, "loss": 0.005403894186019898, "step": 133880 }, { "epoch": 38.004541583877376, "grad_norm": 0.01216079667210579, "learning_rate": 6.201107011070111e-05, "loss": 0.0012152742594480515, "step": 133890 }, { "epoch": 38.00738007380074, "grad_norm": 0.007163998205214739, "learning_rate": 6.200823162077774e-05, "loss": 0.0011362681165337563, "step": 133900 }, { "epoch": 38.0102185637241, "grad_norm": 0.013308556750416756, "learning_rate": 6.200539313085438e-05, "loss": 0.00028828810900449753, "step": 133910 }, { "epoch": 38.01305705364746, "grad_norm": 0.02380562573671341, "learning_rate": 6.200255464093104e-05, "loss": 0.0025730546563863753, "step": 133920 }, { "epoch": 38.01589554357082, "grad_norm": 0.33796218037605286, "learning_rate": 6.199971615100766e-05, "loss": 0.0024466406553983687, "step": 133930 }, { "epoch": 38.018734033494184, "grad_norm": 0.014639473520219326, "learning_rate": 6.19968776610843e-05, "loss": 0.0002517087385058403, "step": 133940 }, { "epoch": 38.02157252341754, "grad_norm": 0.08308983594179153, "learning_rate": 6.199403917116095e-05, "loss": 0.000586802139878273, "step": 133950 }, { "epoch": 38.0244110133409, "grad_norm": 0.05938781052827835, "learning_rate": 6.199120068123759e-05, "loss": 0.00058657918125391, "step": 133960 }, { "epoch": 38.027249503264265, "grad_norm": 9.076826095581055, "learning_rate": 6.198836219131422e-05, "loss": 0.005051193386316299, "step": 133970 }, { "epoch": 38.03008799318762, "grad_norm": 0.03726145252585411, "learning_rate": 6.198552370139087e-05, "loss": 0.008142074197530746, "step": 133980 }, { "epoch": 38.032926483110984, "grad_norm": 0.652902364730835, "learning_rate": 6.198268521146751e-05, "loss": 0.00262993648648262, "step": 133990 }, { "epoch": 38.03576497303435, "grad_norm": 0.1720522791147232, "learning_rate": 6.197984672154414e-05, "loss": 0.004017281532287598, "step": 134000 }, { "epoch": 38.03576497303435, "eval_accuracy": 0.9701786736186176, "eval_loss": 0.1255292445421219, "eval_runtime": 37.1429, "eval_samples_per_second": 423.419, "eval_steps_per_second": 6.623, "step": 134000 }, { "epoch": 38.03860346295771, "grad_norm": 0.5704836249351501, "learning_rate": 6.197700823162078e-05, "loss": 0.0024803854525089265, "step": 134010 }, { "epoch": 38.041441952881065, "grad_norm": 0.24338151514530182, "learning_rate": 6.197416974169742e-05, "loss": 0.0015908006578683853, "step": 134020 }, { "epoch": 38.04428044280443, "grad_norm": 0.01619868353009224, "learning_rate": 6.197133125177405e-05, "loss": 0.0011193597689270972, "step": 134030 }, { "epoch": 38.04711893272779, "grad_norm": 0.3558022975921631, "learning_rate": 6.196849276185069e-05, "loss": 0.00219272468239069, "step": 134040 }, { "epoch": 38.04995742265115, "grad_norm": 0.010441632010042667, "learning_rate": 6.196565427192735e-05, "loss": 0.0005368759855628013, "step": 134050 }, { "epoch": 38.05279591257451, "grad_norm": 0.1674298644065857, "learning_rate": 6.196281578200397e-05, "loss": 0.000998491607606411, "step": 134060 }, { "epoch": 38.05563440249787, "grad_norm": 0.036640267819166183, "learning_rate": 6.195997729208062e-05, "loss": 0.005657176300883293, "step": 134070 }, { "epoch": 38.05847289242123, "grad_norm": 5.0986647605896, "learning_rate": 6.195713880215726e-05, "loss": 0.0019425775855779648, "step": 134080 }, { "epoch": 38.06131138234459, "grad_norm": 0.027088329195976257, "learning_rate": 6.19543003122339e-05, "loss": 0.0021879451349377634, "step": 134090 }, { "epoch": 38.064149872267954, "grad_norm": 0.7955424189567566, "learning_rate": 6.195146182231053e-05, "loss": 0.0014551440253853798, "step": 134100 }, { "epoch": 38.06698836219132, "grad_norm": 2.3550095558166504, "learning_rate": 6.194862333238717e-05, "loss": 0.0031470462679862978, "step": 134110 }, { "epoch": 38.06982685211467, "grad_norm": 1.430289387702942, "learning_rate": 6.194578484246382e-05, "loss": 0.0012683749198913574, "step": 134120 }, { "epoch": 38.072665342038036, "grad_norm": 6.015800952911377, "learning_rate": 6.194294635254045e-05, "loss": 0.00372002050280571, "step": 134130 }, { "epoch": 38.0755038319614, "grad_norm": 0.7207737565040588, "learning_rate": 6.194010786261709e-05, "loss": 0.007505136728286743, "step": 134140 }, { "epoch": 38.078342321884755, "grad_norm": 1.0534330606460571, "learning_rate": 6.193726937269373e-05, "loss": 0.001972649060189724, "step": 134150 }, { "epoch": 38.08118081180812, "grad_norm": 0.1450037658214569, "learning_rate": 6.193443088277036e-05, "loss": 0.004161333665251732, "step": 134160 }, { "epoch": 38.08401930173148, "grad_norm": 0.04175648093223572, "learning_rate": 6.1931592392847e-05, "loss": 0.008084569126367569, "step": 134170 }, { "epoch": 38.086857791654836, "grad_norm": 3.109922170639038, "learning_rate": 6.192875390292366e-05, "loss": 0.0024585116654634474, "step": 134180 }, { "epoch": 38.0896962815782, "grad_norm": 0.5983924865722656, "learning_rate": 6.192591541300029e-05, "loss": 0.003848155587911606, "step": 134190 }, { "epoch": 38.09253477150156, "grad_norm": 5.216207027435303, "learning_rate": 6.192307692307693e-05, "loss": 0.006480614095926285, "step": 134200 }, { "epoch": 38.095373261424925, "grad_norm": 0.11704295128583908, "learning_rate": 6.192023843315357e-05, "loss": 0.0007262568920850754, "step": 134210 }, { "epoch": 38.09821175134828, "grad_norm": 0.01690094731748104, "learning_rate": 6.191739994323021e-05, "loss": 0.00031814202666282656, "step": 134220 }, { "epoch": 38.10105024127164, "grad_norm": 0.5934534072875977, "learning_rate": 6.191456145330684e-05, "loss": 0.0011080393567681312, "step": 134230 }, { "epoch": 38.103888731195006, "grad_norm": 0.24051523208618164, "learning_rate": 6.191172296338348e-05, "loss": 0.009291982650756836, "step": 134240 }, { "epoch": 38.10672722111836, "grad_norm": 0.13735246658325195, "learning_rate": 6.190888447346012e-05, "loss": 0.004997803270816803, "step": 134250 }, { "epoch": 38.109565711041725, "grad_norm": 0.06422114372253418, "learning_rate": 6.190604598353676e-05, "loss": 0.0022265076637268066, "step": 134260 }, { "epoch": 38.11240420096509, "grad_norm": 0.8239644169807434, "learning_rate": 6.19032074936134e-05, "loss": 0.003969861567020417, "step": 134270 }, { "epoch": 38.11524269088845, "grad_norm": 1.2365531921386719, "learning_rate": 6.190036900369004e-05, "loss": 0.001142777130007744, "step": 134280 }, { "epoch": 38.11808118081181, "grad_norm": 9.364213943481445, "learning_rate": 6.189753051376667e-05, "loss": 0.00826614648103714, "step": 134290 }, { "epoch": 38.12091967073517, "grad_norm": 0.011851593852043152, "learning_rate": 6.189469202384331e-05, "loss": 0.00044023115187883375, "step": 134300 }, { "epoch": 38.12375816065853, "grad_norm": 0.007878848351538181, "learning_rate": 6.189185353391996e-05, "loss": 0.00034099332988262175, "step": 134310 }, { "epoch": 38.12659665058189, "grad_norm": 0.048633236438035965, "learning_rate": 6.18890150439966e-05, "loss": 0.0011423291638493539, "step": 134320 }, { "epoch": 38.12943514050525, "grad_norm": 0.0715763047337532, "learning_rate": 6.188617655407324e-05, "loss": 0.002662939578294754, "step": 134330 }, { "epoch": 38.132273630428614, "grad_norm": 0.028371069580316544, "learning_rate": 6.188333806414988e-05, "loss": 0.00036171264946460724, "step": 134340 }, { "epoch": 38.13511212035197, "grad_norm": 0.0041834888979792595, "learning_rate": 6.188049957422651e-05, "loss": 0.0025777619332075117, "step": 134350 }, { "epoch": 38.13795061027533, "grad_norm": 0.5686787962913513, "learning_rate": 6.187766108430315e-05, "loss": 0.0005776805803179741, "step": 134360 }, { "epoch": 38.140789100198695, "grad_norm": 0.06691711395978928, "learning_rate": 6.187482259437979e-05, "loss": 0.009525726735591888, "step": 134370 }, { "epoch": 38.14362759012206, "grad_norm": 0.04511043801903725, "learning_rate": 6.187198410445643e-05, "loss": 0.0013189801946282386, "step": 134380 }, { "epoch": 38.146466080045414, "grad_norm": 0.009178542532026768, "learning_rate": 6.186914561453307e-05, "loss": 0.00035865996032953263, "step": 134390 }, { "epoch": 38.14930456996878, "grad_norm": 0.044690974056720734, "learning_rate": 6.186630712460971e-05, "loss": 0.0016815252602100372, "step": 134400 }, { "epoch": 38.15214305989214, "grad_norm": 0.9054922461509705, "learning_rate": 6.186346863468636e-05, "loss": 0.01102529764175415, "step": 134410 }, { "epoch": 38.154981549815496, "grad_norm": 0.015711108222603798, "learning_rate": 6.186063014476298e-05, "loss": 0.0008436299860477448, "step": 134420 }, { "epoch": 38.15782003973886, "grad_norm": 0.020173439756035805, "learning_rate": 6.185779165483962e-05, "loss": 0.012356314063072204, "step": 134430 }, { "epoch": 38.16065852966222, "grad_norm": 0.013480287976562977, "learning_rate": 6.185495316491627e-05, "loss": 0.011051062494516373, "step": 134440 }, { "epoch": 38.16349701958558, "grad_norm": 4.529234409332275, "learning_rate": 6.185211467499291e-05, "loss": 0.0019994623959064483, "step": 134450 }, { "epoch": 38.16633550950894, "grad_norm": 0.18400196731090546, "learning_rate": 6.184927618506955e-05, "loss": 0.006748384237289429, "step": 134460 }, { "epoch": 38.1691739994323, "grad_norm": 0.7510069012641907, "learning_rate": 6.184643769514619e-05, "loss": 0.0036686722189188, "step": 134470 }, { "epoch": 38.172012489355666, "grad_norm": 0.006407460663467646, "learning_rate": 6.184359920522282e-05, "loss": 0.005457547307014465, "step": 134480 }, { "epoch": 38.17485097927902, "grad_norm": 0.5608824491500854, "learning_rate": 6.184076071529946e-05, "loss": 0.0010272521525621415, "step": 134490 }, { "epoch": 38.177689469202384, "grad_norm": 0.07036680728197098, "learning_rate": 6.18379222253761e-05, "loss": 0.0005003783851861953, "step": 134500 }, { "epoch": 38.177689469202384, "eval_accuracy": 0.9800979207731926, "eval_loss": 0.07514756172895432, "eval_runtime": 38.8595, "eval_samples_per_second": 404.714, "eval_steps_per_second": 6.33, "step": 134500 }, { "epoch": 38.18052795912575, "grad_norm": 0.7685106992721558, "learning_rate": 6.183508373545274e-05, "loss": 0.0005272414535284042, "step": 134510 }, { "epoch": 38.1833664490491, "grad_norm": 0.013270623981952667, "learning_rate": 6.183224524552938e-05, "loss": 0.012999813258647918, "step": 134520 }, { "epoch": 38.186204938972466, "grad_norm": 2.634406805038452, "learning_rate": 6.182940675560602e-05, "loss": 0.0014631178230047226, "step": 134530 }, { "epoch": 38.18904342889583, "grad_norm": 0.13247467577457428, "learning_rate": 6.182656826568267e-05, "loss": 0.0013133818283677101, "step": 134540 }, { "epoch": 38.191881918819185, "grad_norm": 13.219198226928711, "learning_rate": 6.18237297757593e-05, "loss": 0.013120271265506744, "step": 134550 }, { "epoch": 38.19472040874255, "grad_norm": 0.20869851112365723, "learning_rate": 6.182089128583594e-05, "loss": 0.00650833398103714, "step": 134560 }, { "epoch": 38.19755889866591, "grad_norm": 7.039749622344971, "learning_rate": 6.181805279591258e-05, "loss": 0.004609691351652146, "step": 134570 }, { "epoch": 38.20039738858927, "grad_norm": 0.08843182027339935, "learning_rate": 6.181521430598922e-05, "loss": 0.00158722884953022, "step": 134580 }, { "epoch": 38.20323587851263, "grad_norm": 0.15220847725868225, "learning_rate": 6.181237581606586e-05, "loss": 0.0008817510679364204, "step": 134590 }, { "epoch": 38.20607436843599, "grad_norm": 10.84158706665039, "learning_rate": 6.18095373261425e-05, "loss": 0.0024317096918821334, "step": 134600 }, { "epoch": 38.208912858359355, "grad_norm": 0.10404372960329056, "learning_rate": 6.180669883621913e-05, "loss": 0.004131144285202027, "step": 134610 }, { "epoch": 38.21175134828271, "grad_norm": 0.043723661452531815, "learning_rate": 6.180386034629577e-05, "loss": 0.0011300047859549522, "step": 134620 }, { "epoch": 38.214589838206074, "grad_norm": 0.033359166234731674, "learning_rate": 6.180102185637241e-05, "loss": 0.018344712257385255, "step": 134630 }, { "epoch": 38.21742832812944, "grad_norm": 6.860023021697998, "learning_rate": 6.179818336644905e-05, "loss": 0.002460017427802086, "step": 134640 }, { "epoch": 38.2202668180528, "grad_norm": 0.05471000075340271, "learning_rate": 6.17953448765257e-05, "loss": 0.005373214930295944, "step": 134650 }, { "epoch": 38.223105307976155, "grad_norm": 0.19530485570430756, "learning_rate": 6.179250638660234e-05, "loss": 0.0010156616568565368, "step": 134660 }, { "epoch": 38.22594379789952, "grad_norm": 2.2752082347869873, "learning_rate": 6.178966789667898e-05, "loss": 0.005034676566720009, "step": 134670 }, { "epoch": 38.22878228782288, "grad_norm": 10.417679786682129, "learning_rate": 6.17868294067556e-05, "loss": 0.0029569165781140327, "step": 134680 }, { "epoch": 38.23162077774624, "grad_norm": 0.08573775738477707, "learning_rate": 6.178399091683225e-05, "loss": 0.0007379787042737007, "step": 134690 }, { "epoch": 38.2344592676696, "grad_norm": 13.288413047790527, "learning_rate": 6.178115242690889e-05, "loss": 0.0074644781649112705, "step": 134700 }, { "epoch": 38.23729775759296, "grad_norm": 0.04681917279958725, "learning_rate": 6.177831393698552e-05, "loss": 0.0011805811896920203, "step": 134710 }, { "epoch": 38.24013624751632, "grad_norm": 0.18806318938732147, "learning_rate": 6.177547544706217e-05, "loss": 0.0008793771266937256, "step": 134720 }, { "epoch": 38.24297473743968, "grad_norm": 8.423418998718262, "learning_rate": 6.177263695713881e-05, "loss": 0.010218337178230286, "step": 134730 }, { "epoch": 38.245813227363044, "grad_norm": 0.43296170234680176, "learning_rate": 6.176979846721544e-05, "loss": 0.0072412282228469845, "step": 134740 }, { "epoch": 38.24865171728641, "grad_norm": 0.12988637387752533, "learning_rate": 6.176695997729208e-05, "loss": 0.0038865096867084503, "step": 134750 }, { "epoch": 38.25149020720976, "grad_norm": 0.13057474792003632, "learning_rate": 6.176412148736872e-05, "loss": 0.003936364501714707, "step": 134760 }, { "epoch": 38.254328697133126, "grad_norm": 0.1352299451828003, "learning_rate": 6.176128299744536e-05, "loss": 0.0029355498030781747, "step": 134770 }, { "epoch": 38.25716718705649, "grad_norm": 0.43168404698371887, "learning_rate": 6.1758444507522e-05, "loss": 0.009353984147310257, "step": 134780 }, { "epoch": 38.260005676979844, "grad_norm": 9.571249961853027, "learning_rate": 6.175560601759865e-05, "loss": 0.018196867406368257, "step": 134790 }, { "epoch": 38.26284416690321, "grad_norm": 0.4395882189273834, "learning_rate": 6.175276752767529e-05, "loss": 0.004565372318029404, "step": 134800 }, { "epoch": 38.26568265682657, "grad_norm": 12.108805656433105, "learning_rate": 6.174992903775192e-05, "loss": 0.004291390627622604, "step": 134810 }, { "epoch": 38.268521146749926, "grad_norm": 0.037395477294921875, "learning_rate": 6.174709054782856e-05, "loss": 0.0007753729820251465, "step": 134820 }, { "epoch": 38.27135963667329, "grad_norm": 0.04697021096944809, "learning_rate": 6.17442520579052e-05, "loss": 0.005984028428792953, "step": 134830 }, { "epoch": 38.27419812659665, "grad_norm": 2.053705930709839, "learning_rate": 6.174141356798183e-05, "loss": 0.001252765767276287, "step": 134840 }, { "epoch": 38.277036616520014, "grad_norm": 4.910493850708008, "learning_rate": 6.173857507805848e-05, "loss": 0.004214205965399742, "step": 134850 }, { "epoch": 38.27987510644337, "grad_norm": 0.11034940183162689, "learning_rate": 6.173573658813512e-05, "loss": 0.0019898418337106704, "step": 134860 }, { "epoch": 38.28271359636673, "grad_norm": 0.08946134895086288, "learning_rate": 6.173289809821175e-05, "loss": 0.004921445623040199, "step": 134870 }, { "epoch": 38.285552086290096, "grad_norm": 0.7519835829734802, "learning_rate": 6.173005960828839e-05, "loss": 0.00910932570695877, "step": 134880 }, { "epoch": 38.28839057621345, "grad_norm": 0.030343521386384964, "learning_rate": 6.172722111836503e-05, "loss": 0.003093700483441353, "step": 134890 }, { "epoch": 38.291229066136815, "grad_norm": 17.63006019592285, "learning_rate": 6.172438262844167e-05, "loss": 0.01694863885641098, "step": 134900 }, { "epoch": 38.29406755606018, "grad_norm": 0.5154672265052795, "learning_rate": 6.17215441385183e-05, "loss": 0.008747769147157669, "step": 134910 }, { "epoch": 38.29690604598353, "grad_norm": 0.5369908809661865, "learning_rate": 6.171870564859496e-05, "loss": 0.0005744501948356628, "step": 134920 }, { "epoch": 38.299744535906896, "grad_norm": 6.321199893951416, "learning_rate": 6.17158671586716e-05, "loss": 0.015514123439788818, "step": 134930 }, { "epoch": 38.30258302583026, "grad_norm": 0.009691588580608368, "learning_rate": 6.171302866874823e-05, "loss": 0.0015963349491357804, "step": 134940 }, { "epoch": 38.30542151575362, "grad_norm": 0.0010229273466393352, "learning_rate": 6.171019017882487e-05, "loss": 0.012143309414386749, "step": 134950 }, { "epoch": 38.30826000567698, "grad_norm": 1.5451672077178955, "learning_rate": 6.170735168890151e-05, "loss": 0.0018059102818369865, "step": 134960 }, { "epoch": 38.31109849560034, "grad_norm": 0.10877674072980881, "learning_rate": 6.170451319897814e-05, "loss": 0.0014419401064515113, "step": 134970 }, { "epoch": 38.3139369855237, "grad_norm": 1.7151204347610474, "learning_rate": 6.170167470905479e-05, "loss": 0.0027458230033516884, "step": 134980 }, { "epoch": 38.31677547544706, "grad_norm": 0.01628752611577511, "learning_rate": 6.169883621913143e-05, "loss": 0.002445701137185097, "step": 134990 }, { "epoch": 38.31961396537042, "grad_norm": 0.008918319828808308, "learning_rate": 6.169599772920806e-05, "loss": 0.0014657875522971152, "step": 135000 }, { "epoch": 38.31961396537042, "eval_accuracy": 0.9793984866789598, "eval_loss": 0.07885421812534332, "eval_runtime": 34.0814, "eval_samples_per_second": 461.454, "eval_steps_per_second": 7.218, "step": 135000 }, { "epoch": 38.322452455293785, "grad_norm": 0.0557883195579052, "learning_rate": 6.16931592392847e-05, "loss": 0.0014591848477721215, "step": 135010 }, { "epoch": 38.32529094521715, "grad_norm": 0.060493942350149155, "learning_rate": 6.169032074936134e-05, "loss": 0.002287701144814491, "step": 135020 }, { "epoch": 38.328129435140504, "grad_norm": 0.059983495622873306, "learning_rate": 6.168748225943798e-05, "loss": 0.0013097934424877167, "step": 135030 }, { "epoch": 38.33096792506387, "grad_norm": 0.07858454436063766, "learning_rate": 6.168464376951461e-05, "loss": 0.0017025157809257506, "step": 135040 }, { "epoch": 38.33380641498723, "grad_norm": 0.11978402733802795, "learning_rate": 6.168180527959127e-05, "loss": 0.006183784827589989, "step": 135050 }, { "epoch": 38.336644904910585, "grad_norm": 0.024973878636956215, "learning_rate": 6.167896678966791e-05, "loss": 0.0004359520971775055, "step": 135060 }, { "epoch": 38.33948339483395, "grad_norm": 8.499809265136719, "learning_rate": 6.167612829974454e-05, "loss": 0.007199577987194061, "step": 135070 }, { "epoch": 38.34232188475731, "grad_norm": 0.04775330424308777, "learning_rate": 6.167328980982118e-05, "loss": 0.00197543203830719, "step": 135080 }, { "epoch": 38.34516037468067, "grad_norm": 0.5895096063613892, "learning_rate": 6.167045131989782e-05, "loss": 0.0011191202327609062, "step": 135090 }, { "epoch": 38.34799886460403, "grad_norm": 0.01867276430130005, "learning_rate": 6.166761282997445e-05, "loss": 0.004035360738635063, "step": 135100 }, { "epoch": 38.35083735452739, "grad_norm": 2.4559810161590576, "learning_rate": 6.16647743400511e-05, "loss": 0.0011432092636823655, "step": 135110 }, { "epoch": 38.353675844450756, "grad_norm": 0.2182047814130783, "learning_rate": 6.166193585012774e-05, "loss": 0.00046364106237888335, "step": 135120 }, { "epoch": 38.35651433437411, "grad_norm": 0.00804863404482603, "learning_rate": 6.165909736020437e-05, "loss": 0.013024073839187623, "step": 135130 }, { "epoch": 38.359352824297474, "grad_norm": 0.7174137830734253, "learning_rate": 6.165625887028101e-05, "loss": 0.0032315075397491453, "step": 135140 }, { "epoch": 38.36219131422084, "grad_norm": 1.5369774103164673, "learning_rate": 6.165342038035765e-05, "loss": 0.0017221873626112939, "step": 135150 }, { "epoch": 38.36502980414419, "grad_norm": 8.77385425567627, "learning_rate": 6.16505818904343e-05, "loss": 0.016091370582580568, "step": 135160 }, { "epoch": 38.367868294067556, "grad_norm": 5.624913692474365, "learning_rate": 6.164774340051092e-05, "loss": 0.004078460484743118, "step": 135170 }, { "epoch": 38.37070678399092, "grad_norm": 7.206894397735596, "learning_rate": 6.164490491058758e-05, "loss": 0.0048384018242359165, "step": 135180 }, { "epoch": 38.373545273914274, "grad_norm": 3.7872066497802734, "learning_rate": 6.16420664206642e-05, "loss": 0.008958667516708374, "step": 135190 }, { "epoch": 38.37638376383764, "grad_norm": 2.8205859661102295, "learning_rate": 6.163922793074085e-05, "loss": 0.01591135412454605, "step": 135200 }, { "epoch": 38.379222253761, "grad_norm": 0.20070794224739075, "learning_rate": 6.163638944081749e-05, "loss": 0.004640296101570129, "step": 135210 }, { "epoch": 38.38206074368436, "grad_norm": 0.5196464657783508, "learning_rate": 6.163355095089413e-05, "loss": 0.0078135646879673, "step": 135220 }, { "epoch": 38.38489923360772, "grad_norm": 0.25022977590560913, "learning_rate": 6.163071246097076e-05, "loss": 0.005290189385414123, "step": 135230 }, { "epoch": 38.38773772353108, "grad_norm": 0.1503753960132599, "learning_rate": 6.16278739710474e-05, "loss": 0.0057117760181427, "step": 135240 }, { "epoch": 38.390576213454445, "grad_norm": 0.6074120402336121, "learning_rate": 6.162503548112405e-05, "loss": 0.011189694702625274, "step": 135250 }, { "epoch": 38.3934147033778, "grad_norm": 0.25988927483558655, "learning_rate": 6.162219699120068e-05, "loss": 0.0009357897564768791, "step": 135260 }, { "epoch": 38.39625319330116, "grad_norm": 0.7073507905006409, "learning_rate": 6.161935850127732e-05, "loss": 0.004007235541939736, "step": 135270 }, { "epoch": 38.399091683224526, "grad_norm": 1.0620476007461548, "learning_rate": 6.161652001135397e-05, "loss": 0.000672135315835476, "step": 135280 }, { "epoch": 38.40193017314788, "grad_norm": 1.3668956756591797, "learning_rate": 6.161368152143059e-05, "loss": 0.006332039088010788, "step": 135290 }, { "epoch": 38.404768663071245, "grad_norm": 0.540117084980011, "learning_rate": 6.161084303150723e-05, "loss": 0.0021289311349391936, "step": 135300 }, { "epoch": 38.40760715299461, "grad_norm": 0.08699240535497665, "learning_rate": 6.160800454158389e-05, "loss": 0.004479007050395012, "step": 135310 }, { "epoch": 38.41044564291797, "grad_norm": 9.054739952087402, "learning_rate": 6.160516605166052e-05, "loss": 0.00219569131731987, "step": 135320 }, { "epoch": 38.413284132841326, "grad_norm": 0.06463000923395157, "learning_rate": 6.160232756173716e-05, "loss": 0.0043089866638183595, "step": 135330 }, { "epoch": 38.41612262276469, "grad_norm": 0.6567022204399109, "learning_rate": 6.15994890718138e-05, "loss": 0.0002683555707335472, "step": 135340 }, { "epoch": 38.41896111268805, "grad_norm": 0.282825231552124, "learning_rate": 6.159665058189044e-05, "loss": 0.000505584292113781, "step": 135350 }, { "epoch": 38.42179960261141, "grad_norm": 0.055842604488134384, "learning_rate": 6.159381209196707e-05, "loss": 0.010624422132968903, "step": 135360 }, { "epoch": 38.42463809253477, "grad_norm": 0.7332379221916199, "learning_rate": 6.159097360204371e-05, "loss": 0.0039327584207057955, "step": 135370 }, { "epoch": 38.427476582458134, "grad_norm": 3.8074145317077637, "learning_rate": 6.158813511212037e-05, "loss": 0.003082932159304619, "step": 135380 }, { "epoch": 38.43031507238149, "grad_norm": 0.07329288870096207, "learning_rate": 6.158529662219699e-05, "loss": 0.0005435394123196602, "step": 135390 }, { "epoch": 38.43315356230485, "grad_norm": 0.272576242685318, "learning_rate": 6.158245813227363e-05, "loss": 0.002139926888048649, "step": 135400 }, { "epoch": 38.435992052228215, "grad_norm": 1.9506707191467285, "learning_rate": 6.157961964235028e-05, "loss": 0.007296667248010635, "step": 135410 }, { "epoch": 38.43883054215158, "grad_norm": 0.2818351686000824, "learning_rate": 6.15767811524269e-05, "loss": 0.0019269997254014015, "step": 135420 }, { "epoch": 38.441669032074934, "grad_norm": 0.031361132860183716, "learning_rate": 6.157394266250355e-05, "loss": 0.005898875370621681, "step": 135430 }, { "epoch": 38.4445075219983, "grad_norm": 0.17507071793079376, "learning_rate": 6.157110417258019e-05, "loss": 0.00501708984375, "step": 135440 }, { "epoch": 38.44734601192166, "grad_norm": 0.04505246877670288, "learning_rate": 6.156826568265683e-05, "loss": 0.02489379048347473, "step": 135450 }, { "epoch": 38.450184501845015, "grad_norm": 3.2420897483825684, "learning_rate": 6.156542719273347e-05, "loss": 0.009531221538782119, "step": 135460 }, { "epoch": 38.45302299176838, "grad_norm": 0.10979259759187698, "learning_rate": 6.156258870281011e-05, "loss": 0.018041130900382996, "step": 135470 }, { "epoch": 38.45586148169174, "grad_norm": 0.297700434923172, "learning_rate": 6.155975021288675e-05, "loss": 0.01232541874051094, "step": 135480 }, { "epoch": 38.458699971615104, "grad_norm": 0.7644086480140686, "learning_rate": 6.155691172296338e-05, "loss": 0.009905794262886047, "step": 135490 }, { "epoch": 38.46153846153846, "grad_norm": 13.091300010681152, "learning_rate": 6.155407323304002e-05, "loss": 0.0078253835439682, "step": 135500 }, { "epoch": 38.46153846153846, "eval_accuracy": 0.9786354676670693, "eval_loss": 0.0809955969452858, "eval_runtime": 35.6049, "eval_samples_per_second": 441.709, "eval_steps_per_second": 6.909, "step": 135500 }, { "epoch": 38.46437695146182, "grad_norm": 1.2811510562896729, "learning_rate": 6.155123474311668e-05, "loss": 0.006386943161487579, "step": 135510 }, { "epoch": 38.467215441385186, "grad_norm": 4.524451732635498, "learning_rate": 6.15483962531933e-05, "loss": 0.005774716287851334, "step": 135520 }, { "epoch": 38.47005393130854, "grad_norm": 0.7766211032867432, "learning_rate": 6.154555776326995e-05, "loss": 0.003128451481461525, "step": 135530 }, { "epoch": 38.472892421231904, "grad_norm": 2.357187032699585, "learning_rate": 6.154271927334659e-05, "loss": 0.019924332201480866, "step": 135540 }, { "epoch": 38.47573091115527, "grad_norm": 0.07996172457933426, "learning_rate": 6.153988078342321e-05, "loss": 0.0038073133677244187, "step": 135550 }, { "epoch": 38.47856940107862, "grad_norm": 0.492992639541626, "learning_rate": 6.153704229349986e-05, "loss": 0.002892624959349632, "step": 135560 }, { "epoch": 38.481407891001986, "grad_norm": 0.13809099793434143, "learning_rate": 6.15342038035765e-05, "loss": 0.0022659165784716605, "step": 135570 }, { "epoch": 38.48424638092535, "grad_norm": 0.010095041245222092, "learning_rate": 6.153136531365314e-05, "loss": 0.003166145086288452, "step": 135580 }, { "epoch": 38.48708487084871, "grad_norm": 12.58181381225586, "learning_rate": 6.152852682372978e-05, "loss": 0.008240599185228348, "step": 135590 }, { "epoch": 38.48992336077207, "grad_norm": 0.753507137298584, "learning_rate": 6.152568833380642e-05, "loss": 0.008493784070014953, "step": 135600 }, { "epoch": 38.49276185069543, "grad_norm": 21.262910842895508, "learning_rate": 6.152284984388306e-05, "loss": 0.023380067944526673, "step": 135610 }, { "epoch": 38.49560034061879, "grad_norm": 0.020773837342858315, "learning_rate": 6.152001135395969e-05, "loss": 0.025066441297531127, "step": 135620 }, { "epoch": 38.49843883054215, "grad_norm": 5.179233551025391, "learning_rate": 6.151717286403633e-05, "loss": 0.005713867768645286, "step": 135630 }, { "epoch": 38.50127732046551, "grad_norm": 0.023110590875148773, "learning_rate": 6.151433437411297e-05, "loss": 0.011672721058130265, "step": 135640 }, { "epoch": 38.504115810388875, "grad_norm": 10.621397018432617, "learning_rate": 6.151149588418961e-05, "loss": 0.010486581176519395, "step": 135650 }, { "epoch": 38.50695430031223, "grad_norm": 2.4289238452911377, "learning_rate": 6.150865739426626e-05, "loss": 0.0014154823496937753, "step": 135660 }, { "epoch": 38.50979279023559, "grad_norm": 4.998222827911377, "learning_rate": 6.15058189043429e-05, "loss": 0.02282567471265793, "step": 135670 }, { "epoch": 38.512631280158956, "grad_norm": 1.2325907945632935, "learning_rate": 6.150298041441953e-05, "loss": 0.0034927237778902055, "step": 135680 }, { "epoch": 38.51546977008232, "grad_norm": 2.2471811771392822, "learning_rate": 6.150014192449617e-05, "loss": 0.00316791906952858, "step": 135690 }, { "epoch": 38.518308260005675, "grad_norm": 0.23818141222000122, "learning_rate": 6.149730343457281e-05, "loss": 0.0014110276475548743, "step": 135700 }, { "epoch": 38.52114674992904, "grad_norm": 0.07162126153707504, "learning_rate": 6.149446494464945e-05, "loss": 0.0008360292762517929, "step": 135710 }, { "epoch": 38.5239852398524, "grad_norm": 0.5418238639831543, "learning_rate": 6.149162645472609e-05, "loss": 0.005330777168273926, "step": 135720 }, { "epoch": 38.52682372977576, "grad_norm": 0.07404498755931854, "learning_rate": 6.148878796480273e-05, "loss": 0.01662631779909134, "step": 135730 }, { "epoch": 38.52966221969912, "grad_norm": 0.18770992755889893, "learning_rate": 6.14862333238717e-05, "loss": 0.010503332316875457, "step": 135740 }, { "epoch": 38.53250070962248, "grad_norm": 0.2558671236038208, "learning_rate": 6.148339483394834e-05, "loss": 0.003346291184425354, "step": 135750 }, { "epoch": 38.535339199545845, "grad_norm": 0.33388751745224, "learning_rate": 6.148055634402498e-05, "loss": 0.00508473590016365, "step": 135760 }, { "epoch": 38.5381776894692, "grad_norm": 0.127977192401886, "learning_rate": 6.147771785410162e-05, "loss": 0.0002908051013946533, "step": 135770 }, { "epoch": 38.541016179392564, "grad_norm": 0.05396968498826027, "learning_rate": 6.147487936417826e-05, "loss": 0.009789107739925385, "step": 135780 }, { "epoch": 38.54385466931593, "grad_norm": 0.07871465384960175, "learning_rate": 6.14720408742549e-05, "loss": 0.008186904340982437, "step": 135790 }, { "epoch": 38.54669315923928, "grad_norm": 0.4141167402267456, "learning_rate": 6.146920238433153e-05, "loss": 0.004321401193737984, "step": 135800 }, { "epoch": 38.549531649162645, "grad_norm": 0.2252979278564453, "learning_rate": 6.146636389440817e-05, "loss": 0.0024842195212841033, "step": 135810 }, { "epoch": 38.55237013908601, "grad_norm": 0.3696064054965973, "learning_rate": 6.146352540448482e-05, "loss": 0.01053498089313507, "step": 135820 }, { "epoch": 38.555208629009364, "grad_norm": 0.030721038579940796, "learning_rate": 6.146068691456146e-05, "loss": 0.004423481971025467, "step": 135830 }, { "epoch": 38.55804711893273, "grad_norm": 0.020306497812271118, "learning_rate": 6.14578484246381e-05, "loss": 0.00504951998591423, "step": 135840 }, { "epoch": 38.56088560885609, "grad_norm": 0.06588511914014816, "learning_rate": 6.145500993471474e-05, "loss": 0.0011363631114363671, "step": 135850 }, { "epoch": 38.56372409877945, "grad_norm": 0.15579186379909515, "learning_rate": 6.145217144479137e-05, "loss": 0.0033096436411142348, "step": 135860 }, { "epoch": 38.56656258870281, "grad_norm": 10.890098571777344, "learning_rate": 6.144933295486801e-05, "loss": 0.007185662537813187, "step": 135870 }, { "epoch": 38.56940107862617, "grad_norm": 1.9572958946228027, "learning_rate": 6.144649446494465e-05, "loss": 0.0026154739782214163, "step": 135880 }, { "epoch": 38.572239568549534, "grad_norm": 13.790983200073242, "learning_rate": 6.144365597502129e-05, "loss": 0.018256977200508118, "step": 135890 }, { "epoch": 38.57507805847289, "grad_norm": 0.4719744324684143, "learning_rate": 6.144081748509793e-05, "loss": 0.0005313476547598838, "step": 135900 }, { "epoch": 38.57791654839625, "grad_norm": 0.46553462743759155, "learning_rate": 6.143797899517457e-05, "loss": 0.00719987004995346, "step": 135910 }, { "epoch": 38.580755038319616, "grad_norm": 0.08767589926719666, "learning_rate": 6.143514050525122e-05, "loss": 0.004509957134723663, "step": 135920 }, { "epoch": 38.58359352824297, "grad_norm": 0.31327006220817566, "learning_rate": 6.143230201532784e-05, "loss": 0.0029159488156437876, "step": 135930 }, { "epoch": 38.586432018166335, "grad_norm": 0.41613295674324036, "learning_rate": 6.142946352540449e-05, "loss": 0.010219283401966095, "step": 135940 }, { "epoch": 38.5892705080897, "grad_norm": 0.8579860925674438, "learning_rate": 6.142662503548113e-05, "loss": 0.004779508337378502, "step": 135950 }, { "epoch": 38.59210899801306, "grad_norm": 0.0146689023822546, "learning_rate": 6.142378654555777e-05, "loss": 0.0019050244241952895, "step": 135960 }, { "epoch": 38.594947487936416, "grad_norm": 1.7135367393493652, "learning_rate": 6.142094805563441e-05, "loss": 0.00257427878677845, "step": 135970 }, { "epoch": 38.59778597785978, "grad_norm": 4.078480243682861, "learning_rate": 6.141810956571105e-05, "loss": 0.0017132395878434181, "step": 135980 }, { "epoch": 38.60062446778314, "grad_norm": 10.821667671203613, "learning_rate": 6.141527107578768e-05, "loss": 0.008198779821395875, "step": 135990 }, { "epoch": 38.6034629577065, "grad_norm": 9.991869926452637, "learning_rate": 6.141243258586432e-05, "loss": 0.004455386847257614, "step": 136000 }, { "epoch": 38.6034629577065, "eval_accuracy": 0.9783175430787817, "eval_loss": 0.08232114464044571, "eval_runtime": 35.2607, "eval_samples_per_second": 446.021, "eval_steps_per_second": 6.977, "step": 136000 }, { "epoch": 38.60630144762986, "grad_norm": 0.35044077038764954, "learning_rate": 6.140959409594096e-05, "loss": 0.004070858657360077, "step": 136010 }, { "epoch": 38.60913993755322, "grad_norm": 0.08316246420145035, "learning_rate": 6.14067556060176e-05, "loss": 0.013470789790153504, "step": 136020 }, { "epoch": 38.61197842747658, "grad_norm": 0.07496082037687302, "learning_rate": 6.140391711609424e-05, "loss": 0.0041647210717201235, "step": 136030 }, { "epoch": 38.61481691739994, "grad_norm": 2.2136693000793457, "learning_rate": 6.140107862617089e-05, "loss": 0.008593781292438507, "step": 136040 }, { "epoch": 38.617655407323305, "grad_norm": 4.069894790649414, "learning_rate": 6.139824013624753e-05, "loss": 0.0064280204474926, "step": 136050 }, { "epoch": 38.62049389724667, "grad_norm": 0.015389995649456978, "learning_rate": 6.139540164632415e-05, "loss": 0.017202655971050262, "step": 136060 }, { "epoch": 38.623332387170024, "grad_norm": 0.010840652510523796, "learning_rate": 6.139284700539313e-05, "loss": 0.014419828355312348, "step": 136070 }, { "epoch": 38.62617087709339, "grad_norm": 0.9015243053436279, "learning_rate": 6.139000851546978e-05, "loss": 0.0010333331301808357, "step": 136080 }, { "epoch": 38.62900936701675, "grad_norm": 0.054451581090688705, "learning_rate": 6.138717002554642e-05, "loss": 0.0020281733945012093, "step": 136090 }, { "epoch": 38.631847856940105, "grad_norm": 1.7980101108551025, "learning_rate": 6.138433153562305e-05, "loss": 0.002270233631134033, "step": 136100 }, { "epoch": 38.63468634686347, "grad_norm": 0.1775554120540619, "learning_rate": 6.138149304569969e-05, "loss": 0.0010904129594564438, "step": 136110 }, { "epoch": 38.63752483678683, "grad_norm": 0.030048387125134468, "learning_rate": 6.137865455577633e-05, "loss": 0.0024651601910591126, "step": 136120 }, { "epoch": 38.64036332671019, "grad_norm": 1.9386261701583862, "learning_rate": 6.137581606585297e-05, "loss": 0.0021690472960472105, "step": 136130 }, { "epoch": 38.64320181663355, "grad_norm": 0.23750624060630798, "learning_rate": 6.137297757592961e-05, "loss": 0.0003968421369791031, "step": 136140 }, { "epoch": 38.64604030655691, "grad_norm": 0.6990492343902588, "learning_rate": 6.137013908600625e-05, "loss": 0.0037352167069911956, "step": 136150 }, { "epoch": 38.648878796480275, "grad_norm": 0.40639209747314453, "learning_rate": 6.13673005960829e-05, "loss": 0.0037656232714653017, "step": 136160 }, { "epoch": 38.65171728640363, "grad_norm": 0.9903722405433655, "learning_rate": 6.136446210615952e-05, "loss": 0.0009029651060700417, "step": 136170 }, { "epoch": 38.654555776326994, "grad_norm": 1.5648820400238037, "learning_rate": 6.136162361623616e-05, "loss": 0.0017655815929174424, "step": 136180 }, { "epoch": 38.65739426625036, "grad_norm": 0.004785955883562565, "learning_rate": 6.13587851263128e-05, "loss": 0.00039118211716413496, "step": 136190 }, { "epoch": 38.66023275617371, "grad_norm": 0.1243247389793396, "learning_rate": 6.135594663638943e-05, "loss": 0.0037620998919010162, "step": 136200 }, { "epoch": 38.663071246097076, "grad_norm": 0.061005644500255585, "learning_rate": 6.135310814646609e-05, "loss": 0.0007876038551330566, "step": 136210 }, { "epoch": 38.66590973602044, "grad_norm": 0.016698021441698074, "learning_rate": 6.135026965654273e-05, "loss": 0.0019412145018577575, "step": 136220 }, { "epoch": 38.6687482259438, "grad_norm": 0.6778357625007629, "learning_rate": 6.134743116661936e-05, "loss": 0.0033628754317760468, "step": 136230 }, { "epoch": 38.67158671586716, "grad_norm": 0.07289861887693405, "learning_rate": 6.1344592676696e-05, "loss": 0.0012155042961239815, "step": 136240 }, { "epoch": 38.67442520579052, "grad_norm": 0.1941007822751999, "learning_rate": 6.134175418677264e-05, "loss": 0.0036195859313011168, "step": 136250 }, { "epoch": 38.67726369571388, "grad_norm": 0.03282526507973671, "learning_rate": 6.133891569684928e-05, "loss": 0.00171671062707901, "step": 136260 }, { "epoch": 38.68010218563724, "grad_norm": 0.665863037109375, "learning_rate": 6.133607720692592e-05, "loss": 0.0016794441267848014, "step": 136270 }, { "epoch": 38.6829406755606, "grad_norm": 0.2341645061969757, "learning_rate": 6.133323871700256e-05, "loss": 0.0006398824974894523, "step": 136280 }, { "epoch": 38.685779165483964, "grad_norm": 0.11934174597263336, "learning_rate": 6.13304002270792e-05, "loss": 0.007969504594802857, "step": 136290 }, { "epoch": 38.68861765540732, "grad_norm": 0.046812936663627625, "learning_rate": 6.132756173715583e-05, "loss": 0.004245748370885849, "step": 136300 }, { "epoch": 38.69145614533068, "grad_norm": 0.0759296715259552, "learning_rate": 6.132472324723247e-05, "loss": 0.000775645487010479, "step": 136310 }, { "epoch": 38.694294635254046, "grad_norm": 9.86835765838623, "learning_rate": 6.132188475730911e-05, "loss": 0.010241778194904327, "step": 136320 }, { "epoch": 38.69713312517741, "grad_norm": 0.02673879824578762, "learning_rate": 6.131904626738574e-05, "loss": 0.0004922915250062943, "step": 136330 }, { "epoch": 38.699971615100765, "grad_norm": 0.291986346244812, "learning_rate": 6.13162077774624e-05, "loss": 0.005485595017671585, "step": 136340 }, { "epoch": 38.70281010502413, "grad_norm": 0.08575223386287689, "learning_rate": 6.131336928753904e-05, "loss": 0.0006475599482655525, "step": 136350 }, { "epoch": 38.70564859494749, "grad_norm": 0.027092453092336655, "learning_rate": 6.131053079761567e-05, "loss": 0.00045058224350214005, "step": 136360 }, { "epoch": 38.708487084870846, "grad_norm": 1.7968275547027588, "learning_rate": 6.130769230769231e-05, "loss": 0.0010335387662053109, "step": 136370 }, { "epoch": 38.71132557479421, "grad_norm": 0.051553644239902496, "learning_rate": 6.130485381776895e-05, "loss": 0.004842593520879746, "step": 136380 }, { "epoch": 38.71416406471757, "grad_norm": 2.1784896850585938, "learning_rate": 6.130201532784559e-05, "loss": 0.004233397170901298, "step": 136390 }, { "epoch": 38.71700255464093, "grad_norm": 0.06154107302427292, "learning_rate": 6.129917683792222e-05, "loss": 0.004514705762267113, "step": 136400 }, { "epoch": 38.71984104456429, "grad_norm": 0.029550757259130478, "learning_rate": 6.129633834799887e-05, "loss": 0.003960264474153518, "step": 136410 }, { "epoch": 38.72267953448765, "grad_norm": 1.9905287027359009, "learning_rate": 6.129349985807552e-05, "loss": 0.0034955449402332304, "step": 136420 }, { "epoch": 38.725518024411016, "grad_norm": 0.030955089256167412, "learning_rate": 6.129066136815214e-05, "loss": 0.0004518678411841393, "step": 136430 }, { "epoch": 38.72835651433437, "grad_norm": 0.30183839797973633, "learning_rate": 6.128782287822878e-05, "loss": 0.001785411685705185, "step": 136440 }, { "epoch": 38.731195004257735, "grad_norm": 0.07063701748847961, "learning_rate": 6.128498438830543e-05, "loss": 0.0004727933555841446, "step": 136450 }, { "epoch": 38.7340334941811, "grad_norm": 0.13402800261974335, "learning_rate": 6.128214589838205e-05, "loss": 0.0040714025497436525, "step": 136460 }, { "epoch": 38.736871984104454, "grad_norm": 0.10933179408311844, "learning_rate": 6.127930740845871e-05, "loss": 0.0006298787891864776, "step": 136470 }, { "epoch": 38.73971047402782, "grad_norm": 2.1050407886505127, "learning_rate": 6.127646891853535e-05, "loss": 0.0069958120584487915, "step": 136480 }, { "epoch": 38.74254896395118, "grad_norm": 1.422590970993042, "learning_rate": 6.127363042861198e-05, "loss": 0.016526614129543305, "step": 136490 }, { "epoch": 38.745387453874535, "grad_norm": 0.0075188432820141315, "learning_rate": 6.127079193868862e-05, "loss": 0.0009666452184319496, "step": 136500 }, { "epoch": 38.745387453874535, "eval_accuracy": 0.9825777325618363, "eval_loss": 0.06619611382484436, "eval_runtime": 34.3185, "eval_samples_per_second": 458.265, "eval_steps_per_second": 7.168, "step": 136500 }, { "epoch": 38.7482259437979, "grad_norm": 0.0074618603102862835, "learning_rate": 6.126795344876526e-05, "loss": 0.0005958257243037224, "step": 136510 }, { "epoch": 38.75106443372126, "grad_norm": 0.12224836647510529, "learning_rate": 6.12651149588419e-05, "loss": 0.0014942463487386704, "step": 136520 }, { "epoch": 38.753902923644624, "grad_norm": 0.07557173818349838, "learning_rate": 6.126227646891853e-05, "loss": 0.0003363640978932381, "step": 136530 }, { "epoch": 38.75674141356798, "grad_norm": 0.03231075778603554, "learning_rate": 6.125943797899518e-05, "loss": 0.001272120326757431, "step": 136540 }, { "epoch": 38.75957990349134, "grad_norm": 0.030424920842051506, "learning_rate": 6.125659948907183e-05, "loss": 0.0005087180063128472, "step": 136550 }, { "epoch": 38.762418393414706, "grad_norm": 0.01155832502990961, "learning_rate": 6.125376099914845e-05, "loss": 0.0014127796515822411, "step": 136560 }, { "epoch": 38.76525688333806, "grad_norm": 0.03444232791662216, "learning_rate": 6.12509225092251e-05, "loss": 0.0004647649824619293, "step": 136570 }, { "epoch": 38.768095373261424, "grad_norm": 1.7215514183044434, "learning_rate": 6.124808401930174e-05, "loss": 0.0023512490093708037, "step": 136580 }, { "epoch": 38.77093386318479, "grad_norm": 0.03763469308614731, "learning_rate": 6.124524552937836e-05, "loss": 0.007019671052694321, "step": 136590 }, { "epoch": 38.77377235310815, "grad_norm": 1.2874191999435425, "learning_rate": 6.1242407039455e-05, "loss": 0.0015476645901799202, "step": 136600 }, { "epoch": 38.776610843031506, "grad_norm": 13.892767906188965, "learning_rate": 6.123956854953166e-05, "loss": 0.014833471179008484, "step": 136610 }, { "epoch": 38.77944933295487, "grad_norm": 0.7100607752799988, "learning_rate": 6.123673005960829e-05, "loss": 0.008397702872753144, "step": 136620 }, { "epoch": 38.78228782287823, "grad_norm": 0.132250115275383, "learning_rate": 6.123389156968493e-05, "loss": 0.003598887100815773, "step": 136630 }, { "epoch": 38.78512631280159, "grad_norm": 0.17806631326675415, "learning_rate": 6.123105307976157e-05, "loss": 0.0004063952714204788, "step": 136640 }, { "epoch": 38.78796480272495, "grad_norm": 9.70821475982666, "learning_rate": 6.122821458983821e-05, "loss": 0.0028325144201517107, "step": 136650 }, { "epoch": 38.79080329264831, "grad_norm": 4.830050468444824, "learning_rate": 6.122537609991484e-05, "loss": 0.001678715832531452, "step": 136660 }, { "epoch": 38.79364178257167, "grad_norm": 0.020454932004213333, "learning_rate": 6.12225376099915e-05, "loss": 0.0010907405987381936, "step": 136670 }, { "epoch": 38.79648027249503, "grad_norm": 0.5051336884498596, "learning_rate": 6.121969912006814e-05, "loss": 0.00041405484080314636, "step": 136680 }, { "epoch": 38.799318762418395, "grad_norm": 0.1102384403347969, "learning_rate": 6.121686063014476e-05, "loss": 0.002337995357811451, "step": 136690 }, { "epoch": 38.80215725234176, "grad_norm": 0.24739918112754822, "learning_rate": 6.12140221402214e-05, "loss": 0.0006442725658416748, "step": 136700 }, { "epoch": 38.80499574226511, "grad_norm": 1.4801380634307861, "learning_rate": 6.121118365029805e-05, "loss": 0.0021919343620538712, "step": 136710 }, { "epoch": 38.807834232188476, "grad_norm": 13.728228569030762, "learning_rate": 6.120834516037467e-05, "loss": 0.007864236831665039, "step": 136720 }, { "epoch": 38.81067272211184, "grad_norm": 0.8182865977287292, "learning_rate": 6.120550667045132e-05, "loss": 0.003870575875043869, "step": 136730 }, { "epoch": 38.813511212035195, "grad_norm": 2.29473876953125, "learning_rate": 6.120266818052797e-05, "loss": 0.004464909806847573, "step": 136740 }, { "epoch": 38.81634970195856, "grad_norm": 0.01916401833295822, "learning_rate": 6.11998296906046e-05, "loss": 0.003910452127456665, "step": 136750 }, { "epoch": 38.81918819188192, "grad_norm": 0.024469558149576187, "learning_rate": 6.119699120068124e-05, "loss": 0.003116627410054207, "step": 136760 }, { "epoch": 38.822026681805276, "grad_norm": 1.687554955482483, "learning_rate": 6.119415271075788e-05, "loss": 0.0019464645534753799, "step": 136770 }, { "epoch": 38.82486517172864, "grad_norm": 0.12557190656661987, "learning_rate": 6.119131422083452e-05, "loss": 0.004021356627345085, "step": 136780 }, { "epoch": 38.827703661652, "grad_norm": 0.2625219225883484, "learning_rate": 6.118847573091115e-05, "loss": 0.0015993688255548477, "step": 136790 }, { "epoch": 38.830542151575365, "grad_norm": 1.5438308715820312, "learning_rate": 6.118563724098779e-05, "loss": 0.0028329113498330115, "step": 136800 }, { "epoch": 38.83338064149872, "grad_norm": 0.17089973390102386, "learning_rate": 6.118279875106445e-05, "loss": 0.002283358387649059, "step": 136810 }, { "epoch": 38.836219131422084, "grad_norm": 0.03571106866002083, "learning_rate": 6.117996026114108e-05, "loss": 0.0027567608281970023, "step": 136820 }, { "epoch": 38.83905762134545, "grad_norm": 0.01268973108381033, "learning_rate": 6.117712177121772e-05, "loss": 0.0011290453374385833, "step": 136830 }, { "epoch": 38.8418961112688, "grad_norm": 0.026142975315451622, "learning_rate": 6.117428328129436e-05, "loss": 0.000581265427172184, "step": 136840 }, { "epoch": 38.844734601192165, "grad_norm": 0.076985664665699, "learning_rate": 6.117144479137099e-05, "loss": 0.005393646284937858, "step": 136850 }, { "epoch": 38.84757309111553, "grad_norm": 0.09124922752380371, "learning_rate": 6.116860630144763e-05, "loss": 0.004060559719800949, "step": 136860 }, { "epoch": 38.850411581038884, "grad_norm": 0.005042530596256256, "learning_rate": 6.116576781152428e-05, "loss": 0.014664496481418609, "step": 136870 }, { "epoch": 38.85325007096225, "grad_norm": 0.4175768494606018, "learning_rate": 6.116292932160091e-05, "loss": 0.0005748627707362175, "step": 136880 }, { "epoch": 38.85608856088561, "grad_norm": 0.06300202012062073, "learning_rate": 6.116009083167755e-05, "loss": 0.001146404817700386, "step": 136890 }, { "epoch": 38.85892705080897, "grad_norm": 0.5650529861450195, "learning_rate": 6.115725234175419e-05, "loss": 0.0010785752907395362, "step": 136900 }, { "epoch": 38.86176554073233, "grad_norm": 0.05306447297334671, "learning_rate": 6.115441385183083e-05, "loss": 0.0004907485097646713, "step": 136910 }, { "epoch": 38.86460403065569, "grad_norm": 0.027294490486383438, "learning_rate": 6.115157536190746e-05, "loss": 0.004421128332614899, "step": 136920 }, { "epoch": 38.867442520579054, "grad_norm": 0.08513649553060532, "learning_rate": 6.11487368719841e-05, "loss": 0.00254517775028944, "step": 136930 }, { "epoch": 38.87028101050241, "grad_norm": 0.1995476335287094, "learning_rate": 6.114589838206074e-05, "loss": 0.0004649750888347626, "step": 136940 }, { "epoch": 38.87311950042577, "grad_norm": 0.034229524433612823, "learning_rate": 6.114305989213739e-05, "loss": 0.010361772030591965, "step": 136950 }, { "epoch": 38.875957990349136, "grad_norm": 0.536694347858429, "learning_rate": 6.114022140221403e-05, "loss": 0.0005840495228767395, "step": 136960 }, { "epoch": 38.8787964802725, "grad_norm": 0.2680436968803406, "learning_rate": 6.113738291229067e-05, "loss": 0.0030584193766117094, "step": 136970 }, { "epoch": 38.881634970195854, "grad_norm": 0.013411805033683777, "learning_rate": 6.11345444223673e-05, "loss": 0.004339297115802765, "step": 136980 }, { "epoch": 38.88447346011922, "grad_norm": 0.0815495029091835, "learning_rate": 6.113170593244394e-05, "loss": 0.0023247836157679557, "step": 136990 }, { "epoch": 38.88731195004258, "grad_norm": 6.676806926727295, "learning_rate": 6.112886744252058e-05, "loss": 0.011638703942298888, "step": 137000 }, { "epoch": 38.88731195004258, "eval_accuracy": 0.977045844725631, "eval_loss": 0.09050251543521881, "eval_runtime": 34.379, "eval_samples_per_second": 457.46, "eval_steps_per_second": 7.156, "step": 137000 }, { "epoch": 38.890150439965936, "grad_norm": 1.849623680114746, "learning_rate": 6.112602895259722e-05, "loss": 0.0029381664469838144, "step": 137010 }, { "epoch": 38.8929889298893, "grad_norm": 0.16300581395626068, "learning_rate": 6.112319046267386e-05, "loss": 0.0032596409320831297, "step": 137020 }, { "epoch": 38.89582741981266, "grad_norm": 0.0031369521748274565, "learning_rate": 6.11203519727505e-05, "loss": 0.0015504663810133935, "step": 137030 }, { "epoch": 38.89866590973602, "grad_norm": 0.6317318081855774, "learning_rate": 6.111751348282713e-05, "loss": 0.0013608753681182862, "step": 137040 }, { "epoch": 38.90150439965938, "grad_norm": 0.09672614187002182, "learning_rate": 6.111467499290377e-05, "loss": 0.0026928195729851723, "step": 137050 }, { "epoch": 38.90434288958274, "grad_norm": 0.21492120623588562, "learning_rate": 6.111183650298041e-05, "loss": 0.009702034294605255, "step": 137060 }, { "epoch": 38.907181379506106, "grad_norm": 12.485637664794922, "learning_rate": 6.110899801305706e-05, "loss": 0.01657682955265045, "step": 137070 }, { "epoch": 38.91001986942946, "grad_norm": 1.394677996635437, "learning_rate": 6.11061595231337e-05, "loss": 0.0012736547738313674, "step": 137080 }, { "epoch": 38.912858359352825, "grad_norm": 0.8090853691101074, "learning_rate": 6.110332103321034e-05, "loss": 0.004811511933803558, "step": 137090 }, { "epoch": 38.91569684927619, "grad_norm": 5.695529460906982, "learning_rate": 6.110048254328698e-05, "loss": 0.004349412024021148, "step": 137100 }, { "epoch": 38.91853533919954, "grad_norm": 0.19873790442943573, "learning_rate": 6.109764405336361e-05, "loss": 0.005954621732234955, "step": 137110 }, { "epoch": 38.921373829122906, "grad_norm": 0.036389339715242386, "learning_rate": 6.109480556344025e-05, "loss": 0.00017767921090126038, "step": 137120 }, { "epoch": 38.92421231904627, "grad_norm": 0.0646616667509079, "learning_rate": 6.109196707351689e-05, "loss": 0.010710521787405013, "step": 137130 }, { "epoch": 38.927050808969625, "grad_norm": 0.019178668037056923, "learning_rate": 6.108912858359353e-05, "loss": 0.015157431364059448, "step": 137140 }, { "epoch": 38.92988929889299, "grad_norm": 0.09474755078554153, "learning_rate": 6.108629009367017e-05, "loss": 0.0032052811235189437, "step": 137150 }, { "epoch": 38.93272778881635, "grad_norm": 11.58922004699707, "learning_rate": 6.108345160374681e-05, "loss": 0.006055006012320519, "step": 137160 }, { "epoch": 38.935566278739714, "grad_norm": 5.465831279754639, "learning_rate": 6.108061311382344e-05, "loss": 0.00175633504986763, "step": 137170 }, { "epoch": 38.93840476866307, "grad_norm": 0.029118090867996216, "learning_rate": 6.107777462390008e-05, "loss": 0.0015997454524040223, "step": 137180 }, { "epoch": 38.94124325858643, "grad_norm": 0.06618688255548477, "learning_rate": 6.107493613397672e-05, "loss": 0.020764154195785523, "step": 137190 }, { "epoch": 38.944081748509795, "grad_norm": 0.06571269035339355, "learning_rate": 6.107209764405337e-05, "loss": 0.006775470077991485, "step": 137200 }, { "epoch": 38.94692023843315, "grad_norm": 0.2501359283924103, "learning_rate": 6.106925915413001e-05, "loss": 0.0037495657801628114, "step": 137210 }, { "epoch": 38.949758728356514, "grad_norm": 0.05856098607182503, "learning_rate": 6.106642066420665e-05, "loss": 0.007169657945632934, "step": 137220 }, { "epoch": 38.95259721827988, "grad_norm": 0.041590917855501175, "learning_rate": 6.106358217428329e-05, "loss": 0.004641526937484741, "step": 137230 }, { "epoch": 38.95543570820323, "grad_norm": 0.011573225259780884, "learning_rate": 6.106074368435992e-05, "loss": 0.005600576102733612, "step": 137240 }, { "epoch": 38.958274198126595, "grad_norm": 0.008640697225928307, "learning_rate": 6.105790519443656e-05, "loss": 0.007663539797067642, "step": 137250 }, { "epoch": 38.96111268804996, "grad_norm": 0.03057229146361351, "learning_rate": 6.10550667045132e-05, "loss": 0.0042861074209213255, "step": 137260 }, { "epoch": 38.96395117797332, "grad_norm": 0.5142971277236938, "learning_rate": 6.105222821458984e-05, "loss": 0.00747603178024292, "step": 137270 }, { "epoch": 38.96678966789668, "grad_norm": 0.02291736751794815, "learning_rate": 6.104938972466648e-05, "loss": 0.0021372342482209204, "step": 137280 }, { "epoch": 38.96962815782004, "grad_norm": 3.818514108657837, "learning_rate": 6.104655123474312e-05, "loss": 0.006326550990343094, "step": 137290 }, { "epoch": 38.9724666477434, "grad_norm": 0.05449025705456734, "learning_rate": 6.104371274481975e-05, "loss": 0.004420953989028931, "step": 137300 }, { "epoch": 38.97530513766676, "grad_norm": 0.2628651559352875, "learning_rate": 6.10408742548964e-05, "loss": 0.0025927474722266196, "step": 137310 }, { "epoch": 38.97814362759012, "grad_norm": 0.08853462338447571, "learning_rate": 6.103803576497304e-05, "loss": 0.00132554043084383, "step": 137320 }, { "epoch": 38.980982117513484, "grad_norm": 0.15120229125022888, "learning_rate": 6.103519727504968e-05, "loss": 0.007235269993543625, "step": 137330 }, { "epoch": 38.98382060743684, "grad_norm": 0.057715512812137604, "learning_rate": 6.103235878512632e-05, "loss": 0.0007865836843848229, "step": 137340 }, { "epoch": 38.9866590973602, "grad_norm": 0.04122084751725197, "learning_rate": 6.102952029520296e-05, "loss": 0.0004784325137734413, "step": 137350 }, { "epoch": 38.989497587283566, "grad_norm": 0.02071714587509632, "learning_rate": 6.1026681805279594e-05, "loss": 0.0019444210454821586, "step": 137360 }, { "epoch": 38.99233607720693, "grad_norm": 6.239480495452881, "learning_rate": 6.1023843315356236e-05, "loss": 0.008644098043441772, "step": 137370 }, { "epoch": 38.995174567130285, "grad_norm": 1.2404013872146606, "learning_rate": 6.102100482543287e-05, "loss": 0.009912001341581345, "step": 137380 }, { "epoch": 38.99801305705365, "grad_norm": 0.04347290098667145, "learning_rate": 6.1018166335509505e-05, "loss": 0.006896300613880158, "step": 137390 }, { "epoch": 39.00085154697701, "grad_norm": 0.9557615518569946, "learning_rate": 6.101532784558616e-05, "loss": 0.008927178382873536, "step": 137400 }, { "epoch": 39.003690036900366, "grad_norm": 12.978480339050293, "learning_rate": 6.1012489355662794e-05, "loss": 0.01674579828977585, "step": 137410 }, { "epoch": 39.00652852682373, "grad_norm": 0.16097523272037506, "learning_rate": 6.100965086573943e-05, "loss": 0.006869884580373764, "step": 137420 }, { "epoch": 39.00936701674709, "grad_norm": 0.049987856298685074, "learning_rate": 6.100681237581607e-05, "loss": 0.0017167787998914718, "step": 137430 }, { "epoch": 39.012205506670455, "grad_norm": 0.14283014833927155, "learning_rate": 6.1003973885892705e-05, "loss": 0.0017173856496810914, "step": 137440 }, { "epoch": 39.01504399659381, "grad_norm": 11.8626708984375, "learning_rate": 6.1001135395969346e-05, "loss": 0.012581685185432434, "step": 137450 }, { "epoch": 39.01788248651717, "grad_norm": 0.9480457305908203, "learning_rate": 6.099829690604598e-05, "loss": 0.001164957322180271, "step": 137460 }, { "epoch": 39.020720976440536, "grad_norm": 0.6916205883026123, "learning_rate": 6.099545841612263e-05, "loss": 0.008870180696249008, "step": 137470 }, { "epoch": 39.02355946636389, "grad_norm": 0.2264004498720169, "learning_rate": 6.099261992619927e-05, "loss": 0.0030240800231695177, "step": 137480 }, { "epoch": 39.026397956287255, "grad_norm": 0.06114809215068817, "learning_rate": 6.0989781436275905e-05, "loss": 0.005794682726264, "step": 137490 }, { "epoch": 39.02923644621062, "grad_norm": 0.07120925188064575, "learning_rate": 6.0986942946352546e-05, "loss": 0.017390406131744383, "step": 137500 }, { "epoch": 39.02923644621062, "eval_accuracy": 0.9797164112672474, "eval_loss": 0.07477816939353943, "eval_runtime": 33.4959, "eval_samples_per_second": 469.52, "eval_steps_per_second": 7.344, "step": 137500 }, { "epoch": 39.032074936133974, "grad_norm": 0.24219587445259094, "learning_rate": 6.098410445642918e-05, "loss": 0.0004527425393462181, "step": 137510 }, { "epoch": 39.03491342605734, "grad_norm": 0.06629421561956406, "learning_rate": 6.0981265966505816e-05, "loss": 0.0047774635255336765, "step": 137520 }, { "epoch": 39.0377519159807, "grad_norm": 0.013423411175608635, "learning_rate": 6.097842747658246e-05, "loss": 0.0002601148560643196, "step": 137530 }, { "epoch": 39.04059040590406, "grad_norm": 0.2831535041332245, "learning_rate": 6.0975588986659105e-05, "loss": 0.0009505387395620346, "step": 137540 }, { "epoch": 39.04342889582742, "grad_norm": 1.563372254371643, "learning_rate": 6.097275049673574e-05, "loss": 0.0013530999422073364, "step": 137550 }, { "epoch": 39.04626738575078, "grad_norm": 0.1143813282251358, "learning_rate": 6.096991200681238e-05, "loss": 0.0006584417074918747, "step": 137560 }, { "epoch": 39.049105875674144, "grad_norm": 1.0203882455825806, "learning_rate": 6.0967073516889016e-05, "loss": 0.0006843365728855133, "step": 137570 }, { "epoch": 39.0519443655975, "grad_norm": 0.07093257457017899, "learning_rate": 6.096423502696566e-05, "loss": 0.00035826824605464933, "step": 137580 }, { "epoch": 39.05478285552086, "grad_norm": 0.34651699662208557, "learning_rate": 6.096139653704229e-05, "loss": 0.0018825443461537362, "step": 137590 }, { "epoch": 39.057621345444225, "grad_norm": 0.2608388066291809, "learning_rate": 6.095855804711894e-05, "loss": 0.009766968339681626, "step": 137600 }, { "epoch": 39.06045983536758, "grad_norm": 0.20466028153896332, "learning_rate": 6.095571955719558e-05, "loss": 0.002482393570244312, "step": 137610 }, { "epoch": 39.063298325290944, "grad_norm": 0.30219098925590515, "learning_rate": 6.0952881067272216e-05, "loss": 0.0023303527384996416, "step": 137620 }, { "epoch": 39.06613681521431, "grad_norm": 0.02492087334394455, "learning_rate": 6.095004257734885e-05, "loss": 0.002280113287270069, "step": 137630 }, { "epoch": 39.06897530513767, "grad_norm": 0.7188954949378967, "learning_rate": 6.094720408742549e-05, "loss": 0.0007746033370494843, "step": 137640 }, { "epoch": 39.071813795061026, "grad_norm": 0.8616881966590881, "learning_rate": 6.0944365597502126e-05, "loss": 0.000566759705543518, "step": 137650 }, { "epoch": 39.07465228498439, "grad_norm": 0.4782365560531616, "learning_rate": 6.094152710757877e-05, "loss": 0.0011696433648467064, "step": 137660 }, { "epoch": 39.07749077490775, "grad_norm": 0.601020336151123, "learning_rate": 6.0938688617655416e-05, "loss": 0.002906910516321659, "step": 137670 }, { "epoch": 39.08032926483111, "grad_norm": 4.8283772468566895, "learning_rate": 6.093585012773205e-05, "loss": 0.0025983037427067756, "step": 137680 }, { "epoch": 39.08316775475447, "grad_norm": 0.01594647765159607, "learning_rate": 6.093301163780869e-05, "loss": 0.0004589501768350601, "step": 137690 }, { "epoch": 39.08600624467783, "grad_norm": 5.598799228668213, "learning_rate": 6.0930173147885326e-05, "loss": 0.005682635307312012, "step": 137700 }, { "epoch": 39.08884473460119, "grad_norm": 6.907039642333984, "learning_rate": 6.092733465796197e-05, "loss": 0.0018631739541888237, "step": 137710 }, { "epoch": 39.09168322452455, "grad_norm": 0.03469192236661911, "learning_rate": 6.09244961680386e-05, "loss": 0.001341324672102928, "step": 137720 }, { "epoch": 39.094521714447914, "grad_norm": 0.10409839451313019, "learning_rate": 6.0921657678115244e-05, "loss": 0.016147503256797792, "step": 137730 }, { "epoch": 39.09736020437128, "grad_norm": 0.01670190878212452, "learning_rate": 6.091881918819189e-05, "loss": 0.0010956967249512672, "step": 137740 }, { "epoch": 39.10019869429463, "grad_norm": 2.623544692993164, "learning_rate": 6.0915980698268526e-05, "loss": 0.0009096872061491012, "step": 137750 }, { "epoch": 39.103037184217996, "grad_norm": 0.051799193024635315, "learning_rate": 6.091314220834516e-05, "loss": 0.015161095559597016, "step": 137760 }, { "epoch": 39.10587567414136, "grad_norm": 0.08301462233066559, "learning_rate": 6.09103037184218e-05, "loss": 0.0012500934302806855, "step": 137770 }, { "epoch": 39.108714164064715, "grad_norm": 0.019512662664055824, "learning_rate": 6.090746522849844e-05, "loss": 0.0016404950991272926, "step": 137780 }, { "epoch": 39.11155265398808, "grad_norm": 0.026548326015472412, "learning_rate": 6.090462673857508e-05, "loss": 0.0015265613794326783, "step": 137790 }, { "epoch": 39.11439114391144, "grad_norm": 0.6194296479225159, "learning_rate": 6.0901788248651727e-05, "loss": 0.0006021555513143539, "step": 137800 }, { "epoch": 39.1172296338348, "grad_norm": 0.009316666051745415, "learning_rate": 6.089894975872836e-05, "loss": 0.009047044068574905, "step": 137810 }, { "epoch": 39.12006812375816, "grad_norm": 0.04332108795642853, "learning_rate": 6.0896111268805e-05, "loss": 0.006756143271923065, "step": 137820 }, { "epoch": 39.12290661368152, "grad_norm": 0.1687803566455841, "learning_rate": 6.089327277888164e-05, "loss": 0.00034865736961364746, "step": 137830 }, { "epoch": 39.125745103604885, "grad_norm": 0.009639340452849865, "learning_rate": 6.089043428895828e-05, "loss": 0.006846833229064942, "step": 137840 }, { "epoch": 39.12858359352824, "grad_norm": 0.3540847897529602, "learning_rate": 6.088759579903491e-05, "loss": 0.002108702063560486, "step": 137850 }, { "epoch": 39.131422083451604, "grad_norm": 0.018497729673981667, "learning_rate": 6.088475730911155e-05, "loss": 0.0007431475445628166, "step": 137860 }, { "epoch": 39.134260573374966, "grad_norm": 0.015007786452770233, "learning_rate": 6.08819188191882e-05, "loss": 0.002270946279168129, "step": 137870 }, { "epoch": 39.13709906329832, "grad_norm": 0.16009829938411713, "learning_rate": 6.087908032926484e-05, "loss": 0.0005701959133148193, "step": 137880 }, { "epoch": 39.139937553221685, "grad_norm": 0.04875315725803375, "learning_rate": 6.087624183934147e-05, "loss": 0.0015263210982084275, "step": 137890 }, { "epoch": 39.14277604314505, "grad_norm": 0.8375362753868103, "learning_rate": 6.087340334941811e-05, "loss": 0.0016471371054649353, "step": 137900 }, { "epoch": 39.14561453306841, "grad_norm": 4.979081630706787, "learning_rate": 6.087056485949475e-05, "loss": 0.005679974704980851, "step": 137910 }, { "epoch": 39.14845302299177, "grad_norm": 0.00636505289003253, "learning_rate": 6.086772636957139e-05, "loss": 0.0018948525190353393, "step": 137920 }, { "epoch": 39.15129151291513, "grad_norm": 0.01723293587565422, "learning_rate": 6.0864887879648024e-05, "loss": 0.0017893364652991294, "step": 137930 }, { "epoch": 39.15413000283849, "grad_norm": 0.19475801289081573, "learning_rate": 6.086204938972467e-05, "loss": 0.0014092739671468734, "step": 137940 }, { "epoch": 39.15696849276185, "grad_norm": 0.43623772263526917, "learning_rate": 6.085921089980131e-05, "loss": 0.0022204436361789705, "step": 137950 }, { "epoch": 39.15980698268521, "grad_norm": 0.05648011714220047, "learning_rate": 6.085637240987795e-05, "loss": 0.0007522603496909142, "step": 137960 }, { "epoch": 39.162645472608574, "grad_norm": 0.14172212779521942, "learning_rate": 6.085353391995459e-05, "loss": 0.001974133774638176, "step": 137970 }, { "epoch": 39.16548396253193, "grad_norm": 0.3931274116039276, "learning_rate": 6.0850695430031224e-05, "loss": 0.004837201535701751, "step": 137980 }, { "epoch": 39.16832245245529, "grad_norm": 0.13354472815990448, "learning_rate": 6.084785694010786e-05, "loss": 0.000417766347527504, "step": 137990 }, { "epoch": 39.171160942378656, "grad_norm": 0.8453791737556458, "learning_rate": 6.084501845018451e-05, "loss": 0.0012475302442908286, "step": 138000 }, { "epoch": 39.171160942378656, "eval_accuracy": 0.9814967889616583, "eval_loss": 0.06625587493181229, "eval_runtime": 33.7896, "eval_samples_per_second": 465.439, "eval_steps_per_second": 7.28, "step": 138000 }, { "epoch": 39.17399943230202, "grad_norm": 0.4415855407714844, "learning_rate": 6.084217996026115e-05, "loss": 0.0021765725687146187, "step": 138010 }, { "epoch": 39.176837922225374, "grad_norm": 0.10025936365127563, "learning_rate": 6.083934147033778e-05, "loss": 0.0008749017491936683, "step": 138020 }, { "epoch": 39.17967641214874, "grad_norm": 0.04716918617486954, "learning_rate": 6.0836502980414424e-05, "loss": 0.0018938042223453522, "step": 138030 }, { "epoch": 39.1825149020721, "grad_norm": 0.008219094015657902, "learning_rate": 6.083366449049106e-05, "loss": 0.003851676732301712, "step": 138040 }, { "epoch": 39.185353391995456, "grad_norm": 0.012094452045857906, "learning_rate": 6.08308260005677e-05, "loss": 0.008924642950296402, "step": 138050 }, { "epoch": 39.18819188191882, "grad_norm": 2.7412302494049072, "learning_rate": 6.0827987510644335e-05, "loss": 0.009740673005580902, "step": 138060 }, { "epoch": 39.19103037184218, "grad_norm": 0.09662110358476639, "learning_rate": 6.082514902072098e-05, "loss": 0.007915650308132172, "step": 138070 }, { "epoch": 39.19386886176554, "grad_norm": 0.047359928488731384, "learning_rate": 6.0822310530797624e-05, "loss": 0.00371815487742424, "step": 138080 }, { "epoch": 39.1967073516889, "grad_norm": 10.637489318847656, "learning_rate": 6.081947204087426e-05, "loss": 0.004106711223721504, "step": 138090 }, { "epoch": 39.19954584161226, "grad_norm": 0.08694974333047867, "learning_rate": 6.081663355095089e-05, "loss": 0.002206754870712757, "step": 138100 }, { "epoch": 39.202384331535626, "grad_norm": 3.313483238220215, "learning_rate": 6.0813795061027535e-05, "loss": 0.0015200406312942504, "step": 138110 }, { "epoch": 39.20522282145898, "grad_norm": 0.04182513803243637, "learning_rate": 6.081095657110417e-05, "loss": 0.002858021669089794, "step": 138120 }, { "epoch": 39.208061311382345, "grad_norm": 14.153486251831055, "learning_rate": 6.080811808118081e-05, "loss": 0.013529166579246521, "step": 138130 }, { "epoch": 39.21089980130571, "grad_norm": 0.3335261940956116, "learning_rate": 6.080527959125746e-05, "loss": 0.001243467628955841, "step": 138140 }, { "epoch": 39.21373829122906, "grad_norm": 0.17593950033187866, "learning_rate": 6.0802441101334093e-05, "loss": 0.001409408077597618, "step": 138150 }, { "epoch": 39.216576781152426, "grad_norm": 0.1118583008646965, "learning_rate": 6.0799602611410735e-05, "loss": 0.011545933783054352, "step": 138160 }, { "epoch": 39.21941527107579, "grad_norm": 12.295495986938477, "learning_rate": 6.079676412148737e-05, "loss": 0.0030759537592530252, "step": 138170 }, { "epoch": 39.22225376099915, "grad_norm": 0.039261024445295334, "learning_rate": 6.079392563156401e-05, "loss": 0.0005103396251797676, "step": 138180 }, { "epoch": 39.22509225092251, "grad_norm": 0.28280678391456604, "learning_rate": 6.079137099063299e-05, "loss": 0.033857861161231996, "step": 138190 }, { "epoch": 39.22793074084587, "grad_norm": 0.7412574887275696, "learning_rate": 6.0788532500709625e-05, "loss": 0.013128601014614105, "step": 138200 }, { "epoch": 39.23076923076923, "grad_norm": 1.5203145742416382, "learning_rate": 6.078569401078627e-05, "loss": 0.0012325406074523925, "step": 138210 }, { "epoch": 39.23360772069259, "grad_norm": 2.9665331840515137, "learning_rate": 6.07828555208629e-05, "loss": 0.0065347269177436825, "step": 138220 }, { "epoch": 39.23644621061595, "grad_norm": 0.23364004492759705, "learning_rate": 6.078001703093954e-05, "loss": 0.001198039762675762, "step": 138230 }, { "epoch": 39.239284700539315, "grad_norm": 0.07402362674474716, "learning_rate": 6.077717854101618e-05, "loss": 0.0002457804977893829, "step": 138240 }, { "epoch": 39.24212319046267, "grad_norm": 0.5782866477966309, "learning_rate": 6.0774340051092825e-05, "loss": 0.0008695608004927636, "step": 138250 }, { "epoch": 39.244961680386034, "grad_norm": 0.018044110387563705, "learning_rate": 6.077150156116947e-05, "loss": 0.0004136502742767334, "step": 138260 }, { "epoch": 39.2478001703094, "grad_norm": 0.15732675790786743, "learning_rate": 6.07686630712461e-05, "loss": 0.0019291490316390991, "step": 138270 }, { "epoch": 39.25063866023276, "grad_norm": 0.12987667322158813, "learning_rate": 6.0765824581322736e-05, "loss": 0.0019279874861240387, "step": 138280 }, { "epoch": 39.253477150156115, "grad_norm": 0.12942974269390106, "learning_rate": 6.076298609139938e-05, "loss": 0.0029879288747906687, "step": 138290 }, { "epoch": 39.25631564007948, "grad_norm": 0.6758692264556885, "learning_rate": 6.076014760147601e-05, "loss": 0.0011399567127227783, "step": 138300 }, { "epoch": 39.25915413000284, "grad_norm": 0.08824548125267029, "learning_rate": 6.075730911155265e-05, "loss": 0.005688712745904922, "step": 138310 }, { "epoch": 39.2619926199262, "grad_norm": 1.9367506504058838, "learning_rate": 6.07544706216293e-05, "loss": 0.0007046146318316459, "step": 138320 }, { "epoch": 39.26483110984956, "grad_norm": 8.995638847351074, "learning_rate": 6.0751632131705936e-05, "loss": 0.005352979898452759, "step": 138330 }, { "epoch": 39.26766959977292, "grad_norm": 3.583782434463501, "learning_rate": 6.074879364178258e-05, "loss": 0.0023553336039185526, "step": 138340 }, { "epoch": 39.27050808969628, "grad_norm": 0.024912279099225998, "learning_rate": 6.074595515185921e-05, "loss": 0.0005669165402650833, "step": 138350 }, { "epoch": 39.27334657961964, "grad_norm": 0.07671637833118439, "learning_rate": 6.0743116661935853e-05, "loss": 0.0004860576242208481, "step": 138360 }, { "epoch": 39.276185069543004, "grad_norm": 0.05912360921502113, "learning_rate": 6.074027817201249e-05, "loss": 0.006079104542732239, "step": 138370 }, { "epoch": 39.27902355946637, "grad_norm": 0.06591041386127472, "learning_rate": 6.0737439682089136e-05, "loss": 0.0022126175463199615, "step": 138380 }, { "epoch": 39.28186204938972, "grad_norm": 0.05656168982386589, "learning_rate": 6.073460119216578e-05, "loss": 0.0005785120651125908, "step": 138390 }, { "epoch": 39.284700539313086, "grad_norm": 0.05867428332567215, "learning_rate": 6.073176270224241e-05, "loss": 0.0006254402920603752, "step": 138400 }, { "epoch": 39.28753902923645, "grad_norm": 3.85809588432312, "learning_rate": 6.072892421231905e-05, "loss": 0.0009350581094622612, "step": 138410 }, { "epoch": 39.290377519159804, "grad_norm": 0.07352422922849655, "learning_rate": 6.072608572239569e-05, "loss": 0.0011662164703011512, "step": 138420 }, { "epoch": 39.29321600908317, "grad_norm": 0.20045222342014313, "learning_rate": 6.072324723247232e-05, "loss": 0.0013227606192231177, "step": 138430 }, { "epoch": 39.29605449900653, "grad_norm": 0.03254120796918869, "learning_rate": 6.0720408742548964e-05, "loss": 0.0011105021461844445, "step": 138440 }, { "epoch": 39.298892988929886, "grad_norm": 0.10485178232192993, "learning_rate": 6.071757025262561e-05, "loss": 0.0035470183938741684, "step": 138450 }, { "epoch": 39.30173147885325, "grad_norm": 0.025644412264227867, "learning_rate": 6.071473176270225e-05, "loss": 0.0007809404283761978, "step": 138460 }, { "epoch": 39.30456996877661, "grad_norm": 0.02153383195400238, "learning_rate": 6.071189327277889e-05, "loss": 0.0017992811277508735, "step": 138470 }, { "epoch": 39.307408458699975, "grad_norm": 0.073344886302948, "learning_rate": 6.070905478285552e-05, "loss": 0.002291157841682434, "step": 138480 }, { "epoch": 39.31024694862333, "grad_norm": 1.591992974281311, "learning_rate": 6.0706216292932164e-05, "loss": 0.004646455496549606, "step": 138490 }, { "epoch": 39.31308543854669, "grad_norm": 0.01843457669019699, "learning_rate": 6.07033778030088e-05, "loss": 0.0066717095673084256, "step": 138500 }, { "epoch": 39.31308543854669, "eval_accuracy": 0.981941883385261, "eval_loss": 0.06635167449712753, "eval_runtime": 34.0821, "eval_samples_per_second": 461.445, "eval_steps_per_second": 7.218, "step": 138500 }, { "epoch": 39.315923928470056, "grad_norm": 0.05503377690911293, "learning_rate": 6.070053931308543e-05, "loss": 0.005224639177322387, "step": 138510 }, { "epoch": 39.31876241839341, "grad_norm": 0.020442569628357887, "learning_rate": 6.069770082316208e-05, "loss": 0.004077658802270889, "step": 138520 }, { "epoch": 39.321600908316775, "grad_norm": 1.241560459136963, "learning_rate": 6.069486233323872e-05, "loss": 0.001666879653930664, "step": 138530 }, { "epoch": 39.32443939824014, "grad_norm": 0.23200109601020813, "learning_rate": 6.069202384331536e-05, "loss": 0.0028342409059405327, "step": 138540 }, { "epoch": 39.32727788816349, "grad_norm": 0.26961979269981384, "learning_rate": 6.0689185353392e-05, "loss": 0.00036327429115772246, "step": 138550 }, { "epoch": 39.330116378086856, "grad_norm": 0.02788260206580162, "learning_rate": 6.0686346863468633e-05, "loss": 0.009934622049331664, "step": 138560 }, { "epoch": 39.33295486801022, "grad_norm": 0.06971275806427002, "learning_rate": 6.0683508373545275e-05, "loss": 0.0005482662469148636, "step": 138570 }, { "epoch": 39.33579335793358, "grad_norm": 0.6584749221801758, "learning_rate": 6.068066988362192e-05, "loss": 0.001551787368953228, "step": 138580 }, { "epoch": 39.33863184785694, "grad_norm": 0.04893480986356735, "learning_rate": 6.067783139369856e-05, "loss": 0.0003167841583490372, "step": 138590 }, { "epoch": 39.3414703377803, "grad_norm": 0.07616415619850159, "learning_rate": 6.06749929037752e-05, "loss": 0.0013985190540552138, "step": 138600 }, { "epoch": 39.344308827703664, "grad_norm": 0.04011128842830658, "learning_rate": 6.0672154413851834e-05, "loss": 0.0010650794953107833, "step": 138610 }, { "epoch": 39.34714731762702, "grad_norm": 0.008241882547736168, "learning_rate": 6.066931592392847e-05, "loss": 0.0012963490560650825, "step": 138620 }, { "epoch": 39.34998580755038, "grad_norm": 0.011212514713406563, "learning_rate": 6.066647743400511e-05, "loss": 0.011287067085504532, "step": 138630 }, { "epoch": 39.352824297473745, "grad_norm": 0.16780346632003784, "learning_rate": 6.0663638944081744e-05, "loss": 0.0031701035797595976, "step": 138640 }, { "epoch": 39.35566278739711, "grad_norm": 0.0697137862443924, "learning_rate": 6.066080045415839e-05, "loss": 0.006658057123422623, "step": 138650 }, { "epoch": 39.358501277320464, "grad_norm": 0.5874887108802795, "learning_rate": 6.0657961964235034e-05, "loss": 0.005443785339593887, "step": 138660 }, { "epoch": 39.36133976724383, "grad_norm": 0.020303871482610703, "learning_rate": 6.065512347431167e-05, "loss": 0.0022564828395843506, "step": 138670 }, { "epoch": 39.36417825716719, "grad_norm": 0.04993806779384613, "learning_rate": 6.065228498438831e-05, "loss": 0.0007726859301328659, "step": 138680 }, { "epoch": 39.367016747090545, "grad_norm": 1.4343183040618896, "learning_rate": 6.0649446494464944e-05, "loss": 0.007553227990865707, "step": 138690 }, { "epoch": 39.36985523701391, "grad_norm": 0.5735200643539429, "learning_rate": 6.0646608004541586e-05, "loss": 0.016805237531661986, "step": 138700 }, { "epoch": 39.37269372693727, "grad_norm": 0.047284822911024094, "learning_rate": 6.064376951461822e-05, "loss": 0.005717074871063233, "step": 138710 }, { "epoch": 39.37553221686063, "grad_norm": 1.1263208389282227, "learning_rate": 6.064093102469487e-05, "loss": 0.006318534910678864, "step": 138720 }, { "epoch": 39.37837070678399, "grad_norm": 0.17881818115711212, "learning_rate": 6.063809253477151e-05, "loss": 0.00043746288865804673, "step": 138730 }, { "epoch": 39.38120919670735, "grad_norm": 0.1277795433998108, "learning_rate": 6.0635254044848144e-05, "loss": 0.0007838096469640731, "step": 138740 }, { "epoch": 39.384047686630716, "grad_norm": 0.03766659274697304, "learning_rate": 6.063241555492478e-05, "loss": 0.0008854050189256668, "step": 138750 }, { "epoch": 39.38688617655407, "grad_norm": 0.04809948801994324, "learning_rate": 6.062957706500142e-05, "loss": 0.0015036895871162414, "step": 138760 }, { "epoch": 39.389724666477434, "grad_norm": 17.316999435424805, "learning_rate": 6.0626738575078055e-05, "loss": 0.012883448600769043, "step": 138770 }, { "epoch": 39.3925631564008, "grad_norm": 0.13659517467021942, "learning_rate": 6.06239000851547e-05, "loss": 0.0034447059035301207, "step": 138780 }, { "epoch": 39.39540164632415, "grad_norm": 0.11725802719593048, "learning_rate": 6.0621061595231344e-05, "loss": 0.004754360765218735, "step": 138790 }, { "epoch": 39.398240136247516, "grad_norm": 0.12834224104881287, "learning_rate": 6.061822310530798e-05, "loss": 0.006569517403841018, "step": 138800 }, { "epoch": 39.40107862617088, "grad_norm": 8.084290504455566, "learning_rate": 6.061538461538462e-05, "loss": 0.003147453814744949, "step": 138810 }, { "epoch": 39.403917116094235, "grad_norm": 0.06869591772556305, "learning_rate": 6.0612546125461255e-05, "loss": 0.000597037747502327, "step": 138820 }, { "epoch": 39.4067556060176, "grad_norm": 12.543073654174805, "learning_rate": 6.0609707635537896e-05, "loss": 0.007187694311141968, "step": 138830 }, { "epoch": 39.40959409594096, "grad_norm": 6.173370361328125, "learning_rate": 6.060686914561453e-05, "loss": 0.002685187757015228, "step": 138840 }, { "epoch": 39.41243258586432, "grad_norm": 0.0666196420788765, "learning_rate": 6.060403065569118e-05, "loss": 0.00060164425522089, "step": 138850 }, { "epoch": 39.41527107578768, "grad_norm": 0.02447224035859108, "learning_rate": 6.060119216576782e-05, "loss": 0.00043979547917842864, "step": 138860 }, { "epoch": 39.41810956571104, "grad_norm": 0.09318780899047852, "learning_rate": 6.0598353675844455e-05, "loss": 0.0008368581533432007, "step": 138870 }, { "epoch": 39.420948055634405, "grad_norm": 1.9948021173477173, "learning_rate": 6.059551518592109e-05, "loss": 0.001708356849849224, "step": 138880 }, { "epoch": 39.42378654555776, "grad_norm": 0.10896500200033188, "learning_rate": 6.059267669599773e-05, "loss": 0.002026791125535965, "step": 138890 }, { "epoch": 39.42662503548112, "grad_norm": 0.05228123068809509, "learning_rate": 6.0589838206074366e-05, "loss": 0.0036561116576194763, "step": 138900 }, { "epoch": 39.429463525404486, "grad_norm": 9.4873685836792, "learning_rate": 6.0586999716151014e-05, "loss": 0.009094107151031493, "step": 138910 }, { "epoch": 39.43230201532784, "grad_norm": 5.7344160079956055, "learning_rate": 6.0584161226227655e-05, "loss": 0.001995529793202877, "step": 138920 }, { "epoch": 39.435140505251205, "grad_norm": 1.8774508237838745, "learning_rate": 6.058132273630429e-05, "loss": 0.003912723809480667, "step": 138930 }, { "epoch": 39.43797899517457, "grad_norm": 3.6947181224823, "learning_rate": 6.057848424638093e-05, "loss": 0.0020061109215021135, "step": 138940 }, { "epoch": 39.44081748509793, "grad_norm": 2.7072269916534424, "learning_rate": 6.0575645756457566e-05, "loss": 0.015808798372745514, "step": 138950 }, { "epoch": 39.44365597502129, "grad_norm": 0.04667671397328377, "learning_rate": 6.057280726653421e-05, "loss": 0.0009655993431806565, "step": 138960 }, { "epoch": 39.44649446494465, "grad_norm": 0.3560369312763214, "learning_rate": 6.056996877661084e-05, "loss": 0.004515141248703003, "step": 138970 }, { "epoch": 39.44933295486801, "grad_norm": 0.24489068984985352, "learning_rate": 6.056713028668749e-05, "loss": 0.014861615002155304, "step": 138980 }, { "epoch": 39.45217144479137, "grad_norm": 0.07958531379699707, "learning_rate": 6.0564291796764124e-05, "loss": 0.003068917244672775, "step": 138990 }, { "epoch": 39.45500993471473, "grad_norm": 0.4710387587547302, "learning_rate": 6.0561453306840766e-05, "loss": 0.0008233841508626938, "step": 139000 }, { "epoch": 39.45500993471473, "eval_accuracy": 0.9784447129140967, "eval_loss": 0.08079985529184341, "eval_runtime": 34.2411, "eval_samples_per_second": 459.302, "eval_steps_per_second": 7.184, "step": 139000 }, { "epoch": 39.457848424638094, "grad_norm": 0.9678597450256348, "learning_rate": 6.05586148169174e-05, "loss": 0.0053438939154148105, "step": 139010 }, { "epoch": 39.46068691456146, "grad_norm": 1.0424306392669678, "learning_rate": 6.055577632699404e-05, "loss": 0.01289212554693222, "step": 139020 }, { "epoch": 39.46352540448481, "grad_norm": 0.27018454670906067, "learning_rate": 6.0552937837070676e-05, "loss": 0.0030976323410868644, "step": 139030 }, { "epoch": 39.466363894408175, "grad_norm": 0.1858838051557541, "learning_rate": 6.055009934714732e-05, "loss": 0.0012084327638149262, "step": 139040 }, { "epoch": 39.46920238433154, "grad_norm": 0.03619403392076492, "learning_rate": 6.0547260857223966e-05, "loss": 0.009268757700920106, "step": 139050 }, { "epoch": 39.472040874254894, "grad_norm": 0.12012949585914612, "learning_rate": 6.05444223673006e-05, "loss": 0.015991680324077606, "step": 139060 }, { "epoch": 39.47487936417826, "grad_norm": 0.04265688359737396, "learning_rate": 6.054158387737724e-05, "loss": 0.0011399470269680024, "step": 139070 }, { "epoch": 39.47771785410162, "grad_norm": 1.0416966676712036, "learning_rate": 6.0538745387453877e-05, "loss": 0.001205376349389553, "step": 139080 }, { "epoch": 39.480556344024976, "grad_norm": 0.06668306887149811, "learning_rate": 6.053590689753051e-05, "loss": 0.0009655458852648735, "step": 139090 }, { "epoch": 39.48339483394834, "grad_norm": 11.441139221191406, "learning_rate": 6.053306840760715e-05, "loss": 0.012382709234952927, "step": 139100 }, { "epoch": 39.4862333238717, "grad_norm": 0.42517176270484924, "learning_rate": 6.05302299176838e-05, "loss": 0.004985709115862847, "step": 139110 }, { "epoch": 39.489071813795064, "grad_norm": 15.099268913269043, "learning_rate": 6.0527391427760435e-05, "loss": 0.011611410975456237, "step": 139120 }, { "epoch": 39.49191030371842, "grad_norm": 0.16022956371307373, "learning_rate": 6.052455293783708e-05, "loss": 0.0009337238967418671, "step": 139130 }, { "epoch": 39.49474879364178, "grad_norm": 0.10008148849010468, "learning_rate": 6.052171444791371e-05, "loss": 0.008278004825115204, "step": 139140 }, { "epoch": 39.497587283565146, "grad_norm": 0.21895630657672882, "learning_rate": 6.051887595799035e-05, "loss": 0.006084667891263962, "step": 139150 }, { "epoch": 39.5004257734885, "grad_norm": 1.3596810102462769, "learning_rate": 6.051603746806699e-05, "loss": 0.001015377976000309, "step": 139160 }, { "epoch": 39.503264263411864, "grad_norm": 0.11249522864818573, "learning_rate": 6.051319897814363e-05, "loss": 0.01335364580154419, "step": 139170 }, { "epoch": 39.50610275333523, "grad_norm": 0.9844041466712952, "learning_rate": 6.051036048822028e-05, "loss": 0.0010581102222204208, "step": 139180 }, { "epoch": 39.50894124325858, "grad_norm": 0.7802326679229736, "learning_rate": 6.050752199829691e-05, "loss": 0.0026956498622894287, "step": 139190 }, { "epoch": 39.511779733181946, "grad_norm": 0.2334555834531784, "learning_rate": 6.050468350837355e-05, "loss": 0.010217313468456269, "step": 139200 }, { "epoch": 39.51461822310531, "grad_norm": 2.57680606842041, "learning_rate": 6.050184501845019e-05, "loss": 0.0021345864981412886, "step": 139210 }, { "epoch": 39.51745671302867, "grad_norm": 1.8180394172668457, "learning_rate": 6.049900652852682e-05, "loss": 0.001485462486743927, "step": 139220 }, { "epoch": 39.52029520295203, "grad_norm": 0.4410782754421234, "learning_rate": 6.049616803860346e-05, "loss": 0.0011626113206148147, "step": 139230 }, { "epoch": 39.52313369287539, "grad_norm": 0.3590135872364044, "learning_rate": 6.04933295486801e-05, "loss": 0.018053440749645232, "step": 139240 }, { "epoch": 39.52597218279875, "grad_norm": 15.503097534179688, "learning_rate": 6.0490491058756746e-05, "loss": 0.01282023787498474, "step": 139250 }, { "epoch": 39.52881067272211, "grad_norm": 0.007889934815466404, "learning_rate": 6.048765256883339e-05, "loss": 0.0012047268450260163, "step": 139260 }, { "epoch": 39.53164916264547, "grad_norm": 9.278818130493164, "learning_rate": 6.048481407891002e-05, "loss": 0.004951132088899612, "step": 139270 }, { "epoch": 39.534487652568835, "grad_norm": 4.6797661781311035, "learning_rate": 6.048197558898666e-05, "loss": 0.01644953191280365, "step": 139280 }, { "epoch": 39.53732614249219, "grad_norm": 0.1007591262459755, "learning_rate": 6.04791370990633e-05, "loss": 0.005101056396961212, "step": 139290 }, { "epoch": 39.540164632415554, "grad_norm": 0.49521416425704956, "learning_rate": 6.047629860913994e-05, "loss": 0.0007464537397027015, "step": 139300 }, { "epoch": 39.54300312233892, "grad_norm": 0.1426939070224762, "learning_rate": 6.047346011921659e-05, "loss": 0.0060915850102901455, "step": 139310 }, { "epoch": 39.54584161226228, "grad_norm": 0.050874605774879456, "learning_rate": 6.047062162929322e-05, "loss": 0.004303143918514251, "step": 139320 }, { "epoch": 39.548680102185635, "grad_norm": 0.010523208416998386, "learning_rate": 6.0467783139369863e-05, "loss": 0.001164872944355011, "step": 139330 }, { "epoch": 39.551518592109, "grad_norm": 0.11787858605384827, "learning_rate": 6.04649446494465e-05, "loss": 0.0020640727132558824, "step": 139340 }, { "epoch": 39.55435708203236, "grad_norm": 0.018950942903757095, "learning_rate": 6.046210615952313e-05, "loss": 0.0018655864521861076, "step": 139350 }, { "epoch": 39.55719557195572, "grad_norm": 1.4793403148651123, "learning_rate": 6.0459267669599774e-05, "loss": 0.007364986836910248, "step": 139360 }, { "epoch": 39.56003406187908, "grad_norm": 6.604396343231201, "learning_rate": 6.045642917967641e-05, "loss": 0.005839154496788979, "step": 139370 }, { "epoch": 39.56287255180244, "grad_norm": 0.04512658715248108, "learning_rate": 6.045359068975306e-05, "loss": 0.007810677587985993, "step": 139380 }, { "epoch": 39.565711041725805, "grad_norm": 0.16690883040428162, "learning_rate": 6.04507521998297e-05, "loss": 0.009123191982507706, "step": 139390 }, { "epoch": 39.56854953164916, "grad_norm": 0.2736813724040985, "learning_rate": 6.044791370990633e-05, "loss": 0.010136638581752778, "step": 139400 }, { "epoch": 39.571388021572524, "grad_norm": 0.799407958984375, "learning_rate": 6.0445075219982974e-05, "loss": 0.0076135009527206424, "step": 139410 }, { "epoch": 39.57422651149589, "grad_norm": 0.34595611691474915, "learning_rate": 6.044223673005961e-05, "loss": 0.000494859367609024, "step": 139420 }, { "epoch": 39.57706500141924, "grad_norm": 0.1292838752269745, "learning_rate": 6.043939824013625e-05, "loss": 0.0011536160483956337, "step": 139430 }, { "epoch": 39.579903491342606, "grad_norm": 0.527780294418335, "learning_rate": 6.0436559750212885e-05, "loss": 0.0014653878286480903, "step": 139440 }, { "epoch": 39.58274198126597, "grad_norm": 11.208525657653809, "learning_rate": 6.043372126028953e-05, "loss": 0.0045447938144207, "step": 139450 }, { "epoch": 39.585580471189324, "grad_norm": 0.5165755152702332, "learning_rate": 6.043088277036617e-05, "loss": 0.0023741481825709345, "step": 139460 }, { "epoch": 39.58841896111269, "grad_norm": 2.8765907287597656, "learning_rate": 6.042804428044281e-05, "loss": 0.0028172142803668974, "step": 139470 }, { "epoch": 39.59125745103605, "grad_norm": 0.3178364634513855, "learning_rate": 6.0425205790519443e-05, "loss": 0.0004664406180381775, "step": 139480 }, { "epoch": 39.59409594095941, "grad_norm": 0.06303209066390991, "learning_rate": 6.0422367300596085e-05, "loss": 0.006016398221254349, "step": 139490 }, { "epoch": 39.59693443088277, "grad_norm": 0.9851343631744385, "learning_rate": 6.041952881067272e-05, "loss": 0.0024714022874832153, "step": 139500 }, { "epoch": 39.59693443088277, "eval_accuracy": 0.9799707509378776, "eval_loss": 0.07693947106599808, "eval_runtime": 35.4879, "eval_samples_per_second": 443.165, "eval_steps_per_second": 6.932, "step": 139500 }, { "epoch": 39.59977292080613, "grad_norm": 1.8749613761901855, "learning_rate": 6.041669032074937e-05, "loss": 0.0010663188993930816, "step": 139510 }, { "epoch": 39.602611410729494, "grad_norm": 0.14166595041751862, "learning_rate": 6.041385183082601e-05, "loss": 0.004269582405686379, "step": 139520 }, { "epoch": 39.60544990065285, "grad_norm": 0.34215036034584045, "learning_rate": 6.0411013340902644e-05, "loss": 0.0008560461923480034, "step": 139530 }, { "epoch": 39.60828839057621, "grad_norm": 0.020293161273002625, "learning_rate": 6.0408174850979285e-05, "loss": 0.005010979622602463, "step": 139540 }, { "epoch": 39.611126880499576, "grad_norm": 0.7659504413604736, "learning_rate": 6.040533636105592e-05, "loss": 0.0013291558250784874, "step": 139550 }, { "epoch": 39.61396537042293, "grad_norm": 0.47615575790405273, "learning_rate": 6.0402497871132554e-05, "loss": 0.0013980047777295112, "step": 139560 }, { "epoch": 39.616803860346295, "grad_norm": 0.0646129846572876, "learning_rate": 6.0399659381209195e-05, "loss": 0.001903495006263256, "step": 139570 }, { "epoch": 39.61964235026966, "grad_norm": 0.020428629592061043, "learning_rate": 6.0396820891285844e-05, "loss": 0.0004499729722738266, "step": 139580 }, { "epoch": 39.62248084019302, "grad_norm": 0.038408029824495316, "learning_rate": 6.039398240136248e-05, "loss": 0.001064957119524479, "step": 139590 }, { "epoch": 39.625319330116376, "grad_norm": 10.987489700317383, "learning_rate": 6.039114391143912e-05, "loss": 0.005510361492633819, "step": 139600 }, { "epoch": 39.62815782003974, "grad_norm": 0.1033128872513771, "learning_rate": 6.0388305421515754e-05, "loss": 0.0014725219458341598, "step": 139610 }, { "epoch": 39.6309963099631, "grad_norm": 0.32857224345207214, "learning_rate": 6.0385466931592396e-05, "loss": 0.00434696190059185, "step": 139620 }, { "epoch": 39.63383479988646, "grad_norm": 0.630913257598877, "learning_rate": 6.038262844166903e-05, "loss": 0.012698149681091309, "step": 139630 }, { "epoch": 39.63667328980982, "grad_norm": 0.08295506983995438, "learning_rate": 6.037978995174567e-05, "loss": 0.009344246238470078, "step": 139640 }, { "epoch": 39.63951177973318, "grad_norm": 1.1695894002914429, "learning_rate": 6.037695146182232e-05, "loss": 0.004010669142007828, "step": 139650 }, { "epoch": 39.64235026965654, "grad_norm": 0.48185762763023376, "learning_rate": 6.0374112971898954e-05, "loss": 0.004066113382577896, "step": 139660 }, { "epoch": 39.6451887595799, "grad_norm": 0.2088603675365448, "learning_rate": 6.0371274481975596e-05, "loss": 0.00413791611790657, "step": 139670 }, { "epoch": 39.648027249503265, "grad_norm": 13.544593811035156, "learning_rate": 6.036843599205223e-05, "loss": 0.008512773364782334, "step": 139680 }, { "epoch": 39.65086573942663, "grad_norm": 1.8874883651733398, "learning_rate": 6.0365597502128865e-05, "loss": 0.0008650554344058036, "step": 139690 }, { "epoch": 39.653704229349984, "grad_norm": 0.06713263690471649, "learning_rate": 6.0362759012205506e-05, "loss": 0.00032517630606889727, "step": 139700 }, { "epoch": 39.65654271927335, "grad_norm": 0.016909999772906303, "learning_rate": 6.0359920522282154e-05, "loss": 0.0007087821140885353, "step": 139710 }, { "epoch": 39.65938120919671, "grad_norm": 3.7949206829071045, "learning_rate": 6.035708203235879e-05, "loss": 0.008322563022375107, "step": 139720 }, { "epoch": 39.662219699120065, "grad_norm": 0.19647155702114105, "learning_rate": 6.035424354243543e-05, "loss": 0.002325757034122944, "step": 139730 }, { "epoch": 39.66505818904343, "grad_norm": 0.43818825483322144, "learning_rate": 6.0351405052512065e-05, "loss": 0.0011228665709495545, "step": 139740 }, { "epoch": 39.66789667896679, "grad_norm": 0.34506067633628845, "learning_rate": 6.0348566562588706e-05, "loss": 0.002585999853909016, "step": 139750 }, { "epoch": 39.67073516889015, "grad_norm": 0.7204943299293518, "learning_rate": 6.034572807266534e-05, "loss": 0.005059043318033219, "step": 139760 }, { "epoch": 39.67357365881351, "grad_norm": 0.3773424029350281, "learning_rate": 6.034288958274198e-05, "loss": 0.0019811224192380905, "step": 139770 }, { "epoch": 39.67641214873687, "grad_norm": 0.2002657949924469, "learning_rate": 6.034005109281863e-05, "loss": 0.0032944247126579286, "step": 139780 }, { "epoch": 39.679250638660235, "grad_norm": 0.7894177436828613, "learning_rate": 6.0337212602895265e-05, "loss": 0.0021438678726553918, "step": 139790 }, { "epoch": 39.68208912858359, "grad_norm": 1.3847681283950806, "learning_rate": 6.0334374112971906e-05, "loss": 0.004726460203528404, "step": 139800 }, { "epoch": 39.684927618506954, "grad_norm": 0.23688063025474548, "learning_rate": 6.033153562304854e-05, "loss": 0.0038241155445575712, "step": 139810 }, { "epoch": 39.68776610843032, "grad_norm": 0.10516175627708435, "learning_rate": 6.0328697133125176e-05, "loss": 0.01656373292207718, "step": 139820 }, { "epoch": 39.69060459835367, "grad_norm": 0.025425909087061882, "learning_rate": 6.032585864320182e-05, "loss": 0.004116755351424218, "step": 139830 }, { "epoch": 39.693443088277036, "grad_norm": 0.006883655674755573, "learning_rate": 6.032302015327845e-05, "loss": 0.003660205751657486, "step": 139840 }, { "epoch": 39.6962815782004, "grad_norm": 6.052340030670166, "learning_rate": 6.03201816633551e-05, "loss": 0.003467187285423279, "step": 139850 }, { "epoch": 39.69912006812376, "grad_norm": 2.1318395137786865, "learning_rate": 6.031734317343174e-05, "loss": 0.0018260452896356582, "step": 139860 }, { "epoch": 39.70195855804712, "grad_norm": 0.046489883214235306, "learning_rate": 6.0314504683508376e-05, "loss": 0.005017465725541115, "step": 139870 }, { "epoch": 39.70479704797048, "grad_norm": 0.09643717110157013, "learning_rate": 6.031166619358502e-05, "loss": 0.004412503540515899, "step": 139880 }, { "epoch": 39.70763553789384, "grad_norm": 2.0318610668182373, "learning_rate": 6.030882770366165e-05, "loss": 0.010016446560621261, "step": 139890 }, { "epoch": 39.7104740278172, "grad_norm": 0.33554142713546753, "learning_rate": 6.030598921373829e-05, "loss": 0.006604233384132385, "step": 139900 }, { "epoch": 39.71331251774056, "grad_norm": 0.18289758265018463, "learning_rate": 6.030315072381494e-05, "loss": 0.002458448149263859, "step": 139910 }, { "epoch": 39.716151007663925, "grad_norm": 0.2665189206600189, "learning_rate": 6.0300312233891576e-05, "loss": 0.005028538778424263, "step": 139920 }, { "epoch": 39.71898949758728, "grad_norm": 0.054751187562942505, "learning_rate": 6.029747374396821e-05, "loss": 0.003917087614536285, "step": 139930 }, { "epoch": 39.72182798751064, "grad_norm": 0.23740018904209137, "learning_rate": 6.029463525404485e-05, "loss": 0.0005019735544919968, "step": 139940 }, { "epoch": 39.724666477434006, "grad_norm": 0.1050899401307106, "learning_rate": 6.0291796764121486e-05, "loss": 0.0007343839854001999, "step": 139950 }, { "epoch": 39.72750496735737, "grad_norm": 0.17345091700553894, "learning_rate": 6.028895827419813e-05, "loss": 0.011517295241355896, "step": 139960 }, { "epoch": 39.730343457280725, "grad_norm": 0.2550193667411804, "learning_rate": 6.028611978427476e-05, "loss": 0.002329324558377266, "step": 139970 }, { "epoch": 39.73318194720409, "grad_norm": 0.05219987407326698, "learning_rate": 6.028328129435141e-05, "loss": 0.0038969628512859344, "step": 139980 }, { "epoch": 39.73602043712745, "grad_norm": 0.05646374821662903, "learning_rate": 6.028044280442805e-05, "loss": 0.0071163192391395565, "step": 139990 }, { "epoch": 39.738858927050806, "grad_norm": 0.03333752229809761, "learning_rate": 6.0277604314504686e-05, "loss": 0.009406652301549911, "step": 140000 }, { "epoch": 39.738858927050806, "eval_accuracy": 0.9802250906085077, "eval_loss": 0.07730094343423843, "eval_runtime": 33.6816, "eval_samples_per_second": 466.932, "eval_steps_per_second": 7.304, "step": 140000 }, { "epoch": 39.74169741697417, "grad_norm": 5.180607795715332, "learning_rate": 6.027476582458133e-05, "loss": 0.0016228236258029939, "step": 140010 }, { "epoch": 39.74453590689753, "grad_norm": 0.019831007346510887, "learning_rate": 6.027192733465796e-05, "loss": 0.0009049829095602036, "step": 140020 }, { "epoch": 39.74737439682089, "grad_norm": 0.004605283495038748, "learning_rate": 6.02690888447346e-05, "loss": 0.0017602238804101944, "step": 140030 }, { "epoch": 39.75021288674425, "grad_norm": 0.9554744362831116, "learning_rate": 6.026625035481124e-05, "loss": 0.0010366074740886689, "step": 140040 }, { "epoch": 39.753051376667614, "grad_norm": 5.514071941375732, "learning_rate": 6.0263411864887887e-05, "loss": 0.0013055594637989998, "step": 140050 }, { "epoch": 39.75588986659098, "grad_norm": 0.07226937264204025, "learning_rate": 6.026057337496452e-05, "loss": 0.0008673405274748803, "step": 140060 }, { "epoch": 39.75872835651433, "grad_norm": 3.3729405403137207, "learning_rate": 6.025773488504116e-05, "loss": 0.016267013549804688, "step": 140070 }, { "epoch": 39.761566846437695, "grad_norm": 6.927515983581543, "learning_rate": 6.02548963951178e-05, "loss": 0.009255509823560715, "step": 140080 }, { "epoch": 39.76440533636106, "grad_norm": 14.113906860351562, "learning_rate": 6.025205790519444e-05, "loss": 0.006352418661117553, "step": 140090 }, { "epoch": 39.767243826284414, "grad_norm": 2.6467010974884033, "learning_rate": 6.024921941527107e-05, "loss": 0.00193468164652586, "step": 140100 }, { "epoch": 39.77008231620778, "grad_norm": 11.587573051452637, "learning_rate": 6.024638092534772e-05, "loss": 0.009303369373083115, "step": 140110 }, { "epoch": 39.77292080613114, "grad_norm": 7.731042861938477, "learning_rate": 6.024354243542436e-05, "loss": 0.0031620003283023834, "step": 140120 }, { "epoch": 39.7757592960545, "grad_norm": 0.026274699717760086, "learning_rate": 6.0240703945501e-05, "loss": 0.002146316319704056, "step": 140130 }, { "epoch": 39.77859778597786, "grad_norm": 0.22071866691112518, "learning_rate": 6.023786545557764e-05, "loss": 0.021683146059513093, "step": 140140 }, { "epoch": 39.78143627590122, "grad_norm": 0.042812224477529526, "learning_rate": 6.023502696565427e-05, "loss": 0.0037301771342754366, "step": 140150 }, { "epoch": 39.784274765824584, "grad_norm": 0.1161683201789856, "learning_rate": 6.023218847573091e-05, "loss": 0.006061394512653351, "step": 140160 }, { "epoch": 39.78711325574794, "grad_norm": 0.6947944164276123, "learning_rate": 6.022934998580755e-05, "loss": 0.0073666214942932125, "step": 140170 }, { "epoch": 39.7899517456713, "grad_norm": 3.488680362701416, "learning_rate": 6.02265114958842e-05, "loss": 0.0024717610329389574, "step": 140180 }, { "epoch": 39.792790235594666, "grad_norm": 6.038782119750977, "learning_rate": 6.022367300596083e-05, "loss": 0.0025426460430026054, "step": 140190 }, { "epoch": 39.79562872551802, "grad_norm": 0.08167636394500732, "learning_rate": 6.022083451603747e-05, "loss": 0.005234082415699959, "step": 140200 }, { "epoch": 39.798467215441384, "grad_norm": 0.3623303472995758, "learning_rate": 6.021799602611411e-05, "loss": 0.001396007463335991, "step": 140210 }, { "epoch": 39.80130570536475, "grad_norm": 0.7307189106941223, "learning_rate": 6.021515753619075e-05, "loss": 0.008457928895950317, "step": 140220 }, { "epoch": 39.80414419528811, "grad_norm": 0.43614670634269714, "learning_rate": 6.0212319046267384e-05, "loss": 0.005614956468343734, "step": 140230 }, { "epoch": 39.806982685211466, "grad_norm": 0.11204016953706741, "learning_rate": 6.020948055634403e-05, "loss": 0.003205081820487976, "step": 140240 }, { "epoch": 39.80982117513483, "grad_norm": 0.05408202111721039, "learning_rate": 6.0206642066420673e-05, "loss": 0.0007401403039693832, "step": 140250 }, { "epoch": 39.81265966505819, "grad_norm": 0.08601909130811691, "learning_rate": 6.020380357649731e-05, "loss": 0.010128014534711838, "step": 140260 }, { "epoch": 39.81549815498155, "grad_norm": 0.7854220867156982, "learning_rate": 6.020096508657395e-05, "loss": 0.0019169824197888374, "step": 140270 }, { "epoch": 39.81833664490491, "grad_norm": 0.0367460772395134, "learning_rate": 6.0198126596650584e-05, "loss": 0.0019715875387191772, "step": 140280 }, { "epoch": 39.82117513482827, "grad_norm": 0.0326920785009861, "learning_rate": 6.019528810672722e-05, "loss": 0.0030091289430856706, "step": 140290 }, { "epoch": 39.82401362475163, "grad_norm": 0.3625161349773407, "learning_rate": 6.019244961680386e-05, "loss": 0.009983541071414947, "step": 140300 }, { "epoch": 39.82685211467499, "grad_norm": 0.7909501194953918, "learning_rate": 6.018961112688051e-05, "loss": 0.00578177385032177, "step": 140310 }, { "epoch": 39.829690604598355, "grad_norm": 0.03738076612353325, "learning_rate": 6.018677263695714e-05, "loss": 0.010677408427000046, "step": 140320 }, { "epoch": 39.83252909452172, "grad_norm": 0.11721184849739075, "learning_rate": 6.0183934147033784e-05, "loss": 0.008144648373126983, "step": 140330 }, { "epoch": 39.83536758444507, "grad_norm": 0.021319258958101273, "learning_rate": 6.018109565711042e-05, "loss": 0.002455095201730728, "step": 140340 }, { "epoch": 39.838206074368436, "grad_norm": 0.06751576066017151, "learning_rate": 6.017825716718706e-05, "loss": 0.0073022134602069855, "step": 140350 }, { "epoch": 39.8410445642918, "grad_norm": 0.22781354188919067, "learning_rate": 6.0175418677263695e-05, "loss": 0.003056218661367893, "step": 140360 }, { "epoch": 39.843883054215155, "grad_norm": 0.7224525213241577, "learning_rate": 6.0172580187340336e-05, "loss": 0.012529321014881134, "step": 140370 }, { "epoch": 39.84672154413852, "grad_norm": 0.24955223500728607, "learning_rate": 6.0169741697416984e-05, "loss": 0.0012133425101637841, "step": 140380 }, { "epoch": 39.84956003406188, "grad_norm": 1.5199652910232544, "learning_rate": 6.016690320749362e-05, "loss": 0.008562378585338593, "step": 140390 }, { "epoch": 39.85239852398524, "grad_norm": 0.05575885251164436, "learning_rate": 6.0164064717570253e-05, "loss": 0.0013135379180312158, "step": 140400 }, { "epoch": 39.8552370139086, "grad_norm": 0.058260150253772736, "learning_rate": 6.0161226227646895e-05, "loss": 0.0011805795133113862, "step": 140410 }, { "epoch": 39.85807550383196, "grad_norm": 0.027986010536551476, "learning_rate": 6.015838773772353e-05, "loss": 0.016309380531311035, "step": 140420 }, { "epoch": 39.860913993755325, "grad_norm": 3.576286792755127, "learning_rate": 6.015554924780017e-05, "loss": 0.0017692962661385535, "step": 140430 }, { "epoch": 39.86375248367868, "grad_norm": 0.02668899856507778, "learning_rate": 6.015271075787682e-05, "loss": 0.0013603081926703453, "step": 140440 }, { "epoch": 39.866590973602044, "grad_norm": 0.2705933451652527, "learning_rate": 6.0149872267953453e-05, "loss": 0.0035162322223186494, "step": 140450 }, { "epoch": 39.86942946352541, "grad_norm": 0.3443372845649719, "learning_rate": 6.0147033778030095e-05, "loss": 0.0009793328121304513, "step": 140460 }, { "epoch": 39.87226795344876, "grad_norm": 0.25106799602508545, "learning_rate": 6.014419528810673e-05, "loss": 0.0039020076394081114, "step": 140470 }, { "epoch": 39.875106443372125, "grad_norm": 0.3664070665836334, "learning_rate": 6.014135679818337e-05, "loss": 0.006767159700393677, "step": 140480 }, { "epoch": 39.87794493329549, "grad_norm": 1.5214062929153442, "learning_rate": 6.0138518308260005e-05, "loss": 0.003276723623275757, "step": 140490 }, { "epoch": 39.880783423218844, "grad_norm": 0.05399065092206001, "learning_rate": 6.013567981833664e-05, "loss": 0.005181704461574554, "step": 140500 }, { "epoch": 39.880783423218844, "eval_accuracy": 0.977045844725631, "eval_loss": 0.08858271688222885, "eval_runtime": 33.7382, "eval_samples_per_second": 466.148, "eval_steps_per_second": 7.291, "step": 140500 }, { "epoch": 39.88362191314221, "grad_norm": 0.007636615540832281, "learning_rate": 6.0132841328413295e-05, "loss": 0.0015809105709195137, "step": 140510 }, { "epoch": 39.88646040306557, "grad_norm": 5.471903324127197, "learning_rate": 6.013000283848993e-05, "loss": 0.010436218976974488, "step": 140520 }, { "epoch": 39.88929889298893, "grad_norm": 0.023509038612246513, "learning_rate": 6.0127164348566564e-05, "loss": 0.0016803789883852005, "step": 140530 }, { "epoch": 39.89213738291229, "grad_norm": 0.06541233509778976, "learning_rate": 6.0124325858643206e-05, "loss": 0.003866260126233101, "step": 140540 }, { "epoch": 39.89497587283565, "grad_norm": 0.015936177223920822, "learning_rate": 6.012148736871984e-05, "loss": 0.00835585966706276, "step": 140550 }, { "epoch": 39.897814362759014, "grad_norm": 1.1790138483047485, "learning_rate": 6.011864887879648e-05, "loss": 0.0023340722545981407, "step": 140560 }, { "epoch": 39.90065285268237, "grad_norm": 0.5053675174713135, "learning_rate": 6.0115810388873116e-05, "loss": 0.0041997529566288, "step": 140570 }, { "epoch": 39.90349134260573, "grad_norm": 0.1307978332042694, "learning_rate": 6.0112971898949764e-05, "loss": 0.005003933981060982, "step": 140580 }, { "epoch": 39.906329832529096, "grad_norm": 0.21318809688091278, "learning_rate": 6.0110133409026406e-05, "loss": 0.006356914341449737, "step": 140590 }, { "epoch": 39.90916832245246, "grad_norm": 3.7613072395324707, "learning_rate": 6.010729491910304e-05, "loss": 0.0015332115814089774, "step": 140600 }, { "epoch": 39.912006812375814, "grad_norm": 0.16834582388401031, "learning_rate": 6.010445642917968e-05, "loss": 0.006772524118423462, "step": 140610 }, { "epoch": 39.91484530229918, "grad_norm": 2.963632345199585, "learning_rate": 6.0101617939256316e-05, "loss": 0.0027033500373363495, "step": 140620 }, { "epoch": 39.91768379222254, "grad_norm": 0.14764925837516785, "learning_rate": 6.009877944933295e-05, "loss": 0.0009094873443245888, "step": 140630 }, { "epoch": 39.920522282145896, "grad_norm": 0.05448424443602562, "learning_rate": 6.0095940959409606e-05, "loss": 0.00039177704602479937, "step": 140640 }, { "epoch": 39.92336077206926, "grad_norm": 0.2741493284702301, "learning_rate": 6.009310246948624e-05, "loss": 0.0040699135512113575, "step": 140650 }, { "epoch": 39.92619926199262, "grad_norm": 0.005006491206586361, "learning_rate": 6.0090263979562875e-05, "loss": 0.0004386469721794128, "step": 140660 }, { "epoch": 39.92903775191598, "grad_norm": 0.034414008259773254, "learning_rate": 6.0087425489639516e-05, "loss": 0.0010241106152534484, "step": 140670 }, { "epoch": 39.93187624183934, "grad_norm": 1.5561379194259644, "learning_rate": 6.008458699971615e-05, "loss": 0.0017207475379109383, "step": 140680 }, { "epoch": 39.9347147317627, "grad_norm": 0.058816347271203995, "learning_rate": 6.008174850979279e-05, "loss": 0.0012570088729262352, "step": 140690 }, { "epoch": 39.937553221686066, "grad_norm": 1.0321310758590698, "learning_rate": 6.007891001986943e-05, "loss": 0.0016833772882819177, "step": 140700 }, { "epoch": 39.94039171160942, "grad_norm": 4.119981288909912, "learning_rate": 6.0076071529946075e-05, "loss": 0.010317127406597137, "step": 140710 }, { "epoch": 39.943230201532785, "grad_norm": 0.19562427699565887, "learning_rate": 6.0073233040022716e-05, "loss": 0.0007414365187287331, "step": 140720 }, { "epoch": 39.94606869145615, "grad_norm": 0.6070197224617004, "learning_rate": 6.007039455009935e-05, "loss": 0.002733702398836613, "step": 140730 }, { "epoch": 39.948907181379504, "grad_norm": 0.08643516898155212, "learning_rate": 6.006755606017599e-05, "loss": 0.001535908505320549, "step": 140740 }, { "epoch": 39.95174567130287, "grad_norm": 0.01761624589562416, "learning_rate": 6.006471757025263e-05, "loss": 0.0014145830646157265, "step": 140750 }, { "epoch": 39.95458416122623, "grad_norm": 0.17561081051826477, "learning_rate": 6.006187908032926e-05, "loss": 0.002869078516960144, "step": 140760 }, { "epoch": 39.957422651149585, "grad_norm": 0.007226722780615091, "learning_rate": 6.00590405904059e-05, "loss": 0.0006704378873109817, "step": 140770 }, { "epoch": 39.96026114107295, "grad_norm": 1.2203341722488403, "learning_rate": 6.005620210048255e-05, "loss": 0.0018667129799723626, "step": 140780 }, { "epoch": 39.96309963099631, "grad_norm": 0.3066762685775757, "learning_rate": 6.0053363610559186e-05, "loss": 0.000397636741399765, "step": 140790 }, { "epoch": 39.965938120919674, "grad_norm": 0.1400853991508484, "learning_rate": 6.005052512063583e-05, "loss": 0.006970874220132828, "step": 140800 }, { "epoch": 39.96877661084303, "grad_norm": 12.223576545715332, "learning_rate": 6.004768663071246e-05, "loss": 0.0065665140748023985, "step": 140810 }, { "epoch": 39.97161510076639, "grad_norm": 0.38565951585769653, "learning_rate": 6.00448481407891e-05, "loss": 0.0006583660840988159, "step": 140820 }, { "epoch": 39.974453590689755, "grad_norm": 0.1388775110244751, "learning_rate": 6.004200965086574e-05, "loss": 0.0075241923332214355, "step": 140830 }, { "epoch": 39.97729208061311, "grad_norm": 2.0963780879974365, "learning_rate": 6.0039171160942386e-05, "loss": 0.0011190304532647133, "step": 140840 }, { "epoch": 39.980130570536474, "grad_norm": 0.7631009817123413, "learning_rate": 6.003633267101903e-05, "loss": 0.0017454151064157486, "step": 140850 }, { "epoch": 39.98296906045984, "grad_norm": 0.1579689234495163, "learning_rate": 6.003349418109566e-05, "loss": 0.01458720862865448, "step": 140860 }, { "epoch": 39.98580755038319, "grad_norm": 0.44892990589141846, "learning_rate": 6.0030655691172296e-05, "loss": 0.010024822503328323, "step": 140870 }, { "epoch": 39.988646040306556, "grad_norm": 0.05207638815045357, "learning_rate": 6.002781720124894e-05, "loss": 0.006418681889772415, "step": 140880 }, { "epoch": 39.99148453022992, "grad_norm": 1.3299591541290283, "learning_rate": 6.002497871132557e-05, "loss": 0.0036146774888038634, "step": 140890 }, { "epoch": 39.99432302015328, "grad_norm": 0.08390624821186066, "learning_rate": 6.0022140221402214e-05, "loss": 0.0022643888369202614, "step": 140900 }, { "epoch": 39.99716151007664, "grad_norm": 0.06949322670698166, "learning_rate": 6.001930173147886e-05, "loss": 0.0020107952877879143, "step": 140910 }, { "epoch": 40.0, "grad_norm": 2.506221055984497, "learning_rate": 6.0016463241555496e-05, "loss": 0.005773241072893143, "step": 140920 }, { "epoch": 40.00283848992336, "grad_norm": 0.08780799806118011, "learning_rate": 6.001362475163214e-05, "loss": 0.0009396199136972427, "step": 140930 }, { "epoch": 40.00567697984672, "grad_norm": 0.41203999519348145, "learning_rate": 6.001078626170877e-05, "loss": 0.011485329270362854, "step": 140940 }, { "epoch": 40.00851546977008, "grad_norm": 15.727099418640137, "learning_rate": 6.0007947771785414e-05, "loss": 0.01484718918800354, "step": 140950 }, { "epoch": 40.011353959693444, "grad_norm": 0.054018083959817886, "learning_rate": 6.000510928186205e-05, "loss": 0.006162045523524284, "step": 140960 }, { "epoch": 40.01419244961681, "grad_norm": 0.047532353550195694, "learning_rate": 6.000227079193868e-05, "loss": 0.001646256074309349, "step": 140970 }, { "epoch": 40.01703093954016, "grad_norm": 0.04321896657347679, "learning_rate": 5.999943230201534e-05, "loss": 0.0019460495561361313, "step": 140980 }, { "epoch": 40.019869429463526, "grad_norm": 0.5533946752548218, "learning_rate": 5.999659381209197e-05, "loss": 0.004676478356122971, "step": 140990 }, { "epoch": 40.02270791938689, "grad_norm": 0.014613586477935314, "learning_rate": 5.999375532216861e-05, "loss": 0.0020538315176963804, "step": 141000 }, { "epoch": 40.02270791938689, "eval_accuracy": 0.9796528263495898, "eval_loss": 0.08253839612007141, "eval_runtime": 33.5045, "eval_samples_per_second": 469.4, "eval_steps_per_second": 7.342, "step": 141000 }, { "epoch": 40.025546409310245, "grad_norm": 1.0436290502548218, "learning_rate": 5.999091683224525e-05, "loss": 0.000866231881082058, "step": 141010 }, { "epoch": 40.02838489923361, "grad_norm": 0.21416397392749786, "learning_rate": 5.998807834232188e-05, "loss": 0.004730762168765068, "step": 141020 }, { "epoch": 40.03122338915697, "grad_norm": 0.047134462743997574, "learning_rate": 5.9985239852398524e-05, "loss": 0.0007115015760064125, "step": 141030 }, { "epoch": 40.034061879080326, "grad_norm": 0.5270630717277527, "learning_rate": 5.998240136247517e-05, "loss": 0.0018299955874681473, "step": 141040 }, { "epoch": 40.03690036900369, "grad_norm": 0.8023728132247925, "learning_rate": 5.997956287255181e-05, "loss": 0.000613241270184517, "step": 141050 }, { "epoch": 40.03973885892705, "grad_norm": 0.7817026972770691, "learning_rate": 5.997672438262845e-05, "loss": 0.0037845227867364883, "step": 141060 }, { "epoch": 40.042577348850415, "grad_norm": 0.06673797965049744, "learning_rate": 5.997388589270508e-05, "loss": 0.001968875341117382, "step": 141070 }, { "epoch": 40.04541583877377, "grad_norm": 1.0940155982971191, "learning_rate": 5.9971047402781725e-05, "loss": 0.006292130798101425, "step": 141080 }, { "epoch": 40.04825432869713, "grad_norm": 8.073112487792969, "learning_rate": 5.996820891285836e-05, "loss": 0.007227838039398193, "step": 141090 }, { "epoch": 40.051092818620496, "grad_norm": 10.498923301696777, "learning_rate": 5.9965370422934994e-05, "loss": 0.012743796408176421, "step": 141100 }, { "epoch": 40.05393130854385, "grad_norm": 12.454150199890137, "learning_rate": 5.996253193301164e-05, "loss": 0.004240767285227776, "step": 141110 }, { "epoch": 40.056769798467215, "grad_norm": 0.10814335197210312, "learning_rate": 5.995969344308828e-05, "loss": 0.0006373737007379532, "step": 141120 }, { "epoch": 40.05960828839058, "grad_norm": 0.32532691955566406, "learning_rate": 5.995685495316492e-05, "loss": 0.012373842298984528, "step": 141130 }, { "epoch": 40.062446778313934, "grad_norm": 0.20747874677181244, "learning_rate": 5.995401646324156e-05, "loss": 0.0017332861199975014, "step": 141140 }, { "epoch": 40.0652852682373, "grad_norm": 0.2759236693382263, "learning_rate": 5.9951177973318194e-05, "loss": 0.0023934058845043182, "step": 141150 }, { "epoch": 40.06812375816066, "grad_norm": 0.05292368307709694, "learning_rate": 5.9948339483394835e-05, "loss": 0.006880740821361542, "step": 141160 }, { "epoch": 40.07096224808402, "grad_norm": 1.8744416236877441, "learning_rate": 5.994550099347147e-05, "loss": 0.006323178112506866, "step": 141170 }, { "epoch": 40.07380073800738, "grad_norm": 1.9099171161651611, "learning_rate": 5.994266250354812e-05, "loss": 0.0011234628036618233, "step": 141180 }, { "epoch": 40.07663922793074, "grad_norm": 0.01826351135969162, "learning_rate": 5.993982401362476e-05, "loss": 0.0008329086005687714, "step": 141190 }, { "epoch": 40.079477717854104, "grad_norm": 0.032349202781915665, "learning_rate": 5.9936985523701394e-05, "loss": 0.000519341416656971, "step": 141200 }, { "epoch": 40.08231620777746, "grad_norm": 0.08069310337305069, "learning_rate": 5.993443088277037e-05, "loss": 0.029745063185691832, "step": 141210 }, { "epoch": 40.08515469770082, "grad_norm": 0.04091327264904976, "learning_rate": 5.9931592392847015e-05, "loss": 0.00029405523091554644, "step": 141220 }, { "epoch": 40.087993187624186, "grad_norm": 0.10346274822950363, "learning_rate": 5.992875390292365e-05, "loss": 0.0008442491292953491, "step": 141230 }, { "epoch": 40.09083167754754, "grad_norm": 0.004118145909160376, "learning_rate": 5.992591541300029e-05, "loss": 0.004971948638558388, "step": 141240 }, { "epoch": 40.093670167470904, "grad_norm": 0.13743168115615845, "learning_rate": 5.9923076923076926e-05, "loss": 0.0013917263597249985, "step": 141250 }, { "epoch": 40.09650865739427, "grad_norm": 0.39514297246932983, "learning_rate": 5.992023843315356e-05, "loss": 0.0032725274562835694, "step": 141260 }, { "epoch": 40.09934714731763, "grad_norm": 0.005918759386986494, "learning_rate": 5.99173999432302e-05, "loss": 0.004290441051125526, "step": 141270 }, { "epoch": 40.102185637240986, "grad_norm": 0.7632564902305603, "learning_rate": 5.9914561453306836e-05, "loss": 0.0003750460222363472, "step": 141280 }, { "epoch": 40.10502412716435, "grad_norm": 0.20319803059101105, "learning_rate": 5.9911722963383485e-05, "loss": 0.0009120093658566475, "step": 141290 }, { "epoch": 40.10786261708771, "grad_norm": 0.0055253892205655575, "learning_rate": 5.9908884473460126e-05, "loss": 0.00034349504858255386, "step": 141300 }, { "epoch": 40.11070110701107, "grad_norm": 0.5779747366905212, "learning_rate": 5.990604598353676e-05, "loss": 0.0019457744434475898, "step": 141310 }, { "epoch": 40.11353959693443, "grad_norm": 0.043814003467559814, "learning_rate": 5.99032074936134e-05, "loss": 0.0002108817920088768, "step": 141320 }, { "epoch": 40.11637808685779, "grad_norm": 0.02058289758861065, "learning_rate": 5.9900369003690037e-05, "loss": 0.0006559636443853378, "step": 141330 }, { "epoch": 40.119216576781156, "grad_norm": 0.007214970886707306, "learning_rate": 5.989753051376668e-05, "loss": 0.0021311670541763307, "step": 141340 }, { "epoch": 40.12205506670451, "grad_norm": 0.005803955253213644, "learning_rate": 5.989469202384331e-05, "loss": 0.0018028853461146354, "step": 141350 }, { "epoch": 40.124893556627875, "grad_norm": 0.02854486182332039, "learning_rate": 5.989185353391996e-05, "loss": 0.00040334593504667284, "step": 141360 }, { "epoch": 40.12773204655124, "grad_norm": 0.05662837252020836, "learning_rate": 5.98890150439966e-05, "loss": 0.002988819219172001, "step": 141370 }, { "epoch": 40.13057053647459, "grad_norm": 0.012257601134479046, "learning_rate": 5.988617655407324e-05, "loss": 0.000749816745519638, "step": 141380 }, { "epoch": 40.133409026397956, "grad_norm": 1.5356072187423706, "learning_rate": 5.988333806414987e-05, "loss": 0.0012003682553768159, "step": 141390 }, { "epoch": 40.13624751632132, "grad_norm": 0.002942809835076332, "learning_rate": 5.988049957422651e-05, "loss": 0.0008409267291426658, "step": 141400 }, { "epoch": 40.139086006244675, "grad_norm": 0.05588100105524063, "learning_rate": 5.987766108430315e-05, "loss": 0.0014038095250725746, "step": 141410 }, { "epoch": 40.14192449616804, "grad_norm": 0.005803760606795549, "learning_rate": 5.9874822594379795e-05, "loss": 0.0021546676754951475, "step": 141420 }, { "epoch": 40.1447629860914, "grad_norm": 0.12067630141973495, "learning_rate": 5.987198410445644e-05, "loss": 0.0015821099281311036, "step": 141430 }, { "epoch": 40.14760147601476, "grad_norm": 0.03483074903488159, "learning_rate": 5.986914561453307e-05, "loss": 0.0005528388544917106, "step": 141440 }, { "epoch": 40.15043996593812, "grad_norm": 0.10471846908330917, "learning_rate": 5.986630712460971e-05, "loss": 0.00048025231808423996, "step": 141450 }, { "epoch": 40.15327845586148, "grad_norm": 0.015763845294713974, "learning_rate": 5.986346863468635e-05, "loss": 0.0056010264903306965, "step": 141460 }, { "epoch": 40.156116945784845, "grad_norm": 0.232723668217659, "learning_rate": 5.986063014476299e-05, "loss": 0.004055865108966827, "step": 141470 }, { "epoch": 40.1589554357082, "grad_norm": 0.015648838132619858, "learning_rate": 5.985779165483962e-05, "loss": 0.0011345468461513518, "step": 141480 }, { "epoch": 40.161793925631564, "grad_norm": 0.06445717811584473, "learning_rate": 5.985495316491627e-05, "loss": 0.0009460026398301124, "step": 141490 }, { "epoch": 40.16463241555493, "grad_norm": 0.006861208472400904, "learning_rate": 5.985211467499291e-05, "loss": 0.0008079934865236283, "step": 141500 }, { "epoch": 40.16463241555493, "eval_accuracy": 0.9821326381382336, "eval_loss": 0.06712860614061356, "eval_runtime": 34.1966, "eval_samples_per_second": 459.899, "eval_steps_per_second": 7.194, "step": 141500 }, { "epoch": 40.16747090547828, "grad_norm": 0.00868276972323656, "learning_rate": 5.984927618506955e-05, "loss": 0.0009389117360115052, "step": 141510 }, { "epoch": 40.170309395401645, "grad_norm": 0.05683035030961037, "learning_rate": 5.984643769514618e-05, "loss": 0.004602590203285217, "step": 141520 }, { "epoch": 40.17314788532501, "grad_norm": 0.5002962350845337, "learning_rate": 5.984359920522282e-05, "loss": 0.0022063734009861945, "step": 141530 }, { "epoch": 40.17598637524837, "grad_norm": 0.5711256265640259, "learning_rate": 5.984076071529946e-05, "loss": 0.000337749719619751, "step": 141540 }, { "epoch": 40.17882486517173, "grad_norm": 0.044463131576776505, "learning_rate": 5.98379222253761e-05, "loss": 0.0014334466308355332, "step": 141550 }, { "epoch": 40.18166335509509, "grad_norm": 0.011427773162722588, "learning_rate": 5.983508373545275e-05, "loss": 0.000719274953007698, "step": 141560 }, { "epoch": 40.18450184501845, "grad_norm": 0.10199786722660065, "learning_rate": 5.983224524552938e-05, "loss": 0.00727766677737236, "step": 141570 }, { "epoch": 40.18734033494181, "grad_norm": 0.05950368568301201, "learning_rate": 5.9829406755606023e-05, "loss": 0.002485848590731621, "step": 141580 }, { "epoch": 40.19017882486517, "grad_norm": 0.006787281017750502, "learning_rate": 5.982656826568266e-05, "loss": 0.001405978575348854, "step": 141590 }, { "epoch": 40.193017314788534, "grad_norm": 0.0018812369089573622, "learning_rate": 5.98237297757593e-05, "loss": 0.0010204441845417024, "step": 141600 }, { "epoch": 40.19585580471189, "grad_norm": 0.47003173828125, "learning_rate": 5.9820891285835934e-05, "loss": 0.003417060151696205, "step": 141610 }, { "epoch": 40.19869429463525, "grad_norm": 0.02769395150244236, "learning_rate": 5.981805279591258e-05, "loss": 0.004850450530648231, "step": 141620 }, { "epoch": 40.201532784558616, "grad_norm": 0.5583124160766602, "learning_rate": 5.981521430598922e-05, "loss": 0.0014606021344661714, "step": 141630 }, { "epoch": 40.20437127448198, "grad_norm": 0.3511638343334198, "learning_rate": 5.981237581606586e-05, "loss": 0.0013118216767907143, "step": 141640 }, { "epoch": 40.207209764405334, "grad_norm": 0.015874747186899185, "learning_rate": 5.980953732614249e-05, "loss": 0.0035273909568786623, "step": 141650 }, { "epoch": 40.2100482543287, "grad_norm": 0.044571422040462494, "learning_rate": 5.9806698836219134e-05, "loss": 0.0020153772085905073, "step": 141660 }, { "epoch": 40.21288674425206, "grad_norm": 11.55742359161377, "learning_rate": 5.980386034629577e-05, "loss": 0.0037393640726804734, "step": 141670 }, { "epoch": 40.215725234175416, "grad_norm": 0.9458943605422974, "learning_rate": 5.980102185637241e-05, "loss": 0.0014734545722603798, "step": 141680 }, { "epoch": 40.21856372409878, "grad_norm": 0.3980427384376526, "learning_rate": 5.979818336644906e-05, "loss": 0.0006876761093735695, "step": 141690 }, { "epoch": 40.22140221402214, "grad_norm": 1.0507011413574219, "learning_rate": 5.979534487652569e-05, "loss": 0.008468587696552277, "step": 141700 }, { "epoch": 40.2242407039455, "grad_norm": 0.16269536316394806, "learning_rate": 5.9792506386602334e-05, "loss": 0.01174234002828598, "step": 141710 }, { "epoch": 40.22707919386886, "grad_norm": 0.010436412878334522, "learning_rate": 5.978966789667897e-05, "loss": 0.00042227450758218764, "step": 141720 }, { "epoch": 40.22991768379222, "grad_norm": 0.05845770984888077, "learning_rate": 5.9786829406755603e-05, "loss": 0.0009693499654531479, "step": 141730 }, { "epoch": 40.232756173715586, "grad_norm": 0.01710483618080616, "learning_rate": 5.9783990916832245e-05, "loss": 8.599385619163513e-05, "step": 141740 }, { "epoch": 40.23559466363894, "grad_norm": 0.02175840735435486, "learning_rate": 5.978115242690888e-05, "loss": 0.004900522530078888, "step": 141750 }, { "epoch": 40.238433153562305, "grad_norm": 0.017166705802083015, "learning_rate": 5.977831393698553e-05, "loss": 0.001921183429658413, "step": 141760 }, { "epoch": 40.24127164348567, "grad_norm": 0.06778832525014877, "learning_rate": 5.977547544706217e-05, "loss": 0.001092621684074402, "step": 141770 }, { "epoch": 40.24411013340902, "grad_norm": 0.21612268686294556, "learning_rate": 5.9772636957138804e-05, "loss": 0.0031522460281848907, "step": 141780 }, { "epoch": 40.246948623332386, "grad_norm": 0.7009580731391907, "learning_rate": 5.9769798467215445e-05, "loss": 0.0012068323791027068, "step": 141790 }, { "epoch": 40.24978711325575, "grad_norm": 0.016377784311771393, "learning_rate": 5.976695997729208e-05, "loss": 0.0033611491322517394, "step": 141800 }, { "epoch": 40.25262560317911, "grad_norm": 0.033209118992090225, "learning_rate": 5.976412148736872e-05, "loss": 0.003910424187779427, "step": 141810 }, { "epoch": 40.25546409310247, "grad_norm": 0.0696825385093689, "learning_rate": 5.976128299744537e-05, "loss": 0.004611411690711975, "step": 141820 }, { "epoch": 40.25830258302583, "grad_norm": 3.3730838298797607, "learning_rate": 5.9758444507522004e-05, "loss": 0.0014760516583919524, "step": 141830 }, { "epoch": 40.261141072949194, "grad_norm": 8.742895126342773, "learning_rate": 5.9755606017598645e-05, "loss": 0.005755475908517838, "step": 141840 }, { "epoch": 40.26397956287255, "grad_norm": 0.06996360421180725, "learning_rate": 5.975276752767528e-05, "loss": 0.0006180951371788978, "step": 141850 }, { "epoch": 40.26681805279591, "grad_norm": 14.068145751953125, "learning_rate": 5.9749929037751914e-05, "loss": 0.008477815240621567, "step": 141860 }, { "epoch": 40.269656542719275, "grad_norm": 9.585485458374023, "learning_rate": 5.9747090547828556e-05, "loss": 0.0033624835312366487, "step": 141870 }, { "epoch": 40.27249503264263, "grad_norm": 2.167555809020996, "learning_rate": 5.974425205790519e-05, "loss": 0.0006003398448228836, "step": 141880 }, { "epoch": 40.275333522565994, "grad_norm": 5.879761219024658, "learning_rate": 5.974141356798184e-05, "loss": 0.013036349415779113, "step": 141890 }, { "epoch": 40.27817201248936, "grad_norm": 1.4892674684524536, "learning_rate": 5.973857507805848e-05, "loss": 0.0009020375087857247, "step": 141900 }, { "epoch": 40.28101050241272, "grad_norm": 0.10901422053575516, "learning_rate": 5.9735736588135114e-05, "loss": 0.0006477436050772667, "step": 141910 }, { "epoch": 40.283848992336075, "grad_norm": 1.988205075263977, "learning_rate": 5.9732898098211756e-05, "loss": 0.018366508185863495, "step": 141920 }, { "epoch": 40.28668748225944, "grad_norm": 10.658708572387695, "learning_rate": 5.973005960828839e-05, "loss": 0.012152320891618728, "step": 141930 }, { "epoch": 40.2895259721828, "grad_norm": 0.030229579657316208, "learning_rate": 5.972722111836503e-05, "loss": 0.0028082698583602904, "step": 141940 }, { "epoch": 40.29236446210616, "grad_norm": 0.11366204917430878, "learning_rate": 5.972438262844168e-05, "loss": 0.0044710438698530195, "step": 141950 }, { "epoch": 40.29520295202952, "grad_norm": 2.4031505584716797, "learning_rate": 5.9721544138518314e-05, "loss": 0.0011418292298913002, "step": 141960 }, { "epoch": 40.29804144195288, "grad_norm": 0.16094312071800232, "learning_rate": 5.9718705648594956e-05, "loss": 0.0005286876112222672, "step": 141970 }, { "epoch": 40.30087993187624, "grad_norm": 0.5009241700172424, "learning_rate": 5.971586715867159e-05, "loss": 0.0013372549787163735, "step": 141980 }, { "epoch": 40.3037184217996, "grad_norm": 1.7371125221252441, "learning_rate": 5.9713028668748225e-05, "loss": 0.000943661667406559, "step": 141990 }, { "epoch": 40.306556911722964, "grad_norm": 8.006519317626953, "learning_rate": 5.9710190178824866e-05, "loss": 0.0125094473361969, "step": 142000 }, { "epoch": 40.306556911722964, "eval_accuracy": 0.9788262224200419, "eval_loss": 0.08238931745290756, "eval_runtime": 34.6624, "eval_samples_per_second": 453.719, "eval_steps_per_second": 7.097, "step": 142000 }, { "epoch": 40.30939540164633, "grad_norm": 12.454694747924805, "learning_rate": 5.97073516889015e-05, "loss": 0.006968103349208832, "step": 142010 }, { "epoch": 40.31223389156968, "grad_norm": 8.308777809143066, "learning_rate": 5.970451319897815e-05, "loss": 0.0017859682440757752, "step": 142020 }, { "epoch": 40.315072381493046, "grad_norm": 4.778726100921631, "learning_rate": 5.970167470905479e-05, "loss": 0.001656438410282135, "step": 142030 }, { "epoch": 40.31791087141641, "grad_norm": 0.004150811582803726, "learning_rate": 5.9698836219131425e-05, "loss": 0.00022819750010967256, "step": 142040 }, { "epoch": 40.320749361339765, "grad_norm": 0.36683106422424316, "learning_rate": 5.9695997729208066e-05, "loss": 0.0030997252091765405, "step": 142050 }, { "epoch": 40.32358785126313, "grad_norm": 0.04271913319826126, "learning_rate": 5.96931592392847e-05, "loss": 0.003521487861871719, "step": 142060 }, { "epoch": 40.32642634118649, "grad_norm": 0.6221038699150085, "learning_rate": 5.969032074936134e-05, "loss": 0.009424805641174316, "step": 142070 }, { "epoch": 40.329264831109846, "grad_norm": 0.013297341763973236, "learning_rate": 5.968748225943798e-05, "loss": 0.005641287192702293, "step": 142080 }, { "epoch": 40.33210332103321, "grad_norm": 0.1528211385011673, "learning_rate": 5.9684643769514625e-05, "loss": 0.001149594783782959, "step": 142090 }, { "epoch": 40.33494181095657, "grad_norm": 0.02765464410185814, "learning_rate": 5.968180527959126e-05, "loss": 0.007230938971042633, "step": 142100 }, { "epoch": 40.337780300879935, "grad_norm": 4.223875045776367, "learning_rate": 5.96789667896679e-05, "loss": 0.0025204353034496306, "step": 142110 }, { "epoch": 40.34061879080329, "grad_norm": 7.918864727020264, "learning_rate": 5.9676128299744536e-05, "loss": 0.005542507395148277, "step": 142120 }, { "epoch": 40.34345728072665, "grad_norm": 0.023764388635754585, "learning_rate": 5.967328980982118e-05, "loss": 0.0010412374511361122, "step": 142130 }, { "epoch": 40.346295770650016, "grad_norm": 0.403177410364151, "learning_rate": 5.967045131989781e-05, "loss": 0.009730426967144013, "step": 142140 }, { "epoch": 40.34913426057337, "grad_norm": 0.26092803478240967, "learning_rate": 5.966761282997446e-05, "loss": 0.014317892491817474, "step": 142150 }, { "epoch": 40.351972750496735, "grad_norm": 7.044114112854004, "learning_rate": 5.96647743400511e-05, "loss": 0.017591650784015655, "step": 142160 }, { "epoch": 40.3548112404201, "grad_norm": 0.8528050184249878, "learning_rate": 5.9661935850127736e-05, "loss": 0.010358338057994843, "step": 142170 }, { "epoch": 40.35764973034346, "grad_norm": 3.032287359237671, "learning_rate": 5.965909736020438e-05, "loss": 0.009366364777088165, "step": 142180 }, { "epoch": 40.36048822026682, "grad_norm": 0.3773716688156128, "learning_rate": 5.965625887028101e-05, "loss": 0.006404641270637512, "step": 142190 }, { "epoch": 40.36332671019018, "grad_norm": 0.19598902761936188, "learning_rate": 5.9653420380357646e-05, "loss": 0.004710403829813003, "step": 142200 }, { "epoch": 40.36616520011354, "grad_norm": 0.0778585895895958, "learning_rate": 5.965058189043429e-05, "loss": 0.0038828961551189423, "step": 142210 }, { "epoch": 40.3690036900369, "grad_norm": 0.39020511507987976, "learning_rate": 5.9647743400510936e-05, "loss": 0.004948557168245315, "step": 142220 }, { "epoch": 40.37184217996026, "grad_norm": 5.710362911224365, "learning_rate": 5.964490491058757e-05, "loss": 0.006108520179986953, "step": 142230 }, { "epoch": 40.374680669883624, "grad_norm": 0.06050027534365654, "learning_rate": 5.964206642066421e-05, "loss": 0.011093492805957793, "step": 142240 }, { "epoch": 40.37751915980698, "grad_norm": 1.2445443868637085, "learning_rate": 5.9639227930740846e-05, "loss": 0.0007947681471705437, "step": 142250 }, { "epoch": 40.38035764973034, "grad_norm": 0.05011604353785515, "learning_rate": 5.963638944081749e-05, "loss": 0.0010515416041016578, "step": 142260 }, { "epoch": 40.383196139653705, "grad_norm": 0.034972742199897766, "learning_rate": 5.963355095089412e-05, "loss": 0.0008631452918052673, "step": 142270 }, { "epoch": 40.38603462957707, "grad_norm": 0.05293922498822212, "learning_rate": 5.9630712460970764e-05, "loss": 0.0006481371819972992, "step": 142280 }, { "epoch": 40.388873119500424, "grad_norm": 0.3652264475822449, "learning_rate": 5.962787397104741e-05, "loss": 0.002958782762289047, "step": 142290 }, { "epoch": 40.39171160942379, "grad_norm": 0.7977079153060913, "learning_rate": 5.9625035481124047e-05, "loss": 0.011059632897377015, "step": 142300 }, { "epoch": 40.39455009934715, "grad_norm": 0.08907410502433777, "learning_rate": 5.962219699120069e-05, "loss": 0.004475264623761177, "step": 142310 }, { "epoch": 40.397388589270506, "grad_norm": 1.1380467414855957, "learning_rate": 5.961935850127732e-05, "loss": 0.0009489981457591057, "step": 142320 }, { "epoch": 40.40022707919387, "grad_norm": 0.5206953883171082, "learning_rate": 5.961652001135396e-05, "loss": 0.006453892588615418, "step": 142330 }, { "epoch": 40.40306556911723, "grad_norm": 2.060075521469116, "learning_rate": 5.96136815214306e-05, "loss": 0.017156080901622774, "step": 142340 }, { "epoch": 40.40590405904059, "grad_norm": 0.29048821330070496, "learning_rate": 5.961084303150725e-05, "loss": 0.006171928346157074, "step": 142350 }, { "epoch": 40.40874254896395, "grad_norm": 3.594104051589966, "learning_rate": 5.960800454158388e-05, "loss": 0.008244134485721588, "step": 142360 }, { "epoch": 40.41158103888731, "grad_norm": 0.03572521731257439, "learning_rate": 5.960516605166052e-05, "loss": 0.017235425114631654, "step": 142370 }, { "epoch": 40.414419528810676, "grad_norm": 0.19064810872077942, "learning_rate": 5.960232756173716e-05, "loss": 0.0031773865222930906, "step": 142380 }, { "epoch": 40.41725801873403, "grad_norm": 0.3954075872898102, "learning_rate": 5.95994890718138e-05, "loss": 0.013761462271213531, "step": 142390 }, { "epoch": 40.420096508657394, "grad_norm": 0.08516613394021988, "learning_rate": 5.959665058189043e-05, "loss": 0.0004079265519976616, "step": 142400 }, { "epoch": 40.42293499858076, "grad_norm": 4.7221503257751465, "learning_rate": 5.9593812091967075e-05, "loss": 0.0017904598265886308, "step": 142410 }, { "epoch": 40.42577348850411, "grad_norm": 0.08182442933320999, "learning_rate": 5.959097360204372e-05, "loss": 0.005049353837966919, "step": 142420 }, { "epoch": 40.428611978427476, "grad_norm": 0.9121172428131104, "learning_rate": 5.958813511212036e-05, "loss": 0.006840399652719498, "step": 142430 }, { "epoch": 40.43145046835084, "grad_norm": 1.0983085632324219, "learning_rate": 5.9585296622197e-05, "loss": 0.002601006627082825, "step": 142440 }, { "epoch": 40.434288958274195, "grad_norm": 1.9270774126052856, "learning_rate": 5.958245813227363e-05, "loss": 0.001259155012667179, "step": 142450 }, { "epoch": 40.43712744819756, "grad_norm": 0.18700569868087769, "learning_rate": 5.957961964235027e-05, "loss": 0.002076851949095726, "step": 142460 }, { "epoch": 40.43996593812092, "grad_norm": 0.2995966970920563, "learning_rate": 5.957678115242691e-05, "loss": 0.0006552977487444877, "step": 142470 }, { "epoch": 40.44280442804428, "grad_norm": 0.1554592400789261, "learning_rate": 5.9573942662503544e-05, "loss": 0.0006002569571137428, "step": 142480 }, { "epoch": 40.44564291796764, "grad_norm": 0.048104673624038696, "learning_rate": 5.957110417258019e-05, "loss": 0.0008505310863256455, "step": 142490 }, { "epoch": 40.448481407891, "grad_norm": 4.153783321380615, "learning_rate": 5.9568265682656833e-05, "loss": 0.000971687026321888, "step": 142500 }, { "epoch": 40.448481407891, "eval_accuracy": 0.9828320722324665, "eval_loss": 0.06806152313947678, "eval_runtime": 33.1591, "eval_samples_per_second": 474.289, "eval_steps_per_second": 7.419, "step": 142500 }, { "epoch": 40.451319897814365, "grad_norm": 0.04633527249097824, "learning_rate": 5.956542719273347e-05, "loss": 0.0004117494449019432, "step": 142510 }, { "epoch": 40.45415838773772, "grad_norm": 2.310817241668701, "learning_rate": 5.956258870281011e-05, "loss": 0.0012331357225775718, "step": 142520 }, { "epoch": 40.45699687766108, "grad_norm": 0.8693340420722961, "learning_rate": 5.9559750212886744e-05, "loss": 0.00613563247025013, "step": 142530 }, { "epoch": 40.459835367584446, "grad_norm": 3.109086275100708, "learning_rate": 5.9556911722963385e-05, "loss": 0.0009439596906304359, "step": 142540 }, { "epoch": 40.46267385750781, "grad_norm": 0.6240366697311401, "learning_rate": 5.9554073233040034e-05, "loss": 0.002036161161959171, "step": 142550 }, { "epoch": 40.465512347431165, "grad_norm": 1.9425342082977295, "learning_rate": 5.955123474311667e-05, "loss": 0.00203481949865818, "step": 142560 }, { "epoch": 40.46835083735453, "grad_norm": 0.14293543994426727, "learning_rate": 5.95483962531933e-05, "loss": 0.0005871003493666648, "step": 142570 }, { "epoch": 40.47118932727789, "grad_norm": 0.022296404466032982, "learning_rate": 5.9545557763269944e-05, "loss": 0.0004890749230980873, "step": 142580 }, { "epoch": 40.47402781720125, "grad_norm": 0.026366310194134712, "learning_rate": 5.954271927334658e-05, "loss": 0.0038227051496505736, "step": 142590 }, { "epoch": 40.47686630712461, "grad_norm": 1.3304052352905273, "learning_rate": 5.953988078342322e-05, "loss": 0.0011858224868774414, "step": 142600 }, { "epoch": 40.47970479704797, "grad_norm": 0.10694858431816101, "learning_rate": 5.9537042293499855e-05, "loss": 0.00029584970325231553, "step": 142610 }, { "epoch": 40.48254328697133, "grad_norm": 0.08717722445726395, "learning_rate": 5.95342038035765e-05, "loss": 0.004255591332912445, "step": 142620 }, { "epoch": 40.48538177689469, "grad_norm": 7.628434181213379, "learning_rate": 5.9531365313653144e-05, "loss": 0.005901144444942474, "step": 142630 }, { "epoch": 40.488220266818054, "grad_norm": 0.08000162243843079, "learning_rate": 5.952852682372978e-05, "loss": 0.0011784914880990983, "step": 142640 }, { "epoch": 40.49105875674142, "grad_norm": 0.13647855818271637, "learning_rate": 5.952568833380642e-05, "loss": 0.00028798691928386686, "step": 142650 }, { "epoch": 40.49389724666477, "grad_norm": 11.240046501159668, "learning_rate": 5.9522849843883055e-05, "loss": 0.007851198315620422, "step": 142660 }, { "epoch": 40.496735736588136, "grad_norm": 0.9682193994522095, "learning_rate": 5.952001135395969e-05, "loss": 0.0018762661144137382, "step": 142670 }, { "epoch": 40.4995742265115, "grad_norm": 1.086074709892273, "learning_rate": 5.951717286403633e-05, "loss": 0.0008072873577475547, "step": 142680 }, { "epoch": 40.502412716434854, "grad_norm": 0.23845277726650238, "learning_rate": 5.951433437411298e-05, "loss": 0.001225084625184536, "step": 142690 }, { "epoch": 40.50525120635822, "grad_norm": 0.008060927502810955, "learning_rate": 5.9511495884189613e-05, "loss": 0.0011053901165723802, "step": 142700 }, { "epoch": 40.50808969628158, "grad_norm": 1.0694893598556519, "learning_rate": 5.9508657394266255e-05, "loss": 0.0005598226562142372, "step": 142710 }, { "epoch": 40.510928186204936, "grad_norm": 8.760772705078125, "learning_rate": 5.950581890434289e-05, "loss": 0.012141507863998414, "step": 142720 }, { "epoch": 40.5137666761283, "grad_norm": 1.598368763923645, "learning_rate": 5.950298041441953e-05, "loss": 0.0013400103896856308, "step": 142730 }, { "epoch": 40.51660516605166, "grad_norm": 1.0824097394943237, "learning_rate": 5.9500141924496165e-05, "loss": 0.004070276021957397, "step": 142740 }, { "epoch": 40.519443655975024, "grad_norm": 0.07297222316265106, "learning_rate": 5.9497303434572814e-05, "loss": 0.002739881910383701, "step": 142750 }, { "epoch": 40.52228214589838, "grad_norm": 2.709643602371216, "learning_rate": 5.9494464944649455e-05, "loss": 0.0030969602987170218, "step": 142760 }, { "epoch": 40.52512063582174, "grad_norm": 0.1009385958313942, "learning_rate": 5.949162645472609e-05, "loss": 0.004229632765054702, "step": 142770 }, { "epoch": 40.527959125745106, "grad_norm": 0.6534969806671143, "learning_rate": 5.948878796480273e-05, "loss": 0.002746960148215294, "step": 142780 }, { "epoch": 40.53079761566846, "grad_norm": 0.10492761433124542, "learning_rate": 5.9485949474879366e-05, "loss": 0.008863040804862976, "step": 142790 }, { "epoch": 40.533636105591825, "grad_norm": 0.21452942490577698, "learning_rate": 5.9483110984956e-05, "loss": 0.0021441087126731874, "step": 142800 }, { "epoch": 40.53647459551519, "grad_norm": 0.019678328186273575, "learning_rate": 5.948027249503264e-05, "loss": 0.0012221969664096832, "step": 142810 }, { "epoch": 40.53931308543854, "grad_norm": 0.12745602428913116, "learning_rate": 5.947743400510929e-05, "loss": 0.005945011600852012, "step": 142820 }, { "epoch": 40.542151575361906, "grad_norm": 0.08332932740449905, "learning_rate": 5.9474595515185924e-05, "loss": 0.0007079940289258957, "step": 142830 }, { "epoch": 40.54499006528527, "grad_norm": 0.074210986495018, "learning_rate": 5.9471757025262566e-05, "loss": 0.0008199237287044525, "step": 142840 }, { "epoch": 40.54782855520863, "grad_norm": 0.025322886183857918, "learning_rate": 5.94689185353392e-05, "loss": 0.0005214091390371323, "step": 142850 }, { "epoch": 40.55066704513199, "grad_norm": 0.022324036806821823, "learning_rate": 5.946608004541584e-05, "loss": 0.0007737496867775917, "step": 142860 }, { "epoch": 40.55350553505535, "grad_norm": 0.03150017932057381, "learning_rate": 5.9463241555492476e-05, "loss": 0.0016988024115562439, "step": 142870 }, { "epoch": 40.55634402497871, "grad_norm": 0.11331470310688019, "learning_rate": 5.946040306556912e-05, "loss": 0.0002889903262257576, "step": 142880 }, { "epoch": 40.55918251490207, "grad_norm": 0.04651650786399841, "learning_rate": 5.9457564575645766e-05, "loss": 0.0021392561495304106, "step": 142890 }, { "epoch": 40.56202100482543, "grad_norm": 0.09958738833665848, "learning_rate": 5.94547260857224e-05, "loss": 0.0009283058345317841, "step": 142900 }, { "epoch": 40.564859494748795, "grad_norm": 0.12276551127433777, "learning_rate": 5.945188759579904e-05, "loss": 0.0005171602591872215, "step": 142910 }, { "epoch": 40.56769798467215, "grad_norm": 0.03271547704935074, "learning_rate": 5.9449049105875676e-05, "loss": 0.00040902700275182726, "step": 142920 }, { "epoch": 40.570536474595514, "grad_norm": 0.17674578726291656, "learning_rate": 5.944621061595231e-05, "loss": 0.0015054387971758843, "step": 142930 }, { "epoch": 40.57337496451888, "grad_norm": 2.326324701309204, "learning_rate": 5.944337212602895e-05, "loss": 0.0035600900650024415, "step": 142940 }, { "epoch": 40.57621345444224, "grad_norm": 0.04298877343535423, "learning_rate": 5.94405336361056e-05, "loss": 0.0018102455884218215, "step": 142950 }, { "epoch": 40.579051944365595, "grad_norm": 0.36363428831100464, "learning_rate": 5.9437695146182235e-05, "loss": 0.005561845749616623, "step": 142960 }, { "epoch": 40.58189043428896, "grad_norm": 0.066896952688694, "learning_rate": 5.9434856656258876e-05, "loss": 0.0005918832495808602, "step": 142970 }, { "epoch": 40.58472892421232, "grad_norm": 0.13402551412582397, "learning_rate": 5.943201816633551e-05, "loss": 0.0031793393194675446, "step": 142980 }, { "epoch": 40.58756741413568, "grad_norm": 2.0409865379333496, "learning_rate": 5.942917967641215e-05, "loss": 0.001129038818180561, "step": 142990 }, { "epoch": 40.59040590405904, "grad_norm": 11.606714248657227, "learning_rate": 5.942634118648879e-05, "loss": 0.003442981094121933, "step": 143000 }, { "epoch": 40.59040590405904, "eval_accuracy": 0.9811788643733707, "eval_loss": 0.07305838912725449, "eval_runtime": 35.7427, "eval_samples_per_second": 440.006, "eval_steps_per_second": 6.883, "step": 143000 }, { "epoch": 40.5932443939824, "grad_norm": 0.9235923290252686, "learning_rate": 5.942350269656543e-05, "loss": 0.0006254680454730987, "step": 143010 }, { "epoch": 40.596082883905765, "grad_norm": 1.6512585878372192, "learning_rate": 5.9420664206642076e-05, "loss": 0.0014905681833624839, "step": 143020 }, { "epoch": 40.59892137382912, "grad_norm": 0.2386341542005539, "learning_rate": 5.941782571671871e-05, "loss": 0.001896313577890396, "step": 143030 }, { "epoch": 40.601759863752484, "grad_norm": 0.034331317991018295, "learning_rate": 5.9414987226795346e-05, "loss": 0.0008275862783193588, "step": 143040 }, { "epoch": 40.60459835367585, "grad_norm": 10.32097339630127, "learning_rate": 5.941214873687199e-05, "loss": 0.0037763483822345735, "step": 143050 }, { "epoch": 40.6074368435992, "grad_norm": 0.19526652991771698, "learning_rate": 5.940931024694862e-05, "loss": 0.0028851233422756196, "step": 143060 }, { "epoch": 40.610275333522566, "grad_norm": 0.01689166948199272, "learning_rate": 5.940647175702526e-05, "loss": 0.003981781005859375, "step": 143070 }, { "epoch": 40.61311382344593, "grad_norm": 0.3123752176761627, "learning_rate": 5.94036332671019e-05, "loss": 0.02865332067012787, "step": 143080 }, { "epoch": 40.615952313369284, "grad_norm": 2.3992464542388916, "learning_rate": 5.9400794777178546e-05, "loss": 0.0011837920174002647, "step": 143090 }, { "epoch": 40.61879080329265, "grad_norm": 0.018948636949062347, "learning_rate": 5.939795628725519e-05, "loss": 0.010756481438875198, "step": 143100 }, { "epoch": 40.62162929321601, "grad_norm": 0.4000900387763977, "learning_rate": 5.939511779733182e-05, "loss": 0.0024094147607684135, "step": 143110 }, { "epoch": 40.62446778313937, "grad_norm": 0.05206798389554024, "learning_rate": 5.939227930740846e-05, "loss": 0.003546294569969177, "step": 143120 }, { "epoch": 40.62730627306273, "grad_norm": 0.6323277354240417, "learning_rate": 5.93894408174851e-05, "loss": 0.012143327295780182, "step": 143130 }, { "epoch": 40.63014476298609, "grad_norm": 0.004129867069423199, "learning_rate": 5.938660232756173e-05, "loss": 0.0004648800939321518, "step": 143140 }, { "epoch": 40.632983252909455, "grad_norm": 0.38193556666374207, "learning_rate": 5.938376383763839e-05, "loss": 0.003053180128335953, "step": 143150 }, { "epoch": 40.63582174283281, "grad_norm": 0.06156162545084953, "learning_rate": 5.938092534771502e-05, "loss": 0.0018746849149465562, "step": 143160 }, { "epoch": 40.63866023275617, "grad_norm": 2.634889602661133, "learning_rate": 5.9378086857791656e-05, "loss": 0.0013219451531767846, "step": 143170 }, { "epoch": 40.641498722679536, "grad_norm": 1.2938185930252075, "learning_rate": 5.93752483678683e-05, "loss": 0.008958423137664795, "step": 143180 }, { "epoch": 40.64433721260289, "grad_norm": 2.571580171585083, "learning_rate": 5.937240987794493e-05, "loss": 0.014407311379909516, "step": 143190 }, { "epoch": 40.647175702526255, "grad_norm": 7.518352508544922, "learning_rate": 5.9369571388021574e-05, "loss": 0.004494618624448776, "step": 143200 }, { "epoch": 40.65001419244962, "grad_norm": 0.24177561700344086, "learning_rate": 5.936673289809821e-05, "loss": 0.005853605270385742, "step": 143210 }, { "epoch": 40.65285268237298, "grad_norm": 0.14724017679691315, "learning_rate": 5.9363894408174857e-05, "loss": 0.0006940728053450584, "step": 143220 }, { "epoch": 40.655691172296336, "grad_norm": 0.05402015149593353, "learning_rate": 5.93610559182515e-05, "loss": 0.0008757511153817177, "step": 143230 }, { "epoch": 40.6585296622197, "grad_norm": 0.21420301496982574, "learning_rate": 5.935821742832813e-05, "loss": 0.0004327576607465744, "step": 143240 }, { "epoch": 40.66136815214306, "grad_norm": 0.10305649042129517, "learning_rate": 5.9355378938404774e-05, "loss": 0.0006138226017355919, "step": 143250 }, { "epoch": 40.66420664206642, "grad_norm": 0.0736321359872818, "learning_rate": 5.935254044848141e-05, "loss": 0.007592938840389252, "step": 143260 }, { "epoch": 40.66704513198978, "grad_norm": 0.11911164969205856, "learning_rate": 5.934970195855804e-05, "loss": 0.017241401970386504, "step": 143270 }, { "epoch": 40.669883621913144, "grad_norm": 0.0034037998411804438, "learning_rate": 5.93468634686347e-05, "loss": 0.0007391856983304024, "step": 143280 }, { "epoch": 40.67272211183651, "grad_norm": 0.2277371883392334, "learning_rate": 5.934402497871133e-05, "loss": 0.004568454250693322, "step": 143290 }, { "epoch": 40.67556060175986, "grad_norm": 0.010520935989916325, "learning_rate": 5.934118648878797e-05, "loss": 0.002475268580019474, "step": 143300 }, { "epoch": 40.678399091683225, "grad_norm": 10.402847290039062, "learning_rate": 5.933834799886461e-05, "loss": 0.0023493649438023566, "step": 143310 }, { "epoch": 40.68123758160659, "grad_norm": 0.011909477412700653, "learning_rate": 5.933550950894124e-05, "loss": 0.0018254950642585754, "step": 143320 }, { "epoch": 40.684076071529944, "grad_norm": 0.045349620282649994, "learning_rate": 5.9332671019017885e-05, "loss": 0.00039427466690540315, "step": 143330 }, { "epoch": 40.68691456145331, "grad_norm": 1.192023515701294, "learning_rate": 5.932983252909452e-05, "loss": 0.013973702490329743, "step": 143340 }, { "epoch": 40.68975305137667, "grad_norm": 0.7403274774551392, "learning_rate": 5.932699403917117e-05, "loss": 0.004308281838893891, "step": 143350 }, { "epoch": 40.692591541300025, "grad_norm": 7.5895490646362305, "learning_rate": 5.932415554924781e-05, "loss": 0.005227228254079818, "step": 143360 }, { "epoch": 40.69543003122339, "grad_norm": 0.2070697844028473, "learning_rate": 5.932131705932444e-05, "loss": 0.009963823109865188, "step": 143370 }, { "epoch": 40.69826852114675, "grad_norm": 0.017709005624055862, "learning_rate": 5.9318478569401085e-05, "loss": 0.003749440237879753, "step": 143380 }, { "epoch": 40.701107011070114, "grad_norm": 0.00820360891520977, "learning_rate": 5.931564007947772e-05, "loss": 0.0031563859432935716, "step": 143390 }, { "epoch": 40.70394550099347, "grad_norm": 0.20433737337589264, "learning_rate": 5.9312801589554354e-05, "loss": 0.001091182976961136, "step": 143400 }, { "epoch": 40.70678399091683, "grad_norm": 0.011291523464024067, "learning_rate": 5.9309963099630995e-05, "loss": 0.0015087053179740906, "step": 143410 }, { "epoch": 40.709622480840196, "grad_norm": 0.041588835418224335, "learning_rate": 5.930712460970764e-05, "loss": 0.0010228028520941734, "step": 143420 }, { "epoch": 40.71246097076355, "grad_norm": 0.07634378224611282, "learning_rate": 5.930428611978428e-05, "loss": 0.000781136378645897, "step": 143430 }, { "epoch": 40.715299460686914, "grad_norm": 5.2814130783081055, "learning_rate": 5.930144762986092e-05, "loss": 0.0032880507409572602, "step": 143440 }, { "epoch": 40.71813795061028, "grad_norm": 0.09213796257972717, "learning_rate": 5.9298609139937554e-05, "loss": 0.004670964181423187, "step": 143450 }, { "epoch": 40.72097644053363, "grad_norm": 0.008234472014009953, "learning_rate": 5.9295770650014195e-05, "loss": 0.007218500971794129, "step": 143460 }, { "epoch": 40.723814930456996, "grad_norm": 0.3663245737552643, "learning_rate": 5.929293216009083e-05, "loss": 0.00044697318226099013, "step": 143470 }, { "epoch": 40.72665342038036, "grad_norm": 0.37644991278648376, "learning_rate": 5.929009367016748e-05, "loss": 0.0018665602430701256, "step": 143480 }, { "epoch": 40.72949191030372, "grad_norm": 0.3405757546424866, "learning_rate": 5.928725518024412e-05, "loss": 0.005057262629270554, "step": 143490 }, { "epoch": 40.73233040022708, "grad_norm": 0.021486885845661163, "learning_rate": 5.9284416690320754e-05, "loss": 0.007295987010002137, "step": 143500 }, { "epoch": 40.73233040022708, "eval_accuracy": 0.9810516945380555, "eval_loss": 0.08045509457588196, "eval_runtime": 34.7949, "eval_samples_per_second": 451.992, "eval_steps_per_second": 7.07, "step": 143500 }, { "epoch": 40.73516889015044, "grad_norm": 0.04890673980116844, "learning_rate": 5.928157820039739e-05, "loss": 0.0018354879692196846, "step": 143510 }, { "epoch": 40.7380073800738, "grad_norm": 7.431393623352051, "learning_rate": 5.927873971047403e-05, "loss": 0.004098164290189743, "step": 143520 }, { "epoch": 40.74084586999716, "grad_norm": 0.022016586735844612, "learning_rate": 5.9275901220550665e-05, "loss": 0.0004105430096387863, "step": 143530 }, { "epoch": 40.74368435992052, "grad_norm": 0.1093812808394432, "learning_rate": 5.9273062730627306e-05, "loss": 0.0005056517198681832, "step": 143540 }, { "epoch": 40.746522849843885, "grad_norm": 0.01017065066844225, "learning_rate": 5.9270224240703954e-05, "loss": 0.0002338230609893799, "step": 143550 }, { "epoch": 40.74936133976724, "grad_norm": 0.1534097045660019, "learning_rate": 5.926738575078059e-05, "loss": 0.00034559518098831177, "step": 143560 }, { "epoch": 40.7521998296906, "grad_norm": 0.2502013146877289, "learning_rate": 5.926454726085723e-05, "loss": 0.0010282456874847411, "step": 143570 }, { "epoch": 40.755038319613966, "grad_norm": 1.2453432083129883, "learning_rate": 5.9261708770933865e-05, "loss": 0.0026580400764942167, "step": 143580 }, { "epoch": 40.75787680953733, "grad_norm": 10.281131744384766, "learning_rate": 5.9258870281010506e-05, "loss": 0.005274901539087296, "step": 143590 }, { "epoch": 40.760715299460685, "grad_norm": 0.026481419801712036, "learning_rate": 5.925603179108714e-05, "loss": 0.0015009038150310516, "step": 143600 }, { "epoch": 40.76355378938405, "grad_norm": 0.13059979677200317, "learning_rate": 5.9253193301163775e-05, "loss": 0.0009613702073693276, "step": 143610 }, { "epoch": 40.76639227930741, "grad_norm": 6.363935947418213, "learning_rate": 5.925035481124043e-05, "loss": 0.006099586933851242, "step": 143620 }, { "epoch": 40.76923076923077, "grad_norm": 0.864789605140686, "learning_rate": 5.9247516321317065e-05, "loss": 0.0033134713768959044, "step": 143630 }, { "epoch": 40.77206925915413, "grad_norm": 0.01187922153621912, "learning_rate": 5.92446778313937e-05, "loss": 0.002066323161125183, "step": 143640 }, { "epoch": 40.77490774907749, "grad_norm": 0.14232254028320312, "learning_rate": 5.924183934147034e-05, "loss": 0.006784371286630631, "step": 143650 }, { "epoch": 40.77774623900085, "grad_norm": 0.07513915747404099, "learning_rate": 5.9239000851546975e-05, "loss": 0.012744808197021484, "step": 143660 }, { "epoch": 40.78058472892421, "grad_norm": 0.18729077279567719, "learning_rate": 5.923616236162362e-05, "loss": 0.00420268140733242, "step": 143670 }, { "epoch": 40.783423218847574, "grad_norm": 0.9046953916549683, "learning_rate": 5.9233323871700265e-05, "loss": 0.015112940967082978, "step": 143680 }, { "epoch": 40.78626170877094, "grad_norm": 0.06346188485622406, "learning_rate": 5.92304853817769e-05, "loss": 0.005135262385010719, "step": 143690 }, { "epoch": 40.78910019869429, "grad_norm": 0.035304728895425797, "learning_rate": 5.922764689185354e-05, "loss": 0.0007838593795895576, "step": 143700 }, { "epoch": 40.791938688617655, "grad_norm": 0.005441020708531141, "learning_rate": 5.9224808401930175e-05, "loss": 0.002795409224927425, "step": 143710 }, { "epoch": 40.79477717854102, "grad_norm": 0.03795967623591423, "learning_rate": 5.922196991200682e-05, "loss": 0.002640540711581707, "step": 143720 }, { "epoch": 40.797615668464374, "grad_norm": 1.7207750082015991, "learning_rate": 5.921913142208345e-05, "loss": 0.0013298653066158295, "step": 143730 }, { "epoch": 40.80045415838774, "grad_norm": 0.10160737484693527, "learning_rate": 5.9216292932160086e-05, "loss": 0.0008639039471745491, "step": 143740 }, { "epoch": 40.8032926483111, "grad_norm": 4.207059860229492, "learning_rate": 5.921345444223674e-05, "loss": 0.002527588978409767, "step": 143750 }, { "epoch": 40.80613113823446, "grad_norm": 0.49527043104171753, "learning_rate": 5.9210615952313376e-05, "loss": 0.0006910799071192742, "step": 143760 }, { "epoch": 40.80896962815782, "grad_norm": 8.517756462097168, "learning_rate": 5.920777746239001e-05, "loss": 0.021093432605266572, "step": 143770 }, { "epoch": 40.81180811808118, "grad_norm": 0.22939132153987885, "learning_rate": 5.920493897246665e-05, "loss": 0.006348015367984771, "step": 143780 }, { "epoch": 40.814646608004544, "grad_norm": 0.15670788288116455, "learning_rate": 5.9202100482543286e-05, "loss": 0.004406396672129631, "step": 143790 }, { "epoch": 40.8174850979279, "grad_norm": 0.04975789040327072, "learning_rate": 5.919926199261993e-05, "loss": 0.0030684249475598337, "step": 143800 }, { "epoch": 40.82032358785126, "grad_norm": 0.17243842780590057, "learning_rate": 5.919642350269656e-05, "loss": 0.0022882942110300066, "step": 143810 }, { "epoch": 40.823162077774626, "grad_norm": 0.032384056597948074, "learning_rate": 5.919358501277321e-05, "loss": 0.0031154289841651916, "step": 143820 }, { "epoch": 40.82600056769798, "grad_norm": 0.08215250819921494, "learning_rate": 5.919074652284985e-05, "loss": 0.0025072501972317695, "step": 143830 }, { "epoch": 40.828839057621344, "grad_norm": 0.036623213440179825, "learning_rate": 5.9187908032926486e-05, "loss": 0.003760376200079918, "step": 143840 }, { "epoch": 40.83167754754471, "grad_norm": 0.45067480206489563, "learning_rate": 5.918506954300313e-05, "loss": 0.0038955532014369964, "step": 143850 }, { "epoch": 40.83451603746807, "grad_norm": 0.26616016030311584, "learning_rate": 5.918223105307976e-05, "loss": 0.0018387889489531518, "step": 143860 }, { "epoch": 40.837354527391426, "grad_norm": 0.16273559629917145, "learning_rate": 5.91793925631564e-05, "loss": 0.0006656413897871971, "step": 143870 }, { "epoch": 40.84019301731479, "grad_norm": 0.889090359210968, "learning_rate": 5.9176554073233045e-05, "loss": 0.009086773544549943, "step": 143880 }, { "epoch": 40.84303150723815, "grad_norm": 0.20082850754261017, "learning_rate": 5.9173715583309686e-05, "loss": 0.003524201363325119, "step": 143890 }, { "epoch": 40.84586999716151, "grad_norm": 0.40557238459587097, "learning_rate": 5.917087709338632e-05, "loss": 0.0035754218697547913, "step": 143900 }, { "epoch": 40.84870848708487, "grad_norm": 0.34479820728302, "learning_rate": 5.916803860346296e-05, "loss": 0.0006799587979912757, "step": 143910 }, { "epoch": 40.85154697700823, "grad_norm": 0.05448165535926819, "learning_rate": 5.91652001135396e-05, "loss": 0.001284412108361721, "step": 143920 }, { "epoch": 40.85438546693159, "grad_norm": 0.05360502004623413, "learning_rate": 5.916236162361624e-05, "loss": 0.0017131853848695755, "step": 143930 }, { "epoch": 40.85722395685495, "grad_norm": 5.588057041168213, "learning_rate": 5.915952313369287e-05, "loss": 0.0048670656979084015, "step": 143940 }, { "epoch": 40.860062446778315, "grad_norm": 12.973480224609375, "learning_rate": 5.915668464376952e-05, "loss": 0.0043779794126749035, "step": 143950 }, { "epoch": 40.86290093670168, "grad_norm": 0.11890798062086105, "learning_rate": 5.915384615384616e-05, "loss": 0.002253056317567825, "step": 143960 }, { "epoch": 40.865739426625034, "grad_norm": 0.06963466852903366, "learning_rate": 5.91510076639228e-05, "loss": 0.0010157212615013122, "step": 143970 }, { "epoch": 40.868577916548396, "grad_norm": 1.7722244262695312, "learning_rate": 5.914816917399943e-05, "loss": 0.0032961010932922364, "step": 143980 }, { "epoch": 40.87141640647176, "grad_norm": 0.06708603352308273, "learning_rate": 5.914533068407607e-05, "loss": 0.0030178992077708245, "step": 143990 }, { "epoch": 40.874254896395115, "grad_norm": 0.1773301512002945, "learning_rate": 5.914249219415271e-05, "loss": 0.0023202845826745034, "step": 144000 }, { "epoch": 40.874254896395115, "eval_accuracy": 0.9804158453614803, "eval_loss": 0.07818161696195602, "eval_runtime": 35.4518, "eval_samples_per_second": 443.616, "eval_steps_per_second": 6.939, "step": 144000 }, { "epoch": 40.87709338631848, "grad_norm": 0.012308003380894661, "learning_rate": 5.913965370422935e-05, "loss": 0.0017876692116260528, "step": 144010 }, { "epoch": 40.87993187624184, "grad_norm": 0.18983735144138336, "learning_rate": 5.9136815214306e-05, "loss": 0.0009992945939302444, "step": 144020 }, { "epoch": 40.8827703661652, "grad_norm": 0.02481500618159771, "learning_rate": 5.913397672438263e-05, "loss": 0.0014883948490023613, "step": 144030 }, { "epoch": 40.88560885608856, "grad_norm": 0.49410852789878845, "learning_rate": 5.913113823445927e-05, "loss": 0.0004719072952866554, "step": 144040 }, { "epoch": 40.88844734601192, "grad_norm": 6.928661823272705, "learning_rate": 5.912829974453591e-05, "loss": 0.007848995923995971, "step": 144050 }, { "epoch": 40.891285835935285, "grad_norm": 0.651411771774292, "learning_rate": 5.912546125461255e-05, "loss": 0.0013357385993003844, "step": 144060 }, { "epoch": 40.89412432585864, "grad_norm": 0.019159160554409027, "learning_rate": 5.9122622764689184e-05, "loss": 0.002373536676168442, "step": 144070 }, { "epoch": 40.896962815782004, "grad_norm": 0.029335135594010353, "learning_rate": 5.911978427476583e-05, "loss": 0.0013203855603933334, "step": 144080 }, { "epoch": 40.89980130570537, "grad_norm": 0.17708368599414825, "learning_rate": 5.911694578484247e-05, "loss": 0.0007213845849037171, "step": 144090 }, { "epoch": 40.90263979562872, "grad_norm": 0.06280657649040222, "learning_rate": 5.911410729491911e-05, "loss": 0.0006477244198322297, "step": 144100 }, { "epoch": 40.905478285552086, "grad_norm": 0.2503390312194824, "learning_rate": 5.911126880499574e-05, "loss": 0.008218071609735488, "step": 144110 }, { "epoch": 40.90831677547545, "grad_norm": 1.5762044191360474, "learning_rate": 5.9108430315072384e-05, "loss": 0.0011641444638371468, "step": 144120 }, { "epoch": 40.91115526539881, "grad_norm": 0.8017350435256958, "learning_rate": 5.910559182514902e-05, "loss": 0.010278590023517609, "step": 144130 }, { "epoch": 40.91399375532217, "grad_norm": 1.578391671180725, "learning_rate": 5.910275333522566e-05, "loss": 0.003921544551849366, "step": 144140 }, { "epoch": 40.91683224524553, "grad_norm": 0.040583815425634384, "learning_rate": 5.909991484530231e-05, "loss": 0.0014848155900835992, "step": 144150 }, { "epoch": 40.91967073516889, "grad_norm": 0.515133798122406, "learning_rate": 5.909707635537894e-05, "loss": 0.0008159404620528221, "step": 144160 }, { "epoch": 40.92250922509225, "grad_norm": 0.3199862241744995, "learning_rate": 5.9094237865455584e-05, "loss": 0.008530177175998688, "step": 144170 }, { "epoch": 40.92534771501561, "grad_norm": 12.800241470336914, "learning_rate": 5.909139937553222e-05, "loss": 0.008608865737915038, "step": 144180 }, { "epoch": 40.928186204938974, "grad_norm": 5.085545539855957, "learning_rate": 5.908856088560886e-05, "loss": 0.006899361312389374, "step": 144190 }, { "epoch": 40.93102469486233, "grad_norm": 7.123458385467529, "learning_rate": 5.9085722395685494e-05, "loss": 0.0023080218583345414, "step": 144200 }, { "epoch": 40.93386318478569, "grad_norm": 0.03769266977906227, "learning_rate": 5.908288390576213e-05, "loss": 0.00046465322375297544, "step": 144210 }, { "epoch": 40.936701674709056, "grad_norm": 0.057032033801078796, "learning_rate": 5.9080045415838784e-05, "loss": 0.000958244502544403, "step": 144220 }, { "epoch": 40.93954016463242, "grad_norm": 0.7090530395507812, "learning_rate": 5.907720692591542e-05, "loss": 0.0005136365070939064, "step": 144230 }, { "epoch": 40.942378654555775, "grad_norm": 0.33989447355270386, "learning_rate": 5.907436843599205e-05, "loss": 0.0024484235793352125, "step": 144240 }, { "epoch": 40.94521714447914, "grad_norm": 0.10458314418792725, "learning_rate": 5.9071529946068695e-05, "loss": 0.0006067780777812004, "step": 144250 }, { "epoch": 40.9480556344025, "grad_norm": 0.16353796422481537, "learning_rate": 5.906869145614533e-05, "loss": 0.0020056126639246942, "step": 144260 }, { "epoch": 40.950894124325856, "grad_norm": 0.1616055965423584, "learning_rate": 5.906585296622197e-05, "loss": 0.0011218421161174775, "step": 144270 }, { "epoch": 40.95373261424922, "grad_norm": 0.07887080311775208, "learning_rate": 5.906301447629862e-05, "loss": 0.0003192277625203133, "step": 144280 }, { "epoch": 40.95657110417258, "grad_norm": 0.07575131207704544, "learning_rate": 5.906017598637525e-05, "loss": 0.0012607872486114502, "step": 144290 }, { "epoch": 40.95940959409594, "grad_norm": 0.07728254795074463, "learning_rate": 5.9057337496451895e-05, "loss": 0.0004459980875253677, "step": 144300 }, { "epoch": 40.9622480840193, "grad_norm": 0.026872551068663597, "learning_rate": 5.905449900652853e-05, "loss": 0.008663161098957062, "step": 144310 }, { "epoch": 40.96508657394266, "grad_norm": 0.291949599981308, "learning_rate": 5.905166051660517e-05, "loss": 0.004074456542730332, "step": 144320 }, { "epoch": 40.967925063866026, "grad_norm": 0.5120396614074707, "learning_rate": 5.9048822026681805e-05, "loss": 0.0006567012518644333, "step": 144330 }, { "epoch": 40.97076355378938, "grad_norm": 0.07805702835321426, "learning_rate": 5.904598353675844e-05, "loss": 0.00045165102928876875, "step": 144340 }, { "epoch": 40.973602043712745, "grad_norm": 0.07403211295604706, "learning_rate": 5.904314504683509e-05, "loss": 0.00045789442956447603, "step": 144350 }, { "epoch": 40.97644053363611, "grad_norm": 0.007573616225272417, "learning_rate": 5.904030655691173e-05, "loss": 0.00029039643704891204, "step": 144360 }, { "epoch": 40.979279023559464, "grad_norm": 0.057690978050231934, "learning_rate": 5.9037468066988364e-05, "loss": 0.0019020244479179382, "step": 144370 }, { "epoch": 40.98211751348283, "grad_norm": 1.3138244152069092, "learning_rate": 5.9034629577065005e-05, "loss": 0.000912853516638279, "step": 144380 }, { "epoch": 40.98495600340619, "grad_norm": 0.4295095205307007, "learning_rate": 5.903179108714164e-05, "loss": 0.0012177277356386184, "step": 144390 }, { "epoch": 40.987794493329545, "grad_norm": 0.08066995441913605, "learning_rate": 5.902895259721828e-05, "loss": 0.004780963808298111, "step": 144400 }, { "epoch": 40.99063298325291, "grad_norm": 0.8932247757911682, "learning_rate": 5.9026114107294916e-05, "loss": 0.0015566842630505562, "step": 144410 }, { "epoch": 40.99347147317627, "grad_norm": 7.783158302307129, "learning_rate": 5.9023275617371564e-05, "loss": 0.008339187502861023, "step": 144420 }, { "epoch": 40.996309963099634, "grad_norm": 0.04933043196797371, "learning_rate": 5.9020437127448205e-05, "loss": 0.00028826668858528137, "step": 144430 }, { "epoch": 40.99914845302299, "grad_norm": 0.06444370746612549, "learning_rate": 5.901759863752484e-05, "loss": 0.0005461972206830979, "step": 144440 }, { "epoch": 41.00198694294635, "grad_norm": 0.019178759306669235, "learning_rate": 5.9014760147601475e-05, "loss": 0.0017024343833327293, "step": 144450 }, { "epoch": 41.004825432869715, "grad_norm": 1.4440556764602661, "learning_rate": 5.9011921657678116e-05, "loss": 0.000996669940650463, "step": 144460 }, { "epoch": 41.00766392279307, "grad_norm": 0.08371582627296448, "learning_rate": 5.900908316775475e-05, "loss": 0.0008730918169021606, "step": 144470 }, { "epoch": 41.010502412716434, "grad_norm": 0.06885769963264465, "learning_rate": 5.90062446778314e-05, "loss": 0.001045554131269455, "step": 144480 }, { "epoch": 41.0133409026398, "grad_norm": 0.00897088460624218, "learning_rate": 5.900340618790804e-05, "loss": 0.0007263906300067902, "step": 144490 }, { "epoch": 41.01617939256316, "grad_norm": 0.08211721479892731, "learning_rate": 5.9000567697984675e-05, "loss": 0.0024586526677012445, "step": 144500 }, { "epoch": 41.01617939256316, "eval_accuracy": 0.9798435811025624, "eval_loss": 0.08969227224588394, "eval_runtime": 35.0117, "eval_samples_per_second": 449.193, "eval_steps_per_second": 7.026, "step": 144500 }, { "epoch": 41.019017882486516, "grad_norm": 5.165227890014648, "learning_rate": 5.8997729208061316e-05, "loss": 0.00562550276517868, "step": 144510 }, { "epoch": 41.02185637240988, "grad_norm": 5.354283332824707, "learning_rate": 5.899489071813795e-05, "loss": 0.005991879850625992, "step": 144520 }, { "epoch": 41.02469486233324, "grad_norm": 0.04433685913681984, "learning_rate": 5.899205222821459e-05, "loss": 0.000427992083132267, "step": 144530 }, { "epoch": 41.0275333522566, "grad_norm": 0.11864544451236725, "learning_rate": 5.898921373829123e-05, "loss": 0.007104694843292236, "step": 144540 }, { "epoch": 41.03037184217996, "grad_norm": 0.030506953597068787, "learning_rate": 5.8986375248367875e-05, "loss": 0.007659075409173965, "step": 144550 }, { "epoch": 41.03321033210332, "grad_norm": 0.010274100117385387, "learning_rate": 5.8983536758444516e-05, "loss": 0.0014304697513580322, "step": 144560 }, { "epoch": 41.03604882202668, "grad_norm": 0.2079620510339737, "learning_rate": 5.898069826852115e-05, "loss": 0.001691589318215847, "step": 144570 }, { "epoch": 41.03888731195004, "grad_norm": 0.03215976804494858, "learning_rate": 5.8977859778597785e-05, "loss": 0.004351912438869477, "step": 144580 }, { "epoch": 41.041725801873405, "grad_norm": 0.5914703607559204, "learning_rate": 5.897502128867443e-05, "loss": 0.0033914517611265183, "step": 144590 }, { "epoch": 41.04456429179677, "grad_norm": 0.024503856897354126, "learning_rate": 5.897218279875106e-05, "loss": 0.008566734194755555, "step": 144600 }, { "epoch": 41.04740278172012, "grad_norm": 0.038698770105838776, "learning_rate": 5.896934430882771e-05, "loss": 0.007404789328575134, "step": 144610 }, { "epoch": 41.050241271643486, "grad_norm": 0.2898517847061157, "learning_rate": 5.896650581890435e-05, "loss": 0.0015968507155776023, "step": 144620 }, { "epoch": 41.05307976156685, "grad_norm": 0.17608296871185303, "learning_rate": 5.8963667328980985e-05, "loss": 0.0041551291942596436, "step": 144630 }, { "epoch": 41.055918251490205, "grad_norm": 0.3182660639286041, "learning_rate": 5.896082883905763e-05, "loss": 0.0002292836084961891, "step": 144640 }, { "epoch": 41.05875674141357, "grad_norm": 0.036524053663015366, "learning_rate": 5.895799034913426e-05, "loss": 0.002161320485174656, "step": 144650 }, { "epoch": 41.06159523133693, "grad_norm": 0.4663165509700775, "learning_rate": 5.89551518592109e-05, "loss": 0.00048192925751209257, "step": 144660 }, { "epoch": 41.064433721260286, "grad_norm": 0.30252593755722046, "learning_rate": 5.895231336928754e-05, "loss": 0.0003612495958805084, "step": 144670 }, { "epoch": 41.06727221118365, "grad_norm": 1.2105026245117188, "learning_rate": 5.8949474879364186e-05, "loss": 0.0012543590739369393, "step": 144680 }, { "epoch": 41.07011070110701, "grad_norm": 0.022446418181061745, "learning_rate": 5.894663638944083e-05, "loss": 0.0012849343940615654, "step": 144690 }, { "epoch": 41.072949191030375, "grad_norm": 0.0037500662729144096, "learning_rate": 5.894379789951746e-05, "loss": 0.00034527573734521866, "step": 144700 }, { "epoch": 41.07578768095373, "grad_norm": 1.9706040620803833, "learning_rate": 5.8940959409594096e-05, "loss": 0.0027629980817437173, "step": 144710 }, { "epoch": 41.078626170877094, "grad_norm": 1.8815170526504517, "learning_rate": 5.893812091967074e-05, "loss": 0.0018371006473898888, "step": 144720 }, { "epoch": 41.08146466080046, "grad_norm": 1.281380295753479, "learning_rate": 5.893528242974737e-05, "loss": 0.0026117175817489623, "step": 144730 }, { "epoch": 41.08430315072381, "grad_norm": 0.4093821942806244, "learning_rate": 5.8932443939824013e-05, "loss": 0.004408936947584152, "step": 144740 }, { "epoch": 41.087141640647175, "grad_norm": 7.064424991607666, "learning_rate": 5.892960544990066e-05, "loss": 0.0123142309486866, "step": 144750 }, { "epoch": 41.08998013057054, "grad_norm": 0.02662782184779644, "learning_rate": 5.8926766959977296e-05, "loss": 0.011823469400405883, "step": 144760 }, { "epoch": 41.092818620493894, "grad_norm": 0.02551036886870861, "learning_rate": 5.892421231904627e-05, "loss": 0.0229246124625206, "step": 144770 }, { "epoch": 41.09565711041726, "grad_norm": 0.01957218162715435, "learning_rate": 5.8921373829122904e-05, "loss": 0.0008142374455928803, "step": 144780 }, { "epoch": 41.09849560034062, "grad_norm": 0.06775635480880737, "learning_rate": 5.8918535339199545e-05, "loss": 0.007054807245731353, "step": 144790 }, { "epoch": 41.10133409026398, "grad_norm": 3.0631022453308105, "learning_rate": 5.8915696849276194e-05, "loss": 0.001526324637234211, "step": 144800 }, { "epoch": 41.10417258018734, "grad_norm": 0.5677797794342041, "learning_rate": 5.891285835935283e-05, "loss": 0.006222568079829216, "step": 144810 }, { "epoch": 41.1070110701107, "grad_norm": 0.5690910220146179, "learning_rate": 5.891001986942947e-05, "loss": 0.0004984728991985321, "step": 144820 }, { "epoch": 41.109849560034064, "grad_norm": 0.2872450649738312, "learning_rate": 5.8907181379506104e-05, "loss": 0.00037709791213274, "step": 144830 }, { "epoch": 41.11268804995742, "grad_norm": 0.6090224385261536, "learning_rate": 5.890434288958274e-05, "loss": 0.007140296697616577, "step": 144840 }, { "epoch": 41.11552653988078, "grad_norm": 0.8079887628555298, "learning_rate": 5.890150439965938e-05, "loss": 0.004538212716579437, "step": 144850 }, { "epoch": 41.118365029804146, "grad_norm": 0.1242690458893776, "learning_rate": 5.889866590973603e-05, "loss": 0.00040123630315065385, "step": 144860 }, { "epoch": 41.1212035197275, "grad_norm": 0.5600941181182861, "learning_rate": 5.889582741981266e-05, "loss": 0.0007474970072507859, "step": 144870 }, { "epoch": 41.124042009650864, "grad_norm": 0.011365256272256374, "learning_rate": 5.8892988929889304e-05, "loss": 0.007668216526508331, "step": 144880 }, { "epoch": 41.12688049957423, "grad_norm": 6.863003253936768, "learning_rate": 5.889015043996594e-05, "loss": 0.005504323542118073, "step": 144890 }, { "epoch": 41.12971898949759, "grad_norm": 0.2783076763153076, "learning_rate": 5.888731195004258e-05, "loss": 0.004505572468042373, "step": 144900 }, { "epoch": 41.132557479420946, "grad_norm": 0.11287106573581696, "learning_rate": 5.8884473460119215e-05, "loss": 0.0022017301991581915, "step": 144910 }, { "epoch": 41.13539596934431, "grad_norm": 0.07117508351802826, "learning_rate": 5.8881634970195856e-05, "loss": 0.0015308449044823646, "step": 144920 }, { "epoch": 41.13823445926767, "grad_norm": 0.01685643382370472, "learning_rate": 5.8878796480272504e-05, "loss": 0.0005149859935045242, "step": 144930 }, { "epoch": 41.14107294919103, "grad_norm": 0.30695840716362, "learning_rate": 5.887595799034914e-05, "loss": 0.0033198222517967224, "step": 144940 }, { "epoch": 41.14391143911439, "grad_norm": 1.3089724779129028, "learning_rate": 5.887311950042578e-05, "loss": 0.0012748477980494498, "step": 144950 }, { "epoch": 41.14674992903775, "grad_norm": 9.177230834960938, "learning_rate": 5.8870281010502415e-05, "loss": 0.003052480146288872, "step": 144960 }, { "epoch": 41.149588418961116, "grad_norm": 0.11217060685157776, "learning_rate": 5.886744252057905e-05, "loss": 0.0016152327880263329, "step": 144970 }, { "epoch": 41.15242690888447, "grad_norm": 0.10529694706201553, "learning_rate": 5.886460403065569e-05, "loss": 0.001720801368355751, "step": 144980 }, { "epoch": 41.155265398807835, "grad_norm": 0.39430177211761475, "learning_rate": 5.886176554073234e-05, "loss": 0.0038261167705059052, "step": 144990 }, { "epoch": 41.1581038887312, "grad_norm": 0.8419860601425171, "learning_rate": 5.8858927050808974e-05, "loss": 0.0005773700773715973, "step": 145000 }, { "epoch": 41.1581038887312, "eval_accuracy": 0.9828320722324665, "eval_loss": 0.06765663623809814, "eval_runtime": 33.1299, "eval_samples_per_second": 474.707, "eval_steps_per_second": 7.425, "step": 145000 }, { "epoch": 41.16094237865455, "grad_norm": 0.01744770258665085, "learning_rate": 5.8856088560885615e-05, "loss": 0.000300886482000351, "step": 145010 }, { "epoch": 41.163780868577916, "grad_norm": 0.020032668486237526, "learning_rate": 5.885325007096225e-05, "loss": 0.0047354049980640415, "step": 145020 }, { "epoch": 41.16661935850128, "grad_norm": 0.042309124022722244, "learning_rate": 5.885041158103889e-05, "loss": 0.00031979475170373917, "step": 145030 }, { "epoch": 41.169457848424635, "grad_norm": 0.020440705120563507, "learning_rate": 5.8847573091115526e-05, "loss": 0.009955549240112304, "step": 145040 }, { "epoch": 41.172296338348, "grad_norm": 0.02791132591664791, "learning_rate": 5.884473460119217e-05, "loss": 0.0011842455714941025, "step": 145050 }, { "epoch": 41.17513482827136, "grad_norm": 5.81239652633667, "learning_rate": 5.8841896111268815e-05, "loss": 0.0024487148970365525, "step": 145060 }, { "epoch": 41.177973318194724, "grad_norm": 1.6659034490585327, "learning_rate": 5.883905762134545e-05, "loss": 0.0025142496451735497, "step": 145070 }, { "epoch": 41.18081180811808, "grad_norm": 7.796630382537842, "learning_rate": 5.883621913142209e-05, "loss": 0.011771781742572785, "step": 145080 }, { "epoch": 41.18365029804144, "grad_norm": 0.4909719228744507, "learning_rate": 5.8833380641498726e-05, "loss": 0.008883287757635116, "step": 145090 }, { "epoch": 41.186488787964805, "grad_norm": 0.3986189067363739, "learning_rate": 5.883054215157536e-05, "loss": 0.0018841015174984933, "step": 145100 }, { "epoch": 41.18932727788816, "grad_norm": 0.03725152462720871, "learning_rate": 5.8827703661652e-05, "loss": 0.0058600157499313354, "step": 145110 }, { "epoch": 41.192165767811524, "grad_norm": 3.555105447769165, "learning_rate": 5.8824865171728636e-05, "loss": 0.0008665855973958969, "step": 145120 }, { "epoch": 41.19500425773489, "grad_norm": 0.09549369663000107, "learning_rate": 5.8822026681805284e-05, "loss": 0.0007546974346041679, "step": 145130 }, { "epoch": 41.19784274765824, "grad_norm": 0.2417885810136795, "learning_rate": 5.8819188191881926e-05, "loss": 0.0007884392514824867, "step": 145140 }, { "epoch": 41.200681237581605, "grad_norm": 0.1239282563328743, "learning_rate": 5.881634970195856e-05, "loss": 0.0014668639749288559, "step": 145150 }, { "epoch": 41.20351972750497, "grad_norm": 0.1411876082420349, "learning_rate": 5.88135112120352e-05, "loss": 0.0018642745912075044, "step": 145160 }, { "epoch": 41.20635821742833, "grad_norm": 0.006056177895516157, "learning_rate": 5.8810672722111836e-05, "loss": 0.00035219714045524596, "step": 145170 }, { "epoch": 41.20919670735169, "grad_norm": 0.03289073333144188, "learning_rate": 5.880783423218848e-05, "loss": 0.001314394734799862, "step": 145180 }, { "epoch": 41.21203519727505, "grad_norm": 0.1389913707971573, "learning_rate": 5.8804995742265126e-05, "loss": 0.0004889219999313354, "step": 145190 }, { "epoch": 41.21487368719841, "grad_norm": 0.1307646483182907, "learning_rate": 5.880215725234176e-05, "loss": 0.006166274100542069, "step": 145200 }, { "epoch": 41.21771217712177, "grad_norm": 0.18569988012313843, "learning_rate": 5.8799318762418395e-05, "loss": 0.014197015762329101, "step": 145210 }, { "epoch": 41.22055066704513, "grad_norm": 0.18879322707653046, "learning_rate": 5.8796480272495036e-05, "loss": 0.0007955653592944146, "step": 145220 }, { "epoch": 41.223389156968494, "grad_norm": 0.10668859630823135, "learning_rate": 5.879364178257167e-05, "loss": 0.0035322364419698715, "step": 145230 }, { "epoch": 41.22622764689185, "grad_norm": 0.34342268109321594, "learning_rate": 5.879080329264831e-05, "loss": 0.0009992938488721848, "step": 145240 }, { "epoch": 41.22906613681521, "grad_norm": 0.07415453344583511, "learning_rate": 5.878796480272495e-05, "loss": 0.0007924078032374382, "step": 145250 }, { "epoch": 41.231904626738576, "grad_norm": 1.1786993741989136, "learning_rate": 5.8785126312801595e-05, "loss": 0.002957983873784542, "step": 145260 }, { "epoch": 41.23474311666194, "grad_norm": 0.0317879244685173, "learning_rate": 5.8782287822878236e-05, "loss": 0.003275410085916519, "step": 145270 }, { "epoch": 41.237581606585294, "grad_norm": 4.406169414520264, "learning_rate": 5.877944933295487e-05, "loss": 0.005249836295843124, "step": 145280 }, { "epoch": 41.24042009650866, "grad_norm": 0.14132271707057953, "learning_rate": 5.877661084303151e-05, "loss": 0.005497867614030838, "step": 145290 }, { "epoch": 41.24325858643202, "grad_norm": 2.679203510284424, "learning_rate": 5.877377235310815e-05, "loss": 0.0017658902332186698, "step": 145300 }, { "epoch": 41.246097076355376, "grad_norm": 2.2193667888641357, "learning_rate": 5.877093386318478e-05, "loss": 0.0013938939198851585, "step": 145310 }, { "epoch": 41.24893556627874, "grad_norm": 2.6246230602264404, "learning_rate": 5.876809537326142e-05, "loss": 0.0013784952461719513, "step": 145320 }, { "epoch": 41.2517740562021, "grad_norm": 0.028877319768071175, "learning_rate": 5.876525688333807e-05, "loss": 0.0008573114871978759, "step": 145330 }, { "epoch": 41.254612546125465, "grad_norm": 0.007104033138602972, "learning_rate": 5.8762418393414706e-05, "loss": 0.016698472201824188, "step": 145340 }, { "epoch": 41.25745103604882, "grad_norm": 0.08716236054897308, "learning_rate": 5.875957990349135e-05, "loss": 0.0019641883671283724, "step": 145350 }, { "epoch": 41.26028952597218, "grad_norm": 1.0793309211730957, "learning_rate": 5.875674141356798e-05, "loss": 0.0039888940751552585, "step": 145360 }, { "epoch": 41.263128015895546, "grad_norm": 0.004836807027459145, "learning_rate": 5.875390292364462e-05, "loss": 0.005382802337408066, "step": 145370 }, { "epoch": 41.2659665058189, "grad_norm": 0.07960745692253113, "learning_rate": 5.875106443372126e-05, "loss": 0.0015163496136665344, "step": 145380 }, { "epoch": 41.268804995742265, "grad_norm": 1.0530580282211304, "learning_rate": 5.8748225943797906e-05, "loss": 0.009351319074630738, "step": 145390 }, { "epoch": 41.27164348566563, "grad_norm": 0.05187901481986046, "learning_rate": 5.874538745387455e-05, "loss": 0.0013508226722478867, "step": 145400 }, { "epoch": 41.274481975588984, "grad_norm": 0.09002754092216492, "learning_rate": 5.874254896395118e-05, "loss": 0.0008621986955404282, "step": 145410 }, { "epoch": 41.27732046551235, "grad_norm": 0.1014762818813324, "learning_rate": 5.873971047402782e-05, "loss": 0.005639283359050751, "step": 145420 }, { "epoch": 41.28015895543571, "grad_norm": 0.022356068715453148, "learning_rate": 5.873687198410446e-05, "loss": 0.0019466567784547806, "step": 145430 }, { "epoch": 41.28299744535907, "grad_norm": 1.3115692138671875, "learning_rate": 5.873403349418109e-05, "loss": 0.0018643205985426903, "step": 145440 }, { "epoch": 41.28583593528243, "grad_norm": 1.612743854522705, "learning_rate": 5.8731195004257734e-05, "loss": 0.0007210463285446167, "step": 145450 }, { "epoch": 41.28867442520579, "grad_norm": 0.7352781891822815, "learning_rate": 5.872835651433438e-05, "loss": 0.0005157953128218651, "step": 145460 }, { "epoch": 41.291512915129154, "grad_norm": 0.16214953362941742, "learning_rate": 5.8725518024411017e-05, "loss": 0.008177148550748825, "step": 145470 }, { "epoch": 41.29435140505251, "grad_norm": 2.0975589752197266, "learning_rate": 5.872267953448766e-05, "loss": 0.004838701710104943, "step": 145480 }, { "epoch": 41.29718989497587, "grad_norm": 0.044488780200481415, "learning_rate": 5.871984104456429e-05, "loss": 0.001233726181089878, "step": 145490 }, { "epoch": 41.300028384899235, "grad_norm": 0.4174046218395233, "learning_rate": 5.8717002554640934e-05, "loss": 0.01061132550239563, "step": 145500 }, { "epoch": 41.300028384899235, "eval_accuracy": 0.9811152794557131, "eval_loss": 0.08064932376146317, "eval_runtime": 33.4381, "eval_samples_per_second": 470.332, "eval_steps_per_second": 7.357, "step": 145500 }, { "epoch": 41.30286687482259, "grad_norm": 8.842097282409668, "learning_rate": 5.871416406471757e-05, "loss": 0.00810789167881012, "step": 145510 }, { "epoch": 41.305705364745954, "grad_norm": 3.145850896835327, "learning_rate": 5.871132557479421e-05, "loss": 0.00541890524327755, "step": 145520 }, { "epoch": 41.30854385466932, "grad_norm": 1.2253642082214355, "learning_rate": 5.870848708487086e-05, "loss": 0.002346349135041237, "step": 145530 }, { "epoch": 41.31138234459268, "grad_norm": 7.472479343414307, "learning_rate": 5.870564859494749e-05, "loss": 0.004613812267780304, "step": 145540 }, { "epoch": 41.314220834516036, "grad_norm": 7.551267623901367, "learning_rate": 5.8702810105024134e-05, "loss": 0.003008134476840496, "step": 145550 }, { "epoch": 41.3170593244394, "grad_norm": 0.056840680539608, "learning_rate": 5.869997161510077e-05, "loss": 0.0017050251364707947, "step": 145560 }, { "epoch": 41.31989781436276, "grad_norm": 0.24214757978916168, "learning_rate": 5.86971331251774e-05, "loss": 0.0013001754879951477, "step": 145570 }, { "epoch": 41.32273630428612, "grad_norm": 0.009407652541995049, "learning_rate": 5.8694294635254045e-05, "loss": 0.0030156729742884638, "step": 145580 }, { "epoch": 41.32557479420948, "grad_norm": 0.8199518918991089, "learning_rate": 5.869145614533069e-05, "loss": 0.0041797574609518055, "step": 145590 }, { "epoch": 41.32841328413284, "grad_norm": 0.3639637529850006, "learning_rate": 5.868861765540733e-05, "loss": 0.004892380535602569, "step": 145600 }, { "epoch": 41.3312517740562, "grad_norm": 0.0809481143951416, "learning_rate": 5.868577916548397e-05, "loss": 0.00044105425477027895, "step": 145610 }, { "epoch": 41.33409026397956, "grad_norm": 0.062217313796281815, "learning_rate": 5.86829406755606e-05, "loss": 0.0018084283918142318, "step": 145620 }, { "epoch": 41.336928753902924, "grad_norm": 0.048892661929130554, "learning_rate": 5.8680102185637245e-05, "loss": 0.0005909547209739685, "step": 145630 }, { "epoch": 41.33976724382629, "grad_norm": 2.229015588760376, "learning_rate": 5.867726369571388e-05, "loss": 0.0012481238692998887, "step": 145640 }, { "epoch": 41.34260573374964, "grad_norm": 1.4131642580032349, "learning_rate": 5.867442520579052e-05, "loss": 0.0010107535868883132, "step": 145650 }, { "epoch": 41.345444223673006, "grad_norm": 3.160653591156006, "learning_rate": 5.867158671586717e-05, "loss": 0.003820895403623581, "step": 145660 }, { "epoch": 41.34828271359637, "grad_norm": 8.393513679504395, "learning_rate": 5.86687482259438e-05, "loss": 0.010799836367368698, "step": 145670 }, { "epoch": 41.351121203519725, "grad_norm": 0.016207391396164894, "learning_rate": 5.866590973602044e-05, "loss": 0.017931999266147615, "step": 145680 }, { "epoch": 41.35395969344309, "grad_norm": 4.774864673614502, "learning_rate": 5.866307124609708e-05, "loss": 0.040046247839927676, "step": 145690 }, { "epoch": 41.35679818336645, "grad_norm": 0.043215278536081314, "learning_rate": 5.8660232756173714e-05, "loss": 0.004232919216156006, "step": 145700 }, { "epoch": 41.35963667328981, "grad_norm": 1.6708074808120728, "learning_rate": 5.8657394266250355e-05, "loss": 0.003630925714969635, "step": 145710 }, { "epoch": 41.36247516321317, "grad_norm": 6.469550609588623, "learning_rate": 5.865455577632699e-05, "loss": 0.0037741027772426605, "step": 145720 }, { "epoch": 41.36531365313653, "grad_norm": 0.21316367387771606, "learning_rate": 5.865171728640364e-05, "loss": 0.0025115272030234335, "step": 145730 }, { "epoch": 41.368152143059895, "grad_norm": 0.018672466278076172, "learning_rate": 5.864887879648028e-05, "loss": 0.0013276498764753341, "step": 145740 }, { "epoch": 41.37099063298325, "grad_norm": 0.0724165290594101, "learning_rate": 5.8646040306556914e-05, "loss": 0.0022035308182239534, "step": 145750 }, { "epoch": 41.37382912290661, "grad_norm": 0.29891881346702576, "learning_rate": 5.8643201816633555e-05, "loss": 0.00221877247095108, "step": 145760 }, { "epoch": 41.376667612829976, "grad_norm": 0.017760997638106346, "learning_rate": 5.864036332671019e-05, "loss": 0.012022150307893753, "step": 145770 }, { "epoch": 41.37950610275333, "grad_norm": 0.9957917332649231, "learning_rate": 5.8637524836786825e-05, "loss": 0.0039200931787490845, "step": 145780 }, { "epoch": 41.382344592676695, "grad_norm": 0.3669133484363556, "learning_rate": 5.863468634686348e-05, "loss": 0.0061934515833854675, "step": 145790 }, { "epoch": 41.38518308260006, "grad_norm": 3.8157639503479004, "learning_rate": 5.8631847856940114e-05, "loss": 0.003415904939174652, "step": 145800 }, { "epoch": 41.38802157252342, "grad_norm": 0.7686415910720825, "learning_rate": 5.862900936701675e-05, "loss": 0.00962616354227066, "step": 145810 }, { "epoch": 41.39086006244678, "grad_norm": 0.21498394012451172, "learning_rate": 5.862617087709339e-05, "loss": 0.006994500011205673, "step": 145820 }, { "epoch": 41.39369855237014, "grad_norm": 0.29482078552246094, "learning_rate": 5.8623332387170025e-05, "loss": 0.007533284276723862, "step": 145830 }, { "epoch": 41.3965370422935, "grad_norm": 0.8083641529083252, "learning_rate": 5.8620493897246666e-05, "loss": 0.0015950415283441544, "step": 145840 }, { "epoch": 41.39937553221686, "grad_norm": 1.7154521942138672, "learning_rate": 5.86176554073233e-05, "loss": 0.008654941618442536, "step": 145850 }, { "epoch": 41.40221402214022, "grad_norm": 0.047019392251968384, "learning_rate": 5.861481691739995e-05, "loss": 0.0025147369131445886, "step": 145860 }, { "epoch": 41.405052512063584, "grad_norm": 0.018691329285502434, "learning_rate": 5.861197842747659e-05, "loss": 0.0006093259900808335, "step": 145870 }, { "epoch": 41.40789100198694, "grad_norm": 0.2718372344970703, "learning_rate": 5.8609139937553225e-05, "loss": 0.00416010245680809, "step": 145880 }, { "epoch": 41.4107294919103, "grad_norm": 11.633506774902344, "learning_rate": 5.8606301447629866e-05, "loss": 0.013574011623859406, "step": 145890 }, { "epoch": 41.413567981833665, "grad_norm": 0.24511748552322388, "learning_rate": 5.86034629577065e-05, "loss": 0.0007427169010043144, "step": 145900 }, { "epoch": 41.41640647175703, "grad_norm": 0.23799605667591095, "learning_rate": 5.8600624467783135e-05, "loss": 0.0022191422060132025, "step": 145910 }, { "epoch": 41.419244961680384, "grad_norm": 0.016887199133634567, "learning_rate": 5.859778597785978e-05, "loss": 0.008816300332546234, "step": 145920 }, { "epoch": 41.42208345160375, "grad_norm": 0.46408891677856445, "learning_rate": 5.8594947487936425e-05, "loss": 0.006526052951812744, "step": 145930 }, { "epoch": 41.42492194152711, "grad_norm": 4.358259677886963, "learning_rate": 5.859210899801306e-05, "loss": 0.010847023874521255, "step": 145940 }, { "epoch": 41.427760431450466, "grad_norm": 0.6787407994270325, "learning_rate": 5.85892705080897e-05, "loss": 0.008945057541131974, "step": 145950 }, { "epoch": 41.43059892137383, "grad_norm": 0.8865693807601929, "learning_rate": 5.8586432018166335e-05, "loss": 0.0010816680267453193, "step": 145960 }, { "epoch": 41.43343741129719, "grad_norm": 0.50984787940979, "learning_rate": 5.858359352824298e-05, "loss": 0.003683566674590111, "step": 145970 }, { "epoch": 41.43627590122055, "grad_norm": 0.45110899209976196, "learning_rate": 5.858075503831961e-05, "loss": 0.0031035276129841804, "step": 145980 }, { "epoch": 41.43911439114391, "grad_norm": 0.09176492691040039, "learning_rate": 5.857791654839626e-05, "loss": 0.00028884001076221464, "step": 145990 }, { "epoch": 41.44195288106727, "grad_norm": 0.10646390169858932, "learning_rate": 5.85750780584729e-05, "loss": 0.005592567473649978, "step": 146000 }, { "epoch": 41.44195288106727, "eval_accuracy": 0.9830228269854391, "eval_loss": 0.06571593135595322, "eval_runtime": 33.0308, "eval_samples_per_second": 476.131, "eval_steps_per_second": 7.448, "step": 146000 }, { "epoch": 41.444791370990636, "grad_norm": 0.039045628160238266, "learning_rate": 5.8572239568549536e-05, "loss": 0.01040572002530098, "step": 146010 }, { "epoch": 41.44762986091399, "grad_norm": 0.07591217756271362, "learning_rate": 5.856940107862618e-05, "loss": 0.00025253258645534514, "step": 146020 }, { "epoch": 41.450468350837355, "grad_norm": 0.4098784923553467, "learning_rate": 5.856656258870281e-05, "loss": 0.0012377183884382248, "step": 146030 }, { "epoch": 41.45330684076072, "grad_norm": 0.021459193900227547, "learning_rate": 5.8563724098779446e-05, "loss": 0.004071485251188278, "step": 146040 }, { "epoch": 41.45614533068407, "grad_norm": 0.13565509021282196, "learning_rate": 5.856088560885609e-05, "loss": 0.00205560103058815, "step": 146050 }, { "epoch": 41.458983820607436, "grad_norm": 8.055500030517578, "learning_rate": 5.8558047118932736e-05, "loss": 0.006392775475978852, "step": 146060 }, { "epoch": 41.4618223105308, "grad_norm": 0.1772073656320572, "learning_rate": 5.855520862900937e-05, "loss": 0.003565492480993271, "step": 146070 }, { "epoch": 41.464660800454155, "grad_norm": 1.3183289766311646, "learning_rate": 5.855237013908601e-05, "loss": 0.004457233473658562, "step": 146080 }, { "epoch": 41.46749929037752, "grad_norm": 0.9485532641410828, "learning_rate": 5.8549531649162646e-05, "loss": 0.004500225931406021, "step": 146090 }, { "epoch": 41.47033778030088, "grad_norm": 1.3369196653366089, "learning_rate": 5.854669315923929e-05, "loss": 0.0008770259097218514, "step": 146100 }, { "epoch": 41.47317627022424, "grad_norm": 1.2788058519363403, "learning_rate": 5.854385466931592e-05, "loss": 0.0015763245522975922, "step": 146110 }, { "epoch": 41.4760147601476, "grad_norm": 0.16976849734783173, "learning_rate": 5.8541016179392564e-05, "loss": 0.0015088392421603203, "step": 146120 }, { "epoch": 41.47885325007096, "grad_norm": 0.9381034970283508, "learning_rate": 5.853817768946921e-05, "loss": 0.008757475018501281, "step": 146130 }, { "epoch": 41.481691739994325, "grad_norm": 0.09268034994602203, "learning_rate": 5.8535339199545846e-05, "loss": 0.001237444207072258, "step": 146140 }, { "epoch": 41.48453022991768, "grad_norm": 0.4604848325252533, "learning_rate": 5.853250070962248e-05, "loss": 0.0005923377349972725, "step": 146150 }, { "epoch": 41.487368719841044, "grad_norm": 0.004941600374877453, "learning_rate": 5.852966221969912e-05, "loss": 0.004277006536722183, "step": 146160 }, { "epoch": 41.49020720976441, "grad_norm": 0.05535408854484558, "learning_rate": 5.852682372977576e-05, "loss": 0.0028983568772673607, "step": 146170 }, { "epoch": 41.49304569968777, "grad_norm": 3.782968759536743, "learning_rate": 5.85239852398524e-05, "loss": 0.0022454217076301576, "step": 146180 }, { "epoch": 41.495884189611125, "grad_norm": 2.1047720909118652, "learning_rate": 5.8521146749929046e-05, "loss": 0.002134759724140167, "step": 146190 }, { "epoch": 41.49872267953449, "grad_norm": 0.1622907519340515, "learning_rate": 5.851830826000568e-05, "loss": 0.01336190551519394, "step": 146200 }, { "epoch": 41.50156116945785, "grad_norm": 0.46576687693595886, "learning_rate": 5.851546977008232e-05, "loss": 0.001364123821258545, "step": 146210 }, { "epoch": 41.50439965938121, "grad_norm": 0.034283000975847244, "learning_rate": 5.851263128015896e-05, "loss": 0.0007917473092675209, "step": 146220 }, { "epoch": 41.50723814930457, "grad_norm": 0.6838528513908386, "learning_rate": 5.85097927902356e-05, "loss": 0.0007004307582974434, "step": 146230 }, { "epoch": 41.51007663922793, "grad_norm": 0.1214894950389862, "learning_rate": 5.850695430031223e-05, "loss": 0.0011119507253170013, "step": 146240 }, { "epoch": 41.51291512915129, "grad_norm": 0.06686419993638992, "learning_rate": 5.850411581038887e-05, "loss": 0.004123541340231895, "step": 146250 }, { "epoch": 41.51575361907465, "grad_norm": 0.06328951567411423, "learning_rate": 5.850127732046552e-05, "loss": 0.0004018981009721756, "step": 146260 }, { "epoch": 41.518592108998014, "grad_norm": 0.35516124963760376, "learning_rate": 5.849843883054216e-05, "loss": 0.016043232381343843, "step": 146270 }, { "epoch": 41.52143059892138, "grad_norm": 2.2244443893432617, "learning_rate": 5.849560034061879e-05, "loss": 0.0014367789030075074, "step": 146280 }, { "epoch": 41.52426908884473, "grad_norm": 0.022368652746081352, "learning_rate": 5.849276185069543e-05, "loss": 0.0025893695652484893, "step": 146290 }, { "epoch": 41.527107578768096, "grad_norm": 0.4274443984031677, "learning_rate": 5.848992336077207e-05, "loss": 0.004160197824239731, "step": 146300 }, { "epoch": 41.52994606869146, "grad_norm": 0.0331806056201458, "learning_rate": 5.848708487084871e-05, "loss": 0.0006735945120453834, "step": 146310 }, { "epoch": 41.532784558614814, "grad_norm": 0.7774903178215027, "learning_rate": 5.848424638092536e-05, "loss": 0.0022611282765865326, "step": 146320 }, { "epoch": 41.53562304853818, "grad_norm": 0.14703118801116943, "learning_rate": 5.848140789100199e-05, "loss": 0.000457029789686203, "step": 146330 }, { "epoch": 41.53846153846154, "grad_norm": 0.2303055226802826, "learning_rate": 5.847856940107863e-05, "loss": 0.0006702685728669167, "step": 146340 }, { "epoch": 41.541300028384896, "grad_norm": 0.1683044135570526, "learning_rate": 5.847573091115527e-05, "loss": 0.0026608269661664965, "step": 146350 }, { "epoch": 41.54413851830826, "grad_norm": 2.292410373687744, "learning_rate": 5.847289242123191e-05, "loss": 0.0008535925298929214, "step": 146360 }, { "epoch": 41.54697700823162, "grad_norm": 0.2048235982656479, "learning_rate": 5.8470053931308544e-05, "loss": 0.001363677904009819, "step": 146370 }, { "epoch": 41.549815498154985, "grad_norm": 0.03535062447190285, "learning_rate": 5.846721544138518e-05, "loss": 0.0038420625030994417, "step": 146380 }, { "epoch": 41.55265398807834, "grad_norm": 0.6901739239692688, "learning_rate": 5.846437695146183e-05, "loss": 0.0015150904655456543, "step": 146390 }, { "epoch": 41.5554924780017, "grad_norm": 0.01645900495350361, "learning_rate": 5.846153846153847e-05, "loss": 0.006285750120878219, "step": 146400 }, { "epoch": 41.558330967925066, "grad_norm": 0.1815907061100006, "learning_rate": 5.84586999716151e-05, "loss": 0.004920654743909836, "step": 146410 }, { "epoch": 41.56116945784842, "grad_norm": 0.8005897998809814, "learning_rate": 5.8455861481691744e-05, "loss": 0.003119840659201145, "step": 146420 }, { "epoch": 41.564007947771785, "grad_norm": 0.2314182072877884, "learning_rate": 5.845302299176838e-05, "loss": 0.0013621287420392037, "step": 146430 }, { "epoch": 41.56684643769515, "grad_norm": 0.08648715913295746, "learning_rate": 5.845018450184502e-05, "loss": 0.0012825842946767806, "step": 146440 }, { "epoch": 41.56968492761851, "grad_norm": 0.11681533604860306, "learning_rate": 5.8447346011921654e-05, "loss": 0.0009126981720328331, "step": 146450 }, { "epoch": 41.572523417541866, "grad_norm": 0.01621556095778942, "learning_rate": 5.84445075219983e-05, "loss": 0.0014541715383529664, "step": 146460 }, { "epoch": 41.57536190746523, "grad_norm": 0.004691628739237785, "learning_rate": 5.8441669032074944e-05, "loss": 0.008264334499835968, "step": 146470 }, { "epoch": 41.57820039738859, "grad_norm": 0.10413634777069092, "learning_rate": 5.843883054215158e-05, "loss": 0.00037996117025613786, "step": 146480 }, { "epoch": 41.58103888731195, "grad_norm": 10.435779571533203, "learning_rate": 5.843599205222822e-05, "loss": 0.007379056513309478, "step": 146490 }, { "epoch": 41.58387737723531, "grad_norm": 0.05092070624232292, "learning_rate": 5.8433153562304855e-05, "loss": 0.004738648235797882, "step": 146500 }, { "epoch": 41.58387737723531, "eval_accuracy": 0.9827049023971514, "eval_loss": 0.07265619933605194, "eval_runtime": 34.5162, "eval_samples_per_second": 455.641, "eval_steps_per_second": 7.127, "step": 146500 }, { "epoch": 41.586715867158674, "grad_norm": 0.2708187401294708, "learning_rate": 5.843031507238149e-05, "loss": 0.0010435299947857857, "step": 146510 }, { "epoch": 41.58955435708203, "grad_norm": 0.03674672171473503, "learning_rate": 5.842747658245814e-05, "loss": 0.00202106200158596, "step": 146520 }, { "epoch": 41.59239284700539, "grad_norm": 0.0199587419629097, "learning_rate": 5.842463809253478e-05, "loss": 0.00233745276927948, "step": 146530 }, { "epoch": 41.595231336928755, "grad_norm": 0.006073185708373785, "learning_rate": 5.842179960261141e-05, "loss": 0.0039124563336372375, "step": 146540 }, { "epoch": 41.59806982685212, "grad_norm": 0.06416381150484085, "learning_rate": 5.8418961112688055e-05, "loss": 0.0003561593592166901, "step": 146550 }, { "epoch": 41.600908316775474, "grad_norm": 0.1359870433807373, "learning_rate": 5.841612262276469e-05, "loss": 0.010043396055698395, "step": 146560 }, { "epoch": 41.60374680669884, "grad_norm": 3.2035765647888184, "learning_rate": 5.841328413284133e-05, "loss": 0.0011926228180527687, "step": 146570 }, { "epoch": 41.6065852966222, "grad_norm": 0.037953007966279984, "learning_rate": 5.8410445642917965e-05, "loss": 0.0018440999090671539, "step": 146580 }, { "epoch": 41.609423786545555, "grad_norm": 0.030310075730085373, "learning_rate": 5.840760715299461e-05, "loss": 0.0012302486225962638, "step": 146590 }, { "epoch": 41.61226227646892, "grad_norm": 5.804290771484375, "learning_rate": 5.8404768663071255e-05, "loss": 0.006946624815464019, "step": 146600 }, { "epoch": 41.61510076639228, "grad_norm": 7.316841125488281, "learning_rate": 5.840193017314789e-05, "loss": 0.017221204936504364, "step": 146610 }, { "epoch": 41.61793925631564, "grad_norm": 0.007758555002510548, "learning_rate": 5.8399091683224524e-05, "loss": 0.0009009642526507378, "step": 146620 }, { "epoch": 41.620777746239, "grad_norm": 0.05973708629608154, "learning_rate": 5.8396253193301165e-05, "loss": 0.0002517813816666603, "step": 146630 }, { "epoch": 41.62361623616236, "grad_norm": 0.07896833121776581, "learning_rate": 5.83934147033778e-05, "loss": 0.014116574823856354, "step": 146640 }, { "epoch": 41.626454726085726, "grad_norm": 0.11223994940519333, "learning_rate": 5.839057621345444e-05, "loss": 0.00046509038656949997, "step": 146650 }, { "epoch": 41.62929321600908, "grad_norm": 16.17010498046875, "learning_rate": 5.838773772353109e-05, "loss": 0.011423961073160172, "step": 146660 }, { "epoch": 41.632131705932444, "grad_norm": 8.48342514038086, "learning_rate": 5.8384899233607724e-05, "loss": 0.002125758305191994, "step": 146670 }, { "epoch": 41.63497019585581, "grad_norm": 0.5078394412994385, "learning_rate": 5.8382060743684365e-05, "loss": 0.0032507248222827913, "step": 146680 }, { "epoch": 41.63780868577916, "grad_norm": 3.119783639907837, "learning_rate": 5.8379222253761e-05, "loss": 0.0032282523810863496, "step": 146690 }, { "epoch": 41.640647175702526, "grad_norm": 0.002209828933700919, "learning_rate": 5.837638376383764e-05, "loss": 0.004221918061375618, "step": 146700 }, { "epoch": 41.64348566562589, "grad_norm": 5.288205623626709, "learning_rate": 5.8373545273914276e-05, "loss": 0.0020841635763645173, "step": 146710 }, { "epoch": 41.646324155549244, "grad_norm": 0.9506768584251404, "learning_rate": 5.8370706783990924e-05, "loss": 0.0009666787460446358, "step": 146720 }, { "epoch": 41.64916264547261, "grad_norm": 0.880843460559845, "learning_rate": 5.8367868294067565e-05, "loss": 0.000832541100680828, "step": 146730 }, { "epoch": 41.65200113539597, "grad_norm": 0.20931249856948853, "learning_rate": 5.83650298041442e-05, "loss": 0.001793455332517624, "step": 146740 }, { "epoch": 41.65483962531933, "grad_norm": 0.6084914803504944, "learning_rate": 5.8362191314220835e-05, "loss": 0.0042894091457128525, "step": 146750 }, { "epoch": 41.65767811524269, "grad_norm": 0.1444166898727417, "learning_rate": 5.8359352824297476e-05, "loss": 0.0017078332602977753, "step": 146760 }, { "epoch": 41.66051660516605, "grad_norm": 0.027284318581223488, "learning_rate": 5.835651433437411e-05, "loss": 0.0018113158643245698, "step": 146770 }, { "epoch": 41.663355095089415, "grad_norm": 0.0655958503484726, "learning_rate": 5.835367584445075e-05, "loss": 0.002733282931149006, "step": 146780 }, { "epoch": 41.66619358501277, "grad_norm": 0.033188316971063614, "learning_rate": 5.83508373545274e-05, "loss": 0.009120547771453857, "step": 146790 }, { "epoch": 41.66903207493613, "grad_norm": 3.6600234508514404, "learning_rate": 5.8347998864604035e-05, "loss": 0.003256285935640335, "step": 146800 }, { "epoch": 41.671870564859496, "grad_norm": 0.23864269256591797, "learning_rate": 5.8345160374680676e-05, "loss": 0.0036147601902484896, "step": 146810 }, { "epoch": 41.67470905478285, "grad_norm": 0.01169456634670496, "learning_rate": 5.834232188475731e-05, "loss": 0.0004419539123773575, "step": 146820 }, { "epoch": 41.677547544706215, "grad_norm": 0.1690979152917862, "learning_rate": 5.833948339483395e-05, "loss": 0.0008030109107494354, "step": 146830 }, { "epoch": 41.68038603462958, "grad_norm": 0.3639507293701172, "learning_rate": 5.833664490491059e-05, "loss": 0.001070098951458931, "step": 146840 }, { "epoch": 41.68322452455294, "grad_norm": 0.23597301542758942, "learning_rate": 5.833380641498722e-05, "loss": 0.021252278983592988, "step": 146850 }, { "epoch": 41.6860630144763, "grad_norm": 0.2580888271331787, "learning_rate": 5.8330967925063876e-05, "loss": 0.0019364653155207634, "step": 146860 }, { "epoch": 41.68890150439966, "grad_norm": 0.025108370929956436, "learning_rate": 5.832812943514051e-05, "loss": 0.0105104461312294, "step": 146870 }, { "epoch": 41.69173999432302, "grad_norm": 3.225861072540283, "learning_rate": 5.8325290945217145e-05, "loss": 0.005081158876419067, "step": 146880 }, { "epoch": 41.69457848424638, "grad_norm": 0.0103804562240839, "learning_rate": 5.832245245529379e-05, "loss": 0.012788864970207214, "step": 146890 }, { "epoch": 41.69741697416974, "grad_norm": 6.23919153213501, "learning_rate": 5.831961396537042e-05, "loss": 0.0014388246461749076, "step": 146900 }, { "epoch": 41.700255464093104, "grad_norm": 0.029683925211429596, "learning_rate": 5.831677547544706e-05, "loss": 0.008948564529418945, "step": 146910 }, { "epoch": 41.70309395401647, "grad_norm": 0.006586902774870396, "learning_rate": 5.831393698552371e-05, "loss": 0.015423280000686646, "step": 146920 }, { "epoch": 41.70593244393982, "grad_norm": 0.014525567181408405, "learning_rate": 5.8311098495600346e-05, "loss": 0.00975402221083641, "step": 146930 }, { "epoch": 41.708770933863185, "grad_norm": 0.09720782935619354, "learning_rate": 5.830826000567699e-05, "loss": 0.00045572984963655473, "step": 146940 }, { "epoch": 41.71160942378655, "grad_norm": 0.0314844474196434, "learning_rate": 5.830542151575362e-05, "loss": 0.0010619264096021653, "step": 146950 }, { "epoch": 41.714447913709904, "grad_norm": 8.498419761657715, "learning_rate": 5.830258302583026e-05, "loss": 0.00459582507610321, "step": 146960 }, { "epoch": 41.71728640363327, "grad_norm": 0.047804467380046844, "learning_rate": 5.82997445359069e-05, "loss": 0.002246495336294174, "step": 146970 }, { "epoch": 41.72012489355663, "grad_norm": 0.04210709407925606, "learning_rate": 5.829690604598353e-05, "loss": 0.002567102760076523, "step": 146980 }, { "epoch": 41.722963383479986, "grad_norm": 0.2598162293434143, "learning_rate": 5.829406755606018e-05, "loss": 0.006717988103628158, "step": 146990 }, { "epoch": 41.72580187340335, "grad_norm": 4.133634567260742, "learning_rate": 5.829122906613682e-05, "loss": 0.00155597273260355, "step": 147000 }, { "epoch": 41.72580187340335, "eval_accuracy": 0.9824505627265213, "eval_loss": 0.07462494820356369, "eval_runtime": 34.0298, "eval_samples_per_second": 462.153, "eval_steps_per_second": 7.229, "step": 147000 }, { "epoch": 41.72864036332671, "grad_norm": 0.004448027350008488, "learning_rate": 5.8288390576213456e-05, "loss": 0.0018142424523830414, "step": 147010 }, { "epoch": 41.731478853250074, "grad_norm": 0.15210631489753723, "learning_rate": 5.82855520862901e-05, "loss": 0.002645503543317318, "step": 147020 }, { "epoch": 41.73431734317343, "grad_norm": 0.4630194902420044, "learning_rate": 5.828271359636673e-05, "loss": 0.013213789463043213, "step": 147030 }, { "epoch": 41.73715583309679, "grad_norm": 1.2152631282806396, "learning_rate": 5.8279875106443374e-05, "loss": 0.002794547751545906, "step": 147040 }, { "epoch": 41.739994323020156, "grad_norm": 0.06773603707551956, "learning_rate": 5.827703661652001e-05, "loss": 0.013126692175865174, "step": 147050 }, { "epoch": 41.74283281294351, "grad_norm": 2.165396213531494, "learning_rate": 5.8274198126596656e-05, "loss": 0.0036329843103885652, "step": 147060 }, { "epoch": 41.745671302866874, "grad_norm": 9.009472846984863, "learning_rate": 5.82713596366733e-05, "loss": 0.02330363690853119, "step": 147070 }, { "epoch": 41.74850979279024, "grad_norm": 0.0328129343688488, "learning_rate": 5.826852114674993e-05, "loss": 0.005610437318682671, "step": 147080 }, { "epoch": 41.75134828271359, "grad_norm": 0.13441847264766693, "learning_rate": 5.826568265682657e-05, "loss": 0.01357411891222, "step": 147090 }, { "epoch": 41.754186772636956, "grad_norm": 0.3602278530597687, "learning_rate": 5.826284416690321e-05, "loss": 0.008287978172302247, "step": 147100 }, { "epoch": 41.75702526256032, "grad_norm": 0.15481378138065338, "learning_rate": 5.826000567697984e-05, "loss": 0.006116118282079697, "step": 147110 }, { "epoch": 41.75986375248368, "grad_norm": 0.043805066496133804, "learning_rate": 5.825716718705649e-05, "loss": 0.0060327954590320585, "step": 147120 }, { "epoch": 41.76270224240704, "grad_norm": 0.004457347560673952, "learning_rate": 5.825432869713313e-05, "loss": 0.00034384652972221377, "step": 147130 }, { "epoch": 41.7655407323304, "grad_norm": 0.6228349208831787, "learning_rate": 5.825149020720977e-05, "loss": 0.0007704190909862518, "step": 147140 }, { "epoch": 41.76837922225376, "grad_norm": 8.978469848632812, "learning_rate": 5.824865171728641e-05, "loss": 0.005887194722890854, "step": 147150 }, { "epoch": 41.77121771217712, "grad_norm": 1.32693612575531, "learning_rate": 5.824581322736304e-05, "loss": 0.006561754643917084, "step": 147160 }, { "epoch": 41.77405620210048, "grad_norm": 3.903803825378418, "learning_rate": 5.8242974737439684e-05, "loss": 0.020136633515357973, "step": 147170 }, { "epoch": 41.776894692023845, "grad_norm": 0.013790895231068134, "learning_rate": 5.824013624751632e-05, "loss": 0.02672005593776703, "step": 147180 }, { "epoch": 41.7797331819472, "grad_norm": 0.02289220504462719, "learning_rate": 5.823729775759297e-05, "loss": 0.0005390169098973274, "step": 147190 }, { "epoch": 41.78257167187056, "grad_norm": 2.6916446685791016, "learning_rate": 5.823445926766961e-05, "loss": 0.0007343221455812454, "step": 147200 }, { "epoch": 41.785410161793926, "grad_norm": 2.2185072898864746, "learning_rate": 5.823162077774624e-05, "loss": 0.004376306012272835, "step": 147210 }, { "epoch": 41.78824865171729, "grad_norm": 0.013622312806546688, "learning_rate": 5.822878228782288e-05, "loss": 0.0002229752019047737, "step": 147220 }, { "epoch": 41.791087141640645, "grad_norm": 0.045581310987472534, "learning_rate": 5.822594379789952e-05, "loss": 0.0007100898772478103, "step": 147230 }, { "epoch": 41.79392563156401, "grad_norm": 0.005224854219704866, "learning_rate": 5.8223105307976154e-05, "loss": 0.0002367500215768814, "step": 147240 }, { "epoch": 41.79676412148737, "grad_norm": 0.004311900120228529, "learning_rate": 5.8220266818052795e-05, "loss": 0.0006179597228765487, "step": 147250 }, { "epoch": 41.79960261141073, "grad_norm": 0.023090746253728867, "learning_rate": 5.821742832812944e-05, "loss": 0.00021467097103595735, "step": 147260 }, { "epoch": 41.80244110133409, "grad_norm": 0.06566687673330307, "learning_rate": 5.821458983820608e-05, "loss": 0.0004129108041524887, "step": 147270 }, { "epoch": 41.80527959125745, "grad_norm": 12.17864990234375, "learning_rate": 5.821175134828272e-05, "loss": 0.004277702048420906, "step": 147280 }, { "epoch": 41.808118081180815, "grad_norm": 0.11476089060306549, "learning_rate": 5.8208912858359354e-05, "loss": 0.004381935298442841, "step": 147290 }, { "epoch": 41.81095657110417, "grad_norm": 0.010689581744372845, "learning_rate": 5.8206074368435995e-05, "loss": 0.0015872418880462646, "step": 147300 }, { "epoch": 41.813795061027534, "grad_norm": 0.05743665620684624, "learning_rate": 5.820323587851263e-05, "loss": 0.001141665317118168, "step": 147310 }, { "epoch": 41.8166335509509, "grad_norm": 0.5051683783531189, "learning_rate": 5.820039738858928e-05, "loss": 0.008526205271482467, "step": 147320 }, { "epoch": 41.81947204087425, "grad_norm": 0.076750747859478, "learning_rate": 5.819755889866592e-05, "loss": 0.005531863868236541, "step": 147330 }, { "epoch": 41.822310530797616, "grad_norm": 0.01879073865711689, "learning_rate": 5.8194720408742554e-05, "loss": 0.0010044438764452933, "step": 147340 }, { "epoch": 41.82514902072098, "grad_norm": 0.18307679891586304, "learning_rate": 5.819188191881919e-05, "loss": 0.0003092831000685692, "step": 147350 }, { "epoch": 41.827987510644334, "grad_norm": 0.02960248477756977, "learning_rate": 5.818904342889583e-05, "loss": 0.00029043238610029223, "step": 147360 }, { "epoch": 41.8308260005677, "grad_norm": 0.03699978068470955, "learning_rate": 5.8186204938972464e-05, "loss": 0.0011271141469478608, "step": 147370 }, { "epoch": 41.83366449049106, "grad_norm": 0.0761013925075531, "learning_rate": 5.8183366449049106e-05, "loss": 0.0002657156437635422, "step": 147380 }, { "epoch": 41.83650298041442, "grad_norm": 0.0191226564347744, "learning_rate": 5.8180527959125754e-05, "loss": 0.006987433135509491, "step": 147390 }, { "epoch": 41.83934147033778, "grad_norm": 6.488769054412842, "learning_rate": 5.817768946920239e-05, "loss": 0.0019230511039495467, "step": 147400 }, { "epoch": 41.84217996026114, "grad_norm": 0.08756104856729507, "learning_rate": 5.817485097927903e-05, "loss": 0.00026912391185760496, "step": 147410 }, { "epoch": 41.845018450184504, "grad_norm": 0.015449064783751965, "learning_rate": 5.8172012489355664e-05, "loss": 0.0006213404238224029, "step": 147420 }, { "epoch": 41.84785694010786, "grad_norm": 0.023360414430499077, "learning_rate": 5.8169173999432306e-05, "loss": 0.005238861590623855, "step": 147430 }, { "epoch": 41.85069543003122, "grad_norm": 0.05560440942645073, "learning_rate": 5.816633550950894e-05, "loss": 0.0002986893057823181, "step": 147440 }, { "epoch": 41.853533919954586, "grad_norm": 2.0935442447662354, "learning_rate": 5.8163497019585575e-05, "loss": 0.0040026850998401645, "step": 147450 }, { "epoch": 41.85637240987794, "grad_norm": 0.0073538729920983315, "learning_rate": 5.816065852966222e-05, "loss": 0.0001343982294201851, "step": 147460 }, { "epoch": 41.859210899801305, "grad_norm": 0.22779123485088348, "learning_rate": 5.8157820039738865e-05, "loss": 0.0013540361076593399, "step": 147470 }, { "epoch": 41.86204938972467, "grad_norm": 5.799279689788818, "learning_rate": 5.81549815498155e-05, "loss": 0.002845979109406471, "step": 147480 }, { "epoch": 41.86488787964803, "grad_norm": 0.1804196685552597, "learning_rate": 5.815214305989214e-05, "loss": 0.005330868810415268, "step": 147490 }, { "epoch": 41.867726369571386, "grad_norm": 0.027143988758325577, "learning_rate": 5.8149304569968775e-05, "loss": 0.0016774408519268035, "step": 147500 }, { "epoch": 41.867726369571386, "eval_accuracy": 0.9823869778088637, "eval_loss": 0.06555458903312683, "eval_runtime": 33.652, "eval_samples_per_second": 467.342, "eval_steps_per_second": 7.31, "step": 147500 }, { "epoch": 41.87056485949475, "grad_norm": 0.0176338329911232, "learning_rate": 5.8146466080045417e-05, "loss": 0.009313374757766724, "step": 147510 }, { "epoch": 41.87340334941811, "grad_norm": 0.08563011884689331, "learning_rate": 5.8143627590122065e-05, "loss": 0.001039108633995056, "step": 147520 }, { "epoch": 41.87624183934147, "grad_norm": 12.437496185302734, "learning_rate": 5.81407891001987e-05, "loss": 0.0026992050930857658, "step": 147530 }, { "epoch": 41.87908032926483, "grad_norm": 0.16471022367477417, "learning_rate": 5.813795061027534e-05, "loss": 0.0007855430245399475, "step": 147540 }, { "epoch": 41.88191881918819, "grad_norm": 0.009858455508947372, "learning_rate": 5.8135112120351975e-05, "loss": 0.00206364244222641, "step": 147550 }, { "epoch": 41.88475730911155, "grad_norm": 0.28478267788887024, "learning_rate": 5.813227363042861e-05, "loss": 0.003286160156130791, "step": 147560 }, { "epoch": 41.88759579903491, "grad_norm": 0.052161090075969696, "learning_rate": 5.812943514050525e-05, "loss": 0.010879623144865036, "step": 147570 }, { "epoch": 41.890434288958275, "grad_norm": 0.01405294705182314, "learning_rate": 5.812688049957423e-05, "loss": 0.006554998457431793, "step": 147580 }, { "epoch": 41.89327277888164, "grad_norm": 0.04271293804049492, "learning_rate": 5.812404200965087e-05, "loss": 0.007492460310459137, "step": 147590 }, { "epoch": 41.896111268804994, "grad_norm": 0.0043248385190963745, "learning_rate": 5.812120351972751e-05, "loss": 0.0011374447494745255, "step": 147600 }, { "epoch": 41.89894975872836, "grad_norm": 1.1168673038482666, "learning_rate": 5.811836502980414e-05, "loss": 0.00047669261693954467, "step": 147610 }, { "epoch": 41.90178824865172, "grad_norm": 0.028728991746902466, "learning_rate": 5.811552653988078e-05, "loss": 0.00021779201924800872, "step": 147620 }, { "epoch": 41.904626738575075, "grad_norm": 0.5942674279212952, "learning_rate": 5.811268804995742e-05, "loss": 0.0005965335294604301, "step": 147630 }, { "epoch": 41.90746522849844, "grad_norm": 0.005037781782448292, "learning_rate": 5.8109849560034066e-05, "loss": 0.000847487710416317, "step": 147640 }, { "epoch": 41.9103037184218, "grad_norm": 0.021859116852283478, "learning_rate": 5.810701107011071e-05, "loss": 0.00010218787938356399, "step": 147650 }, { "epoch": 41.913142208345164, "grad_norm": 0.019548824056982994, "learning_rate": 5.810417258018734e-05, "loss": 0.0006462233141064644, "step": 147660 }, { "epoch": 41.91598069826852, "grad_norm": 0.016375435516238213, "learning_rate": 5.810133409026398e-05, "loss": 0.0010381527245044708, "step": 147670 }, { "epoch": 41.91881918819188, "grad_norm": 0.024327468127012253, "learning_rate": 5.809849560034062e-05, "loss": 0.0016331246122717857, "step": 147680 }, { "epoch": 41.921657678115245, "grad_norm": 0.1629289835691452, "learning_rate": 5.809565711041726e-05, "loss": 0.0020048046484589578, "step": 147690 }, { "epoch": 41.9244961680386, "grad_norm": 0.040022071450948715, "learning_rate": 5.809281862049391e-05, "loss": 0.0023267190903425218, "step": 147700 }, { "epoch": 41.927334657961964, "grad_norm": 0.7191936373710632, "learning_rate": 5.808998013057054e-05, "loss": 0.0009073892608284951, "step": 147710 }, { "epoch": 41.93017314788533, "grad_norm": 0.09998054802417755, "learning_rate": 5.808714164064718e-05, "loss": 0.013508261740207672, "step": 147720 }, { "epoch": 41.93301163780868, "grad_norm": 7.257813453674316, "learning_rate": 5.808430315072382e-05, "loss": 0.0025915294885635377, "step": 147730 }, { "epoch": 41.935850127732046, "grad_norm": 15.758906364440918, "learning_rate": 5.808146466080045e-05, "loss": 0.008287876099348068, "step": 147740 }, { "epoch": 41.93868861765541, "grad_norm": 0.0662807822227478, "learning_rate": 5.8078626170877094e-05, "loss": 0.0009300041943788529, "step": 147750 }, { "epoch": 41.94152710757877, "grad_norm": 9.48594856262207, "learning_rate": 5.807578768095373e-05, "loss": 0.008433856070041656, "step": 147760 }, { "epoch": 41.94436559750213, "grad_norm": 0.04321140795946121, "learning_rate": 5.807294919103038e-05, "loss": 0.0019052792340517044, "step": 147770 }, { "epoch": 41.94720408742549, "grad_norm": 0.04008945822715759, "learning_rate": 5.807011070110702e-05, "loss": 0.00029606688767671587, "step": 147780 }, { "epoch": 41.95004257734885, "grad_norm": 0.004957941360771656, "learning_rate": 5.806727221118365e-05, "loss": 0.0048335641622543335, "step": 147790 }, { "epoch": 41.95288106727221, "grad_norm": 0.006553984247148037, "learning_rate": 5.8064433721260294e-05, "loss": 0.001997763477265835, "step": 147800 }, { "epoch": 41.95571955719557, "grad_norm": 0.025978071615099907, "learning_rate": 5.806159523133693e-05, "loss": 0.007380061596632004, "step": 147810 }, { "epoch": 41.958558047118935, "grad_norm": 0.016257235780358315, "learning_rate": 5.805875674141357e-05, "loss": 0.0017568375915288925, "step": 147820 }, { "epoch": 41.96139653704229, "grad_norm": 0.20041324198246002, "learning_rate": 5.8055918251490205e-05, "loss": 0.002381419576704502, "step": 147830 }, { "epoch": 41.96423502696565, "grad_norm": 0.029849441722035408, "learning_rate": 5.805307976156685e-05, "loss": 0.014238657057285308, "step": 147840 }, { "epoch": 41.967073516889016, "grad_norm": 0.9313237071037292, "learning_rate": 5.805024127164349e-05, "loss": 0.012575682997703553, "step": 147850 }, { "epoch": 41.96991200681238, "grad_norm": 1.0081876516342163, "learning_rate": 5.804740278172013e-05, "loss": 0.008354626595973969, "step": 147860 }, { "epoch": 41.972750496735735, "grad_norm": 0.21972402930259705, "learning_rate": 5.804456429179676e-05, "loss": 0.003101377934217453, "step": 147870 }, { "epoch": 41.9755889866591, "grad_norm": 9.004803657531738, "learning_rate": 5.8041725801873405e-05, "loss": 0.011407797038555146, "step": 147880 }, { "epoch": 41.97842747658246, "grad_norm": 0.07319717854261398, "learning_rate": 5.803888731195004e-05, "loss": 0.0003382576629519463, "step": 147890 }, { "epoch": 41.981265966505816, "grad_norm": 0.007713764905929565, "learning_rate": 5.803604882202669e-05, "loss": 0.0012478522956371307, "step": 147900 }, { "epoch": 41.98410445642918, "grad_norm": 0.22216686606407166, "learning_rate": 5.803321033210333e-05, "loss": 0.001042325235903263, "step": 147910 }, { "epoch": 41.98694294635254, "grad_norm": 0.4574500024318695, "learning_rate": 5.803037184217996e-05, "loss": 0.007148212194442749, "step": 147920 }, { "epoch": 41.9897814362759, "grad_norm": 0.2043665051460266, "learning_rate": 5.8027533352256605e-05, "loss": 0.0056179285049438475, "step": 147930 }, { "epoch": 41.99261992619926, "grad_norm": 0.06017661094665527, "learning_rate": 5.802469486233324e-05, "loss": 0.001717340759932995, "step": 147940 }, { "epoch": 41.995458416122624, "grad_norm": 0.2521764636039734, "learning_rate": 5.802185637240988e-05, "loss": 0.005312496423721313, "step": 147950 }, { "epoch": 41.99829690604599, "grad_norm": 0.06547623127698898, "learning_rate": 5.8019017882486515e-05, "loss": 0.001863352209329605, "step": 147960 }, { "epoch": 42.00113539596934, "grad_norm": 1.1411349773406982, "learning_rate": 5.8016179392563163e-05, "loss": 0.0006594069767743349, "step": 147970 }, { "epoch": 42.003973885892705, "grad_norm": 8.29399585723877, "learning_rate": 5.80133409026398e-05, "loss": 0.00527244508266449, "step": 147980 }, { "epoch": 42.00681237581607, "grad_norm": 0.3729233145713806, "learning_rate": 5.801050241271644e-05, "loss": 0.0020105481147766114, "step": 147990 }, { "epoch": 42.009650865739424, "grad_norm": 0.7561850547790527, "learning_rate": 5.8007663922793074e-05, "loss": 0.004015503823757172, "step": 148000 }, { "epoch": 42.009650865739424, "eval_accuracy": 0.9794620715966172, "eval_loss": 0.08371688425540924, "eval_runtime": 33.7259, "eval_samples_per_second": 466.319, "eval_steps_per_second": 7.294, "step": 148000 }, { "epoch": 42.01248935566279, "grad_norm": 0.13504989445209503, "learning_rate": 5.8004825432869715e-05, "loss": 0.0031283874064683913, "step": 148010 }, { "epoch": 42.01532784558615, "grad_norm": 0.663907527923584, "learning_rate": 5.800198694294635e-05, "loss": 0.0007835319265723228, "step": 148020 }, { "epoch": 42.018166335509505, "grad_norm": 6.862527847290039, "learning_rate": 5.7999148453023e-05, "loss": 0.002787230536341667, "step": 148030 }, { "epoch": 42.02100482543287, "grad_norm": 0.22257274389266968, "learning_rate": 5.799630996309964e-05, "loss": 0.0010889884084463119, "step": 148040 }, { "epoch": 42.02384331535623, "grad_norm": 0.4335937798023224, "learning_rate": 5.7993471473176274e-05, "loss": 0.0027182750403881074, "step": 148050 }, { "epoch": 42.026681805279594, "grad_norm": 0.25733381509780884, "learning_rate": 5.7990632983252916e-05, "loss": 0.0028049081563949587, "step": 148060 }, { "epoch": 42.02952029520295, "grad_norm": 0.11998070031404495, "learning_rate": 5.798779449332955e-05, "loss": 0.0007637610659003257, "step": 148070 }, { "epoch": 42.03235878512631, "grad_norm": 0.005354211665689945, "learning_rate": 5.7984956003406185e-05, "loss": 0.0018464637920260429, "step": 148080 }, { "epoch": 42.035197275049676, "grad_norm": 0.0714244693517685, "learning_rate": 5.7982117513482826e-05, "loss": 0.0005668526515364647, "step": 148090 }, { "epoch": 42.03803576497303, "grad_norm": 0.1301967352628708, "learning_rate": 5.7979279023559474e-05, "loss": 0.006074760481715203, "step": 148100 }, { "epoch": 42.040874254896394, "grad_norm": 0.16898654401302338, "learning_rate": 5.797644053363611e-05, "loss": 0.004590905457735062, "step": 148110 }, { "epoch": 42.04371274481976, "grad_norm": 2.1045444011688232, "learning_rate": 5.797360204371275e-05, "loss": 0.004738837480545044, "step": 148120 }, { "epoch": 42.04655123474312, "grad_norm": 2.5118422508239746, "learning_rate": 5.7970763553789385e-05, "loss": 0.01745133101940155, "step": 148130 }, { "epoch": 42.049389724666476, "grad_norm": 0.08407410234212875, "learning_rate": 5.7967925063866026e-05, "loss": 0.00026691704988479614, "step": 148140 }, { "epoch": 42.05222821458984, "grad_norm": 0.13534782826900482, "learning_rate": 5.796508657394266e-05, "loss": 0.0004945117980241776, "step": 148150 }, { "epoch": 42.0550667045132, "grad_norm": 0.05095744878053665, "learning_rate": 5.79622480840193e-05, "loss": 0.002270787209272385, "step": 148160 }, { "epoch": 42.05790519443656, "grad_norm": 0.08661285042762756, "learning_rate": 5.795940959409595e-05, "loss": 0.0008924584835767746, "step": 148170 }, { "epoch": 42.06074368435992, "grad_norm": 0.23186247050762177, "learning_rate": 5.7956571104172585e-05, "loss": 0.0013033734634518624, "step": 148180 }, { "epoch": 42.06358217428328, "grad_norm": 0.03152122348546982, "learning_rate": 5.7953732614249226e-05, "loss": 0.008763550221920014, "step": 148190 }, { "epoch": 42.06642066420664, "grad_norm": 0.008516975678503513, "learning_rate": 5.795089412432586e-05, "loss": 0.0014852508902549744, "step": 148200 }, { "epoch": 42.06925915413, "grad_norm": 0.009729962795972824, "learning_rate": 5.7948055634402495e-05, "loss": 0.006080636009573937, "step": 148210 }, { "epoch": 42.072097644053365, "grad_norm": 4.241453170776367, "learning_rate": 5.794521714447914e-05, "loss": 0.002279701828956604, "step": 148220 }, { "epoch": 42.07493613397673, "grad_norm": 0.02098855935037136, "learning_rate": 5.7942378654555785e-05, "loss": 0.0005521569401025772, "step": 148230 }, { "epoch": 42.07777462390008, "grad_norm": 0.045127131044864655, "learning_rate": 5.793954016463242e-05, "loss": 0.001992717757821083, "step": 148240 }, { "epoch": 42.080613113823446, "grad_norm": 0.0558084212243557, "learning_rate": 5.793670167470906e-05, "loss": 0.0006010346114635468, "step": 148250 }, { "epoch": 42.08345160374681, "grad_norm": 5.071599960327148, "learning_rate": 5.7933863184785696e-05, "loss": 0.0015593402087688446, "step": 148260 }, { "epoch": 42.086290093670165, "grad_norm": 2.410439968109131, "learning_rate": 5.793102469486234e-05, "loss": 0.007891712337732315, "step": 148270 }, { "epoch": 42.08912858359353, "grad_norm": 2.9784774780273438, "learning_rate": 5.792818620493897e-05, "loss": 0.004422924295067787, "step": 148280 }, { "epoch": 42.09196707351689, "grad_norm": 0.00720193749293685, "learning_rate": 5.792534771501561e-05, "loss": 0.0004950163885951043, "step": 148290 }, { "epoch": 42.09480556344025, "grad_norm": 6.782219409942627, "learning_rate": 5.792250922509226e-05, "loss": 0.005423271656036377, "step": 148300 }, { "epoch": 42.09764405336361, "grad_norm": 1.5228815078735352, "learning_rate": 5.7919670735168896e-05, "loss": 0.0022452132776379584, "step": 148310 }, { "epoch": 42.10048254328697, "grad_norm": 9.571563720703125, "learning_rate": 5.791683224524553e-05, "loss": 0.010041240602731705, "step": 148320 }, { "epoch": 42.103321033210335, "grad_norm": 0.03986957296729088, "learning_rate": 5.791399375532217e-05, "loss": 0.002892535552382469, "step": 148330 }, { "epoch": 42.10615952313369, "grad_norm": 0.05880606919527054, "learning_rate": 5.7911155265398806e-05, "loss": 0.0005223721265792847, "step": 148340 }, { "epoch": 42.108998013057054, "grad_norm": 0.03967278078198433, "learning_rate": 5.790831677547545e-05, "loss": 0.0013493482023477555, "step": 148350 }, { "epoch": 42.11183650298042, "grad_norm": 0.178938090801239, "learning_rate": 5.790547828555208e-05, "loss": 0.001978263258934021, "step": 148360 }, { "epoch": 42.11467499290377, "grad_norm": 0.21895641088485718, "learning_rate": 5.790263979562873e-05, "loss": 0.012227811664342881, "step": 148370 }, { "epoch": 42.117513482827135, "grad_norm": 0.22781598567962646, "learning_rate": 5.789980130570537e-05, "loss": 0.0033683978021144868, "step": 148380 }, { "epoch": 42.1203519727505, "grad_norm": 1.4401628971099854, "learning_rate": 5.7896962815782006e-05, "loss": 0.0029865041375160216, "step": 148390 }, { "epoch": 42.123190462673854, "grad_norm": 0.17256776988506317, "learning_rate": 5.789412432585865e-05, "loss": 0.0017794936895370484, "step": 148400 }, { "epoch": 42.12602895259722, "grad_norm": 0.7727460861206055, "learning_rate": 5.789128583593528e-05, "loss": 0.0024641793221235274, "step": 148410 }, { "epoch": 42.12886744252058, "grad_norm": 0.08460207283496857, "learning_rate": 5.7888447346011924e-05, "loss": 0.013232448697090149, "step": 148420 }, { "epoch": 42.13170593244394, "grad_norm": 0.007280772086232901, "learning_rate": 5.788560885608857e-05, "loss": 0.0002598239108920097, "step": 148430 }, { "epoch": 42.1345444223673, "grad_norm": 3.3670411109924316, "learning_rate": 5.7882770366165206e-05, "loss": 0.013173261284828186, "step": 148440 }, { "epoch": 42.13738291229066, "grad_norm": 0.03047463856637478, "learning_rate": 5.787993187624184e-05, "loss": 0.0022823445498943327, "step": 148450 }, { "epoch": 42.140221402214024, "grad_norm": 0.07261932641267776, "learning_rate": 5.787709338631848e-05, "loss": 0.0034034598618745806, "step": 148460 }, { "epoch": 42.14305989213738, "grad_norm": 0.28530600666999817, "learning_rate": 5.787425489639512e-05, "loss": 0.002732769027352333, "step": 148470 }, { "epoch": 42.14589838206074, "grad_norm": 0.08589716255664825, "learning_rate": 5.787141640647176e-05, "loss": 0.003904636949300766, "step": 148480 }, { "epoch": 42.148736871984106, "grad_norm": 0.4357518255710602, "learning_rate": 5.786857791654839e-05, "loss": 0.00047943852841854097, "step": 148490 }, { "epoch": 42.15157536190747, "grad_norm": 0.016580084338784218, "learning_rate": 5.786573942662504e-05, "loss": 0.00026067066937685013, "step": 148500 }, { "epoch": 42.15157536190747, "eval_accuracy": 0.983976600750302, "eval_loss": 0.062181927263736725, "eval_runtime": 36.4556, "eval_samples_per_second": 431.402, "eval_steps_per_second": 6.748, "step": 148500 }, { "epoch": 42.154413851830824, "grad_norm": 0.5635818839073181, "learning_rate": 5.786290093670168e-05, "loss": 0.0009797131642699242, "step": 148510 }, { "epoch": 42.15725234175419, "grad_norm": 0.00667803967371583, "learning_rate": 5.786006244677832e-05, "loss": 0.00043123308569192886, "step": 148520 }, { "epoch": 42.16009083167755, "grad_norm": 0.6173490285873413, "learning_rate": 5.785722395685496e-05, "loss": 0.002784809097647667, "step": 148530 }, { "epoch": 42.162929321600906, "grad_norm": 0.03227712959051132, "learning_rate": 5.785438546693159e-05, "loss": 0.0007298612967133522, "step": 148540 }, { "epoch": 42.16576781152427, "grad_norm": 0.02947138249874115, "learning_rate": 5.785154697700823e-05, "loss": 0.00037629734724760057, "step": 148550 }, { "epoch": 42.16860630144763, "grad_norm": 0.08633213490247726, "learning_rate": 5.784870848708487e-05, "loss": 0.0026203444227576255, "step": 148560 }, { "epoch": 42.17144479137099, "grad_norm": 0.04946347698569298, "learning_rate": 5.784586999716152e-05, "loss": 0.00574025958776474, "step": 148570 }, { "epoch": 42.17428328129435, "grad_norm": 0.3392356038093567, "learning_rate": 5.784303150723815e-05, "loss": 0.005654659867286682, "step": 148580 }, { "epoch": 42.17712177121771, "grad_norm": 0.4065309762954712, "learning_rate": 5.784019301731479e-05, "loss": 0.002069763094186783, "step": 148590 }, { "epoch": 42.179960261141076, "grad_norm": 0.09606487303972244, "learning_rate": 5.783735452739143e-05, "loss": 0.006268689036369323, "step": 148600 }, { "epoch": 42.18279875106443, "grad_norm": 0.20625902712345123, "learning_rate": 5.783451603746807e-05, "loss": 0.0006280785426497459, "step": 148610 }, { "epoch": 42.185637240987795, "grad_norm": 1.5448293685913086, "learning_rate": 5.7831677547544704e-05, "loss": 0.012490019947290421, "step": 148620 }, { "epoch": 42.18847573091116, "grad_norm": 0.012525386177003384, "learning_rate": 5.782883905762135e-05, "loss": 0.00013627689331769944, "step": 148630 }, { "epoch": 42.19131422083451, "grad_norm": 1.8945181369781494, "learning_rate": 5.782600056769799e-05, "loss": 0.006813575327396393, "step": 148640 }, { "epoch": 42.194152710757876, "grad_norm": 0.22400662302970886, "learning_rate": 5.782316207777463e-05, "loss": 0.005037152022123337, "step": 148650 }, { "epoch": 42.19699120068124, "grad_norm": 1.1871527433395386, "learning_rate": 5.782032358785127e-05, "loss": 0.002540949918329716, "step": 148660 }, { "epoch": 42.199829690604595, "grad_norm": 0.7383244633674622, "learning_rate": 5.7817485097927904e-05, "loss": 0.00027681421488523483, "step": 148670 }, { "epoch": 42.20266818052796, "grad_norm": 0.15585535764694214, "learning_rate": 5.781464660800454e-05, "loss": 0.005134584009647369, "step": 148680 }, { "epoch": 42.20550667045132, "grad_norm": 3.114103317260742, "learning_rate": 5.781180811808118e-05, "loss": 0.01381157487630844, "step": 148690 }, { "epoch": 42.208345160374684, "grad_norm": 0.9454352855682373, "learning_rate": 5.780896962815783e-05, "loss": 0.007884953171014786, "step": 148700 }, { "epoch": 42.21118365029804, "grad_norm": 9.61143970489502, "learning_rate": 5.780613113823446e-05, "loss": 0.01910373717546463, "step": 148710 }, { "epoch": 42.2140221402214, "grad_norm": 8.243927955627441, "learning_rate": 5.7803292648311104e-05, "loss": 0.010588087141513824, "step": 148720 }, { "epoch": 42.216860630144765, "grad_norm": 0.03460073471069336, "learning_rate": 5.780045415838774e-05, "loss": 0.000984027609229088, "step": 148730 }, { "epoch": 42.21969912006812, "grad_norm": 0.009432024322450161, "learning_rate": 5.779761566846438e-05, "loss": 0.0037712998688220977, "step": 148740 }, { "epoch": 42.222537609991484, "grad_norm": 0.015866700559854507, "learning_rate": 5.7794777178541015e-05, "loss": 0.007475700974464416, "step": 148750 }, { "epoch": 42.22537609991485, "grad_norm": 0.43056586384773254, "learning_rate": 5.7791938688617656e-05, "loss": 0.006339690834283829, "step": 148760 }, { "epoch": 42.2282145898382, "grad_norm": 2.0559298992156982, "learning_rate": 5.7789100198694304e-05, "loss": 0.0021386746317148208, "step": 148770 }, { "epoch": 42.231053079761566, "grad_norm": 0.10255126655101776, "learning_rate": 5.778626170877094e-05, "loss": 0.0012138109654188157, "step": 148780 }, { "epoch": 42.23389156968493, "grad_norm": 1.3304555416107178, "learning_rate": 5.778342321884757e-05, "loss": 0.02139258533716202, "step": 148790 }, { "epoch": 42.23673005960829, "grad_norm": 1.1621980667114258, "learning_rate": 5.7780584728924215e-05, "loss": 0.0056877128779888155, "step": 148800 }, { "epoch": 42.23956854953165, "grad_norm": 0.13426943123340607, "learning_rate": 5.777774623900085e-05, "loss": 0.0009769584983587265, "step": 148810 }, { "epoch": 42.24240703945501, "grad_norm": 0.49818968772888184, "learning_rate": 5.777490774907749e-05, "loss": 0.015643107891082763, "step": 148820 }, { "epoch": 42.24524552937837, "grad_norm": 0.4834127426147461, "learning_rate": 5.777206925915414e-05, "loss": 0.008508730679750443, "step": 148830 }, { "epoch": 42.24808401930173, "grad_norm": 6.122630596160889, "learning_rate": 5.776923076923077e-05, "loss": 0.0026516007259488107, "step": 148840 }, { "epoch": 42.25092250922509, "grad_norm": 0.34452447295188904, "learning_rate": 5.7766392279307415e-05, "loss": 0.009418690204620361, "step": 148850 }, { "epoch": 42.253760999148454, "grad_norm": 0.1939232051372528, "learning_rate": 5.776355378938405e-05, "loss": 0.012090705335140228, "step": 148860 }, { "epoch": 42.25659948907182, "grad_norm": 0.36378514766693115, "learning_rate": 5.776071529946069e-05, "loss": 0.003058484382927418, "step": 148870 }, { "epoch": 42.25943797899517, "grad_norm": 0.7039912343025208, "learning_rate": 5.7757876809537325e-05, "loss": 0.0017521994188427926, "step": 148880 }, { "epoch": 42.262276468918536, "grad_norm": 0.031072214245796204, "learning_rate": 5.775503831961396e-05, "loss": 0.010899436473846436, "step": 148890 }, { "epoch": 42.2651149588419, "grad_norm": 0.010925382375717163, "learning_rate": 5.7752199829690615e-05, "loss": 0.003550399839878082, "step": 148900 }, { "epoch": 42.267953448765255, "grad_norm": 0.04251685366034508, "learning_rate": 5.774936133976725e-05, "loss": 0.010822443664073944, "step": 148910 }, { "epoch": 42.27079193868862, "grad_norm": 0.21869465708732605, "learning_rate": 5.7746522849843884e-05, "loss": 0.0020966615527868273, "step": 148920 }, { "epoch": 42.27363042861198, "grad_norm": 0.08682548999786377, "learning_rate": 5.7743684359920525e-05, "loss": 0.018640124797821046, "step": 148930 }, { "epoch": 42.276468918535336, "grad_norm": 0.45291298627853394, "learning_rate": 5.774084586999716e-05, "loss": 0.0023030145093798637, "step": 148940 }, { "epoch": 42.2793074084587, "grad_norm": 0.06491441279649734, "learning_rate": 5.77380073800738e-05, "loss": 0.00029875561594963076, "step": 148950 }, { "epoch": 42.28214589838206, "grad_norm": 0.011854357086122036, "learning_rate": 5.7735168890150436e-05, "loss": 0.0018132233992218972, "step": 148960 }, { "epoch": 42.284984388305425, "grad_norm": 0.0340898372232914, "learning_rate": 5.7732330400227084e-05, "loss": 0.0010575352236628533, "step": 148970 }, { "epoch": 42.28782287822878, "grad_norm": 0.03270970284938812, "learning_rate": 5.7729491910303725e-05, "loss": 0.003037561476230621, "step": 148980 }, { "epoch": 42.29066136815214, "grad_norm": 0.40189501643180847, "learning_rate": 5.772665342038036e-05, "loss": 0.0009944675490260123, "step": 148990 }, { "epoch": 42.293499858075506, "grad_norm": 0.03260350227355957, "learning_rate": 5.7723814930457e-05, "loss": 0.006242355331778526, "step": 149000 }, { "epoch": 42.293499858075506, "eval_accuracy": 0.9818782984676034, "eval_loss": 0.072067029774189, "eval_runtime": 37.3861, "eval_samples_per_second": 420.664, "eval_steps_per_second": 6.58, "step": 149000 }, { "epoch": 42.29633834799886, "grad_norm": 0.012630388140678406, "learning_rate": 5.7720976440533636e-05, "loss": 0.0023158881813287735, "step": 149010 }, { "epoch": 42.299176837922225, "grad_norm": 0.3972333073616028, "learning_rate": 5.771813795061027e-05, "loss": 0.0010951055213809014, "step": 149020 }, { "epoch": 42.30201532784559, "grad_norm": 0.027228839695453644, "learning_rate": 5.7715299460686926e-05, "loss": 0.0048057641834020615, "step": 149030 }, { "epoch": 42.304853817768944, "grad_norm": 0.18567191064357758, "learning_rate": 5.771246097076356e-05, "loss": 0.0009849751368165016, "step": 149040 }, { "epoch": 42.30769230769231, "grad_norm": 15.079144477844238, "learning_rate": 5.7709622480840195e-05, "loss": 0.004274927824735641, "step": 149050 }, { "epoch": 42.31053079761567, "grad_norm": 0.07061915844678879, "learning_rate": 5.7706783990916836e-05, "loss": 0.004074958711862564, "step": 149060 }, { "epoch": 42.31336928753903, "grad_norm": 8.32724380493164, "learning_rate": 5.770394550099347e-05, "loss": 0.0032767247408628463, "step": 149070 }, { "epoch": 42.31620777746239, "grad_norm": 7.583128929138184, "learning_rate": 5.770110701107011e-05, "loss": 0.0025001589208841326, "step": 149080 }, { "epoch": 42.31904626738575, "grad_norm": 6.784241676330566, "learning_rate": 5.769826852114675e-05, "loss": 0.002610155753791332, "step": 149090 }, { "epoch": 42.321884757309114, "grad_norm": 0.5287998914718628, "learning_rate": 5.7695430031223395e-05, "loss": 0.0015937607735395432, "step": 149100 }, { "epoch": 42.32472324723247, "grad_norm": 0.13064247369766235, "learning_rate": 5.7692591541300036e-05, "loss": 0.0004553545266389847, "step": 149110 }, { "epoch": 42.32756173715583, "grad_norm": 0.231301411986351, "learning_rate": 5.768975305137667e-05, "loss": 0.0034344617277383806, "step": 149120 }, { "epoch": 42.330400227079195, "grad_norm": 0.10502910614013672, "learning_rate": 5.768691456145331e-05, "loss": 0.0005330532789230346, "step": 149130 }, { "epoch": 42.33323871700255, "grad_norm": 0.09793553501367569, "learning_rate": 5.768407607152995e-05, "loss": 0.009008872509002685, "step": 149140 }, { "epoch": 42.336077206925914, "grad_norm": 0.12131383270025253, "learning_rate": 5.768123758160658e-05, "loss": 0.0019162138924002647, "step": 149150 }, { "epoch": 42.33891569684928, "grad_norm": 1.4933286905288696, "learning_rate": 5.767839909168322e-05, "loss": 0.007092460989952087, "step": 149160 }, { "epoch": 42.34175418677264, "grad_norm": 0.5240055918693542, "learning_rate": 5.767556060175987e-05, "loss": 0.0005489150062203407, "step": 149170 }, { "epoch": 42.344592676695996, "grad_norm": 0.002286842791363597, "learning_rate": 5.7672722111836506e-05, "loss": 0.0008285230025649071, "step": 149180 }, { "epoch": 42.34743116661936, "grad_norm": 0.02868444100022316, "learning_rate": 5.766988362191315e-05, "loss": 0.0009840404614806174, "step": 149190 }, { "epoch": 42.35026965654272, "grad_norm": 0.10616371780633926, "learning_rate": 5.766704513198978e-05, "loss": 0.0023007798939943314, "step": 149200 }, { "epoch": 42.35310814646608, "grad_norm": 1.4111546277999878, "learning_rate": 5.766420664206642e-05, "loss": 0.0008992619812488555, "step": 149210 }, { "epoch": 42.35594663638944, "grad_norm": 6.268981456756592, "learning_rate": 5.766136815214306e-05, "loss": 0.008783967047929764, "step": 149220 }, { "epoch": 42.3587851263128, "grad_norm": 0.004635338205844164, "learning_rate": 5.7658529662219706e-05, "loss": 0.0006999442353844643, "step": 149230 }, { "epoch": 42.36162361623616, "grad_norm": 0.5798527002334595, "learning_rate": 5.765569117229635e-05, "loss": 0.0015348918735980988, "step": 149240 }, { "epoch": 42.36446210615952, "grad_norm": 0.1775139719247818, "learning_rate": 5.765285268237298e-05, "loss": 0.002348132058978081, "step": 149250 }, { "epoch": 42.367300596082885, "grad_norm": 10.618471145629883, "learning_rate": 5.7650014192449616e-05, "loss": 0.005468238890171051, "step": 149260 }, { "epoch": 42.37013908600625, "grad_norm": 0.19201308488845825, "learning_rate": 5.764717570252626e-05, "loss": 0.0011455917730927468, "step": 149270 }, { "epoch": 42.3729775759296, "grad_norm": 0.2903873920440674, "learning_rate": 5.764433721260289e-05, "loss": 0.0026749476790428163, "step": 149280 }, { "epoch": 42.375816065852966, "grad_norm": 0.05815752223134041, "learning_rate": 5.7641498722679534e-05, "loss": 0.00886228084564209, "step": 149290 }, { "epoch": 42.37865455577633, "grad_norm": 4.030114650726318, "learning_rate": 5.763866023275618e-05, "loss": 0.008472998440265656, "step": 149300 }, { "epoch": 42.381493045699685, "grad_norm": 10.210912704467773, "learning_rate": 5.7635821742832816e-05, "loss": 0.01984582245349884, "step": 149310 }, { "epoch": 42.38433153562305, "grad_norm": 0.430069237947464, "learning_rate": 5.763298325290946e-05, "loss": 0.004504071176052093, "step": 149320 }, { "epoch": 42.38717002554641, "grad_norm": 0.06360428035259247, "learning_rate": 5.763014476298609e-05, "loss": 0.006189453601837158, "step": 149330 }, { "epoch": 42.39000851546977, "grad_norm": 0.026077209040522575, "learning_rate": 5.7627306273062734e-05, "loss": 0.002119317278265953, "step": 149340 }, { "epoch": 42.39284700539313, "grad_norm": 0.760020911693573, "learning_rate": 5.762446778313937e-05, "loss": 0.0007919654250144959, "step": 149350 }, { "epoch": 42.39568549531649, "grad_norm": 0.7065972089767456, "learning_rate": 5.7621629293216016e-05, "loss": 0.0012015903368592263, "step": 149360 }, { "epoch": 42.398523985239855, "grad_norm": 0.019509809091687202, "learning_rate": 5.761879080329266e-05, "loss": 0.0003894045948982239, "step": 149370 }, { "epoch": 42.40136247516321, "grad_norm": 0.12073556333780289, "learning_rate": 5.761595231336929e-05, "loss": 0.0021422646939754488, "step": 149380 }, { "epoch": 42.404200965086574, "grad_norm": 0.014653287827968597, "learning_rate": 5.761311382344593e-05, "loss": 0.0005870640277862548, "step": 149390 }, { "epoch": 42.40703945500994, "grad_norm": 0.6399942636489868, "learning_rate": 5.761027533352257e-05, "loss": 0.0008635856211185455, "step": 149400 }, { "epoch": 42.40987794493329, "grad_norm": 0.19939400255680084, "learning_rate": 5.76074368435992e-05, "loss": 0.0007410751655697823, "step": 149410 }, { "epoch": 42.412716434856655, "grad_norm": 0.3074977695941925, "learning_rate": 5.7604598353675844e-05, "loss": 0.0007544541731476784, "step": 149420 }, { "epoch": 42.41555492478002, "grad_norm": 0.005880317185074091, "learning_rate": 5.760175986375249e-05, "loss": 0.0029939210042357446, "step": 149430 }, { "epoch": 42.41839341470338, "grad_norm": 2.1886231899261475, "learning_rate": 5.759892137382913e-05, "loss": 0.001207580603659153, "step": 149440 }, { "epoch": 42.42123190462674, "grad_norm": 0.38269636034965515, "learning_rate": 5.759608288390577e-05, "loss": 0.0020557306706905365, "step": 149450 }, { "epoch": 42.4240703945501, "grad_norm": 0.04686698317527771, "learning_rate": 5.75932443939824e-05, "loss": 0.006124045699834824, "step": 149460 }, { "epoch": 42.42690888447346, "grad_norm": 0.19029246270656586, "learning_rate": 5.7590405904059044e-05, "loss": 0.009254573285579682, "step": 149470 }, { "epoch": 42.42974737439682, "grad_norm": 0.0996827632188797, "learning_rate": 5.758756741413568e-05, "loss": 0.005550019443035126, "step": 149480 }, { "epoch": 42.43258586432018, "grad_norm": 0.011075971648097038, "learning_rate": 5.7584728924212314e-05, "loss": 0.001432417705655098, "step": 149490 }, { "epoch": 42.435424354243544, "grad_norm": 2.4945077896118164, "learning_rate": 5.758189043428897e-05, "loss": 0.0007360722869634628, "step": 149500 }, { "epoch": 42.435424354243544, "eval_accuracy": 0.9821962230558912, "eval_loss": 0.0727243646979332, "eval_runtime": 34.1784, "eval_samples_per_second": 460.144, "eval_steps_per_second": 7.198, "step": 149500 }, { "epoch": 42.4382628441669, "grad_norm": 0.016597673296928406, "learning_rate": 5.75790519443656e-05, "loss": 0.0008084140717983246, "step": 149510 }, { "epoch": 42.44110133409026, "grad_norm": 12.174782752990723, "learning_rate": 5.757621345444224e-05, "loss": 0.0043737441301345825, "step": 149520 }, { "epoch": 42.443939824013626, "grad_norm": 0.10583662241697311, "learning_rate": 5.757337496451888e-05, "loss": 0.026764553785324097, "step": 149530 }, { "epoch": 42.44677831393699, "grad_norm": 0.5196587443351746, "learning_rate": 5.7570536474595514e-05, "loss": 0.004666018486022949, "step": 149540 }, { "epoch": 42.449616803860344, "grad_norm": 0.011579029262065887, "learning_rate": 5.7567697984672155e-05, "loss": 0.004749627411365509, "step": 149550 }, { "epoch": 42.45245529378371, "grad_norm": 0.01636349968612194, "learning_rate": 5.75648594947488e-05, "loss": 0.0054640047252178196, "step": 149560 }, { "epoch": 42.45529378370707, "grad_norm": 0.03645698353648186, "learning_rate": 5.756202100482544e-05, "loss": 0.0008678026497364044, "step": 149570 }, { "epoch": 42.458132273630426, "grad_norm": 0.113944873213768, "learning_rate": 5.755918251490208e-05, "loss": 0.006420239806175232, "step": 149580 }, { "epoch": 42.46097076355379, "grad_norm": 0.041331466287374496, "learning_rate": 5.7556627873971046e-05, "loss": 0.012068688124418258, "step": 149590 }, { "epoch": 42.46380925347715, "grad_norm": 0.1503952145576477, "learning_rate": 5.755378938404769e-05, "loss": 0.0008111020550131797, "step": 149600 }, { "epoch": 42.46664774340051, "grad_norm": 0.03218971937894821, "learning_rate": 5.7550950894124335e-05, "loss": 0.005635309964418411, "step": 149610 }, { "epoch": 42.46948623332387, "grad_norm": 0.07090260088443756, "learning_rate": 5.754811240420097e-05, "loss": 0.0006343081593513488, "step": 149620 }, { "epoch": 42.47232472324723, "grad_norm": 0.06116881221532822, "learning_rate": 5.754527391427761e-05, "loss": 0.000762861780822277, "step": 149630 }, { "epoch": 42.475163213170596, "grad_norm": 5.056071758270264, "learning_rate": 5.7542435424354246e-05, "loss": 0.0018387759104371072, "step": 149640 }, { "epoch": 42.47800170309395, "grad_norm": 0.04109840840101242, "learning_rate": 5.753959693443089e-05, "loss": 0.003315833956003189, "step": 149650 }, { "epoch": 42.480840193017315, "grad_norm": 0.00818520039319992, "learning_rate": 5.753675844450752e-05, "loss": 0.0026999803259968756, "step": 149660 }, { "epoch": 42.48367868294068, "grad_norm": 0.13565373420715332, "learning_rate": 5.7533919954584156e-05, "loss": 0.0007398989051580429, "step": 149670 }, { "epoch": 42.48651717286403, "grad_norm": 1.0627899169921875, "learning_rate": 5.7531081464660804e-05, "loss": 0.0014476608484983444, "step": 149680 }, { "epoch": 42.489355662787396, "grad_norm": 0.4356234669685364, "learning_rate": 5.7528242974737446e-05, "loss": 0.0010314090177416801, "step": 149690 }, { "epoch": 42.49219415271076, "grad_norm": 0.7320193648338318, "learning_rate": 5.752540448481408e-05, "loss": 0.0006992999464273453, "step": 149700 }, { "epoch": 42.49503264263412, "grad_norm": 0.10247719287872314, "learning_rate": 5.752256599489072e-05, "loss": 0.0007414452731609345, "step": 149710 }, { "epoch": 42.49787113255748, "grad_norm": 2.0021777153015137, "learning_rate": 5.7519727504967356e-05, "loss": 0.012616576254367828, "step": 149720 }, { "epoch": 42.50070962248084, "grad_norm": 0.04961194843053818, "learning_rate": 5.7516889015044e-05, "loss": 0.0018883829936385155, "step": 149730 }, { "epoch": 42.503548112404204, "grad_norm": 0.03602848947048187, "learning_rate": 5.7514050525120646e-05, "loss": 0.0051383376121521, "step": 149740 }, { "epoch": 42.50638660232756, "grad_norm": 0.01240641437470913, "learning_rate": 5.751121203519728e-05, "loss": 0.0011544100940227508, "step": 149750 }, { "epoch": 42.50922509225092, "grad_norm": 5.474228382110596, "learning_rate": 5.750837354527392e-05, "loss": 0.0021074559539556504, "step": 149760 }, { "epoch": 42.512063582174285, "grad_norm": 0.13350380957126617, "learning_rate": 5.7505535055350556e-05, "loss": 0.005255614966154098, "step": 149770 }, { "epoch": 42.51490207209764, "grad_norm": 0.015593701042234898, "learning_rate": 5.750269656542719e-05, "loss": 0.004522071033716202, "step": 149780 }, { "epoch": 42.517740562021004, "grad_norm": 0.009658041410148144, "learning_rate": 5.749985807550383e-05, "loss": 0.00875111147761345, "step": 149790 }, { "epoch": 42.52057905194437, "grad_norm": 0.41736963391304016, "learning_rate": 5.749701958558047e-05, "loss": 0.0007737789303064347, "step": 149800 }, { "epoch": 42.52341754186773, "grad_norm": 0.011112752370536327, "learning_rate": 5.7494181095657115e-05, "loss": 0.0008388208225369453, "step": 149810 }, { "epoch": 42.526256031791085, "grad_norm": 8.794468879699707, "learning_rate": 5.7491342605733757e-05, "loss": 0.0020127279683947565, "step": 149820 }, { "epoch": 42.52909452171445, "grad_norm": 0.029080675914883614, "learning_rate": 5.748850411581039e-05, "loss": 0.002035396173596382, "step": 149830 }, { "epoch": 42.53193301163781, "grad_norm": 0.011296866461634636, "learning_rate": 5.748566562588703e-05, "loss": 0.02024710029363632, "step": 149840 }, { "epoch": 42.53477150156117, "grad_norm": 0.0508672334253788, "learning_rate": 5.748282713596367e-05, "loss": 0.0012831812724471091, "step": 149850 }, { "epoch": 42.53760999148453, "grad_norm": 0.024711672216653824, "learning_rate": 5.747998864604031e-05, "loss": 0.018377575278282165, "step": 149860 }, { "epoch": 42.54044848140789, "grad_norm": 0.011167130433022976, "learning_rate": 5.747715015611694e-05, "loss": 0.005779191106557846, "step": 149870 }, { "epoch": 42.54328697133125, "grad_norm": 0.2561008036136627, "learning_rate": 5.747431166619359e-05, "loss": 0.0009773651137948037, "step": 149880 }, { "epoch": 42.54612546125461, "grad_norm": 21.687774658203125, "learning_rate": 5.747147317627023e-05, "loss": 0.010425988584756851, "step": 149890 }, { "epoch": 42.548963951177974, "grad_norm": 0.1312975436449051, "learning_rate": 5.746863468634687e-05, "loss": 0.00033493414521217345, "step": 149900 }, { "epoch": 42.55180244110134, "grad_norm": 0.07946325838565826, "learning_rate": 5.74657961964235e-05, "loss": 0.007547348737716675, "step": 149910 }, { "epoch": 42.55464093102469, "grad_norm": 0.23279789090156555, "learning_rate": 5.746295770650014e-05, "loss": 0.0022748695686459542, "step": 149920 }, { "epoch": 42.557479420948056, "grad_norm": 0.042573198676109314, "learning_rate": 5.746011921657678e-05, "loss": 0.0008408466354012489, "step": 149930 }, { "epoch": 42.56031791087142, "grad_norm": 0.03268809616565704, "learning_rate": 5.7457280726653426e-05, "loss": 0.0188313752412796, "step": 149940 }, { "epoch": 42.563156400794774, "grad_norm": 0.05562429130077362, "learning_rate": 5.745444223673007e-05, "loss": 0.0017085054889321328, "step": 149950 }, { "epoch": 42.56599489071814, "grad_norm": 0.7340807914733887, "learning_rate": 5.74516037468067e-05, "loss": 0.0005642067641019821, "step": 149960 }, { "epoch": 42.5688333806415, "grad_norm": 0.014761121943593025, "learning_rate": 5.744876525688334e-05, "loss": 0.00603003278374672, "step": 149970 }, { "epoch": 42.571671870564856, "grad_norm": 0.011022931896150112, "learning_rate": 5.744592676695998e-05, "loss": 0.007810892909765244, "step": 149980 }, { "epoch": 42.57451036048822, "grad_norm": 0.08278270065784454, "learning_rate": 5.744308827703662e-05, "loss": 0.0005509929731488227, "step": 149990 }, { "epoch": 42.57734885041158, "grad_norm": 0.01686011254787445, "learning_rate": 5.7440249787113254e-05, "loss": 0.0004287831485271454, "step": 150000 }, { "epoch": 42.57734885041158, "eval_accuracy": 0.9835950912443568, "eval_loss": 0.06694793701171875, "eval_runtime": 36.6362, "eval_samples_per_second": 429.275, "eval_steps_per_second": 6.715, "step": 150000 }, { "epoch": 42.580187340334945, "grad_norm": 1.1768161058425903, "learning_rate": 5.74374112971899e-05, "loss": 0.0004266202449798584, "step": 150010 }, { "epoch": 42.5830258302583, "grad_norm": 0.08212864398956299, "learning_rate": 5.7434572807266543e-05, "loss": 0.0038760975003242493, "step": 150020 }, { "epoch": 42.58586432018166, "grad_norm": 9.098673820495605, "learning_rate": 5.743173431734318e-05, "loss": 0.004574078321456909, "step": 150030 }, { "epoch": 42.588702810105026, "grad_norm": 0.032331693917512894, "learning_rate": 5.742889582741981e-05, "loss": 0.0018044034019112586, "step": 150040 }, { "epoch": 42.59154130002838, "grad_norm": 0.9126233458518982, "learning_rate": 5.7426057337496454e-05, "loss": 0.019902142882347106, "step": 150050 }, { "epoch": 42.594379789951745, "grad_norm": 0.8716942667961121, "learning_rate": 5.742321884757309e-05, "loss": 0.009272891283035278, "step": 150060 }, { "epoch": 42.59721827987511, "grad_norm": 0.21125072240829468, "learning_rate": 5.742038035764973e-05, "loss": 0.014104557037353516, "step": 150070 }, { "epoch": 42.60005676979847, "grad_norm": 0.3089865744113922, "learning_rate": 5.741754186772638e-05, "loss": 0.0027934605255723, "step": 150080 }, { "epoch": 42.60289525972183, "grad_norm": 0.04363598674535751, "learning_rate": 5.741470337780301e-05, "loss": 0.010488376021385193, "step": 150090 }, { "epoch": 42.60573374964519, "grad_norm": 0.019342590123414993, "learning_rate": 5.7411864887879654e-05, "loss": 0.002674164995551109, "step": 150100 }, { "epoch": 42.60857223956855, "grad_norm": 0.04729393497109413, "learning_rate": 5.740902639795629e-05, "loss": 0.004904352128505707, "step": 150110 }, { "epoch": 42.61141072949191, "grad_norm": 3.9452967643737793, "learning_rate": 5.740618790803293e-05, "loss": 0.004681844264268875, "step": 150120 }, { "epoch": 42.61424921941527, "grad_norm": 0.05537530034780502, "learning_rate": 5.7403349418109565e-05, "loss": 0.0022235020995140077, "step": 150130 }, { "epoch": 42.617087709338634, "grad_norm": 0.1837112158536911, "learning_rate": 5.740051092818621e-05, "loss": 0.008333974331617356, "step": 150140 }, { "epoch": 42.61992619926199, "grad_norm": 12.200554847717285, "learning_rate": 5.739767243826285e-05, "loss": 0.014847613871097565, "step": 150150 }, { "epoch": 42.62276468918535, "grad_norm": 0.02259373478591442, "learning_rate": 5.739483394833949e-05, "loss": 0.005884405225515366, "step": 150160 }, { "epoch": 42.625603179108715, "grad_norm": 0.7720295786857605, "learning_rate": 5.739199545841612e-05, "loss": 0.002811381220817566, "step": 150170 }, { "epoch": 42.62844166903208, "grad_norm": 0.11381250619888306, "learning_rate": 5.7389156968492765e-05, "loss": 0.0027453599497675894, "step": 150180 }, { "epoch": 42.631280158955434, "grad_norm": 1.2558753490447998, "learning_rate": 5.73863184785694e-05, "loss": 0.010280607640743256, "step": 150190 }, { "epoch": 42.6341186488788, "grad_norm": 2.2998220920562744, "learning_rate": 5.738347998864604e-05, "loss": 0.0032253097742795944, "step": 150200 }, { "epoch": 42.63695713880216, "grad_norm": 0.8665777444839478, "learning_rate": 5.738064149872269e-05, "loss": 0.0036393731832504274, "step": 150210 }, { "epoch": 42.639795628725516, "grad_norm": 0.326058566570282, "learning_rate": 5.7377803008799323e-05, "loss": 0.0009406166151165962, "step": 150220 }, { "epoch": 42.64263411864888, "grad_norm": 0.020419934764504433, "learning_rate": 5.7374964518875965e-05, "loss": 0.002082503028213978, "step": 150230 }, { "epoch": 42.64547260857224, "grad_norm": 0.020502902567386627, "learning_rate": 5.73721260289526e-05, "loss": 0.003367174044251442, "step": 150240 }, { "epoch": 42.6483110984956, "grad_norm": 0.10022071748971939, "learning_rate": 5.7369287539029234e-05, "loss": 0.004716675728559494, "step": 150250 }, { "epoch": 42.65114958841896, "grad_norm": 6.159453868865967, "learning_rate": 5.7366449049105875e-05, "loss": 0.0023421386256814005, "step": 150260 }, { "epoch": 42.65398807834232, "grad_norm": 0.03652922436594963, "learning_rate": 5.736361055918251e-05, "loss": 0.0009731996804475784, "step": 150270 }, { "epoch": 42.656826568265686, "grad_norm": 2.8371171951293945, "learning_rate": 5.736077206925916e-05, "loss": 0.0032070089131593705, "step": 150280 }, { "epoch": 42.65966505818904, "grad_norm": 1.0625066757202148, "learning_rate": 5.73579335793358e-05, "loss": 0.0011358041316270827, "step": 150290 }, { "epoch": 42.662503548112404, "grad_norm": 0.07655610144138336, "learning_rate": 5.7355095089412434e-05, "loss": 0.0006543919444084167, "step": 150300 }, { "epoch": 42.66534203803577, "grad_norm": 0.15140414237976074, "learning_rate": 5.7352256599489076e-05, "loss": 0.0013461053371429444, "step": 150310 }, { "epoch": 42.66818052795912, "grad_norm": 0.00384216639213264, "learning_rate": 5.734941810956571e-05, "loss": 0.0025096656754612923, "step": 150320 }, { "epoch": 42.671019017882486, "grad_norm": 0.24469080567359924, "learning_rate": 5.734657961964235e-05, "loss": 0.0007516177371144295, "step": 150330 }, { "epoch": 42.67385750780585, "grad_norm": 0.3526161313056946, "learning_rate": 5.7343741129719e-05, "loss": 0.0007613256573677063, "step": 150340 }, { "epoch": 42.676695997729205, "grad_norm": 0.04592909663915634, "learning_rate": 5.7340902639795634e-05, "loss": 0.0009906854480504989, "step": 150350 }, { "epoch": 42.67953448765257, "grad_norm": 0.560907244682312, "learning_rate": 5.7338064149872276e-05, "loss": 0.0031745903193950652, "step": 150360 }, { "epoch": 42.68237297757593, "grad_norm": 0.904221773147583, "learning_rate": 5.733522565994891e-05, "loss": 0.0007121680304408073, "step": 150370 }, { "epoch": 42.68521146749929, "grad_norm": 0.009947238489985466, "learning_rate": 5.7332387170025545e-05, "loss": 0.0009164517745375633, "step": 150380 }, { "epoch": 42.68804995742265, "grad_norm": 0.02027098461985588, "learning_rate": 5.7329548680102186e-05, "loss": 0.0017208542674779892, "step": 150390 }, { "epoch": 42.69088844734601, "grad_norm": 1.7286885976791382, "learning_rate": 5.732671019017882e-05, "loss": 0.0021368391811847688, "step": 150400 }, { "epoch": 42.693726937269375, "grad_norm": 0.08964485675096512, "learning_rate": 5.732387170025547e-05, "loss": 0.000199752114713192, "step": 150410 }, { "epoch": 42.69656542719273, "grad_norm": 0.07821428030729294, "learning_rate": 5.732103321033211e-05, "loss": 0.001383558101952076, "step": 150420 }, { "epoch": 42.69940391711609, "grad_norm": 0.3087732791900635, "learning_rate": 5.7318194720408745e-05, "loss": 0.0017948320135474205, "step": 150430 }, { "epoch": 42.702242407039456, "grad_norm": 0.07986743748188019, "learning_rate": 5.7315356230485386e-05, "loss": 0.0007892910391092301, "step": 150440 }, { "epoch": 42.70508089696281, "grad_norm": 2.7072854042053223, "learning_rate": 5.731251774056202e-05, "loss": 0.002226419933140278, "step": 150450 }, { "epoch": 42.707919386886175, "grad_norm": 0.91676265001297, "learning_rate": 5.730967925063866e-05, "loss": 0.001158384419977665, "step": 150460 }, { "epoch": 42.71075787680954, "grad_norm": 0.4807262718677521, "learning_rate": 5.73068407607153e-05, "loss": 0.009643922746181487, "step": 150470 }, { "epoch": 42.7135963667329, "grad_norm": 0.8463000655174255, "learning_rate": 5.7304002270791945e-05, "loss": 0.0027210399508476256, "step": 150480 }, { "epoch": 42.71643485665626, "grad_norm": 0.06933121383190155, "learning_rate": 5.7301163780868586e-05, "loss": 0.0019632622599601744, "step": 150490 }, { "epoch": 42.71927334657962, "grad_norm": 0.022977665066719055, "learning_rate": 5.729832529094522e-05, "loss": 0.0019438061863183975, "step": 150500 }, { "epoch": 42.71927334657962, "eval_accuracy": 0.9807973548674255, "eval_loss": 0.07655210793018341, "eval_runtime": 34.6174, "eval_samples_per_second": 454.309, "eval_steps_per_second": 7.106, "step": 150500 }, { "epoch": 42.72211183650298, "grad_norm": 4.100272178649902, "learning_rate": 5.7295486801021856e-05, "loss": 0.013723984360694885, "step": 150510 }, { "epoch": 42.72495032642634, "grad_norm": 0.20159626007080078, "learning_rate": 5.72926483110985e-05, "loss": 0.0016479192301630974, "step": 150520 }, { "epoch": 42.7277888163497, "grad_norm": 0.6140091419219971, "learning_rate": 5.728980982117513e-05, "loss": 0.0003667719662189484, "step": 150530 }, { "epoch": 42.730627306273064, "grad_norm": 0.03184365853667259, "learning_rate": 5.728697133125178e-05, "loss": 0.0011655991896986962, "step": 150540 }, { "epoch": 42.73346579619643, "grad_norm": 0.012270036153495312, "learning_rate": 5.728413284132842e-05, "loss": 0.0004571516066789627, "step": 150550 }, { "epoch": 42.73630428611978, "grad_norm": 0.016838563606142998, "learning_rate": 5.7281294351405056e-05, "loss": 0.0011217748746275903, "step": 150560 }, { "epoch": 42.739142776043145, "grad_norm": 0.01990608312189579, "learning_rate": 5.72784558614817e-05, "loss": 0.001579907350242138, "step": 150570 }, { "epoch": 42.74198126596651, "grad_norm": 11.781166076660156, "learning_rate": 5.727561737155833e-05, "loss": 0.008622461557388305, "step": 150580 }, { "epoch": 42.744819755889864, "grad_norm": 0.4754284620285034, "learning_rate": 5.727277888163497e-05, "loss": 0.0064560286700725555, "step": 150590 }, { "epoch": 42.74765824581323, "grad_norm": 0.26690611243247986, "learning_rate": 5.726994039171161e-05, "loss": 0.0010170456022024154, "step": 150600 }, { "epoch": 42.75049673573659, "grad_norm": 0.7893202900886536, "learning_rate": 5.7267101901788256e-05, "loss": 0.004936959221959114, "step": 150610 }, { "epoch": 42.753335225659946, "grad_norm": 0.1070001944899559, "learning_rate": 5.726426341186489e-05, "loss": 0.0007953343912959099, "step": 150620 }, { "epoch": 42.75617371558331, "grad_norm": 1.8287423849105835, "learning_rate": 5.726142492194153e-05, "loss": 0.0030458997935056686, "step": 150630 }, { "epoch": 42.75901220550667, "grad_norm": 6.425121784210205, "learning_rate": 5.7258586432018166e-05, "loss": 0.002796266973018646, "step": 150640 }, { "epoch": 42.761850695430034, "grad_norm": 0.08308438956737518, "learning_rate": 5.725574794209481e-05, "loss": 0.0036181263625621797, "step": 150650 }, { "epoch": 42.76468918535339, "grad_norm": 2.580648422241211, "learning_rate": 5.725290945217144e-05, "loss": 0.011951509863138199, "step": 150660 }, { "epoch": 42.76752767527675, "grad_norm": 0.3942524194717407, "learning_rate": 5.7250070962248084e-05, "loss": 0.001244967058300972, "step": 150670 }, { "epoch": 42.770366165200116, "grad_norm": 0.016687460243701935, "learning_rate": 5.724723247232473e-05, "loss": 0.0016169855371117592, "step": 150680 }, { "epoch": 42.77320465512347, "grad_norm": 0.9351746439933777, "learning_rate": 5.7244393982401366e-05, "loss": 0.0039869040250778195, "step": 150690 }, { "epoch": 42.776043145046835, "grad_norm": 5.606001853942871, "learning_rate": 5.724155549247801e-05, "loss": 0.009740294516086578, "step": 150700 }, { "epoch": 42.7788816349702, "grad_norm": 0.7359834313392639, "learning_rate": 5.723871700255464e-05, "loss": 0.009144122898578643, "step": 150710 }, { "epoch": 42.78172012489355, "grad_norm": 0.019286900758743286, "learning_rate": 5.723587851263128e-05, "loss": 0.00248347744345665, "step": 150720 }, { "epoch": 42.784558614816916, "grad_norm": 0.1185140535235405, "learning_rate": 5.723304002270792e-05, "loss": 0.0010218767449259758, "step": 150730 }, { "epoch": 42.78739710474028, "grad_norm": 0.16646192967891693, "learning_rate": 5.7230201532784567e-05, "loss": 0.001132616400718689, "step": 150740 }, { "epoch": 42.79023559466364, "grad_norm": 0.0956704244017601, "learning_rate": 5.72273630428612e-05, "loss": 0.0012758757919073104, "step": 150750 }, { "epoch": 42.793074084587, "grad_norm": 0.03417479991912842, "learning_rate": 5.722452455293784e-05, "loss": 0.0011648450046777726, "step": 150760 }, { "epoch": 42.79591257451036, "grad_norm": 4.95966911315918, "learning_rate": 5.722168606301448e-05, "loss": 0.004082559794187546, "step": 150770 }, { "epoch": 42.79875106443372, "grad_norm": 9.994795799255371, "learning_rate": 5.721884757309112e-05, "loss": 0.006733474880456924, "step": 150780 }, { "epoch": 42.80158955435708, "grad_norm": 1.1576870679855347, "learning_rate": 5.721600908316775e-05, "loss": 0.0018917400389909743, "step": 150790 }, { "epoch": 42.80442804428044, "grad_norm": 0.016875935718417168, "learning_rate": 5.7213170593244394e-05, "loss": 0.001337618939578533, "step": 150800 }, { "epoch": 42.807266534203805, "grad_norm": 0.47908368706703186, "learning_rate": 5.721033210332104e-05, "loss": 0.0011338729411363602, "step": 150810 }, { "epoch": 42.81010502412717, "grad_norm": 7.891409397125244, "learning_rate": 5.720749361339768e-05, "loss": 0.0051012631505727764, "step": 150820 }, { "epoch": 42.812943514050524, "grad_norm": 3.064743995666504, "learning_rate": 5.720465512347432e-05, "loss": 0.0018203647807240487, "step": 150830 }, { "epoch": 42.81578200397389, "grad_norm": 0.03450886160135269, "learning_rate": 5.720181663355095e-05, "loss": 0.0035750783979892732, "step": 150840 }, { "epoch": 42.81862049389725, "grad_norm": 0.08885154873132706, "learning_rate": 5.719897814362759e-05, "loss": 0.03136290013790131, "step": 150850 }, { "epoch": 42.821458983820605, "grad_norm": 14.898045539855957, "learning_rate": 5.719613965370423e-05, "loss": 0.003489578515291214, "step": 150860 }, { "epoch": 42.82429747374397, "grad_norm": 0.01828574575483799, "learning_rate": 5.7193301163780864e-05, "loss": 0.010202425718307494, "step": 150870 }, { "epoch": 42.82713596366733, "grad_norm": 0.09356603771448135, "learning_rate": 5.719046267385751e-05, "loss": 0.0012912670150399207, "step": 150880 }, { "epoch": 42.82997445359069, "grad_norm": 0.02459779381752014, "learning_rate": 5.718762418393415e-05, "loss": 0.0006403431296348571, "step": 150890 }, { "epoch": 42.83281294351405, "grad_norm": 0.1790907084941864, "learning_rate": 5.718478569401079e-05, "loss": 0.0007724050432443619, "step": 150900 }, { "epoch": 42.83565143343741, "grad_norm": 0.08459431678056717, "learning_rate": 5.718194720408743e-05, "loss": 0.0012457547709345818, "step": 150910 }, { "epoch": 42.838489923360775, "grad_norm": 0.006854152772575617, "learning_rate": 5.7179108714164064e-05, "loss": 0.0001696363091468811, "step": 150920 }, { "epoch": 42.84132841328413, "grad_norm": 0.060963310301303864, "learning_rate": 5.7176270224240705e-05, "loss": 0.0010008225217461586, "step": 150930 }, { "epoch": 42.844166903207494, "grad_norm": 0.07896207273006439, "learning_rate": 5.717343173431735e-05, "loss": 0.000448470376431942, "step": 150940 }, { "epoch": 42.84700539313086, "grad_norm": 0.08384078741073608, "learning_rate": 5.717059324439399e-05, "loss": 0.004282497614622116, "step": 150950 }, { "epoch": 42.84984388305421, "grad_norm": 10.917803764343262, "learning_rate": 5.716775475447063e-05, "loss": 0.005484047159552574, "step": 150960 }, { "epoch": 42.852682372977576, "grad_norm": 0.08138173818588257, "learning_rate": 5.7164916264547264e-05, "loss": 0.0037995245307683944, "step": 150970 }, { "epoch": 42.85552086290094, "grad_norm": 0.3964734673500061, "learning_rate": 5.71620777746239e-05, "loss": 0.012950757145881652, "step": 150980 }, { "epoch": 42.858359352824294, "grad_norm": 0.7408061027526855, "learning_rate": 5.715923928470054e-05, "loss": 0.007805038243532181, "step": 150990 }, { "epoch": 42.86119784274766, "grad_norm": 0.23521091043949127, "learning_rate": 5.7156400794777175e-05, "loss": 0.007884423434734344, "step": 151000 }, { "epoch": 42.86119784274766, "eval_accuracy": 0.9802886755261652, "eval_loss": 0.07557478547096252, "eval_runtime": 37.5108, "eval_samples_per_second": 419.266, "eval_steps_per_second": 6.558, "step": 151000 }, { "epoch": 42.86403633267102, "grad_norm": 0.04674926772713661, "learning_rate": 5.715356230485382e-05, "loss": 0.006673458963632584, "step": 151010 }, { "epoch": 42.86687482259438, "grad_norm": 0.007885012775659561, "learning_rate": 5.7150723814930464e-05, "loss": 0.008719100058078766, "step": 151020 }, { "epoch": 42.86971331251774, "grad_norm": 0.007881649769842625, "learning_rate": 5.71478853250071e-05, "loss": 0.003587343171238899, "step": 151030 }, { "epoch": 42.8725518024411, "grad_norm": 0.7576191425323486, "learning_rate": 5.714504683508374e-05, "loss": 0.001175275258719921, "step": 151040 }, { "epoch": 42.875390292364465, "grad_norm": 0.04694578796625137, "learning_rate": 5.7142208345160375e-05, "loss": 0.001969522424042225, "step": 151050 }, { "epoch": 42.87822878228782, "grad_norm": 1.7327940464019775, "learning_rate": 5.7139369855237016e-05, "loss": 0.0037328705191612245, "step": 151060 }, { "epoch": 42.88106727221118, "grad_norm": 0.10951203852891922, "learning_rate": 5.7136531365313664e-05, "loss": 0.00015715565532445908, "step": 151070 }, { "epoch": 42.883905762134546, "grad_norm": 0.5504537224769592, "learning_rate": 5.71336928753903e-05, "loss": 0.007852288335561753, "step": 151080 }, { "epoch": 42.8867442520579, "grad_norm": 0.8897615075111389, "learning_rate": 5.713085438546693e-05, "loss": 0.0014535514637827873, "step": 151090 }, { "epoch": 42.889582741981265, "grad_norm": 0.24015575647354126, "learning_rate": 5.7128015895543575e-05, "loss": 0.005601328238844872, "step": 151100 }, { "epoch": 42.89242123190463, "grad_norm": 0.05519101768732071, "learning_rate": 5.712517740562021e-05, "loss": 0.0005010547116398812, "step": 151110 }, { "epoch": 42.89525972182799, "grad_norm": 0.022900717332959175, "learning_rate": 5.712233891569685e-05, "loss": 0.0014277221634984016, "step": 151120 }, { "epoch": 42.898098211751346, "grad_norm": 1.6056188344955444, "learning_rate": 5.7119500425773485e-05, "loss": 0.0013885468244552613, "step": 151130 }, { "epoch": 42.90093670167471, "grad_norm": 0.015429944731295109, "learning_rate": 5.7116661935850133e-05, "loss": 0.00038306917995214464, "step": 151140 }, { "epoch": 42.90377519159807, "grad_norm": 0.3516647219657898, "learning_rate": 5.7113823445926775e-05, "loss": 0.0017564037814736366, "step": 151150 }, { "epoch": 42.90661368152143, "grad_norm": 0.026742322370409966, "learning_rate": 5.711098495600341e-05, "loss": 0.005385183170437813, "step": 151160 }, { "epoch": 42.90945217144479, "grad_norm": 7.306840419769287, "learning_rate": 5.710814646608005e-05, "loss": 0.004769621044397354, "step": 151170 }, { "epoch": 42.912290661368154, "grad_norm": 0.036553893238306046, "learning_rate": 5.7105307976156685e-05, "loss": 0.003592398017644882, "step": 151180 }, { "epoch": 42.91512915129151, "grad_norm": 0.19105391204357147, "learning_rate": 5.710246948623332e-05, "loss": 0.006098232045769691, "step": 151190 }, { "epoch": 42.91796764121487, "grad_norm": 0.028531715273857117, "learning_rate": 5.709963099630996e-05, "loss": 0.0008749743923544884, "step": 151200 }, { "epoch": 42.920806131138235, "grad_norm": 0.11725546419620514, "learning_rate": 5.709679250638661e-05, "loss": 0.013132403790950774, "step": 151210 }, { "epoch": 42.9236446210616, "grad_norm": 0.044381365180015564, "learning_rate": 5.7093954016463244e-05, "loss": 0.00039666648954153063, "step": 151220 }, { "epoch": 42.926483110984954, "grad_norm": 1.1313979625701904, "learning_rate": 5.7091115526539885e-05, "loss": 0.0019663391634821893, "step": 151230 }, { "epoch": 42.92932160090832, "grad_norm": 0.06541422009468079, "learning_rate": 5.708827703661652e-05, "loss": 0.0004262320697307587, "step": 151240 }, { "epoch": 42.93216009083168, "grad_norm": 0.2100992351770401, "learning_rate": 5.708543854669316e-05, "loss": 0.0003027345985174179, "step": 151250 }, { "epoch": 42.934998580755035, "grad_norm": 0.28697332739830017, "learning_rate": 5.7082600056769796e-05, "loss": 0.00019940920174121857, "step": 151260 }, { "epoch": 42.9378370706784, "grad_norm": 0.009838307276368141, "learning_rate": 5.7079761566846444e-05, "loss": 0.0005347298458218575, "step": 151270 }, { "epoch": 42.94067556060176, "grad_norm": 3.2903873920440674, "learning_rate": 5.7076923076923086e-05, "loss": 0.0021616557613015177, "step": 151280 }, { "epoch": 42.943514050525124, "grad_norm": 0.004284272901713848, "learning_rate": 5.707408458699972e-05, "loss": 0.00029637273401021956, "step": 151290 }, { "epoch": 42.94635254044848, "grad_norm": 0.10150881111621857, "learning_rate": 5.707124609707636e-05, "loss": 0.006523017585277557, "step": 151300 }, { "epoch": 42.94919103037184, "grad_norm": 0.30970054864883423, "learning_rate": 5.7068407607152996e-05, "loss": 0.0005733136087656022, "step": 151310 }, { "epoch": 42.952029520295206, "grad_norm": 0.038985636085271835, "learning_rate": 5.706556911722963e-05, "loss": 0.00020355284214019776, "step": 151320 }, { "epoch": 42.95486801021856, "grad_norm": 0.006500665098428726, "learning_rate": 5.706273062730627e-05, "loss": 0.0016718709841370583, "step": 151330 }, { "epoch": 42.957706500141924, "grad_norm": 0.04108281806111336, "learning_rate": 5.705989213738292e-05, "loss": 0.0031333591789007185, "step": 151340 }, { "epoch": 42.96054499006529, "grad_norm": 0.11312586814165115, "learning_rate": 5.7057053647459555e-05, "loss": 0.00029597207903862, "step": 151350 }, { "epoch": 42.96338347998864, "grad_norm": 0.040382035076618195, "learning_rate": 5.7054215157536196e-05, "loss": 0.002256019040942192, "step": 151360 }, { "epoch": 42.966221969912006, "grad_norm": 0.08199688047170639, "learning_rate": 5.705137666761283e-05, "loss": 0.0008020788431167602, "step": 151370 }, { "epoch": 42.96906045983537, "grad_norm": 0.0018619141774252057, "learning_rate": 5.704853817768947e-05, "loss": 0.012253812700510024, "step": 151380 }, { "epoch": 42.97189894975873, "grad_norm": 0.06896129995584488, "learning_rate": 5.704569968776611e-05, "loss": 0.00066156517714262, "step": 151390 }, { "epoch": 42.97473743968209, "grad_norm": 0.8291550278663635, "learning_rate": 5.704286119784275e-05, "loss": 0.000262429378926754, "step": 151400 }, { "epoch": 42.97757592960545, "grad_norm": 0.45148080587387085, "learning_rate": 5.7040022707919396e-05, "loss": 0.00049737598747015, "step": 151410 }, { "epoch": 42.98041441952881, "grad_norm": 0.046866800636053085, "learning_rate": 5.703718421799603e-05, "loss": 0.013904155790805816, "step": 151420 }, { "epoch": 42.98325290945217, "grad_norm": 0.7287517786026001, "learning_rate": 5.703434572807267e-05, "loss": 0.0005495034158229828, "step": 151430 }, { "epoch": 42.98609139937553, "grad_norm": 0.0799592137336731, "learning_rate": 5.703150723814931e-05, "loss": 0.0021159050986170767, "step": 151440 }, { "epoch": 42.988929889298895, "grad_norm": 0.10019347816705704, "learning_rate": 5.702866874822594e-05, "loss": 0.0005868870764970779, "step": 151450 }, { "epoch": 42.99176837922225, "grad_norm": 0.3072514235973358, "learning_rate": 5.702583025830258e-05, "loss": 0.009305911511182785, "step": 151460 }, { "epoch": 42.99460686914561, "grad_norm": 0.08467470109462738, "learning_rate": 5.702299176837923e-05, "loss": 0.00032862946391105653, "step": 151470 }, { "epoch": 42.997445359068976, "grad_norm": 0.016290560364723206, "learning_rate": 5.7020153278455866e-05, "loss": 0.00020164716988801956, "step": 151480 }, { "epoch": 43.00028384899234, "grad_norm": 0.2922505736351013, "learning_rate": 5.701731478853251e-05, "loss": 0.0005797470919787883, "step": 151490 }, { "epoch": 43.003122338915695, "grad_norm": 0.2592238187789917, "learning_rate": 5.701447629860914e-05, "loss": 0.00023702792823314666, "step": 151500 }, { "epoch": 43.003122338915695, "eval_accuracy": 0.9837858459973294, "eval_loss": 0.06443171203136444, "eval_runtime": 42.6995, "eval_samples_per_second": 368.318, "eval_steps_per_second": 5.761, "step": 151500 }, { "epoch": 43.00596082883906, "grad_norm": 0.00935086328536272, "learning_rate": 5.701163780868578e-05, "loss": 0.00033766087144613264, "step": 151510 }, { "epoch": 43.00879931876242, "grad_norm": 0.567474901676178, "learning_rate": 5.700879931876242e-05, "loss": 0.0002596393227577209, "step": 151520 }, { "epoch": 43.01163780868578, "grad_norm": 0.10982117801904678, "learning_rate": 5.700596082883906e-05, "loss": 0.010310281813144685, "step": 151530 }, { "epoch": 43.01447629860914, "grad_norm": 0.0037591364234685898, "learning_rate": 5.700312233891571e-05, "loss": 0.0005603298544883728, "step": 151540 }, { "epoch": 43.0173147885325, "grad_norm": 0.015550284646451473, "learning_rate": 5.700028384899234e-05, "loss": 0.0009874461218714714, "step": 151550 }, { "epoch": 43.02015327845586, "grad_norm": 0.00530818197876215, "learning_rate": 5.6997445359068976e-05, "loss": 0.008255188912153244, "step": 151560 }, { "epoch": 43.02299176837922, "grad_norm": 0.7167891263961792, "learning_rate": 5.699460686914562e-05, "loss": 0.00029727574437856676, "step": 151570 }, { "epoch": 43.025830258302584, "grad_norm": 0.01044511515647173, "learning_rate": 5.699176837922225e-05, "loss": 0.0022608716040849685, "step": 151580 }, { "epoch": 43.02866874822595, "grad_norm": 0.008279608562588692, "learning_rate": 5.6988929889298894e-05, "loss": 0.0017869027331471443, "step": 151590 }, { "epoch": 43.0315072381493, "grad_norm": 0.00301912147551775, "learning_rate": 5.698609139937553e-05, "loss": 0.0043927781283855435, "step": 151600 }, { "epoch": 43.034345728072665, "grad_norm": 0.20065619051456451, "learning_rate": 5.6983252909452176e-05, "loss": 0.0017105767503380776, "step": 151610 }, { "epoch": 43.03718421799603, "grad_norm": 2.1131460666656494, "learning_rate": 5.698041441952882e-05, "loss": 0.00657118558883667, "step": 151620 }, { "epoch": 43.040022707919384, "grad_norm": 0.04525010660290718, "learning_rate": 5.697757592960545e-05, "loss": 0.0007119417190551758, "step": 151630 }, { "epoch": 43.04286119784275, "grad_norm": 0.11827481538057327, "learning_rate": 5.6974737439682094e-05, "loss": 0.001216753013432026, "step": 151640 }, { "epoch": 43.04569968776611, "grad_norm": 0.981870174407959, "learning_rate": 5.697189894975873e-05, "loss": 0.0015243418514728547, "step": 151650 }, { "epoch": 43.04853817768947, "grad_norm": 0.021501358598470688, "learning_rate": 5.696906045983536e-05, "loss": 0.0008294770494103431, "step": 151660 }, { "epoch": 43.05137666761283, "grad_norm": 0.013544824905693531, "learning_rate": 5.696622196991202e-05, "loss": 0.0010386278852820396, "step": 151670 }, { "epoch": 43.05421515753619, "grad_norm": 0.037785936146974564, "learning_rate": 5.696338347998865e-05, "loss": 0.0027672216296195985, "step": 151680 }, { "epoch": 43.057053647459554, "grad_norm": 0.1499575823545456, "learning_rate": 5.696054499006529e-05, "loss": 0.0004048364236950874, "step": 151690 }, { "epoch": 43.05989213738291, "grad_norm": 0.009667696431279182, "learning_rate": 5.695770650014193e-05, "loss": 0.0011656580492854119, "step": 151700 }, { "epoch": 43.06273062730627, "grad_norm": 0.050976160913705826, "learning_rate": 5.695486801021856e-05, "loss": 0.0003268091008067131, "step": 151710 }, { "epoch": 43.065569117229636, "grad_norm": 0.013631191104650497, "learning_rate": 5.6952029520295204e-05, "loss": 0.006471741199493408, "step": 151720 }, { "epoch": 43.06840760715299, "grad_norm": 0.010988146997988224, "learning_rate": 5.694919103037184e-05, "loss": 0.0014211127534508705, "step": 151730 }, { "epoch": 43.071246097076354, "grad_norm": 0.024681609123945236, "learning_rate": 5.694635254044849e-05, "loss": 0.0003177626058459282, "step": 151740 }, { "epoch": 43.07408458699972, "grad_norm": 0.012910747900605202, "learning_rate": 5.694351405052513e-05, "loss": 0.0006057046353816986, "step": 151750 }, { "epoch": 43.07692307692308, "grad_norm": 0.014547587372362614, "learning_rate": 5.694067556060176e-05, "loss": 0.0007288359105587005, "step": 151760 }, { "epoch": 43.079761566846436, "grad_norm": 0.049732182174921036, "learning_rate": 5.6937837070678405e-05, "loss": 0.005880807712674141, "step": 151770 }, { "epoch": 43.0826000567698, "grad_norm": 0.061113398522138596, "learning_rate": 5.693499858075504e-05, "loss": 0.0004989741370081902, "step": 151780 }, { "epoch": 43.08543854669316, "grad_norm": 0.05878279730677605, "learning_rate": 5.6932160090831674e-05, "loss": 0.00046403929591178896, "step": 151790 }, { "epoch": 43.08827703661652, "grad_norm": 0.009841426275670528, "learning_rate": 5.6929321600908315e-05, "loss": 0.0002668071538209915, "step": 151800 }, { "epoch": 43.09111552653988, "grad_norm": 6.438069820404053, "learning_rate": 5.692648311098496e-05, "loss": 0.0028992902487516405, "step": 151810 }, { "epoch": 43.09395401646324, "grad_norm": 0.11933879554271698, "learning_rate": 5.69236446210616e-05, "loss": 0.0007465552538633346, "step": 151820 }, { "epoch": 43.0967925063866, "grad_norm": 0.08424641937017441, "learning_rate": 5.692080613113824e-05, "loss": 0.004669895023107528, "step": 151830 }, { "epoch": 43.09963099630996, "grad_norm": 5.195957660675049, "learning_rate": 5.6917967641214874e-05, "loss": 0.009774918109178543, "step": 151840 }, { "epoch": 43.102469486233325, "grad_norm": 0.21409399807453156, "learning_rate": 5.6915129151291515e-05, "loss": 0.0014965707436203957, "step": 151850 }, { "epoch": 43.10530797615669, "grad_norm": 0.03655388578772545, "learning_rate": 5.691229066136815e-05, "loss": 0.013217225670814514, "step": 151860 }, { "epoch": 43.10814646608004, "grad_norm": 12.077071189880371, "learning_rate": 5.69094521714448e-05, "loss": 0.015717299282550813, "step": 151870 }, { "epoch": 43.110984956003406, "grad_norm": 0.10002972185611725, "learning_rate": 5.690661368152144e-05, "loss": 0.0012707892805337907, "step": 151880 }, { "epoch": 43.11382344592677, "grad_norm": 0.008327421732246876, "learning_rate": 5.6903775191598074e-05, "loss": 0.001524055190384388, "step": 151890 }, { "epoch": 43.116661935850125, "grad_norm": 0.016476064920425415, "learning_rate": 5.6900936701674715e-05, "loss": 0.001862005889415741, "step": 151900 }, { "epoch": 43.11950042577349, "grad_norm": 0.06387214362621307, "learning_rate": 5.689809821175135e-05, "loss": 0.0013552650809288025, "step": 151910 }, { "epoch": 43.12233891569685, "grad_norm": 0.36655786633491516, "learning_rate": 5.6895259721827984e-05, "loss": 0.0010843852534890175, "step": 151920 }, { "epoch": 43.12517740562021, "grad_norm": 0.004167093429714441, "learning_rate": 5.6892421231904626e-05, "loss": 0.0014511099085211753, "step": 151930 }, { "epoch": 43.12801589554357, "grad_norm": 2.095648765563965, "learning_rate": 5.6889582741981274e-05, "loss": 0.0007770033553242683, "step": 151940 }, { "epoch": 43.13085438546693, "grad_norm": 0.006331621669232845, "learning_rate": 5.688674425205791e-05, "loss": 0.0053155310451984406, "step": 151950 }, { "epoch": 43.133692875390295, "grad_norm": 1.4068270921707153, "learning_rate": 5.688390576213455e-05, "loss": 0.0015014613047242165, "step": 151960 }, { "epoch": 43.13653136531365, "grad_norm": 0.10611765086650848, "learning_rate": 5.6881067272211185e-05, "loss": 0.004529792815446854, "step": 151970 }, { "epoch": 43.139369855237014, "grad_norm": 0.37396275997161865, "learning_rate": 5.6878228782287826e-05, "loss": 0.001319780945777893, "step": 151980 }, { "epoch": 43.14220834516038, "grad_norm": 0.4282955825328827, "learning_rate": 5.687539029236446e-05, "loss": 0.006726816296577454, "step": 151990 }, { "epoch": 43.14504683508373, "grad_norm": 0.06207936257123947, "learning_rate": 5.68725518024411e-05, "loss": 0.0013739079236984252, "step": 152000 }, { "epoch": 43.14504683508373, "eval_accuracy": 0.9822598079735487, "eval_loss": 0.07079780101776123, "eval_runtime": 36.969, "eval_samples_per_second": 425.41, "eval_steps_per_second": 6.654, "step": 152000 }, { "epoch": 43.147885325007096, "grad_norm": 0.0328654870390892, "learning_rate": 5.686971331251775e-05, "loss": 0.0015058893710374831, "step": 152010 }, { "epoch": 43.15072381493046, "grad_norm": 1.264357328414917, "learning_rate": 5.6866874822594385e-05, "loss": 0.016852375864982606, "step": 152020 }, { "epoch": 43.15356230485382, "grad_norm": 0.03565609082579613, "learning_rate": 5.686403633267102e-05, "loss": 0.0009031504392623902, "step": 152030 }, { "epoch": 43.15640079477718, "grad_norm": 1.099882960319519, "learning_rate": 5.686119784274766e-05, "loss": 0.012906111776828766, "step": 152040 }, { "epoch": 43.15923928470054, "grad_norm": 0.04848111793398857, "learning_rate": 5.6858359352824295e-05, "loss": 0.0010289698839187622, "step": 152050 }, { "epoch": 43.1620777746239, "grad_norm": 0.19739419221878052, "learning_rate": 5.685552086290094e-05, "loss": 0.004855605959892273, "step": 152060 }, { "epoch": 43.16491626454726, "grad_norm": 0.0690426155924797, "learning_rate": 5.6852682372977585e-05, "loss": 0.0023398257791996, "step": 152070 }, { "epoch": 43.16775475447062, "grad_norm": 0.07328411936759949, "learning_rate": 5.684984388305422e-05, "loss": 0.0006866216659545899, "step": 152080 }, { "epoch": 43.170593244393984, "grad_norm": 0.4875405728816986, "learning_rate": 5.684700539313086e-05, "loss": 0.0032399270683526995, "step": 152090 }, { "epoch": 43.17343173431734, "grad_norm": 0.1963791847229004, "learning_rate": 5.6844166903207495e-05, "loss": 0.013916566967964172, "step": 152100 }, { "epoch": 43.1762702242407, "grad_norm": 0.8011450171470642, "learning_rate": 5.684132841328414e-05, "loss": 0.0011674581095576287, "step": 152110 }, { "epoch": 43.179108714164066, "grad_norm": 0.003732465673238039, "learning_rate": 5.683848992336077e-05, "loss": 0.0006812429055571556, "step": 152120 }, { "epoch": 43.18194720408743, "grad_norm": 0.15666379034519196, "learning_rate": 5.6835651433437406e-05, "loss": 0.0005530767142772674, "step": 152130 }, { "epoch": 43.184785694010785, "grad_norm": 0.3823123276233673, "learning_rate": 5.683281294351406e-05, "loss": 0.0006855133920907974, "step": 152140 }, { "epoch": 43.18762418393415, "grad_norm": 0.940406084060669, "learning_rate": 5.6829974453590695e-05, "loss": 0.006473401933908463, "step": 152150 }, { "epoch": 43.19046267385751, "grad_norm": 4.232378005981445, "learning_rate": 5.682713596366733e-05, "loss": 0.0017684625461697578, "step": 152160 }, { "epoch": 43.193301163780866, "grad_norm": 0.015703734010457993, "learning_rate": 5.682429747374397e-05, "loss": 0.002156866900622845, "step": 152170 }, { "epoch": 43.19613965370423, "grad_norm": 0.06543601304292679, "learning_rate": 5.6821458983820606e-05, "loss": 0.0008201740682125091, "step": 152180 }, { "epoch": 43.19897814362759, "grad_norm": 0.1870831549167633, "learning_rate": 5.681862049389725e-05, "loss": 0.00037915538996458054, "step": 152190 }, { "epoch": 43.20181663355095, "grad_norm": 3.0090389251708984, "learning_rate": 5.681578200397388e-05, "loss": 0.0007346369326114655, "step": 152200 }, { "epoch": 43.20465512347431, "grad_norm": 0.1553211212158203, "learning_rate": 5.681294351405053e-05, "loss": 0.0019126512110233308, "step": 152210 }, { "epoch": 43.20749361339767, "grad_norm": 0.0036437397357076406, "learning_rate": 5.681010502412717e-05, "loss": 0.004633501172065735, "step": 152220 }, { "epoch": 43.210332103321036, "grad_norm": 2.1110405921936035, "learning_rate": 5.6807266534203806e-05, "loss": 0.004716425761580467, "step": 152230 }, { "epoch": 43.21317059324439, "grad_norm": 3.0762617588043213, "learning_rate": 5.680442804428045e-05, "loss": 0.0016938772052526474, "step": 152240 }, { "epoch": 43.216009083167755, "grad_norm": 0.08249589055776596, "learning_rate": 5.680158955435708e-05, "loss": 0.013850726187229156, "step": 152250 }, { "epoch": 43.21884757309112, "grad_norm": 0.046545568853616714, "learning_rate": 5.679875106443372e-05, "loss": 0.0018868470564484597, "step": 152260 }, { "epoch": 43.221686063014474, "grad_norm": 0.015289896167814732, "learning_rate": 5.6795912574510365e-05, "loss": 0.004061764478683472, "step": 152270 }, { "epoch": 43.22452455293784, "grad_norm": 1.2964617013931274, "learning_rate": 5.6793074084587006e-05, "loss": 0.0012582838535308838, "step": 152280 }, { "epoch": 43.2273630428612, "grad_norm": 0.07059235870838165, "learning_rate": 5.679023559466364e-05, "loss": 0.004635906219482422, "step": 152290 }, { "epoch": 43.230201532784555, "grad_norm": 0.037953149527311325, "learning_rate": 5.678739710474028e-05, "loss": 0.004340910911560058, "step": 152300 }, { "epoch": 43.23304002270792, "grad_norm": 0.04964462295174599, "learning_rate": 5.678455861481692e-05, "loss": 0.008157704025506973, "step": 152310 }, { "epoch": 43.23587851263128, "grad_norm": 1.097936987876892, "learning_rate": 5.678172012489356e-05, "loss": 0.0009706003591418266, "step": 152320 }, { "epoch": 43.238717002554644, "grad_norm": 0.29077672958374023, "learning_rate": 5.677888163497019e-05, "loss": 0.0009162953123450279, "step": 152330 }, { "epoch": 43.241555492478, "grad_norm": 1.4559952020645142, "learning_rate": 5.677604314504684e-05, "loss": 0.0026202725246548654, "step": 152340 }, { "epoch": 43.24439398240136, "grad_norm": 0.275511234998703, "learning_rate": 5.677320465512348e-05, "loss": 0.004549045115709305, "step": 152350 }, { "epoch": 43.247232472324725, "grad_norm": 0.4391036927700043, "learning_rate": 5.677036616520012e-05, "loss": 0.018954366445541382, "step": 152360 }, { "epoch": 43.25007096224808, "grad_norm": 13.056754112243652, "learning_rate": 5.676752767527675e-05, "loss": 0.009220927953720093, "step": 152370 }, { "epoch": 43.252909452171444, "grad_norm": 0.06337467581033707, "learning_rate": 5.676468918535339e-05, "loss": 0.0010279031470417977, "step": 152380 }, { "epoch": 43.25574794209481, "grad_norm": 7.4217329025268555, "learning_rate": 5.676185069543003e-05, "loss": 0.005477697402238846, "step": 152390 }, { "epoch": 43.25858643201816, "grad_norm": 0.04194336012005806, "learning_rate": 5.6759012205506676e-05, "loss": 0.021258686482906342, "step": 152400 }, { "epoch": 43.261424921941526, "grad_norm": 7.3442277908325195, "learning_rate": 5.675617371558332e-05, "loss": 0.015120401978492737, "step": 152410 }, { "epoch": 43.26426341186489, "grad_norm": 0.018433157354593277, "learning_rate": 5.675333522565995e-05, "loss": 0.010390038788318633, "step": 152420 }, { "epoch": 43.26710190178825, "grad_norm": 0.041775040328502655, "learning_rate": 5.675049673573659e-05, "loss": 0.0006608342751860619, "step": 152430 }, { "epoch": 43.26994039171161, "grad_norm": 0.3600941002368927, "learning_rate": 5.674765824581323e-05, "loss": 0.00042895544320344927, "step": 152440 }, { "epoch": 43.27277888163497, "grad_norm": 0.09284286201000214, "learning_rate": 5.674481975588987e-05, "loss": 0.0004192549735307693, "step": 152450 }, { "epoch": 43.27561737155833, "grad_norm": 5.510642051696777, "learning_rate": 5.6741981265966504e-05, "loss": 0.0016887536272406577, "step": 152460 }, { "epoch": 43.27845586148169, "grad_norm": 2.0762672424316406, "learning_rate": 5.673914277604315e-05, "loss": 0.0020567798987030984, "step": 152470 }, { "epoch": 43.28129435140505, "grad_norm": 0.08274292200803757, "learning_rate": 5.673630428611979e-05, "loss": 0.00047300495207309724, "step": 152480 }, { "epoch": 43.284132841328415, "grad_norm": 0.08963485062122345, "learning_rate": 5.673346579619643e-05, "loss": 0.0003708302974700928, "step": 152490 }, { "epoch": 43.28697133125178, "grad_norm": 0.2836528718471527, "learning_rate": 5.673062730627306e-05, "loss": 0.0027031967416405677, "step": 152500 }, { "epoch": 43.28697133125178, "eval_accuracy": 0.9810516945380555, "eval_loss": 0.0727243721485138, "eval_runtime": 35.729, "eval_samples_per_second": 440.175, "eval_steps_per_second": 6.885, "step": 152500 }, { "epoch": 43.28980982117513, "grad_norm": 0.009578309021890163, "learning_rate": 5.6727788816349704e-05, "loss": 0.0008885851129889488, "step": 152510 }, { "epoch": 43.292648311098496, "grad_norm": 0.05703756958246231, "learning_rate": 5.672495032642634e-05, "loss": 0.00019269548356533052, "step": 152520 }, { "epoch": 43.29548680102186, "grad_norm": 0.007329477928578854, "learning_rate": 5.672211183650298e-05, "loss": 0.0014387087896466256, "step": 152530 }, { "epoch": 43.298325290945215, "grad_norm": 0.017200078815221786, "learning_rate": 5.671927334657963e-05, "loss": 0.0006730075925588608, "step": 152540 }, { "epoch": 43.30116378086858, "grad_norm": 0.2905217707157135, "learning_rate": 5.671643485665626e-05, "loss": 0.0011831259354948998, "step": 152550 }, { "epoch": 43.30400227079194, "grad_norm": 6.033872127532959, "learning_rate": 5.6713596366732904e-05, "loss": 0.0026432678103446962, "step": 152560 }, { "epoch": 43.306840760715296, "grad_norm": 0.07014671713113785, "learning_rate": 5.671075787680954e-05, "loss": 0.002902078256011009, "step": 152570 }, { "epoch": 43.30967925063866, "grad_norm": 6.155324935913086, "learning_rate": 5.670791938688618e-05, "loss": 0.002152441255748272, "step": 152580 }, { "epoch": 43.31251774056202, "grad_norm": 5.364418029785156, "learning_rate": 5.6705080896962814e-05, "loss": 0.0029280476272106172, "step": 152590 }, { "epoch": 43.315356230485385, "grad_norm": 0.06279491633176804, "learning_rate": 5.670224240703946e-05, "loss": 0.006863147020339966, "step": 152600 }, { "epoch": 43.31819472040874, "grad_norm": 0.05597006902098656, "learning_rate": 5.6699403917116104e-05, "loss": 0.0020756693556904793, "step": 152610 }, { "epoch": 43.321033210332104, "grad_norm": 0.9091084599494934, "learning_rate": 5.669656542719274e-05, "loss": 0.0011825835332274436, "step": 152620 }, { "epoch": 43.32387170025547, "grad_norm": 0.05968506634235382, "learning_rate": 5.669372693726937e-05, "loss": 0.0009108109399676323, "step": 152630 }, { "epoch": 43.32671019017882, "grad_norm": 0.2397923469543457, "learning_rate": 5.6690888447346014e-05, "loss": 0.0010595696046948432, "step": 152640 }, { "epoch": 43.329548680102185, "grad_norm": 0.00596369756385684, "learning_rate": 5.668804995742265e-05, "loss": 0.0006328945979475975, "step": 152650 }, { "epoch": 43.33238717002555, "grad_norm": 1.1821368932724, "learning_rate": 5.668521146749929e-05, "loss": 0.00040476880967617036, "step": 152660 }, { "epoch": 43.335225659948904, "grad_norm": 0.007997938431799412, "learning_rate": 5.668237297757594e-05, "loss": 0.007238224893808365, "step": 152670 }, { "epoch": 43.33806414987227, "grad_norm": 0.0048398664221167564, "learning_rate": 5.667953448765257e-05, "loss": 0.008678990602493285, "step": 152680 }, { "epoch": 43.34090263979563, "grad_norm": 0.07835734635591507, "learning_rate": 5.6676695997729214e-05, "loss": 0.0015766674652695656, "step": 152690 }, { "epoch": 43.34374112971899, "grad_norm": 0.006689738482236862, "learning_rate": 5.667385750780585e-05, "loss": 0.0030457964166998863, "step": 152700 }, { "epoch": 43.34657961964235, "grad_norm": 0.9607021808624268, "learning_rate": 5.667101901788249e-05, "loss": 0.018563923239707947, "step": 152710 }, { "epoch": 43.34941810956571, "grad_norm": 0.2654072344303131, "learning_rate": 5.6668180527959125e-05, "loss": 0.003859967365860939, "step": 152720 }, { "epoch": 43.352256599489074, "grad_norm": 0.03662710636854172, "learning_rate": 5.666534203803576e-05, "loss": 0.00040495283901691436, "step": 152730 }, { "epoch": 43.35509508941243, "grad_norm": 0.0533829964697361, "learning_rate": 5.666250354811241e-05, "loss": 0.0016849184408783912, "step": 152740 }, { "epoch": 43.35793357933579, "grad_norm": 0.39077144861221313, "learning_rate": 5.665966505818905e-05, "loss": 0.004461916163563728, "step": 152750 }, { "epoch": 43.360772069259156, "grad_norm": 0.059943266212940216, "learning_rate": 5.6656826568265684e-05, "loss": 0.00066349096596241, "step": 152760 }, { "epoch": 43.36361055918251, "grad_norm": 1.8989614248275757, "learning_rate": 5.6653988078342325e-05, "loss": 0.0010208329185843468, "step": 152770 }, { "epoch": 43.366449049105874, "grad_norm": 0.09664318710565567, "learning_rate": 5.665114958841896e-05, "loss": 0.0022865777835249903, "step": 152780 }, { "epoch": 43.36928753902924, "grad_norm": 0.08086778223514557, "learning_rate": 5.66483110984956e-05, "loss": 0.000776146911084652, "step": 152790 }, { "epoch": 43.3721260289526, "grad_norm": 0.02025444060564041, "learning_rate": 5.664547260857225e-05, "loss": 0.010076656937599182, "step": 152800 }, { "epoch": 43.374964518875956, "grad_norm": 0.0895228236913681, "learning_rate": 5.6642634118648884e-05, "loss": 0.0005845125764608383, "step": 152810 }, { "epoch": 43.37780300879932, "grad_norm": 0.21376478672027588, "learning_rate": 5.6639795628725525e-05, "loss": 0.00029534101486206055, "step": 152820 }, { "epoch": 43.38064149872268, "grad_norm": 0.015170397236943245, "learning_rate": 5.663695713880216e-05, "loss": 0.0005310231819748879, "step": 152830 }, { "epoch": 43.38347998864604, "grad_norm": 0.2735845148563385, "learning_rate": 5.6634118648878794e-05, "loss": 0.0006800740957260132, "step": 152840 }, { "epoch": 43.3863184785694, "grad_norm": 0.0382818765938282, "learning_rate": 5.6631280158955436e-05, "loss": 0.0014265848323702813, "step": 152850 }, { "epoch": 43.38915696849276, "grad_norm": 0.053997885435819626, "learning_rate": 5.662844166903207e-05, "loss": 0.0001558832824230194, "step": 152860 }, { "epoch": 43.391995458416126, "grad_norm": 0.06009572371840477, "learning_rate": 5.662560317910872e-05, "loss": 0.0010416410863399506, "step": 152870 }, { "epoch": 43.39483394833948, "grad_norm": 1.127760648727417, "learning_rate": 5.662276468918536e-05, "loss": 0.0008127793669700622, "step": 152880 }, { "epoch": 43.397672438262845, "grad_norm": 0.008454595692455769, "learning_rate": 5.6619926199261995e-05, "loss": 0.004092592000961304, "step": 152890 }, { "epoch": 43.40051092818621, "grad_norm": 0.855063796043396, "learning_rate": 5.6617087709338636e-05, "loss": 0.0017206262797117232, "step": 152900 }, { "epoch": 43.40334941810956, "grad_norm": 1.3849399089813232, "learning_rate": 5.661424921941527e-05, "loss": 0.002251453697681427, "step": 152910 }, { "epoch": 43.406187908032926, "grad_norm": 0.2876548767089844, "learning_rate": 5.661141072949191e-05, "loss": 0.004437950998544693, "step": 152920 }, { "epoch": 43.40902639795629, "grad_norm": 0.09994953125715256, "learning_rate": 5.6608572239568546e-05, "loss": 0.0009996313601732254, "step": 152930 }, { "epoch": 43.411864887879645, "grad_norm": 0.013796444982290268, "learning_rate": 5.6605733749645195e-05, "loss": 0.0002330660820007324, "step": 152940 }, { "epoch": 43.41470337780301, "grad_norm": 0.03399978205561638, "learning_rate": 5.6602895259721836e-05, "loss": 0.0006151346489787102, "step": 152950 }, { "epoch": 43.41754186772637, "grad_norm": 0.00628358218818903, "learning_rate": 5.660005676979847e-05, "loss": 0.013794597983360291, "step": 152960 }, { "epoch": 43.420380357649734, "grad_norm": 0.16903670132160187, "learning_rate": 5.6597218279875105e-05, "loss": 0.0002290681004524231, "step": 152970 }, { "epoch": 43.42321884757309, "grad_norm": 0.30709704756736755, "learning_rate": 5.6594379789951747e-05, "loss": 0.0012572724372148514, "step": 152980 }, { "epoch": 43.42605733749645, "grad_norm": 0.04028359428048134, "learning_rate": 5.659154130002838e-05, "loss": 0.0006210586056113243, "step": 152990 }, { "epoch": 43.428895827419815, "grad_norm": 0.021022673696279526, "learning_rate": 5.658870281010503e-05, "loss": 0.0012198779731988907, "step": 153000 }, { "epoch": 43.428895827419815, "eval_accuracy": 0.9803522604438227, "eval_loss": 0.07423359155654907, "eval_runtime": 34.6442, "eval_samples_per_second": 453.958, "eval_steps_per_second": 7.101, "step": 153000 }, { "epoch": 43.43173431734317, "grad_norm": 10.808751106262207, "learning_rate": 5.658586432018167e-05, "loss": 0.009641854465007782, "step": 153010 }, { "epoch": 43.434572807266534, "grad_norm": 0.7161473631858826, "learning_rate": 5.6583025830258305e-05, "loss": 0.0004636567085981369, "step": 153020 }, { "epoch": 43.4374112971899, "grad_norm": 0.8092128038406372, "learning_rate": 5.658018734033495e-05, "loss": 0.0007798656821250916, "step": 153030 }, { "epoch": 43.44024978711325, "grad_norm": 0.027301989495754242, "learning_rate": 5.657734885041158e-05, "loss": 0.0023845100775361063, "step": 153040 }, { "epoch": 43.443088277036615, "grad_norm": 0.1159922182559967, "learning_rate": 5.657451036048822e-05, "loss": 0.0002569224685430527, "step": 153050 }, { "epoch": 43.44592676695998, "grad_norm": 0.02436135709285736, "learning_rate": 5.657167187056486e-05, "loss": 0.0037109911441802978, "step": 153060 }, { "epoch": 43.44876525688334, "grad_norm": 0.10706202685832977, "learning_rate": 5.6568833380641505e-05, "loss": 0.0008087849244475364, "step": 153070 }, { "epoch": 43.4516037468067, "grad_norm": 0.31608352065086365, "learning_rate": 5.656599489071815e-05, "loss": 0.011767198145389558, "step": 153080 }, { "epoch": 43.45444223673006, "grad_norm": 0.9450284242630005, "learning_rate": 5.656315640079478e-05, "loss": 0.0005815163254737854, "step": 153090 }, { "epoch": 43.45728072665342, "grad_norm": 0.10696419328451157, "learning_rate": 5.6560317910871416e-05, "loss": 0.000531616434454918, "step": 153100 }, { "epoch": 43.46011921657678, "grad_norm": 0.03840429708361626, "learning_rate": 5.655747942094806e-05, "loss": 0.00868912935256958, "step": 153110 }, { "epoch": 43.46295770650014, "grad_norm": 0.047803301364183426, "learning_rate": 5.655464093102469e-05, "loss": 0.0027926849201321603, "step": 153120 }, { "epoch": 43.465796196423504, "grad_norm": 0.04814853146672249, "learning_rate": 5.655180244110133e-05, "loss": 0.0003755349665880203, "step": 153130 }, { "epoch": 43.46863468634686, "grad_norm": 0.034356486052274704, "learning_rate": 5.654896395117798e-05, "loss": 0.000433717668056488, "step": 153140 }, { "epoch": 43.47147317627022, "grad_norm": 0.2660091519355774, "learning_rate": 5.6546125461254616e-05, "loss": 0.0007601298391819, "step": 153150 }, { "epoch": 43.474311666193586, "grad_norm": 12.376680374145508, "learning_rate": 5.654328697133126e-05, "loss": 0.006318332254886627, "step": 153160 }, { "epoch": 43.47715015611695, "grad_norm": 0.21779540181159973, "learning_rate": 5.654044848140789e-05, "loss": 0.0016677239909768105, "step": 153170 }, { "epoch": 43.479988646040304, "grad_norm": 0.404398113489151, "learning_rate": 5.6537609991484533e-05, "loss": 0.0020261041820049284, "step": 153180 }, { "epoch": 43.48282713596367, "grad_norm": 1.871950626373291, "learning_rate": 5.653477150156117e-05, "loss": 0.007501987367868423, "step": 153190 }, { "epoch": 43.48566562588703, "grad_norm": 6.910791397094727, "learning_rate": 5.6531933011637816e-05, "loss": 0.01996311843395233, "step": 153200 }, { "epoch": 43.488504115810386, "grad_norm": 1.9497021436691284, "learning_rate": 5.652909452171445e-05, "loss": 0.0015448413789272309, "step": 153210 }, { "epoch": 43.49134260573375, "grad_norm": 0.10950101912021637, "learning_rate": 5.652625603179109e-05, "loss": 0.004636970907449722, "step": 153220 }, { "epoch": 43.49418109565711, "grad_norm": 0.15537582337856293, "learning_rate": 5.652341754186773e-05, "loss": 0.0005324946716427803, "step": 153230 }, { "epoch": 43.497019585580475, "grad_norm": 0.013595640659332275, "learning_rate": 5.652057905194437e-05, "loss": 0.0025098394602537156, "step": 153240 }, { "epoch": 43.49985807550383, "grad_norm": 0.19961939752101898, "learning_rate": 5.6517740562021e-05, "loss": 0.001434594765305519, "step": 153250 }, { "epoch": 43.50269656542719, "grad_norm": 0.005431048572063446, "learning_rate": 5.6514902072097644e-05, "loss": 0.004250634461641312, "step": 153260 }, { "epoch": 43.505535055350556, "grad_norm": 0.06334385275840759, "learning_rate": 5.651206358217429e-05, "loss": 0.000761803612112999, "step": 153270 }, { "epoch": 43.50837354527391, "grad_norm": 0.07640001177787781, "learning_rate": 5.650922509225093e-05, "loss": 0.0038008332252502442, "step": 153280 }, { "epoch": 43.511212035197275, "grad_norm": 0.10814528167247772, "learning_rate": 5.650638660232757e-05, "loss": 0.006379276514053345, "step": 153290 }, { "epoch": 43.51405052512064, "grad_norm": 0.37663936614990234, "learning_rate": 5.65035481124042e-05, "loss": 0.0123762845993042, "step": 153300 }, { "epoch": 43.51688901504399, "grad_norm": 0.13418583571910858, "learning_rate": 5.650070962248084e-05, "loss": 0.0006143009290099144, "step": 153310 }, { "epoch": 43.519727504967356, "grad_norm": 0.0813075453042984, "learning_rate": 5.649787113255748e-05, "loss": 0.0010774379596114158, "step": 153320 }, { "epoch": 43.52256599489072, "grad_norm": 0.0070370095781981945, "learning_rate": 5.6495032642634113e-05, "loss": 0.0003204280510544777, "step": 153330 }, { "epoch": 43.52540448481408, "grad_norm": 0.0838664323091507, "learning_rate": 5.649219415271076e-05, "loss": 0.0025504091754555702, "step": 153340 }, { "epoch": 43.52824297473744, "grad_norm": 2.976020336151123, "learning_rate": 5.64893556627874e-05, "loss": 0.0009870415553450585, "step": 153350 }, { "epoch": 43.5310814646608, "grad_norm": 0.04180997982621193, "learning_rate": 5.648651717286404e-05, "loss": 0.004081838577985763, "step": 153360 }, { "epoch": 43.533919954584164, "grad_norm": 0.25737103819847107, "learning_rate": 5.648367868294068e-05, "loss": 0.0007884806022047996, "step": 153370 }, { "epoch": 43.53675844450752, "grad_norm": 0.01495260838419199, "learning_rate": 5.6480840193017313e-05, "loss": 0.00030612554401159285, "step": 153380 }, { "epoch": 43.53959693443088, "grad_norm": 0.029840264469385147, "learning_rate": 5.6478001703093955e-05, "loss": 0.0049653973430395125, "step": 153390 }, { "epoch": 43.542435424354245, "grad_norm": 0.08333849161863327, "learning_rate": 5.64751632131706e-05, "loss": 0.0007930740714073182, "step": 153400 }, { "epoch": 43.5452739142776, "grad_norm": 0.36890193819999695, "learning_rate": 5.647232472324724e-05, "loss": 0.0017783109098672867, "step": 153410 }, { "epoch": 43.548112404200964, "grad_norm": 0.014134321361780167, "learning_rate": 5.646948623332388e-05, "loss": 0.001893671229481697, "step": 153420 }, { "epoch": 43.55095089412433, "grad_norm": 13.349177360534668, "learning_rate": 5.6466647743400514e-05, "loss": 0.012241728603839874, "step": 153430 }, { "epoch": 43.55378938404769, "grad_norm": 0.017002278938889503, "learning_rate": 5.646380925347715e-05, "loss": 0.0005123298615217209, "step": 153440 }, { "epoch": 43.556627873971046, "grad_norm": 0.02654067799448967, "learning_rate": 5.646097076355379e-05, "loss": 0.0008165098726749421, "step": 153450 }, { "epoch": 43.55946636389441, "grad_norm": 0.21465371549129486, "learning_rate": 5.6458132273630424e-05, "loss": 0.02145056426525116, "step": 153460 }, { "epoch": 43.56230485381777, "grad_norm": 0.042155180126428604, "learning_rate": 5.645529378370707e-05, "loss": 0.0008059868589043618, "step": 153470 }, { "epoch": 43.56514334374113, "grad_norm": 0.2699970006942749, "learning_rate": 5.6452455293783714e-05, "loss": 0.003462113067507744, "step": 153480 }, { "epoch": 43.56798183366449, "grad_norm": 4.923833847045898, "learning_rate": 5.644961680386035e-05, "loss": 0.008399033546447754, "step": 153490 }, { "epoch": 43.57082032358785, "grad_norm": 7.879570484161377, "learning_rate": 5.644677831393699e-05, "loss": 0.0065297380089759825, "step": 153500 }, { "epoch": 43.57082032358785, "eval_accuracy": 0.9771094296432886, "eval_loss": 0.08846405893564224, "eval_runtime": 35.0973, "eval_samples_per_second": 448.097, "eval_steps_per_second": 7.009, "step": 153500 }, { "epoch": 43.57365881351121, "grad_norm": 0.02610708214342594, "learning_rate": 5.6443939824013624e-05, "loss": 0.025878366827964783, "step": 153510 }, { "epoch": 43.57649730343457, "grad_norm": 0.06952115148305893, "learning_rate": 5.6441101334090266e-05, "loss": 0.001183454692363739, "step": 153520 }, { "epoch": 43.579335793357934, "grad_norm": 3.785395622253418, "learning_rate": 5.6438262844166914e-05, "loss": 0.0024497950449585914, "step": 153530 }, { "epoch": 43.5821742832813, "grad_norm": 1.664858341217041, "learning_rate": 5.643542435424355e-05, "loss": 0.002590658701956272, "step": 153540 }, { "epoch": 43.58501277320465, "grad_norm": 1.8620352745056152, "learning_rate": 5.643258586432019e-05, "loss": 0.006890442967414856, "step": 153550 }, { "epoch": 43.587851263128016, "grad_norm": 0.016927745193243027, "learning_rate": 5.6429747374396824e-05, "loss": 0.0013885872438549995, "step": 153560 }, { "epoch": 43.59068975305138, "grad_norm": 0.06242585554718971, "learning_rate": 5.642690888447346e-05, "loss": 0.003155702352523804, "step": 153570 }, { "epoch": 43.593528242974735, "grad_norm": 1.0647488832473755, "learning_rate": 5.64240703945501e-05, "loss": 0.004752069711685181, "step": 153580 }, { "epoch": 43.5963667328981, "grad_norm": 0.02889789082109928, "learning_rate": 5.6421231904626735e-05, "loss": 0.010921470820903778, "step": 153590 }, { "epoch": 43.59920522282146, "grad_norm": 1.397362470626831, "learning_rate": 5.641839341470338e-05, "loss": 0.0028692752122879027, "step": 153600 }, { "epoch": 43.602043712744816, "grad_norm": 0.04782985523343086, "learning_rate": 5.6415554924780024e-05, "loss": 0.0033182330429553986, "step": 153610 }, { "epoch": 43.60488220266818, "grad_norm": 0.2867395877838135, "learning_rate": 5.641271643485666e-05, "loss": 0.004737193137407303, "step": 153620 }, { "epoch": 43.60772069259154, "grad_norm": 7.007397651672363, "learning_rate": 5.64098779449333e-05, "loss": 0.002075057476758957, "step": 153630 }, { "epoch": 43.610559182514905, "grad_norm": 18.095151901245117, "learning_rate": 5.6407039455009935e-05, "loss": 0.015291669964790344, "step": 153640 }, { "epoch": 43.61339767243826, "grad_norm": 0.06479736417531967, "learning_rate": 5.6404200965086576e-05, "loss": 0.004870763048529625, "step": 153650 }, { "epoch": 43.61623616236162, "grad_norm": 0.047427501529455185, "learning_rate": 5.640136247516321e-05, "loss": 0.0010550571605563164, "step": 153660 }, { "epoch": 43.619074652284986, "grad_norm": 0.06401299685239792, "learning_rate": 5.639852398523986e-05, "loss": 0.011502717435359956, "step": 153670 }, { "epoch": 43.62191314220834, "grad_norm": 1.638598084449768, "learning_rate": 5.6395685495316494e-05, "loss": 0.010948451608419419, "step": 153680 }, { "epoch": 43.624751632131705, "grad_norm": 0.09966938197612762, "learning_rate": 5.6392847005393135e-05, "loss": 0.010276015847921371, "step": 153690 }, { "epoch": 43.62759012205507, "grad_norm": 0.52044677734375, "learning_rate": 5.639000851546977e-05, "loss": 0.0012634491547942161, "step": 153700 }, { "epoch": 43.63042861197843, "grad_norm": 0.08923406153917313, "learning_rate": 5.638717002554641e-05, "loss": 0.0014402419328689575, "step": 153710 }, { "epoch": 43.63326710190179, "grad_norm": 0.30465230345726013, "learning_rate": 5.6384331535623046e-05, "loss": 0.0029221348464488982, "step": 153720 }, { "epoch": 43.63610559182515, "grad_norm": 0.5428586602210999, "learning_rate": 5.6381493045699694e-05, "loss": 0.00541597530245781, "step": 153730 }, { "epoch": 43.63894408174851, "grad_norm": 0.1508817821741104, "learning_rate": 5.6378654555776335e-05, "loss": 0.0025826672092080115, "step": 153740 }, { "epoch": 43.64178257167187, "grad_norm": 0.05312874913215637, "learning_rate": 5.637581606585297e-05, "loss": 0.00034854747354984283, "step": 153750 }, { "epoch": 43.64462106159523, "grad_norm": 0.06541154533624649, "learning_rate": 5.637297757592961e-05, "loss": 0.003376702964305878, "step": 153760 }, { "epoch": 43.647459551518594, "grad_norm": 0.5741783976554871, "learning_rate": 5.6370139086006246e-05, "loss": 0.0012398447841405868, "step": 153770 }, { "epoch": 43.65029804144195, "grad_norm": 0.1173282340168953, "learning_rate": 5.636730059608288e-05, "loss": 0.0026853304356336594, "step": 153780 }, { "epoch": 43.65313653136531, "grad_norm": 0.17960786819458008, "learning_rate": 5.636446210615952e-05, "loss": 0.0004866115748882294, "step": 153790 }, { "epoch": 43.655975021288675, "grad_norm": 0.018428990617394447, "learning_rate": 5.63619074652285e-05, "loss": 0.017810125648975373, "step": 153800 }, { "epoch": 43.65881351121204, "grad_norm": 0.2471126765012741, "learning_rate": 5.635906897530514e-05, "loss": 0.002628660947084427, "step": 153810 }, { "epoch": 43.661652001135394, "grad_norm": 1.2696048021316528, "learning_rate": 5.635623048538178e-05, "loss": 0.0009449474513530731, "step": 153820 }, { "epoch": 43.66449049105876, "grad_norm": 1.7761489152908325, "learning_rate": 5.635339199545841e-05, "loss": 0.0015598241239786147, "step": 153830 }, { "epoch": 43.66732898098212, "grad_norm": 0.016332322731614113, "learning_rate": 5.6350553505535054e-05, "loss": 0.0076208412647247314, "step": 153840 }, { "epoch": 43.670167470905476, "grad_norm": 0.02615770883858204, "learning_rate": 5.63477150156117e-05, "loss": 0.013192364573478698, "step": 153850 }, { "epoch": 43.67300596082884, "grad_norm": 0.6375195980072021, "learning_rate": 5.6344876525688336e-05, "loss": 0.0013165174052119255, "step": 153860 }, { "epoch": 43.6758444507522, "grad_norm": 0.0693691074848175, "learning_rate": 5.634203803576498e-05, "loss": 0.0008381053805351258, "step": 153870 }, { "epoch": 43.67868294067556, "grad_norm": 0.03240704908967018, "learning_rate": 5.633919954584161e-05, "loss": 0.0004123715683817863, "step": 153880 }, { "epoch": 43.68152143059892, "grad_norm": 0.4099147319793701, "learning_rate": 5.6336361055918254e-05, "loss": 0.005307330936193466, "step": 153890 }, { "epoch": 43.68435992052228, "grad_norm": 0.031020937487483025, "learning_rate": 5.633352256599489e-05, "loss": 0.0012893477454781533, "step": 153900 }, { "epoch": 43.687198410445646, "grad_norm": 0.009969227015972137, "learning_rate": 5.6330684076071537e-05, "loss": 0.0035874687135219576, "step": 153910 }, { "epoch": 43.690036900369, "grad_norm": 0.002911313669756055, "learning_rate": 5.632784558614818e-05, "loss": 0.0003688495606184006, "step": 153920 }, { "epoch": 43.692875390292365, "grad_norm": 0.37219274044036865, "learning_rate": 5.632500709622481e-05, "loss": 0.0014486737549304963, "step": 153930 }, { "epoch": 43.69571388021573, "grad_norm": 0.004114745184779167, "learning_rate": 5.6322168606301454e-05, "loss": 0.0022539906203746796, "step": 153940 }, { "epoch": 43.69855237013908, "grad_norm": 0.008129196241497993, "learning_rate": 5.631933011637809e-05, "loss": 0.0008590605109930038, "step": 153950 }, { "epoch": 43.701390860062446, "grad_norm": 1.5870591402053833, "learning_rate": 5.631649162645472e-05, "loss": 0.00706782266497612, "step": 153960 }, { "epoch": 43.70422934998581, "grad_norm": 0.3544924855232239, "learning_rate": 5.6313653136531364e-05, "loss": 0.008196482062339782, "step": 153970 }, { "epoch": 43.70706783990917, "grad_norm": 11.04712200164795, "learning_rate": 5.631081464660801e-05, "loss": 0.004263366013765335, "step": 153980 }, { "epoch": 43.70990632983253, "grad_norm": 0.028074584901332855, "learning_rate": 5.630797615668465e-05, "loss": 0.0015650179237127304, "step": 153990 }, { "epoch": 43.71274481975589, "grad_norm": 0.580682098865509, "learning_rate": 5.630513766676129e-05, "loss": 0.0009681770578026771, "step": 154000 }, { "epoch": 43.71274481975589, "eval_accuracy": 0.9818782984676034, "eval_loss": 0.07160479575395584, "eval_runtime": 34.0657, "eval_samples_per_second": 461.666, "eval_steps_per_second": 7.221, "step": 154000 }, { "epoch": 43.71558330967925, "grad_norm": 0.022493500262498856, "learning_rate": 5.630229917683792e-05, "loss": 0.0020832590758800506, "step": 154010 }, { "epoch": 43.71842179960261, "grad_norm": 0.07028790563344955, "learning_rate": 5.6299460686914565e-05, "loss": 0.0033430114388465883, "step": 154020 }, { "epoch": 43.72126028952597, "grad_norm": 0.02020389772951603, "learning_rate": 5.62966221969912e-05, "loss": 0.01097070574760437, "step": 154030 }, { "epoch": 43.724098779449335, "grad_norm": 0.05483675003051758, "learning_rate": 5.629378370706784e-05, "loss": 0.004776961356401444, "step": 154040 }, { "epoch": 43.72693726937269, "grad_norm": 0.012218831107020378, "learning_rate": 5.629094521714449e-05, "loss": 0.004128340259194374, "step": 154050 }, { "epoch": 43.729775759296054, "grad_norm": 6.25172758102417, "learning_rate": 5.628810672722112e-05, "loss": 0.006833499670028687, "step": 154060 }, { "epoch": 43.73261424921942, "grad_norm": 0.15857407450675964, "learning_rate": 5.6285268237297765e-05, "loss": 0.0018059542402625085, "step": 154070 }, { "epoch": 43.73545273914278, "grad_norm": 0.2582097053527832, "learning_rate": 5.62824297473744e-05, "loss": 0.018608731031417847, "step": 154080 }, { "epoch": 43.738291229066135, "grad_norm": 0.019614001736044884, "learning_rate": 5.6279591257451034e-05, "loss": 0.0009627986699342727, "step": 154090 }, { "epoch": 43.7411297189895, "grad_norm": 0.6917924880981445, "learning_rate": 5.6276752767527675e-05, "loss": 0.0005550341680645943, "step": 154100 }, { "epoch": 43.74396820891286, "grad_norm": 0.38288331031799316, "learning_rate": 5.627391427760432e-05, "loss": 0.007871776074171066, "step": 154110 }, { "epoch": 43.74680669883622, "grad_norm": 0.04757211357355118, "learning_rate": 5.627107578768096e-05, "loss": 0.0034960828721523284, "step": 154120 }, { "epoch": 43.74964518875958, "grad_norm": 0.04697048291563988, "learning_rate": 5.62682372977576e-05, "loss": 0.000641869381070137, "step": 154130 }, { "epoch": 43.75248367868294, "grad_norm": 0.010776704177260399, "learning_rate": 5.6265398807834234e-05, "loss": 0.0014316676184535026, "step": 154140 }, { "epoch": 43.7553221686063, "grad_norm": 0.21400155127048492, "learning_rate": 5.6262560317910875e-05, "loss": 0.0002763040363788605, "step": 154150 }, { "epoch": 43.75816065852966, "grad_norm": 0.003396968822926283, "learning_rate": 5.625972182798751e-05, "loss": 0.00021591279655694962, "step": 154160 }, { "epoch": 43.760999148453024, "grad_norm": 0.5492722988128662, "learning_rate": 5.625688333806415e-05, "loss": 0.0019268158823251725, "step": 154170 }, { "epoch": 43.76383763837639, "grad_norm": 0.020307617262005806, "learning_rate": 5.62540448481408e-05, "loss": 0.0021465888246893884, "step": 154180 }, { "epoch": 43.76667612829974, "grad_norm": 0.148207426071167, "learning_rate": 5.6251206358217434e-05, "loss": 0.00044733807444572447, "step": 154190 }, { "epoch": 43.769514618223106, "grad_norm": 0.4104233682155609, "learning_rate": 5.624836786829407e-05, "loss": 0.0012068521231412888, "step": 154200 }, { "epoch": 43.77235310814647, "grad_norm": 0.1223101019859314, "learning_rate": 5.624552937837071e-05, "loss": 0.00029516033828258514, "step": 154210 }, { "epoch": 43.775191598069824, "grad_norm": 0.021492410451173782, "learning_rate": 5.6242690888447345e-05, "loss": 0.0004895489662885665, "step": 154220 }, { "epoch": 43.77803008799319, "grad_norm": 0.08369467407464981, "learning_rate": 5.6239852398523986e-05, "loss": 0.00019669625908136368, "step": 154230 }, { "epoch": 43.78086857791655, "grad_norm": 1.202041745185852, "learning_rate": 5.623701390860062e-05, "loss": 0.001467658020555973, "step": 154240 }, { "epoch": 43.783707067839906, "grad_norm": 0.17881910502910614, "learning_rate": 5.623417541867727e-05, "loss": 0.0006838161498308182, "step": 154250 }, { "epoch": 43.78654555776327, "grad_norm": 0.2440555840730667, "learning_rate": 5.623133692875391e-05, "loss": 0.005201253667473793, "step": 154260 }, { "epoch": 43.78938404768663, "grad_norm": 0.05330654978752136, "learning_rate": 5.6228498438830545e-05, "loss": 0.0029523430392146112, "step": 154270 }, { "epoch": 43.792222537609995, "grad_norm": 0.04155469685792923, "learning_rate": 5.6225659948907186e-05, "loss": 0.003963040560483933, "step": 154280 }, { "epoch": 43.79506102753335, "grad_norm": 0.023605775088071823, "learning_rate": 5.622282145898382e-05, "loss": 0.0016544802114367486, "step": 154290 }, { "epoch": 43.79789951745671, "grad_norm": 0.18279320001602173, "learning_rate": 5.6219982969060455e-05, "loss": 0.0007481120526790618, "step": 154300 }, { "epoch": 43.800738007380076, "grad_norm": 0.0048071881756186485, "learning_rate": 5.621714447913711e-05, "loss": 0.0027427805587649347, "step": 154310 }, { "epoch": 43.80357649730343, "grad_norm": 0.11387521028518677, "learning_rate": 5.6214305989213745e-05, "loss": 0.0002580597996711731, "step": 154320 }, { "epoch": 43.806414987226795, "grad_norm": 0.17932264506816864, "learning_rate": 5.621146749929038e-05, "loss": 0.0008125657215714455, "step": 154330 }, { "epoch": 43.80925347715016, "grad_norm": 1.0526673793792725, "learning_rate": 5.620862900936702e-05, "loss": 0.0009213188663125038, "step": 154340 }, { "epoch": 43.81209196707351, "grad_norm": 0.2575249671936035, "learning_rate": 5.6205790519443655e-05, "loss": 0.00040987487882375716, "step": 154350 }, { "epoch": 43.814930456996876, "grad_norm": 0.07897946238517761, "learning_rate": 5.62029520295203e-05, "loss": 0.00030170883983373644, "step": 154360 }, { "epoch": 43.81776894692024, "grad_norm": 0.05116071552038193, "learning_rate": 5.620011353959693e-05, "loss": 0.0006183471530675888, "step": 154370 }, { "epoch": 43.8206074368436, "grad_norm": 0.0812288150191307, "learning_rate": 5.619727504967358e-05, "loss": 0.00022416114807128907, "step": 154380 }, { "epoch": 43.82344592676696, "grad_norm": 0.02419820986688137, "learning_rate": 5.619443655975022e-05, "loss": 0.00019300412386655808, "step": 154390 }, { "epoch": 43.82628441669032, "grad_norm": 0.01854860596358776, "learning_rate": 5.6191598069826855e-05, "loss": 0.00012232959270477294, "step": 154400 }, { "epoch": 43.829122906613684, "grad_norm": 0.01148872822523117, "learning_rate": 5.61887595799035e-05, "loss": 0.0005028080195188522, "step": 154410 }, { "epoch": 43.83196139653704, "grad_norm": 0.12528418004512787, "learning_rate": 5.618592108998013e-05, "loss": 0.0003821166232228279, "step": 154420 }, { "epoch": 43.8347998864604, "grad_norm": 0.09467723220586777, "learning_rate": 5.6183082600056766e-05, "loss": 0.0002961607649922371, "step": 154430 }, { "epoch": 43.837638376383765, "grad_norm": 0.773350179195404, "learning_rate": 5.618024411013341e-05, "loss": 0.0016979165375232697, "step": 154440 }, { "epoch": 43.84047686630713, "grad_norm": 0.04346238449215889, "learning_rate": 5.6177405620210056e-05, "loss": 0.002946471609175205, "step": 154450 }, { "epoch": 43.843315356230484, "grad_norm": 0.03723211586475372, "learning_rate": 5.617456713028669e-05, "loss": 0.0005447156727313995, "step": 154460 }, { "epoch": 43.84615384615385, "grad_norm": 0.24650727212429047, "learning_rate": 5.617172864036333e-05, "loss": 0.0004453670233488083, "step": 154470 }, { "epoch": 43.84899233607721, "grad_norm": 0.19682954251766205, "learning_rate": 5.6168890150439966e-05, "loss": 0.0008248711004853248, "step": 154480 }, { "epoch": 43.851830826000565, "grad_norm": 0.419793039560318, "learning_rate": 5.616605166051661e-05, "loss": 0.0007764257490634919, "step": 154490 }, { "epoch": 43.85466931592393, "grad_norm": 0.7077958583831787, "learning_rate": 5.616321317059324e-05, "loss": 0.001727164164185524, "step": 154500 }, { "epoch": 43.85466931592393, "eval_accuracy": 0.978953392255357, "eval_loss": 0.0777365043759346, "eval_runtime": 34.9985, "eval_samples_per_second": 449.362, "eval_steps_per_second": 7.029, "step": 154500 }, { "epoch": 43.85750780584729, "grad_norm": 0.01504975650459528, "learning_rate": 5.616037468066989e-05, "loss": 0.0006213514134287834, "step": 154510 }, { "epoch": 43.86034629577065, "grad_norm": 10.18017578125, "learning_rate": 5.615753619074653e-05, "loss": 0.012781348824501038, "step": 154520 }, { "epoch": 43.86318478569401, "grad_norm": 0.017869001254439354, "learning_rate": 5.6154697700823166e-05, "loss": 0.0014596624299883843, "step": 154530 }, { "epoch": 43.86602327561737, "grad_norm": 0.012217680923640728, "learning_rate": 5.615185921089981e-05, "loss": 0.006482252478599548, "step": 154540 }, { "epoch": 43.868861765540736, "grad_norm": 0.02577430009841919, "learning_rate": 5.614902072097644e-05, "loss": 0.0022085361182689668, "step": 154550 }, { "epoch": 43.87170025546409, "grad_norm": 1.9286187887191772, "learning_rate": 5.614646608004542e-05, "loss": 0.005127398297190666, "step": 154560 }, { "epoch": 43.874538745387454, "grad_norm": 0.05924082174897194, "learning_rate": 5.6143627590122064e-05, "loss": 0.004058090597391128, "step": 154570 }, { "epoch": 43.87737723531082, "grad_norm": 0.06493588536977768, "learning_rate": 5.61407891001987e-05, "loss": 0.0019420266151428222, "step": 154580 }, { "epoch": 43.88021572523417, "grad_norm": 0.0769554153084755, "learning_rate": 5.613795061027533e-05, "loss": 0.007860396802425385, "step": 154590 }, { "epoch": 43.883054215157536, "grad_norm": 6.6370697021484375, "learning_rate": 5.6135112120351974e-05, "loss": 0.00230832826346159, "step": 154600 }, { "epoch": 43.8858927050809, "grad_norm": 0.06038786843419075, "learning_rate": 5.613227363042861e-05, "loss": 0.003609666973352432, "step": 154610 }, { "epoch": 43.888731195004254, "grad_norm": 0.4096817672252655, "learning_rate": 5.612943514050525e-05, "loss": 0.004236669838428497, "step": 154620 }, { "epoch": 43.89156968492762, "grad_norm": 0.17260317504405975, "learning_rate": 5.61265966505819e-05, "loss": 0.007374207675457001, "step": 154630 }, { "epoch": 43.89440817485098, "grad_norm": 0.05516205355525017, "learning_rate": 5.612375816065853e-05, "loss": 0.0021448083221912383, "step": 154640 }, { "epoch": 43.89724666477434, "grad_norm": 0.8603276014328003, "learning_rate": 5.6120919670735174e-05, "loss": 0.0015929367393255234, "step": 154650 }, { "epoch": 43.9000851546977, "grad_norm": 1.5562461614608765, "learning_rate": 5.611808118081181e-05, "loss": 0.0009366409853100776, "step": 154660 }, { "epoch": 43.90292364462106, "grad_norm": 0.025142258033156395, "learning_rate": 5.611524269088845e-05, "loss": 0.001723792962729931, "step": 154670 }, { "epoch": 43.905762134544425, "grad_norm": 0.04074539244174957, "learning_rate": 5.6112404200965085e-05, "loss": 0.0009798139333724975, "step": 154680 }, { "epoch": 43.90860062446778, "grad_norm": 0.019344383850693703, "learning_rate": 5.610956571104173e-05, "loss": 0.0011826109141111374, "step": 154690 }, { "epoch": 43.91143911439114, "grad_norm": 0.1750621199607849, "learning_rate": 5.6106727221118374e-05, "loss": 0.008888573199510575, "step": 154700 }, { "epoch": 43.914277604314506, "grad_norm": 0.29634588956832886, "learning_rate": 5.610388873119501e-05, "loss": 0.0016735559329390526, "step": 154710 }, { "epoch": 43.91711609423786, "grad_norm": 0.015468505211174488, "learning_rate": 5.6101050241271643e-05, "loss": 0.0011156648397445679, "step": 154720 }, { "epoch": 43.919954584161225, "grad_norm": 1.7920782566070557, "learning_rate": 5.6098211751348285e-05, "loss": 0.009108118712902069, "step": 154730 }, { "epoch": 43.92279307408459, "grad_norm": 0.03695196658372879, "learning_rate": 5.609537326142492e-05, "loss": 0.012342369556427002, "step": 154740 }, { "epoch": 43.92563156400795, "grad_norm": 0.07239231467247009, "learning_rate": 5.609253477150156e-05, "loss": 0.0008373336866497993, "step": 154750 }, { "epoch": 43.928470053931306, "grad_norm": 1.9262795448303223, "learning_rate": 5.608969628157821e-05, "loss": 0.0011343590915203095, "step": 154760 }, { "epoch": 43.93130854385467, "grad_norm": 11.920012474060059, "learning_rate": 5.6086857791654844e-05, "loss": 0.01421487033367157, "step": 154770 }, { "epoch": 43.93414703377803, "grad_norm": 7.368864059448242, "learning_rate": 5.6084019301731485e-05, "loss": 0.01367056965827942, "step": 154780 }, { "epoch": 43.93698552370139, "grad_norm": 0.14563612639904022, "learning_rate": 5.608118081180812e-05, "loss": 0.007513649016618729, "step": 154790 }, { "epoch": 43.93982401362475, "grad_norm": 0.7071606516838074, "learning_rate": 5.607834232188476e-05, "loss": 0.010770447552204132, "step": 154800 }, { "epoch": 43.942662503548114, "grad_norm": 0.1945652961730957, "learning_rate": 5.6075503831961396e-05, "loss": 0.005262922495603561, "step": 154810 }, { "epoch": 43.94550099347148, "grad_norm": 5.658971786499023, "learning_rate": 5.607266534203803e-05, "loss": 0.015572588145732879, "step": 154820 }, { "epoch": 43.94833948339483, "grad_norm": 10.344818115234375, "learning_rate": 5.6069826852114685e-05, "loss": 0.007333941757678986, "step": 154830 }, { "epoch": 43.951177973318195, "grad_norm": 0.023091064766049385, "learning_rate": 5.606698836219132e-05, "loss": 0.0034615807235240936, "step": 154840 }, { "epoch": 43.95401646324156, "grad_norm": 12.169610977172852, "learning_rate": 5.6064149872267954e-05, "loss": 0.0035271886736154556, "step": 154850 }, { "epoch": 43.956854953164914, "grad_norm": 0.03122204914689064, "learning_rate": 5.6061311382344596e-05, "loss": 0.005981720238924027, "step": 154860 }, { "epoch": 43.95969344308828, "grad_norm": 0.1313038319349289, "learning_rate": 5.605847289242123e-05, "loss": 0.00257395226508379, "step": 154870 }, { "epoch": 43.96253193301164, "grad_norm": 0.02829558588564396, "learning_rate": 5.605563440249787e-05, "loss": 0.0045620866119861605, "step": 154880 }, { "epoch": 43.965370422934996, "grad_norm": 0.16823841631412506, "learning_rate": 5.605279591257452e-05, "loss": 0.00529848113656044, "step": 154890 }, { "epoch": 43.96820891285836, "grad_norm": 0.0594043955206871, "learning_rate": 5.6049957422651154e-05, "loss": 0.0009642789140343666, "step": 154900 }, { "epoch": 43.97104740278172, "grad_norm": 0.05581025406718254, "learning_rate": 5.6047118932727796e-05, "loss": 0.004058460146188736, "step": 154910 }, { "epoch": 43.973885892705084, "grad_norm": 1.4736840724945068, "learning_rate": 5.604428044280443e-05, "loss": 0.00339917428791523, "step": 154920 }, { "epoch": 43.97672438262844, "grad_norm": 0.8444429039955139, "learning_rate": 5.604144195288107e-05, "loss": 0.003835049644112587, "step": 154930 }, { "epoch": 43.9795628725518, "grad_norm": 1.3454216718673706, "learning_rate": 5.6038603462957706e-05, "loss": 0.002089572511613369, "step": 154940 }, { "epoch": 43.982401362475166, "grad_norm": 0.2736746072769165, "learning_rate": 5.603576497303434e-05, "loss": 0.0013477144762873649, "step": 154950 }, { "epoch": 43.98523985239852, "grad_norm": 0.009213417768478394, "learning_rate": 5.603292648311099e-05, "loss": 0.0016317173838615417, "step": 154960 }, { "epoch": 43.988078342321884, "grad_norm": 0.06372193992137909, "learning_rate": 5.603008799318763e-05, "loss": 0.0018438924103975295, "step": 154970 }, { "epoch": 43.99091683224525, "grad_norm": 0.09982889145612717, "learning_rate": 5.6027249503264265e-05, "loss": 0.0028252659365534784, "step": 154980 }, { "epoch": 43.9937553221686, "grad_norm": 0.2242569476366043, "learning_rate": 5.6024411013340906e-05, "loss": 0.002870455011725426, "step": 154990 }, { "epoch": 43.996593812091966, "grad_norm": 0.037898097187280655, "learning_rate": 5.602157252341754e-05, "loss": 0.00013488605618476867, "step": 155000 }, { "epoch": 43.996593812091966, "eval_accuracy": 0.9805430151967953, "eval_loss": 0.07810639590024948, "eval_runtime": 36.5025, "eval_samples_per_second": 430.847, "eval_steps_per_second": 6.739, "step": 155000 }, { "epoch": 43.99943230201533, "grad_norm": 12.962503433227539, "learning_rate": 5.601873403349418e-05, "loss": 0.009013582020998001, "step": 155010 }, { "epoch": 44.00227079193869, "grad_norm": 0.10897231101989746, "learning_rate": 5.601589554357082e-05, "loss": 0.00018442012369632721, "step": 155020 }, { "epoch": 44.00510928186205, "grad_norm": 0.07300283759832382, "learning_rate": 5.6013057053647465e-05, "loss": 0.0011806806549429893, "step": 155030 }, { "epoch": 44.00794777178541, "grad_norm": 0.009008501656353474, "learning_rate": 5.6010218563724106e-05, "loss": 0.0035046376287937164, "step": 155040 }, { "epoch": 44.01078626170877, "grad_norm": 0.07379471510648727, "learning_rate": 5.600738007380074e-05, "loss": 0.00029019154608249663, "step": 155050 }, { "epoch": 44.01362475163213, "grad_norm": 0.3665422797203064, "learning_rate": 5.6004541583877376e-05, "loss": 0.00024086330085992813, "step": 155060 }, { "epoch": 44.01646324155549, "grad_norm": 0.012023755349218845, "learning_rate": 5.600170309395402e-05, "loss": 0.00030050668865442274, "step": 155070 }, { "epoch": 44.019301731478855, "grad_norm": 0.005586853250861168, "learning_rate": 5.599886460403065e-05, "loss": 0.00025889407843351366, "step": 155080 }, { "epoch": 44.02214022140221, "grad_norm": 0.04133278876543045, "learning_rate": 5.59960261141073e-05, "loss": 0.0001871250569820404, "step": 155090 }, { "epoch": 44.02497871132557, "grad_norm": 0.061129819601774216, "learning_rate": 5.599318762418394e-05, "loss": 0.0008243529126048088, "step": 155100 }, { "epoch": 44.027817201248936, "grad_norm": 0.04848893731832504, "learning_rate": 5.5990349134260576e-05, "loss": 0.00039188526570796964, "step": 155110 }, { "epoch": 44.0306556911723, "grad_norm": 0.05303439870476723, "learning_rate": 5.598751064433722e-05, "loss": 0.0005872942507266999, "step": 155120 }, { "epoch": 44.033494181095655, "grad_norm": 0.042911697179079056, "learning_rate": 5.598467215441385e-05, "loss": 0.013215619325637817, "step": 155130 }, { "epoch": 44.03633267101902, "grad_norm": 0.536017119884491, "learning_rate": 5.598183366449049e-05, "loss": 0.0006606800481677056, "step": 155140 }, { "epoch": 44.03917116094238, "grad_norm": 0.06260266900062561, "learning_rate": 5.597899517456713e-05, "loss": 0.00037090983241796496, "step": 155150 }, { "epoch": 44.04200965086574, "grad_norm": 0.014101826585829258, "learning_rate": 5.5976156684643776e-05, "loss": 0.001759456843137741, "step": 155160 }, { "epoch": 44.0448481407891, "grad_norm": 0.6283633708953857, "learning_rate": 5.597331819472042e-05, "loss": 0.00561424046754837, "step": 155170 }, { "epoch": 44.04768663071246, "grad_norm": 0.017061304301023483, "learning_rate": 5.597047970479705e-05, "loss": 0.001663818582892418, "step": 155180 }, { "epoch": 44.050525120635825, "grad_norm": 1.075775146484375, "learning_rate": 5.5967641214873686e-05, "loss": 0.0008500231429934502, "step": 155190 }, { "epoch": 44.05336361055918, "grad_norm": 0.04970533773303032, "learning_rate": 5.596480272495033e-05, "loss": 0.007049179822206497, "step": 155200 }, { "epoch": 44.056202100482544, "grad_norm": 12.47802734375, "learning_rate": 5.596196423502696e-05, "loss": 0.006432218849658966, "step": 155210 }, { "epoch": 44.05904059040591, "grad_norm": 0.11684665083885193, "learning_rate": 5.5959125745103604e-05, "loss": 0.002705670706927776, "step": 155220 }, { "epoch": 44.06187908032926, "grad_norm": 0.005364813841879368, "learning_rate": 5.595628725518025e-05, "loss": 0.003538116440176964, "step": 155230 }, { "epoch": 44.064717570252625, "grad_norm": 0.006294559221714735, "learning_rate": 5.5953448765256887e-05, "loss": 0.018889760971069335, "step": 155240 }, { "epoch": 44.06755606017599, "grad_norm": 0.8360396027565002, "learning_rate": 5.595061027533353e-05, "loss": 0.0004325220361351967, "step": 155250 }, { "epoch": 44.070394550099344, "grad_norm": 1.8003277778625488, "learning_rate": 5.594777178541016e-05, "loss": 0.0009164553135633469, "step": 155260 }, { "epoch": 44.07323304002271, "grad_norm": 1.0936763286590576, "learning_rate": 5.5944933295486804e-05, "loss": 0.0010612141340970993, "step": 155270 }, { "epoch": 44.07607152994607, "grad_norm": 0.6204494833946228, "learning_rate": 5.594209480556344e-05, "loss": 0.009737672656774521, "step": 155280 }, { "epoch": 44.07891001986943, "grad_norm": 9.381623268127441, "learning_rate": 5.593925631564009e-05, "loss": 0.013190166652202606, "step": 155290 }, { "epoch": 44.08174850979279, "grad_norm": 0.07939009368419647, "learning_rate": 5.593641782571673e-05, "loss": 0.005768376588821411, "step": 155300 }, { "epoch": 44.08458699971615, "grad_norm": 7.453685283660889, "learning_rate": 5.593357933579336e-05, "loss": 0.003542008250951767, "step": 155310 }, { "epoch": 44.087425489639514, "grad_norm": 0.06499631702899933, "learning_rate": 5.593074084587e-05, "loss": 0.0031107597053050993, "step": 155320 }, { "epoch": 44.09026397956287, "grad_norm": 0.06707420945167542, "learning_rate": 5.592790235594664e-05, "loss": 0.0014002302661538125, "step": 155330 }, { "epoch": 44.09310246948623, "grad_norm": 0.11488330364227295, "learning_rate": 5.592506386602327e-05, "loss": 0.0013233672827482224, "step": 155340 }, { "epoch": 44.095940959409596, "grad_norm": 0.09650080651044846, "learning_rate": 5.5922225376099915e-05, "loss": 0.0005355365574359894, "step": 155350 }, { "epoch": 44.09877944933295, "grad_norm": 0.846027135848999, "learning_rate": 5.591938688617656e-05, "loss": 0.0013819491490721703, "step": 155360 }, { "epoch": 44.101617939256315, "grad_norm": 0.035312194377183914, "learning_rate": 5.59165483962532e-05, "loss": 0.008005672693252563, "step": 155370 }, { "epoch": 44.10445642917968, "grad_norm": 1.6969034671783447, "learning_rate": 5.591370990632984e-05, "loss": 0.009735625982284547, "step": 155380 }, { "epoch": 44.10729491910304, "grad_norm": 0.39494550228118896, "learning_rate": 5.591087141640647e-05, "loss": 0.0012352587655186653, "step": 155390 }, { "epoch": 44.110133409026396, "grad_norm": 0.13789963722229004, "learning_rate": 5.5908032926483115e-05, "loss": 0.0005183875560760498, "step": 155400 }, { "epoch": 44.11297189894976, "grad_norm": 0.08164885640144348, "learning_rate": 5.590519443655975e-05, "loss": 0.00011816006153821945, "step": 155410 }, { "epoch": 44.11581038887312, "grad_norm": 12.597244262695312, "learning_rate": 5.5902355946636384e-05, "loss": 0.010258959233760833, "step": 155420 }, { "epoch": 44.11864887879648, "grad_norm": 0.05901462957262993, "learning_rate": 5.589951745671303e-05, "loss": 0.00934808850288391, "step": 155430 }, { "epoch": 44.12148736871984, "grad_norm": 4.555620193481445, "learning_rate": 5.589667896678967e-05, "loss": 0.0055685356259346005, "step": 155440 }, { "epoch": 44.1243258586432, "grad_norm": 0.08280699700117111, "learning_rate": 5.589384047686631e-05, "loss": 0.001165146753191948, "step": 155450 }, { "epoch": 44.12716434856656, "grad_norm": 0.08273538202047348, "learning_rate": 5.589100198694295e-05, "loss": 0.0009404465556144715, "step": 155460 }, { "epoch": 44.13000283848992, "grad_norm": 0.035394441336393356, "learning_rate": 5.5888163497019584e-05, "loss": 0.0007471131160855293, "step": 155470 }, { "epoch": 44.132841328413285, "grad_norm": 0.04407985508441925, "learning_rate": 5.5885325007096225e-05, "loss": 0.000991075299680233, "step": 155480 }, { "epoch": 44.13567981833665, "grad_norm": 0.10401735454797745, "learning_rate": 5.5882486517172873e-05, "loss": 0.002316517382860184, "step": 155490 }, { "epoch": 44.138518308260004, "grad_norm": 0.006653179880231619, "learning_rate": 5.587964802724951e-05, "loss": 0.00040649622678756714, "step": 155500 }, { "epoch": 44.138518308260004, "eval_accuracy": 0.982069053220576, "eval_loss": 0.06529824435710907, "eval_runtime": 38.3101, "eval_samples_per_second": 410.519, "eval_steps_per_second": 6.421, "step": 155500 }, { "epoch": 44.14135679818337, "grad_norm": 0.0699249655008316, "learning_rate": 5.587680953732615e-05, "loss": 0.006391587853431702, "step": 155510 }, { "epoch": 44.14419528810673, "grad_norm": 0.1680278331041336, "learning_rate": 5.5873971047402784e-05, "loss": 0.0013931343331933022, "step": 155520 }, { "epoch": 44.147033778030085, "grad_norm": 0.05735896900296211, "learning_rate": 5.587113255747942e-05, "loss": 0.004176818206906319, "step": 155530 }, { "epoch": 44.14987226795345, "grad_norm": 2.908205509185791, "learning_rate": 5.586829406755606e-05, "loss": 0.0011715607717633248, "step": 155540 }, { "epoch": 44.15271075787681, "grad_norm": 0.034450363367795944, "learning_rate": 5.5865455577632695e-05, "loss": 0.002414228394627571, "step": 155550 }, { "epoch": 44.15554924780017, "grad_norm": 0.7857425212860107, "learning_rate": 5.586261708770934e-05, "loss": 0.0023058138787746428, "step": 155560 }, { "epoch": 44.15838773772353, "grad_norm": 1.1269985437393188, "learning_rate": 5.5859778597785984e-05, "loss": 0.011594128608703614, "step": 155570 }, { "epoch": 44.16122622764689, "grad_norm": 0.05669726803898811, "learning_rate": 5.585694010786262e-05, "loss": 0.015388762950897217, "step": 155580 }, { "epoch": 44.164064717570255, "grad_norm": 0.007079245522618294, "learning_rate": 5.585410161793926e-05, "loss": 0.0013171736150979995, "step": 155590 }, { "epoch": 44.16690320749361, "grad_norm": 0.012840247713029385, "learning_rate": 5.5851263128015895e-05, "loss": 0.0020984912291169165, "step": 155600 }, { "epoch": 44.169741697416974, "grad_norm": 0.012920303270220757, "learning_rate": 5.5848424638092536e-05, "loss": 0.0018947891891002654, "step": 155610 }, { "epoch": 44.17258018734034, "grad_norm": 0.11053874343633652, "learning_rate": 5.5845586148169184e-05, "loss": 0.0010321727022528648, "step": 155620 }, { "epoch": 44.17541867726369, "grad_norm": 0.19926968216896057, "learning_rate": 5.584274765824582e-05, "loss": 0.0013097457587718964, "step": 155630 }, { "epoch": 44.178257167187056, "grad_norm": 7.5558576583862305, "learning_rate": 5.583990916832246e-05, "loss": 0.012523648142814637, "step": 155640 }, { "epoch": 44.18109565711042, "grad_norm": 0.009981303475797176, "learning_rate": 5.5837070678399095e-05, "loss": 0.0005761824548244476, "step": 155650 }, { "epoch": 44.18393414703378, "grad_norm": 0.007810608483850956, "learning_rate": 5.583423218847573e-05, "loss": 0.002498750761151314, "step": 155660 }, { "epoch": 44.18677263695714, "grad_norm": 0.396610289812088, "learning_rate": 5.583139369855237e-05, "loss": 0.0007991367951035499, "step": 155670 }, { "epoch": 44.1896111268805, "grad_norm": 0.5866830348968506, "learning_rate": 5.5828555208629005e-05, "loss": 0.0034756265580654145, "step": 155680 }, { "epoch": 44.19244961680386, "grad_norm": 0.14219608902931213, "learning_rate": 5.5825716718705654e-05, "loss": 0.000255904532968998, "step": 155690 }, { "epoch": 44.19528810672722, "grad_norm": 5.149024486541748, "learning_rate": 5.5822878228782295e-05, "loss": 0.01990717798471451, "step": 155700 }, { "epoch": 44.19812659665058, "grad_norm": 0.17797595262527466, "learning_rate": 5.582003973885893e-05, "loss": 0.00740932747721672, "step": 155710 }, { "epoch": 44.200965086573945, "grad_norm": 0.018132222816348076, "learning_rate": 5.581720124893557e-05, "loss": 0.006713265180587768, "step": 155720 }, { "epoch": 44.2038035764973, "grad_norm": 0.05845973640680313, "learning_rate": 5.5814362759012205e-05, "loss": 0.00031941868364810945, "step": 155730 }, { "epoch": 44.20664206642066, "grad_norm": 0.020058276131749153, "learning_rate": 5.581152426908885e-05, "loss": 0.006320445984601975, "step": 155740 }, { "epoch": 44.209480556344026, "grad_norm": 0.3233753740787506, "learning_rate": 5.580868577916548e-05, "loss": 0.001068563759326935, "step": 155750 }, { "epoch": 44.21231904626739, "grad_norm": 0.010991787537932396, "learning_rate": 5.580584728924213e-05, "loss": 0.0037518151104450224, "step": 155760 }, { "epoch": 44.215157536190745, "grad_norm": 0.23510201275348663, "learning_rate": 5.580300879931877e-05, "loss": 0.01071232631802559, "step": 155770 }, { "epoch": 44.21799602611411, "grad_norm": 0.009671185165643692, "learning_rate": 5.5800170309395406e-05, "loss": 0.006797020882368087, "step": 155780 }, { "epoch": 44.22083451603747, "grad_norm": 0.0921497493982315, "learning_rate": 5.579733181947204e-05, "loss": 0.0005306337028741836, "step": 155790 }, { "epoch": 44.223673005960826, "grad_norm": 0.04618654400110245, "learning_rate": 5.579449332954868e-05, "loss": 0.0046963956207036976, "step": 155800 }, { "epoch": 44.22651149588419, "grad_norm": 0.014641190879046917, "learning_rate": 5.5791654839625316e-05, "loss": 0.009786078333854675, "step": 155810 }, { "epoch": 44.22934998580755, "grad_norm": 0.005550975911319256, "learning_rate": 5.5788816349701964e-05, "loss": 0.001191428489983082, "step": 155820 }, { "epoch": 44.23218847573091, "grad_norm": 10.590398788452148, "learning_rate": 5.5785977859778606e-05, "loss": 0.012146256119012832, "step": 155830 }, { "epoch": 44.23502696565427, "grad_norm": 0.02275444194674492, "learning_rate": 5.578313936985524e-05, "loss": 0.0019751193001866342, "step": 155840 }, { "epoch": 44.237865455577634, "grad_norm": 0.03945295140147209, "learning_rate": 5.578030087993188e-05, "loss": 0.004378719627857209, "step": 155850 }, { "epoch": 44.240703945501, "grad_norm": 0.014931850135326385, "learning_rate": 5.5777462390008516e-05, "loss": 0.0005617177113890648, "step": 155860 }, { "epoch": 44.24354243542435, "grad_norm": 0.5147567391395569, "learning_rate": 5.577462390008516e-05, "loss": 0.000880824401974678, "step": 155870 }, { "epoch": 44.246380925347715, "grad_norm": 0.6406856775283813, "learning_rate": 5.577178541016179e-05, "loss": 0.0030507080256938933, "step": 155880 }, { "epoch": 44.24921941527108, "grad_norm": 0.0177372545003891, "learning_rate": 5.576894692023844e-05, "loss": 0.0022024277597665785, "step": 155890 }, { "epoch": 44.252057905194434, "grad_norm": 14.15445613861084, "learning_rate": 5.5766108430315075e-05, "loss": 0.010901601612567901, "step": 155900 }, { "epoch": 44.2548963951178, "grad_norm": 0.09911911934614182, "learning_rate": 5.5763269940391716e-05, "loss": 0.005051644891500473, "step": 155910 }, { "epoch": 44.25773488504116, "grad_norm": 0.026440950110554695, "learning_rate": 5.576043145046835e-05, "loss": 0.0009767716750502587, "step": 155920 }, { "epoch": 44.260573374964515, "grad_norm": 0.005672353319823742, "learning_rate": 5.575759296054499e-05, "loss": 0.0024885984137654306, "step": 155930 }, { "epoch": 44.26341186488788, "grad_norm": 15.075161933898926, "learning_rate": 5.575475447062163e-05, "loss": 0.005102863162755966, "step": 155940 }, { "epoch": 44.26625035481124, "grad_norm": 3.0424246788024902, "learning_rate": 5.575191598069827e-05, "loss": 0.01739899218082428, "step": 155950 }, { "epoch": 44.269088844734604, "grad_norm": 0.6088936924934387, "learning_rate": 5.5749077490774916e-05, "loss": 0.0003414435312151909, "step": 155960 }, { "epoch": 44.27192733465796, "grad_norm": 0.458457887172699, "learning_rate": 5.574623900085155e-05, "loss": 0.0010421654209494592, "step": 155970 }, { "epoch": 44.27476582458132, "grad_norm": 0.009103701449930668, "learning_rate": 5.574340051092819e-05, "loss": 0.009217307716608048, "step": 155980 }, { "epoch": 44.277604314504686, "grad_norm": 0.18766288459300995, "learning_rate": 5.574056202100483e-05, "loss": 0.0023655997589230537, "step": 155990 }, { "epoch": 44.28044280442804, "grad_norm": 0.8070005178451538, "learning_rate": 5.573772353108146e-05, "loss": 0.0020738322287797927, "step": 156000 }, { "epoch": 44.28044280442804, "eval_accuracy": 0.9821962230558912, "eval_loss": 0.0691811740398407, "eval_runtime": 35.363, "eval_samples_per_second": 444.73, "eval_steps_per_second": 6.956, "step": 156000 }, { "epoch": 44.283281294351404, "grad_norm": 0.025883352383971214, "learning_rate": 5.57348850411581e-05, "loss": 0.0039119325578212735, "step": 156010 }, { "epoch": 44.28611978427477, "grad_norm": 0.023011518642306328, "learning_rate": 5.573204655123475e-05, "loss": 0.002679343149065971, "step": 156020 }, { "epoch": 44.28895827419813, "grad_norm": 0.008074343204498291, "learning_rate": 5.5729208061311386e-05, "loss": 0.0006928518414497375, "step": 156030 }, { "epoch": 44.291796764121486, "grad_norm": 0.08389303088188171, "learning_rate": 5.572636957138803e-05, "loss": 0.0013281021267175675, "step": 156040 }, { "epoch": 44.29463525404485, "grad_norm": 0.017398234456777573, "learning_rate": 5.572353108146466e-05, "loss": 0.0006031397730112076, "step": 156050 }, { "epoch": 44.29747374396821, "grad_norm": 0.7239664793014526, "learning_rate": 5.57206925915413e-05, "loss": 0.0016680816188454628, "step": 156060 }, { "epoch": 44.30031223389157, "grad_norm": 0.042810697108507156, "learning_rate": 5.571785410161794e-05, "loss": 0.0006660452112555504, "step": 156070 }, { "epoch": 44.30315072381493, "grad_norm": 8.21890640258789, "learning_rate": 5.571501561169458e-05, "loss": 0.010346238315105439, "step": 156080 }, { "epoch": 44.30598921373829, "grad_norm": 0.051238760352134705, "learning_rate": 5.571217712177123e-05, "loss": 0.00047778822481632234, "step": 156090 }, { "epoch": 44.30882770366165, "grad_norm": 1.4792497158050537, "learning_rate": 5.570933863184786e-05, "loss": 0.0006121860817074776, "step": 156100 }, { "epoch": 44.31166619358501, "grad_norm": 4.951146125793457, "learning_rate": 5.57065001419245e-05, "loss": 0.002031335234642029, "step": 156110 }, { "epoch": 44.314504683508375, "grad_norm": 0.9382633566856384, "learning_rate": 5.570366165200114e-05, "loss": 0.0015822632238268852, "step": 156120 }, { "epoch": 44.31734317343174, "grad_norm": 0.05032495781779289, "learning_rate": 5.570082316207777e-05, "loss": 0.003065297566354275, "step": 156130 }, { "epoch": 44.32018166335509, "grad_norm": 0.35006874799728394, "learning_rate": 5.5697984672154414e-05, "loss": 0.0022630328312516212, "step": 156140 }, { "epoch": 44.323020153278456, "grad_norm": 0.06147061660885811, "learning_rate": 5.569514618223105e-05, "loss": 0.0015559552237391471, "step": 156150 }, { "epoch": 44.32585864320182, "grad_norm": 0.02771296352148056, "learning_rate": 5.5692307692307696e-05, "loss": 0.005533108860254288, "step": 156160 }, { "epoch": 44.328697133125175, "grad_norm": 0.04471290111541748, "learning_rate": 5.568946920238434e-05, "loss": 0.00714460015296936, "step": 156170 }, { "epoch": 44.33153562304854, "grad_norm": 0.05188252776861191, "learning_rate": 5.568663071246097e-05, "loss": 0.001909668743610382, "step": 156180 }, { "epoch": 44.3343741129719, "grad_norm": 0.006432416848838329, "learning_rate": 5.5683792222537614e-05, "loss": 0.0002785569056868553, "step": 156190 }, { "epoch": 44.33721260289526, "grad_norm": 2.0927717685699463, "learning_rate": 5.568095373261425e-05, "loss": 0.0009884199127554894, "step": 156200 }, { "epoch": 44.34005109281862, "grad_norm": 0.022259799763560295, "learning_rate": 5.567811524269089e-05, "loss": 0.00040573086589574815, "step": 156210 }, { "epoch": 44.34288958274198, "grad_norm": 12.952374458312988, "learning_rate": 5.567527675276754e-05, "loss": 0.005482783168554306, "step": 156220 }, { "epoch": 44.345728072665345, "grad_norm": 1.1972992420196533, "learning_rate": 5.567243826284417e-05, "loss": 0.00041348431259393694, "step": 156230 }, { "epoch": 44.3485665625887, "grad_norm": 0.0024462391156703234, "learning_rate": 5.5669599772920814e-05, "loss": 0.00011733267456293106, "step": 156240 }, { "epoch": 44.351405052512064, "grad_norm": 2.957796573638916, "learning_rate": 5.566676128299745e-05, "loss": 0.0010420085862278938, "step": 156250 }, { "epoch": 44.35424354243543, "grad_norm": 0.0830511823296547, "learning_rate": 5.566392279307408e-05, "loss": 0.0003465967252850533, "step": 156260 }, { "epoch": 44.35708203235878, "grad_norm": 0.051333192735910416, "learning_rate": 5.5661084303150725e-05, "loss": 0.00033863428980112075, "step": 156270 }, { "epoch": 44.359920522282145, "grad_norm": 0.2214515507221222, "learning_rate": 5.565824581322736e-05, "loss": 0.00017182938754558563, "step": 156280 }, { "epoch": 44.36275901220551, "grad_norm": 13.099161148071289, "learning_rate": 5.565540732330401e-05, "loss": 0.007111907005310059, "step": 156290 }, { "epoch": 44.365597502128864, "grad_norm": 0.0061645968817174435, "learning_rate": 5.565256883338065e-05, "loss": 0.0002686677500605583, "step": 156300 }, { "epoch": 44.36843599205223, "grad_norm": 0.005251913797110319, "learning_rate": 5.564973034345728e-05, "loss": 0.0015814818441867828, "step": 156310 }, { "epoch": 44.37127448197559, "grad_norm": 0.2723861038684845, "learning_rate": 5.5646891853533925e-05, "loss": 0.0006010431796312332, "step": 156320 }, { "epoch": 44.37411297189895, "grad_norm": 0.08732365071773529, "learning_rate": 5.564405336361056e-05, "loss": 0.0028638307005167006, "step": 156330 }, { "epoch": 44.37695146182231, "grad_norm": 0.05778186023235321, "learning_rate": 5.56412148736872e-05, "loss": 0.007026452571153641, "step": 156340 }, { "epoch": 44.37978995174567, "grad_norm": 0.024993710219860077, "learning_rate": 5.5638376383763835e-05, "loss": 0.000435304269194603, "step": 156350 }, { "epoch": 44.382628441669034, "grad_norm": 0.009138326160609722, "learning_rate": 5.563553789384048e-05, "loss": 0.0013671880587935448, "step": 156360 }, { "epoch": 44.38546693159239, "grad_norm": 0.047898776829242706, "learning_rate": 5.563269940391712e-05, "loss": 0.0030730068683624268, "step": 156370 }, { "epoch": 44.38830542151575, "grad_norm": 0.02910420671105385, "learning_rate": 5.562986091399376e-05, "loss": 0.001220790483057499, "step": 156380 }, { "epoch": 44.391143911439116, "grad_norm": 0.018178602680563927, "learning_rate": 5.5627022424070394e-05, "loss": 0.002162555232644081, "step": 156390 }, { "epoch": 44.39398240136248, "grad_norm": 0.03618853911757469, "learning_rate": 5.5624183934147035e-05, "loss": 0.0009644296020269394, "step": 156400 }, { "epoch": 44.396820891285834, "grad_norm": 0.3645394742488861, "learning_rate": 5.562134544422367e-05, "loss": 0.001035107858479023, "step": 156410 }, { "epoch": 44.3996593812092, "grad_norm": 0.8636709451675415, "learning_rate": 5.561850695430032e-05, "loss": 0.0004597175866365433, "step": 156420 }, { "epoch": 44.40249787113256, "grad_norm": 0.6600233316421509, "learning_rate": 5.561566846437696e-05, "loss": 0.000860300101339817, "step": 156430 }, { "epoch": 44.405336361055916, "grad_norm": 0.2507040798664093, "learning_rate": 5.5612829974453594e-05, "loss": 0.0007014486938714982, "step": 156440 }, { "epoch": 44.40817485097928, "grad_norm": 0.012772534973919392, "learning_rate": 5.5609991484530235e-05, "loss": 0.007538089901208878, "step": 156450 }, { "epoch": 44.41101334090264, "grad_norm": 0.02086837776005268, "learning_rate": 5.560715299460687e-05, "loss": 0.0003764655441045761, "step": 156460 }, { "epoch": 44.413851830826, "grad_norm": 0.01707325503230095, "learning_rate": 5.5604314504683505e-05, "loss": 0.00030120518058538435, "step": 156470 }, { "epoch": 44.41669032074936, "grad_norm": 0.009592363610863686, "learning_rate": 5.5601476014760146e-05, "loss": 0.000273057259619236, "step": 156480 }, { "epoch": 44.41952881067272, "grad_norm": 1.3485312461853027, "learning_rate": 5.5598637524836794e-05, "loss": 0.0007699541747570038, "step": 156490 }, { "epoch": 44.422367300596086, "grad_norm": 8.40451717376709, "learning_rate": 5.559579903491343e-05, "loss": 0.0018899744376540185, "step": 156500 }, { "epoch": 44.422367300596086, "eval_accuracy": 0.9809245247027405, "eval_loss": 0.0747559443116188, "eval_runtime": 36.8965, "eval_samples_per_second": 426.247, "eval_steps_per_second": 6.667, "step": 156500 }, { "epoch": 44.42520579051944, "grad_norm": 1.426670789718628, "learning_rate": 5.559296054499007e-05, "loss": 0.0021092742681503296, "step": 156510 }, { "epoch": 44.428044280442805, "grad_norm": 0.05565939471125603, "learning_rate": 5.5590122055066705e-05, "loss": 0.00045278966426849366, "step": 156520 }, { "epoch": 44.43088277036617, "grad_norm": 9.096282005310059, "learning_rate": 5.5587283565143346e-05, "loss": 0.0023333007469773293, "step": 156530 }, { "epoch": 44.43372126028952, "grad_norm": 0.030429886654019356, "learning_rate": 5.558444507521998e-05, "loss": 0.0006388837471604347, "step": 156540 }, { "epoch": 44.436559750212886, "grad_norm": 0.015168112702667713, "learning_rate": 5.558160658529662e-05, "loss": 0.0008395697921514511, "step": 156550 }, { "epoch": 44.43939824013625, "grad_norm": 0.05574434995651245, "learning_rate": 5.557876809537327e-05, "loss": 0.0002404557541012764, "step": 156560 }, { "epoch": 44.442236730059605, "grad_norm": 0.3121058940887451, "learning_rate": 5.5575929605449905e-05, "loss": 0.0014069288969039916, "step": 156570 }, { "epoch": 44.44507521998297, "grad_norm": 0.1552421599626541, "learning_rate": 5.5573091115526546e-05, "loss": 0.011298835277557373, "step": 156580 }, { "epoch": 44.44791370990633, "grad_norm": 16.45296859741211, "learning_rate": 5.557025262560318e-05, "loss": 0.0135759636759758, "step": 156590 }, { "epoch": 44.450752199829694, "grad_norm": 0.2790147066116333, "learning_rate": 5.5567414135679815e-05, "loss": 0.005287895351648331, "step": 156600 }, { "epoch": 44.45359068975305, "grad_norm": 6.852138996124268, "learning_rate": 5.556457564575646e-05, "loss": 0.010211680084466934, "step": 156610 }, { "epoch": 44.45642917967641, "grad_norm": 0.01887897402048111, "learning_rate": 5.5561737155833105e-05, "loss": 0.0008040301501750946, "step": 156620 }, { "epoch": 44.459267669599775, "grad_norm": 2.0154521465301514, "learning_rate": 5.555889866590974e-05, "loss": 0.0016362816095352173, "step": 156630 }, { "epoch": 44.46210615952313, "grad_norm": 0.006984923034906387, "learning_rate": 5.555606017598638e-05, "loss": 0.0004886351525783539, "step": 156640 }, { "epoch": 44.464944649446494, "grad_norm": 2.370093822479248, "learning_rate": 5.5553221686063015e-05, "loss": 0.0011133590713143348, "step": 156650 }, { "epoch": 44.46778313936986, "grad_norm": 0.1903674453496933, "learning_rate": 5.555038319613966e-05, "loss": 0.0007484469562768937, "step": 156660 }, { "epoch": 44.47062162929321, "grad_norm": 0.07875172048807144, "learning_rate": 5.554754470621629e-05, "loss": 0.00222934614866972, "step": 156670 }, { "epoch": 44.473460119216575, "grad_norm": 0.08739776909351349, "learning_rate": 5.554470621629293e-05, "loss": 0.005962193757295608, "step": 156680 }, { "epoch": 44.47629860913994, "grad_norm": 2.5181853771209717, "learning_rate": 5.554186772636958e-05, "loss": 0.007427451014518738, "step": 156690 }, { "epoch": 44.4791370990633, "grad_norm": 0.20847997069358826, "learning_rate": 5.5539029236446216e-05, "loss": 0.003709855675697327, "step": 156700 }, { "epoch": 44.48197558898666, "grad_norm": 0.036608293652534485, "learning_rate": 5.553619074652286e-05, "loss": 0.008839324116706848, "step": 156710 }, { "epoch": 44.48481407891002, "grad_norm": 2.5490710735321045, "learning_rate": 5.553335225659949e-05, "loss": 0.0013879723846912384, "step": 156720 }, { "epoch": 44.48765256883338, "grad_norm": 0.05759309232234955, "learning_rate": 5.5530513766676126e-05, "loss": 0.0064148962497711185, "step": 156730 }, { "epoch": 44.49049105875674, "grad_norm": 0.20904557406902313, "learning_rate": 5.552767527675277e-05, "loss": 0.0005261052399873733, "step": 156740 }, { "epoch": 44.4933295486801, "grad_norm": 0.03588863089680672, "learning_rate": 5.55248367868294e-05, "loss": 0.0008031770586967469, "step": 156750 }, { "epoch": 44.496168038603464, "grad_norm": 0.591232419013977, "learning_rate": 5.552199829690605e-05, "loss": 0.004139840975403786, "step": 156760 }, { "epoch": 44.49900652852683, "grad_norm": 10.029489517211914, "learning_rate": 5.551915980698269e-05, "loss": 0.0069253861904144285, "step": 156770 }, { "epoch": 44.50184501845018, "grad_norm": 0.08324166387319565, "learning_rate": 5.5516321317059326e-05, "loss": 0.010728587955236435, "step": 156780 }, { "epoch": 44.504683508373546, "grad_norm": 0.015135228633880615, "learning_rate": 5.551348282713597e-05, "loss": 0.0007023625075817108, "step": 156790 }, { "epoch": 44.50752199829691, "grad_norm": 0.2512914538383484, "learning_rate": 5.55106443372126e-05, "loss": 0.00022900383919477463, "step": 156800 }, { "epoch": 44.510360488220265, "grad_norm": 0.04528867080807686, "learning_rate": 5.5507805847289244e-05, "loss": 0.0005205929279327393, "step": 156810 }, { "epoch": 44.51319897814363, "grad_norm": 0.024822255596518517, "learning_rate": 5.550496735736589e-05, "loss": 0.0020809397101402283, "step": 156820 }, { "epoch": 44.51603746806699, "grad_norm": 0.08554660528898239, "learning_rate": 5.5502128867442526e-05, "loss": 0.0007379416376352311, "step": 156830 }, { "epoch": 44.518875957990346, "grad_norm": 1.7508137226104736, "learning_rate": 5.549929037751916e-05, "loss": 0.0015969293192029, "step": 156840 }, { "epoch": 44.52171444791371, "grad_norm": 0.518244743347168, "learning_rate": 5.54964518875958e-05, "loss": 0.003400442749261856, "step": 156850 }, { "epoch": 44.52455293783707, "grad_norm": 0.05249672010540962, "learning_rate": 5.549361339767244e-05, "loss": 0.010106568038463593, "step": 156860 }, { "epoch": 44.527391427760435, "grad_norm": 0.17662981152534485, "learning_rate": 5.549077490774908e-05, "loss": 0.006136494129896164, "step": 156870 }, { "epoch": 44.53022991768379, "grad_norm": 4.722834587097168, "learning_rate": 5.548793641782571e-05, "loss": 0.0034492772072553636, "step": 156880 }, { "epoch": 44.53306840760715, "grad_norm": 0.0074323853477835655, "learning_rate": 5.548509792790236e-05, "loss": 0.0038264401257038115, "step": 156890 }, { "epoch": 44.535906897530516, "grad_norm": 0.2954625189304352, "learning_rate": 5.5482259437979e-05, "loss": 0.000492524541914463, "step": 156900 }, { "epoch": 44.53874538745387, "grad_norm": 0.012900340370833874, "learning_rate": 5.547942094805564e-05, "loss": 0.0018310852348804474, "step": 156910 }, { "epoch": 44.541583877377235, "grad_norm": 0.981627345085144, "learning_rate": 5.547658245813228e-05, "loss": 0.009431952983140946, "step": 156920 }, { "epoch": 44.5444223673006, "grad_norm": 6.682305335998535, "learning_rate": 5.547374396820891e-05, "loss": 0.002269558049738407, "step": 156930 }, { "epoch": 44.547260857223954, "grad_norm": 0.043226514011621475, "learning_rate": 5.547090547828555e-05, "loss": 0.0038981020450592043, "step": 156940 }, { "epoch": 44.55009934714732, "grad_norm": 0.1977856606245041, "learning_rate": 5.54680669883622e-05, "loss": 0.0006040720269083977, "step": 156950 }, { "epoch": 44.55293783707068, "grad_norm": 1.352722406387329, "learning_rate": 5.546522849843884e-05, "loss": 0.0012447986751794814, "step": 156960 }, { "epoch": 44.55577632699404, "grad_norm": 0.07884121686220169, "learning_rate": 5.546239000851547e-05, "loss": 0.0051211100071668625, "step": 156970 }, { "epoch": 44.5586148169174, "grad_norm": 0.028588151559233665, "learning_rate": 5.545955151859211e-05, "loss": 0.0067764706909656525, "step": 156980 }, { "epoch": 44.56145330684076, "grad_norm": 0.26544204354286194, "learning_rate": 5.545671302866875e-05, "loss": 0.0046577505767345425, "step": 156990 }, { "epoch": 44.564291796764124, "grad_norm": 2.533151388168335, "learning_rate": 5.545387453874539e-05, "loss": 0.006539425253868103, "step": 157000 }, { "epoch": 44.564291796764124, "eval_accuracy": 0.9788262224200419, "eval_loss": 0.09520041942596436, "eval_runtime": 34.9198, "eval_samples_per_second": 450.375, "eval_steps_per_second": 7.045, "step": 157000 }, { "epoch": 44.56713028668748, "grad_norm": 0.05408522114157677, "learning_rate": 5.5451036048822024e-05, "loss": 0.0065715894103050235, "step": 157010 }, { "epoch": 44.56996877661084, "grad_norm": 14.473291397094727, "learning_rate": 5.544819755889867e-05, "loss": 0.012285082787275314, "step": 157020 }, { "epoch": 44.572807266534205, "grad_norm": 1.8319827318191528, "learning_rate": 5.544535906897531e-05, "loss": 0.010145700722932815, "step": 157030 }, { "epoch": 44.57564575645756, "grad_norm": 0.1650376170873642, "learning_rate": 5.544252057905195e-05, "loss": 0.001396014913916588, "step": 157040 }, { "epoch": 44.578484246380924, "grad_norm": 0.15029171109199524, "learning_rate": 5.543968208912859e-05, "loss": 0.0008736763149499894, "step": 157050 }, { "epoch": 44.58132273630429, "grad_norm": 2.0231308937072754, "learning_rate": 5.5436843599205224e-05, "loss": 0.0012332303449511528, "step": 157060 }, { "epoch": 44.58416122622765, "grad_norm": 0.011695928871631622, "learning_rate": 5.543400510928186e-05, "loss": 0.001036747545003891, "step": 157070 }, { "epoch": 44.586999716151006, "grad_norm": 3.323282480239868, "learning_rate": 5.54311666193585e-05, "loss": 0.001159415952861309, "step": 157080 }, { "epoch": 44.58983820607437, "grad_norm": 0.17654326558113098, "learning_rate": 5.542832812943515e-05, "loss": 0.0006427040323615074, "step": 157090 }, { "epoch": 44.59267669599773, "grad_norm": 0.022380487993359566, "learning_rate": 5.542548963951178e-05, "loss": 0.00638522207736969, "step": 157100 }, { "epoch": 44.59551518592109, "grad_norm": 0.07547654956579208, "learning_rate": 5.5422651149588424e-05, "loss": 0.0011264612898230554, "step": 157110 }, { "epoch": 44.59835367584445, "grad_norm": 0.3226102292537689, "learning_rate": 5.541981265966506e-05, "loss": 0.000615406408905983, "step": 157120 }, { "epoch": 44.60119216576781, "grad_norm": 11.672792434692383, "learning_rate": 5.54169741697417e-05, "loss": 0.007174202054738998, "step": 157130 }, { "epoch": 44.604030655691176, "grad_norm": 0.03849797695875168, "learning_rate": 5.5414135679818334e-05, "loss": 0.006256081908941269, "step": 157140 }, { "epoch": 44.60686914561453, "grad_norm": 0.06992045789957047, "learning_rate": 5.541129718989498e-05, "loss": 0.001974639296531677, "step": 157150 }, { "epoch": 44.609707635537895, "grad_norm": 0.9919527173042297, "learning_rate": 5.5408458699971624e-05, "loss": 0.0006628416478633881, "step": 157160 }, { "epoch": 44.61254612546126, "grad_norm": 0.04974054545164108, "learning_rate": 5.540562021004826e-05, "loss": 0.004058602824807167, "step": 157170 }, { "epoch": 44.61538461538461, "grad_norm": 0.06735054403543472, "learning_rate": 5.54027817201249e-05, "loss": 0.0029813751578330995, "step": 157180 }, { "epoch": 44.618223105307976, "grad_norm": 0.009861480444669724, "learning_rate": 5.5399943230201534e-05, "loss": 0.0015553582459688186, "step": 157190 }, { "epoch": 44.62106159523134, "grad_norm": 0.013007582165300846, "learning_rate": 5.539710474027817e-05, "loss": 0.007166382670402527, "step": 157200 }, { "epoch": 44.623900085154695, "grad_norm": 0.04722771793603897, "learning_rate": 5.539426625035481e-05, "loss": 0.0032987646758556367, "step": 157210 }, { "epoch": 44.62673857507806, "grad_norm": 0.03976571187376976, "learning_rate": 5.539142776043146e-05, "loss": 0.0035001635551452637, "step": 157220 }, { "epoch": 44.62957706500142, "grad_norm": 0.03209367394447327, "learning_rate": 5.538858927050809e-05, "loss": 0.0023889826610684396, "step": 157230 }, { "epoch": 44.63241555492478, "grad_norm": 0.1682220697402954, "learning_rate": 5.5385750780584735e-05, "loss": 0.00447433814406395, "step": 157240 }, { "epoch": 44.63525404484814, "grad_norm": 2.264878511428833, "learning_rate": 5.538291229066137e-05, "loss": 0.004031073302030563, "step": 157250 }, { "epoch": 44.6380925347715, "grad_norm": 0.2812945246696472, "learning_rate": 5.538007380073801e-05, "loss": 0.016580113768577577, "step": 157260 }, { "epoch": 44.640931024694865, "grad_norm": 0.030676402151584625, "learning_rate": 5.5377235310814645e-05, "loss": 0.0025819754227995872, "step": 157270 }, { "epoch": 44.64376951461822, "grad_norm": 3.9392411708831787, "learning_rate": 5.5374396820891287e-05, "loss": 0.0033441178500652315, "step": 157280 }, { "epoch": 44.646608004541584, "grad_norm": 0.019982095807790756, "learning_rate": 5.5371558330967935e-05, "loss": 0.004452769458293915, "step": 157290 }, { "epoch": 44.64944649446495, "grad_norm": 0.5459398627281189, "learning_rate": 5.536871984104457e-05, "loss": 0.008787131309509278, "step": 157300 }, { "epoch": 44.6522849843883, "grad_norm": 0.24187228083610535, "learning_rate": 5.5365881351121204e-05, "loss": 0.0004108654335141182, "step": 157310 }, { "epoch": 44.655123474311665, "grad_norm": 6.857498645782471, "learning_rate": 5.5363042861197845e-05, "loss": 0.005826739966869355, "step": 157320 }, { "epoch": 44.65796196423503, "grad_norm": 0.09951264411211014, "learning_rate": 5.536020437127448e-05, "loss": 0.0003575796261429787, "step": 157330 }, { "epoch": 44.66080045415839, "grad_norm": 0.015978632494807243, "learning_rate": 5.535736588135112e-05, "loss": 0.002544151619076729, "step": 157340 }, { "epoch": 44.66363894408175, "grad_norm": 0.9135395884513855, "learning_rate": 5.535452739142777e-05, "loss": 0.006677610427141189, "step": 157350 }, { "epoch": 44.66647743400511, "grad_norm": 0.32463619112968445, "learning_rate": 5.5351688901504404e-05, "loss": 0.0024977570399641992, "step": 157360 }, { "epoch": 44.66931592392847, "grad_norm": 0.017990967258810997, "learning_rate": 5.5348850411581045e-05, "loss": 0.0006179407238960267, "step": 157370 }, { "epoch": 44.67215441385183, "grad_norm": 0.14578375220298767, "learning_rate": 5.534601192165768e-05, "loss": 0.002411486394703388, "step": 157380 }, { "epoch": 44.67499290377519, "grad_norm": 0.0395384356379509, "learning_rate": 5.534317343173432e-05, "loss": 0.002887247875332832, "step": 157390 }, { "epoch": 44.677831393698554, "grad_norm": 0.050441574305295944, "learning_rate": 5.5340334941810956e-05, "loss": 0.00014123637229204177, "step": 157400 }, { "epoch": 44.68066988362191, "grad_norm": 0.026967760175466537, "learning_rate": 5.533749645188759e-05, "loss": 0.0016681252047419548, "step": 157410 }, { "epoch": 44.68350837354527, "grad_norm": 0.917779266834259, "learning_rate": 5.5334657961964245e-05, "loss": 0.0014971388503909112, "step": 157420 }, { "epoch": 44.686346863468636, "grad_norm": 0.024303585290908813, "learning_rate": 5.533181947204088e-05, "loss": 0.0003816727548837662, "step": 157430 }, { "epoch": 44.689185353392, "grad_norm": 0.09835635125637054, "learning_rate": 5.5328980982117515e-05, "loss": 0.002416698820888996, "step": 157440 }, { "epoch": 44.692023843315354, "grad_norm": 0.06550965458154678, "learning_rate": 5.5326142492194156e-05, "loss": 0.0031736582517623902, "step": 157450 }, { "epoch": 44.69486233323872, "grad_norm": 1.268196702003479, "learning_rate": 5.532330400227079e-05, "loss": 0.015711696445941926, "step": 157460 }, { "epoch": 44.69770082316208, "grad_norm": 1.5669794082641602, "learning_rate": 5.532046551234743e-05, "loss": 0.0004458548501133919, "step": 157470 }, { "epoch": 44.700539313085436, "grad_norm": 0.14614298939704895, "learning_rate": 5.5317627022424067e-05, "loss": 0.004806867986917496, "step": 157480 }, { "epoch": 44.7033778030088, "grad_norm": 1.3464354276657104, "learning_rate": 5.5314788532500715e-05, "loss": 0.0008591718971729279, "step": 157490 }, { "epoch": 44.70621629293216, "grad_norm": 0.11931300908327103, "learning_rate": 5.5311950042577356e-05, "loss": 0.0015416728332638741, "step": 157500 }, { "epoch": 44.70621629293216, "eval_accuracy": 0.9815603738793158, "eval_loss": 0.07316756993532181, "eval_runtime": 34.6658, "eval_samples_per_second": 453.675, "eval_steps_per_second": 7.096, "step": 157500 }, { "epoch": 44.70905478285552, "grad_norm": 0.28400057554244995, "learning_rate": 5.530911155265399e-05, "loss": 0.002796079032123089, "step": 157510 }, { "epoch": 44.71189327277888, "grad_norm": 3.057932138442993, "learning_rate": 5.530627306273063e-05, "loss": 0.0009657170623540879, "step": 157520 }, { "epoch": 44.71473176270224, "grad_norm": 0.03602493926882744, "learning_rate": 5.530343457280727e-05, "loss": 0.000590437836945057, "step": 157530 }, { "epoch": 44.717570252625606, "grad_norm": 0.2842561602592468, "learning_rate": 5.53005960828839e-05, "loss": 0.002372116595506668, "step": 157540 }, { "epoch": 44.72040874254896, "grad_norm": 0.30367881059646606, "learning_rate": 5.5297757592960556e-05, "loss": 0.0005055882036685943, "step": 157550 }, { "epoch": 44.723247232472325, "grad_norm": 9.02198314666748, "learning_rate": 5.529491910303719e-05, "loss": 0.011659345775842666, "step": 157560 }, { "epoch": 44.72608572239569, "grad_norm": 0.009865541011095047, "learning_rate": 5.5292080613113825e-05, "loss": 0.0005932793021202088, "step": 157570 }, { "epoch": 44.72892421231904, "grad_norm": 0.005841899663209915, "learning_rate": 5.528924212319047e-05, "loss": 0.00613291934132576, "step": 157580 }, { "epoch": 44.731762702242406, "grad_norm": 0.11236811429262161, "learning_rate": 5.52864036332671e-05, "loss": 0.001873253844678402, "step": 157590 }, { "epoch": 44.73460119216577, "grad_norm": 0.8004417419433594, "learning_rate": 5.528356514334374e-05, "loss": 0.0037494920194149016, "step": 157600 }, { "epoch": 44.73743968208913, "grad_norm": 0.01946021430194378, "learning_rate": 5.528072665342038e-05, "loss": 0.006612609326839447, "step": 157610 }, { "epoch": 44.74027817201249, "grad_norm": 1.0839909315109253, "learning_rate": 5.5277888163497026e-05, "loss": 0.0018900277093052865, "step": 157620 }, { "epoch": 44.74311666193585, "grad_norm": 12.043798446655273, "learning_rate": 5.527504967357367e-05, "loss": 0.004669753462076187, "step": 157630 }, { "epoch": 44.745955151859214, "grad_norm": 0.09044749289751053, "learning_rate": 5.52722111836503e-05, "loss": 0.006139054894447327, "step": 157640 }, { "epoch": 44.74879364178257, "grad_norm": 0.05937543883919716, "learning_rate": 5.526937269372694e-05, "loss": 0.001873796433210373, "step": 157650 }, { "epoch": 44.75163213170593, "grad_norm": 0.00805190484970808, "learning_rate": 5.526653420380358e-05, "loss": 0.00043327491730451586, "step": 157660 }, { "epoch": 44.754470621629295, "grad_norm": 0.17588165402412415, "learning_rate": 5.526369571388021e-05, "loss": 0.0006526678800582886, "step": 157670 }, { "epoch": 44.75730911155265, "grad_norm": 0.04757062345743179, "learning_rate": 5.5260857223956853e-05, "loss": 0.0006321728229522705, "step": 157680 }, { "epoch": 44.760147601476014, "grad_norm": 0.27326858043670654, "learning_rate": 5.52580187340335e-05, "loss": 0.0010538475587964057, "step": 157690 }, { "epoch": 44.76298609139938, "grad_norm": 0.03502674400806427, "learning_rate": 5.5255180244110136e-05, "loss": 0.00017592310905456543, "step": 157700 }, { "epoch": 44.76582458132274, "grad_norm": 0.0032962679397314787, "learning_rate": 5.525234175418678e-05, "loss": 0.0010530836880207062, "step": 157710 }, { "epoch": 44.768663071246095, "grad_norm": 0.003586317179724574, "learning_rate": 5.524950326426341e-05, "loss": 0.007720042765140533, "step": 157720 }, { "epoch": 44.77150156116946, "grad_norm": 0.007128340192139149, "learning_rate": 5.5246664774340054e-05, "loss": 0.006333956122398376, "step": 157730 }, { "epoch": 44.77434005109282, "grad_norm": 0.07560210675001144, "learning_rate": 5.524382628441669e-05, "loss": 0.0002283964306116104, "step": 157740 }, { "epoch": 44.77717854101618, "grad_norm": 0.023584287613630295, "learning_rate": 5.5240987794493336e-05, "loss": 0.00227635744959116, "step": 157750 }, { "epoch": 44.78001703093954, "grad_norm": 1.9804236888885498, "learning_rate": 5.523814930456998e-05, "loss": 0.0004884736612439155, "step": 157760 }, { "epoch": 44.7828555208629, "grad_norm": 0.22876444458961487, "learning_rate": 5.523531081464661e-05, "loss": 0.00125411469489336, "step": 157770 }, { "epoch": 44.78569401078626, "grad_norm": 0.2384657859802246, "learning_rate": 5.523247232472325e-05, "loss": 0.00039307791739702226, "step": 157780 }, { "epoch": 44.78853250070962, "grad_norm": 0.07222501933574677, "learning_rate": 5.522963383479989e-05, "loss": 0.0025676973164081573, "step": 157790 }, { "epoch": 44.791370990632984, "grad_norm": 0.02155538648366928, "learning_rate": 5.522679534487652e-05, "loss": 0.0005978545174002648, "step": 157800 }, { "epoch": 44.79420948055635, "grad_norm": 0.29295381903648376, "learning_rate": 5.5223956854953164e-05, "loss": 0.00018590502440929413, "step": 157810 }, { "epoch": 44.7970479704797, "grad_norm": 0.03244061395525932, "learning_rate": 5.522111836502981e-05, "loss": 0.002217397652566433, "step": 157820 }, { "epoch": 44.799886460403066, "grad_norm": 0.10547493398189545, "learning_rate": 5.521827987510645e-05, "loss": 0.00023358073085546493, "step": 157830 }, { "epoch": 44.80272495032643, "grad_norm": 0.011396202258765697, "learning_rate": 5.521544138518309e-05, "loss": 0.00010406151413917541, "step": 157840 }, { "epoch": 44.805563440249784, "grad_norm": 0.017056556418538094, "learning_rate": 5.521260289525972e-05, "loss": 0.00034971311688423155, "step": 157850 }, { "epoch": 44.80840193017315, "grad_norm": 0.7882155179977417, "learning_rate": 5.5209764405336364e-05, "loss": 0.0004647504538297653, "step": 157860 }, { "epoch": 44.81124042009651, "grad_norm": 0.15028639137744904, "learning_rate": 5.5206925915413e-05, "loss": 0.0003507031127810478, "step": 157870 }, { "epoch": 44.814078910019866, "grad_norm": 0.009083678014576435, "learning_rate": 5.5204087425489633e-05, "loss": 0.001516558602452278, "step": 157880 }, { "epoch": 44.81691739994323, "grad_norm": 0.4191237986087799, "learning_rate": 5.520124893556629e-05, "loss": 0.0029837092384696006, "step": 157890 }, { "epoch": 44.81975588986659, "grad_norm": 0.1283908188343048, "learning_rate": 5.519841044564292e-05, "loss": 0.000272226519882679, "step": 157900 }, { "epoch": 44.822594379789955, "grad_norm": 6.103508949279785, "learning_rate": 5.519557195571956e-05, "loss": 0.002320270612835884, "step": 157910 }, { "epoch": 44.82543286971331, "grad_norm": 0.01767829805612564, "learning_rate": 5.51927334657962e-05, "loss": 0.0020275263115763663, "step": 157920 }, { "epoch": 44.82827135963667, "grad_norm": 0.04949922114610672, "learning_rate": 5.5189894975872834e-05, "loss": 0.014158299565315247, "step": 157930 }, { "epoch": 44.831109849560036, "grad_norm": 0.12008839845657349, "learning_rate": 5.5187056485949475e-05, "loss": 0.010123172402381897, "step": 157940 }, { "epoch": 44.83394833948339, "grad_norm": 7.46970272064209, "learning_rate": 5.518421799602612e-05, "loss": 0.002348887920379639, "step": 157950 }, { "epoch": 44.836786829406755, "grad_norm": 3.5501139163970947, "learning_rate": 5.518137950610276e-05, "loss": 0.0009447144344449043, "step": 157960 }, { "epoch": 44.83962531933012, "grad_norm": 0.03705357015132904, "learning_rate": 5.51785410161794e-05, "loss": 0.002539428323507309, "step": 157970 }, { "epoch": 44.84246380925348, "grad_norm": 0.8227686882019043, "learning_rate": 5.5175702526256034e-05, "loss": 0.0014607677236199378, "step": 157980 }, { "epoch": 44.845302299176836, "grad_norm": 0.01894565485417843, "learning_rate": 5.5172864036332675e-05, "loss": 0.0010280922055244447, "step": 157990 }, { "epoch": 44.8481407891002, "grad_norm": 1.1416842937469482, "learning_rate": 5.517002554640931e-05, "loss": 0.0012219741940498352, "step": 158000 }, { "epoch": 44.8481407891002, "eval_accuracy": 0.9816239587969734, "eval_loss": 0.07412763684988022, "eval_runtime": 35.826, "eval_samples_per_second": 438.983, "eval_steps_per_second": 6.867, "step": 158000 }, { "epoch": 44.85097927902356, "grad_norm": 14.376980781555176, "learning_rate": 5.5167187056485944e-05, "loss": 0.007518823444843292, "step": 158010 }, { "epoch": 44.85381776894692, "grad_norm": 0.0678994208574295, "learning_rate": 5.51643485665626e-05, "loss": 0.0009030163288116455, "step": 158020 }, { "epoch": 44.85665625887028, "grad_norm": 0.8861182332038879, "learning_rate": 5.5161510076639234e-05, "loss": 0.003369784355163574, "step": 158030 }, { "epoch": 44.859494748793644, "grad_norm": 5.675867080688477, "learning_rate": 5.515867158671587e-05, "loss": 0.0011923899874091148, "step": 158040 }, { "epoch": 44.862333238717, "grad_norm": 0.027283668518066406, "learning_rate": 5.515583309679251e-05, "loss": 0.0031189698725938796, "step": 158050 }, { "epoch": 44.86517172864036, "grad_norm": 17.790983200073242, "learning_rate": 5.5152994606869144e-05, "loss": 0.017824420332908632, "step": 158060 }, { "epoch": 44.868010218563725, "grad_norm": 0.11873092502355576, "learning_rate": 5.5150156116945786e-05, "loss": 0.0009493196383118629, "step": 158070 }, { "epoch": 44.87084870848709, "grad_norm": 0.24055258929729462, "learning_rate": 5.514731762702242e-05, "loss": 0.0030713314190506935, "step": 158080 }, { "epoch": 44.873687198410444, "grad_norm": 4.027245044708252, "learning_rate": 5.514447913709907e-05, "loss": 0.0018227947875857353, "step": 158090 }, { "epoch": 44.87652568833381, "grad_norm": 0.16691860556602478, "learning_rate": 5.514164064717571e-05, "loss": 0.0009308036416769028, "step": 158100 }, { "epoch": 44.87936417825717, "grad_norm": 0.03555460274219513, "learning_rate": 5.5138802157252344e-05, "loss": 0.0013853199779987335, "step": 158110 }, { "epoch": 44.882202668180526, "grad_norm": 0.04839939624071121, "learning_rate": 5.5135963667328986e-05, "loss": 0.0022936472669243813, "step": 158120 }, { "epoch": 44.88504115810389, "grad_norm": 1.0632603168487549, "learning_rate": 5.513312517740562e-05, "loss": 0.002088184654712677, "step": 158130 }, { "epoch": 44.88787964802725, "grad_norm": 0.19796642661094666, "learning_rate": 5.5130286687482255e-05, "loss": 0.0038101322948932647, "step": 158140 }, { "epoch": 44.89071813795061, "grad_norm": 0.1636369675397873, "learning_rate": 5.512773204655124e-05, "loss": 0.012387145310640335, "step": 158150 }, { "epoch": 44.89355662787397, "grad_norm": 0.037090759724378586, "learning_rate": 5.5124893556627876e-05, "loss": 0.0028784282505512236, "step": 158160 }, { "epoch": 44.89639511779733, "grad_norm": 0.017456332221627235, "learning_rate": 5.512205506670452e-05, "loss": 0.0004145471379160881, "step": 158170 }, { "epoch": 44.899233607720696, "grad_norm": 0.01129926834255457, "learning_rate": 5.511921657678115e-05, "loss": 0.0005389453843235969, "step": 158180 }, { "epoch": 44.90207209764405, "grad_norm": 1.0940086841583252, "learning_rate": 5.511637808685779e-05, "loss": 0.004700354486703873, "step": 158190 }, { "epoch": 44.904910587567414, "grad_norm": 0.18994072079658508, "learning_rate": 5.5113539596934435e-05, "loss": 0.0036414779722690584, "step": 158200 }, { "epoch": 44.90774907749078, "grad_norm": 0.07277844846248627, "learning_rate": 5.5110701107011076e-05, "loss": 0.004773284494876862, "step": 158210 }, { "epoch": 44.91058756741413, "grad_norm": 0.024079032242298126, "learning_rate": 5.510786261708771e-05, "loss": 0.004716842994093895, "step": 158220 }, { "epoch": 44.913426057337496, "grad_norm": 2.1876108646392822, "learning_rate": 5.510502412716435e-05, "loss": 0.0057860840111970905, "step": 158230 }, { "epoch": 44.91626454726086, "grad_norm": 0.9204452037811279, "learning_rate": 5.510218563724099e-05, "loss": 0.003061116300523281, "step": 158240 }, { "epoch": 44.919103037184215, "grad_norm": 0.24316127598285675, "learning_rate": 5.509934714731763e-05, "loss": 0.0021649736911058424, "step": 158250 }, { "epoch": 44.92194152710758, "grad_norm": 4.674000263214111, "learning_rate": 5.509650865739426e-05, "loss": 0.019288113713264464, "step": 158260 }, { "epoch": 44.92478001703094, "grad_norm": 6.492913246154785, "learning_rate": 5.509367016747091e-05, "loss": 0.0023892395198345183, "step": 158270 }, { "epoch": 44.9276185069543, "grad_norm": 8.00478458404541, "learning_rate": 5.509083167754755e-05, "loss": 0.006150199845433235, "step": 158280 }, { "epoch": 44.93045699687766, "grad_norm": 0.0816565454006195, "learning_rate": 5.508799318762419e-05, "loss": 0.0015279749408364297, "step": 158290 }, { "epoch": 44.93329548680102, "grad_norm": 1.6368283033370972, "learning_rate": 5.508515469770082e-05, "loss": 0.0027935832738876345, "step": 158300 }, { "epoch": 44.936133976724385, "grad_norm": 0.5423925518989563, "learning_rate": 5.508231620777746e-05, "loss": 0.010988852381706238, "step": 158310 }, { "epoch": 44.93897246664774, "grad_norm": 0.2155109941959381, "learning_rate": 5.50794777178541e-05, "loss": 0.0016975628212094307, "step": 158320 }, { "epoch": 44.9418109565711, "grad_norm": 0.9249335527420044, "learning_rate": 5.5076639227930746e-05, "loss": 0.0045364491641521456, "step": 158330 }, { "epoch": 44.944649446494466, "grad_norm": 0.5262198448181152, "learning_rate": 5.507380073800739e-05, "loss": 0.000977524183690548, "step": 158340 }, { "epoch": 44.94748793641783, "grad_norm": 0.20779113471508026, "learning_rate": 5.507096224808402e-05, "loss": 0.007841752469539642, "step": 158350 }, { "epoch": 44.950326426341185, "grad_norm": 0.06268156319856644, "learning_rate": 5.506812375816066e-05, "loss": 0.0006134837865829467, "step": 158360 }, { "epoch": 44.95316491626455, "grad_norm": 0.04672107473015785, "learning_rate": 5.50652852682373e-05, "loss": 0.0007808653637766839, "step": 158370 }, { "epoch": 44.95600340618791, "grad_norm": 2.4784088134765625, "learning_rate": 5.506244677831394e-05, "loss": 0.00794374719262123, "step": 158380 }, { "epoch": 44.95884189611127, "grad_norm": 5.4685516357421875, "learning_rate": 5.5059608288390574e-05, "loss": 0.013189126551151276, "step": 158390 }, { "epoch": 44.96168038603463, "grad_norm": 0.05447293072938919, "learning_rate": 5.505676979846722e-05, "loss": 0.009084481745958328, "step": 158400 }, { "epoch": 44.96451887595799, "grad_norm": 0.09337880462408066, "learning_rate": 5.505393130854386e-05, "loss": 0.003589717298746109, "step": 158410 }, { "epoch": 44.96735736588135, "grad_norm": 0.037523042410612106, "learning_rate": 5.50510928186205e-05, "loss": 0.0019592909142374993, "step": 158420 }, { "epoch": 44.97019585580471, "grad_norm": 0.4049399793148041, "learning_rate": 5.504825432869713e-05, "loss": 0.01838420331478119, "step": 158430 }, { "epoch": 44.973034345728074, "grad_norm": 2.8611373901367188, "learning_rate": 5.5045415838773774e-05, "loss": 0.0009954167529940606, "step": 158440 }, { "epoch": 44.97587283565144, "grad_norm": 0.09327209740877151, "learning_rate": 5.504257734885041e-05, "loss": 0.003703735023736954, "step": 158450 }, { "epoch": 44.97871132557479, "grad_norm": 0.06555250287055969, "learning_rate": 5.503973885892705e-05, "loss": 0.010993841290473937, "step": 158460 }, { "epoch": 44.981549815498155, "grad_norm": 2.788046360015869, "learning_rate": 5.50369003690037e-05, "loss": 0.001409068889915943, "step": 158470 }, { "epoch": 44.98438830542152, "grad_norm": 6.211036682128906, "learning_rate": 5.503406187908033e-05, "loss": 0.0070623144507408146, "step": 158480 }, { "epoch": 44.987226795344874, "grad_norm": 0.12958607077598572, "learning_rate": 5.5031223389156974e-05, "loss": 0.0019913392141461374, "step": 158490 }, { "epoch": 44.99006528526824, "grad_norm": 2.955886125564575, "learning_rate": 5.502838489923361e-05, "loss": 0.0032547269016504287, "step": 158500 }, { "epoch": 44.99006528526824, "eval_accuracy": 0.982069053220576, "eval_loss": 0.07143403589725494, "eval_runtime": 36.0481, "eval_samples_per_second": 436.278, "eval_steps_per_second": 6.824, "step": 158500 }, { "epoch": 44.9929037751916, "grad_norm": 1.5401824712753296, "learning_rate": 5.502554640931025e-05, "loss": 0.00048012249171733854, "step": 158510 }, { "epoch": 44.995742265114956, "grad_norm": 1.3361990451812744, "learning_rate": 5.5022707919386885e-05, "loss": 0.0021795235574245454, "step": 158520 }, { "epoch": 44.99858075503832, "grad_norm": 0.06571158766746521, "learning_rate": 5.501986942946353e-05, "loss": 0.0010855842381715774, "step": 158530 }, { "epoch": 45.00141924496168, "grad_norm": 3.6269891262054443, "learning_rate": 5.501703093954017e-05, "loss": 0.0036652550101280213, "step": 158540 }, { "epoch": 45.004257734885044, "grad_norm": 0.3940187990665436, "learning_rate": 5.501419244961681e-05, "loss": 0.003363274037837982, "step": 158550 }, { "epoch": 45.0070962248084, "grad_norm": 0.6823458075523376, "learning_rate": 5.501135395969344e-05, "loss": 0.0029423946514725687, "step": 158560 }, { "epoch": 45.00993471473176, "grad_norm": 0.007262636441737413, "learning_rate": 5.5008515469770085e-05, "loss": 0.00035716630518436434, "step": 158570 }, { "epoch": 45.012773204655126, "grad_norm": 4.744016170501709, "learning_rate": 5.500567697984672e-05, "loss": 0.0020218992605805395, "step": 158580 }, { "epoch": 45.01561169457848, "grad_norm": 0.1051047071814537, "learning_rate": 5.500283848992336e-05, "loss": 0.0004638319835066795, "step": 158590 }, { "epoch": 45.018450184501845, "grad_norm": 0.006925339810550213, "learning_rate": 5.500000000000001e-05, "loss": 0.000344439223408699, "step": 158600 }, { "epoch": 45.02128867442521, "grad_norm": 0.03700825572013855, "learning_rate": 5.499716151007664e-05, "loss": 0.0007423475384712219, "step": 158610 }, { "epoch": 45.02412716434856, "grad_norm": 0.013713554479181767, "learning_rate": 5.4994323020153285e-05, "loss": 0.0005368689075112343, "step": 158620 }, { "epoch": 45.026965654271926, "grad_norm": 0.9647694826126099, "learning_rate": 5.499148453022992e-05, "loss": 0.0006979577243328095, "step": 158630 }, { "epoch": 45.02980414419529, "grad_norm": 0.0095977196469903, "learning_rate": 5.498864604030656e-05, "loss": 0.00071562509983778, "step": 158640 }, { "epoch": 45.03264263411865, "grad_norm": 0.049811020493507385, "learning_rate": 5.4985807550383195e-05, "loss": 0.000209987536072731, "step": 158650 }, { "epoch": 45.03548112404201, "grad_norm": 2.98647403717041, "learning_rate": 5.4982969060459843e-05, "loss": 0.0030856898054480554, "step": 158660 }, { "epoch": 45.03831961396537, "grad_norm": 0.010086371563374996, "learning_rate": 5.498013057053648e-05, "loss": 0.00014455337077379227, "step": 158670 }, { "epoch": 45.04115810388873, "grad_norm": 0.02149253338575363, "learning_rate": 5.497729208061312e-05, "loss": 0.0005693808197975159, "step": 158680 }, { "epoch": 45.04399659381209, "grad_norm": 0.5937157869338989, "learning_rate": 5.4974453590689754e-05, "loss": 0.0005500907078385353, "step": 158690 }, { "epoch": 45.04683508373545, "grad_norm": 0.025938989594578743, "learning_rate": 5.4971615100766395e-05, "loss": 0.00022723600268363952, "step": 158700 }, { "epoch": 45.049673573658815, "grad_norm": 0.042126309126615524, "learning_rate": 5.496877661084303e-05, "loss": 0.0008861796930432319, "step": 158710 }, { "epoch": 45.05251206358217, "grad_norm": 0.09170858561992645, "learning_rate": 5.496593812091967e-05, "loss": 0.0010108461603522301, "step": 158720 }, { "epoch": 45.055350553505534, "grad_norm": 0.017943045124411583, "learning_rate": 5.496309963099632e-05, "loss": 0.006465678662061691, "step": 158730 }, { "epoch": 45.0581890434289, "grad_norm": 1.1744251251220703, "learning_rate": 5.4960261141072954e-05, "loss": 0.0008592236787080765, "step": 158740 }, { "epoch": 45.06102753335226, "grad_norm": 0.007669487502425909, "learning_rate": 5.4957422651149595e-05, "loss": 0.0003159128129482269, "step": 158750 }, { "epoch": 45.063866023275615, "grad_norm": 0.3053285479545593, "learning_rate": 5.495458416122623e-05, "loss": 0.0001563137397170067, "step": 158760 }, { "epoch": 45.06670451319898, "grad_norm": 0.2551368772983551, "learning_rate": 5.4951745671302865e-05, "loss": 0.0010043496266007423, "step": 158770 }, { "epoch": 45.06954300312234, "grad_norm": 0.033774927258491516, "learning_rate": 5.4948907181379506e-05, "loss": 0.0020836297422647475, "step": 158780 }, { "epoch": 45.0723814930457, "grad_norm": 0.02801787480711937, "learning_rate": 5.494606869145614e-05, "loss": 0.002918172441422939, "step": 158790 }, { "epoch": 45.07521998296906, "grad_norm": 0.014567938633263111, "learning_rate": 5.494323020153279e-05, "loss": 0.0027493184432387354, "step": 158800 }, { "epoch": 45.07805847289242, "grad_norm": 0.004875526763498783, "learning_rate": 5.494039171160943e-05, "loss": 0.006837742030620575, "step": 158810 }, { "epoch": 45.080896962815785, "grad_norm": 0.013610155321657658, "learning_rate": 5.4937553221686065e-05, "loss": 0.00027219820767641065, "step": 158820 }, { "epoch": 45.08373545273914, "grad_norm": 0.0276426263153553, "learning_rate": 5.4934714731762706e-05, "loss": 0.0011455938220024109, "step": 158830 }, { "epoch": 45.086573942662504, "grad_norm": 0.07069926708936691, "learning_rate": 5.493187624183934e-05, "loss": 0.000499003566801548, "step": 158840 }, { "epoch": 45.08941243258587, "grad_norm": 0.006240655668079853, "learning_rate": 5.492903775191598e-05, "loss": 0.0058745771646499636, "step": 158850 }, { "epoch": 45.09225092250922, "grad_norm": 4.090603351593018, "learning_rate": 5.492619926199263e-05, "loss": 0.002246329188346863, "step": 158860 }, { "epoch": 45.095089412432586, "grad_norm": 0.07075213640928268, "learning_rate": 5.4923360772069265e-05, "loss": 0.0017322417348623275, "step": 158870 }, { "epoch": 45.09792790235595, "grad_norm": 0.0548449344933033, "learning_rate": 5.4920522282145906e-05, "loss": 0.0023708853870630263, "step": 158880 }, { "epoch": 45.100766392279304, "grad_norm": 0.022027941420674324, "learning_rate": 5.491768379222254e-05, "loss": 0.0006109571084380149, "step": 158890 }, { "epoch": 45.10360488220267, "grad_norm": 0.02078745886683464, "learning_rate": 5.4914845302299175e-05, "loss": 0.0014784859493374824, "step": 158900 }, { "epoch": 45.10644337212603, "grad_norm": 0.012250973843038082, "learning_rate": 5.491200681237582e-05, "loss": 0.002907549776136875, "step": 158910 }, { "epoch": 45.10928186204939, "grad_norm": 0.5209750533103943, "learning_rate": 5.490916832245245e-05, "loss": 0.00040853600949048997, "step": 158920 }, { "epoch": 45.11212035197275, "grad_norm": 15.864836692810059, "learning_rate": 5.49063298325291e-05, "loss": 0.015370750427246093, "step": 158930 }, { "epoch": 45.11495884189611, "grad_norm": 0.5350141525268555, "learning_rate": 5.490349134260574e-05, "loss": 0.0019713129848241808, "step": 158940 }, { "epoch": 45.117797331819474, "grad_norm": 0.5496970415115356, "learning_rate": 5.4900652852682376e-05, "loss": 0.006469304114580155, "step": 158950 }, { "epoch": 45.12063582174283, "grad_norm": 0.12696009874343872, "learning_rate": 5.489781436275902e-05, "loss": 0.00026617106050252914, "step": 158960 }, { "epoch": 45.12347431166619, "grad_norm": 0.06399170309305191, "learning_rate": 5.489497587283565e-05, "loss": 0.0004396436735987663, "step": 158970 }, { "epoch": 45.126312801589556, "grad_norm": 0.20445826649665833, "learning_rate": 5.489213738291229e-05, "loss": 0.0033467821776866913, "step": 158980 }, { "epoch": 45.12915129151291, "grad_norm": 0.10445437580347061, "learning_rate": 5.488929889298893e-05, "loss": 0.0020018452778458595, "step": 158990 }, { "epoch": 45.131989781436275, "grad_norm": 0.09223033487796783, "learning_rate": 5.4886460403065576e-05, "loss": 0.0012113170698285103, "step": 159000 }, { "epoch": 45.131989781436275, "eval_accuracy": 0.9802250906085077, "eval_loss": 0.07288016378879547, "eval_runtime": 39.6479, "eval_samples_per_second": 396.667, "eval_steps_per_second": 6.205, "step": 159000 }, { "epoch": 45.13482827135964, "grad_norm": 0.12784628570079803, "learning_rate": 5.488362191314221e-05, "loss": 0.0007437108084559441, "step": 159010 }, { "epoch": 45.137666761283, "grad_norm": 0.08743168413639069, "learning_rate": 5.488078342321885e-05, "loss": 0.0017359724268317222, "step": 159020 }, { "epoch": 45.140505251206356, "grad_norm": 0.19199925661087036, "learning_rate": 5.4877944933295486e-05, "loss": 0.00029882024973630904, "step": 159030 }, { "epoch": 45.14334374112972, "grad_norm": 2.1468868255615234, "learning_rate": 5.487510644337213e-05, "loss": 0.0012004191055893898, "step": 159040 }, { "epoch": 45.14618223105308, "grad_norm": 0.023817867040634155, "learning_rate": 5.487226795344876e-05, "loss": 5.860719829797745e-05, "step": 159050 }, { "epoch": 45.14902072097644, "grad_norm": 0.4027917683124542, "learning_rate": 5.486942946352541e-05, "loss": 0.0013109678402543068, "step": 159060 }, { "epoch": 45.1518592108998, "grad_norm": 0.01844888925552368, "learning_rate": 5.486659097360205e-05, "loss": 0.00038837920874357224, "step": 159070 }, { "epoch": 45.154697700823164, "grad_norm": 0.0525038056075573, "learning_rate": 5.4863752483678686e-05, "loss": 0.0022150341421365736, "step": 159080 }, { "epoch": 45.15753619074652, "grad_norm": 0.2339223325252533, "learning_rate": 5.486091399375533e-05, "loss": 0.0008624130859971046, "step": 159090 }, { "epoch": 45.16037468066988, "grad_norm": 0.04866651073098183, "learning_rate": 5.485807550383196e-05, "loss": 0.001260950230062008, "step": 159100 }, { "epoch": 45.163213170593245, "grad_norm": 0.7890387177467346, "learning_rate": 5.48552370139086e-05, "loss": 0.0008266733959317208, "step": 159110 }, { "epoch": 45.16605166051661, "grad_norm": 0.012820526957511902, "learning_rate": 5.485239852398524e-05, "loss": 0.0013058794662356376, "step": 159120 }, { "epoch": 45.168890150439964, "grad_norm": 0.3399050831794739, "learning_rate": 5.4849560034061886e-05, "loss": 0.00038384795188903806, "step": 159130 }, { "epoch": 45.17172864036333, "grad_norm": 0.5349472761154175, "learning_rate": 5.484672154413852e-05, "loss": 0.0005605228245258332, "step": 159140 }, { "epoch": 45.17456713028669, "grad_norm": 0.053823892027139664, "learning_rate": 5.484388305421516e-05, "loss": 0.007832657545804977, "step": 159150 }, { "epoch": 45.177405620210045, "grad_norm": 1.7454737424850464, "learning_rate": 5.48410445642918e-05, "loss": 0.0032148323953151703, "step": 159160 }, { "epoch": 45.18024411013341, "grad_norm": 0.07120741903781891, "learning_rate": 5.483820607436844e-05, "loss": 0.0013906853273510933, "step": 159170 }, { "epoch": 45.18308260005677, "grad_norm": 0.01058767456561327, "learning_rate": 5.483536758444507e-05, "loss": 0.0004082836210727692, "step": 159180 }, { "epoch": 45.185921089980134, "grad_norm": 0.03741321340203285, "learning_rate": 5.4832529094521714e-05, "loss": 0.0007336528971791268, "step": 159190 }, { "epoch": 45.18875957990349, "grad_norm": 0.19816653430461884, "learning_rate": 5.482969060459836e-05, "loss": 0.0002834135666489601, "step": 159200 }, { "epoch": 45.19159806982685, "grad_norm": 0.012456159107387066, "learning_rate": 5.4826852114675e-05, "loss": 0.005797814950346947, "step": 159210 }, { "epoch": 45.194436559750216, "grad_norm": 0.02514851838350296, "learning_rate": 5.482401362475164e-05, "loss": 0.0023755472153425216, "step": 159220 }, { "epoch": 45.19727504967357, "grad_norm": 0.06590291857719421, "learning_rate": 5.482117513482827e-05, "loss": 0.0013220066204667092, "step": 159230 }, { "epoch": 45.200113539596934, "grad_norm": 1.5153616666793823, "learning_rate": 5.481833664490491e-05, "loss": 0.0049925088882446286, "step": 159240 }, { "epoch": 45.2029520295203, "grad_norm": 0.11178610473871231, "learning_rate": 5.481549815498155e-05, "loss": 0.0009141361340880394, "step": 159250 }, { "epoch": 45.20579051944365, "grad_norm": 1.4851499795913696, "learning_rate": 5.48126596650582e-05, "loss": 0.0044053617864847185, "step": 159260 }, { "epoch": 45.208629009367016, "grad_norm": 0.0445651039481163, "learning_rate": 5.480982117513483e-05, "loss": 0.0024019630625844004, "step": 159270 }, { "epoch": 45.21146749929038, "grad_norm": 0.4776557981967926, "learning_rate": 5.480698268521147e-05, "loss": 0.005981573462486267, "step": 159280 }, { "epoch": 45.21430598921374, "grad_norm": 0.029828757047653198, "learning_rate": 5.480414419528811e-05, "loss": 0.002338174730539322, "step": 159290 }, { "epoch": 45.2171444791371, "grad_norm": 0.5894966721534729, "learning_rate": 5.480130570536475e-05, "loss": 0.0012537574395537377, "step": 159300 }, { "epoch": 45.21998296906046, "grad_norm": 0.08537449687719345, "learning_rate": 5.4798467215441384e-05, "loss": 0.004897443950176239, "step": 159310 }, { "epoch": 45.22282145898382, "grad_norm": 0.6169788241386414, "learning_rate": 5.4795628725518025e-05, "loss": 0.007334719598293305, "step": 159320 }, { "epoch": 45.22565994890718, "grad_norm": 2.0479564666748047, "learning_rate": 5.479279023559467e-05, "loss": 0.011660800874233246, "step": 159330 }, { "epoch": 45.22849843883054, "grad_norm": 0.1357361525297165, "learning_rate": 5.478995174567131e-05, "loss": 0.0017374742776155472, "step": 159340 }, { "epoch": 45.231336928753905, "grad_norm": 1.3424798250198364, "learning_rate": 5.478711325574795e-05, "loss": 0.0016519224271178245, "step": 159350 }, { "epoch": 45.23417541867726, "grad_norm": 0.0357898585498333, "learning_rate": 5.4784274765824584e-05, "loss": 0.0016668537631630898, "step": 159360 }, { "epoch": 45.23701390860062, "grad_norm": 0.6878849267959595, "learning_rate": 5.478143627590122e-05, "loss": 0.0009658211842179298, "step": 159370 }, { "epoch": 45.239852398523986, "grad_norm": 0.0714469701051712, "learning_rate": 5.477859778597786e-05, "loss": 0.013088533282279968, "step": 159380 }, { "epoch": 45.24269088844735, "grad_norm": 0.014806711114943027, "learning_rate": 5.4775759296054494e-05, "loss": 0.0007658546790480613, "step": 159390 }, { "epoch": 45.245529378370705, "grad_norm": 0.0022192769683897495, "learning_rate": 5.477292080613114e-05, "loss": 0.00509309284389019, "step": 159400 }, { "epoch": 45.24836786829407, "grad_norm": 0.22753405570983887, "learning_rate": 5.4770082316207784e-05, "loss": 0.0023132363334298135, "step": 159410 }, { "epoch": 45.25120635821743, "grad_norm": 0.15086865425109863, "learning_rate": 5.476724382628442e-05, "loss": 0.0008806295692920685, "step": 159420 }, { "epoch": 45.254044848140786, "grad_norm": 0.00883480254560709, "learning_rate": 5.476440533636106e-05, "loss": 0.0027333350852131845, "step": 159430 }, { "epoch": 45.25688333806415, "grad_norm": 0.03567882999777794, "learning_rate": 5.4761566846437694e-05, "loss": 0.005340202897787094, "step": 159440 }, { "epoch": 45.25972182798751, "grad_norm": 0.1933916211128235, "learning_rate": 5.4758728356514336e-05, "loss": 0.001819242164492607, "step": 159450 }, { "epoch": 45.26256031791087, "grad_norm": 0.013094644993543625, "learning_rate": 5.4755889866590984e-05, "loss": 0.000857461430132389, "step": 159460 }, { "epoch": 45.26539880783423, "grad_norm": 0.0011658314615488052, "learning_rate": 5.475305137666762e-05, "loss": 0.004725193977355957, "step": 159470 }, { "epoch": 45.268237297757594, "grad_norm": 0.10210270434617996, "learning_rate": 5.475021288674425e-05, "loss": 0.0007044197991490364, "step": 159480 }, { "epoch": 45.27107578768096, "grad_norm": 0.22514928877353668, "learning_rate": 5.4747374396820895e-05, "loss": 0.000345599465072155, "step": 159490 }, { "epoch": 45.27391427760431, "grad_norm": 0.008252253755927086, "learning_rate": 5.474453590689753e-05, "loss": 0.004051699116826057, "step": 159500 }, { "epoch": 45.27391427760431, "eval_accuracy": 0.9825141476441788, "eval_loss": 0.06826258450746536, "eval_runtime": 41.5487, "eval_samples_per_second": 378.52, "eval_steps_per_second": 5.921, "step": 159500 }, { "epoch": 45.276752767527675, "grad_norm": 0.0603395514190197, "learning_rate": 5.474169741697417e-05, "loss": 0.0002572638913989067, "step": 159510 }, { "epoch": 45.27959125745104, "grad_norm": 0.08421454578638077, "learning_rate": 5.4738858927050805e-05, "loss": 0.0007293751463294029, "step": 159520 }, { "epoch": 45.282429747374394, "grad_norm": 0.08200055360794067, "learning_rate": 5.473602043712745e-05, "loss": 0.0012642854824662208, "step": 159530 }, { "epoch": 45.28526823729776, "grad_norm": 0.31261903047561646, "learning_rate": 5.4733181947204095e-05, "loss": 0.0002381870523095131, "step": 159540 }, { "epoch": 45.28810672722112, "grad_norm": 0.3010736405849457, "learning_rate": 5.473034345728073e-05, "loss": 0.0004101654514670372, "step": 159550 }, { "epoch": 45.29094521714448, "grad_norm": 0.20040249824523926, "learning_rate": 5.472750496735737e-05, "loss": 0.0007758298888802528, "step": 159560 }, { "epoch": 45.29378370706784, "grad_norm": 0.0681283250451088, "learning_rate": 5.4724666477434005e-05, "loss": 0.0006276421248912811, "step": 159570 }, { "epoch": 45.2966221969912, "grad_norm": 0.3151954412460327, "learning_rate": 5.472182798751064e-05, "loss": 0.0006993034854531288, "step": 159580 }, { "epoch": 45.299460686914564, "grad_norm": 0.2854955196380615, "learning_rate": 5.471898949758728e-05, "loss": 0.0003900663927197456, "step": 159590 }, { "epoch": 45.30229917683792, "grad_norm": 0.05185122415423393, "learning_rate": 5.471615100766393e-05, "loss": 0.0001521209254860878, "step": 159600 }, { "epoch": 45.30513766676128, "grad_norm": 0.06733708083629608, "learning_rate": 5.4713312517740564e-05, "loss": 0.00011324547231197357, "step": 159610 }, { "epoch": 45.307976156684646, "grad_norm": 0.014716721139848232, "learning_rate": 5.4710474027817205e-05, "loss": 0.00010806825011968612, "step": 159620 }, { "epoch": 45.310814646608, "grad_norm": 1.5006113052368164, "learning_rate": 5.470763553789384e-05, "loss": 0.0023533230647444724, "step": 159630 }, { "epoch": 45.313653136531364, "grad_norm": 11.903341293334961, "learning_rate": 5.470479704797048e-05, "loss": 0.0029412990435957907, "step": 159640 }, { "epoch": 45.31649162645473, "grad_norm": 0.4520995020866394, "learning_rate": 5.4701958558047116e-05, "loss": 0.0003512879833579063, "step": 159650 }, { "epoch": 45.31933011637809, "grad_norm": 0.027372311800718307, "learning_rate": 5.4699120068123764e-05, "loss": 0.0001512715592980385, "step": 159660 }, { "epoch": 45.322168606301446, "grad_norm": 0.08951181918382645, "learning_rate": 5.4696281578200405e-05, "loss": 0.0005681587383151055, "step": 159670 }, { "epoch": 45.32500709622481, "grad_norm": 0.012195337563753128, "learning_rate": 5.469344308827704e-05, "loss": 0.0006885763257741928, "step": 159680 }, { "epoch": 45.32784558614817, "grad_norm": 0.14791397750377655, "learning_rate": 5.469060459835368e-05, "loss": 0.0038163334131240843, "step": 159690 }, { "epoch": 45.33068407607153, "grad_norm": 0.1689176857471466, "learning_rate": 5.4687766108430316e-05, "loss": 0.00031511168926954267, "step": 159700 }, { "epoch": 45.33352256599489, "grad_norm": 0.24008643627166748, "learning_rate": 5.468492761850695e-05, "loss": 0.0045428119599819185, "step": 159710 }, { "epoch": 45.33636105591825, "grad_norm": 0.03301669657230377, "learning_rate": 5.468208912858359e-05, "loss": 0.005124498903751373, "step": 159720 }, { "epoch": 45.33919954584161, "grad_norm": 0.07376495748758316, "learning_rate": 5.467925063866024e-05, "loss": 0.010830549895763398, "step": 159730 }, { "epoch": 45.34203803576497, "grad_norm": 0.03725498914718628, "learning_rate": 5.4676412148736875e-05, "loss": 0.0004223920404911041, "step": 159740 }, { "epoch": 45.344876525688335, "grad_norm": 0.05911104381084442, "learning_rate": 5.4673573658813516e-05, "loss": 0.00047551523894071577, "step": 159750 }, { "epoch": 45.3477150156117, "grad_norm": 0.2949197292327881, "learning_rate": 5.467073516889015e-05, "loss": 0.0023817030712962152, "step": 159760 }, { "epoch": 45.35055350553505, "grad_norm": 0.014132639393210411, "learning_rate": 5.466789667896679e-05, "loss": 0.0025226324796676636, "step": 159770 }, { "epoch": 45.353391995458416, "grad_norm": 0.21063174307346344, "learning_rate": 5.466505818904343e-05, "loss": 0.0036691997200250627, "step": 159780 }, { "epoch": 45.35623048538178, "grad_norm": 0.020979560911655426, "learning_rate": 5.466221969912007e-05, "loss": 0.0007521629333496094, "step": 159790 }, { "epoch": 45.359068975305135, "grad_norm": 0.023915927857160568, "learning_rate": 5.4659381209196716e-05, "loss": 0.013146118819713592, "step": 159800 }, { "epoch": 45.3619074652285, "grad_norm": 0.03375186026096344, "learning_rate": 5.465654271927335e-05, "loss": 0.004859503358602524, "step": 159810 }, { "epoch": 45.36474595515186, "grad_norm": 0.008840242400765419, "learning_rate": 5.465370422934999e-05, "loss": 0.0003935873508453369, "step": 159820 }, { "epoch": 45.36758444507522, "grad_norm": 0.19060902297496796, "learning_rate": 5.465086573942663e-05, "loss": 0.0006797082722187042, "step": 159830 }, { "epoch": 45.37042293499858, "grad_norm": 2.6236047744750977, "learning_rate": 5.464802724950326e-05, "loss": 0.0023095810785889624, "step": 159840 }, { "epoch": 45.37326142492194, "grad_norm": 0.05714070424437523, "learning_rate": 5.46451887595799e-05, "loss": 0.0029403289780020716, "step": 159850 }, { "epoch": 45.376099914845305, "grad_norm": 0.022211190313100815, "learning_rate": 5.464235026965655e-05, "loss": 0.0004343496635556221, "step": 159860 }, { "epoch": 45.37893840476866, "grad_norm": 0.4469107389450073, "learning_rate": 5.4639511779733185e-05, "loss": 0.010612155497074127, "step": 159870 }, { "epoch": 45.381776894692024, "grad_norm": 0.1943846344947815, "learning_rate": 5.463667328980983e-05, "loss": 0.01913638263940811, "step": 159880 }, { "epoch": 45.38461538461539, "grad_norm": 0.11628121137619019, "learning_rate": 5.463383479988646e-05, "loss": 0.004867494106292725, "step": 159890 }, { "epoch": 45.38745387453874, "grad_norm": 10.52775764465332, "learning_rate": 5.46309963099631e-05, "loss": 0.002704496495425701, "step": 159900 }, { "epoch": 45.390292364462105, "grad_norm": 10.666203498840332, "learning_rate": 5.462815782003974e-05, "loss": 0.005968395248055458, "step": 159910 }, { "epoch": 45.39313085438547, "grad_norm": 0.00374771561473608, "learning_rate": 5.462531933011638e-05, "loss": 0.00022746063768863678, "step": 159920 }, { "epoch": 45.395969344308824, "grad_norm": 0.0297396257519722, "learning_rate": 5.462248084019303e-05, "loss": 0.0020917734131217004, "step": 159930 }, { "epoch": 45.39880783423219, "grad_norm": 3.587686777114868, "learning_rate": 5.461964235026966e-05, "loss": 0.0013524999842047692, "step": 159940 }, { "epoch": 45.40164632415555, "grad_norm": 0.9327978491783142, "learning_rate": 5.4616803860346296e-05, "loss": 0.006946433335542679, "step": 159950 }, { "epoch": 45.40448481407891, "grad_norm": 3.265942335128784, "learning_rate": 5.461396537042294e-05, "loss": 0.005595095455646515, "step": 159960 }, { "epoch": 45.40732330400227, "grad_norm": 1.7020010948181152, "learning_rate": 5.461112688049957e-05, "loss": 0.005600398033857345, "step": 159970 }, { "epoch": 45.41016179392563, "grad_norm": 0.014358306303620338, "learning_rate": 5.4608288390576214e-05, "loss": 0.0014185221865773202, "step": 159980 }, { "epoch": 45.413000283848994, "grad_norm": 0.03483399376273155, "learning_rate": 5.460544990065286e-05, "loss": 0.018777211010456086, "step": 159990 }, { "epoch": 45.41583877377235, "grad_norm": 0.010769211687147617, "learning_rate": 5.4602611410729496e-05, "loss": 0.002930435165762901, "step": 160000 }, { "epoch": 45.41583877377235, "eval_accuracy": 0.9795256565142748, "eval_loss": 0.08330738544464111, "eval_runtime": 42.3239, "eval_samples_per_second": 371.587, "eval_steps_per_second": 5.812, "step": 160000 }, { "epoch": 45.41867726369571, "grad_norm": 0.4138513207435608, "learning_rate": 5.459977292080614e-05, "loss": 0.013922411203384399, "step": 160010 }, { "epoch": 45.421515753619076, "grad_norm": 0.3266136944293976, "learning_rate": 5.459693443088277e-05, "loss": 0.038823100924491885, "step": 160020 }, { "epoch": 45.42435424354244, "grad_norm": 0.32848554849624634, "learning_rate": 5.4594095940959414e-05, "loss": 0.004890777915716171, "step": 160030 }, { "epoch": 45.427192733465795, "grad_norm": 0.007401431445032358, "learning_rate": 5.459125745103605e-05, "loss": 0.003864455595612526, "step": 160040 }, { "epoch": 45.43003122338916, "grad_norm": 0.019012434408068657, "learning_rate": 5.458841896111268e-05, "loss": 0.0007590951398015022, "step": 160050 }, { "epoch": 45.43286971331252, "grad_norm": 4.653788089752197, "learning_rate": 5.458558047118934e-05, "loss": 0.0050536379218101505, "step": 160060 }, { "epoch": 45.435708203235876, "grad_norm": 0.008717099204659462, "learning_rate": 5.458274198126597e-05, "loss": 0.004290538281202317, "step": 160070 }, { "epoch": 45.43854669315924, "grad_norm": 0.12217991799116135, "learning_rate": 5.457990349134261e-05, "loss": 0.002149310149252415, "step": 160080 }, { "epoch": 45.4413851830826, "grad_norm": 0.12803369760513306, "learning_rate": 5.457706500141925e-05, "loss": 0.005326883494853973, "step": 160090 }, { "epoch": 45.44422367300596, "grad_norm": 0.19172397255897522, "learning_rate": 5.457422651149588e-05, "loss": 0.0037758514285087586, "step": 160100 }, { "epoch": 45.44706216292932, "grad_norm": 0.5447067022323608, "learning_rate": 5.4571388021572524e-05, "loss": 0.0005782881751656532, "step": 160110 }, { "epoch": 45.44990065285268, "grad_norm": 0.22636768221855164, "learning_rate": 5.456854953164916e-05, "loss": 0.0038543380796909332, "step": 160120 }, { "epoch": 45.452739142776046, "grad_norm": 0.018185067921876907, "learning_rate": 5.456571104172581e-05, "loss": 0.003885367512702942, "step": 160130 }, { "epoch": 45.4555776326994, "grad_norm": 0.13834618031978607, "learning_rate": 5.456287255180245e-05, "loss": 0.0002128053456544876, "step": 160140 }, { "epoch": 45.458416122622765, "grad_norm": 1.2770633697509766, "learning_rate": 5.456003406187908e-05, "loss": 0.007271718978881836, "step": 160150 }, { "epoch": 45.46125461254613, "grad_norm": 3.459409236907959, "learning_rate": 5.4557195571955724e-05, "loss": 0.008900448679924011, "step": 160160 }, { "epoch": 45.464093102469484, "grad_norm": 0.0454348586499691, "learning_rate": 5.455435708203236e-05, "loss": 0.0007165383547544479, "step": 160170 }, { "epoch": 45.46693159239285, "grad_norm": 0.20466019213199615, "learning_rate": 5.4551518592108994e-05, "loss": 0.0012323172762989999, "step": 160180 }, { "epoch": 45.46977008231621, "grad_norm": 0.018631748855113983, "learning_rate": 5.454868010218565e-05, "loss": 0.0020602263510227205, "step": 160190 }, { "epoch": 45.472608572239565, "grad_norm": 0.561143696308136, "learning_rate": 5.454584161226228e-05, "loss": 0.004859659820795059, "step": 160200 }, { "epoch": 45.47544706216293, "grad_norm": 0.09703049808740616, "learning_rate": 5.454300312233892e-05, "loss": 0.01368236094713211, "step": 160210 }, { "epoch": 45.47828555208629, "grad_norm": 0.1415378898382187, "learning_rate": 5.454016463241556e-05, "loss": 0.011127794533967972, "step": 160220 }, { "epoch": 45.481124042009654, "grad_norm": 0.026215840131044388, "learning_rate": 5.4537326142492194e-05, "loss": 0.00813448578119278, "step": 160230 }, { "epoch": 45.48396253193301, "grad_norm": 0.01422963384538889, "learning_rate": 5.4534487652568835e-05, "loss": 0.002725645340979099, "step": 160240 }, { "epoch": 45.48680102185637, "grad_norm": 0.1630634367465973, "learning_rate": 5.453164916264547e-05, "loss": 0.0011205917224287986, "step": 160250 }, { "epoch": 45.489639511779735, "grad_norm": 0.9303855895996094, "learning_rate": 5.452881067272212e-05, "loss": 0.005817442387342453, "step": 160260 }, { "epoch": 45.49247800170309, "grad_norm": 0.04455699026584625, "learning_rate": 5.452597218279876e-05, "loss": 0.0015698933973908424, "step": 160270 }, { "epoch": 45.495316491626454, "grad_norm": 0.18545585870742798, "learning_rate": 5.4523133692875394e-05, "loss": 0.004998447000980377, "step": 160280 }, { "epoch": 45.49815498154982, "grad_norm": 0.8331698775291443, "learning_rate": 5.4520295202952035e-05, "loss": 0.00535271167755127, "step": 160290 }, { "epoch": 45.50099347147318, "grad_norm": 0.04927339032292366, "learning_rate": 5.451745671302867e-05, "loss": 0.001363808661699295, "step": 160300 }, { "epoch": 45.503831961396536, "grad_norm": 16.07996368408203, "learning_rate": 5.451490207209765e-05, "loss": 0.0205392062664032, "step": 160310 }, { "epoch": 45.5066704513199, "grad_norm": 1.4576486349105835, "learning_rate": 5.451206358217429e-05, "loss": 0.012477782368659974, "step": 160320 }, { "epoch": 45.50950894124326, "grad_norm": 0.6226568222045898, "learning_rate": 5.4509225092250926e-05, "loss": 0.0010156651958823204, "step": 160330 }, { "epoch": 45.51234743116662, "grad_norm": 0.20980527997016907, "learning_rate": 5.450638660232757e-05, "loss": 0.000594901666045189, "step": 160340 }, { "epoch": 45.51518592108998, "grad_norm": 0.5961027145385742, "learning_rate": 5.45035481124042e-05, "loss": 0.0033671144396066667, "step": 160350 }, { "epoch": 45.51802441101334, "grad_norm": 0.009544388391077518, "learning_rate": 5.4500709622480836e-05, "loss": 0.0037752926349639893, "step": 160360 }, { "epoch": 45.5208629009367, "grad_norm": 0.009091355837881565, "learning_rate": 5.4497871132557484e-05, "loss": 0.0010108549147844314, "step": 160370 }, { "epoch": 45.52370139086006, "grad_norm": 8.388962745666504, "learning_rate": 5.4495032642634126e-05, "loss": 0.004487627744674682, "step": 160380 }, { "epoch": 45.526539880783425, "grad_norm": 0.09054858237504959, "learning_rate": 5.449219415271076e-05, "loss": 0.014982976019382477, "step": 160390 }, { "epoch": 45.52937837070679, "grad_norm": 0.23437437415122986, "learning_rate": 5.44893556627874e-05, "loss": 0.000591062568128109, "step": 160400 }, { "epoch": 45.53221686063014, "grad_norm": 0.00643211230635643, "learning_rate": 5.4486517172864036e-05, "loss": 0.0023524042218923567, "step": 160410 }, { "epoch": 45.535055350553506, "grad_norm": 0.022233547642827034, "learning_rate": 5.448367868294068e-05, "loss": 0.0004369406029582024, "step": 160420 }, { "epoch": 45.53789384047687, "grad_norm": 0.09708583354949951, "learning_rate": 5.448084019301731e-05, "loss": 0.001673959381878376, "step": 160430 }, { "epoch": 45.540732330400225, "grad_norm": 0.2653454542160034, "learning_rate": 5.447800170309396e-05, "loss": 0.004362666606903076, "step": 160440 }, { "epoch": 45.54357082032359, "grad_norm": 0.03761344775557518, "learning_rate": 5.44751632131706e-05, "loss": 0.00031910836696624756, "step": 160450 }, { "epoch": 45.54640931024695, "grad_norm": 0.6177605390548706, "learning_rate": 5.4472324723247236e-05, "loss": 0.0007839273661375046, "step": 160460 }, { "epoch": 45.549247800170306, "grad_norm": 0.0724533423781395, "learning_rate": 5.446948623332387e-05, "loss": 0.0004593070596456528, "step": 160470 }, { "epoch": 45.55208629009367, "grad_norm": 0.2024359554052353, "learning_rate": 5.446664774340051e-05, "loss": 0.0011886483058333396, "step": 160480 }, { "epoch": 45.55492478001703, "grad_norm": 0.02737051621079445, "learning_rate": 5.446380925347715e-05, "loss": 0.0002861354500055313, "step": 160490 }, { "epoch": 45.557763269940395, "grad_norm": 0.2570703327655792, "learning_rate": 5.446097076355379e-05, "loss": 0.0004037169739603996, "step": 160500 }, { "epoch": 45.557763269940395, "eval_accuracy": 0.9811152794557131, "eval_loss": 0.07470319420099258, "eval_runtime": 42.779, "eval_samples_per_second": 367.633, "eval_steps_per_second": 5.75, "step": 160500 }, { "epoch": 45.56060175986375, "grad_norm": 3.4329776763916016, "learning_rate": 5.4458132273630437e-05, "loss": 0.001296890713274479, "step": 160510 }, { "epoch": 45.563440249787114, "grad_norm": 0.008224153891205788, "learning_rate": 5.445529378370707e-05, "loss": 0.005414653569459915, "step": 160520 }, { "epoch": 45.56627873971048, "grad_norm": 0.14314314723014832, "learning_rate": 5.445245529378371e-05, "loss": 0.0010469766333699227, "step": 160530 }, { "epoch": 45.56911722963383, "grad_norm": 10.657225608825684, "learning_rate": 5.444961680386035e-05, "loss": 0.007770347595214844, "step": 160540 }, { "epoch": 45.571955719557195, "grad_norm": 1.6199439764022827, "learning_rate": 5.444677831393699e-05, "loss": 0.0005296865478157997, "step": 160550 }, { "epoch": 45.57479420948056, "grad_norm": 3.476101875305176, "learning_rate": 5.444393982401362e-05, "loss": 0.0013977708294987679, "step": 160560 }, { "epoch": 45.577632699403914, "grad_norm": 0.10407447814941406, "learning_rate": 5.444110133409027e-05, "loss": 0.0005158713087439537, "step": 160570 }, { "epoch": 45.58047118932728, "grad_norm": 0.11729846894741058, "learning_rate": 5.443826284416691e-05, "loss": 0.00047108065336942673, "step": 160580 }, { "epoch": 45.58330967925064, "grad_norm": 0.03156236186623573, "learning_rate": 5.443542435424355e-05, "loss": 0.001832774095237255, "step": 160590 }, { "epoch": 45.586148169174, "grad_norm": 0.018754499033093452, "learning_rate": 5.443258586432018e-05, "loss": 0.0006076658144593238, "step": 160600 }, { "epoch": 45.58898665909736, "grad_norm": 2.7832932472229004, "learning_rate": 5.442974737439682e-05, "loss": 0.0008824538439512253, "step": 160610 }, { "epoch": 45.59182514902072, "grad_norm": 0.14858117699623108, "learning_rate": 5.442690888447346e-05, "loss": 0.0010270977392792702, "step": 160620 }, { "epoch": 45.594663638944084, "grad_norm": 0.09265534579753876, "learning_rate": 5.44240703945501e-05, "loss": 0.00394224226474762, "step": 160630 }, { "epoch": 45.59750212886744, "grad_norm": 8.329898834228516, "learning_rate": 5.442123190462675e-05, "loss": 0.0034707106649875643, "step": 160640 }, { "epoch": 45.6003406187908, "grad_norm": 0.2868787944316864, "learning_rate": 5.441839341470338e-05, "loss": 0.00034773405641317365, "step": 160650 }, { "epoch": 45.603179108714166, "grad_norm": 0.025569641962647438, "learning_rate": 5.441555492478002e-05, "loss": 0.0006480615586042405, "step": 160660 }, { "epoch": 45.60601759863752, "grad_norm": 0.31392744183540344, "learning_rate": 5.441271643485666e-05, "loss": 0.0006338896229863167, "step": 160670 }, { "epoch": 45.608856088560884, "grad_norm": 0.14233022928237915, "learning_rate": 5.44098779449333e-05, "loss": 0.0013391880318522453, "step": 160680 }, { "epoch": 45.61169457848425, "grad_norm": 0.014867480844259262, "learning_rate": 5.4407039455009934e-05, "loss": 0.00601908341050148, "step": 160690 }, { "epoch": 45.61453306840761, "grad_norm": 0.022148558869957924, "learning_rate": 5.440420096508657e-05, "loss": 0.01145465075969696, "step": 160700 }, { "epoch": 45.617371558330966, "grad_norm": 0.7262381315231323, "learning_rate": 5.440136247516322e-05, "loss": 0.007752090692520142, "step": 160710 }, { "epoch": 45.62021004825433, "grad_norm": 0.032212115824222565, "learning_rate": 5.439852398523986e-05, "loss": 0.0006128765642642974, "step": 160720 }, { "epoch": 45.62304853817769, "grad_norm": 0.13035179674625397, "learning_rate": 5.439568549531649e-05, "loss": 0.0010563308373093606, "step": 160730 }, { "epoch": 45.62588702810105, "grad_norm": 1.110717535018921, "learning_rate": 5.4392847005393134e-05, "loss": 0.005571741983294487, "step": 160740 }, { "epoch": 45.62872551802441, "grad_norm": 0.030636638402938843, "learning_rate": 5.439000851546977e-05, "loss": 0.002362273447215557, "step": 160750 }, { "epoch": 45.63156400794777, "grad_norm": 0.16916652023792267, "learning_rate": 5.438717002554641e-05, "loss": 0.0012678075581789017, "step": 160760 }, { "epoch": 45.634402497871136, "grad_norm": 4.105142116546631, "learning_rate": 5.438433153562306e-05, "loss": 0.001983126625418663, "step": 160770 }, { "epoch": 45.63724098779449, "grad_norm": 1.0451929569244385, "learning_rate": 5.438149304569969e-05, "loss": 0.004290753602981567, "step": 160780 }, { "epoch": 45.640079477717855, "grad_norm": 0.004200618714094162, "learning_rate": 5.4378654555776334e-05, "loss": 0.0002987055107951164, "step": 160790 }, { "epoch": 45.64291796764122, "grad_norm": 0.11463065445423126, "learning_rate": 5.437581606585297e-05, "loss": 0.00620746985077858, "step": 160800 }, { "epoch": 45.64575645756457, "grad_norm": 0.12996764481067657, "learning_rate": 5.437297757592961e-05, "loss": 0.003592453896999359, "step": 160810 }, { "epoch": 45.648594947487936, "grad_norm": 0.14441248774528503, "learning_rate": 5.4370139086006245e-05, "loss": 0.0006583886221051216, "step": 160820 }, { "epoch": 45.6514334374113, "grad_norm": 0.19428227841854095, "learning_rate": 5.436730059608288e-05, "loss": 0.0010618159547448158, "step": 160830 }, { "epoch": 45.654271927334655, "grad_norm": 2.926391124725342, "learning_rate": 5.436446210615953e-05, "loss": 0.0008376507088541984, "step": 160840 }, { "epoch": 45.65711041725802, "grad_norm": 0.2699679136276245, "learning_rate": 5.436162361623617e-05, "loss": 0.0003604253754019737, "step": 160850 }, { "epoch": 45.65994890718138, "grad_norm": 2.561103343963623, "learning_rate": 5.43587851263128e-05, "loss": 0.0039489932358264925, "step": 160860 }, { "epoch": 45.66278739710474, "grad_norm": 0.04443507641553879, "learning_rate": 5.4355946636389445e-05, "loss": 0.0008452178910374642, "step": 160870 }, { "epoch": 45.6656258870281, "grad_norm": 0.37831375002861023, "learning_rate": 5.435310814646608e-05, "loss": 0.002181612141430378, "step": 160880 }, { "epoch": 45.66846437695146, "grad_norm": 0.07276986539363861, "learning_rate": 5.435026965654272e-05, "loss": 0.0009753143414855004, "step": 160890 }, { "epoch": 45.671302866874825, "grad_norm": 0.4843709468841553, "learning_rate": 5.4347431166619355e-05, "loss": 0.005910448729991913, "step": 160900 }, { "epoch": 45.67414135679818, "grad_norm": 0.019349416717886925, "learning_rate": 5.4344592676696003e-05, "loss": 0.0028214765712618827, "step": 160910 }, { "epoch": 45.676979846721544, "grad_norm": 0.11619032919406891, "learning_rate": 5.4341754186772645e-05, "loss": 0.0005098318681120873, "step": 160920 }, { "epoch": 45.67981833664491, "grad_norm": 1.2402803897857666, "learning_rate": 5.433891569684928e-05, "loss": 0.0003803787752985954, "step": 160930 }, { "epoch": 45.68265682656826, "grad_norm": 0.04172055050730705, "learning_rate": 5.4336077206925914e-05, "loss": 0.0008201997727155686, "step": 160940 }, { "epoch": 45.685495316491625, "grad_norm": 0.008072702214121819, "learning_rate": 5.4333238717002555e-05, "loss": 0.008291099965572358, "step": 160950 }, { "epoch": 45.68833380641499, "grad_norm": 0.3931308388710022, "learning_rate": 5.433040022707919e-05, "loss": 0.0010797731578350068, "step": 160960 }, { "epoch": 45.69117229633835, "grad_norm": 0.00695434957742691, "learning_rate": 5.432756173715584e-05, "loss": 0.0009727878496050835, "step": 160970 }, { "epoch": 45.69401078626171, "grad_norm": 0.053480926901102066, "learning_rate": 5.432472324723248e-05, "loss": 0.001528889499604702, "step": 160980 }, { "epoch": 45.69684927618507, "grad_norm": 0.27113837003707886, "learning_rate": 5.4321884757309114e-05, "loss": 0.001055908203125, "step": 160990 }, { "epoch": 45.69968776610843, "grad_norm": 0.22890038788318634, "learning_rate": 5.4319046267385755e-05, "loss": 0.00876677930355072, "step": 161000 }, { "epoch": 45.69968776610843, "eval_accuracy": 0.981814713549946, "eval_loss": 0.0753539651632309, "eval_runtime": 46.1471, "eval_samples_per_second": 340.801, "eval_steps_per_second": 5.331, "step": 161000 }, { "epoch": 45.70252625603179, "grad_norm": 0.11232751607894897, "learning_rate": 5.431620777746239e-05, "loss": 0.010948166251182556, "step": 161010 }, { "epoch": 45.70536474595515, "grad_norm": 0.15846073627471924, "learning_rate": 5.431336928753903e-05, "loss": 0.0025597324594855307, "step": 161020 }, { "epoch": 45.708203235878514, "grad_norm": 0.07860668003559113, "learning_rate": 5.4310530797615666e-05, "loss": 0.0030417928472161295, "step": 161030 }, { "epoch": 45.71104172580187, "grad_norm": 0.09012950211763382, "learning_rate": 5.4307692307692314e-05, "loss": 0.0012717470526695251, "step": 161040 }, { "epoch": 45.71388021572523, "grad_norm": 0.32993581891059875, "learning_rate": 5.4304853817768956e-05, "loss": 0.0007789699360728264, "step": 161050 }, { "epoch": 45.716718705648596, "grad_norm": 0.008312270045280457, "learning_rate": 5.430201532784559e-05, "loss": 0.0009114598855376243, "step": 161060 }, { "epoch": 45.71955719557196, "grad_norm": 0.009127099066972733, "learning_rate": 5.4299176837922225e-05, "loss": 0.0002559209242463112, "step": 161070 }, { "epoch": 45.722395685495314, "grad_norm": 0.03797849267721176, "learning_rate": 5.4296338347998866e-05, "loss": 0.00017702076584100724, "step": 161080 }, { "epoch": 45.72523417541868, "grad_norm": 0.12931275367736816, "learning_rate": 5.42934998580755e-05, "loss": 0.0007182994857430458, "step": 161090 }, { "epoch": 45.72807266534204, "grad_norm": 0.045076657086610794, "learning_rate": 5.429066136815214e-05, "loss": 0.002316676639020443, "step": 161100 }, { "epoch": 45.730911155265396, "grad_norm": 0.006203007884323597, "learning_rate": 5.428782287822879e-05, "loss": 0.002521984279155731, "step": 161110 }, { "epoch": 45.73374964518876, "grad_norm": 0.20286677777767181, "learning_rate": 5.4284984388305425e-05, "loss": 0.0008687403053045273, "step": 161120 }, { "epoch": 45.73658813511212, "grad_norm": 0.014532710425555706, "learning_rate": 5.4282145898382066e-05, "loss": 0.0004468921571969986, "step": 161130 }, { "epoch": 45.739426625035485, "grad_norm": 0.07575029879808426, "learning_rate": 5.42793074084587e-05, "loss": 0.0010754156857728959, "step": 161140 }, { "epoch": 45.74226511495884, "grad_norm": 0.09936939179897308, "learning_rate": 5.427646891853534e-05, "loss": 0.00036323238164186477, "step": 161150 }, { "epoch": 45.7451036048822, "grad_norm": 0.06546441465616226, "learning_rate": 5.427363042861198e-05, "loss": 0.00014611538499593736, "step": 161160 }, { "epoch": 45.747942094805566, "grad_norm": 0.20018723607063293, "learning_rate": 5.4270791938688625e-05, "loss": 0.0005893416702747345, "step": 161170 }, { "epoch": 45.75078058472892, "grad_norm": 0.0059152222238481045, "learning_rate": 5.4267953448765266e-05, "loss": 0.000516241230070591, "step": 161180 }, { "epoch": 45.753619074652285, "grad_norm": 0.45734086632728577, "learning_rate": 5.42651149588419e-05, "loss": 0.00047994982451200486, "step": 161190 }, { "epoch": 45.75645756457565, "grad_norm": 0.019337201490998268, "learning_rate": 5.4262276468918536e-05, "loss": 0.0033423319458961488, "step": 161200 }, { "epoch": 45.759296054499, "grad_norm": 0.7782332301139832, "learning_rate": 5.425943797899518e-05, "loss": 0.009949520975351334, "step": 161210 }, { "epoch": 45.762134544422366, "grad_norm": 0.04163959249854088, "learning_rate": 5.425659948907181e-05, "loss": 0.012414542585611343, "step": 161220 }, { "epoch": 45.76497303434573, "grad_norm": 0.016806986182928085, "learning_rate": 5.425376099914845e-05, "loss": 0.0004386408254504204, "step": 161230 }, { "epoch": 45.76781152426909, "grad_norm": 0.0695587769150734, "learning_rate": 5.42509225092251e-05, "loss": 0.006502444297075272, "step": 161240 }, { "epoch": 45.77065001419245, "grad_norm": 0.01322184782475233, "learning_rate": 5.4248084019301736e-05, "loss": 0.0005355760455131531, "step": 161250 }, { "epoch": 45.77348850411581, "grad_norm": 3.1732470989227295, "learning_rate": 5.424524552937838e-05, "loss": 0.002751076407730579, "step": 161260 }, { "epoch": 45.776326994039174, "grad_norm": 0.15304240584373474, "learning_rate": 5.424240703945501e-05, "loss": 0.0023474736139178277, "step": 161270 }, { "epoch": 45.77916548396253, "grad_norm": 0.07995710521936417, "learning_rate": 5.423956854953165e-05, "loss": 0.0021915614604949953, "step": 161280 }, { "epoch": 45.78200397388589, "grad_norm": 0.27569958567619324, "learning_rate": 5.423673005960829e-05, "loss": 0.000987621769309044, "step": 161290 }, { "epoch": 45.784842463809255, "grad_norm": 0.023329539224505424, "learning_rate": 5.423389156968492e-05, "loss": 0.0013782398775219918, "step": 161300 }, { "epoch": 45.78768095373261, "grad_norm": 0.01663055643439293, "learning_rate": 5.423105307976157e-05, "loss": 0.0010583680123090744, "step": 161310 }, { "epoch": 45.790519443655974, "grad_norm": 0.27953040599823, "learning_rate": 5.422821458983821e-05, "loss": 0.0027317523956298826, "step": 161320 }, { "epoch": 45.79335793357934, "grad_norm": 9.166824340820312, "learning_rate": 5.4225376099914846e-05, "loss": 0.008585377037525177, "step": 161330 }, { "epoch": 45.7961964235027, "grad_norm": 0.04476873576641083, "learning_rate": 5.422253760999149e-05, "loss": 0.002188659645617008, "step": 161340 }, { "epoch": 45.799034913426055, "grad_norm": 0.08611876517534256, "learning_rate": 5.421969912006812e-05, "loss": 0.0023712176829576493, "step": 161350 }, { "epoch": 45.80187340334942, "grad_norm": 0.06918346881866455, "learning_rate": 5.4216860630144764e-05, "loss": 0.00702894926071167, "step": 161360 }, { "epoch": 45.80471189327278, "grad_norm": 0.034656934440135956, "learning_rate": 5.421402214022141e-05, "loss": 0.001539813168346882, "step": 161370 }, { "epoch": 45.80755038319614, "grad_norm": 0.28998515009880066, "learning_rate": 5.4211183650298046e-05, "loss": 0.003569912165403366, "step": 161380 }, { "epoch": 45.8103888731195, "grad_norm": 0.06117420271039009, "learning_rate": 5.420834516037469e-05, "loss": 0.0004751678556203842, "step": 161390 }, { "epoch": 45.81322736304286, "grad_norm": 0.14613409340381622, "learning_rate": 5.420550667045132e-05, "loss": 0.002525460533797741, "step": 161400 }, { "epoch": 45.81606585296622, "grad_norm": 0.037351902574300766, "learning_rate": 5.420266818052796e-05, "loss": 0.0003976438194513321, "step": 161410 }, { "epoch": 45.81890434288958, "grad_norm": 0.005813698284327984, "learning_rate": 5.41998296906046e-05, "loss": 0.0008904740214347839, "step": 161420 }, { "epoch": 45.821742832812944, "grad_norm": 0.036375533789396286, "learning_rate": 5.419699120068123e-05, "loss": 0.00022082962095737457, "step": 161430 }, { "epoch": 45.82458132273631, "grad_norm": 0.3367425501346588, "learning_rate": 5.419415271075788e-05, "loss": 0.005597367137670517, "step": 161440 }, { "epoch": 45.82741981265966, "grad_norm": 0.01766126975417137, "learning_rate": 5.419131422083452e-05, "loss": 0.0071509875357151035, "step": 161450 }, { "epoch": 45.830258302583026, "grad_norm": 0.1299835443496704, "learning_rate": 5.418847573091116e-05, "loss": 0.008022312074899673, "step": 161460 }, { "epoch": 45.83309679250639, "grad_norm": 0.07125413417816162, "learning_rate": 5.41856372409878e-05, "loss": 0.0025847265496850014, "step": 161470 }, { "epoch": 45.835935282429745, "grad_norm": 0.4109669029712677, "learning_rate": 5.418279875106443e-05, "loss": 0.0011785870417952537, "step": 161480 }, { "epoch": 45.83877377235311, "grad_norm": 0.02785283327102661, "learning_rate": 5.4179960261141074e-05, "loss": 0.0022473067045211794, "step": 161490 }, { "epoch": 45.84161226227647, "grad_norm": 0.010782374069094658, "learning_rate": 5.417712177121771e-05, "loss": 0.00047536175698041915, "step": 161500 }, { "epoch": 45.84161226227647, "eval_accuracy": 0.9814967889616583, "eval_loss": 0.07109144330024719, "eval_runtime": 37.6783, "eval_samples_per_second": 417.402, "eval_steps_per_second": 6.529, "step": 161500 }, { "epoch": 45.84445075219983, "grad_norm": 0.005558681674301624, "learning_rate": 5.417428328129436e-05, "loss": 0.00025617703795433044, "step": 161510 }, { "epoch": 45.84728924212319, "grad_norm": 0.03566117584705353, "learning_rate": 5.4171444791371e-05, "loss": 0.0039396025240421295, "step": 161520 }, { "epoch": 45.85012773204655, "grad_norm": 0.0771050751209259, "learning_rate": 5.416860630144763e-05, "loss": 0.017132765054702757, "step": 161530 }, { "epoch": 45.852966221969915, "grad_norm": 0.745235025882721, "learning_rate": 5.416576781152427e-05, "loss": 0.0010394752025604248, "step": 161540 }, { "epoch": 45.85580471189327, "grad_norm": 3.7148287296295166, "learning_rate": 5.416292932160091e-05, "loss": 0.003183603286743164, "step": 161550 }, { "epoch": 45.85864320181663, "grad_norm": 8.350167274475098, "learning_rate": 5.4160090831677544e-05, "loss": 0.0019927704706788063, "step": 161560 }, { "epoch": 45.861481691739996, "grad_norm": 0.05893498659133911, "learning_rate": 5.415725234175419e-05, "loss": 0.0010876163840293884, "step": 161570 }, { "epoch": 45.86432018166335, "grad_norm": 0.9179393649101257, "learning_rate": 5.415441385183083e-05, "loss": 0.000454682856798172, "step": 161580 }, { "epoch": 45.867158671586715, "grad_norm": 0.1879298985004425, "learning_rate": 5.415157536190747e-05, "loss": 0.0021821314468979834, "step": 161590 }, { "epoch": 45.86999716151008, "grad_norm": 1.5740453004837036, "learning_rate": 5.414873687198411e-05, "loss": 0.0005143871530890465, "step": 161600 }, { "epoch": 45.87283565143344, "grad_norm": 0.050025008618831635, "learning_rate": 5.4145898382060744e-05, "loss": 0.0019515212625265121, "step": 161610 }, { "epoch": 45.8756741413568, "grad_norm": 0.436909019947052, "learning_rate": 5.4143059892137385e-05, "loss": 0.0003214610740542412, "step": 161620 }, { "epoch": 45.87851263128016, "grad_norm": 12.616104125976562, "learning_rate": 5.414022140221402e-05, "loss": 0.004876536875963211, "step": 161630 }, { "epoch": 45.88135112120352, "grad_norm": 0.09026309102773666, "learning_rate": 5.413738291229067e-05, "loss": 0.004167253896594047, "step": 161640 }, { "epoch": 45.88418961112688, "grad_norm": 6.99563455581665, "learning_rate": 5.413454442236731e-05, "loss": 0.0019181780517101288, "step": 161650 }, { "epoch": 45.88702810105024, "grad_norm": 0.003523879684507847, "learning_rate": 5.4131705932443944e-05, "loss": 0.005695381760597229, "step": 161660 }, { "epoch": 45.889866590973604, "grad_norm": 4.954928874969482, "learning_rate": 5.412886744252058e-05, "loss": 0.007698944956064224, "step": 161670 }, { "epoch": 45.89270508089696, "grad_norm": 0.6856346130371094, "learning_rate": 5.412602895259722e-05, "loss": 0.01067950427532196, "step": 161680 }, { "epoch": 45.89554357082032, "grad_norm": 1.6870795488357544, "learning_rate": 5.4123190462673854e-05, "loss": 0.0019940298050642015, "step": 161690 }, { "epoch": 45.898382060743685, "grad_norm": 0.04259120300412178, "learning_rate": 5.41203519727505e-05, "loss": 0.0009042354300618172, "step": 161700 }, { "epoch": 45.90122055066705, "grad_norm": 0.00704108364880085, "learning_rate": 5.4117513482827144e-05, "loss": 0.00026097074151039125, "step": 161710 }, { "epoch": 45.904059040590404, "grad_norm": 0.037962064146995544, "learning_rate": 5.411467499290378e-05, "loss": 0.000757661834359169, "step": 161720 }, { "epoch": 45.90689753051377, "grad_norm": 3.5239808559417725, "learning_rate": 5.411183650298042e-05, "loss": 0.0024619758129119872, "step": 161730 }, { "epoch": 45.90973602043713, "grad_norm": 0.0597061961889267, "learning_rate": 5.4108998013057055e-05, "loss": 0.00041733384132385253, "step": 161740 }, { "epoch": 45.912574510360486, "grad_norm": 0.28038352727890015, "learning_rate": 5.4106159523133696e-05, "loss": 0.021750913560390474, "step": 161750 }, { "epoch": 45.91541300028385, "grad_norm": 4.862824440002441, "learning_rate": 5.410332103321033e-05, "loss": 0.014711561799049377, "step": 161760 }, { "epoch": 45.91825149020721, "grad_norm": 0.18321485817432404, "learning_rate": 5.410048254328698e-05, "loss": 0.01736283153295517, "step": 161770 }, { "epoch": 45.92108998013057, "grad_norm": 0.08896391093730927, "learning_rate": 5.409764405336361e-05, "loss": 0.001979121193289757, "step": 161780 }, { "epoch": 45.92392847005393, "grad_norm": 1.5334032773971558, "learning_rate": 5.4094805563440255e-05, "loss": 0.0036241050809621813, "step": 161790 }, { "epoch": 45.92676695997729, "grad_norm": 0.4281325340270996, "learning_rate": 5.409196707351689e-05, "loss": 0.0106591135263443, "step": 161800 }, { "epoch": 45.929605449900656, "grad_norm": 0.12113489210605621, "learning_rate": 5.408912858359353e-05, "loss": 0.0008447654545307159, "step": 161810 }, { "epoch": 45.93244393982401, "grad_norm": 0.01841827854514122, "learning_rate": 5.4086290093670165e-05, "loss": 0.00305580236017704, "step": 161820 }, { "epoch": 45.935282429747375, "grad_norm": 0.09350305795669556, "learning_rate": 5.408345160374681e-05, "loss": 0.0034215144813060762, "step": 161830 }, { "epoch": 45.93812091967074, "grad_norm": 0.08412113040685654, "learning_rate": 5.4080613113823455e-05, "loss": 0.004505753889679909, "step": 161840 }, { "epoch": 45.94095940959409, "grad_norm": 0.023349352180957794, "learning_rate": 5.407777462390009e-05, "loss": 0.009733752906322479, "step": 161850 }, { "epoch": 45.943797899517456, "grad_norm": 0.17016719281673431, "learning_rate": 5.407493613397673e-05, "loss": 0.0094394713640213, "step": 161860 }, { "epoch": 45.94663638944082, "grad_norm": 0.5784303545951843, "learning_rate": 5.4072097644053365e-05, "loss": 0.0025143858045339586, "step": 161870 }, { "epoch": 45.949474879364175, "grad_norm": 0.46253979206085205, "learning_rate": 5.406925915413e-05, "loss": 0.0067811205983161925, "step": 161880 }, { "epoch": 45.95231336928754, "grad_norm": 0.6326520442962646, "learning_rate": 5.406642066420664e-05, "loss": 0.005946379527449608, "step": 161890 }, { "epoch": 45.9551518592109, "grad_norm": 0.12783749401569366, "learning_rate": 5.406358217428329e-05, "loss": 0.007023400068283081, "step": 161900 }, { "epoch": 45.95799034913426, "grad_norm": 0.48256343603134155, "learning_rate": 5.4060743684359924e-05, "loss": 0.0016438093036413193, "step": 161910 }, { "epoch": 45.96082883905762, "grad_norm": 5.082817554473877, "learning_rate": 5.4057905194436565e-05, "loss": 0.0020875828340649605, "step": 161920 }, { "epoch": 45.96366732898098, "grad_norm": 15.329933166503906, "learning_rate": 5.40550667045132e-05, "loss": 0.01598338782787323, "step": 161930 }, { "epoch": 45.966505818904345, "grad_norm": 0.018659353256225586, "learning_rate": 5.405222821458984e-05, "loss": 0.0005131913349032402, "step": 161940 }, { "epoch": 45.9693443088277, "grad_norm": 0.18477976322174072, "learning_rate": 5.4049389724666476e-05, "loss": 0.0007322300225496292, "step": 161950 }, { "epoch": 45.972182798751064, "grad_norm": 0.030182382091879845, "learning_rate": 5.404655123474312e-05, "loss": 0.0008646970614790917, "step": 161960 }, { "epoch": 45.97502128867443, "grad_norm": 0.3042486906051636, "learning_rate": 5.4043712744819766e-05, "loss": 0.0005701068788766861, "step": 161970 }, { "epoch": 45.97785977859779, "grad_norm": 4.587296485900879, "learning_rate": 5.40408742548964e-05, "loss": 0.0015424581244587899, "step": 161980 }, { "epoch": 45.980698268521145, "grad_norm": 0.07052531093358994, "learning_rate": 5.403803576497304e-05, "loss": 0.006990177184343338, "step": 161990 }, { "epoch": 45.98353675844451, "grad_norm": 0.26150599122047424, "learning_rate": 5.4035197275049676e-05, "loss": 0.007107995450496674, "step": 162000 }, { "epoch": 45.98353675844451, "eval_accuracy": 0.9798435811025624, "eval_loss": 0.07955674827098846, "eval_runtime": 53.2884, "eval_samples_per_second": 295.13, "eval_steps_per_second": 4.616, "step": 162000 }, { "epoch": 45.98637524836787, "grad_norm": 0.07200948894023895, "learning_rate": 5.403235878512631e-05, "loss": 0.00867936909198761, "step": 162010 }, { "epoch": 45.98921373829123, "grad_norm": 0.0589984729886055, "learning_rate": 5.402952029520295e-05, "loss": 0.0006369737908244133, "step": 162020 }, { "epoch": 45.99205222821459, "grad_norm": 0.30306509137153625, "learning_rate": 5.402668180527959e-05, "loss": 0.0019225101917982102, "step": 162030 }, { "epoch": 45.99489071813795, "grad_norm": 0.032347939908504486, "learning_rate": 5.4023843315356235e-05, "loss": 0.0008008500561118125, "step": 162040 }, { "epoch": 45.99772920806131, "grad_norm": 2.3780007362365723, "learning_rate": 5.4021004825432876e-05, "loss": 0.003596164286136627, "step": 162050 }, { "epoch": 46.00056769798467, "grad_norm": 0.04571404308080673, "learning_rate": 5.401816633550951e-05, "loss": 0.0008566643111407757, "step": 162060 }, { "epoch": 46.003406187908034, "grad_norm": 0.04236887767910957, "learning_rate": 5.401532784558615e-05, "loss": 0.0018108218908309936, "step": 162070 }, { "epoch": 46.0062446778314, "grad_norm": 0.36329779028892517, "learning_rate": 5.401248935566279e-05, "loss": 0.0005710760131478309, "step": 162080 }, { "epoch": 46.00908316775475, "grad_norm": 0.11362437158823013, "learning_rate": 5.400965086573943e-05, "loss": 0.001699255406856537, "step": 162090 }, { "epoch": 46.011921657678116, "grad_norm": 0.029484974220395088, "learning_rate": 5.4006812375816076e-05, "loss": 0.0017206419259309768, "step": 162100 }, { "epoch": 46.01476014760148, "grad_norm": 0.31603777408599854, "learning_rate": 5.400397388589271e-05, "loss": 0.0023553404957056046, "step": 162110 }, { "epoch": 46.017598637524834, "grad_norm": 4.410210609436035, "learning_rate": 5.400113539596935e-05, "loss": 0.007308177649974823, "step": 162120 }, { "epoch": 46.0204371274482, "grad_norm": 0.5365107655525208, "learning_rate": 5.399829690604599e-05, "loss": 0.0005033632740378379, "step": 162130 }, { "epoch": 46.02327561737156, "grad_norm": 0.27512863278388977, "learning_rate": 5.399545841612262e-05, "loss": 0.0007552217692136764, "step": 162140 }, { "epoch": 46.026114107294916, "grad_norm": 0.23626933991909027, "learning_rate": 5.399261992619926e-05, "loss": 0.0002985768020153046, "step": 162150 }, { "epoch": 46.02895259721828, "grad_norm": 0.006913408171385527, "learning_rate": 5.39897814362759e-05, "loss": 0.0003108274191617966, "step": 162160 }, { "epoch": 46.03179108714164, "grad_norm": 0.02778448723256588, "learning_rate": 5.3986942946352546e-05, "loss": 0.0001646561548113823, "step": 162170 }, { "epoch": 46.034629577065004, "grad_norm": 0.05824076384305954, "learning_rate": 5.398410445642919e-05, "loss": 0.00041065942496061326, "step": 162180 }, { "epoch": 46.03746806698836, "grad_norm": 0.19169381260871887, "learning_rate": 5.398126596650582e-05, "loss": 0.0010849716141819953, "step": 162190 }, { "epoch": 46.04030655691172, "grad_norm": 0.04374273866415024, "learning_rate": 5.397842747658246e-05, "loss": 0.00021917298436164857, "step": 162200 }, { "epoch": 46.043145046835086, "grad_norm": 0.38149186968803406, "learning_rate": 5.39755889866591e-05, "loss": 0.0011309217661619186, "step": 162210 }, { "epoch": 46.04598353675844, "grad_norm": 8.999382972717285, "learning_rate": 5.397275049673574e-05, "loss": 0.004919512942433357, "step": 162220 }, { "epoch": 46.048822026681805, "grad_norm": 0.00919905211776495, "learning_rate": 5.3969912006812374e-05, "loss": 0.0008580105379223824, "step": 162230 }, { "epoch": 46.05166051660517, "grad_norm": 0.0760836973786354, "learning_rate": 5.396707351688902e-05, "loss": 0.004180705919861793, "step": 162240 }, { "epoch": 46.05449900652852, "grad_norm": 3.1124494075775146, "learning_rate": 5.3964235026965656e-05, "loss": 0.0012007977813482285, "step": 162250 }, { "epoch": 46.057337496451886, "grad_norm": 0.02776338905096054, "learning_rate": 5.39613965370423e-05, "loss": 0.0005645383149385452, "step": 162260 }, { "epoch": 46.06017598637525, "grad_norm": 0.07014703750610352, "learning_rate": 5.395855804711893e-05, "loss": 0.0005700966343283653, "step": 162270 }, { "epoch": 46.06301447629861, "grad_norm": 0.032131094485521317, "learning_rate": 5.3955719557195574e-05, "loss": 0.0002970585599541664, "step": 162280 }, { "epoch": 46.06585296622197, "grad_norm": 0.06976740807294846, "learning_rate": 5.395288106727221e-05, "loss": 0.0002395300194621086, "step": 162290 }, { "epoch": 46.06869145614533, "grad_norm": 0.0040765791200101376, "learning_rate": 5.3950042577348856e-05, "loss": 0.0004985306411981582, "step": 162300 }, { "epoch": 46.071529946068694, "grad_norm": 0.04457193985581398, "learning_rate": 5.39472040874255e-05, "loss": 0.00030099954456090926, "step": 162310 }, { "epoch": 46.07436843599205, "grad_norm": 0.21516470611095428, "learning_rate": 5.394436559750213e-05, "loss": 0.00012315306812524796, "step": 162320 }, { "epoch": 46.07720692591541, "grad_norm": 0.4364336431026459, "learning_rate": 5.3941527107578774e-05, "loss": 0.0005716018378734589, "step": 162330 }, { "epoch": 46.080045415838775, "grad_norm": 0.07005792111158371, "learning_rate": 5.393868861765541e-05, "loss": 0.00014902018010616304, "step": 162340 }, { "epoch": 46.08288390576214, "grad_norm": 0.07923626899719238, "learning_rate": 5.393585012773204e-05, "loss": 0.004127166047692299, "step": 162350 }, { "epoch": 46.085722395685494, "grad_norm": 0.5760943293571472, "learning_rate": 5.3933011637808684e-05, "loss": 0.0015041720122098922, "step": 162360 }, { "epoch": 46.08856088560886, "grad_norm": 0.036159079521894455, "learning_rate": 5.393017314788533e-05, "loss": 0.0004506183788180351, "step": 162370 }, { "epoch": 46.09139937553222, "grad_norm": 0.02652639150619507, "learning_rate": 5.392733465796197e-05, "loss": 0.0018915046006441117, "step": 162380 }, { "epoch": 46.094237865455575, "grad_norm": 0.15839041769504547, "learning_rate": 5.392449616803861e-05, "loss": 0.0023580243811011314, "step": 162390 }, { "epoch": 46.09707635537894, "grad_norm": 0.06562204658985138, "learning_rate": 5.392165767811524e-05, "loss": 0.0029989562928676603, "step": 162400 }, { "epoch": 46.0999148453023, "grad_norm": 0.07088036835193634, "learning_rate": 5.3918819188191884e-05, "loss": 0.015363869071006776, "step": 162410 }, { "epoch": 46.10275333522566, "grad_norm": 0.02702244184911251, "learning_rate": 5.391598069826852e-05, "loss": 0.0018599381670355796, "step": 162420 }, { "epoch": 46.10559182514902, "grad_norm": 0.23338258266448975, "learning_rate": 5.391314220834516e-05, "loss": 0.0066576175391674045, "step": 162430 }, { "epoch": 46.10843031507238, "grad_norm": 0.15333592891693115, "learning_rate": 5.391030371842181e-05, "loss": 0.0007164129987359047, "step": 162440 }, { "epoch": 46.111268804995746, "grad_norm": 0.006976655684411526, "learning_rate": 5.390746522849844e-05, "loss": 0.00576360672712326, "step": 162450 }, { "epoch": 46.1141072949191, "grad_norm": 0.05320030450820923, "learning_rate": 5.3904626738575084e-05, "loss": 0.0005248257890343666, "step": 162460 }, { "epoch": 46.116945784842464, "grad_norm": 0.04682684317231178, "learning_rate": 5.390178824865172e-05, "loss": 0.0025250621140003203, "step": 162470 }, { "epoch": 46.11978427476583, "grad_norm": 0.10380784422159195, "learning_rate": 5.3898949758728354e-05, "loss": 0.0018677346408367157, "step": 162480 }, { "epoch": 46.12262276468918, "grad_norm": 0.011112399399280548, "learning_rate": 5.3896111268804995e-05, "loss": 0.0006690207868814468, "step": 162490 }, { "epoch": 46.125461254612546, "grad_norm": 0.01489220280200243, "learning_rate": 5.389327277888164e-05, "loss": 0.00025579724460840226, "step": 162500 }, { "epoch": 46.125461254612546, "eval_accuracy": 0.982895657150124, "eval_loss": 0.06673024594783783, "eval_runtime": 40.3671, "eval_samples_per_second": 389.599, "eval_steps_per_second": 6.094, "step": 162500 }, { "epoch": 46.12829974453591, "grad_norm": 0.03820209950208664, "learning_rate": 5.389043428895828e-05, "loss": 0.0024784550070762633, "step": 162510 }, { "epoch": 46.131138234459264, "grad_norm": 0.03259578347206116, "learning_rate": 5.388759579903492e-05, "loss": 0.012887287139892577, "step": 162520 }, { "epoch": 46.13397672438263, "grad_norm": 0.15458153188228607, "learning_rate": 5.3884757309111554e-05, "loss": 0.0001891016960144043, "step": 162530 }, { "epoch": 46.13681521430599, "grad_norm": 1.4992066621780396, "learning_rate": 5.3881918819188195e-05, "loss": 0.00031114500015974047, "step": 162540 }, { "epoch": 46.13965370422935, "grad_norm": 0.04644280672073364, "learning_rate": 5.387908032926483e-05, "loss": 0.0024193137884140014, "step": 162550 }, { "epoch": 46.14249219415271, "grad_norm": 0.013094194233417511, "learning_rate": 5.387624183934147e-05, "loss": 0.0002572862431406975, "step": 162560 }, { "epoch": 46.14533068407607, "grad_norm": 0.12416289001703262, "learning_rate": 5.387340334941812e-05, "loss": 0.0019784213975071905, "step": 162570 }, { "epoch": 46.148169173999435, "grad_norm": 0.011005834676325321, "learning_rate": 5.3870564859494754e-05, "loss": 0.000952276773750782, "step": 162580 }, { "epoch": 46.15100766392279, "grad_norm": 0.04340539500117302, "learning_rate": 5.386772636957139e-05, "loss": 0.0013362208381295204, "step": 162590 }, { "epoch": 46.15384615384615, "grad_norm": 0.06644293665885925, "learning_rate": 5.386488787964803e-05, "loss": 0.001341301016509533, "step": 162600 }, { "epoch": 46.156684643769516, "grad_norm": 0.24802358448505402, "learning_rate": 5.3862049389724664e-05, "loss": 0.000547056831419468, "step": 162610 }, { "epoch": 46.15952313369287, "grad_norm": 1.5320998430252075, "learning_rate": 5.3859210899801306e-05, "loss": 0.0007293291389942169, "step": 162620 }, { "epoch": 46.162361623616235, "grad_norm": 0.10855016112327576, "learning_rate": 5.385637240987794e-05, "loss": 0.00025583170354366304, "step": 162630 }, { "epoch": 46.1652001135396, "grad_norm": 0.0133963068947196, "learning_rate": 5.385353391995459e-05, "loss": 0.00024827849119901657, "step": 162640 }, { "epoch": 46.16803860346296, "grad_norm": 0.016581183299422264, "learning_rate": 5.385069543003123e-05, "loss": 0.0007639855146408081, "step": 162650 }, { "epoch": 46.170877093386316, "grad_norm": 0.11060385406017303, "learning_rate": 5.3847856940107865e-05, "loss": 0.0020827032625675202, "step": 162660 }, { "epoch": 46.17371558330968, "grad_norm": 0.043789755553007126, "learning_rate": 5.3845018450184506e-05, "loss": 0.00026704054325819013, "step": 162670 }, { "epoch": 46.17655407323304, "grad_norm": 0.07335441559553146, "learning_rate": 5.384217996026114e-05, "loss": 0.0002528199926018715, "step": 162680 }, { "epoch": 46.1793925631564, "grad_norm": 0.0028225970454514027, "learning_rate": 5.383934147033778e-05, "loss": 0.000164862722158432, "step": 162690 }, { "epoch": 46.18223105307976, "grad_norm": 0.04552728310227394, "learning_rate": 5.383650298041443e-05, "loss": 0.00019902512431144715, "step": 162700 }, { "epoch": 46.185069543003124, "grad_norm": 0.1764087677001953, "learning_rate": 5.3833664490491065e-05, "loss": 0.0007293958216905594, "step": 162710 }, { "epoch": 46.18790803292649, "grad_norm": 0.11290168017148972, "learning_rate": 5.38308260005677e-05, "loss": 0.004432444274425506, "step": 162720 }, { "epoch": 46.19074652284984, "grad_norm": 0.15805934369564056, "learning_rate": 5.382798751064434e-05, "loss": 0.0003749443218111992, "step": 162730 }, { "epoch": 46.193585012773205, "grad_norm": 0.006263004150241613, "learning_rate": 5.3825149020720975e-05, "loss": 0.0003876447677612305, "step": 162740 }, { "epoch": 46.19642350269657, "grad_norm": 0.004565217066556215, "learning_rate": 5.3822310530797617e-05, "loss": 0.0017584418877959252, "step": 162750 }, { "epoch": 46.199261992619924, "grad_norm": 0.0066228690557181835, "learning_rate": 5.381947204087425e-05, "loss": 0.00042107850313186643, "step": 162760 }, { "epoch": 46.20210048254329, "grad_norm": 3.2366833686828613, "learning_rate": 5.38166335509509e-05, "loss": 0.007951974123716354, "step": 162770 }, { "epoch": 46.20493897246665, "grad_norm": 0.1355009227991104, "learning_rate": 5.381379506102754e-05, "loss": 0.0054387211799621586, "step": 162780 }, { "epoch": 46.207777462390005, "grad_norm": 0.04972456768155098, "learning_rate": 5.3810956571104175e-05, "loss": 0.00034842267632484436, "step": 162790 }, { "epoch": 46.21061595231337, "grad_norm": 0.664230465888977, "learning_rate": 5.380811808118082e-05, "loss": 0.0023140687495470046, "step": 162800 }, { "epoch": 46.21345444223673, "grad_norm": 0.011386923491954803, "learning_rate": 5.380527959125745e-05, "loss": 0.014219683408737183, "step": 162810 }, { "epoch": 46.216292932160094, "grad_norm": 0.7225995659828186, "learning_rate": 5.3802441101334086e-05, "loss": 0.0006542455404996872, "step": 162820 }, { "epoch": 46.21913142208345, "grad_norm": 0.05724590644240379, "learning_rate": 5.379960261141073e-05, "loss": 0.0005939073860645294, "step": 162830 }, { "epoch": 46.22196991200681, "grad_norm": 0.02142835035920143, "learning_rate": 5.3796764121487375e-05, "loss": 0.0006526308134198189, "step": 162840 }, { "epoch": 46.224808401930176, "grad_norm": 0.3756957948207855, "learning_rate": 5.379392563156401e-05, "loss": 0.006861001253128052, "step": 162850 }, { "epoch": 46.22764689185353, "grad_norm": 0.024955326691269875, "learning_rate": 5.379108714164065e-05, "loss": 0.0027303878217935563, "step": 162860 }, { "epoch": 46.230485381776894, "grad_norm": 0.06815392524003983, "learning_rate": 5.3788248651717286e-05, "loss": 0.0031417366117239, "step": 162870 }, { "epoch": 46.23332387170026, "grad_norm": 0.9627792835235596, "learning_rate": 5.378541016179393e-05, "loss": 0.0008027095347642899, "step": 162880 }, { "epoch": 46.23616236162361, "grad_norm": 0.0693509429693222, "learning_rate": 5.378257167187056e-05, "loss": 0.0006504060700535774, "step": 162890 }, { "epoch": 46.239000851546976, "grad_norm": 0.03350795805454254, "learning_rate": 5.377973318194721e-05, "loss": 0.005270944908261299, "step": 162900 }, { "epoch": 46.24183934147034, "grad_norm": 0.02455325983464718, "learning_rate": 5.377689469202385e-05, "loss": 0.00071746576577425, "step": 162910 }, { "epoch": 46.2446778313937, "grad_norm": 1.8838506937026978, "learning_rate": 5.3774056202100486e-05, "loss": 0.0005711944773793221, "step": 162920 }, { "epoch": 46.24751632131706, "grad_norm": 1.8115596771240234, "learning_rate": 5.377121771217713e-05, "loss": 0.0030653802677989006, "step": 162930 }, { "epoch": 46.25035481124042, "grad_norm": 16.82415008544922, "learning_rate": 5.376837922225376e-05, "loss": 0.009684388339519501, "step": 162940 }, { "epoch": 46.25319330116378, "grad_norm": 0.062415774911642075, "learning_rate": 5.37655407323304e-05, "loss": 0.00024518072605133056, "step": 162950 }, { "epoch": 46.25603179108714, "grad_norm": 1.0092709064483643, "learning_rate": 5.376270224240704e-05, "loss": 0.0007025988772511483, "step": 162960 }, { "epoch": 46.2588702810105, "grad_norm": 4.3934173583984375, "learning_rate": 5.3759863752483686e-05, "loss": 0.005939678847789764, "step": 162970 }, { "epoch": 46.261708770933865, "grad_norm": 6.393370628356934, "learning_rate": 5.375702526256032e-05, "loss": 0.0032783329486846926, "step": 162980 }, { "epoch": 46.26454726085722, "grad_norm": 0.3178304135799408, "learning_rate": 5.375418677263696e-05, "loss": 0.0030555197969079017, "step": 162990 }, { "epoch": 46.26738575078058, "grad_norm": 0.07510831207036972, "learning_rate": 5.37513482827136e-05, "loss": 0.0018351351842284202, "step": 163000 }, { "epoch": 46.26738575078058, "eval_accuracy": 0.9803522604438227, "eval_loss": 0.0824885442852974, "eval_runtime": 39.759, "eval_samples_per_second": 395.559, "eval_steps_per_second": 6.187, "step": 163000 }, { "epoch": 46.270224240703946, "grad_norm": 1.7560962438583374, "learning_rate": 5.374850979279024e-05, "loss": 0.018745292723178864, "step": 163010 }, { "epoch": 46.27306273062731, "grad_norm": 6.201270580291748, "learning_rate": 5.374567130286687e-05, "loss": 0.0052130863070487974, "step": 163020 }, { "epoch": 46.275901220550665, "grad_norm": 0.07337410002946854, "learning_rate": 5.374283281294352e-05, "loss": 0.0014450538903474808, "step": 163030 }, { "epoch": 46.27873971047403, "grad_norm": 0.1878250688314438, "learning_rate": 5.373999432302016e-05, "loss": 0.004597426205873489, "step": 163040 }, { "epoch": 46.28157820039739, "grad_norm": 0.022181715816259384, "learning_rate": 5.37371558330968e-05, "loss": 0.0018152249976992607, "step": 163050 }, { "epoch": 46.28441669032075, "grad_norm": 1.2175722122192383, "learning_rate": 5.373431734317343e-05, "loss": 0.010837848484516143, "step": 163060 }, { "epoch": 46.28725518024411, "grad_norm": 0.02944447658956051, "learning_rate": 5.373147885325007e-05, "loss": 0.0014300541952252388, "step": 163070 }, { "epoch": 46.29009367016747, "grad_norm": 11.630851745605469, "learning_rate": 5.372864036332671e-05, "loss": 0.011434248834848403, "step": 163080 }, { "epoch": 46.29293216009083, "grad_norm": 0.25866812467575073, "learning_rate": 5.372580187340335e-05, "loss": 0.0019855959340929984, "step": 163090 }, { "epoch": 46.29577065001419, "grad_norm": 0.1290142983198166, "learning_rate": 5.372296338348e-05, "loss": 0.006949211657047272, "step": 163100 }, { "epoch": 46.298609139937554, "grad_norm": 1.6736280918121338, "learning_rate": 5.372012489355663e-05, "loss": 0.0007507527247071266, "step": 163110 }, { "epoch": 46.30144762986092, "grad_norm": 1.0186225175857544, "learning_rate": 5.371728640363327e-05, "loss": 0.0008559774607419968, "step": 163120 }, { "epoch": 46.30428611978427, "grad_norm": 0.18261432647705078, "learning_rate": 5.371444791370991e-05, "loss": 0.004014894366264343, "step": 163130 }, { "epoch": 46.307124609707635, "grad_norm": 0.011953786946833134, "learning_rate": 5.371160942378655e-05, "loss": 0.005980336666107177, "step": 163140 }, { "epoch": 46.309963099631, "grad_norm": 0.08193595707416534, "learning_rate": 5.3708770933863183e-05, "loss": 0.0015900401398539544, "step": 163150 }, { "epoch": 46.312801589554354, "grad_norm": 1.0386197566986084, "learning_rate": 5.3705932443939825e-05, "loss": 0.0006404133513569832, "step": 163160 }, { "epoch": 46.31564007947772, "grad_norm": 3.5699124336242676, "learning_rate": 5.370309395401647e-05, "loss": 0.0038264404982328417, "step": 163170 }, { "epoch": 46.31847856940108, "grad_norm": 0.019200464710593224, "learning_rate": 5.370025546409311e-05, "loss": 0.0007794745266437531, "step": 163180 }, { "epoch": 46.32131705932444, "grad_norm": 1.7165249586105347, "learning_rate": 5.369741697416974e-05, "loss": 0.0012363966554403305, "step": 163190 }, { "epoch": 46.3241555492478, "grad_norm": 0.716703474521637, "learning_rate": 5.3694578484246384e-05, "loss": 0.013904839754104614, "step": 163200 }, { "epoch": 46.32699403917116, "grad_norm": 0.0195330660790205, "learning_rate": 5.369173999432302e-05, "loss": 0.01104419082403183, "step": 163210 }, { "epoch": 46.329832529094524, "grad_norm": 0.2047504186630249, "learning_rate": 5.368890150439966e-05, "loss": 0.0016424385830760002, "step": 163220 }, { "epoch": 46.33267101901788, "grad_norm": 2.057812213897705, "learning_rate": 5.368606301447631e-05, "loss": 0.005538377538323402, "step": 163230 }, { "epoch": 46.33550950894124, "grad_norm": 14.324738502502441, "learning_rate": 5.368322452455294e-05, "loss": 0.00819714218378067, "step": 163240 }, { "epoch": 46.338347998864606, "grad_norm": 0.00845100823789835, "learning_rate": 5.3680386034629584e-05, "loss": 0.002797645516693592, "step": 163250 }, { "epoch": 46.34118648878796, "grad_norm": 0.20200495421886444, "learning_rate": 5.367754754470622e-05, "loss": 0.0014144230633974075, "step": 163260 }, { "epoch": 46.344024978711325, "grad_norm": 0.23762142658233643, "learning_rate": 5.367470905478286e-05, "loss": 0.0008480966091156006, "step": 163270 }, { "epoch": 46.34686346863469, "grad_norm": 0.07617129385471344, "learning_rate": 5.3671870564859494e-05, "loss": 0.0006049908697605133, "step": 163280 }, { "epoch": 46.34970195855805, "grad_norm": 0.09053850173950195, "learning_rate": 5.366903207493613e-05, "loss": 0.0003071518614888191, "step": 163290 }, { "epoch": 46.352540448481406, "grad_norm": 0.031837306916713715, "learning_rate": 5.3666193585012784e-05, "loss": 0.0008027972653508186, "step": 163300 }, { "epoch": 46.35537893840477, "grad_norm": 0.47519636154174805, "learning_rate": 5.366335509508942e-05, "loss": 0.000999552384018898, "step": 163310 }, { "epoch": 46.35821742832813, "grad_norm": 2.023634910583496, "learning_rate": 5.366051660516605e-05, "loss": 0.0013587847352027893, "step": 163320 }, { "epoch": 46.36105591825149, "grad_norm": 0.12498708814382553, "learning_rate": 5.3657678115242694e-05, "loss": 0.0018486950546503067, "step": 163330 }, { "epoch": 46.36389440817485, "grad_norm": 0.387174129486084, "learning_rate": 5.365483962531933e-05, "loss": 0.00027941353619098663, "step": 163340 }, { "epoch": 46.36673289809821, "grad_norm": 0.22497862577438354, "learning_rate": 5.365200113539597e-05, "loss": 0.0051064353436231615, "step": 163350 }, { "epoch": 46.36957138802157, "grad_norm": 0.8958325386047363, "learning_rate": 5.3649162645472605e-05, "loss": 0.0022966643795371056, "step": 163360 }, { "epoch": 46.37240987794493, "grad_norm": 0.031420789659023285, "learning_rate": 5.364632415554925e-05, "loss": 0.003374814987182617, "step": 163370 }, { "epoch": 46.375248367868295, "grad_norm": 0.06276945024728775, "learning_rate": 5.3643485665625894e-05, "loss": 0.00046710819005966184, "step": 163380 }, { "epoch": 46.37808685779166, "grad_norm": 0.02192704938352108, "learning_rate": 5.364064717570253e-05, "loss": 0.0004726702347397804, "step": 163390 }, { "epoch": 46.380925347715014, "grad_norm": 0.017494721338152885, "learning_rate": 5.363780868577917e-05, "loss": 0.00019665975123643874, "step": 163400 }, { "epoch": 46.38376383763838, "grad_norm": 0.05018869414925575, "learning_rate": 5.3634970195855805e-05, "loss": 0.00018048789352178573, "step": 163410 }, { "epoch": 46.38660232756174, "grad_norm": 0.2581008970737457, "learning_rate": 5.363213170593244e-05, "loss": 0.0009971961379051208, "step": 163420 }, { "epoch": 46.389440817485095, "grad_norm": 0.001992772798985243, "learning_rate": 5.362929321600909e-05, "loss": 0.0005037302151322364, "step": 163430 }, { "epoch": 46.39227930740846, "grad_norm": 3.3649725914001465, "learning_rate": 5.362645472608573e-05, "loss": 0.0014136597514152528, "step": 163440 }, { "epoch": 46.39511779733182, "grad_norm": 0.019265592098236084, "learning_rate": 5.3623616236162364e-05, "loss": 0.002799687348306179, "step": 163450 }, { "epoch": 46.39795628725518, "grad_norm": 0.038446713238954544, "learning_rate": 5.3620777746239005e-05, "loss": 0.0028505839407444, "step": 163460 }, { "epoch": 46.40079477717854, "grad_norm": 0.035489171743392944, "learning_rate": 5.361793925631564e-05, "loss": 0.004865611344575882, "step": 163470 }, { "epoch": 46.4036332671019, "grad_norm": 0.290492445230484, "learning_rate": 5.361510076639228e-05, "loss": 0.0038106672465801237, "step": 163480 }, { "epoch": 46.406471757025265, "grad_norm": 0.4454902410507202, "learning_rate": 5.3612262276468916e-05, "loss": 0.01907256245613098, "step": 163490 }, { "epoch": 46.40931024694862, "grad_norm": 0.19280767440795898, "learning_rate": 5.3609423786545564e-05, "loss": 0.003637766093015671, "step": 163500 }, { "epoch": 46.40931024694862, "eval_accuracy": 0.9823233928912062, "eval_loss": 0.0692962184548378, "eval_runtime": 37.5013, "eval_samples_per_second": 419.372, "eval_steps_per_second": 6.56, "step": 163500 }, { "epoch": 46.412148736871984, "grad_norm": 0.018482061102986336, "learning_rate": 5.3606585296622205e-05, "loss": 0.0008847314864397049, "step": 163510 }, { "epoch": 46.41498722679535, "grad_norm": 0.023833850398659706, "learning_rate": 5.360374680669884e-05, "loss": 0.0001437809318304062, "step": 163520 }, { "epoch": 46.4178257167187, "grad_norm": 0.010948365554213524, "learning_rate": 5.3600908316775474e-05, "loss": 0.0005193158984184265, "step": 163530 }, { "epoch": 46.420664206642066, "grad_norm": 0.03881035000085831, "learning_rate": 5.3598069826852116e-05, "loss": 0.00021573808044195175, "step": 163540 }, { "epoch": 46.42350269656543, "grad_norm": 8.41369342803955, "learning_rate": 5.359523133692875e-05, "loss": 0.007260255515575409, "step": 163550 }, { "epoch": 46.42634118648879, "grad_norm": 0.06501033157110214, "learning_rate": 5.359239284700539e-05, "loss": 0.0009009268134832383, "step": 163560 }, { "epoch": 46.42917967641215, "grad_norm": 0.0023123170249164104, "learning_rate": 5.358955435708204e-05, "loss": 0.012901613116264343, "step": 163570 }, { "epoch": 46.43201816633551, "grad_norm": 0.014667796902358532, "learning_rate": 5.3586715867158674e-05, "loss": 0.003879065066576004, "step": 163580 }, { "epoch": 46.43485665625887, "grad_norm": 0.0187514778226614, "learning_rate": 5.3583877377235316e-05, "loss": 0.0022272903472185133, "step": 163590 }, { "epoch": 46.43769514618223, "grad_norm": 1.5912766456604004, "learning_rate": 5.358103888731195e-05, "loss": 0.0014877857640385628, "step": 163600 }, { "epoch": 46.44053363610559, "grad_norm": 0.08779482543468475, "learning_rate": 5.357820039738859e-05, "loss": 0.0019742149859666826, "step": 163610 }, { "epoch": 46.443372126028954, "grad_norm": 2.1582603454589844, "learning_rate": 5.3575361907465226e-05, "loss": 0.017604903876781465, "step": 163620 }, { "epoch": 46.44621061595231, "grad_norm": 0.08500450104475021, "learning_rate": 5.3572523417541875e-05, "loss": 0.00023211464285850526, "step": 163630 }, { "epoch": 46.44904910587567, "grad_norm": 0.08926035463809967, "learning_rate": 5.3569684927618516e-05, "loss": 0.0009614868089556694, "step": 163640 }, { "epoch": 46.451887595799036, "grad_norm": 0.041980721056461334, "learning_rate": 5.356684643769515e-05, "loss": 0.004935212433338165, "step": 163650 }, { "epoch": 46.4547260857224, "grad_norm": 0.06422276049852371, "learning_rate": 5.3564007947771785e-05, "loss": 0.00044629946351051333, "step": 163660 }, { "epoch": 46.457564575645755, "grad_norm": 0.004923907574266195, "learning_rate": 5.3561169457848427e-05, "loss": 0.0034327488392591476, "step": 163670 }, { "epoch": 46.46040306556912, "grad_norm": 3.0260062217712402, "learning_rate": 5.355833096792506e-05, "loss": 0.0007938744500279426, "step": 163680 }, { "epoch": 46.46324155549248, "grad_norm": 0.21224939823150635, "learning_rate": 5.35554924780017e-05, "loss": 0.014979815483093262, "step": 163690 }, { "epoch": 46.466080045415836, "grad_norm": 0.37068721652030945, "learning_rate": 5.355265398807835e-05, "loss": 0.005405589565634727, "step": 163700 }, { "epoch": 46.4689185353392, "grad_norm": 0.10144870728254318, "learning_rate": 5.3549815498154985e-05, "loss": 0.0010445613414049148, "step": 163710 }, { "epoch": 46.47175702526256, "grad_norm": 0.3580465316772461, "learning_rate": 5.354697700823163e-05, "loss": 0.002991179749369621, "step": 163720 }, { "epoch": 46.47459551518592, "grad_norm": 0.09166176617145538, "learning_rate": 5.354413851830826e-05, "loss": 0.009137493371963502, "step": 163730 }, { "epoch": 46.47743400510928, "grad_norm": 0.1686500459909439, "learning_rate": 5.35413000283849e-05, "loss": 0.0027200618758797647, "step": 163740 }, { "epoch": 46.480272495032644, "grad_norm": 2.2569243907928467, "learning_rate": 5.353846153846154e-05, "loss": 0.0053439982235431675, "step": 163750 }, { "epoch": 46.48311098495601, "grad_norm": 2.0595240592956543, "learning_rate": 5.353562304853817e-05, "loss": 0.005293352156877517, "step": 163760 }, { "epoch": 46.48594947487936, "grad_norm": 0.4752841889858246, "learning_rate": 5.353278455861483e-05, "loss": 0.009310082346200944, "step": 163770 }, { "epoch": 46.488787964802725, "grad_norm": 1.4166909456253052, "learning_rate": 5.352994606869146e-05, "loss": 0.0017268009483814239, "step": 163780 }, { "epoch": 46.49162645472609, "grad_norm": 4.644964694976807, "learning_rate": 5.3527107578768096e-05, "loss": 0.01887253075838089, "step": 163790 }, { "epoch": 46.494464944649444, "grad_norm": Infinity, "learning_rate": 5.352426908884474e-05, "loss": 0.010103533416986466, "step": 163800 }, { "epoch": 46.49730343457281, "grad_norm": 0.02251434698700905, "learning_rate": 5.352171444791372e-05, "loss": 0.0009649012237787247, "step": 163810 }, { "epoch": 46.50014192449617, "grad_norm": 5.384031295776367, "learning_rate": 5.351887595799036e-05, "loss": 0.019852398335933684, "step": 163820 }, { "epoch": 46.502980414419525, "grad_norm": 0.00615740567445755, "learning_rate": 5.351603746806699e-05, "loss": 0.0027600595727562903, "step": 163830 }, { "epoch": 46.50581890434289, "grad_norm": 0.12677894532680511, "learning_rate": 5.351319897814363e-05, "loss": 0.0029130786657333373, "step": 163840 }, { "epoch": 46.50865739426625, "grad_norm": 0.01670137234032154, "learning_rate": 5.351036048822027e-05, "loss": 0.0050706729292869564, "step": 163850 }, { "epoch": 46.511495884189614, "grad_norm": 1.0973623991012573, "learning_rate": 5.3507521998296904e-05, "loss": 0.0008494580164551735, "step": 163860 }, { "epoch": 46.51433437411297, "grad_norm": 0.007423890754580498, "learning_rate": 5.3504683508373545e-05, "loss": 0.0012851128354668616, "step": 163870 }, { "epoch": 46.51717286403633, "grad_norm": 0.024417266249656677, "learning_rate": 5.350184501845019e-05, "loss": 0.0013193504884839058, "step": 163880 }, { "epoch": 46.520011353959696, "grad_norm": 0.23814821243286133, "learning_rate": 5.349900652852683e-05, "loss": 0.0009992113336920738, "step": 163890 }, { "epoch": 46.52284984388305, "grad_norm": 1.599064826965332, "learning_rate": 5.349616803860347e-05, "loss": 0.00105687715113163, "step": 163900 }, { "epoch": 46.525688333806414, "grad_norm": 0.004662740975618362, "learning_rate": 5.3493329548680104e-05, "loss": 0.0002742428332567215, "step": 163910 }, { "epoch": 46.52852682372978, "grad_norm": 0.11176346242427826, "learning_rate": 5.3490491058756745e-05, "loss": 0.00031259283423423767, "step": 163920 }, { "epoch": 46.53136531365314, "grad_norm": 0.0013666830491274595, "learning_rate": 5.348765256883338e-05, "loss": 0.0001855740323662758, "step": 163930 }, { "epoch": 46.534203803576496, "grad_norm": 8.643783569335938, "learning_rate": 5.3484814078910014e-05, "loss": 0.002580687962472439, "step": 163940 }, { "epoch": 46.53704229349986, "grad_norm": 0.07948879897594452, "learning_rate": 5.348197558898666e-05, "loss": 0.001239553838968277, "step": 163950 }, { "epoch": 46.53988078342322, "grad_norm": 0.12992620468139648, "learning_rate": 5.3479137099063304e-05, "loss": 0.0003698980435729027, "step": 163960 }, { "epoch": 46.54271927334658, "grad_norm": 0.024348782375454903, "learning_rate": 5.347629860913994e-05, "loss": 0.00046099480241537096, "step": 163970 }, { "epoch": 46.54555776326994, "grad_norm": 0.012336469255387783, "learning_rate": 5.347346011921658e-05, "loss": 0.00918412283062935, "step": 163980 }, { "epoch": 46.5483962531933, "grad_norm": 0.013274343684315681, "learning_rate": 5.3470621629293215e-05, "loss": 0.002609454281628132, "step": 163990 }, { "epoch": 46.55123474311666, "grad_norm": 0.2202041745185852, "learning_rate": 5.3467783139369856e-05, "loss": 0.0004216166213154793, "step": 164000 }, { "epoch": 46.55123474311666, "eval_accuracy": 0.9817511286322884, "eval_loss": 0.0731024369597435, "eval_runtime": 36.5833, "eval_samples_per_second": 429.895, "eval_steps_per_second": 6.724, "step": 164000 }, { "epoch": 46.55407323304002, "grad_norm": 0.054978638887405396, "learning_rate": 5.3464944649446504e-05, "loss": 0.0009311836212873459, "step": 164010 }, { "epoch": 46.556911722963385, "grad_norm": 0.05512779578566551, "learning_rate": 5.346210615952314e-05, "loss": 0.0006948044523596763, "step": 164020 }, { "epoch": 46.55975021288675, "grad_norm": 0.14860321581363678, "learning_rate": 5.345926766959978e-05, "loss": 0.0007588593289256096, "step": 164030 }, { "epoch": 46.5625887028101, "grad_norm": 0.04413338750600815, "learning_rate": 5.3456429179676415e-05, "loss": 0.0004628032445907593, "step": 164040 }, { "epoch": 46.565427192733466, "grad_norm": 0.19434519112110138, "learning_rate": 5.345359068975305e-05, "loss": 0.007336030900478363, "step": 164050 }, { "epoch": 46.56826568265683, "grad_norm": 0.017556162551045418, "learning_rate": 5.345075219982969e-05, "loss": 0.0045310743153095245, "step": 164060 }, { "epoch": 46.571104172580185, "grad_norm": 0.009570423513650894, "learning_rate": 5.3447913709906325e-05, "loss": 0.002897500805556774, "step": 164070 }, { "epoch": 46.57394266250355, "grad_norm": 0.8498844504356384, "learning_rate": 5.3445075219982973e-05, "loss": 0.0009401371702551842, "step": 164080 }, { "epoch": 46.57678115242691, "grad_norm": 0.010226813144981861, "learning_rate": 5.3442236730059615e-05, "loss": 0.004073283076286316, "step": 164090 }, { "epoch": 46.579619642350266, "grad_norm": 0.7305397391319275, "learning_rate": 5.343939824013625e-05, "loss": 0.011279651522636413, "step": 164100 }, { "epoch": 46.58245813227363, "grad_norm": 9.173604965209961, "learning_rate": 5.343655975021289e-05, "loss": 0.01664261519908905, "step": 164110 }, { "epoch": 46.58529662219699, "grad_norm": 0.04834338650107384, "learning_rate": 5.3433721260289525e-05, "loss": 0.00267996471375227, "step": 164120 }, { "epoch": 46.588135112120355, "grad_norm": 0.14496660232543945, "learning_rate": 5.343088277036617e-05, "loss": 0.0031046576797962187, "step": 164130 }, { "epoch": 46.59097360204371, "grad_norm": 2.936861276626587, "learning_rate": 5.34280442804428e-05, "loss": 0.0024801801890134813, "step": 164140 }, { "epoch": 46.593812091967074, "grad_norm": 2.383110284805298, "learning_rate": 5.342520579051945e-05, "loss": 0.0013533039018511772, "step": 164150 }, { "epoch": 46.59665058189044, "grad_norm": 0.045343298465013504, "learning_rate": 5.342236730059609e-05, "loss": 0.009829068183898925, "step": 164160 }, { "epoch": 46.59948907181379, "grad_norm": 0.39256665110588074, "learning_rate": 5.3419528810672725e-05, "loss": 0.0006148176267743111, "step": 164170 }, { "epoch": 46.602327561737155, "grad_norm": 0.1173771321773529, "learning_rate": 5.341669032074936e-05, "loss": 0.0009616632014513016, "step": 164180 }, { "epoch": 46.60516605166052, "grad_norm": 0.03913067653775215, "learning_rate": 5.3413851830826e-05, "loss": 0.007566293329000473, "step": 164190 }, { "epoch": 46.608004541583874, "grad_norm": 0.03382549807429314, "learning_rate": 5.3411013340902636e-05, "loss": 0.002771482989192009, "step": 164200 }, { "epoch": 46.61084303150724, "grad_norm": 0.23588202893733978, "learning_rate": 5.3408174850979284e-05, "loss": 0.001549750566482544, "step": 164210 }, { "epoch": 46.6136815214306, "grad_norm": 0.014134090393781662, "learning_rate": 5.3405336361055926e-05, "loss": 0.008238612115383149, "step": 164220 }, { "epoch": 46.61652001135396, "grad_norm": 1.4708055257797241, "learning_rate": 5.340249787113256e-05, "loss": 0.0018319472670555114, "step": 164230 }, { "epoch": 46.61935850127732, "grad_norm": 0.016067134216427803, "learning_rate": 5.33996593812092e-05, "loss": 0.009977276623249053, "step": 164240 }, { "epoch": 46.62219699120068, "grad_norm": 6.62035608291626, "learning_rate": 5.3396820891285836e-05, "loss": 0.002682622894644737, "step": 164250 }, { "epoch": 46.625035481124044, "grad_norm": 0.027448495849967003, "learning_rate": 5.339398240136248e-05, "loss": 0.0006648061797022819, "step": 164260 }, { "epoch": 46.6278739710474, "grad_norm": 0.04657125473022461, "learning_rate": 5.339114391143911e-05, "loss": 0.0016188453882932663, "step": 164270 }, { "epoch": 46.63071246097076, "grad_norm": 0.4999307692050934, "learning_rate": 5.338830542151576e-05, "loss": 0.00043725408613681793, "step": 164280 }, { "epoch": 46.633550950894126, "grad_norm": 0.3141748309135437, "learning_rate": 5.33854669315924e-05, "loss": 0.001329459808766842, "step": 164290 }, { "epoch": 46.63638944081748, "grad_norm": 0.17700962722301483, "learning_rate": 5.3382628441669036e-05, "loss": 0.0030176933854818345, "step": 164300 }, { "epoch": 46.639227930740844, "grad_norm": 0.011298269033432007, "learning_rate": 5.337978995174567e-05, "loss": 0.002550259605050087, "step": 164310 }, { "epoch": 46.64206642066421, "grad_norm": 0.004124420695006847, "learning_rate": 5.337695146182231e-05, "loss": 0.003008936159312725, "step": 164320 }, { "epoch": 46.64490491058757, "grad_norm": 0.2887416481971741, "learning_rate": 5.337411297189895e-05, "loss": 0.0028220674023032187, "step": 164330 }, { "epoch": 46.647743400510926, "grad_norm": 0.12297516316175461, "learning_rate": 5.337127448197559e-05, "loss": 0.00043729599565267563, "step": 164340 }, { "epoch": 46.65058189043429, "grad_norm": 0.015360387042164803, "learning_rate": 5.3368435992052236e-05, "loss": 0.003083941899240017, "step": 164350 }, { "epoch": 46.65342038035765, "grad_norm": 0.8691886067390442, "learning_rate": 5.336559750212887e-05, "loss": 0.0032389506697654722, "step": 164360 }, { "epoch": 46.65625887028101, "grad_norm": 0.015299973078072071, "learning_rate": 5.336275901220551e-05, "loss": 0.0003819093108177185, "step": 164370 }, { "epoch": 46.65909736020437, "grad_norm": 0.002548599150031805, "learning_rate": 5.335992052228215e-05, "loss": 0.0038468748331069945, "step": 164380 }, { "epoch": 46.66193585012773, "grad_norm": 0.021784605458378792, "learning_rate": 5.335708203235879e-05, "loss": 0.00038795825093984604, "step": 164390 }, { "epoch": 46.664774340051096, "grad_norm": 0.1252327412366867, "learning_rate": 5.335424354243542e-05, "loss": 0.0015659453347325325, "step": 164400 }, { "epoch": 46.66761282997445, "grad_norm": 1.4776842594146729, "learning_rate": 5.335140505251207e-05, "loss": 0.000587795116007328, "step": 164410 }, { "epoch": 46.670451319897815, "grad_norm": 0.002891518408432603, "learning_rate": 5.3348566562588706e-05, "loss": 0.0011839373037219048, "step": 164420 }, { "epoch": 46.67328980982118, "grad_norm": 0.19695326685905457, "learning_rate": 5.334572807266535e-05, "loss": 0.0026686904951930047, "step": 164430 }, { "epoch": 46.67612829974453, "grad_norm": 0.026138490065932274, "learning_rate": 5.334288958274198e-05, "loss": 0.00036917924880981446, "step": 164440 }, { "epoch": 46.678966789667896, "grad_norm": 0.24314969778060913, "learning_rate": 5.334005109281862e-05, "loss": 0.005315446853637695, "step": 164450 }, { "epoch": 46.68180527959126, "grad_norm": 0.548101544380188, "learning_rate": 5.333721260289526e-05, "loss": 0.0007631422951817513, "step": 164460 }, { "epoch": 46.684643769514615, "grad_norm": 0.026173122227191925, "learning_rate": 5.33343741129719e-05, "loss": 0.0016731645911931992, "step": 164470 }, { "epoch": 46.68748225943798, "grad_norm": 0.027828240767121315, "learning_rate": 5.333153562304855e-05, "loss": 0.0006338572129607201, "step": 164480 }, { "epoch": 46.69032074936134, "grad_norm": 1.4787408113479614, "learning_rate": 5.332869713312518e-05, "loss": 0.0006396386772394181, "step": 164490 }, { "epoch": 46.693159239284704, "grad_norm": 0.9759419560432434, "learning_rate": 5.332585864320182e-05, "loss": 0.0010546335950493812, "step": 164500 }, { "epoch": 46.693159239284704, "eval_accuracy": 0.9806701850321103, "eval_loss": 0.0771656408905983, "eval_runtime": 36.643, "eval_samples_per_second": 429.195, "eval_steps_per_second": 6.713, "step": 164500 }, { "epoch": 46.69599772920806, "grad_norm": 0.21570032835006714, "learning_rate": 5.332302015327846e-05, "loss": 0.006721266359090805, "step": 164510 }, { "epoch": 46.69883621913142, "grad_norm": 0.08661403506994247, "learning_rate": 5.332018166335509e-05, "loss": 0.0006578609347343445, "step": 164520 }, { "epoch": 46.701674709054785, "grad_norm": 0.008930855430662632, "learning_rate": 5.3317343173431734e-05, "loss": 0.005960845947265625, "step": 164530 }, { "epoch": 46.70451319897814, "grad_norm": 0.019362110644578934, "learning_rate": 5.331450468350837e-05, "loss": 0.0037521719932556152, "step": 164540 }, { "epoch": 46.707351688901504, "grad_norm": 0.023698750883340836, "learning_rate": 5.3311666193585016e-05, "loss": 0.0004852520301938057, "step": 164550 }, { "epoch": 46.71019017882487, "grad_norm": 0.057186078280210495, "learning_rate": 5.330882770366166e-05, "loss": 0.0003858359530568123, "step": 164560 }, { "epoch": 46.71302866874822, "grad_norm": 0.13061100244522095, "learning_rate": 5.330598921373829e-05, "loss": 0.0004120577126741409, "step": 164570 }, { "epoch": 46.715867158671585, "grad_norm": 0.6045131683349609, "learning_rate": 5.3303150723814934e-05, "loss": 0.0013090811669826507, "step": 164580 }, { "epoch": 46.71870564859495, "grad_norm": 0.022583583369851112, "learning_rate": 5.330031223389157e-05, "loss": 0.0014555856585502624, "step": 164590 }, { "epoch": 46.72154413851831, "grad_norm": 2.2395644187927246, "learning_rate": 5.329747374396821e-05, "loss": 0.0014290444552898407, "step": 164600 }, { "epoch": 46.72438262844167, "grad_norm": 0.2213468849658966, "learning_rate": 5.329463525404486e-05, "loss": 0.003768125921487808, "step": 164610 }, { "epoch": 46.72722111836503, "grad_norm": 0.007564156781882048, "learning_rate": 5.329179676412149e-05, "loss": 0.003116919472813606, "step": 164620 }, { "epoch": 46.73005960828839, "grad_norm": 0.07434268295764923, "learning_rate": 5.3288958274198134e-05, "loss": 0.002563374489545822, "step": 164630 }, { "epoch": 46.73289809821175, "grad_norm": 0.3011215925216675, "learning_rate": 5.328611978427477e-05, "loss": 0.009587319195270538, "step": 164640 }, { "epoch": 46.73573658813511, "grad_norm": 0.02049030177295208, "learning_rate": 5.32832812943514e-05, "loss": 0.0036767043173313143, "step": 164650 }, { "epoch": 46.738575078058474, "grad_norm": 0.006727906409651041, "learning_rate": 5.3280442804428044e-05, "loss": 0.003406001627445221, "step": 164660 }, { "epoch": 46.74141356798184, "grad_norm": 0.025726817548274994, "learning_rate": 5.327760431450468e-05, "loss": 0.011585208773612975, "step": 164670 }, { "epoch": 46.74425205790519, "grad_norm": 0.05812310799956322, "learning_rate": 5.327476582458133e-05, "loss": 0.003317079693078995, "step": 164680 }, { "epoch": 46.747090547828556, "grad_norm": 0.02197866700589657, "learning_rate": 5.327192733465797e-05, "loss": 0.0035661906003952026, "step": 164690 }, { "epoch": 46.74992903775192, "grad_norm": 0.47157415747642517, "learning_rate": 5.32690888447346e-05, "loss": 0.0009295549243688583, "step": 164700 }, { "epoch": 46.752767527675275, "grad_norm": 0.6668805480003357, "learning_rate": 5.3266250354811244e-05, "loss": 0.0006923049688339234, "step": 164710 }, { "epoch": 46.75560601759864, "grad_norm": 0.012043535709381104, "learning_rate": 5.326341186488788e-05, "loss": 0.008093620836734771, "step": 164720 }, { "epoch": 46.758444507522, "grad_norm": 0.008694746531546116, "learning_rate": 5.326057337496452e-05, "loss": 0.004580257833003998, "step": 164730 }, { "epoch": 46.761282997445356, "grad_norm": 0.3203781545162201, "learning_rate": 5.325773488504117e-05, "loss": 0.0005455253645777703, "step": 164740 }, { "epoch": 46.76412148736872, "grad_norm": 5.83833646774292, "learning_rate": 5.32548963951178e-05, "loss": 0.005352434515953064, "step": 164750 }, { "epoch": 46.76695997729208, "grad_norm": 0.10197533667087555, "learning_rate": 5.3252057905194445e-05, "loss": 0.00033706165850162507, "step": 164760 }, { "epoch": 46.769798467215445, "grad_norm": 0.0050534033216536045, "learning_rate": 5.324921941527108e-05, "loss": 0.00019014216959476471, "step": 164770 }, { "epoch": 46.7726369571388, "grad_norm": 15.178738594055176, "learning_rate": 5.3246380925347714e-05, "loss": 0.005055110901594162, "step": 164780 }, { "epoch": 46.77547544706216, "grad_norm": 0.019673120230436325, "learning_rate": 5.3243542435424355e-05, "loss": 0.0006798507645726203, "step": 164790 }, { "epoch": 46.778313936985526, "grad_norm": 0.03813923895359039, "learning_rate": 5.324070394550099e-05, "loss": 0.001564910262823105, "step": 164800 }, { "epoch": 46.78115242690888, "grad_norm": 12.751200675964355, "learning_rate": 5.323786545557764e-05, "loss": 0.0030060352757573126, "step": 164810 }, { "epoch": 46.783990916832245, "grad_norm": 11.56466293334961, "learning_rate": 5.323502696565428e-05, "loss": 0.007409811019897461, "step": 164820 }, { "epoch": 46.78682940675561, "grad_norm": 0.018139144405722618, "learning_rate": 5.3232188475730914e-05, "loss": 0.0036050498485565184, "step": 164830 }, { "epoch": 46.789667896678964, "grad_norm": 0.06457583606243134, "learning_rate": 5.3229349985807555e-05, "loss": 0.00650712326169014, "step": 164840 }, { "epoch": 46.79250638660233, "grad_norm": 0.06581851840019226, "learning_rate": 5.322651149588419e-05, "loss": 0.004680928587913513, "step": 164850 }, { "epoch": 46.79534487652569, "grad_norm": 0.06586632877588272, "learning_rate": 5.322367300596083e-05, "loss": 0.0007603533565998077, "step": 164860 }, { "epoch": 46.79818336644905, "grad_norm": 0.4746870696544647, "learning_rate": 5.3220834516037466e-05, "loss": 0.001577981561422348, "step": 164870 }, { "epoch": 46.80102185637241, "grad_norm": 3.050637722015381, "learning_rate": 5.3217996026114114e-05, "loss": 0.007298415899276734, "step": 164880 }, { "epoch": 46.80386034629577, "grad_norm": 13.677130699157715, "learning_rate": 5.321515753619075e-05, "loss": 0.008586320281028747, "step": 164890 }, { "epoch": 46.806698836219134, "grad_norm": 0.0879044160246849, "learning_rate": 5.321231904626739e-05, "loss": 0.004771288856863976, "step": 164900 }, { "epoch": 46.80953732614249, "grad_norm": 2.934196949005127, "learning_rate": 5.3209480556344025e-05, "loss": 0.0012677874416112899, "step": 164910 }, { "epoch": 46.81237581606585, "grad_norm": 0.2267649918794632, "learning_rate": 5.3206642066420666e-05, "loss": 0.010330940783023834, "step": 164920 }, { "epoch": 46.815214305989215, "grad_norm": 0.006205976475030184, "learning_rate": 5.32038035764973e-05, "loss": 0.006031459197402, "step": 164930 }, { "epoch": 46.81805279591257, "grad_norm": 0.05585699528455734, "learning_rate": 5.320096508657395e-05, "loss": 0.0011978840455412864, "step": 164940 }, { "epoch": 46.820891285835934, "grad_norm": 0.041020218282938004, "learning_rate": 5.319812659665059e-05, "loss": 0.00040831118822097777, "step": 164950 }, { "epoch": 46.8237297757593, "grad_norm": 0.021080246195197105, "learning_rate": 5.3195288106727225e-05, "loss": 0.0014667833223938943, "step": 164960 }, { "epoch": 46.82656826568266, "grad_norm": 0.029373999685049057, "learning_rate": 5.3192449616803866e-05, "loss": 0.0016465529799461365, "step": 164970 }, { "epoch": 46.829406755606016, "grad_norm": 1.3102799654006958, "learning_rate": 5.31896111268805e-05, "loss": 0.001210256665945053, "step": 164980 }, { "epoch": 46.83224524552938, "grad_norm": 0.07705166935920715, "learning_rate": 5.3186772636957135e-05, "loss": 0.0005567308515310287, "step": 164990 }, { "epoch": 46.83508373545274, "grad_norm": 3.7931265830993652, "learning_rate": 5.3183934147033777e-05, "loss": 0.0020467273890972136, "step": 165000 }, { "epoch": 46.83508373545274, "eval_accuracy": 0.9792713168436447, "eval_loss": 0.08229527622461319, "eval_runtime": 35.4394, "eval_samples_per_second": 443.772, "eval_steps_per_second": 6.941, "step": 165000 }, { "epoch": 46.8379222253761, "grad_norm": 0.2365800142288208, "learning_rate": 5.3181095657110425e-05, "loss": 0.0014320390298962593, "step": 165010 }, { "epoch": 46.84076071529946, "grad_norm": 0.10082763433456421, "learning_rate": 5.317825716718706e-05, "loss": 0.0002754110842943192, "step": 165020 }, { "epoch": 46.84359920522282, "grad_norm": 2.6603238582611084, "learning_rate": 5.31754186772637e-05, "loss": 0.002124791406095028, "step": 165030 }, { "epoch": 46.84643769514618, "grad_norm": 0.06360793858766556, "learning_rate": 5.3172580187340335e-05, "loss": 0.0005446575582027435, "step": 165040 }, { "epoch": 46.84927618506954, "grad_norm": 0.10896068066358566, "learning_rate": 5.316974169741698e-05, "loss": 0.004896321892738342, "step": 165050 }, { "epoch": 46.852114674992904, "grad_norm": 0.45514804124832153, "learning_rate": 5.316690320749361e-05, "loss": 0.0037041082978248596, "step": 165060 }, { "epoch": 46.85495316491627, "grad_norm": 0.018101410940289497, "learning_rate": 5.316406471757025e-05, "loss": 0.0044506765902042385, "step": 165070 }, { "epoch": 46.85779165483962, "grad_norm": 0.13395874202251434, "learning_rate": 5.31612262276469e-05, "loss": 0.004580998420715332, "step": 165080 }, { "epoch": 46.860630144762986, "grad_norm": 10.038833618164062, "learning_rate": 5.3158387737723535e-05, "loss": 0.0026512730866670607, "step": 165090 }, { "epoch": 46.86346863468635, "grad_norm": 0.012202754616737366, "learning_rate": 5.315554924780018e-05, "loss": 0.002698143757879734, "step": 165100 }, { "epoch": 46.866307124609705, "grad_norm": 0.05278470367193222, "learning_rate": 5.315271075787681e-05, "loss": 0.007745574414730072, "step": 165110 }, { "epoch": 46.86914561453307, "grad_norm": 0.05686945468187332, "learning_rate": 5.3149872267953446e-05, "loss": 0.004172205552458763, "step": 165120 }, { "epoch": 46.87198410445643, "grad_norm": 2.8561763763427734, "learning_rate": 5.314703377803009e-05, "loss": 0.0030514078214764597, "step": 165130 }, { "epoch": 46.87482259437979, "grad_norm": 0.27995067834854126, "learning_rate": 5.3144195288106735e-05, "loss": 0.0007947292178869247, "step": 165140 }, { "epoch": 46.87766108430315, "grad_norm": 7.972288608551025, "learning_rate": 5.314135679818337e-05, "loss": 0.0058582756668329235, "step": 165150 }, { "epoch": 46.88049957422651, "grad_norm": 0.020079249516129494, "learning_rate": 5.313851830826001e-05, "loss": 0.006077944114804268, "step": 165160 }, { "epoch": 46.883338064149875, "grad_norm": 0.6013755202293396, "learning_rate": 5.3135679818336646e-05, "loss": 0.006243055313825607, "step": 165170 }, { "epoch": 46.88617655407323, "grad_norm": 0.010915352031588554, "learning_rate": 5.313284132841329e-05, "loss": 0.009507717192173004, "step": 165180 }, { "epoch": 46.889015043996594, "grad_norm": 0.03147503361105919, "learning_rate": 5.313000283848992e-05, "loss": 0.00036843530833721163, "step": 165190 }, { "epoch": 46.89185353391996, "grad_norm": 0.5128496289253235, "learning_rate": 5.3127164348566563e-05, "loss": 0.0054844368249177934, "step": 165200 }, { "epoch": 46.89469202384331, "grad_norm": 0.5138306021690369, "learning_rate": 5.312432585864321e-05, "loss": 0.0007009325549006462, "step": 165210 }, { "epoch": 46.897530513766675, "grad_norm": 0.47975701093673706, "learning_rate": 5.3121487368719846e-05, "loss": 0.0010177358984947205, "step": 165220 }, { "epoch": 46.90036900369004, "grad_norm": 0.06515030562877655, "learning_rate": 5.311864887879649e-05, "loss": 0.00115045215934515, "step": 165230 }, { "epoch": 46.9032074936134, "grad_norm": 1.872125506401062, "learning_rate": 5.311581038887312e-05, "loss": 0.0005591647699475288, "step": 165240 }, { "epoch": 46.90604598353676, "grad_norm": 15.097201347351074, "learning_rate": 5.311297189894976e-05, "loss": 0.0071831740438938144, "step": 165250 }, { "epoch": 46.90888447346012, "grad_norm": 0.26181235909461975, "learning_rate": 5.31101334090264e-05, "loss": 0.003196173906326294, "step": 165260 }, { "epoch": 46.91172296338348, "grad_norm": 0.08525344729423523, "learning_rate": 5.310729491910303e-05, "loss": 0.012831127643585205, "step": 165270 }, { "epoch": 46.91456145330684, "grad_norm": 0.1097240000963211, "learning_rate": 5.310445642917968e-05, "loss": 0.0012742003425955772, "step": 165280 }, { "epoch": 46.9173999432302, "grad_norm": 0.9831604361534119, "learning_rate": 5.310161793925632e-05, "loss": 0.007013158500194549, "step": 165290 }, { "epoch": 46.920238433153564, "grad_norm": 17.303735733032227, "learning_rate": 5.309877944933296e-05, "loss": 0.010859248042106629, "step": 165300 }, { "epoch": 46.92307692307692, "grad_norm": 0.9799913167953491, "learning_rate": 5.30959409594096e-05, "loss": 0.005504919588565827, "step": 165310 }, { "epoch": 46.92591541300028, "grad_norm": 0.23109747469425201, "learning_rate": 5.309310246948623e-05, "loss": 0.0019059201702475547, "step": 165320 }, { "epoch": 46.928753902923646, "grad_norm": 0.11546457558870316, "learning_rate": 5.3090263979562874e-05, "loss": 0.000356811098754406, "step": 165330 }, { "epoch": 46.93159239284701, "grad_norm": 0.39509496092796326, "learning_rate": 5.308742548963952e-05, "loss": 0.0005500344559550285, "step": 165340 }, { "epoch": 46.934430882770364, "grad_norm": 11.088008880615234, "learning_rate": 5.308458699971616e-05, "loss": 0.005805023014545441, "step": 165350 }, { "epoch": 46.93726937269373, "grad_norm": 3.754624843597412, "learning_rate": 5.308174850979279e-05, "loss": 0.003084545023739338, "step": 165360 }, { "epoch": 46.94010786261709, "grad_norm": 0.005420480389147997, "learning_rate": 5.307891001986943e-05, "loss": 0.008974198997020722, "step": 165370 }, { "epoch": 46.942946352540446, "grad_norm": 0.052847813814878464, "learning_rate": 5.307607152994607e-05, "loss": 0.0036912009119987486, "step": 165380 }, { "epoch": 46.94578484246381, "grad_norm": 1.4170554876327515, "learning_rate": 5.307323304002271e-05, "loss": 0.0007381377741694451, "step": 165390 }, { "epoch": 46.94862333238717, "grad_norm": 0.040951505303382874, "learning_rate": 5.3070394550099343e-05, "loss": 0.0003526641055941582, "step": 165400 }, { "epoch": 46.95146182231053, "grad_norm": 0.022618889808654785, "learning_rate": 5.306755606017599e-05, "loss": 0.0017289163544774056, "step": 165410 }, { "epoch": 46.95430031223389, "grad_norm": 0.05186162516474724, "learning_rate": 5.306471757025263e-05, "loss": 0.007591878622770309, "step": 165420 }, { "epoch": 46.95713880215725, "grad_norm": 0.04865683242678642, "learning_rate": 5.306187908032927e-05, "loss": 0.0007211195304989814, "step": 165430 }, { "epoch": 46.959977292080616, "grad_norm": 0.011640830896794796, "learning_rate": 5.305904059040591e-05, "loss": 0.0009308941662311554, "step": 165440 }, { "epoch": 46.96281578200397, "grad_norm": 0.025141505524516106, "learning_rate": 5.3056202100482544e-05, "loss": 0.0013410652056336403, "step": 165450 }, { "epoch": 46.965654271927335, "grad_norm": 0.0705638900399208, "learning_rate": 5.305336361055918e-05, "loss": 0.0005087418481707573, "step": 165460 }, { "epoch": 46.9684927618507, "grad_norm": 0.003962900023907423, "learning_rate": 5.305052512063582e-05, "loss": 0.00016014128923416137, "step": 165470 }, { "epoch": 46.97133125177405, "grad_norm": 0.2901109755039215, "learning_rate": 5.304768663071247e-05, "loss": 0.0003242826089262962, "step": 165480 }, { "epoch": 46.974169741697416, "grad_norm": 0.5823953151702881, "learning_rate": 5.30448481407891e-05, "loss": 0.0005790194496512413, "step": 165490 }, { "epoch": 46.97700823162078, "grad_norm": 0.06389641016721725, "learning_rate": 5.3042009650865744e-05, "loss": 0.0004033781588077545, "step": 165500 }, { "epoch": 46.97700823162078, "eval_accuracy": 0.9834043364913843, "eval_loss": 0.06744599342346191, "eval_runtime": 34.0102, "eval_samples_per_second": 462.421, "eval_steps_per_second": 7.233, "step": 165500 }, { "epoch": 46.97984672154414, "grad_norm": 0.024449244141578674, "learning_rate": 5.303917116094238e-05, "loss": 0.0009400499984622002, "step": 165510 }, { "epoch": 46.9826852114675, "grad_norm": 0.010172804817557335, "learning_rate": 5.303633267101902e-05, "loss": 0.0010962856933474541, "step": 165520 }, { "epoch": 46.98552370139086, "grad_norm": 0.03908035531640053, "learning_rate": 5.3033494181095654e-05, "loss": 0.001188839040696621, "step": 165530 }, { "epoch": 46.98836219131422, "grad_norm": 0.1666809469461441, "learning_rate": 5.30306556911723e-05, "loss": 0.0008620595559477806, "step": 165540 }, { "epoch": 46.99120068123758, "grad_norm": 0.15168020129203796, "learning_rate": 5.3027817201248944e-05, "loss": 0.00046611130237579347, "step": 165550 }, { "epoch": 46.99403917116094, "grad_norm": 1.090630292892456, "learning_rate": 5.302497871132558e-05, "loss": 0.0007279498502612114, "step": 165560 }, { "epoch": 46.996877661084305, "grad_norm": 0.13153967261314392, "learning_rate": 5.302214022140222e-05, "loss": 0.0018404796719551086, "step": 165570 }, { "epoch": 46.99971615100766, "grad_norm": 17.33665657043457, "learning_rate": 5.3019301731478854e-05, "loss": 0.009425066411495209, "step": 165580 }, { "epoch": 47.002554640931024, "grad_norm": 0.07474440336227417, "learning_rate": 5.301646324155549e-05, "loss": 0.0005717718042433262, "step": 165590 }, { "epoch": 47.00539313085439, "grad_norm": 0.009968290105462074, "learning_rate": 5.301362475163213e-05, "loss": 0.004202573373913765, "step": 165600 }, { "epoch": 47.00823162077775, "grad_norm": 0.08264816552400589, "learning_rate": 5.301078626170878e-05, "loss": 0.00017354656010866166, "step": 165610 }, { "epoch": 47.011070110701105, "grad_norm": 0.09547740966081619, "learning_rate": 5.300794777178541e-05, "loss": 0.0009524289518594742, "step": 165620 }, { "epoch": 47.01390860062447, "grad_norm": 0.9656988978385925, "learning_rate": 5.3005109281862054e-05, "loss": 0.000710240937769413, "step": 165630 }, { "epoch": 47.01674709054783, "grad_norm": 1.8479256629943848, "learning_rate": 5.300227079193869e-05, "loss": 0.009764330089092254, "step": 165640 }, { "epoch": 47.01958558047119, "grad_norm": 0.025283265858888626, "learning_rate": 5.299943230201533e-05, "loss": 0.001226378232240677, "step": 165650 }, { "epoch": 47.02242407039455, "grad_norm": 0.04165585711598396, "learning_rate": 5.2996593812091965e-05, "loss": 0.0020985063165426254, "step": 165660 }, { "epoch": 47.02526256031791, "grad_norm": 1.2304692268371582, "learning_rate": 5.2993755322168606e-05, "loss": 0.005511408299207687, "step": 165670 }, { "epoch": 47.02810105024127, "grad_norm": 0.282669335603714, "learning_rate": 5.2990916832245255e-05, "loss": 0.000687454640865326, "step": 165680 }, { "epoch": 47.03093954016463, "grad_norm": 2.1172327995300293, "learning_rate": 5.298807834232189e-05, "loss": 0.002123318426311016, "step": 165690 }, { "epoch": 47.033778030087994, "grad_norm": 3.0107579231262207, "learning_rate": 5.298523985239853e-05, "loss": 0.005178391933441162, "step": 165700 }, { "epoch": 47.03661652001136, "grad_norm": 3.3194549083709717, "learning_rate": 5.2982401362475165e-05, "loss": 0.0013821592554450035, "step": 165710 }, { "epoch": 47.03945500993471, "grad_norm": 0.004713813774287701, "learning_rate": 5.29795628725518e-05, "loss": 0.002544776536524296, "step": 165720 }, { "epoch": 47.042293499858076, "grad_norm": 0.01989128068089485, "learning_rate": 5.297672438262844e-05, "loss": 0.015817905962467193, "step": 165730 }, { "epoch": 47.04513198978144, "grad_norm": 0.011016498319804668, "learning_rate": 5.297388589270509e-05, "loss": 0.0010878358036279679, "step": 165740 }, { "epoch": 47.047970479704794, "grad_norm": 0.015210695564746857, "learning_rate": 5.2971047402781724e-05, "loss": 0.0012009505182504654, "step": 165750 }, { "epoch": 47.05080896962816, "grad_norm": 0.5606326460838318, "learning_rate": 5.2968208912858365e-05, "loss": 0.001152154989540577, "step": 165760 }, { "epoch": 47.05364745955152, "grad_norm": 0.18850776553153992, "learning_rate": 5.2965370422935e-05, "loss": 0.00042840894311666486, "step": 165770 }, { "epoch": 47.056485949474876, "grad_norm": 0.27655598521232605, "learning_rate": 5.296253193301164e-05, "loss": 0.0005877438932657241, "step": 165780 }, { "epoch": 47.05932443939824, "grad_norm": 2.1636600494384766, "learning_rate": 5.2959693443088276e-05, "loss": 0.005510587990283966, "step": 165790 }, { "epoch": 47.0621629293216, "grad_norm": 0.040806468576192856, "learning_rate": 5.295685495316492e-05, "loss": 0.00019103065133094788, "step": 165800 }, { "epoch": 47.065001419244965, "grad_norm": 1.195719838142395, "learning_rate": 5.2954016463241565e-05, "loss": 0.004736392945051193, "step": 165810 }, { "epoch": 47.06783990916832, "grad_norm": 0.034314900636672974, "learning_rate": 5.29511779733182e-05, "loss": 0.0007178839296102524, "step": 165820 }, { "epoch": 47.07067839909168, "grad_norm": 0.13804897665977478, "learning_rate": 5.2948339483394834e-05, "loss": 0.0005042914301156998, "step": 165830 }, { "epoch": 47.073516889015046, "grad_norm": 0.011656164191663265, "learning_rate": 5.2945500993471476e-05, "loss": 0.0004541376605629921, "step": 165840 }, { "epoch": 47.0763553789384, "grad_norm": 0.018055960536003113, "learning_rate": 5.294266250354811e-05, "loss": 0.000262993760406971, "step": 165850 }, { "epoch": 47.079193868861765, "grad_norm": 0.19947485625743866, "learning_rate": 5.293982401362475e-05, "loss": 0.0011315250769257546, "step": 165860 }, { "epoch": 47.08203235878513, "grad_norm": 0.3909997045993805, "learning_rate": 5.2936985523701386e-05, "loss": 0.0002881085500121117, "step": 165870 }, { "epoch": 47.08487084870849, "grad_norm": 0.4581909775733948, "learning_rate": 5.2934147033778035e-05, "loss": 0.00039067137986421586, "step": 165880 }, { "epoch": 47.087709338631846, "grad_norm": 0.00627509830519557, "learning_rate": 5.2931308543854676e-05, "loss": 0.0005099896341562271, "step": 165890 }, { "epoch": 47.09054782855521, "grad_norm": 0.04172228276729584, "learning_rate": 5.292847005393131e-05, "loss": 0.0022157559171319006, "step": 165900 }, { "epoch": 47.09338631847857, "grad_norm": 1.8427138328552246, "learning_rate": 5.292563156400795e-05, "loss": 0.0011202156543731689, "step": 165910 }, { "epoch": 47.09622480840193, "grad_norm": 0.02245367132127285, "learning_rate": 5.2922793074084587e-05, "loss": 0.0005897879600524902, "step": 165920 }, { "epoch": 47.09906329832529, "grad_norm": 0.04035332798957825, "learning_rate": 5.2920238433153566e-05, "loss": 0.015618778765201569, "step": 165930 }, { "epoch": 47.101901788248654, "grad_norm": 0.002970238449051976, "learning_rate": 5.291739994323021e-05, "loss": 9.916350245475769e-05, "step": 165940 }, { "epoch": 47.10474027817201, "grad_norm": 0.025254210457205772, "learning_rate": 5.291456145330684e-05, "loss": 0.0005587629973888397, "step": 165950 }, { "epoch": 47.10757876809537, "grad_norm": 1.070662498474121, "learning_rate": 5.2911722963383484e-05, "loss": 0.0006078548729419708, "step": 165960 }, { "epoch": 47.110417258018735, "grad_norm": 0.137339785695076, "learning_rate": 5.290888447346012e-05, "loss": 0.00028986185789108275, "step": 165970 }, { "epoch": 47.1132557479421, "grad_norm": 0.12066911160945892, "learning_rate": 5.290604598353675e-05, "loss": 0.000445563904941082, "step": 165980 }, { "epoch": 47.116094237865454, "grad_norm": 0.007078132126480341, "learning_rate": 5.290320749361341e-05, "loss": 0.0012267936021089555, "step": 165990 }, { "epoch": 47.11893272778882, "grad_norm": 0.019854731857776642, "learning_rate": 5.290036900369004e-05, "loss": 6.612129509449005e-05, "step": 166000 }, { "epoch": 47.11893272778882, "eval_accuracy": 0.9853754689387677, "eval_loss": 0.06434072554111481, "eval_runtime": 35.4639, "eval_samples_per_second": 443.465, "eval_steps_per_second": 6.937, "step": 166000 }, { "epoch": 47.12177121771218, "grad_norm": 0.01954798400402069, "learning_rate": 5.289753051376668e-05, "loss": 0.00022166799753904344, "step": 166010 }, { "epoch": 47.124609707635535, "grad_norm": 0.3871883749961853, "learning_rate": 5.289469202384332e-05, "loss": 0.0015082418918609618, "step": 166020 }, { "epoch": 47.1274481975589, "grad_norm": 0.09418389201164246, "learning_rate": 5.289185353391995e-05, "loss": 0.0011137686669826508, "step": 166030 }, { "epoch": 47.13028668748226, "grad_norm": 1.1215718984603882, "learning_rate": 5.2889015043996595e-05, "loss": 0.010057423263788223, "step": 166040 }, { "epoch": 47.13312517740562, "grad_norm": 0.1584499180316925, "learning_rate": 5.288617655407323e-05, "loss": 0.0009113853797316551, "step": 166050 }, { "epoch": 47.13596366732898, "grad_norm": 23.905670166015625, "learning_rate": 5.288333806414988e-05, "loss": 0.008307871967554092, "step": 166060 }, { "epoch": 47.13880215725234, "grad_norm": 0.06161513552069664, "learning_rate": 5.288049957422652e-05, "loss": 0.002290584705770016, "step": 166070 }, { "epoch": 47.141640647175706, "grad_norm": 0.01899268850684166, "learning_rate": 5.287766108430315e-05, "loss": 0.000828574039041996, "step": 166080 }, { "epoch": 47.14447913709906, "grad_norm": 0.038225360214710236, "learning_rate": 5.2874822594379795e-05, "loss": 0.00022326093167066575, "step": 166090 }, { "epoch": 47.147317627022424, "grad_norm": 0.0730384811758995, "learning_rate": 5.287198410445643e-05, "loss": 0.0007906967774033546, "step": 166100 }, { "epoch": 47.15015611694579, "grad_norm": 0.0015913447132334113, "learning_rate": 5.2869145614533064e-05, "loss": 0.0004580916836857796, "step": 166110 }, { "epoch": 47.15299460686914, "grad_norm": 0.1753922551870346, "learning_rate": 5.286630712460971e-05, "loss": 0.0006380291655659675, "step": 166120 }, { "epoch": 47.155833096792506, "grad_norm": 0.004492358770221472, "learning_rate": 5.286346863468635e-05, "loss": 0.0016625341027975082, "step": 166130 }, { "epoch": 47.15867158671587, "grad_norm": 0.0456785187125206, "learning_rate": 5.286063014476299e-05, "loss": 0.006846468150615692, "step": 166140 }, { "epoch": 47.161510076639225, "grad_norm": 0.008336924016475677, "learning_rate": 5.285779165483963e-05, "loss": 0.0005162119865417481, "step": 166150 }, { "epoch": 47.16434856656259, "grad_norm": 0.04916519299149513, "learning_rate": 5.2854953164916264e-05, "loss": 0.0007955053821206093, "step": 166160 }, { "epoch": 47.16718705648595, "grad_norm": 0.9688761234283447, "learning_rate": 5.2852114674992905e-05, "loss": 0.0006530920043587684, "step": 166170 }, { "epoch": 47.17002554640931, "grad_norm": 0.1581534594297409, "learning_rate": 5.284927618506954e-05, "loss": 0.00015444792807102204, "step": 166180 }, { "epoch": 47.17286403633267, "grad_norm": 2.727480411529541, "learning_rate": 5.284643769514619e-05, "loss": 0.0013980492949485779, "step": 166190 }, { "epoch": 47.17570252625603, "grad_norm": 0.055984169244766235, "learning_rate": 5.284359920522283e-05, "loss": 0.00033441167324781417, "step": 166200 }, { "epoch": 47.178541016179395, "grad_norm": 0.03017616830766201, "learning_rate": 5.2840760715299464e-05, "loss": 0.0007129468023777008, "step": 166210 }, { "epoch": 47.18137950610275, "grad_norm": 0.17422787845134735, "learning_rate": 5.28379222253761e-05, "loss": 0.0003732476383447647, "step": 166220 }, { "epoch": 47.18421799602611, "grad_norm": 0.3361385762691498, "learning_rate": 5.283508373545274e-05, "loss": 0.0017517609521746635, "step": 166230 }, { "epoch": 47.187056485949476, "grad_norm": 0.032731249928474426, "learning_rate": 5.2832245245529375e-05, "loss": 0.0034328050911426543, "step": 166240 }, { "epoch": 47.18989497587283, "grad_norm": 0.14068067073822021, "learning_rate": 5.2829406755606016e-05, "loss": 0.0007778115570545197, "step": 166250 }, { "epoch": 47.192733465796195, "grad_norm": 0.09111491590738297, "learning_rate": 5.2826568265682664e-05, "loss": 0.001295914500951767, "step": 166260 }, { "epoch": 47.19557195571956, "grad_norm": 0.15714767575263977, "learning_rate": 5.28237297757593e-05, "loss": 0.0042479228228330616, "step": 166270 }, { "epoch": 47.19841044564292, "grad_norm": 0.13359786570072174, "learning_rate": 5.282089128583594e-05, "loss": 0.003353724628686905, "step": 166280 }, { "epoch": 47.20124893556628, "grad_norm": 2.4261314868927, "learning_rate": 5.2818052795912575e-05, "loss": 0.004206588864326477, "step": 166290 }, { "epoch": 47.20408742548964, "grad_norm": 1.2046884298324585, "learning_rate": 5.2815214305989216e-05, "loss": 0.0003070641309022903, "step": 166300 }, { "epoch": 47.206925915413, "grad_norm": 10.354576110839844, "learning_rate": 5.281237581606585e-05, "loss": 0.00850830078125, "step": 166310 }, { "epoch": 47.20976440533636, "grad_norm": 1.2296448945999146, "learning_rate": 5.28095373261425e-05, "loss": 0.002916264347732067, "step": 166320 }, { "epoch": 47.21260289525972, "grad_norm": 5.349850177764893, "learning_rate": 5.280669883621914e-05, "loss": 0.009799469262361526, "step": 166330 }, { "epoch": 47.215441385183084, "grad_norm": 0.3105749189853668, "learning_rate": 5.2803860346295775e-05, "loss": 0.0008969709277153015, "step": 166340 }, { "epoch": 47.21827987510645, "grad_norm": 0.15404950082302094, "learning_rate": 5.280102185637241e-05, "loss": 0.005632200837135315, "step": 166350 }, { "epoch": 47.2211183650298, "grad_norm": 0.14473956823349, "learning_rate": 5.279818336644905e-05, "loss": 0.004847605526447296, "step": 166360 }, { "epoch": 47.223956854953165, "grad_norm": 0.01723306253552437, "learning_rate": 5.2795344876525685e-05, "loss": 0.006452636420726776, "step": 166370 }, { "epoch": 47.22679534487653, "grad_norm": 10.309540748596191, "learning_rate": 5.279250638660233e-05, "loss": 0.0035555660724639893, "step": 166380 }, { "epoch": 47.229633834799884, "grad_norm": 5.108060836791992, "learning_rate": 5.2789667896678975e-05, "loss": 0.002332133986055851, "step": 166390 }, { "epoch": 47.23247232472325, "grad_norm": 0.022411329671740532, "learning_rate": 5.278682940675561e-05, "loss": 0.00032005924731492996, "step": 166400 }, { "epoch": 47.23531081464661, "grad_norm": 0.05336960405111313, "learning_rate": 5.278399091683225e-05, "loss": 0.00784326046705246, "step": 166410 }, { "epoch": 47.238149304569966, "grad_norm": 0.1261288970708847, "learning_rate": 5.2781152426908885e-05, "loss": 0.012076672911643983, "step": 166420 }, { "epoch": 47.24098779449333, "grad_norm": 10.071539878845215, "learning_rate": 5.277831393698553e-05, "loss": 0.004104364663362503, "step": 166430 }, { "epoch": 47.24382628441669, "grad_norm": 0.13978561758995056, "learning_rate": 5.277547544706216e-05, "loss": 0.00032413024455308915, "step": 166440 }, { "epoch": 47.246664774340054, "grad_norm": 0.03106663189828396, "learning_rate": 5.277263695713881e-05, "loss": 0.0012975038960576057, "step": 166450 }, { "epoch": 47.24950326426341, "grad_norm": 0.010754905641078949, "learning_rate": 5.276979846721545e-05, "loss": 0.0002751009538769722, "step": 166460 }, { "epoch": 47.25234175418677, "grad_norm": 0.1718127429485321, "learning_rate": 5.2766959977292086e-05, "loss": 0.0011797565966844559, "step": 166470 }, { "epoch": 47.255180244110136, "grad_norm": 0.09654141962528229, "learning_rate": 5.276412148736872e-05, "loss": 0.0003275373950600624, "step": 166480 }, { "epoch": 47.25801873403349, "grad_norm": 0.12636995315551758, "learning_rate": 5.276128299744536e-05, "loss": 0.00038160793483257296, "step": 166490 }, { "epoch": 47.260857223956855, "grad_norm": 0.044128309935331345, "learning_rate": 5.2758444507521996e-05, "loss": 0.00014971271157264708, "step": 166500 }, { "epoch": 47.260857223956855, "eval_accuracy": 0.9844216951739048, "eval_loss": 0.0640367791056633, "eval_runtime": 34.1884, "eval_samples_per_second": 460.01, "eval_steps_per_second": 7.195, "step": 166500 }, { "epoch": 47.26369571388022, "grad_norm": 0.0042740521021187305, "learning_rate": 5.275560601759864e-05, "loss": 0.0003721872344613075, "step": 166510 }, { "epoch": 47.26653420380357, "grad_norm": 0.005104904528707266, "learning_rate": 5.2752767527675286e-05, "loss": 0.0008218273520469666, "step": 166520 }, { "epoch": 47.269372693726936, "grad_norm": 0.037011150270700455, "learning_rate": 5.274992903775192e-05, "loss": 0.00029018912464380265, "step": 166530 }, { "epoch": 47.2722111836503, "grad_norm": 0.03869977965950966, "learning_rate": 5.274709054782856e-05, "loss": 0.0010556325316429139, "step": 166540 }, { "epoch": 47.27504967357366, "grad_norm": 0.32294923067092896, "learning_rate": 5.2744252057905196e-05, "loss": 0.00016855746507644654, "step": 166550 }, { "epoch": 47.27788816349702, "grad_norm": 2.9058151245117188, "learning_rate": 5.274141356798184e-05, "loss": 0.0007623642683029175, "step": 166560 }, { "epoch": 47.28072665342038, "grad_norm": 0.01905333623290062, "learning_rate": 5.273857507805847e-05, "loss": 0.00016860440373420716, "step": 166570 }, { "epoch": 47.28356514334374, "grad_norm": 0.07354488223791122, "learning_rate": 5.273573658813511e-05, "loss": 0.0006312783807516098, "step": 166580 }, { "epoch": 47.2864036332671, "grad_norm": 0.14596717059612274, "learning_rate": 5.2732898098211755e-05, "loss": 0.00017186924815177919, "step": 166590 }, { "epoch": 47.28924212319046, "grad_norm": 0.03604298457503319, "learning_rate": 5.2730059608288396e-05, "loss": 0.0005163691937923431, "step": 166600 }, { "epoch": 47.292080613113825, "grad_norm": 0.010628595016896725, "learning_rate": 5.272722111836503e-05, "loss": 0.0010418226942420007, "step": 166610 }, { "epoch": 47.29491910303718, "grad_norm": 0.042304590344429016, "learning_rate": 5.272438262844167e-05, "loss": 0.0001454310491681099, "step": 166620 }, { "epoch": 47.297757592960544, "grad_norm": 0.006932371761649847, "learning_rate": 5.272154413851831e-05, "loss": 0.00013930890709161757, "step": 166630 }, { "epoch": 47.30059608288391, "grad_norm": 0.007671685889363289, "learning_rate": 5.271870564859495e-05, "loss": 0.0003259913995862007, "step": 166640 }, { "epoch": 47.30343457280727, "grad_norm": 0.014329459518194199, "learning_rate": 5.2715867158671596e-05, "loss": 0.0005452604964375496, "step": 166650 }, { "epoch": 47.306273062730625, "grad_norm": 0.007297629490494728, "learning_rate": 5.271302866874823e-05, "loss": 0.0004180746152997017, "step": 166660 }, { "epoch": 47.30911155265399, "grad_norm": 0.12028004974126816, "learning_rate": 5.271019017882487e-05, "loss": 0.0002833988517522812, "step": 166670 }, { "epoch": 47.31195004257735, "grad_norm": 0.12449847161769867, "learning_rate": 5.270735168890151e-05, "loss": 0.0001417560502886772, "step": 166680 }, { "epoch": 47.31478853250071, "grad_norm": 7.9747843742370605, "learning_rate": 5.270451319897814e-05, "loss": 0.0052520155906677244, "step": 166690 }, { "epoch": 47.31762702242407, "grad_norm": 0.04660665988922119, "learning_rate": 5.270167470905478e-05, "loss": 0.0004315996542572975, "step": 166700 }, { "epoch": 47.32046551234743, "grad_norm": 1.9401639699935913, "learning_rate": 5.269883621913142e-05, "loss": 0.0022576481103897096, "step": 166710 }, { "epoch": 47.323304002270795, "grad_norm": 0.010950454510748386, "learning_rate": 5.2695997729208066e-05, "loss": 0.000249863974750042, "step": 166720 }, { "epoch": 47.32614249219415, "grad_norm": 0.044718511402606964, "learning_rate": 5.269315923928471e-05, "loss": 0.0001087486743927002, "step": 166730 }, { "epoch": 47.328980982117514, "grad_norm": 0.10349511355161667, "learning_rate": 5.269032074936134e-05, "loss": 0.00045157279819250105, "step": 166740 }, { "epoch": 47.33181947204088, "grad_norm": 0.006912370678037405, "learning_rate": 5.268748225943798e-05, "loss": 0.0005410011857748031, "step": 166750 }, { "epoch": 47.33465796196423, "grad_norm": 0.23242416977882385, "learning_rate": 5.268464376951462e-05, "loss": 0.00010576006025075912, "step": 166760 }, { "epoch": 47.337496451887596, "grad_norm": 0.10857999324798584, "learning_rate": 5.268180527959126e-05, "loss": 0.0036471303552389144, "step": 166770 }, { "epoch": 47.34033494181096, "grad_norm": 0.044723257422447205, "learning_rate": 5.2678966789667894e-05, "loss": 0.0011435393244028091, "step": 166780 }, { "epoch": 47.343173431734314, "grad_norm": 0.08844407647848129, "learning_rate": 5.267612829974454e-05, "loss": 0.014150601625442506, "step": 166790 }, { "epoch": 47.34601192165768, "grad_norm": 0.9992348551750183, "learning_rate": 5.267328980982118e-05, "loss": 0.002608104608952999, "step": 166800 }, { "epoch": 47.34885041158104, "grad_norm": 0.024144552648067474, "learning_rate": 5.267045131989782e-05, "loss": 0.009970492124557495, "step": 166810 }, { "epoch": 47.3516889015044, "grad_norm": 0.0034485652577131987, "learning_rate": 5.266761282997445e-05, "loss": 0.0026295047253370287, "step": 166820 }, { "epoch": 47.35452739142776, "grad_norm": 0.015175798907876015, "learning_rate": 5.2664774340051094e-05, "loss": 0.008172889798879623, "step": 166830 }, { "epoch": 47.35736588135112, "grad_norm": 0.015477482229471207, "learning_rate": 5.266193585012773e-05, "loss": 0.0006014207378029823, "step": 166840 }, { "epoch": 47.360204371274484, "grad_norm": 0.0980260819196701, "learning_rate": 5.2659097360204376e-05, "loss": 0.004081302881240844, "step": 166850 }, { "epoch": 47.36304286119784, "grad_norm": 1.2960976362228394, "learning_rate": 5.265625887028102e-05, "loss": 0.0016308054327964783, "step": 166860 }, { "epoch": 47.3658813511212, "grad_norm": 2.8868603706359863, "learning_rate": 5.265342038035765e-05, "loss": 0.0015324357897043227, "step": 166870 }, { "epoch": 47.368719841044566, "grad_norm": 0.7814738750457764, "learning_rate": 5.2650581890434294e-05, "loss": 0.0009984621778130532, "step": 166880 }, { "epoch": 47.37155833096792, "grad_norm": 0.7778225541114807, "learning_rate": 5.264774340051093e-05, "loss": 0.0014217784628272057, "step": 166890 }, { "epoch": 47.374396820891285, "grad_norm": 0.1819504350423813, "learning_rate": 5.264490491058757e-05, "loss": 0.012876455485820771, "step": 166900 }, { "epoch": 47.37723531081465, "grad_norm": 0.007079355418682098, "learning_rate": 5.2642066420664204e-05, "loss": 0.000867467001080513, "step": 166910 }, { "epoch": 47.38007380073801, "grad_norm": 0.3608905076980591, "learning_rate": 5.263922793074085e-05, "loss": 0.00960761159658432, "step": 166920 }, { "epoch": 47.382912290661366, "grad_norm": 1.9556928873062134, "learning_rate": 5.2636389440817494e-05, "loss": 0.001226402446627617, "step": 166930 }, { "epoch": 47.38575078058473, "grad_norm": 0.23868544399738312, "learning_rate": 5.263355095089413e-05, "loss": 0.0015514982864260674, "step": 166940 }, { "epoch": 47.38858927050809, "grad_norm": 0.06806334108114243, "learning_rate": 5.263071246097076e-05, "loss": 0.0011254483833909035, "step": 166950 }, { "epoch": 47.39142776043145, "grad_norm": 0.038743894547224045, "learning_rate": 5.2627873971047404e-05, "loss": 0.0013945359736680985, "step": 166960 }, { "epoch": 47.39426625035481, "grad_norm": 0.056662410497665405, "learning_rate": 5.262503548112404e-05, "loss": 0.005033820495009422, "step": 166970 }, { "epoch": 47.39710474027817, "grad_norm": 0.677243173122406, "learning_rate": 5.262219699120068e-05, "loss": 0.0007417282089591026, "step": 166980 }, { "epoch": 47.39994323020153, "grad_norm": 0.07789450138807297, "learning_rate": 5.261935850127733e-05, "loss": 0.0018675120547413826, "step": 166990 }, { "epoch": 47.40278172012489, "grad_norm": 0.03157220035791397, "learning_rate": 5.261652001135396e-05, "loss": 0.0016803223639726638, "step": 167000 }, { "epoch": 47.40278172012489, "eval_accuracy": 0.9805430151967953, "eval_loss": 0.08013010025024414, "eval_runtime": 33.6691, "eval_samples_per_second": 467.105, "eval_steps_per_second": 7.306, "step": 167000 }, { "epoch": 47.405620210048255, "grad_norm": 0.30453169345855713, "learning_rate": 5.2613681521430605e-05, "loss": 0.0017057489603757858, "step": 167010 }, { "epoch": 47.40845869997162, "grad_norm": 0.22457246482372284, "learning_rate": 5.261084303150724e-05, "loss": 0.0021385570988059046, "step": 167020 }, { "epoch": 47.411297189894974, "grad_norm": 2.8357796669006348, "learning_rate": 5.260800454158388e-05, "loss": 0.0047154143452644345, "step": 167030 }, { "epoch": 47.41413567981834, "grad_norm": 0.41572362184524536, "learning_rate": 5.2605166051660515e-05, "loss": 0.013699346780776977, "step": 167040 }, { "epoch": 47.4169741697417, "grad_norm": 0.02192205935716629, "learning_rate": 5.260232756173716e-05, "loss": 0.00032510124146938323, "step": 167050 }, { "epoch": 47.419812659665055, "grad_norm": 7.994377136230469, "learning_rate": 5.25994890718138e-05, "loss": 0.006555289030075073, "step": 167060 }, { "epoch": 47.42265114958842, "grad_norm": 0.14023131132125854, "learning_rate": 5.259665058189044e-05, "loss": 0.0008802639320492744, "step": 167070 }, { "epoch": 47.42548963951178, "grad_norm": 0.012311067432165146, "learning_rate": 5.2593812091967074e-05, "loss": 0.002046292461454868, "step": 167080 }, { "epoch": 47.428328129435144, "grad_norm": 0.05421510711312294, "learning_rate": 5.2590973602043715e-05, "loss": 0.0017878087237477303, "step": 167090 }, { "epoch": 47.4311666193585, "grad_norm": 14.103699684143066, "learning_rate": 5.258813511212035e-05, "loss": 0.014536020159721375, "step": 167100 }, { "epoch": 47.43400510928186, "grad_norm": 0.2087467908859253, "learning_rate": 5.258529662219699e-05, "loss": 0.001028785854578018, "step": 167110 }, { "epoch": 47.436843599205226, "grad_norm": 0.25825706124305725, "learning_rate": 5.258245813227364e-05, "loss": 0.009041027724742889, "step": 167120 }, { "epoch": 47.43968208912858, "grad_norm": 0.09754849225282669, "learning_rate": 5.2579619642350274e-05, "loss": 0.0003370737656950951, "step": 167130 }, { "epoch": 47.442520579051944, "grad_norm": 5.997060775756836, "learning_rate": 5.2576781152426915e-05, "loss": 0.006041136756539345, "step": 167140 }, { "epoch": 47.44535906897531, "grad_norm": 0.1545877456665039, "learning_rate": 5.257394266250355e-05, "loss": 0.0022831188514828684, "step": 167150 }, { "epoch": 47.44819755889866, "grad_norm": 0.38627707958221436, "learning_rate": 5.2571104172580185e-05, "loss": 0.001626572012901306, "step": 167160 }, { "epoch": 47.451036048822026, "grad_norm": 0.029713492840528488, "learning_rate": 5.2568265682656826e-05, "loss": 0.0027602735906839372, "step": 167170 }, { "epoch": 47.45387453874539, "grad_norm": 0.036559782922267914, "learning_rate": 5.256542719273346e-05, "loss": 0.00149884894490242, "step": 167180 }, { "epoch": 47.45671302866875, "grad_norm": 0.7431686520576477, "learning_rate": 5.256258870281011e-05, "loss": 0.00030505843460559845, "step": 167190 }, { "epoch": 47.45955151859211, "grad_norm": 0.9765399098396301, "learning_rate": 5.255975021288675e-05, "loss": 0.0029207773506641386, "step": 167200 }, { "epoch": 47.46239000851547, "grad_norm": 0.3893764913082123, "learning_rate": 5.2556911722963385e-05, "loss": 0.00033113863319158556, "step": 167210 }, { "epoch": 47.46522849843883, "grad_norm": 0.009321015328168869, "learning_rate": 5.2554073233040026e-05, "loss": 0.00024921242147684096, "step": 167220 }, { "epoch": 47.46806698836219, "grad_norm": 10.566884994506836, "learning_rate": 5.255123474311666e-05, "loss": 0.001682123914361, "step": 167230 }, { "epoch": 47.47090547828555, "grad_norm": 0.03652959689497948, "learning_rate": 5.25483962531933e-05, "loss": 0.0003044677898287773, "step": 167240 }, { "epoch": 47.473743968208915, "grad_norm": 0.01318306103348732, "learning_rate": 5.254555776326995e-05, "loss": 0.0027083862572908402, "step": 167250 }, { "epoch": 47.47658245813227, "grad_norm": 0.01672990433871746, "learning_rate": 5.2542719273346585e-05, "loss": 0.00030224435031414034, "step": 167260 }, { "epoch": 47.47942094805563, "grad_norm": 0.02858227677643299, "learning_rate": 5.2539880783423226e-05, "loss": 0.0005406834185123444, "step": 167270 }, { "epoch": 47.482259437978996, "grad_norm": 0.0298707764595747, "learning_rate": 5.253704229349986e-05, "loss": 0.0004708079621195793, "step": 167280 }, { "epoch": 47.48509792790236, "grad_norm": 0.20858587324619293, "learning_rate": 5.2534203803576495e-05, "loss": 0.010516070574522019, "step": 167290 }, { "epoch": 47.487936417825715, "grad_norm": 0.1043524444103241, "learning_rate": 5.253136531365314e-05, "loss": 0.00023540426045656204, "step": 167300 }, { "epoch": 47.49077490774908, "grad_norm": 1.6719950437545776, "learning_rate": 5.252852682372977e-05, "loss": 0.000615539588034153, "step": 167310 }, { "epoch": 47.49361339767244, "grad_norm": 0.13115805387496948, "learning_rate": 5.252568833380642e-05, "loss": 0.00014178287237882615, "step": 167320 }, { "epoch": 47.496451887595796, "grad_norm": 0.25651228427886963, "learning_rate": 5.252284984388306e-05, "loss": 0.0003449423238635063, "step": 167330 }, { "epoch": 47.49929037751916, "grad_norm": 0.9784300923347473, "learning_rate": 5.2520011353959695e-05, "loss": 0.0021761221811175346, "step": 167340 }, { "epoch": 47.50212886744252, "grad_norm": 0.03507251292467117, "learning_rate": 5.251717286403634e-05, "loss": 0.001216168887913227, "step": 167350 }, { "epoch": 47.50496735736588, "grad_norm": 0.037180595099925995, "learning_rate": 5.251433437411297e-05, "loss": 0.003997651487588882, "step": 167360 }, { "epoch": 47.50780584728924, "grad_norm": 9.196673393249512, "learning_rate": 5.251149588418961e-05, "loss": 0.003172621876001358, "step": 167370 }, { "epoch": 47.510644337212604, "grad_norm": 0.3982827365398407, "learning_rate": 5.250865739426625e-05, "loss": 0.004454168677330017, "step": 167380 }, { "epoch": 47.51348282713597, "grad_norm": 0.41422808170318604, "learning_rate": 5.2505818904342895e-05, "loss": 0.0006493605673313141, "step": 167390 }, { "epoch": 47.51632131705932, "grad_norm": 0.26815783977508545, "learning_rate": 5.250298041441954e-05, "loss": 0.0002329448238015175, "step": 167400 }, { "epoch": 47.519159806982685, "grad_norm": 0.07583802938461304, "learning_rate": 5.250014192449617e-05, "loss": 0.0004238005727529526, "step": 167410 }, { "epoch": 47.52199829690605, "grad_norm": 0.050296224653720856, "learning_rate": 5.2497303434572806e-05, "loss": 0.0004106746986508369, "step": 167420 }, { "epoch": 47.524836786829404, "grad_norm": 0.02042561024427414, "learning_rate": 5.249446494464945e-05, "loss": 0.0008412597700953484, "step": 167430 }, { "epoch": 47.52767527675277, "grad_norm": 0.2631629705429077, "learning_rate": 5.249162645472608e-05, "loss": 0.002032356709241867, "step": 167440 }, { "epoch": 47.53051376667613, "grad_norm": 0.07955020666122437, "learning_rate": 5.248878796480273e-05, "loss": 0.0003062641248106956, "step": 167450 }, { "epoch": 47.533352256599485, "grad_norm": 0.13592423498630524, "learning_rate": 5.248594947487937e-05, "loss": 0.00035659149289131166, "step": 167460 }, { "epoch": 47.53619074652285, "grad_norm": 0.29726457595825195, "learning_rate": 5.2483110984956006e-05, "loss": 0.00019919518381357194, "step": 167470 }, { "epoch": 47.53902923644621, "grad_norm": 0.06623820215463638, "learning_rate": 5.248027249503265e-05, "loss": 0.00028131455183029176, "step": 167480 }, { "epoch": 47.541867726369574, "grad_norm": 0.06319672614336014, "learning_rate": 5.247743400510928e-05, "loss": 8.058659732341766e-05, "step": 167490 }, { "epoch": 47.54470621629293, "grad_norm": 0.27699872851371765, "learning_rate": 5.2474595515185924e-05, "loss": 0.00024262648075819017, "step": 167500 }, { "epoch": 47.54470621629293, "eval_accuracy": 0.9846760348445349, "eval_loss": 0.061824530363082886, "eval_runtime": 34.7987, "eval_samples_per_second": 451.942, "eval_steps_per_second": 7.069, "step": 167500 }, { "epoch": 47.54754470621629, "grad_norm": 0.014071105979382992, "learning_rate": 5.247175702526256e-05, "loss": 0.0003407519310712814, "step": 167510 }, { "epoch": 47.550383196139656, "grad_norm": 0.012202532961964607, "learning_rate": 5.2468918535339206e-05, "loss": 6.537958979606629e-05, "step": 167520 }, { "epoch": 47.55322168606301, "grad_norm": 0.07051093876361847, "learning_rate": 5.246608004541584e-05, "loss": 0.00029912274330854414, "step": 167530 }, { "epoch": 47.556060175986374, "grad_norm": 0.04351521283388138, "learning_rate": 5.246324155549248e-05, "loss": 0.0001537717878818512, "step": 167540 }, { "epoch": 47.55889866590974, "grad_norm": 0.021716434508562088, "learning_rate": 5.246040306556912e-05, "loss": 0.0012583035975694656, "step": 167550 }, { "epoch": 47.5617371558331, "grad_norm": 0.0201044213026762, "learning_rate": 5.245756457564576e-05, "loss": 0.003882741928100586, "step": 167560 }, { "epoch": 47.564575645756456, "grad_norm": 0.022917164489626884, "learning_rate": 5.245472608572239e-05, "loss": 0.0007893575355410575, "step": 167570 }, { "epoch": 47.56741413567982, "grad_norm": 0.08404967933893204, "learning_rate": 5.2451887595799034e-05, "loss": 0.013448211550712585, "step": 167580 }, { "epoch": 47.57025262560318, "grad_norm": 0.003131540957838297, "learning_rate": 5.244904910587568e-05, "loss": 0.00723414421081543, "step": 167590 }, { "epoch": 47.57309111552654, "grad_norm": 0.011245744302868843, "learning_rate": 5.244621061595232e-05, "loss": 0.017796728014945983, "step": 167600 }, { "epoch": 47.5759296054499, "grad_norm": 0.030202150344848633, "learning_rate": 5.244337212602896e-05, "loss": 0.0026536891236901283, "step": 167610 }, { "epoch": 47.57876809537326, "grad_norm": 0.13024942576885223, "learning_rate": 5.244053363610559e-05, "loss": 0.0040173731744289395, "step": 167620 }, { "epoch": 47.58160658529662, "grad_norm": 0.08053561300039291, "learning_rate": 5.243769514618223e-05, "loss": 0.0004609204828739166, "step": 167630 }, { "epoch": 47.58444507521998, "grad_norm": 1.733656883239746, "learning_rate": 5.243485665625887e-05, "loss": 0.0024609174579381943, "step": 167640 }, { "epoch": 47.587283565143345, "grad_norm": 7.981705665588379, "learning_rate": 5.243201816633552e-05, "loss": 0.007874681055545807, "step": 167650 }, { "epoch": 47.59012205506671, "grad_norm": 0.08721255511045456, "learning_rate": 5.242917967641215e-05, "loss": 0.012249346077442168, "step": 167660 }, { "epoch": 47.59296054499006, "grad_norm": 0.009116792120039463, "learning_rate": 5.242634118648879e-05, "loss": 0.005207388103008271, "step": 167670 }, { "epoch": 47.595799034913426, "grad_norm": 0.041044075042009354, "learning_rate": 5.242350269656543e-05, "loss": 0.0016981519758701325, "step": 167680 }, { "epoch": 47.59863752483679, "grad_norm": 0.2508912980556488, "learning_rate": 5.242066420664207e-05, "loss": 0.0033692974597215653, "step": 167690 }, { "epoch": 47.601476014760145, "grad_norm": 0.022783806547522545, "learning_rate": 5.2417825716718704e-05, "loss": 0.0013389507308602332, "step": 167700 }, { "epoch": 47.60431450468351, "grad_norm": 0.06140339374542236, "learning_rate": 5.2414987226795345e-05, "loss": 0.0012647133320569993, "step": 167710 }, { "epoch": 47.60715299460687, "grad_norm": 1.6322802305221558, "learning_rate": 5.241214873687199e-05, "loss": 0.006556585431098938, "step": 167720 }, { "epoch": 47.60999148453023, "grad_norm": 0.03294241800904274, "learning_rate": 5.240931024694863e-05, "loss": 0.0016359062865376472, "step": 167730 }, { "epoch": 47.61282997445359, "grad_norm": 0.23127639293670654, "learning_rate": 5.240647175702527e-05, "loss": 0.0087317556142807, "step": 167740 }, { "epoch": 47.61566846437695, "grad_norm": 0.10062713176012039, "learning_rate": 5.2403633267101904e-05, "loss": 0.007807309925556183, "step": 167750 }, { "epoch": 47.618506954300315, "grad_norm": 0.023907439783215523, "learning_rate": 5.240079477717854e-05, "loss": 0.00154388677328825, "step": 167760 }, { "epoch": 47.62134544422367, "grad_norm": 12.464014053344727, "learning_rate": 5.239795628725518e-05, "loss": 0.01288902759552002, "step": 167770 }, { "epoch": 47.624183934147034, "grad_norm": 0.2574860453605652, "learning_rate": 5.239511779733183e-05, "loss": 0.0008720966055989265, "step": 167780 }, { "epoch": 47.6270224240704, "grad_norm": 0.07813920825719833, "learning_rate": 5.239227930740846e-05, "loss": 0.00023787617683410645, "step": 167790 }, { "epoch": 47.62986091399375, "grad_norm": 0.2255236953496933, "learning_rate": 5.2389440817485104e-05, "loss": 0.004667848348617554, "step": 167800 }, { "epoch": 47.632699403917115, "grad_norm": 1.4980509281158447, "learning_rate": 5.238660232756174e-05, "loss": 0.0062880739569664, "step": 167810 }, { "epoch": 47.63553789384048, "grad_norm": 0.16783279180526733, "learning_rate": 5.238376383763838e-05, "loss": 0.0021084399893879892, "step": 167820 }, { "epoch": 47.63837638376384, "grad_norm": 0.16252517700195312, "learning_rate": 5.2380925347715014e-05, "loss": 0.0002838851884007454, "step": 167830 }, { "epoch": 47.6412148736872, "grad_norm": 0.007776624523103237, "learning_rate": 5.2378086857791656e-05, "loss": 0.0010516196489334106, "step": 167840 }, { "epoch": 47.64405336361056, "grad_norm": 0.0638156309723854, "learning_rate": 5.2375248367868304e-05, "loss": 0.0009817993268370628, "step": 167850 }, { "epoch": 47.64689185353392, "grad_norm": 0.06489431113004684, "learning_rate": 5.237240987794494e-05, "loss": 0.00045020412653684616, "step": 167860 }, { "epoch": 47.64973034345728, "grad_norm": 0.007905522361397743, "learning_rate": 5.236957138802158e-05, "loss": 0.0004978187382221222, "step": 167870 }, { "epoch": 47.65256883338064, "grad_norm": 0.21678362786769867, "learning_rate": 5.2366732898098214e-05, "loss": 0.0018007885664701462, "step": 167880 }, { "epoch": 47.655407323304004, "grad_norm": 2.7188668251037598, "learning_rate": 5.236389440817485e-05, "loss": 0.007741740345954895, "step": 167890 }, { "epoch": 47.65824581322736, "grad_norm": 0.007130295038223267, "learning_rate": 5.236105591825149e-05, "loss": 0.0006566276773810386, "step": 167900 }, { "epoch": 47.66108430315072, "grad_norm": 0.043632980436086655, "learning_rate": 5.2358217428328125e-05, "loss": 0.00038492679595947266, "step": 167910 }, { "epoch": 47.663922793074086, "grad_norm": 0.2371196448802948, "learning_rate": 5.235537893840477e-05, "loss": 0.0032657306641340257, "step": 167920 }, { "epoch": 47.66676128299745, "grad_norm": 0.2617782652378082, "learning_rate": 5.2352540448481415e-05, "loss": 0.009955888986587525, "step": 167930 }, { "epoch": 47.669599772920805, "grad_norm": 0.027898740023374557, "learning_rate": 5.234970195855805e-05, "loss": 0.0026493417099118233, "step": 167940 }, { "epoch": 47.67243826284417, "grad_norm": 0.09032541513442993, "learning_rate": 5.234686346863469e-05, "loss": 0.003992386907339096, "step": 167950 }, { "epoch": 47.67527675276753, "grad_norm": 0.06379378587007523, "learning_rate": 5.2344024978711325e-05, "loss": 0.007890459150075912, "step": 167960 }, { "epoch": 47.678115242690886, "grad_norm": 0.524357795715332, "learning_rate": 5.2341186488787966e-05, "loss": 0.005979350954294205, "step": 167970 }, { "epoch": 47.68095373261425, "grad_norm": 0.3797849416732788, "learning_rate": 5.2338347998864615e-05, "loss": 0.007146790623664856, "step": 167980 }, { "epoch": 47.68379222253761, "grad_norm": 4.127708911895752, "learning_rate": 5.233550950894125e-05, "loss": 0.002046644128859043, "step": 167990 }, { "epoch": 47.68663071246097, "grad_norm": 0.020269272848963737, "learning_rate": 5.2332671019017884e-05, "loss": 0.026859018206596374, "step": 168000 }, { "epoch": 47.68663071246097, "eval_accuracy": 0.9796528263495898, "eval_loss": 0.07881426811218262, "eval_runtime": 33.92, "eval_samples_per_second": 463.65, "eval_steps_per_second": 7.252, "step": 168000 }, { "epoch": 47.68946920238433, "grad_norm": 0.0217917338013649, "learning_rate": 5.2329832529094525e-05, "loss": 0.004231991618871689, "step": 168010 }, { "epoch": 47.69230769230769, "grad_norm": 0.04595213010907173, "learning_rate": 5.232699403917116e-05, "loss": 0.0036754295229911803, "step": 168020 }, { "epoch": 47.695146182231056, "grad_norm": 0.032858218997716904, "learning_rate": 5.23241555492478e-05, "loss": 0.0007697438821196556, "step": 168030 }, { "epoch": 47.69798467215441, "grad_norm": 0.7573349475860596, "learning_rate": 5.2321317059324436e-05, "loss": 0.0029288675636053086, "step": 168040 }, { "epoch": 47.700823162077775, "grad_norm": 0.1149166077375412, "learning_rate": 5.2318478569401084e-05, "loss": 0.014555101096630097, "step": 168050 }, { "epoch": 47.70366165200114, "grad_norm": 0.019190892577171326, "learning_rate": 5.2315640079477725e-05, "loss": 0.0007212480530142784, "step": 168060 }, { "epoch": 47.706500141924494, "grad_norm": 0.05284080281853676, "learning_rate": 5.231280158955436e-05, "loss": 0.0038875386118888853, "step": 168070 }, { "epoch": 47.70933863184786, "grad_norm": 0.05337938293814659, "learning_rate": 5.2309963099631e-05, "loss": 0.0008590042591094971, "step": 168080 }, { "epoch": 47.71217712177122, "grad_norm": 4.740211486816406, "learning_rate": 5.2307124609707636e-05, "loss": 0.0018460169434547425, "step": 168090 }, { "epoch": 47.715015611694575, "grad_norm": 3.8986968994140625, "learning_rate": 5.230428611978427e-05, "loss": 0.0011336609721183776, "step": 168100 }, { "epoch": 47.71785410161794, "grad_norm": 0.01143480185419321, "learning_rate": 5.230144762986091e-05, "loss": 0.009959089010953904, "step": 168110 }, { "epoch": 47.7206925915413, "grad_norm": 0.31721293926239014, "learning_rate": 5.229860913993756e-05, "loss": 0.0013358352705836296, "step": 168120 }, { "epoch": 47.723531081464664, "grad_norm": 0.1107940673828125, "learning_rate": 5.2295770650014195e-05, "loss": 0.0006342472508549691, "step": 168130 }, { "epoch": 47.72636957138802, "grad_norm": 0.12838509678840637, "learning_rate": 5.2292932160090836e-05, "loss": 0.0009633345529437065, "step": 168140 }, { "epoch": 47.72920806131138, "grad_norm": 0.005932871717959642, "learning_rate": 5.229009367016747e-05, "loss": 0.002950303070247173, "step": 168150 }, { "epoch": 47.732046551234745, "grad_norm": 0.020103082060813904, "learning_rate": 5.228725518024411e-05, "loss": 0.0012451523914933204, "step": 168160 }, { "epoch": 47.7348850411581, "grad_norm": 0.029243962839245796, "learning_rate": 5.2284416690320747e-05, "loss": 0.00281010027974844, "step": 168170 }, { "epoch": 47.737723531081464, "grad_norm": 0.4643760621547699, "learning_rate": 5.2281578200397395e-05, "loss": 0.0008870098739862442, "step": 168180 }, { "epoch": 47.74056202100483, "grad_norm": 8.948081970214844, "learning_rate": 5.2278739710474036e-05, "loss": 0.004702483117580414, "step": 168190 }, { "epoch": 47.74340051092818, "grad_norm": 0.06231750175356865, "learning_rate": 5.227590122055067e-05, "loss": 0.002549354173243046, "step": 168200 }, { "epoch": 47.746239000851546, "grad_norm": 0.12038164585828781, "learning_rate": 5.227306273062731e-05, "loss": 0.0003492923453450203, "step": 168210 }, { "epoch": 47.74907749077491, "grad_norm": 0.026345014572143555, "learning_rate": 5.227022424070395e-05, "loss": 0.0023725295439362528, "step": 168220 }, { "epoch": 47.75191598069827, "grad_norm": 0.03304043412208557, "learning_rate": 5.226738575078058e-05, "loss": 0.0015295518562197686, "step": 168230 }, { "epoch": 47.75475447062163, "grad_norm": 0.3065239489078522, "learning_rate": 5.226454726085722e-05, "loss": 0.001089405082166195, "step": 168240 }, { "epoch": 47.75759296054499, "grad_norm": 0.013905271887779236, "learning_rate": 5.226170877093387e-05, "loss": 0.0022424431517720222, "step": 168250 }, { "epoch": 47.76043145046835, "grad_norm": 0.025260476395487785, "learning_rate": 5.2258870281010505e-05, "loss": 0.004656286537647247, "step": 168260 }, { "epoch": 47.76326994039171, "grad_norm": 0.08779329061508179, "learning_rate": 5.225603179108715e-05, "loss": 0.0016972724348306655, "step": 168270 }, { "epoch": 47.76610843031507, "grad_norm": 0.02595452405512333, "learning_rate": 5.225319330116378e-05, "loss": 0.0005788732320070267, "step": 168280 }, { "epoch": 47.768946920238434, "grad_norm": 0.2517952024936676, "learning_rate": 5.225035481124042e-05, "loss": 0.00092536099255085, "step": 168290 }, { "epoch": 47.7717854101618, "grad_norm": 0.14743928611278534, "learning_rate": 5.224751632131706e-05, "loss": 0.00039326269179582594, "step": 168300 }, { "epoch": 47.77462390008515, "grad_norm": 0.19466610252857208, "learning_rate": 5.22446778313937e-05, "loss": 0.0004775501787662506, "step": 168310 }, { "epoch": 47.777462390008516, "grad_norm": 0.1532432585954666, "learning_rate": 5.224183934147035e-05, "loss": 0.0018266396597027778, "step": 168320 }, { "epoch": 47.78030087993188, "grad_norm": 1.9613957405090332, "learning_rate": 5.223900085154698e-05, "loss": 0.001077560894191265, "step": 168330 }, { "epoch": 47.783139369855235, "grad_norm": 0.009171808138489723, "learning_rate": 5.223616236162362e-05, "loss": 0.004672363027930259, "step": 168340 }, { "epoch": 47.7859778597786, "grad_norm": 2.2702202796936035, "learning_rate": 5.223332387170026e-05, "loss": 0.0046198412775993345, "step": 168350 }, { "epoch": 47.78881634970196, "grad_norm": 0.148513525724411, "learning_rate": 5.223048538177689e-05, "loss": 0.00334397554397583, "step": 168360 }, { "epoch": 47.791654839625316, "grad_norm": 2.006169319152832, "learning_rate": 5.222764689185353e-05, "loss": 0.0025731494650244715, "step": 168370 }, { "epoch": 47.79449332954868, "grad_norm": 5.169857025146484, "learning_rate": 5.222480840193018e-05, "loss": 0.0011966442689299583, "step": 168380 }, { "epoch": 47.79733181947204, "grad_norm": 0.16845732927322388, "learning_rate": 5.2221969912006816e-05, "loss": 0.002064188756048679, "step": 168390 }, { "epoch": 47.800170309395405, "grad_norm": 0.054867930710315704, "learning_rate": 5.221913142208346e-05, "loss": 0.005886828154325485, "step": 168400 }, { "epoch": 47.80300879931876, "grad_norm": 0.02062874101102352, "learning_rate": 5.221629293216009e-05, "loss": 0.0024167502298951147, "step": 168410 }, { "epoch": 47.805847289242124, "grad_norm": 3.017658233642578, "learning_rate": 5.2213454442236733e-05, "loss": 0.007700935006141663, "step": 168420 }, { "epoch": 47.80868577916549, "grad_norm": 4.040587425231934, "learning_rate": 5.221061595231337e-05, "loss": 0.0032626397907733916, "step": 168430 }, { "epoch": 47.81152426908884, "grad_norm": 0.07607712596654892, "learning_rate": 5.220777746239001e-05, "loss": 0.00035864971578121186, "step": 168440 }, { "epoch": 47.814362759012205, "grad_norm": 0.7589403986930847, "learning_rate": 5.220493897246666e-05, "loss": 0.0010655777528882026, "step": 168450 }, { "epoch": 47.81720124893557, "grad_norm": 0.03529934585094452, "learning_rate": 5.220210048254329e-05, "loss": 0.0012301459908485412, "step": 168460 }, { "epoch": 47.820039738858924, "grad_norm": 0.020168056711554527, "learning_rate": 5.219926199261993e-05, "loss": 0.0025970671325922013, "step": 168470 }, { "epoch": 47.82287822878229, "grad_norm": 8.188736915588379, "learning_rate": 5.219642350269657e-05, "loss": 0.006221314892172813, "step": 168480 }, { "epoch": 47.82571671870565, "grad_norm": 0.09342166036367416, "learning_rate": 5.21935850127732e-05, "loss": 0.004651052877306938, "step": 168490 }, { "epoch": 47.82855520862901, "grad_norm": 0.027947628870606422, "learning_rate": 5.2190746522849844e-05, "loss": 0.0035556510090827944, "step": 168500 }, { "epoch": 47.82855520862901, "eval_accuracy": 0.9825141476441788, "eval_loss": 0.06950222700834274, "eval_runtime": 36.4802, "eval_samples_per_second": 431.11, "eval_steps_per_second": 6.743, "step": 168500 }, { "epoch": 47.83139369855237, "grad_norm": 1.5978673696517944, "learning_rate": 5.218790803292648e-05, "loss": 0.0016476696357131005, "step": 168510 }, { "epoch": 47.83423218847573, "grad_norm": 0.07125792652368546, "learning_rate": 5.218506954300313e-05, "loss": 0.02220817655324936, "step": 168520 }, { "epoch": 47.837070678399094, "grad_norm": 0.18096770346164703, "learning_rate": 5.218223105307977e-05, "loss": 0.0006683645769953728, "step": 168530 }, { "epoch": 47.83990916832245, "grad_norm": 0.027149943634867668, "learning_rate": 5.21793925631564e-05, "loss": 0.0009533250704407692, "step": 168540 }, { "epoch": 47.84274765824581, "grad_norm": 0.12820181250572205, "learning_rate": 5.2176554073233044e-05, "loss": 0.0011973021551966666, "step": 168550 }, { "epoch": 47.845586148169176, "grad_norm": 0.038472943007946014, "learning_rate": 5.217371558330968e-05, "loss": 0.0015174126252532004, "step": 168560 }, { "epoch": 47.84842463809253, "grad_norm": 0.0030534612014889717, "learning_rate": 5.2170877093386313e-05, "loss": 0.011164520680904389, "step": 168570 }, { "epoch": 47.851263128015894, "grad_norm": 0.019066056236624718, "learning_rate": 5.216803860346297e-05, "loss": 0.0021604467183351517, "step": 168580 }, { "epoch": 47.85410161793926, "grad_norm": 0.681826114654541, "learning_rate": 5.21652001135396e-05, "loss": 0.012727925181388855, "step": 168590 }, { "epoch": 47.85694010786262, "grad_norm": 0.22745615243911743, "learning_rate": 5.216236162361624e-05, "loss": 0.010138212144374848, "step": 168600 }, { "epoch": 47.859778597785976, "grad_norm": 1.150004506111145, "learning_rate": 5.215952313369288e-05, "loss": 0.0017575625330209731, "step": 168610 }, { "epoch": 47.86261708770934, "grad_norm": 0.03508690744638443, "learning_rate": 5.2156684643769514e-05, "loss": 0.0004630634561181068, "step": 168620 }, { "epoch": 47.8654555776327, "grad_norm": 1.1357454061508179, "learning_rate": 5.2153846153846155e-05, "loss": 0.0019653460010886194, "step": 168630 }, { "epoch": 47.86829406755606, "grad_norm": 12.086004257202148, "learning_rate": 5.215100766392279e-05, "loss": 0.015030665695667267, "step": 168640 }, { "epoch": 47.87113255747942, "grad_norm": 0.6035994291305542, "learning_rate": 5.214816917399944e-05, "loss": 0.000524241290986538, "step": 168650 }, { "epoch": 47.87397104740278, "grad_norm": 0.011745094321668148, "learning_rate": 5.214533068407608e-05, "loss": 0.0008487666025757789, "step": 168660 }, { "epoch": 47.876809537326146, "grad_norm": 0.06214432790875435, "learning_rate": 5.2142492194152714e-05, "loss": 0.002435859851539135, "step": 168670 }, { "epoch": 47.8796480272495, "grad_norm": 0.5837722420692444, "learning_rate": 5.2139653704229355e-05, "loss": 0.004243297874927521, "step": 168680 }, { "epoch": 47.882486517172865, "grad_norm": 0.07719015330076218, "learning_rate": 5.213681521430599e-05, "loss": 0.0006387539207935334, "step": 168690 }, { "epoch": 47.88532500709623, "grad_norm": 0.4027409553527832, "learning_rate": 5.2133976724382624e-05, "loss": 0.0007786242291331291, "step": 168700 }, { "epoch": 47.88816349701958, "grad_norm": 0.2129209339618683, "learning_rate": 5.2131138234459266e-05, "loss": 0.0002932652831077576, "step": 168710 }, { "epoch": 47.891001986942946, "grad_norm": 0.045939985662698746, "learning_rate": 5.2128299744535914e-05, "loss": 0.004412328824400902, "step": 168720 }, { "epoch": 47.89384047686631, "grad_norm": 0.07811310142278671, "learning_rate": 5.212546125461255e-05, "loss": 0.00010415241122245789, "step": 168730 }, { "epoch": 47.896678966789665, "grad_norm": 0.90378338098526, "learning_rate": 5.212262276468919e-05, "loss": 0.0002545008435845375, "step": 168740 }, { "epoch": 47.89951745671303, "grad_norm": 0.05408381298184395, "learning_rate": 5.2119784274765824e-05, "loss": 0.007136912643909454, "step": 168750 }, { "epoch": 47.90235594663639, "grad_norm": 0.0318538062274456, "learning_rate": 5.2116945784842466e-05, "loss": 0.001945844665169716, "step": 168760 }, { "epoch": 47.90519443655975, "grad_norm": 0.10543574392795563, "learning_rate": 5.21141072949191e-05, "loss": 0.004420486837625503, "step": 168770 }, { "epoch": 47.90803292648311, "grad_norm": 0.2277643233537674, "learning_rate": 5.211126880499575e-05, "loss": 0.0011746259406208992, "step": 168780 }, { "epoch": 47.91087141640647, "grad_norm": 0.21129348874092102, "learning_rate": 5.210843031507239e-05, "loss": 0.0005062012001872063, "step": 168790 }, { "epoch": 47.913709906329835, "grad_norm": 0.0051498101092875, "learning_rate": 5.2105591825149024e-05, "loss": 0.0007574876770377159, "step": 168800 }, { "epoch": 47.91654839625319, "grad_norm": 0.005280068144202232, "learning_rate": 5.2102753335225666e-05, "loss": 0.00021742656826972961, "step": 168810 }, { "epoch": 47.919386886176554, "grad_norm": 0.8461639881134033, "learning_rate": 5.20999148453023e-05, "loss": 0.0015847541391849518, "step": 168820 }, { "epoch": 47.92222537609992, "grad_norm": 0.022861801087856293, "learning_rate": 5.2097076355378935e-05, "loss": 0.00011067390441894531, "step": 168830 }, { "epoch": 47.92506386602327, "grad_norm": 0.04168519750237465, "learning_rate": 5.2094237865455576e-05, "loss": 0.0014826053753495216, "step": 168840 }, { "epoch": 47.927902355946635, "grad_norm": 2.0852065086364746, "learning_rate": 5.2091399375532224e-05, "loss": 0.0022508187219500543, "step": 168850 }, { "epoch": 47.93074084587, "grad_norm": 0.02408798225224018, "learning_rate": 5.208856088560886e-05, "loss": 0.006409173458814621, "step": 168860 }, { "epoch": 47.93357933579336, "grad_norm": 0.6798720359802246, "learning_rate": 5.20857223956855e-05, "loss": 0.0015010995790362357, "step": 168870 }, { "epoch": 47.93641782571672, "grad_norm": 0.05377171188592911, "learning_rate": 5.2082883905762135e-05, "loss": 0.01458638608455658, "step": 168880 }, { "epoch": 47.93925631564008, "grad_norm": 0.019298341125249863, "learning_rate": 5.2080045415838776e-05, "loss": 0.0008792333304882049, "step": 168890 }, { "epoch": 47.94209480556344, "grad_norm": 0.8943296670913696, "learning_rate": 5.207720692591541e-05, "loss": 0.009293907135725022, "step": 168900 }, { "epoch": 47.9449332954868, "grad_norm": 0.017878934741020203, "learning_rate": 5.207436843599205e-05, "loss": 0.000873708538711071, "step": 168910 }, { "epoch": 47.94777178541016, "grad_norm": 0.01232930738478899, "learning_rate": 5.20715299460687e-05, "loss": 0.0002272525802254677, "step": 168920 }, { "epoch": 47.950610275333524, "grad_norm": 0.12244562804698944, "learning_rate": 5.2068691456145335e-05, "loss": 0.00026232395321130754, "step": 168930 }, { "epoch": 47.95344876525688, "grad_norm": 0.13781915605068207, "learning_rate": 5.206585296622197e-05, "loss": 0.0020251618698239326, "step": 168940 }, { "epoch": 47.95628725518024, "grad_norm": 6.055298805236816, "learning_rate": 5.206301447629861e-05, "loss": 0.0067734867334365845, "step": 168950 }, { "epoch": 47.959125745103606, "grad_norm": 0.0674518495798111, "learning_rate": 5.2060175986375246e-05, "loss": 0.009214747697114944, "step": 168960 }, { "epoch": 47.96196423502697, "grad_norm": 0.0009118501911871135, "learning_rate": 5.205733749645189e-05, "loss": 0.003899650275707245, "step": 168970 }, { "epoch": 47.964802724950324, "grad_norm": 0.0312931165099144, "learning_rate": 5.2054499006528535e-05, "loss": 0.0011573674157261849, "step": 168980 }, { "epoch": 47.96764121487369, "grad_norm": 0.11054525524377823, "learning_rate": 5.205166051660517e-05, "loss": 0.0005775755271315575, "step": 168990 }, { "epoch": 47.97047970479705, "grad_norm": 0.03500887006521225, "learning_rate": 5.204882202668181e-05, "loss": 0.005032597109675407, "step": 169000 }, { "epoch": 47.97047970479705, "eval_accuracy": 0.9842945253385896, "eval_loss": 0.06303679198026657, "eval_runtime": 35.3992, "eval_samples_per_second": 444.276, "eval_steps_per_second": 6.949, "step": 169000 }, { "epoch": 47.973318194720406, "grad_norm": 0.024034956470131874, "learning_rate": 5.2045983536758446e-05, "loss": 0.0002641262486577034, "step": 169010 }, { "epoch": 47.97615668464377, "grad_norm": 0.23327341675758362, "learning_rate": 5.204314504683509e-05, "loss": 0.0002582216635346413, "step": 169020 }, { "epoch": 47.97899517456713, "grad_norm": 0.1120612621307373, "learning_rate": 5.204030655691172e-05, "loss": 0.002685690484941006, "step": 169030 }, { "epoch": 47.981833664490495, "grad_norm": 0.0036882334388792515, "learning_rate": 5.2037468066988356e-05, "loss": 0.00011408515274524689, "step": 169040 }, { "epoch": 47.98467215441385, "grad_norm": 1.4659192562103271, "learning_rate": 5.203462957706501e-05, "loss": 0.004119763895869255, "step": 169050 }, { "epoch": 47.98751064433721, "grad_norm": 0.06447150558233261, "learning_rate": 5.2031791087141646e-05, "loss": 0.00011881720274686813, "step": 169060 }, { "epoch": 47.990349134260576, "grad_norm": 0.03855590894818306, "learning_rate": 5.202895259721828e-05, "loss": 0.0051779188215732574, "step": 169070 }, { "epoch": 47.99318762418393, "grad_norm": 0.004211109597235918, "learning_rate": 5.202611410729492e-05, "loss": 0.0004900489002466202, "step": 169080 }, { "epoch": 47.996026114107295, "grad_norm": 4.714810371398926, "learning_rate": 5.202355946636389e-05, "loss": 0.006928452849388122, "step": 169090 }, { "epoch": 47.99886460403066, "grad_norm": 0.09048769623041153, "learning_rate": 5.202072097644054e-05, "loss": 0.00025935135781764986, "step": 169100 }, { "epoch": 48.00170309395401, "grad_norm": 0.003974501043558121, "learning_rate": 5.201788248651718e-05, "loss": 0.0008131522685289383, "step": 169110 }, { "epoch": 48.004541583877376, "grad_norm": 0.0037459672894328833, "learning_rate": 5.201504399659381e-05, "loss": 0.0005619173869490623, "step": 169120 }, { "epoch": 48.00738007380074, "grad_norm": 0.013904325664043427, "learning_rate": 5.2012205506670454e-05, "loss": 0.005674907565116882, "step": 169130 }, { "epoch": 48.0102185637241, "grad_norm": 0.8175116777420044, "learning_rate": 5.200936701674709e-05, "loss": 0.006965428590774536, "step": 169140 }, { "epoch": 48.01305705364746, "grad_norm": 0.09392550587654114, "learning_rate": 5.200652852682373e-05, "loss": 0.0049454070627689365, "step": 169150 }, { "epoch": 48.01589554357082, "grad_norm": 0.2125004678964615, "learning_rate": 5.200369003690038e-05, "loss": 0.005884797126054764, "step": 169160 }, { "epoch": 48.018734033494184, "grad_norm": 0.3208320736885071, "learning_rate": 5.200085154697701e-05, "loss": 0.003868444263935089, "step": 169170 }, { "epoch": 48.02157252341754, "grad_norm": 0.03139375522732735, "learning_rate": 5.1998013057053654e-05, "loss": 0.0007406063377857208, "step": 169180 }, { "epoch": 48.0244110133409, "grad_norm": 1.5263378620147705, "learning_rate": 5.199517456713029e-05, "loss": 0.04025929868221283, "step": 169190 }, { "epoch": 48.027249503264265, "grad_norm": 0.19508737325668335, "learning_rate": 5.199233607720693e-05, "loss": 0.001240980066359043, "step": 169200 }, { "epoch": 48.03008799318762, "grad_norm": 0.2004241943359375, "learning_rate": 5.1989497587283564e-05, "loss": 0.0058360625058412555, "step": 169210 }, { "epoch": 48.032926483110984, "grad_norm": 0.03123517334461212, "learning_rate": 5.19866590973602e-05, "loss": 0.016002625226974487, "step": 169220 }, { "epoch": 48.03576497303435, "grad_norm": 0.03838253766298294, "learning_rate": 5.198382060743685e-05, "loss": 0.0005968103185296058, "step": 169230 }, { "epoch": 48.03860346295771, "grad_norm": 0.15703527629375458, "learning_rate": 5.198098211751349e-05, "loss": 0.01131969913840294, "step": 169240 }, { "epoch": 48.041441952881065, "grad_norm": 0.04316042736172676, "learning_rate": 5.197814362759012e-05, "loss": 0.0003680497407913208, "step": 169250 }, { "epoch": 48.04428044280443, "grad_norm": 0.10699286311864853, "learning_rate": 5.1975305137666765e-05, "loss": 0.005546287447214126, "step": 169260 }, { "epoch": 48.04711893272779, "grad_norm": 0.006181813310831785, "learning_rate": 5.19724666477434e-05, "loss": 0.00021482743322849275, "step": 169270 }, { "epoch": 48.04995742265115, "grad_norm": 0.02008775621652603, "learning_rate": 5.196962815782004e-05, "loss": 0.0003266243264079094, "step": 169280 }, { "epoch": 48.05279591257451, "grad_norm": 0.04248051717877388, "learning_rate": 5.1966789667896675e-05, "loss": 0.0011708611622452737, "step": 169290 }, { "epoch": 48.05563440249787, "grad_norm": 0.3391958773136139, "learning_rate": 5.196395117797332e-05, "loss": 0.0034120500087738035, "step": 169300 }, { "epoch": 48.05847289242123, "grad_norm": 11.916740417480469, "learning_rate": 5.1961112688049965e-05, "loss": 0.019707635045051575, "step": 169310 }, { "epoch": 48.06131138234459, "grad_norm": 0.025469433516263962, "learning_rate": 5.19582741981266e-05, "loss": 0.014173769950866699, "step": 169320 }, { "epoch": 48.064149872267954, "grad_norm": 0.42081472277641296, "learning_rate": 5.195543570820324e-05, "loss": 0.0004422694444656372, "step": 169330 }, { "epoch": 48.06698836219132, "grad_norm": 2.9052436351776123, "learning_rate": 5.1952597218279875e-05, "loss": 0.0009109623730182648, "step": 169340 }, { "epoch": 48.06982685211467, "grad_norm": 0.33233651518821716, "learning_rate": 5.194975872835651e-05, "loss": 0.0029893375933170317, "step": 169350 }, { "epoch": 48.072665342038036, "grad_norm": 0.34167957305908203, "learning_rate": 5.194692023843316e-05, "loss": 0.006196878477931022, "step": 169360 }, { "epoch": 48.0755038319614, "grad_norm": 0.00471788365393877, "learning_rate": 5.19440817485098e-05, "loss": 0.0019762810319662095, "step": 169370 }, { "epoch": 48.078342321884755, "grad_norm": 0.07276025414466858, "learning_rate": 5.1941243258586434e-05, "loss": 0.0003411976620554924, "step": 169380 }, { "epoch": 48.08118081180812, "grad_norm": 0.007149125449359417, "learning_rate": 5.1938404768663075e-05, "loss": 0.0001294836401939392, "step": 169390 }, { "epoch": 48.08401930173148, "grad_norm": 1.1718329191207886, "learning_rate": 5.193556627873971e-05, "loss": 0.0005786841735243798, "step": 169400 }, { "epoch": 48.086857791654836, "grad_norm": 0.22367091476917267, "learning_rate": 5.193272778881635e-05, "loss": 0.0007422501221299171, "step": 169410 }, { "epoch": 48.0896962815782, "grad_norm": 0.3084978461265564, "learning_rate": 5.1929889298892986e-05, "loss": 0.0017950965091586113, "step": 169420 }, { "epoch": 48.09253477150156, "grad_norm": 0.050169847905635834, "learning_rate": 5.1927050808969634e-05, "loss": 0.0018456116318702697, "step": 169430 }, { "epoch": 48.095373261424925, "grad_norm": 1.5041332244873047, "learning_rate": 5.1924212319046275e-05, "loss": 0.0007944473996758461, "step": 169440 }, { "epoch": 48.09821175134828, "grad_norm": 0.031940024346113205, "learning_rate": 5.192137382912291e-05, "loss": 0.007065792381763458, "step": 169450 }, { "epoch": 48.10105024127164, "grad_norm": 0.06882797181606293, "learning_rate": 5.1918535339199545e-05, "loss": 0.0016044413670897484, "step": 169460 }, { "epoch": 48.103888731195006, "grad_norm": 0.011602262035012245, "learning_rate": 5.1915696849276186e-05, "loss": 0.011416642367839814, "step": 169470 }, { "epoch": 48.10672722111836, "grad_norm": 0.009691138751804829, "learning_rate": 5.191285835935282e-05, "loss": 0.0006365258246660233, "step": 169480 }, { "epoch": 48.109565711041725, "grad_norm": 0.021319067105650902, "learning_rate": 5.191001986942947e-05, "loss": 0.0007166668772697448, "step": 169490 }, { "epoch": 48.11240420096509, "grad_norm": 0.015397973358631134, "learning_rate": 5.190718137950611e-05, "loss": 0.00039647333323955536, "step": 169500 }, { "epoch": 48.11240420096509, "eval_accuracy": 0.9814967889616583, "eval_loss": 0.07978805899620056, "eval_runtime": 33.613, "eval_samples_per_second": 467.884, "eval_steps_per_second": 7.319, "step": 169500 }, { "epoch": 48.11524269088845, "grad_norm": 5.843402862548828, "learning_rate": 5.1904342889582745e-05, "loss": 0.002059211768209934, "step": 169510 }, { "epoch": 48.11808118081181, "grad_norm": 0.07909126579761505, "learning_rate": 5.1901504399659386e-05, "loss": 0.003141956776380539, "step": 169520 }, { "epoch": 48.12091967073517, "grad_norm": 0.27140548825263977, "learning_rate": 5.189866590973602e-05, "loss": 0.0006495948880910873, "step": 169530 }, { "epoch": 48.12375816065853, "grad_norm": 0.00487532839179039, "learning_rate": 5.189582741981266e-05, "loss": 0.0021698193624615668, "step": 169540 }, { "epoch": 48.12659665058189, "grad_norm": 0.03925818204879761, "learning_rate": 5.18929889298893e-05, "loss": 0.0009170720353722572, "step": 169550 }, { "epoch": 48.12943514050525, "grad_norm": 0.005167062394320965, "learning_rate": 5.1890150439965945e-05, "loss": 0.000279347226023674, "step": 169560 }, { "epoch": 48.132273630428614, "grad_norm": 0.05481762811541557, "learning_rate": 5.1887311950042586e-05, "loss": 0.0011258304119110107, "step": 169570 }, { "epoch": 48.13511212035197, "grad_norm": 0.30973437428474426, "learning_rate": 5.188447346011922e-05, "loss": 0.00018184706568717955, "step": 169580 }, { "epoch": 48.13795061027533, "grad_norm": 0.15813449025154114, "learning_rate": 5.1881634970195855e-05, "loss": 0.0007038865238428116, "step": 169590 }, { "epoch": 48.140789100198695, "grad_norm": 0.01218936312943697, "learning_rate": 5.18787964802725e-05, "loss": 0.0009840911254286766, "step": 169600 }, { "epoch": 48.14362759012206, "grad_norm": 0.0016784765757620335, "learning_rate": 5.187595799034913e-05, "loss": 0.005103451013565063, "step": 169610 }, { "epoch": 48.146466080045414, "grad_norm": 0.07069042325019836, "learning_rate": 5.187311950042577e-05, "loss": 0.00023606475442647935, "step": 169620 }, { "epoch": 48.14930456996878, "grad_norm": 0.08807370066642761, "learning_rate": 5.187028101050242e-05, "loss": 0.0005458328872919082, "step": 169630 }, { "epoch": 48.15214305989214, "grad_norm": 2.8024535179138184, "learning_rate": 5.1867442520579055e-05, "loss": 0.0008078690618276597, "step": 169640 }, { "epoch": 48.154981549815496, "grad_norm": 0.15312205255031586, "learning_rate": 5.18646040306557e-05, "loss": 0.0005660489201545716, "step": 169650 }, { "epoch": 48.15782003973886, "grad_norm": 0.018968990072607994, "learning_rate": 5.186176554073233e-05, "loss": 0.00020388104021549225, "step": 169660 }, { "epoch": 48.16065852966222, "grad_norm": 0.01781640388071537, "learning_rate": 5.185892705080897e-05, "loss": 0.00010977163910865783, "step": 169670 }, { "epoch": 48.16349701958558, "grad_norm": 0.05107291787862778, "learning_rate": 5.185608856088561e-05, "loss": 0.0003041112795472145, "step": 169680 }, { "epoch": 48.16633550950894, "grad_norm": 0.033181674778461456, "learning_rate": 5.1853250070962256e-05, "loss": 0.0007379349321126938, "step": 169690 }, { "epoch": 48.1691739994323, "grad_norm": 0.019977036863565445, "learning_rate": 5.185041158103889e-05, "loss": 0.0004889043048024178, "step": 169700 }, { "epoch": 48.172012489355666, "grad_norm": 0.01530026737600565, "learning_rate": 5.184757309111553e-05, "loss": 0.00024585500359535216, "step": 169710 }, { "epoch": 48.17485097927902, "grad_norm": 0.08299068361520767, "learning_rate": 5.1844734601192166e-05, "loss": 0.0006488190963864326, "step": 169720 }, { "epoch": 48.177689469202384, "grad_norm": 0.4412902593612671, "learning_rate": 5.184189611126881e-05, "loss": 0.01173195093870163, "step": 169730 }, { "epoch": 48.18052795912575, "grad_norm": 1.6852134466171265, "learning_rate": 5.183905762134544e-05, "loss": 0.0005903201177716255, "step": 169740 }, { "epoch": 48.1833664490491, "grad_norm": 0.03864777460694313, "learning_rate": 5.1836219131422084e-05, "loss": 0.0005603807047009468, "step": 169750 }, { "epoch": 48.186204938972466, "grad_norm": 0.030786694958806038, "learning_rate": 5.183338064149873e-05, "loss": 0.0005984002724289894, "step": 169760 }, { "epoch": 48.18904342889583, "grad_norm": 0.09240388870239258, "learning_rate": 5.1830542151575366e-05, "loss": 0.000489768385887146, "step": 169770 }, { "epoch": 48.191881918819185, "grad_norm": 0.34261295199394226, "learning_rate": 5.182770366165201e-05, "loss": 0.0028328860178589823, "step": 169780 }, { "epoch": 48.19472040874255, "grad_norm": 0.00946933962404728, "learning_rate": 5.182486517172864e-05, "loss": 0.0001675419509410858, "step": 169790 }, { "epoch": 48.19755889866591, "grad_norm": 0.2412862926721573, "learning_rate": 5.182202668180528e-05, "loss": 0.0003730384632945061, "step": 169800 }, { "epoch": 48.20039738858927, "grad_norm": 0.012593233957886696, "learning_rate": 5.181918819188192e-05, "loss": 9.999442845582962e-05, "step": 169810 }, { "epoch": 48.20323587851263, "grad_norm": 0.031629763543605804, "learning_rate": 5.181634970195855e-05, "loss": 0.0008299373090267182, "step": 169820 }, { "epoch": 48.20607436843599, "grad_norm": 0.008687361143529415, "learning_rate": 5.18135112120352e-05, "loss": 0.0011571524664759636, "step": 169830 }, { "epoch": 48.208912858359355, "grad_norm": 0.3079584538936615, "learning_rate": 5.181067272211184e-05, "loss": 0.005076476186513901, "step": 169840 }, { "epoch": 48.21175134828271, "grad_norm": 0.009382108226418495, "learning_rate": 5.180783423218848e-05, "loss": 0.0003099724650382996, "step": 169850 }, { "epoch": 48.214589838206074, "grad_norm": 0.03351113945245743, "learning_rate": 5.180499574226512e-05, "loss": 0.0005394168198108673, "step": 169860 }, { "epoch": 48.21742832812944, "grad_norm": 0.08608076721429825, "learning_rate": 5.180215725234175e-05, "loss": 0.003739796206355095, "step": 169870 }, { "epoch": 48.2202668180528, "grad_norm": 0.18519806861877441, "learning_rate": 5.1799318762418394e-05, "loss": 0.000256195105612278, "step": 169880 }, { "epoch": 48.223105307976155, "grad_norm": 0.060730356723070145, "learning_rate": 5.179648027249504e-05, "loss": 0.009554793685674667, "step": 169890 }, { "epoch": 48.22594379789952, "grad_norm": 0.009651537984609604, "learning_rate": 5.179364178257168e-05, "loss": 0.0006754888221621513, "step": 169900 }, { "epoch": 48.22878228782288, "grad_norm": 0.09042307734489441, "learning_rate": 5.179080329264832e-05, "loss": 8.59169289469719e-05, "step": 169910 }, { "epoch": 48.23162077774624, "grad_norm": 0.06241042912006378, "learning_rate": 5.178796480272495e-05, "loss": 0.00036689937114715577, "step": 169920 }, { "epoch": 48.2344592676696, "grad_norm": 0.07417991012334824, "learning_rate": 5.178512631280159e-05, "loss": 0.00026162080466747285, "step": 169930 }, { "epoch": 48.23729775759296, "grad_norm": 0.03435307368636131, "learning_rate": 5.178228782287823e-05, "loss": 0.00013902932405471802, "step": 169940 }, { "epoch": 48.24013624751632, "grad_norm": 0.028166960924863815, "learning_rate": 5.1779449332954864e-05, "loss": 9.968951344490052e-05, "step": 169950 }, { "epoch": 48.24297473743968, "grad_norm": 0.058733705431222916, "learning_rate": 5.177661084303151e-05, "loss": 6.303470581769943e-05, "step": 169960 }, { "epoch": 48.245813227363044, "grad_norm": 0.23977917432785034, "learning_rate": 5.177377235310815e-05, "loss": 0.00013793185353279115, "step": 169970 }, { "epoch": 48.24865171728641, "grad_norm": 0.007222427520900965, "learning_rate": 5.177093386318479e-05, "loss": 0.00014491751790046692, "step": 169980 }, { "epoch": 48.25149020720976, "grad_norm": 0.12634755671024323, "learning_rate": 5.176809537326143e-05, "loss": 9.001623839139938e-05, "step": 169990 }, { "epoch": 48.254328697133126, "grad_norm": 0.016489798203110695, "learning_rate": 5.1765256883338064e-05, "loss": 0.00015788916498422624, "step": 170000 }, { "epoch": 48.254328697133126, "eval_accuracy": 0.9865835823742608, "eval_loss": 0.05956593528389931, "eval_runtime": 34.1814, "eval_samples_per_second": 460.104, "eval_steps_per_second": 7.197, "step": 170000 }, { "epoch": 48.25716718705649, "grad_norm": 0.044976793229579926, "learning_rate": 5.1762418393414705e-05, "loss": 0.0002828843891620636, "step": 170010 }, { "epoch": 48.260005676979844, "grad_norm": 0.06896226108074188, "learning_rate": 5.175957990349134e-05, "loss": 0.001285165548324585, "step": 170020 }, { "epoch": 48.26284416690321, "grad_norm": 0.02814648114144802, "learning_rate": 5.175674141356799e-05, "loss": 0.0006371829658746719, "step": 170030 }, { "epoch": 48.26568265682657, "grad_norm": 0.005358932539820671, "learning_rate": 5.175390292364463e-05, "loss": 0.00020611956715583802, "step": 170040 }, { "epoch": 48.268521146749926, "grad_norm": 0.00851475354284048, "learning_rate": 5.1751064433721264e-05, "loss": 0.0006021659821271897, "step": 170050 }, { "epoch": 48.27135963667329, "grad_norm": 0.1878855973482132, "learning_rate": 5.17482259437979e-05, "loss": 0.000969201885163784, "step": 170060 }, { "epoch": 48.27419812659665, "grad_norm": 0.014645183458924294, "learning_rate": 5.174538745387454e-05, "loss": 0.0002661498263478279, "step": 170070 }, { "epoch": 48.277036616520014, "grad_norm": 0.2204105705022812, "learning_rate": 5.1742548963951174e-05, "loss": 0.001254452019929886, "step": 170080 }, { "epoch": 48.27987510644337, "grad_norm": 0.23105978965759277, "learning_rate": 5.173971047402782e-05, "loss": 0.009521880000829697, "step": 170090 }, { "epoch": 48.28271359636673, "grad_norm": 0.019851697608828545, "learning_rate": 5.1736871984104464e-05, "loss": 0.0005358891561627388, "step": 170100 }, { "epoch": 48.285552086290096, "grad_norm": 0.0026549939066171646, "learning_rate": 5.17340334941811e-05, "loss": 0.00022897273302078248, "step": 170110 }, { "epoch": 48.28839057621345, "grad_norm": 0.002686286112293601, "learning_rate": 5.173119500425774e-05, "loss": 0.00326082706451416, "step": 170120 }, { "epoch": 48.291229066136815, "grad_norm": 0.035726480185985565, "learning_rate": 5.1728356514334374e-05, "loss": 0.0031574442982673645, "step": 170130 }, { "epoch": 48.29406755606018, "grad_norm": 0.07477939128875732, "learning_rate": 5.1725518024411016e-05, "loss": 0.00012739673256874086, "step": 170140 }, { "epoch": 48.29690604598353, "grad_norm": 0.015393474139273167, "learning_rate": 5.172267953448765e-05, "loss": 0.00010398086160421372, "step": 170150 }, { "epoch": 48.299744535906896, "grad_norm": 0.039770785719156265, "learning_rate": 5.17198410445643e-05, "loss": 0.001329629309475422, "step": 170160 }, { "epoch": 48.30258302583026, "grad_norm": 0.03712442144751549, "learning_rate": 5.171700255464093e-05, "loss": 0.00011495184153318406, "step": 170170 }, { "epoch": 48.30542151575362, "grad_norm": 0.35187241435050964, "learning_rate": 5.1714164064717575e-05, "loss": 0.0003941792994737625, "step": 170180 }, { "epoch": 48.30826000567698, "grad_norm": 0.03290212154388428, "learning_rate": 5.171132557479421e-05, "loss": 0.0002569820731878281, "step": 170190 }, { "epoch": 48.31109849560034, "grad_norm": 0.0018244253005832434, "learning_rate": 5.170848708487085e-05, "loss": 0.00024128518998622894, "step": 170200 }, { "epoch": 48.3139369855237, "grad_norm": 0.0560508593916893, "learning_rate": 5.1705648594947485e-05, "loss": 0.0008398795500397683, "step": 170210 }, { "epoch": 48.31677547544706, "grad_norm": 0.153071328997612, "learning_rate": 5.1702810105024126e-05, "loss": 0.0002511471509933472, "step": 170220 }, { "epoch": 48.31961396537042, "grad_norm": 0.01747928187251091, "learning_rate": 5.1699971615100775e-05, "loss": 0.0013664575293660165, "step": 170230 }, { "epoch": 48.322452455293785, "grad_norm": 0.16018392145633698, "learning_rate": 5.169713312517741e-05, "loss": 0.0004027949646115303, "step": 170240 }, { "epoch": 48.32529094521715, "grad_norm": 0.020656175911426544, "learning_rate": 5.169429463525405e-05, "loss": 0.0015816468745470046, "step": 170250 }, { "epoch": 48.328129435140504, "grad_norm": 0.02113514393568039, "learning_rate": 5.1691456145330685e-05, "loss": 0.0006672313436865806, "step": 170260 }, { "epoch": 48.33096792506387, "grad_norm": 0.015868857502937317, "learning_rate": 5.168861765540732e-05, "loss": 0.0010426819324493408, "step": 170270 }, { "epoch": 48.33380641498723, "grad_norm": 6.815341472625732, "learning_rate": 5.168577916548396e-05, "loss": 0.009638189524412154, "step": 170280 }, { "epoch": 48.336644904910585, "grad_norm": 0.020481539890170097, "learning_rate": 5.168294067556061e-05, "loss": 0.00015776585787534713, "step": 170290 }, { "epoch": 48.33948339483395, "grad_norm": 0.009122311137616634, "learning_rate": 5.1680102185637244e-05, "loss": 0.0016775472089648248, "step": 170300 }, { "epoch": 48.34232188475731, "grad_norm": 0.0368160717189312, "learning_rate": 5.1677263695713885e-05, "loss": 0.00010248590260744094, "step": 170310 }, { "epoch": 48.34516037468067, "grad_norm": 0.08173693716526031, "learning_rate": 5.167442520579052e-05, "loss": 0.00020996816456317902, "step": 170320 }, { "epoch": 48.34799886460403, "grad_norm": 0.16462399065494537, "learning_rate": 5.167158671586716e-05, "loss": 0.0005900751799345017, "step": 170330 }, { "epoch": 48.35083735452739, "grad_norm": 0.3772743046283722, "learning_rate": 5.1668748225943796e-05, "loss": 0.0020767807960510253, "step": 170340 }, { "epoch": 48.353675844450756, "grad_norm": 0.08166877180337906, "learning_rate": 5.166590973602044e-05, "loss": 9.452793747186661e-05, "step": 170350 }, { "epoch": 48.35651433437411, "grad_norm": 0.0025877971202135086, "learning_rate": 5.1663071246097085e-05, "loss": 0.002845164015889168, "step": 170360 }, { "epoch": 48.359352824297474, "grad_norm": 0.016378046944737434, "learning_rate": 5.166023275617372e-05, "loss": 0.0001991819590330124, "step": 170370 }, { "epoch": 48.36219131422084, "grad_norm": 0.052050549536943436, "learning_rate": 5.165739426625036e-05, "loss": 0.003881955146789551, "step": 170380 }, { "epoch": 48.36502980414419, "grad_norm": 0.05074809864163399, "learning_rate": 5.1654555776326996e-05, "loss": 0.0006522759795188904, "step": 170390 }, { "epoch": 48.367868294067556, "grad_norm": 0.003966166637837887, "learning_rate": 5.165171728640363e-05, "loss": 0.00025546569377183915, "step": 170400 }, { "epoch": 48.37070678399092, "grad_norm": 0.013416006229817867, "learning_rate": 5.164887879648027e-05, "loss": 0.0013338109478354455, "step": 170410 }, { "epoch": 48.373545273914274, "grad_norm": 0.06919465959072113, "learning_rate": 5.1646040306556907e-05, "loss": 0.002602764591574669, "step": 170420 }, { "epoch": 48.37638376383764, "grad_norm": 0.02405506931245327, "learning_rate": 5.1643201816633555e-05, "loss": 0.00023520980030298234, "step": 170430 }, { "epoch": 48.379222253761, "grad_norm": 0.02850024215877056, "learning_rate": 5.1640363326710196e-05, "loss": 0.00013509336858987808, "step": 170440 }, { "epoch": 48.38206074368436, "grad_norm": 0.06221349909901619, "learning_rate": 5.163752483678683e-05, "loss": 0.0007343536242842674, "step": 170450 }, { "epoch": 48.38489923360772, "grad_norm": 0.09539590775966644, "learning_rate": 5.163468634686347e-05, "loss": 0.0003920018672943115, "step": 170460 }, { "epoch": 48.38773772353108, "grad_norm": 0.09858322143554688, "learning_rate": 5.163184785694011e-05, "loss": 0.00016888752579689026, "step": 170470 }, { "epoch": 48.390576213454445, "grad_norm": 0.014040568843483925, "learning_rate": 5.162900936701675e-05, "loss": 0.004613256454467774, "step": 170480 }, { "epoch": 48.3934147033778, "grad_norm": 0.07158415019512177, "learning_rate": 5.1626170877093396e-05, "loss": 0.0003268897533416748, "step": 170490 }, { "epoch": 48.39625319330116, "grad_norm": 12.994355201721191, "learning_rate": 5.162333238717003e-05, "loss": 0.012944254279136657, "step": 170500 }, { "epoch": 48.39625319330116, "eval_accuracy": 0.9795256565142748, "eval_loss": 0.0906384214758873, "eval_runtime": 39.4975, "eval_samples_per_second": 398.177, "eval_steps_per_second": 6.228, "step": 170500 }, { "epoch": 48.399091683224526, "grad_norm": 0.08026884496212006, "learning_rate": 5.162049389724667e-05, "loss": 0.026843199133872987, "step": 170510 }, { "epoch": 48.40193017314788, "grad_norm": 0.16727294027805328, "learning_rate": 5.161765540732331e-05, "loss": 0.000755077600479126, "step": 170520 }, { "epoch": 48.404768663071245, "grad_norm": 0.006892481818795204, "learning_rate": 5.161481691739994e-05, "loss": 0.0012753628194332122, "step": 170530 }, { "epoch": 48.40760715299461, "grad_norm": 0.09506100416183472, "learning_rate": 5.161197842747658e-05, "loss": 0.006491412222385406, "step": 170540 }, { "epoch": 48.41044564291797, "grad_norm": 0.18350262939929962, "learning_rate": 5.160913993755322e-05, "loss": 0.002954533509910107, "step": 170550 }, { "epoch": 48.413284132841326, "grad_norm": 0.11232449114322662, "learning_rate": 5.1606301447629865e-05, "loss": 0.008386693894863129, "step": 170560 }, { "epoch": 48.41612262276469, "grad_norm": 0.052045442163944244, "learning_rate": 5.160346295770651e-05, "loss": 0.012074953317642212, "step": 170570 }, { "epoch": 48.41896111268805, "grad_norm": 3.253239154815674, "learning_rate": 5.160062446778314e-05, "loss": 0.0039244394749403, "step": 170580 }, { "epoch": 48.42179960261141, "grad_norm": 0.02079762890934944, "learning_rate": 5.159778597785978e-05, "loss": 0.0030462214723229407, "step": 170590 }, { "epoch": 48.42463809253477, "grad_norm": 0.17121194303035736, "learning_rate": 5.159494748793642e-05, "loss": 0.0006528047844767571, "step": 170600 }, { "epoch": 48.427476582458134, "grad_norm": 0.0032920718658715487, "learning_rate": 5.159210899801306e-05, "loss": 0.0004955703392624855, "step": 170610 }, { "epoch": 48.43031507238149, "grad_norm": 0.0033245037775486708, "learning_rate": 5.158927050808971e-05, "loss": 0.0002894138917326927, "step": 170620 }, { "epoch": 48.43315356230485, "grad_norm": 0.011462626978754997, "learning_rate": 5.158643201816634e-05, "loss": 0.0005859555676579475, "step": 170630 }, { "epoch": 48.435992052228215, "grad_norm": 0.6650513410568237, "learning_rate": 5.1583593528242976e-05, "loss": 0.0006885008886456489, "step": 170640 }, { "epoch": 48.43883054215158, "grad_norm": 0.009596663527190685, "learning_rate": 5.158075503831962e-05, "loss": 0.004397527128458023, "step": 170650 }, { "epoch": 48.441669032074934, "grad_norm": 0.03147398307919502, "learning_rate": 5.157791654839625e-05, "loss": 0.0060897693037986755, "step": 170660 }, { "epoch": 48.4445075219983, "grad_norm": 0.0466473326086998, "learning_rate": 5.1575078058472893e-05, "loss": 0.0013504676520824433, "step": 170670 }, { "epoch": 48.44734601192166, "grad_norm": 0.9511595368385315, "learning_rate": 5.157223956854953e-05, "loss": 0.001948450691998005, "step": 170680 }, { "epoch": 48.450184501845015, "grad_norm": 0.0012802323326468468, "learning_rate": 5.1569401078626176e-05, "loss": 0.002673465944826603, "step": 170690 }, { "epoch": 48.45302299176838, "grad_norm": 0.0578363761305809, "learning_rate": 5.156656258870282e-05, "loss": 0.0016680726781487464, "step": 170700 }, { "epoch": 48.45586148169174, "grad_norm": 0.1913767158985138, "learning_rate": 5.156372409877945e-05, "loss": 0.009164725989103317, "step": 170710 }, { "epoch": 48.458699971615104, "grad_norm": 0.11749342083930969, "learning_rate": 5.1560885608856094e-05, "loss": 0.0013852590695023538, "step": 170720 }, { "epoch": 48.46153846153846, "grad_norm": 0.014130194671452045, "learning_rate": 5.155804711893273e-05, "loss": 0.001146489754319191, "step": 170730 }, { "epoch": 48.46437695146182, "grad_norm": 0.04129895940423012, "learning_rate": 5.155520862900936e-05, "loss": 0.0002925094217061996, "step": 170740 }, { "epoch": 48.467215441385186, "grad_norm": 0.11672885715961456, "learning_rate": 5.1552370139086004e-05, "loss": 0.004190940409898758, "step": 170750 }, { "epoch": 48.47005393130854, "grad_norm": 0.03670286387205124, "learning_rate": 5.154953164916265e-05, "loss": 0.0014485090970993042, "step": 170760 }, { "epoch": 48.472892421231904, "grad_norm": 0.0445064939558506, "learning_rate": 5.154669315923929e-05, "loss": 0.004745295271277428, "step": 170770 }, { "epoch": 48.47573091115527, "grad_norm": 0.07154038548469543, "learning_rate": 5.154385466931593e-05, "loss": 0.001292652264237404, "step": 170780 }, { "epoch": 48.47856940107862, "grad_norm": 0.034357521682977676, "learning_rate": 5.154101617939256e-05, "loss": 0.0010862838476896285, "step": 170790 }, { "epoch": 48.481407891001986, "grad_norm": 0.169928178191185, "learning_rate": 5.1538177689469204e-05, "loss": 0.004103013128042221, "step": 170800 }, { "epoch": 48.48424638092535, "grad_norm": 0.01374371349811554, "learning_rate": 5.153533919954584e-05, "loss": 0.004275486245751381, "step": 170810 }, { "epoch": 48.48708487084871, "grad_norm": 0.35057759284973145, "learning_rate": 5.153250070962249e-05, "loss": 0.002965838462114334, "step": 170820 }, { "epoch": 48.48992336077207, "grad_norm": 0.035232968628406525, "learning_rate": 5.152966221969913e-05, "loss": 0.0036769315600395204, "step": 170830 }, { "epoch": 48.49276185069543, "grad_norm": 0.08318639546632767, "learning_rate": 5.152682372977576e-05, "loss": 0.012103427946567536, "step": 170840 }, { "epoch": 48.49560034061879, "grad_norm": 0.03698692470788956, "learning_rate": 5.1523985239852404e-05, "loss": 0.003944763168692589, "step": 170850 }, { "epoch": 48.49843883054215, "grad_norm": 0.12857766449451447, "learning_rate": 5.152114674992904e-05, "loss": 0.0009454693645238876, "step": 170860 }, { "epoch": 48.50127732046551, "grad_norm": 18.004819869995117, "learning_rate": 5.1518308260005674e-05, "loss": 0.015207961201667786, "step": 170870 }, { "epoch": 48.504115810388875, "grad_norm": 0.018049070611596107, "learning_rate": 5.1515469770082315e-05, "loss": 0.0028762023895978926, "step": 170880 }, { "epoch": 48.50695430031223, "grad_norm": 0.6828169822692871, "learning_rate": 5.151263128015896e-05, "loss": 0.0042075544595718386, "step": 170890 }, { "epoch": 48.50979279023559, "grad_norm": 0.3162483274936676, "learning_rate": 5.15097927902356e-05, "loss": 0.010669587552547455, "step": 170900 }, { "epoch": 48.512631280158956, "grad_norm": 0.06296465545892715, "learning_rate": 5.150695430031224e-05, "loss": 0.002124257758259773, "step": 170910 }, { "epoch": 48.51546977008232, "grad_norm": 0.8940988183021545, "learning_rate": 5.1504115810388874e-05, "loss": 0.0006614010781049728, "step": 170920 }, { "epoch": 48.518308260005675, "grad_norm": 0.052074968814849854, "learning_rate": 5.1501277320465515e-05, "loss": 0.0034765787422657013, "step": 170930 }, { "epoch": 48.52114674992904, "grad_norm": 0.34856876730918884, "learning_rate": 5.149843883054215e-05, "loss": 0.0008349584415555001, "step": 170940 }, { "epoch": 48.5239852398524, "grad_norm": 0.41496580839157104, "learning_rate": 5.149560034061879e-05, "loss": 0.00045248493552207947, "step": 170950 }, { "epoch": 48.52682372977576, "grad_norm": 0.012813698500394821, "learning_rate": 5.149276185069544e-05, "loss": 0.001008734293282032, "step": 170960 }, { "epoch": 48.52966221969912, "grad_norm": 1.0808312892913818, "learning_rate": 5.1489923360772074e-05, "loss": 0.0020518029108643533, "step": 170970 }, { "epoch": 48.53250070962248, "grad_norm": 0.14930208027362823, "learning_rate": 5.1487084870848715e-05, "loss": 0.00047734808176755903, "step": 170980 }, { "epoch": 48.535339199545845, "grad_norm": 0.005207897163927555, "learning_rate": 5.148424638092535e-05, "loss": 0.0004294726997613907, "step": 170990 }, { "epoch": 48.5381776894692, "grad_norm": 0.8354384303092957, "learning_rate": 5.1481407891001984e-05, "loss": 0.0003550529479980469, "step": 171000 }, { "epoch": 48.5381776894692, "eval_accuracy": 0.9827684873148089, "eval_loss": 0.06963706761598587, "eval_runtime": 45.6155, "eval_samples_per_second": 344.773, "eval_steps_per_second": 5.393, "step": 171000 }, { "epoch": 48.541016179392564, "grad_norm": 0.02595224417746067, "learning_rate": 5.1478569401078626e-05, "loss": 0.0008818211033940315, "step": 171010 }, { "epoch": 48.54385466931593, "grad_norm": 0.11939025670289993, "learning_rate": 5.1475730911155274e-05, "loss": 0.00027615558356046676, "step": 171020 }, { "epoch": 48.54669315923928, "grad_norm": 0.004348083399236202, "learning_rate": 5.147289242123191e-05, "loss": 0.0018642662093043328, "step": 171030 }, { "epoch": 48.549531649162645, "grad_norm": 0.017641931772232056, "learning_rate": 5.147005393130855e-05, "loss": 0.0034941241145133974, "step": 171040 }, { "epoch": 48.55237013908601, "grad_norm": 4.325717926025391, "learning_rate": 5.1467215441385184e-05, "loss": 0.006074047833681107, "step": 171050 }, { "epoch": 48.555208629009364, "grad_norm": 0.061697233468294144, "learning_rate": 5.1464376951461826e-05, "loss": 0.0032572753727436064, "step": 171060 }, { "epoch": 48.55804711893273, "grad_norm": 0.012299598194658756, "learning_rate": 5.146153846153846e-05, "loss": 0.0003554686903953552, "step": 171070 }, { "epoch": 48.56088560885609, "grad_norm": 0.18701478838920593, "learning_rate": 5.14586999716151e-05, "loss": 0.0005247971042990684, "step": 171080 }, { "epoch": 48.56372409877945, "grad_norm": 14.08698844909668, "learning_rate": 5.145586148169175e-05, "loss": 0.0066584542393684385, "step": 171090 }, { "epoch": 48.56656258870281, "grad_norm": 0.1505287140607834, "learning_rate": 5.1453022991768384e-05, "loss": 0.0007166853174567223, "step": 171100 }, { "epoch": 48.56940107862617, "grad_norm": 0.15173600614070892, "learning_rate": 5.145018450184502e-05, "loss": 0.007317440211772918, "step": 171110 }, { "epoch": 48.572239568549534, "grad_norm": 0.2225589007139206, "learning_rate": 5.144734601192166e-05, "loss": 0.0027039485052227975, "step": 171120 }, { "epoch": 48.57507805847289, "grad_norm": 0.28140926361083984, "learning_rate": 5.1444507521998295e-05, "loss": 0.00356011800467968, "step": 171130 }, { "epoch": 48.57791654839625, "grad_norm": 0.22864601016044617, "learning_rate": 5.1441669032074936e-05, "loss": 0.001562163792550564, "step": 171140 }, { "epoch": 48.580755038319616, "grad_norm": 0.021991118788719177, "learning_rate": 5.143883054215157e-05, "loss": 0.0006248874589800835, "step": 171150 }, { "epoch": 48.58359352824297, "grad_norm": 0.0665723904967308, "learning_rate": 5.143599205222822e-05, "loss": 0.011386295408010482, "step": 171160 }, { "epoch": 48.586432018166335, "grad_norm": 0.05240504443645477, "learning_rate": 5.143315356230486e-05, "loss": 0.0016126636415719985, "step": 171170 }, { "epoch": 48.5892705080897, "grad_norm": 0.42506808042526245, "learning_rate": 5.1430315072381495e-05, "loss": 0.0031232234090566636, "step": 171180 }, { "epoch": 48.59210899801306, "grad_norm": 0.02462630905210972, "learning_rate": 5.1427476582458137e-05, "loss": 0.0005158323794603348, "step": 171190 }, { "epoch": 48.594947487936416, "grad_norm": 0.007298700045794249, "learning_rate": 5.142463809253477e-05, "loss": 0.0036269426345825195, "step": 171200 }, { "epoch": 48.59778597785978, "grad_norm": 0.14389865100383759, "learning_rate": 5.1421799602611406e-05, "loss": 0.0011060805991292, "step": 171210 }, { "epoch": 48.60062446778314, "grad_norm": 1.043174147605896, "learning_rate": 5.141896111268806e-05, "loss": 0.003989964723587036, "step": 171220 }, { "epoch": 48.6034629577065, "grad_norm": 0.06407985091209412, "learning_rate": 5.1416122622764695e-05, "loss": 0.00144350603222847, "step": 171230 }, { "epoch": 48.60630144762986, "grad_norm": 0.045945752412080765, "learning_rate": 5.141328413284133e-05, "loss": 0.001356818713247776, "step": 171240 }, { "epoch": 48.60913993755322, "grad_norm": 0.06955615431070328, "learning_rate": 5.141044564291797e-05, "loss": 0.004788700491189957, "step": 171250 }, { "epoch": 48.61197842747658, "grad_norm": 1.043851613998413, "learning_rate": 5.1407607152994606e-05, "loss": 0.0008264988660812378, "step": 171260 }, { "epoch": 48.61481691739994, "grad_norm": 0.0997934341430664, "learning_rate": 5.140476866307125e-05, "loss": 0.017137369513511656, "step": 171270 }, { "epoch": 48.617655407323305, "grad_norm": 0.038645628839731216, "learning_rate": 5.140193017314788e-05, "loss": 0.04460117518901825, "step": 171280 }, { "epoch": 48.62049389724667, "grad_norm": 0.11068563908338547, "learning_rate": 5.139909168322453e-05, "loss": 0.013326248526573181, "step": 171290 }, { "epoch": 48.623332387170024, "grad_norm": 2.561586380004883, "learning_rate": 5.139625319330117e-05, "loss": 0.0006265915930271148, "step": 171300 }, { "epoch": 48.62617087709339, "grad_norm": 0.17241470515727997, "learning_rate": 5.1393414703377806e-05, "loss": 0.006039725244045257, "step": 171310 }, { "epoch": 48.62900936701675, "grad_norm": 1.535244345664978, "learning_rate": 5.139057621345445e-05, "loss": 0.0007133031263947487, "step": 171320 }, { "epoch": 48.631847856940105, "grad_norm": 0.06234268099069595, "learning_rate": 5.138773772353108e-05, "loss": 0.011993341892957688, "step": 171330 }, { "epoch": 48.63468634686347, "grad_norm": 0.18898798525333405, "learning_rate": 5.1384899233607716e-05, "loss": 0.006341452896595001, "step": 171340 }, { "epoch": 48.63752483678683, "grad_norm": 0.09617564082145691, "learning_rate": 5.13823445926767e-05, "loss": 0.014554890990257262, "step": 171350 }, { "epoch": 48.64036332671019, "grad_norm": 0.15164797008037567, "learning_rate": 5.137950610275334e-05, "loss": 0.0016083387657999993, "step": 171360 }, { "epoch": 48.64320181663355, "grad_norm": 0.0427999310195446, "learning_rate": 5.137666761282998e-05, "loss": 0.0029245855286717413, "step": 171370 }, { "epoch": 48.64604030655691, "grad_norm": 0.03737925738096237, "learning_rate": 5.1373829122906614e-05, "loss": 0.00028189439326524733, "step": 171380 }, { "epoch": 48.648878796480275, "grad_norm": 0.04371367394924164, "learning_rate": 5.137099063298325e-05, "loss": 0.0008450284600257874, "step": 171390 }, { "epoch": 48.65171728640363, "grad_norm": 0.07587093859910965, "learning_rate": 5.13681521430599e-05, "loss": 0.0005902472883462905, "step": 171400 }, { "epoch": 48.654555776326994, "grad_norm": 0.056127261370420456, "learning_rate": 5.136531365313654e-05, "loss": 0.003952125087380409, "step": 171410 }, { "epoch": 48.65739426625036, "grad_norm": 0.007047215476632118, "learning_rate": 5.136247516321317e-05, "loss": 0.003062296286225319, "step": 171420 }, { "epoch": 48.66023275617371, "grad_norm": 0.254069983959198, "learning_rate": 5.1359636673289814e-05, "loss": 0.0016808366402983666, "step": 171430 }, { "epoch": 48.663071246097076, "grad_norm": 1.0108191967010498, "learning_rate": 5.135679818336645e-05, "loss": 0.007387925684452057, "step": 171440 }, { "epoch": 48.66590973602044, "grad_norm": 2.1564085483551025, "learning_rate": 5.135395969344309e-05, "loss": 0.0007277287542819977, "step": 171450 }, { "epoch": 48.6687482259438, "grad_norm": 1.0004743337631226, "learning_rate": 5.1351121203519724e-05, "loss": 0.00047033466398715973, "step": 171460 }, { "epoch": 48.67158671586716, "grad_norm": 0.023611821234226227, "learning_rate": 5.134828271359637e-05, "loss": 0.0005976177752017974, "step": 171470 }, { "epoch": 48.67442520579052, "grad_norm": 0.9807735085487366, "learning_rate": 5.1345444223673014e-05, "loss": 0.031820148229599, "step": 171480 }, { "epoch": 48.67726369571388, "grad_norm": 0.06842703372240067, "learning_rate": 5.134260573374965e-05, "loss": 0.001732335425913334, "step": 171490 }, { "epoch": 48.68010218563724, "grad_norm": 4.849551200866699, "learning_rate": 5.133976724382629e-05, "loss": 0.0012718364596366882, "step": 171500 }, { "epoch": 48.68010218563724, "eval_accuracy": 0.982895657150124, "eval_loss": 0.0691387727856636, "eval_runtime": 42.4949, "eval_samples_per_second": 370.091, "eval_steps_per_second": 5.789, "step": 171500 }, { "epoch": 48.6829406755606, "grad_norm": 0.0076178619638085365, "learning_rate": 5.1336928753902925e-05, "loss": 0.0007334535941481591, "step": 171510 }, { "epoch": 48.685779165483964, "grad_norm": 0.04530981928110123, "learning_rate": 5.133409026397956e-05, "loss": 0.000843287818133831, "step": 171520 }, { "epoch": 48.68861765540732, "grad_norm": 11.807878494262695, "learning_rate": 5.13312517740562e-05, "loss": 0.00278313085436821, "step": 171530 }, { "epoch": 48.69145614533068, "grad_norm": 0.2661706507205963, "learning_rate": 5.132841328413285e-05, "loss": 0.00027030780911445617, "step": 171540 }, { "epoch": 48.694294635254046, "grad_norm": 0.8800146579742432, "learning_rate": 5.132557479420948e-05, "loss": 0.0005501382052898407, "step": 171550 }, { "epoch": 48.69713312517741, "grad_norm": 0.4755294620990753, "learning_rate": 5.1322736304286125e-05, "loss": 0.0006971484050154686, "step": 171560 }, { "epoch": 48.699971615100765, "grad_norm": 10.257270812988281, "learning_rate": 5.131989781436276e-05, "loss": 0.0048220671713352205, "step": 171570 }, { "epoch": 48.70281010502413, "grad_norm": 0.0665379986166954, "learning_rate": 5.13170593244394e-05, "loss": 0.0004794999957084656, "step": 171580 }, { "epoch": 48.70564859494749, "grad_norm": 0.13373297452926636, "learning_rate": 5.1314220834516035e-05, "loss": 0.0032360263168811796, "step": 171590 }, { "epoch": 48.708487084870846, "grad_norm": 9.944698333740234, "learning_rate": 5.131138234459268e-05, "loss": 0.004245896637439728, "step": 171600 }, { "epoch": 48.71132557479421, "grad_norm": 7.896509170532227, "learning_rate": 5.1308543854669325e-05, "loss": 0.002143920958042145, "step": 171610 }, { "epoch": 48.71416406471757, "grad_norm": 0.011694610118865967, "learning_rate": 5.130570536474596e-05, "loss": 0.0016773933544754981, "step": 171620 }, { "epoch": 48.71700255464093, "grad_norm": 3.358588457107544, "learning_rate": 5.1302866874822594e-05, "loss": 0.008660806715488434, "step": 171630 }, { "epoch": 48.71984104456429, "grad_norm": 0.004349596798419952, "learning_rate": 5.1300028384899235e-05, "loss": 0.009327958524227142, "step": 171640 }, { "epoch": 48.72267953448765, "grad_norm": 0.01397404633462429, "learning_rate": 5.129718989497587e-05, "loss": 0.0018753813579678535, "step": 171650 }, { "epoch": 48.725518024411016, "grad_norm": 0.06549683213233948, "learning_rate": 5.129435140505251e-05, "loss": 0.002859456092119217, "step": 171660 }, { "epoch": 48.72835651433437, "grad_norm": 0.1638902723789215, "learning_rate": 5.129151291512916e-05, "loss": 0.004612377285957337, "step": 171670 }, { "epoch": 48.731195004257735, "grad_norm": 0.004089196212589741, "learning_rate": 5.1288674425205794e-05, "loss": 0.0021406639367341994, "step": 171680 }, { "epoch": 48.7340334941811, "grad_norm": 0.01339395809918642, "learning_rate": 5.1285835935282435e-05, "loss": 0.0015074148774147033, "step": 171690 }, { "epoch": 48.736871984104454, "grad_norm": 0.2218884974718094, "learning_rate": 5.128299744535907e-05, "loss": 0.0008535047993063927, "step": 171700 }, { "epoch": 48.73971047402782, "grad_norm": 0.3360118269920349, "learning_rate": 5.128015895543571e-05, "loss": 0.008595493435859681, "step": 171710 }, { "epoch": 48.74254896395118, "grad_norm": 0.2279486507177353, "learning_rate": 5.1277320465512346e-05, "loss": 0.0007108788937330246, "step": 171720 }, { "epoch": 48.745387453874535, "grad_norm": 0.03911725804209709, "learning_rate": 5.127448197558898e-05, "loss": 0.001053198054432869, "step": 171730 }, { "epoch": 48.7482259437979, "grad_norm": 0.481182724237442, "learning_rate": 5.1271643485665636e-05, "loss": 0.0009545264765620232, "step": 171740 }, { "epoch": 48.75106443372126, "grad_norm": 0.06656099855899811, "learning_rate": 5.126880499574227e-05, "loss": 0.009357321262359618, "step": 171750 }, { "epoch": 48.753902923644624, "grad_norm": 0.20643968880176544, "learning_rate": 5.1265966505818905e-05, "loss": 0.0002762936055660248, "step": 171760 }, { "epoch": 48.75674141356798, "grad_norm": 0.0072885602712631226, "learning_rate": 5.1263128015895546e-05, "loss": 0.0012040453031659127, "step": 171770 }, { "epoch": 48.75957990349134, "grad_norm": 0.35727420449256897, "learning_rate": 5.126028952597218e-05, "loss": 0.011901598423719406, "step": 171780 }, { "epoch": 48.762418393414706, "grad_norm": 0.892014741897583, "learning_rate": 5.125745103604882e-05, "loss": 0.0021401284262537955, "step": 171790 }, { "epoch": 48.76525688333806, "grad_norm": 0.06187048554420471, "learning_rate": 5.125461254612547e-05, "loss": 0.004240356385707855, "step": 171800 }, { "epoch": 48.768095373261424, "grad_norm": 0.6635447144508362, "learning_rate": 5.1251774056202105e-05, "loss": 0.003756944090127945, "step": 171810 }, { "epoch": 48.77093386318479, "grad_norm": 0.23696769773960114, "learning_rate": 5.1248935566278746e-05, "loss": 0.0005246877670288086, "step": 171820 }, { "epoch": 48.77377235310815, "grad_norm": 0.3690941333770752, "learning_rate": 5.124609707635538e-05, "loss": 0.0003678217530250549, "step": 171830 }, { "epoch": 48.776610843031506, "grad_norm": 0.002002360299229622, "learning_rate": 5.124325858643202e-05, "loss": 0.004401063546538353, "step": 171840 }, { "epoch": 48.77944933295487, "grad_norm": 0.017545143142342567, "learning_rate": 5.124042009650866e-05, "loss": 0.0003314413130283356, "step": 171850 }, { "epoch": 48.78228782287823, "grad_norm": 0.03838846832513809, "learning_rate": 5.123758160658529e-05, "loss": 0.0007242150604724884, "step": 171860 }, { "epoch": 48.78512631280159, "grad_norm": 0.08274850249290466, "learning_rate": 5.1234743116661946e-05, "loss": 0.0025399573147296905, "step": 171870 }, { "epoch": 48.78796480272495, "grad_norm": 12.970132827758789, "learning_rate": 5.123190462673858e-05, "loss": 0.009933342039585114, "step": 171880 }, { "epoch": 48.79080329264831, "grad_norm": 0.3641849160194397, "learning_rate": 5.1229066136815215e-05, "loss": 0.006592909246683121, "step": 171890 }, { "epoch": 48.79364178257167, "grad_norm": 1.51405668258667, "learning_rate": 5.122622764689186e-05, "loss": 0.003892940282821655, "step": 171900 }, { "epoch": 48.79648027249503, "grad_norm": 0.06443338096141815, "learning_rate": 5.122338915696849e-05, "loss": 0.00350276380777359, "step": 171910 }, { "epoch": 48.799318762418395, "grad_norm": 0.503505289554596, "learning_rate": 5.122055066704513e-05, "loss": 0.0033166147768497467, "step": 171920 }, { "epoch": 48.80215725234176, "grad_norm": 0.33852633833885193, "learning_rate": 5.121771217712177e-05, "loss": 0.0017838526517152786, "step": 171930 }, { "epoch": 48.80499574226511, "grad_norm": 16.115583419799805, "learning_rate": 5.1214873687198416e-05, "loss": 0.003869367390871048, "step": 171940 }, { "epoch": 48.807834232188476, "grad_norm": 4.163418769836426, "learning_rate": 5.121203519727506e-05, "loss": 0.0010175781324505806, "step": 171950 }, { "epoch": 48.81067272211184, "grad_norm": 0.015110868029296398, "learning_rate": 5.120919670735169e-05, "loss": 0.0005650745704770088, "step": 171960 }, { "epoch": 48.813511212035195, "grad_norm": 0.05014403164386749, "learning_rate": 5.120635821742833e-05, "loss": 0.0013108473271131516, "step": 171970 }, { "epoch": 48.81634970195856, "grad_norm": 0.18880100548267365, "learning_rate": 5.120351972750497e-05, "loss": 0.0027879547327756883, "step": 171980 }, { "epoch": 48.81918819188192, "grad_norm": 0.004668315406888723, "learning_rate": 5.12006812375816e-05, "loss": 0.002723262459039688, "step": 171990 }, { "epoch": 48.822026681805276, "grad_norm": 0.024633362889289856, "learning_rate": 5.119784274765825e-05, "loss": 0.0020718779414892197, "step": 172000 }, { "epoch": 48.822026681805276, "eval_accuracy": 0.9822598079735487, "eval_loss": 0.06886842846870422, "eval_runtime": 57.8289, "eval_samples_per_second": 271.957, "eval_steps_per_second": 4.254, "step": 172000 }, { "epoch": 48.82486517172864, "grad_norm": 0.1550317257642746, "learning_rate": 5.119500425773489e-05, "loss": 0.008529268205165863, "step": 172010 }, { "epoch": 48.827703661652, "grad_norm": 0.04509894549846649, "learning_rate": 5.1192165767811526e-05, "loss": 0.0046820022165775296, "step": 172020 }, { "epoch": 48.830542151575365, "grad_norm": 0.07496773451566696, "learning_rate": 5.118932727788817e-05, "loss": 0.0018806528300046922, "step": 172030 }, { "epoch": 48.83338064149872, "grad_norm": 0.024739954620599747, "learning_rate": 5.11864887879648e-05, "loss": 0.0019555455073714256, "step": 172040 }, { "epoch": 48.836219131422084, "grad_norm": 0.007052856031805277, "learning_rate": 5.1183650298041444e-05, "loss": 0.003748336434364319, "step": 172050 }, { "epoch": 48.83905762134545, "grad_norm": 0.8031505346298218, "learning_rate": 5.118081180811808e-05, "loss": 0.004230421036481857, "step": 172060 }, { "epoch": 48.8418961112688, "grad_norm": 0.037653740495443344, "learning_rate": 5.1177973318194726e-05, "loss": 0.000683160126209259, "step": 172070 }, { "epoch": 48.844734601192165, "grad_norm": 0.04196833074092865, "learning_rate": 5.117513482827137e-05, "loss": 0.0011399144306778908, "step": 172080 }, { "epoch": 48.84757309111553, "grad_norm": 0.03924829885363579, "learning_rate": 5.1172296338348e-05, "loss": 0.003416544944047928, "step": 172090 }, { "epoch": 48.850411581038884, "grad_norm": 0.1079760491847992, "learning_rate": 5.116945784842464e-05, "loss": 0.0006976684555411339, "step": 172100 }, { "epoch": 48.85325007096225, "grad_norm": 0.03409904986619949, "learning_rate": 5.116661935850128e-05, "loss": 0.027209094166755675, "step": 172110 }, { "epoch": 48.85608856088561, "grad_norm": 18.342023849487305, "learning_rate": 5.116378086857791e-05, "loss": 0.009111824631690978, "step": 172120 }, { "epoch": 48.85892705080897, "grad_norm": 5.128731727600098, "learning_rate": 5.1160942378654554e-05, "loss": 0.006392365694046021, "step": 172130 }, { "epoch": 48.86176554073233, "grad_norm": 0.030700305476784706, "learning_rate": 5.11581038887312e-05, "loss": 0.002669900469481945, "step": 172140 }, { "epoch": 48.86460403065569, "grad_norm": 0.019176673144102097, "learning_rate": 5.115526539880784e-05, "loss": 0.002727293036878109, "step": 172150 }, { "epoch": 48.867442520579054, "grad_norm": 0.2142675668001175, "learning_rate": 5.115242690888448e-05, "loss": 0.0003793070092797279, "step": 172160 }, { "epoch": 48.87028101050241, "grad_norm": 0.10820090025663376, "learning_rate": 5.114958841896111e-05, "loss": 0.0005071548745036125, "step": 172170 }, { "epoch": 48.87311950042577, "grad_norm": 0.08832554519176483, "learning_rate": 5.1146749929037754e-05, "loss": 0.0002574404701590538, "step": 172180 }, { "epoch": 48.875957990349136, "grad_norm": 0.022503940388560295, "learning_rate": 5.114391143911439e-05, "loss": 0.0003464441746473312, "step": 172190 }, { "epoch": 48.8787964802725, "grad_norm": 0.013734964653849602, "learning_rate": 5.114107294919104e-05, "loss": 0.00033740084618330004, "step": 172200 }, { "epoch": 48.881634970195854, "grad_norm": 3.631925106048584, "learning_rate": 5.113823445926768e-05, "loss": 0.0006503153592348099, "step": 172210 }, { "epoch": 48.88447346011922, "grad_norm": 0.06509058177471161, "learning_rate": 5.113539596934431e-05, "loss": 0.00139458030462265, "step": 172220 }, { "epoch": 48.88731195004258, "grad_norm": 0.19235949218273163, "learning_rate": 5.113255747942095e-05, "loss": 0.0012407789006829261, "step": 172230 }, { "epoch": 48.890150439965936, "grad_norm": 6.652499198913574, "learning_rate": 5.112971898949759e-05, "loss": 0.002552397921681404, "step": 172240 }, { "epoch": 48.8929889298893, "grad_norm": 0.0216253399848938, "learning_rate": 5.1126880499574224e-05, "loss": 0.00021348092705011367, "step": 172250 }, { "epoch": 48.89582741981266, "grad_norm": 1.8731006383895874, "learning_rate": 5.1124042009650865e-05, "loss": 0.003294369950890541, "step": 172260 }, { "epoch": 48.89866590973602, "grad_norm": 0.526324987411499, "learning_rate": 5.112120351972751e-05, "loss": 0.008514520525932313, "step": 172270 }, { "epoch": 48.90150439965938, "grad_norm": 0.10516509413719177, "learning_rate": 5.111836502980415e-05, "loss": 0.0016142025589942933, "step": 172280 }, { "epoch": 48.90434288958274, "grad_norm": 0.1404300183057785, "learning_rate": 5.111552653988079e-05, "loss": 0.00027407445013523103, "step": 172290 }, { "epoch": 48.907181379506106, "grad_norm": 0.036318257451057434, "learning_rate": 5.1112688049957424e-05, "loss": 0.0001265963539481163, "step": 172300 }, { "epoch": 48.91001986942946, "grad_norm": 0.08049097657203674, "learning_rate": 5.1109849560034065e-05, "loss": 0.0008900707587599754, "step": 172310 }, { "epoch": 48.912858359352825, "grad_norm": 0.029953401535749435, "learning_rate": 5.11070110701107e-05, "loss": 0.0002869933843612671, "step": 172320 }, { "epoch": 48.91569684927619, "grad_norm": 0.04234524816274643, "learning_rate": 5.110417258018735e-05, "loss": 0.0003334585577249527, "step": 172330 }, { "epoch": 48.91853533919954, "grad_norm": 0.44163966178894043, "learning_rate": 5.110133409026399e-05, "loss": 0.0008081404492259025, "step": 172340 }, { "epoch": 48.921373829122906, "grad_norm": 1.261939525604248, "learning_rate": 5.1098495600340624e-05, "loss": 0.0005803788080811501, "step": 172350 }, { "epoch": 48.92421231904627, "grad_norm": 0.002357468707486987, "learning_rate": 5.109565711041726e-05, "loss": 0.00038174744695425035, "step": 172360 }, { "epoch": 48.927050808969625, "grad_norm": 0.012548476457595825, "learning_rate": 5.10928186204939e-05, "loss": 0.0003977200016379356, "step": 172370 }, { "epoch": 48.92988929889299, "grad_norm": 0.05203751474618912, "learning_rate": 5.1089980130570534e-05, "loss": 0.00416593886911869, "step": 172380 }, { "epoch": 48.93272778881635, "grad_norm": 0.1796775609254837, "learning_rate": 5.1087141640647176e-05, "loss": 0.0004849078133702278, "step": 172390 }, { "epoch": 48.935566278739714, "grad_norm": 2.172093629837036, "learning_rate": 5.1084303150723824e-05, "loss": 0.0021058842539787292, "step": 172400 }, { "epoch": 48.93840476866307, "grad_norm": 0.00900931004434824, "learning_rate": 5.108146466080046e-05, "loss": 0.001914515346288681, "step": 172410 }, { "epoch": 48.94124325858643, "grad_norm": 12.580336570739746, "learning_rate": 5.10786261708771e-05, "loss": 0.008826524019241333, "step": 172420 }, { "epoch": 48.944081748509795, "grad_norm": 0.0411706380546093, "learning_rate": 5.1075787680953735e-05, "loss": 0.0003944525495171547, "step": 172430 }, { "epoch": 48.94692023843315, "grad_norm": 0.003917935770004988, "learning_rate": 5.1072949191030376e-05, "loss": 0.003803454339504242, "step": 172440 }, { "epoch": 48.949758728356514, "grad_norm": 0.0171451848000288, "learning_rate": 5.107011070110701e-05, "loss": 0.0003707585856318474, "step": 172450 }, { "epoch": 48.95259721827988, "grad_norm": 11.978520393371582, "learning_rate": 5.1067272211183645e-05, "loss": 0.0018452901393175125, "step": 172460 }, { "epoch": 48.95543570820323, "grad_norm": 2.012876272201538, "learning_rate": 5.106443372126029e-05, "loss": 0.00039653386920690536, "step": 172470 }, { "epoch": 48.958274198126595, "grad_norm": 10.661269187927246, "learning_rate": 5.1061595231336935e-05, "loss": 0.005534153059124946, "step": 172480 }, { "epoch": 48.96111268804996, "grad_norm": 0.034276001155376434, "learning_rate": 5.105875674141357e-05, "loss": 0.0002475826069712639, "step": 172490 }, { "epoch": 48.96395117797332, "grad_norm": 0.011962748132646084, "learning_rate": 5.105591825149021e-05, "loss": 0.0003346733748912811, "step": 172500 }, { "epoch": 48.96395117797332, "eval_accuracy": 0.9825777325618363, "eval_loss": 0.06676778942346573, "eval_runtime": 39.6186, "eval_samples_per_second": 396.96, "eval_steps_per_second": 6.209, "step": 172500 }, { "epoch": 48.96678966789668, "grad_norm": 0.619348406791687, "learning_rate": 5.1053079761566845e-05, "loss": 0.00602128766477108, "step": 172510 }, { "epoch": 48.96962815782004, "grad_norm": 0.029161319136619568, "learning_rate": 5.1050241271643487e-05, "loss": 0.0005292337387800217, "step": 172520 }, { "epoch": 48.9724666477434, "grad_norm": 6.128579616546631, "learning_rate": 5.1047402781720135e-05, "loss": 0.003255309909582138, "step": 172530 }, { "epoch": 48.97530513766676, "grad_norm": 0.0520092248916626, "learning_rate": 5.104456429179677e-05, "loss": 0.00036361757665872576, "step": 172540 }, { "epoch": 48.97814362759012, "grad_norm": 0.029282990843057632, "learning_rate": 5.104172580187341e-05, "loss": 0.0013528632000088692, "step": 172550 }, { "epoch": 48.980982117513484, "grad_norm": 0.11386149376630783, "learning_rate": 5.1038887311950045e-05, "loss": 0.0017364857718348502, "step": 172560 }, { "epoch": 48.98382060743684, "grad_norm": 0.10917927324771881, "learning_rate": 5.103604882202668e-05, "loss": 0.003189971297979355, "step": 172570 }, { "epoch": 48.9866590973602, "grad_norm": 0.1376745104789734, "learning_rate": 5.103321033210332e-05, "loss": 0.002772531658411026, "step": 172580 }, { "epoch": 48.989497587283566, "grad_norm": 0.003207562491297722, "learning_rate": 5.1030371842179956e-05, "loss": 0.0048079386353492735, "step": 172590 }, { "epoch": 48.99233607720693, "grad_norm": 0.7738311886787415, "learning_rate": 5.1027533352256604e-05, "loss": 0.0002995925024151802, "step": 172600 }, { "epoch": 48.995174567130285, "grad_norm": 0.03410785645246506, "learning_rate": 5.1024694862333245e-05, "loss": 0.0001852136105298996, "step": 172610 }, { "epoch": 48.99801305705365, "grad_norm": 1.2903273105621338, "learning_rate": 5.102185637240988e-05, "loss": 0.00029754824936389923, "step": 172620 }, { "epoch": 49.00085154697701, "grad_norm": 0.08021868020296097, "learning_rate": 5.101901788248652e-05, "loss": 0.0013607312925159932, "step": 172630 }, { "epoch": 49.003690036900366, "grad_norm": 0.04134587571024895, "learning_rate": 5.1016179392563156e-05, "loss": 0.002543826401233673, "step": 172640 }, { "epoch": 49.00652852682373, "grad_norm": 0.010404832661151886, "learning_rate": 5.10133409026398e-05, "loss": 0.004188922420144081, "step": 172650 }, { "epoch": 49.00936701674709, "grad_norm": 0.036409247666597366, "learning_rate": 5.101050241271643e-05, "loss": 0.0005296485498547554, "step": 172660 }, { "epoch": 49.012205506670455, "grad_norm": 0.014150423929095268, "learning_rate": 5.100766392279308e-05, "loss": 0.003531962260603905, "step": 172670 }, { "epoch": 49.01504399659381, "grad_norm": 0.15591579675674438, "learning_rate": 5.100482543286972e-05, "loss": 0.0007508901879191398, "step": 172680 }, { "epoch": 49.01788248651717, "grad_norm": 0.02846970409154892, "learning_rate": 5.1001986942946356e-05, "loss": 0.00049362163990736, "step": 172690 }, { "epoch": 49.020720976440536, "grad_norm": 0.07327206432819366, "learning_rate": 5.099914845302299e-05, "loss": 0.0027297196909785272, "step": 172700 }, { "epoch": 49.02355946636389, "grad_norm": 0.007595105096697807, "learning_rate": 5.099630996309963e-05, "loss": 0.00900234878063202, "step": 172710 }, { "epoch": 49.026397956287255, "grad_norm": 0.021133890375494957, "learning_rate": 5.099347147317627e-05, "loss": 0.0002221466973423958, "step": 172720 }, { "epoch": 49.02923644621062, "grad_norm": 0.3168136179447174, "learning_rate": 5.0990632983252915e-05, "loss": 0.0028120720759034156, "step": 172730 }, { "epoch": 49.032074936133974, "grad_norm": 0.006599833723157644, "learning_rate": 5.0987794493329556e-05, "loss": 0.004768143594264984, "step": 172740 }, { "epoch": 49.03491342605734, "grad_norm": 0.008820326067507267, "learning_rate": 5.098495600340619e-05, "loss": 0.0019353110343217849, "step": 172750 }, { "epoch": 49.0377519159807, "grad_norm": 0.050884801894426346, "learning_rate": 5.098211751348283e-05, "loss": 0.001988324150443077, "step": 172760 }, { "epoch": 49.04059040590406, "grad_norm": 0.2703840136528015, "learning_rate": 5.097927902355947e-05, "loss": 0.0016085365787148476, "step": 172770 }, { "epoch": 49.04342889582742, "grad_norm": 0.002660023747012019, "learning_rate": 5.097644053363611e-05, "loss": 0.00018938276916742326, "step": 172780 }, { "epoch": 49.04626738575078, "grad_norm": 1.5721036195755005, "learning_rate": 5.097360204371274e-05, "loss": 0.0005204467102885246, "step": 172790 }, { "epoch": 49.049105875674144, "grad_norm": 0.12435431778430939, "learning_rate": 5.097076355378939e-05, "loss": 0.0009707717224955558, "step": 172800 }, { "epoch": 49.0519443655975, "grad_norm": 0.26744344830513, "learning_rate": 5.0967925063866025e-05, "loss": 0.0004173416644334793, "step": 172810 }, { "epoch": 49.05478285552086, "grad_norm": 0.4778245985507965, "learning_rate": 5.096508657394267e-05, "loss": 0.0017566777765750884, "step": 172820 }, { "epoch": 49.057621345444225, "grad_norm": 0.025775248184800148, "learning_rate": 5.09622480840193e-05, "loss": 0.0026230361312627793, "step": 172830 }, { "epoch": 49.06045983536758, "grad_norm": 0.010363045148551464, "learning_rate": 5.095940959409594e-05, "loss": 0.001697605475783348, "step": 172840 }, { "epoch": 49.063298325290944, "grad_norm": 0.011164270341396332, "learning_rate": 5.095657110417258e-05, "loss": 0.0006647951900959015, "step": 172850 }, { "epoch": 49.06613681521431, "grad_norm": 0.01727045141160488, "learning_rate": 5.095373261424922e-05, "loss": 0.00027414541691541674, "step": 172860 }, { "epoch": 49.06897530513767, "grad_norm": 1.3082969188690186, "learning_rate": 5.095089412432587e-05, "loss": 0.0008059265092015266, "step": 172870 }, { "epoch": 49.071813795061026, "grad_norm": 0.36574602127075195, "learning_rate": 5.09480556344025e-05, "loss": 0.00023948065936565399, "step": 172880 }, { "epoch": 49.07465228498439, "grad_norm": 0.34330683946609497, "learning_rate": 5.094521714447914e-05, "loss": 0.0006063353270292282, "step": 172890 }, { "epoch": 49.07749077490775, "grad_norm": 0.03180843964219093, "learning_rate": 5.094237865455578e-05, "loss": 0.0037816666066646577, "step": 172900 }, { "epoch": 49.08032926483111, "grad_norm": 0.05602617561817169, "learning_rate": 5.093954016463242e-05, "loss": 0.0007875155657529831, "step": 172910 }, { "epoch": 49.08316775475447, "grad_norm": 0.5401623845100403, "learning_rate": 5.0936701674709053e-05, "loss": 0.0004502752795815468, "step": 172920 }, { "epoch": 49.08600624467783, "grad_norm": 3.1806912422180176, "learning_rate": 5.09338631847857e-05, "loss": 0.0006189092993736268, "step": 172930 }, { "epoch": 49.08884473460119, "grad_norm": 0.29677480459213257, "learning_rate": 5.0931024694862336e-05, "loss": 0.00041602905839681624, "step": 172940 }, { "epoch": 49.09168322452455, "grad_norm": 0.005201929714530706, "learning_rate": 5.092818620493898e-05, "loss": 9.885691106319428e-05, "step": 172950 }, { "epoch": 49.094521714447914, "grad_norm": 0.1378512978553772, "learning_rate": 5.092534771501561e-05, "loss": 0.0004422139376401901, "step": 172960 }, { "epoch": 49.09736020437128, "grad_norm": 0.6512723565101624, "learning_rate": 5.0922509225092254e-05, "loss": 0.004430752992630005, "step": 172970 }, { "epoch": 49.10019869429463, "grad_norm": 3.28786039352417, "learning_rate": 5.091967073516889e-05, "loss": 0.000591135211288929, "step": 172980 }, { "epoch": 49.103037184217996, "grad_norm": 0.008921501226723194, "learning_rate": 5.091683224524553e-05, "loss": 0.013212184607982635, "step": 172990 }, { "epoch": 49.10587567414136, "grad_norm": 0.11762204766273499, "learning_rate": 5.091399375532218e-05, "loss": 0.0005966555327177048, "step": 173000 }, { "epoch": 49.10587567414136, "eval_accuracy": 0.9815603738793158, "eval_loss": 0.07495671510696411, "eval_runtime": 43.1167, "eval_samples_per_second": 364.754, "eval_steps_per_second": 5.705, "step": 173000 }, { "epoch": 49.108714164064715, "grad_norm": 0.027098264545202255, "learning_rate": 5.091115526539881e-05, "loss": 0.011499625444412232, "step": 173010 }, { "epoch": 49.11155265398808, "grad_norm": 0.02584156207740307, "learning_rate": 5.0908316775475454e-05, "loss": 0.00047681313008069993, "step": 173020 }, { "epoch": 49.11439114391144, "grad_norm": 0.026556219905614853, "learning_rate": 5.090547828555209e-05, "loss": 0.0033132866024971007, "step": 173030 }, { "epoch": 49.1172296338348, "grad_norm": 0.005407451651990414, "learning_rate": 5.090263979562872e-05, "loss": 0.0005501424893736839, "step": 173040 }, { "epoch": 49.12006812375816, "grad_norm": 0.053606241941452026, "learning_rate": 5.0899801305705364e-05, "loss": 0.00040723495185375215, "step": 173050 }, { "epoch": 49.12290661368152, "grad_norm": 0.05409738048911095, "learning_rate": 5.0896962815782e-05, "loss": 0.00012383293360471726, "step": 173060 }, { "epoch": 49.125745103604885, "grad_norm": 0.008262792602181435, "learning_rate": 5.089412432585865e-05, "loss": 0.0008053552359342575, "step": 173070 }, { "epoch": 49.12858359352824, "grad_norm": 0.14966638386249542, "learning_rate": 5.089128583593529e-05, "loss": 0.00021353866904973983, "step": 173080 }, { "epoch": 49.131422083451604, "grad_norm": 0.021692395210266113, "learning_rate": 5.088844734601192e-05, "loss": 0.0006325678899884224, "step": 173090 }, { "epoch": 49.134260573374966, "grad_norm": 0.025693045929074287, "learning_rate": 5.0885608856088564e-05, "loss": 0.00026172026991844177, "step": 173100 }, { "epoch": 49.13709906329832, "grad_norm": 0.0670175701379776, "learning_rate": 5.08827703661652e-05, "loss": 0.00021873172372579574, "step": 173110 }, { "epoch": 49.139937553221685, "grad_norm": 0.007444949354976416, "learning_rate": 5.087993187624184e-05, "loss": 0.00015151184052228928, "step": 173120 }, { "epoch": 49.14277604314505, "grad_norm": 0.017048411071300507, "learning_rate": 5.087709338631849e-05, "loss": 0.0013564541935920716, "step": 173130 }, { "epoch": 49.14561453306841, "grad_norm": 0.03685367852449417, "learning_rate": 5.087425489639512e-05, "loss": 0.006111228466033935, "step": 173140 }, { "epoch": 49.14845302299177, "grad_norm": 0.6952961087226868, "learning_rate": 5.0871416406471764e-05, "loss": 0.0035687334835529327, "step": 173150 }, { "epoch": 49.15129151291513, "grad_norm": 2.9574217796325684, "learning_rate": 5.08685779165484e-05, "loss": 0.0006376389414072037, "step": 173160 }, { "epoch": 49.15413000283849, "grad_norm": 0.018381044268608093, "learning_rate": 5.0865739426625034e-05, "loss": 0.0005066081881523133, "step": 173170 }, { "epoch": 49.15696849276185, "grad_norm": 0.07152307778596878, "learning_rate": 5.0862900936701675e-05, "loss": 0.0017794840037822723, "step": 173180 }, { "epoch": 49.15980698268521, "grad_norm": 0.026165487244725227, "learning_rate": 5.086006244677831e-05, "loss": 0.0008303778246045113, "step": 173190 }, { "epoch": 49.162645472608574, "grad_norm": 0.013135927729308605, "learning_rate": 5.085722395685496e-05, "loss": 0.00017598234117031098, "step": 173200 }, { "epoch": 49.16548396253193, "grad_norm": 0.005901908967643976, "learning_rate": 5.08543854669316e-05, "loss": 0.00024753585457801817, "step": 173210 }, { "epoch": 49.16832245245529, "grad_norm": 0.01396728865802288, "learning_rate": 5.0851546977008234e-05, "loss": 0.005374399572610855, "step": 173220 }, { "epoch": 49.171160942378656, "grad_norm": 0.3182765543460846, "learning_rate": 5.0848708487084875e-05, "loss": 0.001267799362540245, "step": 173230 }, { "epoch": 49.17399943230202, "grad_norm": 0.6491089463233948, "learning_rate": 5.084586999716151e-05, "loss": 0.0007734855636954308, "step": 173240 }, { "epoch": 49.176837922225374, "grad_norm": 1.0804539918899536, "learning_rate": 5.084303150723815e-05, "loss": 0.0012430470436811448, "step": 173250 }, { "epoch": 49.17967641214874, "grad_norm": 0.03199104964733124, "learning_rate": 5.0840193017314786e-05, "loss": 0.0003736786544322968, "step": 173260 }, { "epoch": 49.1825149020721, "grad_norm": 0.2978309392929077, "learning_rate": 5.0837354527391434e-05, "loss": 0.0021051745861768723, "step": 173270 }, { "epoch": 49.185353391995456, "grad_norm": 0.048829466104507446, "learning_rate": 5.083451603746807e-05, "loss": 0.0061610259115695955, "step": 173280 }, { "epoch": 49.18819188191882, "grad_norm": 0.02275141142308712, "learning_rate": 5.083167754754471e-05, "loss": 0.003460334613919258, "step": 173290 }, { "epoch": 49.19103037184218, "grad_norm": 0.2705031633377075, "learning_rate": 5.0828839057621344e-05, "loss": 0.005852743238210678, "step": 173300 }, { "epoch": 49.19386886176554, "grad_norm": 0.1626574546098709, "learning_rate": 5.0826000567697986e-05, "loss": 0.023170028626918793, "step": 173310 }, { "epoch": 49.1967073516889, "grad_norm": 0.10004540532827377, "learning_rate": 5.082316207777462e-05, "loss": 0.010315421223640441, "step": 173320 }, { "epoch": 49.19954584161226, "grad_norm": 0.05717096105217934, "learning_rate": 5.082032358785127e-05, "loss": 0.008979750424623489, "step": 173330 }, { "epoch": 49.202384331535626, "grad_norm": 0.0746074989438057, "learning_rate": 5.081748509792791e-05, "loss": 0.00036601759493350985, "step": 173340 }, { "epoch": 49.20522282145898, "grad_norm": 0.13937285542488098, "learning_rate": 5.0814646608004544e-05, "loss": 0.00044565629214048385, "step": 173350 }, { "epoch": 49.208061311382345, "grad_norm": 0.5778933167457581, "learning_rate": 5.0811808118081186e-05, "loss": 0.0036432269960641863, "step": 173360 }, { "epoch": 49.21089980130571, "grad_norm": 0.5561633706092834, "learning_rate": 5.080896962815782e-05, "loss": 0.003316197544336319, "step": 173370 }, { "epoch": 49.21373829122906, "grad_norm": 0.24399779736995697, "learning_rate": 5.080613113823446e-05, "loss": 0.0013607284054160118, "step": 173380 }, { "epoch": 49.216576781152426, "grad_norm": 0.034732282161712646, "learning_rate": 5.0803292648311096e-05, "loss": 0.0014997366815805435, "step": 173390 }, { "epoch": 49.21941527107579, "grad_norm": 0.002865035319700837, "learning_rate": 5.0800454158387745e-05, "loss": 0.002116484194993973, "step": 173400 }, { "epoch": 49.22225376099915, "grad_norm": 0.011179662309587002, "learning_rate": 5.079761566846438e-05, "loss": 0.0027124907821416853, "step": 173410 }, { "epoch": 49.22509225092251, "grad_norm": 0.6900181174278259, "learning_rate": 5.079477717854102e-05, "loss": 0.0016047455370426177, "step": 173420 }, { "epoch": 49.22793074084587, "grad_norm": 0.22079913318157196, "learning_rate": 5.0791938688617655e-05, "loss": 0.0024692652747035026, "step": 173430 }, { "epoch": 49.23076923076923, "grad_norm": 0.018613586202263832, "learning_rate": 5.0789100198694297e-05, "loss": 0.002295869030058384, "step": 173440 }, { "epoch": 49.23360772069259, "grad_norm": 0.5456721782684326, "learning_rate": 5.078626170877093e-05, "loss": 0.002402189187705517, "step": 173450 }, { "epoch": 49.23644621061595, "grad_norm": 0.8755607604980469, "learning_rate": 5.078342321884757e-05, "loss": 0.0020458757877349853, "step": 173460 }, { "epoch": 49.239284700539315, "grad_norm": 0.01801050454378128, "learning_rate": 5.078058472892422e-05, "loss": 0.003946931660175323, "step": 173470 }, { "epoch": 49.24212319046267, "grad_norm": 2.877392530441284, "learning_rate": 5.0777746239000855e-05, "loss": 0.011103591322898865, "step": 173480 }, { "epoch": 49.244961680386034, "grad_norm": 3.178103446960449, "learning_rate": 5.07749077490775e-05, "loss": 0.008416242897510529, "step": 173490 }, { "epoch": 49.2478001703094, "grad_norm": 0.8898977637290955, "learning_rate": 5.077206925915413e-05, "loss": 0.004933030158281326, "step": 173500 }, { "epoch": 49.2478001703094, "eval_accuracy": 0.9808609397850829, "eval_loss": 0.07547703385353088, "eval_runtime": 46.8945, "eval_samples_per_second": 335.37, "eval_steps_per_second": 5.246, "step": 173500 }, { "epoch": 49.25063866023276, "grad_norm": 0.37797850370407104, "learning_rate": 5.0769230769230766e-05, "loss": 0.00035095755010843276, "step": 173510 }, { "epoch": 49.253477150156115, "grad_norm": 0.22879520058631897, "learning_rate": 5.076639227930741e-05, "loss": 0.00020421911031007767, "step": 173520 }, { "epoch": 49.25631564007948, "grad_norm": 0.011219787411391735, "learning_rate": 5.0763553789384055e-05, "loss": 0.0004050819203257561, "step": 173530 }, { "epoch": 49.25915413000284, "grad_norm": 0.01793583668768406, "learning_rate": 5.076071529946069e-05, "loss": 0.00025122351944446565, "step": 173540 }, { "epoch": 49.2619926199262, "grad_norm": 0.0035673121456056833, "learning_rate": 5.075787680953733e-05, "loss": 0.00037287063896656035, "step": 173550 }, { "epoch": 49.26483110984956, "grad_norm": 0.10243482142686844, "learning_rate": 5.0755038319613966e-05, "loss": 0.0016485050320625306, "step": 173560 }, { "epoch": 49.26766959977292, "grad_norm": 0.019190741702914238, "learning_rate": 5.075219982969061e-05, "loss": 0.0016065247356891633, "step": 173570 }, { "epoch": 49.27050808969628, "grad_norm": 0.09686889499425888, "learning_rate": 5.074936133976724e-05, "loss": 0.006855979561805725, "step": 173580 }, { "epoch": 49.27334657961964, "grad_norm": 0.006627015303820372, "learning_rate": 5.074652284984388e-05, "loss": 0.006238134205341339, "step": 173590 }, { "epoch": 49.276185069543004, "grad_norm": 0.005685140378773212, "learning_rate": 5.074368435992053e-05, "loss": 0.0008075196295976639, "step": 173600 }, { "epoch": 49.27902355946637, "grad_norm": 0.013804112561047077, "learning_rate": 5.0740845869997166e-05, "loss": 0.0003884410485625267, "step": 173610 }, { "epoch": 49.28186204938972, "grad_norm": 3.585510015487671, "learning_rate": 5.073800738007381e-05, "loss": 0.006490004807710647, "step": 173620 }, { "epoch": 49.284700539313086, "grad_norm": 0.07122096419334412, "learning_rate": 5.073516889015044e-05, "loss": 0.008641113340854645, "step": 173630 }, { "epoch": 49.28753902923645, "grad_norm": 0.16109275817871094, "learning_rate": 5.0732330400227077e-05, "loss": 0.005090145766735077, "step": 173640 }, { "epoch": 49.290377519159804, "grad_norm": 0.10976337641477585, "learning_rate": 5.072949191030372e-05, "loss": 0.0010920483618974686, "step": 173650 }, { "epoch": 49.29321600908317, "grad_norm": 0.021452827379107475, "learning_rate": 5.0726653420380366e-05, "loss": 0.0005086841061711312, "step": 173660 }, { "epoch": 49.29605449900653, "grad_norm": 0.08144129812717438, "learning_rate": 5.0723814930457e-05, "loss": 0.0037302657961845397, "step": 173670 }, { "epoch": 49.298892988929886, "grad_norm": 0.9109154939651489, "learning_rate": 5.072097644053364e-05, "loss": 0.00045267120003700256, "step": 173680 }, { "epoch": 49.30173147885325, "grad_norm": 2.0825037956237793, "learning_rate": 5.071813795061028e-05, "loss": 0.004480123519897461, "step": 173690 }, { "epoch": 49.30456996877661, "grad_norm": 0.01335460226982832, "learning_rate": 5.071529946068692e-05, "loss": 0.0007884535938501358, "step": 173700 }, { "epoch": 49.307408458699975, "grad_norm": 0.4388771951198578, "learning_rate": 5.071246097076355e-05, "loss": 0.0002724427729845047, "step": 173710 }, { "epoch": 49.31024694862333, "grad_norm": 0.03757588937878609, "learning_rate": 5.0709622480840194e-05, "loss": 0.000273747555911541, "step": 173720 }, { "epoch": 49.31308543854669, "grad_norm": 0.0856081172823906, "learning_rate": 5.070678399091684e-05, "loss": 0.0005198299884796142, "step": 173730 }, { "epoch": 49.315923928470056, "grad_norm": 0.011354420334100723, "learning_rate": 5.070394550099348e-05, "loss": 0.007217292487621307, "step": 173740 }, { "epoch": 49.31876241839341, "grad_norm": 14.397234916687012, "learning_rate": 5.070110701107011e-05, "loss": 0.010606282949447631, "step": 173750 }, { "epoch": 49.321600908316775, "grad_norm": 0.16175386309623718, "learning_rate": 5.069826852114675e-05, "loss": 0.000547834113240242, "step": 173760 }, { "epoch": 49.32443939824014, "grad_norm": 2.616548776626587, "learning_rate": 5.069543003122339e-05, "loss": 0.0020261913537979125, "step": 173770 }, { "epoch": 49.32727788816349, "grad_norm": 0.20592527091503143, "learning_rate": 5.069259154130003e-05, "loss": 0.001213437505066395, "step": 173780 }, { "epoch": 49.330116378086856, "grad_norm": 0.9774052500724792, "learning_rate": 5.068975305137666e-05, "loss": 0.004590088874101639, "step": 173790 }, { "epoch": 49.33295486801022, "grad_norm": 10.430704116821289, "learning_rate": 5.068691456145331e-05, "loss": 0.0026701465249061584, "step": 173800 }, { "epoch": 49.33579335793358, "grad_norm": 0.30863186717033386, "learning_rate": 5.068407607152995e-05, "loss": 0.0003650711849331856, "step": 173810 }, { "epoch": 49.33863184785694, "grad_norm": 0.015812506899237633, "learning_rate": 5.068123758160659e-05, "loss": 0.0004943538457155228, "step": 173820 }, { "epoch": 49.3414703377803, "grad_norm": 0.20319175720214844, "learning_rate": 5.067839909168323e-05, "loss": 0.0018726848065853119, "step": 173830 }, { "epoch": 49.344308827703664, "grad_norm": 0.03498397022485733, "learning_rate": 5.0675560601759863e-05, "loss": 0.0016406534239649772, "step": 173840 }, { "epoch": 49.34714731762702, "grad_norm": 0.29165568947792053, "learning_rate": 5.0672722111836505e-05, "loss": 0.00043859966099262236, "step": 173850 }, { "epoch": 49.34998580755038, "grad_norm": 0.04021596536040306, "learning_rate": 5.066988362191315e-05, "loss": 9.265337139368058e-05, "step": 173860 }, { "epoch": 49.352824297473745, "grad_norm": 0.0022128077689558268, "learning_rate": 5.066704513198979e-05, "loss": 0.00011959262192249299, "step": 173870 }, { "epoch": 49.35566278739711, "grad_norm": 0.025816572830080986, "learning_rate": 5.066420664206642e-05, "loss": 0.0001500103622674942, "step": 173880 }, { "epoch": 49.358501277320464, "grad_norm": 0.0027741296216845512, "learning_rate": 5.0661368152143064e-05, "loss": 0.00010170452296733856, "step": 173890 }, { "epoch": 49.36133976724383, "grad_norm": 0.02407781593501568, "learning_rate": 5.06585296622197e-05, "loss": 0.0012634983286261559, "step": 173900 }, { "epoch": 49.36417825716719, "grad_norm": 0.0385005883872509, "learning_rate": 5.065569117229634e-05, "loss": 0.0001909114420413971, "step": 173910 }, { "epoch": 49.367016747090545, "grad_norm": 0.007314893417060375, "learning_rate": 5.0652852682372974e-05, "loss": 0.0002542346715927124, "step": 173920 }, { "epoch": 49.36985523701391, "grad_norm": 15.232815742492676, "learning_rate": 5.065001419244962e-05, "loss": 0.017137892544269562, "step": 173930 }, { "epoch": 49.37269372693727, "grad_norm": 0.563315749168396, "learning_rate": 5.0647175702526264e-05, "loss": 0.00025378037244081495, "step": 173940 }, { "epoch": 49.37553221686063, "grad_norm": 0.14993268251419067, "learning_rate": 5.06443372126029e-05, "loss": 0.0005484612658619881, "step": 173950 }, { "epoch": 49.37837070678399, "grad_norm": 7.407670497894287, "learning_rate": 5.064149872267954e-05, "loss": 0.001347305066883564, "step": 173960 }, { "epoch": 49.38120919670735, "grad_norm": 4.9700927734375, "learning_rate": 5.0638660232756174e-05, "loss": 0.00435202457010746, "step": 173970 }, { "epoch": 49.384047686630716, "grad_norm": 17.92072296142578, "learning_rate": 5.063582174283281e-05, "loss": 0.006987900286912918, "step": 173980 }, { "epoch": 49.38688617655407, "grad_norm": 0.7848578691482544, "learning_rate": 5.063298325290945e-05, "loss": 0.0013462882488965988, "step": 173990 }, { "epoch": 49.389724666477434, "grad_norm": 2.1665947437286377, "learning_rate": 5.06301447629861e-05, "loss": 0.000705534964799881, "step": 174000 }, { "epoch": 49.389724666477434, "eval_accuracy": 0.9815603738793158, "eval_loss": 0.07514899969100952, "eval_runtime": 52.6857, "eval_samples_per_second": 298.506, "eval_steps_per_second": 4.669, "step": 174000 }, { "epoch": 49.3925631564008, "grad_norm": 15.421875953674316, "learning_rate": 5.062730627306273e-05, "loss": 0.02188076078891754, "step": 174010 }, { "epoch": 49.39540164632415, "grad_norm": 0.1753579080104828, "learning_rate": 5.0624467783139374e-05, "loss": 0.001162395440042019, "step": 174020 }, { "epoch": 49.398240136247516, "grad_norm": 0.03324950486421585, "learning_rate": 5.062162929321601e-05, "loss": 0.004558925330638885, "step": 174030 }, { "epoch": 49.40107862617088, "grad_norm": 0.019120924174785614, "learning_rate": 5.061879080329265e-05, "loss": 0.0005900561809539795, "step": 174040 }, { "epoch": 49.403917116094235, "grad_norm": 0.007949891500175, "learning_rate": 5.0615952313369285e-05, "loss": 0.0009469294920563698, "step": 174050 }, { "epoch": 49.4067556060176, "grad_norm": 0.06185964494943619, "learning_rate": 5.061311382344593e-05, "loss": 0.0029897572472691535, "step": 174060 }, { "epoch": 49.40959409594096, "grad_norm": 0.02488011121749878, "learning_rate": 5.0610275333522574e-05, "loss": 0.00034469421952962873, "step": 174070 }, { "epoch": 49.41243258586432, "grad_norm": 0.01624230481684208, "learning_rate": 5.060743684359921e-05, "loss": 0.006592446565628051, "step": 174080 }, { "epoch": 49.41527107578768, "grad_norm": 0.01234495174139738, "learning_rate": 5.060459835367585e-05, "loss": 0.00145492572337389, "step": 174090 }, { "epoch": 49.41810956571104, "grad_norm": 0.2703849971294403, "learning_rate": 5.0601759863752485e-05, "loss": 0.0006024504080414772, "step": 174100 }, { "epoch": 49.420948055634405, "grad_norm": 0.002407326828688383, "learning_rate": 5.059892137382912e-05, "loss": 0.00017177257686853408, "step": 174110 }, { "epoch": 49.42378654555776, "grad_norm": 0.005917589645832777, "learning_rate": 5.059608288390576e-05, "loss": 0.0004075814038515091, "step": 174120 }, { "epoch": 49.42662503548112, "grad_norm": 0.03331804275512695, "learning_rate": 5.059324439398241e-05, "loss": 0.00024207774549722672, "step": 174130 }, { "epoch": 49.429463525404486, "grad_norm": 0.010127008892595768, "learning_rate": 5.0590405904059044e-05, "loss": 0.00020511839538812638, "step": 174140 }, { "epoch": 49.43230201532784, "grad_norm": 0.5246769189834595, "learning_rate": 5.0587567414135685e-05, "loss": 0.002251950092613697, "step": 174150 }, { "epoch": 49.435140505251205, "grad_norm": 0.10512787103652954, "learning_rate": 5.058472892421232e-05, "loss": 0.0005064113065600395, "step": 174160 }, { "epoch": 49.43797899517457, "grad_norm": 0.021720435470342636, "learning_rate": 5.058189043428896e-05, "loss": 0.0004912951961159706, "step": 174170 }, { "epoch": 49.44081748509793, "grad_norm": 0.013641199097037315, "learning_rate": 5.0579051944365596e-05, "loss": 0.0003964968025684357, "step": 174180 }, { "epoch": 49.44365597502129, "grad_norm": 0.48330914974212646, "learning_rate": 5.057621345444224e-05, "loss": 0.001940724439918995, "step": 174190 }, { "epoch": 49.44649446494465, "grad_norm": 0.019834591075778008, "learning_rate": 5.0573374964518885e-05, "loss": 0.002898728288710117, "step": 174200 }, { "epoch": 49.44933295486801, "grad_norm": 0.0712965652346611, "learning_rate": 5.057053647459552e-05, "loss": 0.00016094241291284562, "step": 174210 }, { "epoch": 49.45217144479137, "grad_norm": 0.04110801964998245, "learning_rate": 5.0567697984672154e-05, "loss": 0.0004387697204947472, "step": 174220 }, { "epoch": 49.45500993471473, "grad_norm": 0.5176910161972046, "learning_rate": 5.0564859494748796e-05, "loss": 0.0002965565770864487, "step": 174230 }, { "epoch": 49.457848424638094, "grad_norm": 0.23943549394607544, "learning_rate": 5.056202100482543e-05, "loss": 0.00024407319724559784, "step": 174240 }, { "epoch": 49.46068691456146, "grad_norm": 2.170217275619507, "learning_rate": 5.055918251490207e-05, "loss": 0.0005347970873117447, "step": 174250 }, { "epoch": 49.46352540448481, "grad_norm": 0.3291780650615692, "learning_rate": 5.055634402497872e-05, "loss": 0.0011090101674199105, "step": 174260 }, { "epoch": 49.466363894408175, "grad_norm": 0.07212573289871216, "learning_rate": 5.0553505535055354e-05, "loss": 0.0009600302204489708, "step": 174270 }, { "epoch": 49.46920238433154, "grad_norm": 0.10937491804361343, "learning_rate": 5.0550667045131996e-05, "loss": 0.001861206814646721, "step": 174280 }, { "epoch": 49.472040874254894, "grad_norm": 0.10568354278802872, "learning_rate": 5.054782855520863e-05, "loss": 0.0003573289141058922, "step": 174290 }, { "epoch": 49.47487936417826, "grad_norm": 0.021108120679855347, "learning_rate": 5.054499006528527e-05, "loss": 0.002158614806830883, "step": 174300 }, { "epoch": 49.47771785410162, "grad_norm": 1.831330418586731, "learning_rate": 5.0542151575361906e-05, "loss": 0.0011845823377370835, "step": 174310 }, { "epoch": 49.480556344024976, "grad_norm": 0.25577807426452637, "learning_rate": 5.053931308543854e-05, "loss": 0.0019349796697497369, "step": 174320 }, { "epoch": 49.48339483394834, "grad_norm": 0.19844917953014374, "learning_rate": 5.0536474595515196e-05, "loss": 0.0053324513137340546, "step": 174330 }, { "epoch": 49.4862333238717, "grad_norm": 0.1396484375, "learning_rate": 5.053363610559183e-05, "loss": 0.0010088635608553886, "step": 174340 }, { "epoch": 49.489071813795064, "grad_norm": 0.040133554488420486, "learning_rate": 5.0530797615668465e-05, "loss": 0.00035756342113018035, "step": 174350 }, { "epoch": 49.49191030371842, "grad_norm": 0.02061900496482849, "learning_rate": 5.0527959125745106e-05, "loss": 0.0005216805264353752, "step": 174360 }, { "epoch": 49.49474879364178, "grad_norm": 0.6208645701408386, "learning_rate": 5.052512063582174e-05, "loss": 0.004311104491353035, "step": 174370 }, { "epoch": 49.497587283565146, "grad_norm": 0.4833066165447235, "learning_rate": 5.052228214589838e-05, "loss": 0.004930291324853897, "step": 174380 }, { "epoch": 49.5004257734885, "grad_norm": 0.7171051502227783, "learning_rate": 5.051944365597502e-05, "loss": 0.0007763464003801346, "step": 174390 }, { "epoch": 49.503264263411864, "grad_norm": 0.015144539065659046, "learning_rate": 5.0516605166051665e-05, "loss": 0.0030331362038850786, "step": 174400 }, { "epoch": 49.50610275333523, "grad_norm": 0.04924998804926872, "learning_rate": 5.0513766676128307e-05, "loss": 0.00020491592586040496, "step": 174410 }, { "epoch": 49.50894124325858, "grad_norm": 4.1399312019348145, "learning_rate": 5.051092818620494e-05, "loss": 0.0027459193021059035, "step": 174420 }, { "epoch": 49.511779733181946, "grad_norm": 0.05186019092798233, "learning_rate": 5.050808969628158e-05, "loss": 0.00500379204750061, "step": 174430 }, { "epoch": 49.51461822310531, "grad_norm": 0.08035076409578323, "learning_rate": 5.050525120635822e-05, "loss": 0.0003254154697060585, "step": 174440 }, { "epoch": 49.51745671302867, "grad_norm": 0.008256973698735237, "learning_rate": 5.050241271643485e-05, "loss": 0.002218375913798809, "step": 174450 }, { "epoch": 49.52029520295203, "grad_norm": 0.0547168031334877, "learning_rate": 5.049957422651151e-05, "loss": 0.0015186768025159836, "step": 174460 }, { "epoch": 49.52313369287539, "grad_norm": 6.839453220367432, "learning_rate": 5.049673573658814e-05, "loss": 0.003158114477992058, "step": 174470 }, { "epoch": 49.52597218279875, "grad_norm": 0.006458666175603867, "learning_rate": 5.0493897246664776e-05, "loss": 0.001749158278107643, "step": 174480 }, { "epoch": 49.52881067272211, "grad_norm": 0.9295017719268799, "learning_rate": 5.049105875674142e-05, "loss": 0.0007466824725270271, "step": 174490 }, { "epoch": 49.53164916264547, "grad_norm": 0.0035789664834737778, "learning_rate": 5.048822026681805e-05, "loss": 0.0028571881353855132, "step": 174500 }, { "epoch": 49.53164916264547, "eval_accuracy": 0.9811152794557131, "eval_loss": 0.0725000724196434, "eval_runtime": 40.7545, "eval_samples_per_second": 385.896, "eval_steps_per_second": 6.036, "step": 174500 }, { "epoch": 49.534487652568835, "grad_norm": 1.3997513055801392, "learning_rate": 5.048538177689469e-05, "loss": 0.0004405876621603966, "step": 174510 }, { "epoch": 49.53732614249219, "grad_norm": 0.022183090448379517, "learning_rate": 5.048254328697133e-05, "loss": 0.0006896954029798508, "step": 174520 }, { "epoch": 49.540164632415554, "grad_norm": 0.1419254094362259, "learning_rate": 5.0479704797047976e-05, "loss": 0.00016508195549249649, "step": 174530 }, { "epoch": 49.54300312233892, "grad_norm": 0.6777351498603821, "learning_rate": 5.047686630712462e-05, "loss": 0.0007809290662407875, "step": 174540 }, { "epoch": 49.54584161226228, "grad_norm": 0.01918463408946991, "learning_rate": 5.047402781720125e-05, "loss": 0.0005490638315677643, "step": 174550 }, { "epoch": 49.548680102185635, "grad_norm": 1.310707449913025, "learning_rate": 5.047118932727789e-05, "loss": 0.002290942333638668, "step": 174560 }, { "epoch": 49.551518592109, "grad_norm": 0.09275130182504654, "learning_rate": 5.046835083735453e-05, "loss": 0.017001134157180787, "step": 174570 }, { "epoch": 49.55435708203236, "grad_norm": 1.2076565027236938, "learning_rate": 5.046551234743116e-05, "loss": 0.0016177715733647346, "step": 174580 }, { "epoch": 49.55719557195572, "grad_norm": 0.011864629574120045, "learning_rate": 5.0462673857507804e-05, "loss": 0.0022129403427243234, "step": 174590 }, { "epoch": 49.56003406187908, "grad_norm": 0.006723410915583372, "learning_rate": 5.045983536758445e-05, "loss": 0.0042122513055801395, "step": 174600 }, { "epoch": 49.56287255180244, "grad_norm": 0.00822402909398079, "learning_rate": 5.045699687766109e-05, "loss": 0.004742847383022308, "step": 174610 }, { "epoch": 49.565711041725805, "grad_norm": 0.00976292509585619, "learning_rate": 5.045415838773773e-05, "loss": 0.002782423235476017, "step": 174620 }, { "epoch": 49.56854953164916, "grad_norm": 0.9308652281761169, "learning_rate": 5.045131989781436e-05, "loss": 0.0004125332459807396, "step": 174630 }, { "epoch": 49.571388021572524, "grad_norm": 0.10618901252746582, "learning_rate": 5.0448481407891004e-05, "loss": 0.0027801338583230972, "step": 174640 }, { "epoch": 49.57422651149589, "grad_norm": 0.011702622286975384, "learning_rate": 5.044564291796764e-05, "loss": 0.0012946601957082749, "step": 174650 }, { "epoch": 49.57706500141924, "grad_norm": 0.003512283554300666, "learning_rate": 5.044280442804429e-05, "loss": 0.003310385346412659, "step": 174660 }, { "epoch": 49.579903491342606, "grad_norm": 1.276296615600586, "learning_rate": 5.043996593812093e-05, "loss": 0.0010142428800463676, "step": 174670 }, { "epoch": 49.58274198126597, "grad_norm": 0.0031334927771240473, "learning_rate": 5.043712744819756e-05, "loss": 0.001242898590862751, "step": 174680 }, { "epoch": 49.585580471189324, "grad_norm": 0.008817499503493309, "learning_rate": 5.04342889582742e-05, "loss": 0.00045943781733512876, "step": 174690 }, { "epoch": 49.58841896111269, "grad_norm": 0.09783212095499039, "learning_rate": 5.043145046835084e-05, "loss": 0.0007568670436739922, "step": 174700 }, { "epoch": 49.59125745103605, "grad_norm": 0.009235050529241562, "learning_rate": 5.042861197842747e-05, "loss": 0.0031763941049575807, "step": 174710 }, { "epoch": 49.59409594095941, "grad_norm": 0.013328253291547298, "learning_rate": 5.0425773488504115e-05, "loss": 0.003067890927195549, "step": 174720 }, { "epoch": 49.59693443088277, "grad_norm": 0.0122997360303998, "learning_rate": 5.042293499858076e-05, "loss": 0.0003880130127072334, "step": 174730 }, { "epoch": 49.59977292080613, "grad_norm": 0.02036145143210888, "learning_rate": 5.04200965086574e-05, "loss": 0.0014973679557442666, "step": 174740 }, { "epoch": 49.602611410729494, "grad_norm": 0.04777945578098297, "learning_rate": 5.041725801873404e-05, "loss": 0.00023764893412590027, "step": 174750 }, { "epoch": 49.60544990065285, "grad_norm": 0.013457502238452435, "learning_rate": 5.0414419528810673e-05, "loss": 0.0004305262118577957, "step": 174760 }, { "epoch": 49.60828839057621, "grad_norm": 7.974243640899658, "learning_rate": 5.0411581038887315e-05, "loss": 0.0019852017983794214, "step": 174770 }, { "epoch": 49.611126880499576, "grad_norm": 0.030172966420650482, "learning_rate": 5.040874254896395e-05, "loss": 0.0007038218900561333, "step": 174780 }, { "epoch": 49.61396537042293, "grad_norm": 0.6499804258346558, "learning_rate": 5.0405904059040584e-05, "loss": 0.0005193358287215233, "step": 174790 }, { "epoch": 49.616803860346295, "grad_norm": 0.39265960454940796, "learning_rate": 5.040306556911724e-05, "loss": 0.0014462962746620179, "step": 174800 }, { "epoch": 49.61964235026966, "grad_norm": 0.053226128220558167, "learning_rate": 5.0400227079193873e-05, "loss": 0.00033442433923482897, "step": 174810 }, { "epoch": 49.62248084019302, "grad_norm": 0.015348297543823719, "learning_rate": 5.039738858927051e-05, "loss": 0.002773045748472214, "step": 174820 }, { "epoch": 49.625319330116376, "grad_norm": 0.07274086773395538, "learning_rate": 5.039455009934715e-05, "loss": 0.006864999234676361, "step": 174830 }, { "epoch": 49.62815782003974, "grad_norm": 0.14690639078617096, "learning_rate": 5.0391711609423784e-05, "loss": 0.007312789559364319, "step": 174840 }, { "epoch": 49.6309963099631, "grad_norm": 0.06017298251390457, "learning_rate": 5.0388873119500425e-05, "loss": 0.0005053164437413216, "step": 174850 }, { "epoch": 49.63383479988646, "grad_norm": 0.0074120392091572285, "learning_rate": 5.0386034629577074e-05, "loss": 0.002776567451655865, "step": 174860 }, { "epoch": 49.63667328980982, "grad_norm": 0.35902661085128784, "learning_rate": 5.038319613965371e-05, "loss": 0.0018462469801306725, "step": 174870 }, { "epoch": 49.63951177973318, "grad_norm": 0.14161162078380585, "learning_rate": 5.038035764973035e-05, "loss": 0.000286213681101799, "step": 174880 }, { "epoch": 49.64235026965654, "grad_norm": 0.03229610621929169, "learning_rate": 5.0377519159806984e-05, "loss": 0.003970592468976975, "step": 174890 }, { "epoch": 49.6451887595799, "grad_norm": 0.043440498411655426, "learning_rate": 5.0374680669883626e-05, "loss": 0.0006732828915119171, "step": 174900 }, { "epoch": 49.648027249503265, "grad_norm": 0.3630772829055786, "learning_rate": 5.037184217996026e-05, "loss": 0.00042467769235372544, "step": 174910 }, { "epoch": 49.65086573942663, "grad_norm": 0.004694806411862373, "learning_rate": 5.0369003690036895e-05, "loss": 0.00038890484720468523, "step": 174920 }, { "epoch": 49.653704229349984, "grad_norm": 0.0025243910495191813, "learning_rate": 5.036616520011355e-05, "loss": 0.001160096935927868, "step": 174930 }, { "epoch": 49.65654271927335, "grad_norm": 0.19320717453956604, "learning_rate": 5.0363326710190184e-05, "loss": 0.002753636613488197, "step": 174940 }, { "epoch": 49.65938120919671, "grad_norm": 0.08788640052080154, "learning_rate": 5.036048822026682e-05, "loss": 0.00031668003648519514, "step": 174950 }, { "epoch": 49.662219699120065, "grad_norm": 0.30486372113227844, "learning_rate": 5.035764973034346e-05, "loss": 0.007881902158260345, "step": 174960 }, { "epoch": 49.66505818904343, "grad_norm": 0.4249066412448883, "learning_rate": 5.0354811240420095e-05, "loss": 0.0007943680509924888, "step": 174970 }, { "epoch": 49.66789667896679, "grad_norm": 0.010790211148560047, "learning_rate": 5.0351972750496736e-05, "loss": 0.00198532547801733, "step": 174980 }, { "epoch": 49.67073516889015, "grad_norm": 3.4351634979248047, "learning_rate": 5.0349134260573384e-05, "loss": 0.0030405210331082345, "step": 174990 }, { "epoch": 49.67357365881351, "grad_norm": 0.05058572441339493, "learning_rate": 5.034629577065002e-05, "loss": 0.0006678413599729538, "step": 175000 }, { "epoch": 49.67357365881351, "eval_accuracy": 0.9809881096203981, "eval_loss": 0.075393907725811, "eval_runtime": 45.1142, "eval_samples_per_second": 348.604, "eval_steps_per_second": 5.453, "step": 175000 }, { "epoch": 49.67641214873687, "grad_norm": 0.5777895450592041, "learning_rate": 5.034345728072666e-05, "loss": 0.002501225657761097, "step": 175010 }, { "epoch": 49.679250638660235, "grad_norm": 0.18316030502319336, "learning_rate": 5.0340618790803295e-05, "loss": 0.0004713933914899826, "step": 175020 }, { "epoch": 49.68208912858359, "grad_norm": 0.17862114310264587, "learning_rate": 5.0337780300879936e-05, "loss": 0.0019603189080953597, "step": 175030 }, { "epoch": 49.684927618506954, "grad_norm": 0.0087740458548069, "learning_rate": 5.033494181095657e-05, "loss": 0.0014011232182383538, "step": 175040 }, { "epoch": 49.68776610843032, "grad_norm": 0.02259809710085392, "learning_rate": 5.0332103321033205e-05, "loss": 0.0006428014487028122, "step": 175050 }, { "epoch": 49.69060459835367, "grad_norm": 1.1071306467056274, "learning_rate": 5.0329264831109854e-05, "loss": 0.0007984979078173637, "step": 175060 }, { "epoch": 49.693443088277036, "grad_norm": 0.19482627511024475, "learning_rate": 5.0326426341186495e-05, "loss": 0.0019625917077064512, "step": 175070 }, { "epoch": 49.6962815782004, "grad_norm": 3.4788103103637695, "learning_rate": 5.032358785126313e-05, "loss": 0.0028288891538977624, "step": 175080 }, { "epoch": 49.69912006812376, "grad_norm": 0.5496608018875122, "learning_rate": 5.032074936133977e-05, "loss": 0.0010878808796405792, "step": 175090 }, { "epoch": 49.70195855804712, "grad_norm": 0.019493376836180687, "learning_rate": 5.0317910871416406e-05, "loss": 0.010831978917121888, "step": 175100 }, { "epoch": 49.70479704797048, "grad_norm": 0.48662394285202026, "learning_rate": 5.031507238149305e-05, "loss": 0.0010795660316944122, "step": 175110 }, { "epoch": 49.70763553789384, "grad_norm": 0.0019725742749869823, "learning_rate": 5.031223389156968e-05, "loss": 0.0008565885946154594, "step": 175120 }, { "epoch": 49.7104740278172, "grad_norm": 0.3011181056499481, "learning_rate": 5.030939540164633e-05, "loss": 0.010952278971672058, "step": 175130 }, { "epoch": 49.71331251774056, "grad_norm": 0.01567448116838932, "learning_rate": 5.030655691172297e-05, "loss": 0.0001947028562426567, "step": 175140 }, { "epoch": 49.716151007663925, "grad_norm": 0.44039568305015564, "learning_rate": 5.0303718421799606e-05, "loss": 0.0007317773997783661, "step": 175150 }, { "epoch": 49.71898949758728, "grad_norm": 12.531011581420898, "learning_rate": 5.030087993187624e-05, "loss": 0.0035062361508607864, "step": 175160 }, { "epoch": 49.72182798751064, "grad_norm": 2.723846673965454, "learning_rate": 5.029804144195288e-05, "loss": 0.0010469544678926468, "step": 175170 }, { "epoch": 49.724666477434006, "grad_norm": 0.005333107430487871, "learning_rate": 5.0295202952029516e-05, "loss": 0.0026531051844358444, "step": 175180 }, { "epoch": 49.72750496735737, "grad_norm": 0.021984033286571503, "learning_rate": 5.0292364462106164e-05, "loss": 0.0016447167843580246, "step": 175190 }, { "epoch": 49.730343457280725, "grad_norm": 0.020666781812906265, "learning_rate": 5.0289525972182806e-05, "loss": 0.0009647879749536514, "step": 175200 }, { "epoch": 49.73318194720409, "grad_norm": 0.11782089620828629, "learning_rate": 5.028668748225944e-05, "loss": 0.0076807476580142975, "step": 175210 }, { "epoch": 49.73602043712745, "grad_norm": 0.2571037709712982, "learning_rate": 5.028384899233608e-05, "loss": 0.00029055345803499224, "step": 175220 }, { "epoch": 49.738858927050806, "grad_norm": 0.6008956432342529, "learning_rate": 5.0281010502412716e-05, "loss": 0.008499915152788163, "step": 175230 }, { "epoch": 49.74169741697417, "grad_norm": 0.00963953323662281, "learning_rate": 5.027817201248936e-05, "loss": 0.00031006988137960434, "step": 175240 }, { "epoch": 49.74453590689753, "grad_norm": 0.047423671931028366, "learning_rate": 5.027533352256599e-05, "loss": 0.012643775343894959, "step": 175250 }, { "epoch": 49.74737439682089, "grad_norm": 0.578525960445404, "learning_rate": 5.027249503264264e-05, "loss": 0.0007793739438056946, "step": 175260 }, { "epoch": 49.75021288674425, "grad_norm": 0.004556373227387667, "learning_rate": 5.026965654271928e-05, "loss": 0.0004154402762651443, "step": 175270 }, { "epoch": 49.753051376667614, "grad_norm": 0.005190047435462475, "learning_rate": 5.0266818052795916e-05, "loss": 0.0004318591207265854, "step": 175280 }, { "epoch": 49.75588986659098, "grad_norm": 1.1151281595230103, "learning_rate": 5.026397956287255e-05, "loss": 0.004777891933918, "step": 175290 }, { "epoch": 49.75872835651433, "grad_norm": 0.022117571905255318, "learning_rate": 5.026114107294919e-05, "loss": 0.0004674486815929413, "step": 175300 }, { "epoch": 49.761566846437695, "grad_norm": 0.0306618083268404, "learning_rate": 5.025830258302583e-05, "loss": 0.0014106450602412225, "step": 175310 }, { "epoch": 49.76440533636106, "grad_norm": 0.012254735454916954, "learning_rate": 5.025546409310247e-05, "loss": 0.00018807705491781236, "step": 175320 }, { "epoch": 49.767243826284414, "grad_norm": 0.055610958486795425, "learning_rate": 5.0252625603179117e-05, "loss": 0.0005584711208939553, "step": 175330 }, { "epoch": 49.77008231620778, "grad_norm": 0.2369375228881836, "learning_rate": 5.024978711325575e-05, "loss": 0.0007307246327400207, "step": 175340 }, { "epoch": 49.77292080613114, "grad_norm": 0.010055062361061573, "learning_rate": 5.024694862333239e-05, "loss": 0.00022206753492355348, "step": 175350 }, { "epoch": 49.7757592960545, "grad_norm": 0.05399242043495178, "learning_rate": 5.024439398240136e-05, "loss": 0.007264809310436248, "step": 175360 }, { "epoch": 49.77859778597786, "grad_norm": 16.82171058654785, "learning_rate": 5.024155549247801e-05, "loss": 0.008378252387046814, "step": 175370 }, { "epoch": 49.78143627590122, "grad_norm": 0.004002092406153679, "learning_rate": 5.023871700255465e-05, "loss": 0.00016614962369203566, "step": 175380 }, { "epoch": 49.784274765824584, "grad_norm": 0.14674746990203857, "learning_rate": 5.023587851263128e-05, "loss": 0.001006784662604332, "step": 175390 }, { "epoch": 49.78711325574794, "grad_norm": 0.04377254098653793, "learning_rate": 5.0233040022707924e-05, "loss": 0.0013438863679766655, "step": 175400 }, { "epoch": 49.7899517456713, "grad_norm": 0.19128426909446716, "learning_rate": 5.023020153278456e-05, "loss": 0.000872323289513588, "step": 175410 }, { "epoch": 49.792790235594666, "grad_norm": 0.4067605137825012, "learning_rate": 5.02273630428612e-05, "loss": 0.0003910517320036888, "step": 175420 }, { "epoch": 49.79562872551802, "grad_norm": 0.020119091495871544, "learning_rate": 5.0224524552937835e-05, "loss": 0.0022533327341079713, "step": 175430 }, { "epoch": 49.798467215441384, "grad_norm": 0.03866219148039818, "learning_rate": 5.022168606301448e-05, "loss": 0.00034465789794921873, "step": 175440 }, { "epoch": 49.80130570536475, "grad_norm": 0.3347543478012085, "learning_rate": 5.0218847573091125e-05, "loss": 0.0009799083694815637, "step": 175450 }, { "epoch": 49.80414419528811, "grad_norm": 3.754640817642212, "learning_rate": 5.021600908316776e-05, "loss": 0.0009435923770070076, "step": 175460 }, { "epoch": 49.806982685211466, "grad_norm": 0.01881418749690056, "learning_rate": 5.0213170593244394e-05, "loss": 0.0003478165715932846, "step": 175470 }, { "epoch": 49.80982117513483, "grad_norm": 0.037694916129112244, "learning_rate": 5.0210332103321035e-05, "loss": 0.001637822948396206, "step": 175480 }, { "epoch": 49.81265966505819, "grad_norm": 0.031218569725751877, "learning_rate": 5.020749361339767e-05, "loss": 0.0017028195783495904, "step": 175490 }, { "epoch": 49.81549815498155, "grad_norm": 0.026324359700083733, "learning_rate": 5.020465512347431e-05, "loss": 0.0052852541208267215, "step": 175500 }, { "epoch": 49.81549815498155, "eval_accuracy": 0.9799707509378776, "eval_loss": 0.08514831215143204, "eval_runtime": 44.6544, "eval_samples_per_second": 352.194, "eval_steps_per_second": 5.509, "step": 175500 }, { "epoch": 49.81833664490491, "grad_norm": 0.1947772055864334, "learning_rate": 5.020181663355096e-05, "loss": 0.0033474985510110857, "step": 175510 }, { "epoch": 49.82117513482827, "grad_norm": 0.10537213087081909, "learning_rate": 5.0198978143627594e-05, "loss": 0.005785617232322693, "step": 175520 }, { "epoch": 49.82401362475163, "grad_norm": 0.03716200590133667, "learning_rate": 5.0196139653704235e-05, "loss": 0.004064638912677765, "step": 175530 }, { "epoch": 49.82685211467499, "grad_norm": 10.257777214050293, "learning_rate": 5.01935850127732e-05, "loss": 0.02134166657924652, "step": 175540 }, { "epoch": 49.829690604598355, "grad_norm": 0.007998460903763771, "learning_rate": 5.019074652284984e-05, "loss": 0.004863967001438141, "step": 175550 }, { "epoch": 49.83252909452172, "grad_norm": 3.5787527561187744, "learning_rate": 5.018790803292649e-05, "loss": 0.004025820642709732, "step": 175560 }, { "epoch": 49.83536758444507, "grad_norm": 0.18284490704536438, "learning_rate": 5.0185069543003126e-05, "loss": 0.0033828780055046082, "step": 175570 }, { "epoch": 49.838206074368436, "grad_norm": 1.0677744150161743, "learning_rate": 5.018223105307977e-05, "loss": 0.009861975908279419, "step": 175580 }, { "epoch": 49.8410445642918, "grad_norm": 0.24271273612976074, "learning_rate": 5.01793925631564e-05, "loss": 0.00030893702059984205, "step": 175590 }, { "epoch": 49.843883054215155, "grad_norm": 0.15560871362686157, "learning_rate": 5.017655407323304e-05, "loss": 0.0002342868596315384, "step": 175600 }, { "epoch": 49.84672154413852, "grad_norm": 0.007093563675880432, "learning_rate": 5.017371558330968e-05, "loss": 0.0005103804171085358, "step": 175610 }, { "epoch": 49.84956003406188, "grad_norm": 0.3513840138912201, "learning_rate": 5.0170877093386326e-05, "loss": 0.0008102435618638993, "step": 175620 }, { "epoch": 49.85239852398524, "grad_norm": 0.06051476299762726, "learning_rate": 5.016803860346296e-05, "loss": 0.001005120761692524, "step": 175630 }, { "epoch": 49.8552370139086, "grad_norm": 0.001116464613005519, "learning_rate": 5.01652001135396e-05, "loss": 0.0006913064047694206, "step": 175640 }, { "epoch": 49.85807550383196, "grad_norm": 0.11276336759328842, "learning_rate": 5.0162361623616236e-05, "loss": 0.0005567248910665513, "step": 175650 }, { "epoch": 49.860913993755325, "grad_norm": 1.088966965675354, "learning_rate": 5.015952313369288e-05, "loss": 0.0016041632741689682, "step": 175660 }, { "epoch": 49.86375248367868, "grad_norm": 0.001400827313773334, "learning_rate": 5.015668464376951e-05, "loss": 0.0006260914728045463, "step": 175670 }, { "epoch": 49.866590973602044, "grad_norm": 0.03844325989484787, "learning_rate": 5.0153846153846154e-05, "loss": 0.0009424000978469849, "step": 175680 }, { "epoch": 49.86942946352541, "grad_norm": 0.24292278289794922, "learning_rate": 5.01510076639228e-05, "loss": 0.0027101315557956696, "step": 175690 }, { "epoch": 49.87226795344876, "grad_norm": 0.11953822523355484, "learning_rate": 5.0148169173999436e-05, "loss": 0.003956707939505577, "step": 175700 }, { "epoch": 49.875106443372125, "grad_norm": 0.12276313453912735, "learning_rate": 5.014533068407608e-05, "loss": 0.0010580992326140403, "step": 175710 }, { "epoch": 49.87794493329549, "grad_norm": 0.4888617694377899, "learning_rate": 5.014249219415271e-05, "loss": 0.0002568969503045082, "step": 175720 }, { "epoch": 49.880783423218844, "grad_norm": 0.021438568830490112, "learning_rate": 5.013965370422935e-05, "loss": 0.0002265915274620056, "step": 175730 }, { "epoch": 49.88362191314221, "grad_norm": 0.018035532906651497, "learning_rate": 5.013681521430599e-05, "loss": 0.0006949290633201599, "step": 175740 }, { "epoch": 49.88646040306557, "grad_norm": 2.36773681640625, "learning_rate": 5.0133976724382637e-05, "loss": 0.002191770076751709, "step": 175750 }, { "epoch": 49.88929889298893, "grad_norm": 0.022713199257850647, "learning_rate": 5.013113823445927e-05, "loss": 0.0006339691579341888, "step": 175760 }, { "epoch": 49.89213738291229, "grad_norm": 0.03646502643823624, "learning_rate": 5.012829974453591e-05, "loss": 0.001051345467567444, "step": 175770 }, { "epoch": 49.89497587283565, "grad_norm": 0.05966269224882126, "learning_rate": 5.012546125461255e-05, "loss": 0.00156708974391222, "step": 175780 }, { "epoch": 49.897814362759014, "grad_norm": 0.010498174466192722, "learning_rate": 5.012262276468919e-05, "loss": 0.001789357326924801, "step": 175790 }, { "epoch": 49.90065285268237, "grad_norm": 0.1464214026927948, "learning_rate": 5.011978427476582e-05, "loss": 0.004627253860235214, "step": 175800 }, { "epoch": 49.90349134260573, "grad_norm": 0.031834784895181656, "learning_rate": 5.0116945784842464e-05, "loss": 0.006738530099391937, "step": 175810 }, { "epoch": 49.906329832529096, "grad_norm": 0.7251991629600525, "learning_rate": 5.011410729491911e-05, "loss": 0.0003935448825359344, "step": 175820 }, { "epoch": 49.90916832245246, "grad_norm": 0.10335340350866318, "learning_rate": 5.011126880499575e-05, "loss": 0.0011290784925222397, "step": 175830 }, { "epoch": 49.912006812375814, "grad_norm": 0.03564690798521042, "learning_rate": 5.010843031507239e-05, "loss": 0.0026142418384552, "step": 175840 }, { "epoch": 49.91484530229918, "grad_norm": 0.06710988283157349, "learning_rate": 5.010559182514902e-05, "loss": 0.0013648532330989838, "step": 175850 }, { "epoch": 49.91768379222254, "grad_norm": 0.0062402174808084965, "learning_rate": 5.010275333522566e-05, "loss": 0.008835308253765106, "step": 175860 }, { "epoch": 49.920522282145896, "grad_norm": 0.038856081664562225, "learning_rate": 5.00999148453023e-05, "loss": 0.003369591385126114, "step": 175870 }, { "epoch": 49.92336077206926, "grad_norm": 0.3476046323776245, "learning_rate": 5.0097076355378934e-05, "loss": 0.001714903675019741, "step": 175880 }, { "epoch": 49.92619926199262, "grad_norm": 0.012492208741605282, "learning_rate": 5.009423786545558e-05, "loss": 0.0050074361264705654, "step": 175890 }, { "epoch": 49.92903775191598, "grad_norm": 0.004202702082693577, "learning_rate": 5.009139937553222e-05, "loss": 0.0061509557068347934, "step": 175900 }, { "epoch": 49.93187624183934, "grad_norm": 0.8320783972740173, "learning_rate": 5.008856088560886e-05, "loss": 0.0013588165864348411, "step": 175910 }, { "epoch": 49.9347147317627, "grad_norm": 0.6849009990692139, "learning_rate": 5.00857223956855e-05, "loss": 0.0008055375888943672, "step": 175920 }, { "epoch": 49.937553221686066, "grad_norm": 0.1021001935005188, "learning_rate": 5.0082883905762134e-05, "loss": 0.0016340840607881545, "step": 175930 }, { "epoch": 49.94039171160942, "grad_norm": 0.3413701355457306, "learning_rate": 5.0080045415838775e-05, "loss": 0.0028732486069202425, "step": 175940 }, { "epoch": 49.943230201532785, "grad_norm": 0.2315894514322281, "learning_rate": 5.0077206925915423e-05, "loss": 0.0004743743687868118, "step": 175950 }, { "epoch": 49.94606869145615, "grad_norm": 0.35282427072525024, "learning_rate": 5.007436843599206e-05, "loss": 0.005572152882814407, "step": 175960 }, { "epoch": 49.948907181379504, "grad_norm": 0.06680784374475479, "learning_rate": 5.007152994606869e-05, "loss": 0.0017559453845024109, "step": 175970 }, { "epoch": 49.95174567130287, "grad_norm": 0.2892598807811737, "learning_rate": 5.0068691456145334e-05, "loss": 0.0002549074590206146, "step": 175980 }, { "epoch": 49.95458416122623, "grad_norm": 0.051638249307870865, "learning_rate": 5.006585296622197e-05, "loss": 0.0016755053773522377, "step": 175990 }, { "epoch": 49.957422651149585, "grad_norm": 0.9715096950531006, "learning_rate": 5.006301447629861e-05, "loss": 0.002749914862215519, "step": 176000 }, { "epoch": 49.957422651149585, "eval_accuracy": 0.9802886755261652, "eval_loss": 0.0789952501654625, "eval_runtime": 37.3382, "eval_samples_per_second": 421.204, "eval_steps_per_second": 6.588, "step": 176000 }, { "epoch": 49.96026114107295, "grad_norm": 0.3057706952095032, "learning_rate": 5.0060175986375245e-05, "loss": 0.018963524699211122, "step": 176010 }, { "epoch": 49.96309963099631, "grad_norm": 4.110438823699951, "learning_rate": 5.005733749645189e-05, "loss": 0.005212287977337837, "step": 176020 }, { "epoch": 49.965938120919674, "grad_norm": 0.0535208098590374, "learning_rate": 5.0054499006528534e-05, "loss": 0.0059766341000795364, "step": 176030 }, { "epoch": 49.96877661084303, "grad_norm": 0.4122450649738312, "learning_rate": 5.005166051660517e-05, "loss": 0.0020861780270934106, "step": 176040 }, { "epoch": 49.97161510076639, "grad_norm": 0.15241412818431854, "learning_rate": 5.004882202668181e-05, "loss": 0.016507193446159363, "step": 176050 }, { "epoch": 49.974453590689755, "grad_norm": 1.0079600811004639, "learning_rate": 5.0045983536758445e-05, "loss": 0.0016788695007562636, "step": 176060 }, { "epoch": 49.97729208061311, "grad_norm": 0.040756706148386, "learning_rate": 5.0043145046835086e-05, "loss": 0.017596060037612916, "step": 176070 }, { "epoch": 49.980130570536474, "grad_norm": 1.0445901155471802, "learning_rate": 5.004030655691172e-05, "loss": 0.008661928027868271, "step": 176080 }, { "epoch": 49.98296906045984, "grad_norm": 0.049785763025283813, "learning_rate": 5.003746806698837e-05, "loss": 0.004804645478725433, "step": 176090 }, { "epoch": 49.98580755038319, "grad_norm": 1.161075472831726, "learning_rate": 5.0034629577065e-05, "loss": 0.004342104494571686, "step": 176100 }, { "epoch": 49.988646040306556, "grad_norm": 0.13195335865020752, "learning_rate": 5.0031791087141645e-05, "loss": 0.0061186693608760835, "step": 176110 }, { "epoch": 49.99148453022992, "grad_norm": 0.3353036046028137, "learning_rate": 5.002895259721828e-05, "loss": 0.0012462822720408439, "step": 176120 }, { "epoch": 49.99432302015328, "grad_norm": 0.30154430866241455, "learning_rate": 5.002611410729492e-05, "loss": 0.00048540737479925157, "step": 176130 }, { "epoch": 49.99716151007664, "grad_norm": 0.4388117492198944, "learning_rate": 5.0023275617371555e-05, "loss": 0.0023450201377272608, "step": 176140 }, { "epoch": 50.0, "grad_norm": 0.06641458719968796, "learning_rate": 5.0020437127448203e-05, "loss": 0.0003019726136699319, "step": 176150 }, { "epoch": 50.00283848992336, "grad_norm": 0.011047014966607094, "learning_rate": 5.0017598637524845e-05, "loss": 0.0004379408434033394, "step": 176160 }, { "epoch": 50.00567697984672, "grad_norm": 0.1946564018726349, "learning_rate": 5.001476014760148e-05, "loss": 0.00024729818105697634, "step": 176170 }, { "epoch": 50.00851546977008, "grad_norm": 2.189566135406494, "learning_rate": 5.001192165767812e-05, "loss": 0.0004700539633631706, "step": 176180 }, { "epoch": 50.011353959693444, "grad_norm": 0.03514385223388672, "learning_rate": 5.0009083167754755e-05, "loss": 0.0032695747911930086, "step": 176190 }, { "epoch": 50.01419244961681, "grad_norm": 0.010160994715988636, "learning_rate": 5.000624467783139e-05, "loss": 0.0002783371135592461, "step": 176200 }, { "epoch": 50.01703093954016, "grad_norm": 0.006384863518178463, "learning_rate": 5.000340618790803e-05, "loss": 0.00047411732375621793, "step": 176210 }, { "epoch": 50.019869429463526, "grad_norm": 0.01772894151508808, "learning_rate": 5.000056769798468e-05, "loss": 0.00023264624178409576, "step": 176220 }, { "epoch": 50.02270791938689, "grad_norm": 0.19179916381835938, "learning_rate": 4.9997729208061314e-05, "loss": 0.005536779388785362, "step": 176230 }, { "epoch": 50.025546409310245, "grad_norm": 3.0814032554626465, "learning_rate": 4.9994890718137956e-05, "loss": 0.0007450401782989502, "step": 176240 }, { "epoch": 50.02838489923361, "grad_norm": 0.21112145483493805, "learning_rate": 4.999205222821459e-05, "loss": 0.001192418299615383, "step": 176250 }, { "epoch": 50.03122338915697, "grad_norm": 0.0047845072112977505, "learning_rate": 4.998921373829123e-05, "loss": 9.329039603471756e-05, "step": 176260 }, { "epoch": 50.034061879080326, "grad_norm": 5.895194053649902, "learning_rate": 4.998637524836787e-05, "loss": 0.0008397623896598816, "step": 176270 }, { "epoch": 50.03690036900369, "grad_norm": 0.002326264977455139, "learning_rate": 4.998353675844451e-05, "loss": 0.000806553103029728, "step": 176280 }, { "epoch": 50.03973885892705, "grad_norm": 0.03298676386475563, "learning_rate": 4.998069826852115e-05, "loss": 0.00010777469724416733, "step": 176290 }, { "epoch": 50.042577348850415, "grad_norm": 0.036003477871418, "learning_rate": 4.997785977859779e-05, "loss": 0.005755875259637833, "step": 176300 }, { "epoch": 50.04541583877377, "grad_norm": 0.3316333293914795, "learning_rate": 4.997502128867443e-05, "loss": 0.007372388243675232, "step": 176310 }, { "epoch": 50.04825432869713, "grad_norm": 0.8168872594833374, "learning_rate": 4.9972182798751066e-05, "loss": 0.002645586244761944, "step": 176320 }, { "epoch": 50.051092818620496, "grad_norm": 0.17600269615650177, "learning_rate": 4.99693443088277e-05, "loss": 0.00012637339532375335, "step": 176330 }, { "epoch": 50.05393130854385, "grad_norm": 0.025407753884792328, "learning_rate": 4.996650581890435e-05, "loss": 0.00013604070991277695, "step": 176340 }, { "epoch": 50.056769798467215, "grad_norm": 0.012501208111643791, "learning_rate": 4.9963667328980984e-05, "loss": 0.0008387109264731407, "step": 176350 }, { "epoch": 50.05960828839058, "grad_norm": 0.7443181276321411, "learning_rate": 4.9960828839057625e-05, "loss": 0.0004433363676071167, "step": 176360 }, { "epoch": 50.062446778313934, "grad_norm": 0.06494267284870148, "learning_rate": 4.9957990349134266e-05, "loss": 0.0009995579719543457, "step": 176370 }, { "epoch": 50.0652852682373, "grad_norm": 0.6817200779914856, "learning_rate": 4.99551518592109e-05, "loss": 0.0023353079333901404, "step": 176380 }, { "epoch": 50.06812375816066, "grad_norm": 0.01471209991723299, "learning_rate": 4.995231336928754e-05, "loss": 0.0022267861291766166, "step": 176390 }, { "epoch": 50.07096224808402, "grad_norm": 0.4790765047073364, "learning_rate": 4.9949474879364184e-05, "loss": 0.00036194305866956713, "step": 176400 }, { "epoch": 50.07380073800738, "grad_norm": 0.870887041091919, "learning_rate": 4.994663638944082e-05, "loss": 0.000477510504424572, "step": 176410 }, { "epoch": 50.07663922793074, "grad_norm": 3.137392997741699, "learning_rate": 4.994379789951746e-05, "loss": 0.0008313259109854698, "step": 176420 }, { "epoch": 50.079477717854104, "grad_norm": 0.011231510899960995, "learning_rate": 4.9940959409594094e-05, "loss": 0.00015397705137729645, "step": 176430 }, { "epoch": 50.08231620777746, "grad_norm": 0.01388354692608118, "learning_rate": 4.9938120919670736e-05, "loss": 0.00021182242780923844, "step": 176440 }, { "epoch": 50.08515469770082, "grad_norm": 0.052085038274526596, "learning_rate": 4.993528242974738e-05, "loss": 0.0010100716724991798, "step": 176450 }, { "epoch": 50.087993187624186, "grad_norm": 0.1642066240310669, "learning_rate": 4.993244393982401e-05, "loss": 0.0010257169604301452, "step": 176460 }, { "epoch": 50.09083167754754, "grad_norm": 0.14474131166934967, "learning_rate": 4.992960544990066e-05, "loss": 0.0016784394159913064, "step": 176470 }, { "epoch": 50.093670167470904, "grad_norm": 0.05516636371612549, "learning_rate": 4.9926766959977294e-05, "loss": 0.0007140913978219033, "step": 176480 }, { "epoch": 50.09650865739427, "grad_norm": 0.03253169730305672, "learning_rate": 4.992392847005393e-05, "loss": 0.00033639296889305116, "step": 176490 }, { "epoch": 50.09934714731763, "grad_norm": 0.024744326248764992, "learning_rate": 4.992108998013058e-05, "loss": 0.002184981666505337, "step": 176500 }, { "epoch": 50.09934714731763, "eval_accuracy": 0.9795256565142748, "eval_loss": 0.08271688967943192, "eval_runtime": 36.685, "eval_samples_per_second": 428.703, "eval_steps_per_second": 6.706, "step": 176500 }, { "epoch": 50.102185637240986, "grad_norm": 0.04519375413656235, "learning_rate": 4.991825149020721e-05, "loss": 0.006472869962453842, "step": 176510 }, { "epoch": 50.10502412716435, "grad_norm": 0.01036011427640915, "learning_rate": 4.991541300028385e-05, "loss": 0.00043050963431596757, "step": 176520 }, { "epoch": 50.10786261708771, "grad_norm": 0.03313395380973816, "learning_rate": 4.991257451036049e-05, "loss": 0.0007402420043945313, "step": 176530 }, { "epoch": 50.11070110701107, "grad_norm": 1.1382125616073608, "learning_rate": 4.990973602043713e-05, "loss": 0.0009842073544859887, "step": 176540 }, { "epoch": 50.11353959693443, "grad_norm": 0.17398355901241302, "learning_rate": 4.990689753051377e-05, "loss": 0.0013152368366718293, "step": 176550 }, { "epoch": 50.11637808685779, "grad_norm": 0.294647753238678, "learning_rate": 4.9904059040590405e-05, "loss": 0.0012091424316167832, "step": 176560 }, { "epoch": 50.119216576781156, "grad_norm": 0.012480718083679676, "learning_rate": 4.9901220550667046e-05, "loss": 0.005239012092351914, "step": 176570 }, { "epoch": 50.12205506670451, "grad_norm": 2.8275561332702637, "learning_rate": 4.989838206074369e-05, "loss": 0.0007401600480079651, "step": 176580 }, { "epoch": 50.124893556627875, "grad_norm": 0.003974087070673704, "learning_rate": 4.989554357082032e-05, "loss": 0.00021333899348974228, "step": 176590 }, { "epoch": 50.12773204655124, "grad_norm": 0.0021992893889546394, "learning_rate": 4.989270508089697e-05, "loss": 0.00014548879116773605, "step": 176600 }, { "epoch": 50.13057053647459, "grad_norm": 0.09871815145015717, "learning_rate": 4.9889866590973605e-05, "loss": 0.0011641783639788628, "step": 176610 }, { "epoch": 50.133409026397956, "grad_norm": 0.9405157566070557, "learning_rate": 4.988702810105024e-05, "loss": 0.0012574698776006699, "step": 176620 }, { "epoch": 50.13624751632132, "grad_norm": 0.2288752794265747, "learning_rate": 4.988418961112688e-05, "loss": 0.000950719602406025, "step": 176630 }, { "epoch": 50.139086006244675, "grad_norm": 0.011974070221185684, "learning_rate": 4.988135112120352e-05, "loss": 0.0005793120712041855, "step": 176640 }, { "epoch": 50.14192449616804, "grad_norm": 0.14287327229976654, "learning_rate": 4.9878512631280164e-05, "loss": 0.0002450928092002869, "step": 176650 }, { "epoch": 50.1447629860914, "grad_norm": 0.05842907354235649, "learning_rate": 4.98756741413568e-05, "loss": 0.003305698186159134, "step": 176660 }, { "epoch": 50.14760147601476, "grad_norm": 0.14197088778018951, "learning_rate": 4.987283565143344e-05, "loss": 0.0020552946254611014, "step": 176670 }, { "epoch": 50.15043996593812, "grad_norm": 0.2903248071670532, "learning_rate": 4.986999716151008e-05, "loss": 0.0006472675129771232, "step": 176680 }, { "epoch": 50.15327845586148, "grad_norm": 0.08994513005018234, "learning_rate": 4.9867158671586716e-05, "loss": 0.004416780173778534, "step": 176690 }, { "epoch": 50.156116945784845, "grad_norm": 0.028650710359215736, "learning_rate": 4.986432018166336e-05, "loss": 0.00034349225461483, "step": 176700 }, { "epoch": 50.1589554357082, "grad_norm": 0.0024077589623630047, "learning_rate": 4.986148169174e-05, "loss": 0.001966416649520397, "step": 176710 }, { "epoch": 50.161793925631564, "grad_norm": 0.0242326520383358, "learning_rate": 4.985864320181663e-05, "loss": 0.0002592502161860466, "step": 176720 }, { "epoch": 50.16463241555493, "grad_norm": 0.13250458240509033, "learning_rate": 4.9855804711893274e-05, "loss": 0.005005958303809166, "step": 176730 }, { "epoch": 50.16747090547828, "grad_norm": 0.01152278482913971, "learning_rate": 4.9852966221969916e-05, "loss": 0.012616913020610809, "step": 176740 }, { "epoch": 50.170309395401645, "grad_norm": 14.07308292388916, "learning_rate": 4.985012773204655e-05, "loss": 0.003994447737932205, "step": 176750 }, { "epoch": 50.17314788532501, "grad_norm": 5.737757205963135, "learning_rate": 4.984728924212319e-05, "loss": 0.0011671433225274087, "step": 176760 }, { "epoch": 50.17598637524837, "grad_norm": 0.004077515564858913, "learning_rate": 4.984445075219983e-05, "loss": 0.00018788408488035202, "step": 176770 }, { "epoch": 50.17882486517173, "grad_norm": 0.019467005506157875, "learning_rate": 4.9841612262276475e-05, "loss": 0.0028463369235396385, "step": 176780 }, { "epoch": 50.18166335509509, "grad_norm": 1.6506342887878418, "learning_rate": 4.983877377235311e-05, "loss": 0.0010838607326149941, "step": 176790 }, { "epoch": 50.18450184501845, "grad_norm": 0.10147836804389954, "learning_rate": 4.983593528242975e-05, "loss": 0.0003597714006900787, "step": 176800 }, { "epoch": 50.18734033494181, "grad_norm": 0.0836382582783699, "learning_rate": 4.983309679250639e-05, "loss": 0.0022874044254422186, "step": 176810 }, { "epoch": 50.19017882486517, "grad_norm": 0.08406874537467957, "learning_rate": 4.9830258302583027e-05, "loss": 0.0011408381164073945, "step": 176820 }, { "epoch": 50.193017314788534, "grad_norm": 0.6282241344451904, "learning_rate": 4.982741981265967e-05, "loss": 0.00019717849791049956, "step": 176830 }, { "epoch": 50.19585580471189, "grad_norm": 1.6229209899902344, "learning_rate": 4.982458132273631e-05, "loss": 0.00037357304245233536, "step": 176840 }, { "epoch": 50.19869429463525, "grad_norm": 0.4469185471534729, "learning_rate": 4.9821742832812944e-05, "loss": 0.0005145488306879997, "step": 176850 }, { "epoch": 50.201532784558616, "grad_norm": 0.28403589129447937, "learning_rate": 4.9818904342889585e-05, "loss": 0.001212712563574314, "step": 176860 }, { "epoch": 50.20437127448198, "grad_norm": 0.6827614307403564, "learning_rate": 4.9816065852966227e-05, "loss": 0.00036678947508335115, "step": 176870 }, { "epoch": 50.207209764405334, "grad_norm": 10.887860298156738, "learning_rate": 4.981322736304286e-05, "loss": 0.0075437106192111966, "step": 176880 }, { "epoch": 50.2100482543287, "grad_norm": 0.017527088522911072, "learning_rate": 4.98103888731195e-05, "loss": 0.000561496987938881, "step": 176890 }, { "epoch": 50.21288674425206, "grad_norm": 1.06354820728302, "learning_rate": 4.9807550383196144e-05, "loss": 0.0017194153741002082, "step": 176900 }, { "epoch": 50.215725234175416, "grad_norm": 0.053778715431690216, "learning_rate": 4.980471189327278e-05, "loss": 0.0005779091268777847, "step": 176910 }, { "epoch": 50.21856372409878, "grad_norm": 0.15828350186347961, "learning_rate": 4.980187340334942e-05, "loss": 0.008802390098571778, "step": 176920 }, { "epoch": 50.22140221402214, "grad_norm": 0.18158212304115295, "learning_rate": 4.9799034913426055e-05, "loss": 0.00035154279321432115, "step": 176930 }, { "epoch": 50.2242407039455, "grad_norm": 0.009640474803745747, "learning_rate": 4.97961964235027e-05, "loss": 0.0013858439400792123, "step": 176940 }, { "epoch": 50.22707919386886, "grad_norm": 0.03298366814851761, "learning_rate": 4.979335793357934e-05, "loss": 0.006298000365495682, "step": 176950 }, { "epoch": 50.22991768379222, "grad_norm": 0.11402001976966858, "learning_rate": 4.979051944365597e-05, "loss": 0.00020461641252040864, "step": 176960 }, { "epoch": 50.232756173715586, "grad_norm": 0.23938776552677155, "learning_rate": 4.978768095373262e-05, "loss": 0.0017928794026374818, "step": 176970 }, { "epoch": 50.23559466363894, "grad_norm": 12.299174308776855, "learning_rate": 4.9784842463809255e-05, "loss": 0.002292858622968197, "step": 176980 }, { "epoch": 50.238433153562305, "grad_norm": 0.019104860723018646, "learning_rate": 4.9782003973885896e-05, "loss": 0.0005000920966267586, "step": 176990 }, { "epoch": 50.24127164348567, "grad_norm": 0.018486907705664635, "learning_rate": 4.977916548396254e-05, "loss": 0.0004103479906916618, "step": 177000 }, { "epoch": 50.24127164348567, "eval_accuracy": 0.9835950912443568, "eval_loss": 0.06585913151502609, "eval_runtime": 34.6176, "eval_samples_per_second": 454.307, "eval_steps_per_second": 7.106, "step": 177000 }, { "epoch": 50.24411013340902, "grad_norm": 0.1982554793357849, "learning_rate": 4.977632699403917e-05, "loss": 0.0030837124213576317, "step": 177010 }, { "epoch": 50.246948623332386, "grad_norm": 0.67021644115448, "learning_rate": 4.977348850411581e-05, "loss": 0.0008273754268884659, "step": 177020 }, { "epoch": 50.24978711325575, "grad_norm": 0.3453882336616516, "learning_rate": 4.9770650014192455e-05, "loss": 0.000473986379802227, "step": 177030 }, { "epoch": 50.25262560317911, "grad_norm": 0.013284002430737019, "learning_rate": 4.976781152426909e-05, "loss": 0.0024780388921499252, "step": 177040 }, { "epoch": 50.25546409310247, "grad_norm": 0.01017698086798191, "learning_rate": 4.976497303434573e-05, "loss": 0.0002745307981967926, "step": 177050 }, { "epoch": 50.25830258302583, "grad_norm": 0.1566135287284851, "learning_rate": 4.9762134544422365e-05, "loss": 0.005714311078190803, "step": 177060 }, { "epoch": 50.261141072949194, "grad_norm": 0.7029308676719666, "learning_rate": 4.9759296054499013e-05, "loss": 0.0030181130394339563, "step": 177070 }, { "epoch": 50.26397956287255, "grad_norm": 1.071557879447937, "learning_rate": 4.975645756457565e-05, "loss": 0.006632360816001892, "step": 177080 }, { "epoch": 50.26681805279591, "grad_norm": 0.1285959631204605, "learning_rate": 4.975361907465228e-05, "loss": 0.02107929140329361, "step": 177090 }, { "epoch": 50.269656542719275, "grad_norm": 0.01355596911162138, "learning_rate": 4.975078058472893e-05, "loss": 0.003091316856443882, "step": 177100 }, { "epoch": 50.27249503264263, "grad_norm": 0.6344688534736633, "learning_rate": 4.9747942094805565e-05, "loss": 0.006692691892385483, "step": 177110 }, { "epoch": 50.275333522565994, "grad_norm": 0.24596385657787323, "learning_rate": 4.974510360488221e-05, "loss": 0.002978968806564808, "step": 177120 }, { "epoch": 50.27817201248936, "grad_norm": 0.06037038937211037, "learning_rate": 4.974226511495885e-05, "loss": 0.0006841357797384262, "step": 177130 }, { "epoch": 50.28101050241272, "grad_norm": 0.052158549427986145, "learning_rate": 4.973942662503548e-05, "loss": 0.0009825050830841065, "step": 177140 }, { "epoch": 50.283848992336075, "grad_norm": 0.019953472539782524, "learning_rate": 4.9736588135112124e-05, "loss": 0.000713951513171196, "step": 177150 }, { "epoch": 50.28668748225944, "grad_norm": 0.10924579203128815, "learning_rate": 4.973374964518876e-05, "loss": 0.008761272579431535, "step": 177160 }, { "epoch": 50.2895259721828, "grad_norm": 0.012108390219509602, "learning_rate": 4.97309111552654e-05, "loss": 0.0008253704756498337, "step": 177170 }, { "epoch": 50.29236446210616, "grad_norm": 0.042290061712265015, "learning_rate": 4.972807266534204e-05, "loss": 0.0006712395697832108, "step": 177180 }, { "epoch": 50.29520295202952, "grad_norm": 0.07061628252267838, "learning_rate": 4.9725234175418676e-05, "loss": 0.007660107314586639, "step": 177190 }, { "epoch": 50.29804144195288, "grad_norm": 0.028236202895641327, "learning_rate": 4.9722395685495324e-05, "loss": 0.0010941410437226295, "step": 177200 }, { "epoch": 50.30087993187624, "grad_norm": 0.019646713510155678, "learning_rate": 4.971955719557196e-05, "loss": 0.0015507271513342858, "step": 177210 }, { "epoch": 50.3037184217996, "grad_norm": 0.01657615229487419, "learning_rate": 4.9716718705648593e-05, "loss": 8.123256266117096e-05, "step": 177220 }, { "epoch": 50.306556911722964, "grad_norm": 3.0022382736206055, "learning_rate": 4.971388021572524e-05, "loss": 0.005314110592007637, "step": 177230 }, { "epoch": 50.30939540164633, "grad_norm": 7.596179008483887, "learning_rate": 4.9711041725801876e-05, "loss": 0.0029734516516327857, "step": 177240 }, { "epoch": 50.31223389156968, "grad_norm": 0.03972538933157921, "learning_rate": 4.970820323587852e-05, "loss": 0.007044260948896408, "step": 177250 }, { "epoch": 50.315072381493046, "grad_norm": 0.03758332505822182, "learning_rate": 4.970536474595515e-05, "loss": 0.002057547494769096, "step": 177260 }, { "epoch": 50.31791087141641, "grad_norm": 0.6733481884002686, "learning_rate": 4.9702526256031793e-05, "loss": 0.0007719844579696655, "step": 177270 }, { "epoch": 50.320749361339765, "grad_norm": 0.15674684941768646, "learning_rate": 4.9699687766108435e-05, "loss": 0.0016361262649297714, "step": 177280 }, { "epoch": 50.32358785126313, "grad_norm": 0.09822171181440353, "learning_rate": 4.969684927618507e-05, "loss": 0.0006087679415941238, "step": 177290 }, { "epoch": 50.32642634118649, "grad_norm": 0.03135913237929344, "learning_rate": 4.969401078626171e-05, "loss": 0.010294127464294433, "step": 177300 }, { "epoch": 50.329264831109846, "grad_norm": 0.39426955580711365, "learning_rate": 4.969117229633835e-05, "loss": 0.000994923710823059, "step": 177310 }, { "epoch": 50.33210332103321, "grad_norm": 0.139894038438797, "learning_rate": 4.968833380641499e-05, "loss": 0.0047951340675354, "step": 177320 }, { "epoch": 50.33494181095657, "grad_norm": 0.3571105897426605, "learning_rate": 4.968549531649163e-05, "loss": 0.009452199935913086, "step": 177330 }, { "epoch": 50.337780300879935, "grad_norm": 0.9153376221656799, "learning_rate": 4.968265682656827e-05, "loss": 0.006810718774795532, "step": 177340 }, { "epoch": 50.34061879080329, "grad_norm": 0.020133474841713905, "learning_rate": 4.9679818336644904e-05, "loss": 0.005895834416151047, "step": 177350 }, { "epoch": 50.34345728072665, "grad_norm": 0.008862294256687164, "learning_rate": 4.9676979846721546e-05, "loss": 0.006742405891418457, "step": 177360 }, { "epoch": 50.346295770650016, "grad_norm": 9.723737716674805, "learning_rate": 4.967414135679819e-05, "loss": 0.007398355007171631, "step": 177370 }, { "epoch": 50.34913426057337, "grad_norm": 0.4639098346233368, "learning_rate": 4.967130286687482e-05, "loss": 0.000708504393696785, "step": 177380 }, { "epoch": 50.351972750496735, "grad_norm": 0.3428248167037964, "learning_rate": 4.966846437695146e-05, "loss": 0.003915112093091011, "step": 177390 }, { "epoch": 50.3548112404201, "grad_norm": 2.476086378097534, "learning_rate": 4.9665625887028104e-05, "loss": 0.0006986603140830994, "step": 177400 }, { "epoch": 50.35764973034346, "grad_norm": 0.0066941529512405396, "learning_rate": 4.9662787397104746e-05, "loss": 0.0010455910116434098, "step": 177410 }, { "epoch": 50.36048822026682, "grad_norm": 0.22824184596538544, "learning_rate": 4.965994890718138e-05, "loss": 0.0020651038736104964, "step": 177420 }, { "epoch": 50.36332671019018, "grad_norm": 4.466464519500732, "learning_rate": 4.965711041725802e-05, "loss": 0.010431519150733948, "step": 177430 }, { "epoch": 50.36616520011354, "grad_norm": 0.13182330131530762, "learning_rate": 4.965427192733466e-05, "loss": 0.01245148628950119, "step": 177440 }, { "epoch": 50.3690036900369, "grad_norm": 0.9364569783210754, "learning_rate": 4.96514334374113e-05, "loss": 0.00036634337157011033, "step": 177450 }, { "epoch": 50.37184217996026, "grad_norm": 0.11793991923332214, "learning_rate": 4.964859494748794e-05, "loss": 0.0012985726818442345, "step": 177460 }, { "epoch": 50.374680669883624, "grad_norm": 0.009376534260809422, "learning_rate": 4.964575645756458e-05, "loss": 0.0009373283013701439, "step": 177470 }, { "epoch": 50.37751915980698, "grad_norm": 0.20377060770988464, "learning_rate": 4.9642917967641215e-05, "loss": 0.0028071293607354166, "step": 177480 }, { "epoch": 50.38035764973034, "grad_norm": 0.01544203795492649, "learning_rate": 4.9640079477717856e-05, "loss": 0.0008023623377084732, "step": 177490 }, { "epoch": 50.383196139653705, "grad_norm": 0.08562029153108597, "learning_rate": 4.96372409877945e-05, "loss": 0.0002086309716105461, "step": 177500 }, { "epoch": 50.383196139653705, "eval_accuracy": 0.9825777325618363, "eval_loss": 0.06637311726808548, "eval_runtime": 38.3502, "eval_samples_per_second": 410.089, "eval_steps_per_second": 6.415, "step": 177500 }, { "epoch": 50.38603462957707, "grad_norm": 0.03368818759918213, "learning_rate": 4.963440249787113e-05, "loss": 0.00033841785043478014, "step": 177510 }, { "epoch": 50.388873119500424, "grad_norm": 2.186863899230957, "learning_rate": 4.9631564007947774e-05, "loss": 0.000840487889945507, "step": 177520 }, { "epoch": 50.39171160942379, "grad_norm": 0.7177827954292297, "learning_rate": 4.9628725518024415e-05, "loss": 0.00044138021767139437, "step": 177530 }, { "epoch": 50.39455009934715, "grad_norm": 3.913792610168457, "learning_rate": 4.9625887028101056e-05, "loss": 0.003207983821630478, "step": 177540 }, { "epoch": 50.397388589270506, "grad_norm": 0.0041189067997038364, "learning_rate": 4.962304853817769e-05, "loss": 0.00017845556139945985, "step": 177550 }, { "epoch": 50.40022707919387, "grad_norm": 0.03818149119615555, "learning_rate": 4.9620210048254326e-05, "loss": 0.0010662486776709557, "step": 177560 }, { "epoch": 50.40306556911723, "grad_norm": 0.009933644905686378, "learning_rate": 4.9617371558330974e-05, "loss": 0.0005038745701313018, "step": 177570 }, { "epoch": 50.40590405904059, "grad_norm": 0.06796284765005112, "learning_rate": 4.961453306840761e-05, "loss": 0.0008041054010391235, "step": 177580 }, { "epoch": 50.40874254896395, "grad_norm": 0.00119889120105654, "learning_rate": 4.961169457848425e-05, "loss": 0.0013750141486525535, "step": 177590 }, { "epoch": 50.41158103888731, "grad_norm": 0.036606285721063614, "learning_rate": 4.960885608856089e-05, "loss": 0.000538322702050209, "step": 177600 }, { "epoch": 50.414419528810676, "grad_norm": 0.01879917085170746, "learning_rate": 4.9606017598637526e-05, "loss": 0.0014165217056870461, "step": 177610 }, { "epoch": 50.41725801873403, "grad_norm": 0.007400041911751032, "learning_rate": 4.960317910871417e-05, "loss": 0.0011753609403967857, "step": 177620 }, { "epoch": 50.420096508657394, "grad_norm": 0.7243146300315857, "learning_rate": 4.960034061879081e-05, "loss": 0.0014083972200751305, "step": 177630 }, { "epoch": 50.42293499858076, "grad_norm": 0.026450952515006065, "learning_rate": 4.959750212886744e-05, "loss": 0.00035388581454753876, "step": 177640 }, { "epoch": 50.42577348850411, "grad_norm": 0.018202465027570724, "learning_rate": 4.9594663638944084e-05, "loss": 0.0006942572072148323, "step": 177650 }, { "epoch": 50.428611978427476, "grad_norm": 0.09457089751958847, "learning_rate": 4.959182514902072e-05, "loss": 0.00011273380368947983, "step": 177660 }, { "epoch": 50.43145046835084, "grad_norm": 0.00624358095228672, "learning_rate": 4.958898665909737e-05, "loss": 0.0007002167403697968, "step": 177670 }, { "epoch": 50.434288958274195, "grad_norm": 0.00694616325199604, "learning_rate": 4.9586148169174e-05, "loss": 0.0009652851149439812, "step": 177680 }, { "epoch": 50.43712744819756, "grad_norm": 0.07239057123661041, "learning_rate": 4.9583309679250636e-05, "loss": 0.00024339146912097931, "step": 177690 }, { "epoch": 50.43996593812092, "grad_norm": 4.8899245262146, "learning_rate": 4.9580471189327285e-05, "loss": 0.0015636367723345756, "step": 177700 }, { "epoch": 50.44280442804428, "grad_norm": 0.8131787180900574, "learning_rate": 4.957763269940392e-05, "loss": 0.00036377348005771636, "step": 177710 }, { "epoch": 50.44564291796764, "grad_norm": 0.15132571756839752, "learning_rate": 4.957479420948056e-05, "loss": 0.0005336759611964225, "step": 177720 }, { "epoch": 50.448481407891, "grad_norm": 0.054499946534633636, "learning_rate": 4.95719557195572e-05, "loss": 0.0003223717212677002, "step": 177730 }, { "epoch": 50.451319897814365, "grad_norm": 3.475471019744873, "learning_rate": 4.9569117229633836e-05, "loss": 0.000968000665307045, "step": 177740 }, { "epoch": 50.45415838773772, "grad_norm": 0.03734726458787918, "learning_rate": 4.956627873971048e-05, "loss": 0.0006064778193831444, "step": 177750 }, { "epoch": 50.45699687766108, "grad_norm": 0.18727990984916687, "learning_rate": 4.956344024978711e-05, "loss": 0.0008647337555885315, "step": 177760 }, { "epoch": 50.459835367584446, "grad_norm": 0.9058598279953003, "learning_rate": 4.9560601759863754e-05, "loss": 0.0031972043216228485, "step": 177770 }, { "epoch": 50.46267385750781, "grad_norm": 0.13639159500598907, "learning_rate": 4.9557763269940395e-05, "loss": 0.0008560791611671448, "step": 177780 }, { "epoch": 50.465512347431165, "grad_norm": 0.12852437794208527, "learning_rate": 4.955492478001703e-05, "loss": 0.001597229577600956, "step": 177790 }, { "epoch": 50.46835083735453, "grad_norm": 0.003737176302820444, "learning_rate": 4.955208629009367e-05, "loss": 0.012401670962572098, "step": 177800 }, { "epoch": 50.47118932727789, "grad_norm": 0.019455691799521446, "learning_rate": 4.954924780017031e-05, "loss": 0.0019724445417523386, "step": 177810 }, { "epoch": 50.47402781720125, "grad_norm": 0.3104507029056549, "learning_rate": 4.954640931024695e-05, "loss": 0.0005352212116122246, "step": 177820 }, { "epoch": 50.47686630712461, "grad_norm": 1.5828455686569214, "learning_rate": 4.9543570820323595e-05, "loss": 0.0017439603805541992, "step": 177830 }, { "epoch": 50.47970479704797, "grad_norm": 0.038023125380277634, "learning_rate": 4.954073233040023e-05, "loss": 0.000939876027405262, "step": 177840 }, { "epoch": 50.48254328697133, "grad_norm": 0.03705032169818878, "learning_rate": 4.9537893840476864e-05, "loss": 0.0011279700323939324, "step": 177850 }, { "epoch": 50.48538177689469, "grad_norm": 0.030725756660103798, "learning_rate": 4.9535055350553506e-05, "loss": 0.0004879012703895569, "step": 177860 }, { "epoch": 50.488220266818054, "grad_norm": 0.0574231818318367, "learning_rate": 4.953221686063015e-05, "loss": 0.0007849382236599922, "step": 177870 }, { "epoch": 50.49105875674142, "grad_norm": 0.026541393250226974, "learning_rate": 4.952937837070679e-05, "loss": 0.00048138033598661424, "step": 177880 }, { "epoch": 50.49389724666477, "grad_norm": 0.06079494580626488, "learning_rate": 4.952653988078342e-05, "loss": 0.0005018297582864761, "step": 177890 }, { "epoch": 50.496735736588136, "grad_norm": 0.003461429849267006, "learning_rate": 4.9523701390860065e-05, "loss": 0.00011999011039733886, "step": 177900 }, { "epoch": 50.4995742265115, "grad_norm": 0.040008578449487686, "learning_rate": 4.9520862900936706e-05, "loss": 0.005064338445663452, "step": 177910 }, { "epoch": 50.502412716434854, "grad_norm": 0.043657831847667694, "learning_rate": 4.951802441101334e-05, "loss": 0.0006017681211233139, "step": 177920 }, { "epoch": 50.50525120635822, "grad_norm": 0.006505503784865141, "learning_rate": 4.951518592108998e-05, "loss": 0.015132586658000945, "step": 177930 }, { "epoch": 50.50808969628158, "grad_norm": 0.1394028663635254, "learning_rate": 4.951234743116662e-05, "loss": 0.001602652482688427, "step": 177940 }, { "epoch": 50.510928186204936, "grad_norm": 0.025399422273039818, "learning_rate": 4.950950894124326e-05, "loss": 0.00038293041288852694, "step": 177950 }, { "epoch": 50.5137666761283, "grad_norm": 0.3579500913619995, "learning_rate": 4.95066704513199e-05, "loss": 0.011957348883152008, "step": 177960 }, { "epoch": 50.51660516605166, "grad_norm": 0.3935405910015106, "learning_rate": 4.950383196139654e-05, "loss": 0.0007551506161689759, "step": 177970 }, { "epoch": 50.519443655975024, "grad_norm": 0.004281311295926571, "learning_rate": 4.9500993471473175e-05, "loss": 0.0007873352617025376, "step": 177980 }, { "epoch": 50.52228214589838, "grad_norm": 0.006165917497128248, "learning_rate": 4.949815498154982e-05, "loss": 0.00043694339692592623, "step": 177990 }, { "epoch": 50.52512063582174, "grad_norm": 0.290931761264801, "learning_rate": 4.949531649162646e-05, "loss": 0.0010159110650420188, "step": 178000 }, { "epoch": 50.52512063582174, "eval_accuracy": 0.9835315063266993, "eval_loss": 0.06759390234947205, "eval_runtime": 40.7067, "eval_samples_per_second": 386.349, "eval_steps_per_second": 6.043, "step": 178000 }, { "epoch": 50.527959125745106, "grad_norm": 2.3404603004455566, "learning_rate": 4.94924780017031e-05, "loss": 0.004411763697862625, "step": 178010 }, { "epoch": 50.53079761566846, "grad_norm": 0.02579922415316105, "learning_rate": 4.9489639511779734e-05, "loss": 0.0005772171542048454, "step": 178020 }, { "epoch": 50.533636105591825, "grad_norm": 1.0700116157531738, "learning_rate": 4.9486801021856375e-05, "loss": 0.008119182288646698, "step": 178030 }, { "epoch": 50.53647459551519, "grad_norm": 4.8109941482543945, "learning_rate": 4.948396253193302e-05, "loss": 0.012446007132530213, "step": 178040 }, { "epoch": 50.53931308543854, "grad_norm": 1.0110079050064087, "learning_rate": 4.948112404200965e-05, "loss": 0.0007580531761050225, "step": 178050 }, { "epoch": 50.542151575361906, "grad_norm": 8.49814224243164, "learning_rate": 4.947828555208629e-05, "loss": 0.005822759866714477, "step": 178060 }, { "epoch": 50.54499006528527, "grad_norm": 1.6804269552230835, "learning_rate": 4.9475447062162934e-05, "loss": 0.0017199566587805747, "step": 178070 }, { "epoch": 50.54782855520863, "grad_norm": 0.03616025298833847, "learning_rate": 4.947260857223957e-05, "loss": 0.004221775382757187, "step": 178080 }, { "epoch": 50.55066704513199, "grad_norm": 0.07166191190481186, "learning_rate": 4.946977008231621e-05, "loss": 0.002099497988820076, "step": 178090 }, { "epoch": 50.55350553505535, "grad_norm": 0.08170256018638611, "learning_rate": 4.946693159239285e-05, "loss": 0.00045775752514600756, "step": 178100 }, { "epoch": 50.55634402497871, "grad_norm": 0.01513454969972372, "learning_rate": 4.9464093102469486e-05, "loss": 0.0003535013645887375, "step": 178110 }, { "epoch": 50.55918251490207, "grad_norm": 0.009716467931866646, "learning_rate": 4.946125461254613e-05, "loss": 0.0010290322825312614, "step": 178120 }, { "epoch": 50.56202100482543, "grad_norm": 0.07659623771905899, "learning_rate": 4.945841612262277e-05, "loss": 0.0005571220070123672, "step": 178130 }, { "epoch": 50.564859494748795, "grad_norm": 0.029689202085137367, "learning_rate": 4.945557763269941e-05, "loss": 0.00038351211696863174, "step": 178140 }, { "epoch": 50.56769798467215, "grad_norm": 0.003572858404368162, "learning_rate": 4.9452739142776045e-05, "loss": 0.003618927299976349, "step": 178150 }, { "epoch": 50.570536474595514, "grad_norm": 0.28603795170783997, "learning_rate": 4.944990065285268e-05, "loss": 0.0013179656118154526, "step": 178160 }, { "epoch": 50.57337496451888, "grad_norm": 0.0397309809923172, "learning_rate": 4.944706216292933e-05, "loss": 0.0014713196083903312, "step": 178170 }, { "epoch": 50.57621345444224, "grad_norm": 0.08268775045871735, "learning_rate": 4.944422367300596e-05, "loss": 0.00045121926814317703, "step": 178180 }, { "epoch": 50.579051944365595, "grad_norm": 0.022843673825263977, "learning_rate": 4.9441385183082603e-05, "loss": 0.0015566516667604446, "step": 178190 }, { "epoch": 50.58189043428896, "grad_norm": 0.00436963839456439, "learning_rate": 4.9438546693159245e-05, "loss": 0.012062124907970428, "step": 178200 }, { "epoch": 50.58472892421232, "grad_norm": 0.07315248250961304, "learning_rate": 4.943570820323588e-05, "loss": 0.00470237135887146, "step": 178210 }, { "epoch": 50.58756741413568, "grad_norm": 0.5179181694984436, "learning_rate": 4.943286971331252e-05, "loss": 0.0004626106470823288, "step": 178220 }, { "epoch": 50.59040590405904, "grad_norm": 0.23702092468738556, "learning_rate": 4.943003122338916e-05, "loss": 0.0006530953571200371, "step": 178230 }, { "epoch": 50.5932443939824, "grad_norm": 0.09163252264261246, "learning_rate": 4.94271927334658e-05, "loss": 0.0013814514502882957, "step": 178240 }, { "epoch": 50.596082883905765, "grad_norm": 0.016159044578671455, "learning_rate": 4.942435424354244e-05, "loss": 0.00024245604872703553, "step": 178250 }, { "epoch": 50.59892137382912, "grad_norm": 0.14793919026851654, "learning_rate": 4.942151575361907e-05, "loss": 0.0011219549924135208, "step": 178260 }, { "epoch": 50.601759863752484, "grad_norm": 0.07245628535747528, "learning_rate": 4.9418677263695714e-05, "loss": 0.00018126890063285828, "step": 178270 }, { "epoch": 50.60459835367585, "grad_norm": 0.04030456766486168, "learning_rate": 4.9415838773772356e-05, "loss": 0.005288044735789299, "step": 178280 }, { "epoch": 50.6074368435992, "grad_norm": 2.2107815742492676, "learning_rate": 4.941300028384899e-05, "loss": 0.0028426121920347215, "step": 178290 }, { "epoch": 50.610275333522566, "grad_norm": 0.3610652983188629, "learning_rate": 4.941016179392564e-05, "loss": 0.0006088268011808396, "step": 178300 }, { "epoch": 50.61311382344593, "grad_norm": 0.31119322776794434, "learning_rate": 4.940732330400227e-05, "loss": 0.001041305437684059, "step": 178310 }, { "epoch": 50.615952313369284, "grad_norm": 1.9523742198944092, "learning_rate": 4.940448481407891e-05, "loss": 0.008518999814987183, "step": 178320 }, { "epoch": 50.61879080329265, "grad_norm": 2.925356149673462, "learning_rate": 4.9401646324155556e-05, "loss": 0.005059754475951195, "step": 178330 }, { "epoch": 50.62162929321601, "grad_norm": 0.022409062832593918, "learning_rate": 4.939880783423219e-05, "loss": 0.012076853215694428, "step": 178340 }, { "epoch": 50.62446778313937, "grad_norm": 0.06059812381863594, "learning_rate": 4.939596934430883e-05, "loss": 0.0020471949130296705, "step": 178350 }, { "epoch": 50.62730627306273, "grad_norm": 0.058066293597221375, "learning_rate": 4.9393414703377805e-05, "loss": 0.010087721794843674, "step": 178360 }, { "epoch": 50.63014476298609, "grad_norm": 0.21247069537639618, "learning_rate": 4.939057621345444e-05, "loss": 0.0025983568280935287, "step": 178370 }, { "epoch": 50.632983252909455, "grad_norm": 0.05629691854119301, "learning_rate": 4.938773772353109e-05, "loss": 0.011957169324159623, "step": 178380 }, { "epoch": 50.63582174283281, "grad_norm": 0.025649383664131165, "learning_rate": 4.938489923360772e-05, "loss": 0.0004917487502098083, "step": 178390 }, { "epoch": 50.63866023275617, "grad_norm": 0.003942321985960007, "learning_rate": 4.9382060743684363e-05, "loss": 0.0007713185623288155, "step": 178400 }, { "epoch": 50.641498722679536, "grad_norm": 0.06471015512943268, "learning_rate": 4.9379222253761005e-05, "loss": 0.0008469484746456146, "step": 178410 }, { "epoch": 50.64433721260289, "grad_norm": 0.21105678379535675, "learning_rate": 4.937638376383764e-05, "loss": 0.0030028074979782104, "step": 178420 }, { "epoch": 50.647175702526255, "grad_norm": 0.03996838629245758, "learning_rate": 4.937354527391428e-05, "loss": 0.0016567815095186234, "step": 178430 }, { "epoch": 50.65001419244962, "grad_norm": 0.19031208753585815, "learning_rate": 4.9370706783990915e-05, "loss": 0.0026785360649228098, "step": 178440 }, { "epoch": 50.65285268237298, "grad_norm": 0.006391600240021944, "learning_rate": 4.936786829406756e-05, "loss": 0.0003261411562561989, "step": 178450 }, { "epoch": 50.655691172296336, "grad_norm": 0.006637935061007738, "learning_rate": 4.93650298041442e-05, "loss": 0.0004019120708107948, "step": 178460 }, { "epoch": 50.6585296622197, "grad_norm": 0.026042984798550606, "learning_rate": 4.936219131422083e-05, "loss": 0.0009416745975613594, "step": 178470 }, { "epoch": 50.66136815214306, "grad_norm": 0.007077690679579973, "learning_rate": 4.935935282429748e-05, "loss": 0.0062716960906982425, "step": 178480 }, { "epoch": 50.66420664206642, "grad_norm": 0.1667357236146927, "learning_rate": 4.9356514334374116e-05, "loss": 0.0017212904989719392, "step": 178490 }, { "epoch": 50.66704513198978, "grad_norm": 0.007081514690071344, "learning_rate": 4.935367584445075e-05, "loss": 0.0004252402111887932, "step": 178500 }, { "epoch": 50.66704513198978, "eval_accuracy": 0.979779996184905, "eval_loss": 0.080845408141613, "eval_runtime": 36.4996, "eval_samples_per_second": 430.881, "eval_steps_per_second": 6.74, "step": 178500 }, { "epoch": 50.669883621913144, "grad_norm": 1.1558313369750977, "learning_rate": 4.93508373545274e-05, "loss": 0.004490319266915321, "step": 178510 }, { "epoch": 50.67272211183651, "grad_norm": 0.03087504394352436, "learning_rate": 4.934799886460403e-05, "loss": 0.0027544630691409113, "step": 178520 }, { "epoch": 50.67556060175986, "grad_norm": 0.6632950901985168, "learning_rate": 4.9345160374680674e-05, "loss": 0.001104673370718956, "step": 178530 }, { "epoch": 50.678399091683225, "grad_norm": 0.045756515115499496, "learning_rate": 4.934232188475731e-05, "loss": 0.0004526948556303978, "step": 178540 }, { "epoch": 50.68123758160659, "grad_norm": 0.018487637862563133, "learning_rate": 4.933948339483395e-05, "loss": 0.007010775059461594, "step": 178550 }, { "epoch": 50.684076071529944, "grad_norm": 0.08060003817081451, "learning_rate": 4.933664490491059e-05, "loss": 0.008528627455234528, "step": 178560 }, { "epoch": 50.68691456145331, "grad_norm": 0.00779944472014904, "learning_rate": 4.9333806414987226e-05, "loss": 0.014698423445224762, "step": 178570 }, { "epoch": 50.68975305137667, "grad_norm": 0.07016563415527344, "learning_rate": 4.933096792506387e-05, "loss": 0.005509041994810104, "step": 178580 }, { "epoch": 50.692591541300025, "grad_norm": 0.013894298113882542, "learning_rate": 4.932812943514051e-05, "loss": 0.001547354646027088, "step": 178590 }, { "epoch": 50.69543003122339, "grad_norm": 0.07374076545238495, "learning_rate": 4.9325290945217144e-05, "loss": 0.0007901988923549652, "step": 178600 }, { "epoch": 50.69826852114675, "grad_norm": 0.04902556166052818, "learning_rate": 4.932245245529379e-05, "loss": 0.000431976281106472, "step": 178610 }, { "epoch": 50.701107011070114, "grad_norm": 0.026593172922730446, "learning_rate": 4.9319613965370426e-05, "loss": 0.0003003165125846863, "step": 178620 }, { "epoch": 50.70394550099347, "grad_norm": 0.014001669362187386, "learning_rate": 4.931677547544706e-05, "loss": 0.0007499255239963532, "step": 178630 }, { "epoch": 50.70678399091683, "grad_norm": 0.09237119555473328, "learning_rate": 4.93139369855237e-05, "loss": 0.00536670871078968, "step": 178640 }, { "epoch": 50.709622480840196, "grad_norm": 0.2357618659734726, "learning_rate": 4.9311098495600344e-05, "loss": 0.0002983778715133667, "step": 178650 }, { "epoch": 50.71246097076355, "grad_norm": 0.1375218778848648, "learning_rate": 4.9308260005676985e-05, "loss": 0.00014305859804153442, "step": 178660 }, { "epoch": 50.715299460686914, "grad_norm": 0.0751088559627533, "learning_rate": 4.930542151575362e-05, "loss": 0.006683362275362014, "step": 178670 }, { "epoch": 50.71813795061028, "grad_norm": 0.21063295006752014, "learning_rate": 4.930258302583026e-05, "loss": 0.0006577042862772941, "step": 178680 }, { "epoch": 50.72097644053363, "grad_norm": 0.25937002897262573, "learning_rate": 4.92997445359069e-05, "loss": 0.0037137456238269806, "step": 178690 }, { "epoch": 50.723814930456996, "grad_norm": 13.788745880126953, "learning_rate": 4.929690604598354e-05, "loss": 0.005152755230665207, "step": 178700 }, { "epoch": 50.72665342038036, "grad_norm": 0.01650199480354786, "learning_rate": 4.929406755606018e-05, "loss": 0.00846119448542595, "step": 178710 }, { "epoch": 50.72949191030372, "grad_norm": 0.14352953433990479, "learning_rate": 4.929122906613682e-05, "loss": 0.0003270063549280167, "step": 178720 }, { "epoch": 50.73233040022708, "grad_norm": 0.04552276432514191, "learning_rate": 4.9288390576213454e-05, "loss": 0.0004505755379796028, "step": 178730 }, { "epoch": 50.73516889015044, "grad_norm": 0.003676564432680607, "learning_rate": 4.9285552086290096e-05, "loss": 0.0024886392056941986, "step": 178740 }, { "epoch": 50.7380073800738, "grad_norm": 0.09496837854385376, "learning_rate": 4.928271359636674e-05, "loss": 0.0017604229971766472, "step": 178750 }, { "epoch": 50.74084586999716, "grad_norm": 0.013088447973132133, "learning_rate": 4.927987510644337e-05, "loss": 0.0007150562480092049, "step": 178760 }, { "epoch": 50.74368435992052, "grad_norm": 1.005145788192749, "learning_rate": 4.927703661652001e-05, "loss": 0.002523892931640148, "step": 178770 }, { "epoch": 50.746522849843885, "grad_norm": 0.6953666806221008, "learning_rate": 4.9274198126596654e-05, "loss": 0.0005369767546653748, "step": 178780 }, { "epoch": 50.74936133976724, "grad_norm": 0.017528757452964783, "learning_rate": 4.927135963667329e-05, "loss": 0.0012129437178373336, "step": 178790 }, { "epoch": 50.7521998296906, "grad_norm": 0.026179812848567963, "learning_rate": 4.926852114674993e-05, "loss": 0.0012494506314396857, "step": 178800 }, { "epoch": 50.755038319613966, "grad_norm": 0.14635993540287018, "learning_rate": 4.926568265682657e-05, "loss": 0.00030093472450971604, "step": 178810 }, { "epoch": 50.75787680953733, "grad_norm": 0.23282845318317413, "learning_rate": 4.926284416690321e-05, "loss": 0.001109175942838192, "step": 178820 }, { "epoch": 50.760715299460685, "grad_norm": 0.045073386281728745, "learning_rate": 4.926000567697985e-05, "loss": 0.00207152534276247, "step": 178830 }, { "epoch": 50.76355378938405, "grad_norm": 0.019965635612607002, "learning_rate": 4.925716718705649e-05, "loss": 0.0015240028500556947, "step": 178840 }, { "epoch": 50.76639227930741, "grad_norm": 0.03289882466197014, "learning_rate": 4.925432869713313e-05, "loss": 0.0005108390003442764, "step": 178850 }, { "epoch": 50.76923076923077, "grad_norm": 0.1136886328458786, "learning_rate": 4.9251490207209765e-05, "loss": 0.00039244648069143296, "step": 178860 }, { "epoch": 50.77206925915413, "grad_norm": 0.023850787431001663, "learning_rate": 4.9248651717286406e-05, "loss": 0.0009686781093478202, "step": 178870 }, { "epoch": 50.77490774907749, "grad_norm": 0.03711152821779251, "learning_rate": 4.924581322736305e-05, "loss": 0.0009189654141664505, "step": 178880 }, { "epoch": 50.77774623900085, "grad_norm": 0.3102394938468933, "learning_rate": 4.924297473743968e-05, "loss": 0.005232961475849151, "step": 178890 }, { "epoch": 50.78058472892421, "grad_norm": 0.05296027287840843, "learning_rate": 4.9240136247516324e-05, "loss": 0.002931940369307995, "step": 178900 }, { "epoch": 50.783423218847574, "grad_norm": 3.6736443042755127, "learning_rate": 4.9237297757592965e-05, "loss": 0.0013008330017328263, "step": 178910 }, { "epoch": 50.78626170877094, "grad_norm": 0.011712496168911457, "learning_rate": 4.92344592676696e-05, "loss": 0.0008196715265512466, "step": 178920 }, { "epoch": 50.78910019869429, "grad_norm": 0.048041749745607376, "learning_rate": 4.923162077774624e-05, "loss": 0.0004193456843495369, "step": 178930 }, { "epoch": 50.791938688617655, "grad_norm": 0.2884804308414459, "learning_rate": 4.922878228782288e-05, "loss": 0.0004962392151355744, "step": 178940 }, { "epoch": 50.79477717854102, "grad_norm": 0.02751844748854637, "learning_rate": 4.9225943797899524e-05, "loss": 0.0004226258024573326, "step": 178950 }, { "epoch": 50.797615668464374, "grad_norm": 0.6899861097335815, "learning_rate": 4.922310530797616e-05, "loss": 0.0003818713128566742, "step": 178960 }, { "epoch": 50.80045415838774, "grad_norm": 0.01316869631409645, "learning_rate": 4.922026681805279e-05, "loss": 0.0003929775208234787, "step": 178970 }, { "epoch": 50.8032926483111, "grad_norm": 0.6012015342712402, "learning_rate": 4.921742832812944e-05, "loss": 0.0024693846702575684, "step": 178980 }, { "epoch": 50.80613113823446, "grad_norm": 0.31851428747177124, "learning_rate": 4.9214589838206076e-05, "loss": 0.0020862199366092683, "step": 178990 }, { "epoch": 50.80896962815782, "grad_norm": 0.03809988871216774, "learning_rate": 4.921175134828272e-05, "loss": 9.509343653917313e-05, "step": 179000 }, { "epoch": 50.80896962815782, "eval_accuracy": 0.9832771666560692, "eval_loss": 0.06583996117115021, "eval_runtime": 34.7309, "eval_samples_per_second": 452.824, "eval_steps_per_second": 7.083, "step": 179000 }, { "epoch": 50.81180811808118, "grad_norm": 0.040925316512584686, "learning_rate": 4.920891285835936e-05, "loss": 0.0002210894599556923, "step": 179010 }, { "epoch": 50.814646608004544, "grad_norm": 0.03678292781114578, "learning_rate": 4.920607436843599e-05, "loss": 0.0003953877836465836, "step": 179020 }, { "epoch": 50.8174850979279, "grad_norm": 0.22770637273788452, "learning_rate": 4.9203235878512635e-05, "loss": 0.000862090103328228, "step": 179030 }, { "epoch": 50.82032358785126, "grad_norm": 0.05221158638596535, "learning_rate": 4.9200397388589276e-05, "loss": 0.00024539995938539506, "step": 179040 }, { "epoch": 50.823162077774626, "grad_norm": 0.0055334619246423244, "learning_rate": 4.919755889866591e-05, "loss": 0.0001894678920507431, "step": 179050 }, { "epoch": 50.82600056769798, "grad_norm": 0.097907654941082, "learning_rate": 4.919472040874255e-05, "loss": 0.0014814017340540886, "step": 179060 }, { "epoch": 50.828839057621344, "grad_norm": 0.13753169775009155, "learning_rate": 4.9191881918819186e-05, "loss": 0.001325295679271221, "step": 179070 }, { "epoch": 50.83167754754471, "grad_norm": 0.008349619805812836, "learning_rate": 4.9189043428895835e-05, "loss": 9.024795144796371e-05, "step": 179080 }, { "epoch": 50.83451603746807, "grad_norm": 0.2214701622724533, "learning_rate": 4.918620493897247e-05, "loss": 0.00034897457808256147, "step": 179090 }, { "epoch": 50.837354527391426, "grad_norm": 0.4899151027202606, "learning_rate": 4.9183366449049104e-05, "loss": 0.002065099775791168, "step": 179100 }, { "epoch": 50.84019301731479, "grad_norm": 0.07751981914043427, "learning_rate": 4.918052795912575e-05, "loss": 0.000617833249270916, "step": 179110 }, { "epoch": 50.84303150723815, "grad_norm": 0.01716454140841961, "learning_rate": 4.9177689469202387e-05, "loss": 0.01281128078699112, "step": 179120 }, { "epoch": 50.84586999716151, "grad_norm": 0.45877984166145325, "learning_rate": 4.917485097927903e-05, "loss": 0.0006223198026418686, "step": 179130 }, { "epoch": 50.84870848708487, "grad_norm": 0.022862128913402557, "learning_rate": 4.917201248935567e-05, "loss": 0.0036527566611766816, "step": 179140 }, { "epoch": 50.85154697700823, "grad_norm": 0.19685031473636627, "learning_rate": 4.9169173999432304e-05, "loss": 0.0007452258840203285, "step": 179150 }, { "epoch": 50.85438546693159, "grad_norm": 0.017496410757303238, "learning_rate": 4.9166335509508945e-05, "loss": 0.0015909850597381591, "step": 179160 }, { "epoch": 50.85722395685495, "grad_norm": 0.03233885392546654, "learning_rate": 4.916349701958558e-05, "loss": 0.0016158102080225945, "step": 179170 }, { "epoch": 50.860062446778315, "grad_norm": 0.0644092857837677, "learning_rate": 4.916065852966222e-05, "loss": 0.0012915847823023797, "step": 179180 }, { "epoch": 50.86290093670168, "grad_norm": 0.00634399987757206, "learning_rate": 4.915782003973886e-05, "loss": 0.002448227442800999, "step": 179190 }, { "epoch": 50.865739426625034, "grad_norm": 7.147115707397461, "learning_rate": 4.91549815498155e-05, "loss": 0.0016829822212457656, "step": 179200 }, { "epoch": 50.868577916548396, "grad_norm": 1.0665534734725952, "learning_rate": 4.915214305989214e-05, "loss": 0.00236334465444088, "step": 179210 }, { "epoch": 50.87141640647176, "grad_norm": 0.012645980343222618, "learning_rate": 4.914930456996878e-05, "loss": 0.0069883309304714205, "step": 179220 }, { "epoch": 50.874254896395115, "grad_norm": 0.2337924689054489, "learning_rate": 4.9146466080045415e-05, "loss": 0.0031379949301481246, "step": 179230 }, { "epoch": 50.87709338631848, "grad_norm": 0.4835957884788513, "learning_rate": 4.914362759012206e-05, "loss": 0.0014294421300292016, "step": 179240 }, { "epoch": 50.87993187624184, "grad_norm": 0.15273645520210266, "learning_rate": 4.91407891001987e-05, "loss": 0.001043703593313694, "step": 179250 }, { "epoch": 50.8827703661652, "grad_norm": 0.006965849548578262, "learning_rate": 4.913795061027533e-05, "loss": 0.001082521677017212, "step": 179260 }, { "epoch": 50.88560885608856, "grad_norm": 0.08320628851652145, "learning_rate": 4.913511212035197e-05, "loss": 0.0006198791787028312, "step": 179270 }, { "epoch": 50.88844734601192, "grad_norm": 0.4638696014881134, "learning_rate": 4.9132273630428615e-05, "loss": 0.0006835728883743287, "step": 179280 }, { "epoch": 50.891285835935285, "grad_norm": 0.009739060886204243, "learning_rate": 4.9129435140505256e-05, "loss": 0.0019548645243048666, "step": 179290 }, { "epoch": 50.89412432585864, "grad_norm": 0.01120669674128294, "learning_rate": 4.912659665058189e-05, "loss": 0.00610712319612503, "step": 179300 }, { "epoch": 50.896962815782004, "grad_norm": 0.46306151151657104, "learning_rate": 4.912375816065853e-05, "loss": 0.0029123468324542046, "step": 179310 }, { "epoch": 50.89980130570537, "grad_norm": 0.02891502156853676, "learning_rate": 4.9120919670735173e-05, "loss": 0.000684121623635292, "step": 179320 }, { "epoch": 50.90263979562872, "grad_norm": 0.06457027792930603, "learning_rate": 4.911808118081181e-05, "loss": 0.0007144594565033913, "step": 179330 }, { "epoch": 50.905478285552086, "grad_norm": 0.21431808173656464, "learning_rate": 4.911524269088845e-05, "loss": 0.0033987484872341155, "step": 179340 }, { "epoch": 50.90831677547545, "grad_norm": 0.9364064931869507, "learning_rate": 4.911240420096509e-05, "loss": 0.0004824221134185791, "step": 179350 }, { "epoch": 50.91115526539881, "grad_norm": 0.008920293301343918, "learning_rate": 4.9109565711041725e-05, "loss": 0.0007490737363696099, "step": 179360 }, { "epoch": 50.91399375532217, "grad_norm": 2.9292166233062744, "learning_rate": 4.910672722111837e-05, "loss": 0.003353738039731979, "step": 179370 }, { "epoch": 50.91683224524553, "grad_norm": 0.08703646808862686, "learning_rate": 4.910388873119501e-05, "loss": 0.00029677674174308775, "step": 179380 }, { "epoch": 50.91967073516889, "grad_norm": 0.02223220467567444, "learning_rate": 4.910105024127164e-05, "loss": 0.00017034821212291718, "step": 179390 }, { "epoch": 50.92250922509225, "grad_norm": 0.0333072803914547, "learning_rate": 4.9098211751348284e-05, "loss": 0.00023503396660089492, "step": 179400 }, { "epoch": 50.92534771501561, "grad_norm": 13.47027587890625, "learning_rate": 4.9095373261424925e-05, "loss": 0.004080486297607422, "step": 179410 }, { "epoch": 50.928186204938974, "grad_norm": 0.07667466253042221, "learning_rate": 4.909253477150157e-05, "loss": 0.004984279721975326, "step": 179420 }, { "epoch": 50.93102469486233, "grad_norm": 0.044266264885663986, "learning_rate": 4.90896962815782e-05, "loss": 0.0035048123449087144, "step": 179430 }, { "epoch": 50.93386318478569, "grad_norm": 0.01236622966825962, "learning_rate": 4.908685779165484e-05, "loss": 0.0012715840712189674, "step": 179440 }, { "epoch": 50.936701674709056, "grad_norm": 0.017933649942278862, "learning_rate": 4.9084019301731484e-05, "loss": 0.0001538880169391632, "step": 179450 }, { "epoch": 50.93954016463242, "grad_norm": 2.1924099922180176, "learning_rate": 4.908118081180812e-05, "loss": 0.0006908215582370758, "step": 179460 }, { "epoch": 50.942378654555775, "grad_norm": 0.52016681432724, "learning_rate": 4.907834232188476e-05, "loss": 0.00036052558571100237, "step": 179470 }, { "epoch": 50.94521714447914, "grad_norm": 0.0048692114651203156, "learning_rate": 4.90755038319614e-05, "loss": 0.0007019290700554848, "step": 179480 }, { "epoch": 50.9480556344025, "grad_norm": 0.004293966107070446, "learning_rate": 4.9072665342038036e-05, "loss": 0.004859199747443199, "step": 179490 }, { "epoch": 50.950894124325856, "grad_norm": 0.01572313904762268, "learning_rate": 4.906982685211468e-05, "loss": 0.0002309473231434822, "step": 179500 }, { "epoch": 50.950894124325856, "eval_accuracy": 0.9835950912443568, "eval_loss": 0.070470429956913, "eval_runtime": 52.2086, "eval_samples_per_second": 301.234, "eval_steps_per_second": 4.712, "step": 179500 }, { "epoch": 50.95373261424922, "grad_norm": 0.08651293814182281, "learning_rate": 4.906698836219132e-05, "loss": 0.005856377631425857, "step": 179510 }, { "epoch": 50.95657110417258, "grad_norm": 0.38009586930274963, "learning_rate": 4.9064149872267953e-05, "loss": 0.0003680475056171417, "step": 179520 }, { "epoch": 50.95940959409594, "grad_norm": 0.09583362936973572, "learning_rate": 4.9061311382344595e-05, "loss": 0.0003339018672704697, "step": 179530 }, { "epoch": 50.9622480840193, "grad_norm": 5.458598613739014, "learning_rate": 4.9058472892421236e-05, "loss": 0.001268400065600872, "step": 179540 }, { "epoch": 50.96508657394266, "grad_norm": 2.402792453765869, "learning_rate": 4.905563440249788e-05, "loss": 0.002457424812018871, "step": 179550 }, { "epoch": 50.967925063866026, "grad_norm": 0.5129597783088684, "learning_rate": 4.905279591257451e-05, "loss": 0.0006177527830004692, "step": 179560 }, { "epoch": 50.97076355378938, "grad_norm": 16.32640838623047, "learning_rate": 4.904995742265115e-05, "loss": 0.017386679351329804, "step": 179570 }, { "epoch": 50.973602043712745, "grad_norm": 0.023011812940239906, "learning_rate": 4.9047118932727795e-05, "loss": 0.0010430153459310531, "step": 179580 }, { "epoch": 50.97644053363611, "grad_norm": 0.18746885657310486, "learning_rate": 4.904428044280443e-05, "loss": 0.0026110116392374038, "step": 179590 }, { "epoch": 50.979279023559464, "grad_norm": 0.0039805639535188675, "learning_rate": 4.904144195288107e-05, "loss": 0.00043531470000743864, "step": 179600 }, { "epoch": 50.98211751348283, "grad_norm": 1.1307872533798218, "learning_rate": 4.903860346295771e-05, "loss": 0.0013998165726661683, "step": 179610 }, { "epoch": 50.98495600340619, "grad_norm": 0.27946752309799194, "learning_rate": 4.903576497303435e-05, "loss": 0.0006341461092233657, "step": 179620 }, { "epoch": 50.987794493329545, "grad_norm": 0.011028838343918324, "learning_rate": 4.903292648311099e-05, "loss": 0.00027085598558187487, "step": 179630 }, { "epoch": 50.99063298325291, "grad_norm": 0.014275617897510529, "learning_rate": 4.903008799318763e-05, "loss": 0.001770283654332161, "step": 179640 }, { "epoch": 50.99347147317627, "grad_norm": 0.018380673602223396, "learning_rate": 4.9027249503264264e-05, "loss": 0.00014509093016386033, "step": 179650 }, { "epoch": 50.996309963099634, "grad_norm": 0.06115612015128136, "learning_rate": 4.9024411013340906e-05, "loss": 0.0006128611043095588, "step": 179660 }, { "epoch": 50.99914845302299, "grad_norm": 0.0032879537902772427, "learning_rate": 4.902157252341754e-05, "loss": 0.0002945601940155029, "step": 179670 }, { "epoch": 51.00198694294635, "grad_norm": 0.015535629354417324, "learning_rate": 4.901873403349418e-05, "loss": 0.00024250182323157788, "step": 179680 }, { "epoch": 51.004825432869715, "grad_norm": 0.07265198975801468, "learning_rate": 4.901589554357082e-05, "loss": 0.0002588614821434021, "step": 179690 }, { "epoch": 51.00766392279307, "grad_norm": 0.006979939993470907, "learning_rate": 4.901305705364746e-05, "loss": 0.00034997109323740003, "step": 179700 }, { "epoch": 51.010502412716434, "grad_norm": 0.01846596971154213, "learning_rate": 4.9010218563724106e-05, "loss": 0.00020672641694545745, "step": 179710 }, { "epoch": 51.0133409026398, "grad_norm": 0.02632579393684864, "learning_rate": 4.900738007380074e-05, "loss": 0.00048722755163908, "step": 179720 }, { "epoch": 51.01617939256316, "grad_norm": 0.18845035135746002, "learning_rate": 4.9004541583877375e-05, "loss": 0.00039603691548109057, "step": 179730 }, { "epoch": 51.019017882486516, "grad_norm": 0.4031141400337219, "learning_rate": 4.900170309395402e-05, "loss": 0.00017477069050073624, "step": 179740 }, { "epoch": 51.02185637240988, "grad_norm": 0.2109980434179306, "learning_rate": 4.899886460403066e-05, "loss": 0.00027760639786720274, "step": 179750 }, { "epoch": 51.02469486233324, "grad_norm": 0.010779169388115406, "learning_rate": 4.89960261141073e-05, "loss": 9.042434394359588e-05, "step": 179760 }, { "epoch": 51.0275333522566, "grad_norm": 0.0365447960793972, "learning_rate": 4.8993187624183934e-05, "loss": 0.00010203681886196136, "step": 179770 }, { "epoch": 51.03037184217996, "grad_norm": 0.006328749004751444, "learning_rate": 4.8990349134260575e-05, "loss": 0.00034064240753650665, "step": 179780 }, { "epoch": 51.03321033210332, "grad_norm": 0.016456831246614456, "learning_rate": 4.8987510644337216e-05, "loss": 6.719231605529785e-05, "step": 179790 }, { "epoch": 51.03604882202668, "grad_norm": 0.10145506262779236, "learning_rate": 4.898467215441385e-05, "loss": 0.0002528112381696701, "step": 179800 }, { "epoch": 51.03888731195004, "grad_norm": 0.01077834703028202, "learning_rate": 4.898183366449049e-05, "loss": 0.0002768928185105324, "step": 179810 }, { "epoch": 51.041725801873405, "grad_norm": 0.004873296711593866, "learning_rate": 4.8978995174567134e-05, "loss": 0.00031968224793672563, "step": 179820 }, { "epoch": 51.04456429179677, "grad_norm": 0.028611335903406143, "learning_rate": 4.897615668464377e-05, "loss": 9.012985974550247e-05, "step": 179830 }, { "epoch": 51.04740278172012, "grad_norm": 0.0036443218123167753, "learning_rate": 4.8973318194720416e-05, "loss": 0.0003278551623225212, "step": 179840 }, { "epoch": 51.050241271643486, "grad_norm": 0.004815959371626377, "learning_rate": 4.897047970479705e-05, "loss": 0.00016498100012540817, "step": 179850 }, { "epoch": 51.05307976156685, "grad_norm": 0.03127272427082062, "learning_rate": 4.8967641214873686e-05, "loss": 0.004517809301614761, "step": 179860 }, { "epoch": 51.055918251490205, "grad_norm": 0.02662700042128563, "learning_rate": 4.896480272495033e-05, "loss": 0.0004983149468898773, "step": 179870 }, { "epoch": 51.05875674141357, "grad_norm": 0.007584971841424704, "learning_rate": 4.896196423502697e-05, "loss": 0.0005077928304672241, "step": 179880 }, { "epoch": 51.06159523133693, "grad_norm": 0.04191885516047478, "learning_rate": 4.895912574510361e-05, "loss": 0.0008306512609124184, "step": 179890 }, { "epoch": 51.064433721260286, "grad_norm": 0.5322880744934082, "learning_rate": 4.8956287255180244e-05, "loss": 0.0010651836171746254, "step": 179900 }, { "epoch": 51.06727221118365, "grad_norm": 0.022372450679540634, "learning_rate": 4.8953448765256886e-05, "loss": 0.00048083458095788957, "step": 179910 }, { "epoch": 51.07011070110701, "grad_norm": 0.10790558904409409, "learning_rate": 4.895061027533353e-05, "loss": 0.0004332292824983597, "step": 179920 }, { "epoch": 51.072949191030375, "grad_norm": 0.07608035206794739, "learning_rate": 4.894777178541016e-05, "loss": 0.005403590947389602, "step": 179930 }, { "epoch": 51.07578768095373, "grad_norm": 8.405282020568848, "learning_rate": 4.89449332954868e-05, "loss": 0.006927412003278732, "step": 179940 }, { "epoch": 51.078626170877094, "grad_norm": 0.011767646297812462, "learning_rate": 4.8942094805563445e-05, "loss": 0.00023028776049613953, "step": 179950 }, { "epoch": 51.08146466080046, "grad_norm": 0.02937108464539051, "learning_rate": 4.893925631564008e-05, "loss": 0.0019316550344228745, "step": 179960 }, { "epoch": 51.08430315072381, "grad_norm": 0.4426285922527313, "learning_rate": 4.893641782571672e-05, "loss": 0.01119036003947258, "step": 179970 }, { "epoch": 51.087141640647175, "grad_norm": 0.07856959849596024, "learning_rate": 4.893357933579336e-05, "loss": 0.005194760113954544, "step": 179980 }, { "epoch": 51.08998013057054, "grad_norm": 0.053970109671354294, "learning_rate": 4.8930740845869996e-05, "loss": 0.001546274870634079, "step": 179990 }, { "epoch": 51.092818620493894, "grad_norm": 0.15672941505908966, "learning_rate": 4.892790235594664e-05, "loss": 0.0014833524823188783, "step": 180000 }, { "epoch": 51.092818620493894, "eval_accuracy": 0.97990716602022, "eval_loss": 0.0813787430524826, "eval_runtime": 36.7775, "eval_samples_per_second": 427.625, "eval_steps_per_second": 6.689, "step": 180000 }, { "epoch": 51.09565711041726, "grad_norm": 0.6907253265380859, "learning_rate": 4.892506386602328e-05, "loss": 0.0026443788781762125, "step": 180010 }, { "epoch": 51.09849560034062, "grad_norm": 0.46394240856170654, "learning_rate": 4.8922225376099914e-05, "loss": 0.002196582593023777, "step": 180020 }, { "epoch": 51.10133409026398, "grad_norm": 13.488675117492676, "learning_rate": 4.8919386886176555e-05, "loss": 0.003026919439435005, "step": 180030 }, { "epoch": 51.10417258018734, "grad_norm": 0.011296909302473068, "learning_rate": 4.8916548396253197e-05, "loss": 0.0003596633672714233, "step": 180040 }, { "epoch": 51.1070110701107, "grad_norm": 0.3446290194988251, "learning_rate": 4.891370990632984e-05, "loss": 0.0010158466175198555, "step": 180050 }, { "epoch": 51.109849560034064, "grad_norm": 0.0035746388603001833, "learning_rate": 4.891087141640647e-05, "loss": 0.0010903449729084969, "step": 180060 }, { "epoch": 51.11268804995742, "grad_norm": 0.018381942063570023, "learning_rate": 4.8908032926483114e-05, "loss": 0.0004896927624940872, "step": 180070 }, { "epoch": 51.11552653988078, "grad_norm": 0.013624325394630432, "learning_rate": 4.8905194436559755e-05, "loss": 0.000591176189482212, "step": 180080 }, { "epoch": 51.118365029804146, "grad_norm": 0.013983860611915588, "learning_rate": 4.890235594663639e-05, "loss": 0.005197717621922493, "step": 180090 }, { "epoch": 51.1212035197275, "grad_norm": 0.02479427680373192, "learning_rate": 4.889951745671303e-05, "loss": 0.002130427397787571, "step": 180100 }, { "epoch": 51.124042009650864, "grad_norm": 0.0043724337592720985, "learning_rate": 4.889667896678967e-05, "loss": 0.0001799141988158226, "step": 180110 }, { "epoch": 51.12688049957423, "grad_norm": 0.04990105703473091, "learning_rate": 4.889384047686631e-05, "loss": 0.00028492175042629244, "step": 180120 }, { "epoch": 51.12971898949759, "grad_norm": 1.119217872619629, "learning_rate": 4.889100198694295e-05, "loss": 0.00045188721269369126, "step": 180130 }, { "epoch": 51.132557479420946, "grad_norm": 0.23008142411708832, "learning_rate": 4.888816349701959e-05, "loss": 0.0006237102672457695, "step": 180140 }, { "epoch": 51.13539596934431, "grad_norm": 0.012487195432186127, "learning_rate": 4.8885325007096225e-05, "loss": 0.0003145536407828331, "step": 180150 }, { "epoch": 51.13823445926767, "grad_norm": 0.42015159130096436, "learning_rate": 4.8882486517172866e-05, "loss": 0.0020379900932312013, "step": 180160 }, { "epoch": 51.14107294919103, "grad_norm": 1.5830810070037842, "learning_rate": 4.887964802724951e-05, "loss": 0.002533311769366264, "step": 180170 }, { "epoch": 51.14391143911439, "grad_norm": 0.002293346682563424, "learning_rate": 4.887680953732615e-05, "loss": 0.0008530288934707642, "step": 180180 }, { "epoch": 51.14674992903775, "grad_norm": 0.022629713639616966, "learning_rate": 4.887397104740278e-05, "loss": 0.004350553825497627, "step": 180190 }, { "epoch": 51.149588418961116, "grad_norm": 0.13498817384243011, "learning_rate": 4.887113255747942e-05, "loss": 0.0002024821937084198, "step": 180200 }, { "epoch": 51.15242690888447, "grad_norm": 0.035899996757507324, "learning_rate": 4.8868294067556066e-05, "loss": 0.00047992002218961717, "step": 180210 }, { "epoch": 51.155265398807835, "grad_norm": 0.6929156184196472, "learning_rate": 4.88654555776327e-05, "loss": 0.003365549445152283, "step": 180220 }, { "epoch": 51.1581038887312, "grad_norm": 0.08379805088043213, "learning_rate": 4.886261708770934e-05, "loss": 0.0004920007660984993, "step": 180230 }, { "epoch": 51.16094237865455, "grad_norm": 0.2904570400714874, "learning_rate": 4.8859778597785983e-05, "loss": 0.0003342419862747192, "step": 180240 }, { "epoch": 51.163780868577916, "grad_norm": 2.0471649169921875, "learning_rate": 4.885694010786262e-05, "loss": 0.00233907550573349, "step": 180250 }, { "epoch": 51.16661935850128, "grad_norm": 0.07139784842729568, "learning_rate": 4.885410161793926e-05, "loss": 0.008347225934267044, "step": 180260 }, { "epoch": 51.169457848424635, "grad_norm": 0.04072858393192291, "learning_rate": 4.88512631280159e-05, "loss": 0.0001543872058391571, "step": 180270 }, { "epoch": 51.172296338348, "grad_norm": 0.020023638382554054, "learning_rate": 4.8848424638092535e-05, "loss": 0.01790345162153244, "step": 180280 }, { "epoch": 51.17513482827136, "grad_norm": 0.03488869220018387, "learning_rate": 4.884558614816918e-05, "loss": 0.004289426282048226, "step": 180290 }, { "epoch": 51.177973318194724, "grad_norm": 2.236534833908081, "learning_rate": 4.884274765824581e-05, "loss": 0.0008053762838244438, "step": 180300 }, { "epoch": 51.18081180811808, "grad_norm": 0.22639253735542297, "learning_rate": 4.883990916832246e-05, "loss": 0.0004237344488501549, "step": 180310 }, { "epoch": 51.18365029804144, "grad_norm": 0.06501737982034683, "learning_rate": 4.8837070678399094e-05, "loss": 0.0016047380864620208, "step": 180320 }, { "epoch": 51.186488787964805, "grad_norm": 0.009642795659601688, "learning_rate": 4.883423218847573e-05, "loss": 0.00018676016479730607, "step": 180330 }, { "epoch": 51.18932727788816, "grad_norm": 0.006783571094274521, "learning_rate": 4.883139369855238e-05, "loss": 0.0003836601972579956, "step": 180340 }, { "epoch": 51.192165767811524, "grad_norm": 0.011815792880952358, "learning_rate": 4.882855520862901e-05, "loss": 0.00039806254208087923, "step": 180350 }, { "epoch": 51.19500425773489, "grad_norm": 0.06277616322040558, "learning_rate": 4.882571671870565e-05, "loss": 0.0011429861187934875, "step": 180360 }, { "epoch": 51.19784274765824, "grad_norm": 0.37347713112831116, "learning_rate": 4.8822878228782294e-05, "loss": 0.0009090131148695946, "step": 180370 }, { "epoch": 51.200681237581605, "grad_norm": 0.014797259122133255, "learning_rate": 4.882003973885893e-05, "loss": 0.00020438991487026215, "step": 180380 }, { "epoch": 51.20351972750497, "grad_norm": 0.4140276312828064, "learning_rate": 4.881720124893557e-05, "loss": 0.0013041844591498374, "step": 180390 }, { "epoch": 51.20635821742833, "grad_norm": 1.3986579179763794, "learning_rate": 4.8814362759012205e-05, "loss": 0.00046810805797576905, "step": 180400 }, { "epoch": 51.20919670735169, "grad_norm": 0.01123049110174179, "learning_rate": 4.8811524269088846e-05, "loss": 0.011978434771299363, "step": 180410 }, { "epoch": 51.21203519727505, "grad_norm": 23.64059829711914, "learning_rate": 4.880868577916549e-05, "loss": 0.004934072494506836, "step": 180420 }, { "epoch": 51.21487368719841, "grad_norm": 0.15461134910583496, "learning_rate": 4.880584728924212e-05, "loss": 0.0003268718719482422, "step": 180430 }, { "epoch": 51.21771217712177, "grad_norm": 0.019366364926099777, "learning_rate": 4.8803008799318763e-05, "loss": 0.00024615060538053514, "step": 180440 }, { "epoch": 51.22055066704513, "grad_norm": 0.086615189909935, "learning_rate": 4.8800170309395405e-05, "loss": 0.000323539599776268, "step": 180450 }, { "epoch": 51.223389156968494, "grad_norm": 11.886871337890625, "learning_rate": 4.879733181947204e-05, "loss": 0.005815085023641586, "step": 180460 }, { "epoch": 51.22622764689185, "grad_norm": 0.013870378956198692, "learning_rate": 4.879449332954869e-05, "loss": 0.0007988071069121361, "step": 180470 }, { "epoch": 51.22906613681521, "grad_norm": 0.005727153737097979, "learning_rate": 4.879165483962532e-05, "loss": 0.0007539499551057815, "step": 180480 }, { "epoch": 51.231904626738576, "grad_norm": 0.11050159484148026, "learning_rate": 4.878881634970196e-05, "loss": 0.01290183961391449, "step": 180490 }, { "epoch": 51.23474311666194, "grad_norm": 0.024128984659910202, "learning_rate": 4.87859778597786e-05, "loss": 0.0010465646162629127, "step": 180500 }, { "epoch": 51.23474311666194, "eval_accuracy": 0.9832771666560692, "eval_loss": 0.06912874430418015, "eval_runtime": 45.6258, "eval_samples_per_second": 344.695, "eval_steps_per_second": 5.392, "step": 180500 }, { "epoch": 51.237581606585294, "grad_norm": 0.028332341462373734, "learning_rate": 4.878313936985524e-05, "loss": 0.002401738613843918, "step": 180510 }, { "epoch": 51.24042009650866, "grad_norm": 0.07142985612154007, "learning_rate": 4.878030087993188e-05, "loss": 0.00018204320222139358, "step": 180520 }, { "epoch": 51.24325858643202, "grad_norm": 0.041881170123815536, "learning_rate": 4.8777462390008516e-05, "loss": 0.00029032640159130094, "step": 180530 }, { "epoch": 51.246097076355376, "grad_norm": 0.020077915862202644, "learning_rate": 4.877462390008516e-05, "loss": 8.571427315473557e-05, "step": 180540 }, { "epoch": 51.24893556627874, "grad_norm": 0.9170270562171936, "learning_rate": 4.87717854101618e-05, "loss": 0.00034840535372495653, "step": 180550 }, { "epoch": 51.2517740562021, "grad_norm": 0.054025523364543915, "learning_rate": 4.876894692023843e-05, "loss": 0.000287320651113987, "step": 180560 }, { "epoch": 51.254612546125465, "grad_norm": 0.0169900543987751, "learning_rate": 4.8766108430315074e-05, "loss": 0.000379122793674469, "step": 180570 }, { "epoch": 51.25745103604882, "grad_norm": 0.007658412680029869, "learning_rate": 4.8763269940391716e-05, "loss": 0.0012271642684936524, "step": 180580 }, { "epoch": 51.26028952597218, "grad_norm": 0.062451526522636414, "learning_rate": 4.876043145046835e-05, "loss": 0.0005726903676986694, "step": 180590 }, { "epoch": 51.263128015895546, "grad_norm": 0.14610551297664642, "learning_rate": 4.875759296054499e-05, "loss": 0.0007543221116065979, "step": 180600 }, { "epoch": 51.2659665058189, "grad_norm": 0.021060442551970482, "learning_rate": 4.875475447062163e-05, "loss": 0.0016336021944880486, "step": 180610 }, { "epoch": 51.268804995742265, "grad_norm": 0.07176677137613297, "learning_rate": 4.875191598069827e-05, "loss": 0.0011267853900790215, "step": 180620 }, { "epoch": 51.27164348566563, "grad_norm": 2.098573923110962, "learning_rate": 4.874907749077491e-05, "loss": 0.0012720232829451562, "step": 180630 }, { "epoch": 51.274481975588984, "grad_norm": 0.022294774651527405, "learning_rate": 4.874623900085155e-05, "loss": 0.0005931377410888672, "step": 180640 }, { "epoch": 51.27732046551235, "grad_norm": 1.674778699874878, "learning_rate": 4.874340051092819e-05, "loss": 0.013150952756404877, "step": 180650 }, { "epoch": 51.28015895543571, "grad_norm": 0.34016817808151245, "learning_rate": 4.8740562021004826e-05, "loss": 0.002353581227362156, "step": 180660 }, { "epoch": 51.28299744535907, "grad_norm": 0.2319812774658203, "learning_rate": 4.873772353108147e-05, "loss": 0.0003409339115023613, "step": 180670 }, { "epoch": 51.28583593528243, "grad_norm": 0.18770049512386322, "learning_rate": 4.873488504115811e-05, "loss": 0.0014117268845438957, "step": 180680 }, { "epoch": 51.28867442520579, "grad_norm": 0.10342095792293549, "learning_rate": 4.8732046551234744e-05, "loss": 0.0004954451695084571, "step": 180690 }, { "epoch": 51.291512915129154, "grad_norm": 0.07115955650806427, "learning_rate": 4.8729208061311385e-05, "loss": 0.001740557886660099, "step": 180700 }, { "epoch": 51.29435140505251, "grad_norm": 0.7634807825088501, "learning_rate": 4.8726369571388026e-05, "loss": 0.0015783898532390594, "step": 180710 }, { "epoch": 51.29718989497587, "grad_norm": 0.032094795256853104, "learning_rate": 4.872353108146466e-05, "loss": 0.010688894242048264, "step": 180720 }, { "epoch": 51.300028384899235, "grad_norm": 1.8505420684814453, "learning_rate": 4.87206925915413e-05, "loss": 0.0018114905804395675, "step": 180730 }, { "epoch": 51.30286687482259, "grad_norm": 2.307677745819092, "learning_rate": 4.8717854101617944e-05, "loss": 0.004142354428768158, "step": 180740 }, { "epoch": 51.305705364745954, "grad_norm": 0.0015319200465455651, "learning_rate": 4.871501561169458e-05, "loss": 0.0035139992833137512, "step": 180750 }, { "epoch": 51.30854385466932, "grad_norm": 0.05981353670358658, "learning_rate": 4.871217712177122e-05, "loss": 0.008780862390995025, "step": 180760 }, { "epoch": 51.31138234459268, "grad_norm": 0.1633567363023758, "learning_rate": 4.870933863184786e-05, "loss": 0.001862475834786892, "step": 180770 }, { "epoch": 51.314220834516036, "grad_norm": 0.28252655267715454, "learning_rate": 4.87065001419245e-05, "loss": 0.0011407917365431786, "step": 180780 }, { "epoch": 51.3170593244394, "grad_norm": 0.7364253997802734, "learning_rate": 4.870366165200114e-05, "loss": 0.006165556609630585, "step": 180790 }, { "epoch": 51.31989781436276, "grad_norm": 0.123813696205616, "learning_rate": 4.870082316207777e-05, "loss": 0.00029992591589689255, "step": 180800 }, { "epoch": 51.32273630428612, "grad_norm": 0.19602690637111664, "learning_rate": 4.869798467215442e-05, "loss": 0.0013063732534646989, "step": 180810 }, { "epoch": 51.32557479420948, "grad_norm": 0.18182505667209625, "learning_rate": 4.8695146182231054e-05, "loss": 0.0025439277291297913, "step": 180820 }, { "epoch": 51.32841328413284, "grad_norm": 0.297616183757782, "learning_rate": 4.8692307692307696e-05, "loss": 0.0012201763689517974, "step": 180830 }, { "epoch": 51.3312517740562, "grad_norm": 0.05996781587600708, "learning_rate": 4.868946920238434e-05, "loss": 0.010211491584777832, "step": 180840 }, { "epoch": 51.33409026397956, "grad_norm": 0.016495665535330772, "learning_rate": 4.868663071246097e-05, "loss": 0.004931270331144333, "step": 180850 }, { "epoch": 51.336928753902924, "grad_norm": 0.09550175815820694, "learning_rate": 4.868379222253761e-05, "loss": 0.0005275975912809372, "step": 180860 }, { "epoch": 51.33976724382629, "grad_norm": 0.005717665422707796, "learning_rate": 4.8680953732614254e-05, "loss": 0.0005700625479221344, "step": 180870 }, { "epoch": 51.34260573374964, "grad_norm": 0.15120646357536316, "learning_rate": 4.867811524269089e-05, "loss": 0.011209557950496673, "step": 180880 }, { "epoch": 51.345444223673006, "grad_norm": 0.06654311716556549, "learning_rate": 4.867527675276753e-05, "loss": 0.0005420686677098275, "step": 180890 }, { "epoch": 51.34828271359637, "grad_norm": 0.010868842713534832, "learning_rate": 4.8672438262844165e-05, "loss": 0.0034493491053581237, "step": 180900 }, { "epoch": 51.351121203519725, "grad_norm": 0.159968301653862, "learning_rate": 4.8669599772920806e-05, "loss": 0.0007274238392710686, "step": 180910 }, { "epoch": 51.35395969344309, "grad_norm": 0.010260616429150105, "learning_rate": 4.866676128299745e-05, "loss": 0.0009080398827791214, "step": 180920 }, { "epoch": 51.35679818336645, "grad_norm": 0.3320360481739044, "learning_rate": 4.866392279307408e-05, "loss": 0.0003233151510357857, "step": 180930 }, { "epoch": 51.35963667328981, "grad_norm": 0.029749900102615356, "learning_rate": 4.866108430315073e-05, "loss": 0.0006294451653957367, "step": 180940 }, { "epoch": 51.36247516321317, "grad_norm": 0.20113421976566315, "learning_rate": 4.8658245813227365e-05, "loss": 0.0032470196485519407, "step": 180950 }, { "epoch": 51.36531365313653, "grad_norm": 0.03074519895017147, "learning_rate": 4.8655407323304e-05, "loss": 0.00019111894071102142, "step": 180960 }, { "epoch": 51.368152143059895, "grad_norm": 0.06712842732667923, "learning_rate": 4.865256883338065e-05, "loss": 0.00016612224280834197, "step": 180970 }, { "epoch": 51.37099063298325, "grad_norm": 0.011298414319753647, "learning_rate": 4.864973034345728e-05, "loss": 0.000124216265976429, "step": 180980 }, { "epoch": 51.37382912290661, "grad_norm": 0.04710685461759567, "learning_rate": 4.8646891853533924e-05, "loss": 0.00013313237577676772, "step": 180990 }, { "epoch": 51.376667612829976, "grad_norm": 0.16910472512245178, "learning_rate": 4.864405336361056e-05, "loss": 0.00042001549154520036, "step": 181000 }, { "epoch": 51.376667612829976, "eval_accuracy": 0.9834679214090418, "eval_loss": 0.07491544634103775, "eval_runtime": 68.1584, "eval_samples_per_second": 230.742, "eval_steps_per_second": 3.609, "step": 181000 }, { "epoch": 51.37950610275333, "grad_norm": 4.163790225982666, "learning_rate": 4.86412148736872e-05, "loss": 0.0015105636790394782, "step": 181010 }, { "epoch": 51.382344592676695, "grad_norm": 0.014922333881258965, "learning_rate": 4.863837638376384e-05, "loss": 0.0002088889479637146, "step": 181020 }, { "epoch": 51.38518308260006, "grad_norm": 3.011754274368286, "learning_rate": 4.8635537893840476e-05, "loss": 0.007263009250164032, "step": 181030 }, { "epoch": 51.38802157252342, "grad_norm": 1.1216708421707153, "learning_rate": 4.863269940391712e-05, "loss": 0.011853066086769105, "step": 181040 }, { "epoch": 51.39086006244678, "grad_norm": 0.3283454477787018, "learning_rate": 4.862986091399376e-05, "loss": 0.0004145026206970215, "step": 181050 }, { "epoch": 51.39369855237014, "grad_norm": 0.03661312162876129, "learning_rate": 4.862702242407039e-05, "loss": 0.003011048771440983, "step": 181060 }, { "epoch": 51.3965370422935, "grad_norm": 0.007874730043113232, "learning_rate": 4.862418393414704e-05, "loss": 0.00015082452446222306, "step": 181070 }, { "epoch": 51.39937553221686, "grad_norm": 0.3112446963787079, "learning_rate": 4.862162929321601e-05, "loss": 0.007867148518562317, "step": 181080 }, { "epoch": 51.40221402214022, "grad_norm": 0.047347817569971085, "learning_rate": 4.861879080329265e-05, "loss": 0.00036245230585336686, "step": 181090 }, { "epoch": 51.405052512063584, "grad_norm": 0.45238226652145386, "learning_rate": 4.861595231336929e-05, "loss": 0.0007823603227734566, "step": 181100 }, { "epoch": 51.40789100198694, "grad_norm": 5.21401834487915, "learning_rate": 4.8613113823445925e-05, "loss": 0.005320234969258308, "step": 181110 }, { "epoch": 51.4107294919103, "grad_norm": 0.7559774518013, "learning_rate": 4.861027533352257e-05, "loss": 0.0014086185023188592, "step": 181120 }, { "epoch": 51.413567981833665, "grad_norm": 0.022652240470051765, "learning_rate": 4.860743684359921e-05, "loss": 0.00017260536551475525, "step": 181130 }, { "epoch": 51.41640647175703, "grad_norm": 0.4337335228919983, "learning_rate": 4.860459835367584e-05, "loss": 0.0019842227920889854, "step": 181140 }, { "epoch": 51.419244961680384, "grad_norm": 0.15239965915679932, "learning_rate": 4.860175986375249e-05, "loss": 0.00028395820409059523, "step": 181150 }, { "epoch": 51.42208345160375, "grad_norm": 0.24019671976566315, "learning_rate": 4.8598921373829125e-05, "loss": 0.0006512686610221863, "step": 181160 }, { "epoch": 51.42492194152711, "grad_norm": 0.04920477420091629, "learning_rate": 4.8596082883905767e-05, "loss": 0.003197953850030899, "step": 181170 }, { "epoch": 51.427760431450466, "grad_norm": 0.1865280270576477, "learning_rate": 4.85932443939824e-05, "loss": 0.0010337302461266517, "step": 181180 }, { "epoch": 51.43059892137383, "grad_norm": 0.01043909601867199, "learning_rate": 4.859040590405904e-05, "loss": 0.0011025508865714073, "step": 181190 }, { "epoch": 51.43343741129719, "grad_norm": 0.007655365392565727, "learning_rate": 4.8587567414135684e-05, "loss": 0.0005174798890948296, "step": 181200 }, { "epoch": 51.43627590122055, "grad_norm": 0.427606999874115, "learning_rate": 4.858472892421232e-05, "loss": 0.0027486231178045274, "step": 181210 }, { "epoch": 51.43911439114391, "grad_norm": 0.06120625510811806, "learning_rate": 4.858189043428896e-05, "loss": 0.002879833057522774, "step": 181220 }, { "epoch": 51.44195288106727, "grad_norm": 0.016124000772833824, "learning_rate": 4.85790519443656e-05, "loss": 0.0027857553213834763, "step": 181230 }, { "epoch": 51.444791370990636, "grad_norm": 0.043916620314121246, "learning_rate": 4.8576213454442236e-05, "loss": 0.0028188690543174745, "step": 181240 }, { "epoch": 51.44762986091399, "grad_norm": 0.4950377941131592, "learning_rate": 4.8573374964518884e-05, "loss": 0.0015357997268438338, "step": 181250 }, { "epoch": 51.450468350837355, "grad_norm": 0.015519693493843079, "learning_rate": 4.857053647459552e-05, "loss": 0.00043325088918209074, "step": 181260 }, { "epoch": 51.45330684076072, "grad_norm": 0.1495392769575119, "learning_rate": 4.856769798467215e-05, "loss": 0.0010190941393375397, "step": 181270 }, { "epoch": 51.45614533068407, "grad_norm": 0.04143283888697624, "learning_rate": 4.8564859494748795e-05, "loss": 0.0007109979167580604, "step": 181280 }, { "epoch": 51.458983820607436, "grad_norm": 0.04086276516318321, "learning_rate": 4.8562021004825436e-05, "loss": 0.0035272590816020966, "step": 181290 }, { "epoch": 51.4618223105308, "grad_norm": 0.0791335180401802, "learning_rate": 4.855918251490208e-05, "loss": 0.0002785995602607727, "step": 181300 }, { "epoch": 51.464660800454155, "grad_norm": 2.4866302013397217, "learning_rate": 4.855634402497871e-05, "loss": 0.0006739288568496704, "step": 181310 }, { "epoch": 51.46749929037752, "grad_norm": 1.3097666501998901, "learning_rate": 4.855350553505535e-05, "loss": 0.0027167947962880136, "step": 181320 }, { "epoch": 51.47033778030088, "grad_norm": 0.023985302075743675, "learning_rate": 4.8550667045131995e-05, "loss": 0.00023569464683532716, "step": 181330 }, { "epoch": 51.47317627022424, "grad_norm": 0.02709297277033329, "learning_rate": 4.854782855520863e-05, "loss": 0.008522319048643113, "step": 181340 }, { "epoch": 51.4760147601476, "grad_norm": 0.22064745426177979, "learning_rate": 4.854499006528527e-05, "loss": 0.00905052199959755, "step": 181350 }, { "epoch": 51.47885325007096, "grad_norm": 0.0703018382191658, "learning_rate": 4.854215157536191e-05, "loss": 0.004931185394525528, "step": 181360 }, { "epoch": 51.481691739994325, "grad_norm": 3.4906766414642334, "learning_rate": 4.8539313085438547e-05, "loss": 0.0054199911653995516, "step": 181370 }, { "epoch": 51.48453022991768, "grad_norm": 0.07231257110834122, "learning_rate": 4.853647459551519e-05, "loss": 0.0060994036495685576, "step": 181380 }, { "epoch": 51.487368719841044, "grad_norm": 0.16288021206855774, "learning_rate": 4.853363610559183e-05, "loss": 0.0015440855175256729, "step": 181390 }, { "epoch": 51.49020720976441, "grad_norm": 0.01964697241783142, "learning_rate": 4.8530797615668464e-05, "loss": 0.00025652367621660234, "step": 181400 }, { "epoch": 51.49304569968777, "grad_norm": 0.09616731107234955, "learning_rate": 4.8527959125745105e-05, "loss": 0.000365879014134407, "step": 181410 }, { "epoch": 51.495884189611125, "grad_norm": 0.01533968560397625, "learning_rate": 4.852512063582175e-05, "loss": 0.000657520443201065, "step": 181420 }, { "epoch": 51.49872267953449, "grad_norm": 0.12906908988952637, "learning_rate": 4.852228214589838e-05, "loss": 0.000275455042719841, "step": 181430 }, { "epoch": 51.50156116945785, "grad_norm": 0.8040350079536438, "learning_rate": 4.851944365597502e-05, "loss": 0.0005903398618102073, "step": 181440 }, { "epoch": 51.50439965938121, "grad_norm": 0.0032069978769868612, "learning_rate": 4.8516605166051664e-05, "loss": 0.00033767689019441604, "step": 181450 }, { "epoch": 51.50723814930457, "grad_norm": 7.126280784606934, "learning_rate": 4.8513766676128305e-05, "loss": 0.001784094236791134, "step": 181460 }, { "epoch": 51.51007663922793, "grad_norm": 0.1468098908662796, "learning_rate": 4.851092818620494e-05, "loss": 0.00031558144837617874, "step": 181470 }, { "epoch": 51.51291512915129, "grad_norm": 0.004485203884541988, "learning_rate": 4.8508089696281575e-05, "loss": 0.00024930480867624283, "step": 181480 }, { "epoch": 51.51575361907465, "grad_norm": 0.0051109869964420795, "learning_rate": 4.850525120635822e-05, "loss": 0.00013273414224386216, "step": 181490 }, { "epoch": 51.518592108998014, "grad_norm": 0.16966843605041504, "learning_rate": 4.850241271643486e-05, "loss": 0.0001524878665804863, "step": 181500 }, { "epoch": 51.518592108998014, "eval_accuracy": 0.9822598079735487, "eval_loss": 0.07243403792381287, "eval_runtime": 45.1373, "eval_samples_per_second": 348.426, "eval_steps_per_second": 5.45, "step": 181500 }, { "epoch": 51.52143059892138, "grad_norm": 0.009116139262914658, "learning_rate": 4.84995742265115e-05, "loss": 0.00017643552273511888, "step": 181510 }, { "epoch": 51.52426908884473, "grad_norm": 0.005287639796733856, "learning_rate": 4.849673573658814e-05, "loss": 0.00012254472821950912, "step": 181520 }, { "epoch": 51.527107578768096, "grad_norm": 0.5076610445976257, "learning_rate": 4.8493897246664775e-05, "loss": 0.00040447823703289033, "step": 181530 }, { "epoch": 51.52994606869146, "grad_norm": 0.006555914878845215, "learning_rate": 4.8491058756741416e-05, "loss": 0.00026138313114643097, "step": 181540 }, { "epoch": 51.532784558614814, "grad_norm": 0.021124519407749176, "learning_rate": 4.848822026681806e-05, "loss": 0.00031262952834367754, "step": 181550 }, { "epoch": 51.53562304853818, "grad_norm": 0.013481332920491695, "learning_rate": 4.848538177689469e-05, "loss": 0.00011723190546035767, "step": 181560 }, { "epoch": 51.53846153846154, "grad_norm": 1.1680601835250854, "learning_rate": 4.8482543286971333e-05, "loss": 0.00035188980400562285, "step": 181570 }, { "epoch": 51.541300028384896, "grad_norm": 0.07266868650913239, "learning_rate": 4.847970479704797e-05, "loss": 0.0003941057249903679, "step": 181580 }, { "epoch": 51.54413851830826, "grad_norm": 0.16748861968517303, "learning_rate": 4.8476866307124616e-05, "loss": 0.0007473949342966079, "step": 181590 }, { "epoch": 51.54697700823162, "grad_norm": 1.2310738563537598, "learning_rate": 4.847402781720125e-05, "loss": 0.001342669129371643, "step": 181600 }, { "epoch": 51.549815498154985, "grad_norm": 0.09823717176914215, "learning_rate": 4.8471189327277885e-05, "loss": 0.0006542271003127098, "step": 181610 }, { "epoch": 51.55265398807834, "grad_norm": 0.049825333058834076, "learning_rate": 4.8468350837354534e-05, "loss": 0.00037668664008378984, "step": 181620 }, { "epoch": 51.5554924780017, "grad_norm": 0.3020535409450531, "learning_rate": 4.846551234743117e-05, "loss": 0.0021153220906853674, "step": 181630 }, { "epoch": 51.558330967925066, "grad_norm": 2.723722219467163, "learning_rate": 4.846267385750781e-05, "loss": 0.0021420205011963844, "step": 181640 }, { "epoch": 51.56116945784842, "grad_norm": 0.12901359796524048, "learning_rate": 4.845983536758445e-05, "loss": 0.0006101107224822045, "step": 181650 }, { "epoch": 51.564007947771785, "grad_norm": 0.048288170248270035, "learning_rate": 4.8456996877661085e-05, "loss": 0.0003862598910927773, "step": 181660 }, { "epoch": 51.56684643769515, "grad_norm": 0.033457010984420776, "learning_rate": 4.845415838773773e-05, "loss": 0.0018064329400658608, "step": 181670 }, { "epoch": 51.56968492761851, "grad_norm": 0.053446006029844284, "learning_rate": 4.845131989781436e-05, "loss": 0.0006125690415501595, "step": 181680 }, { "epoch": 51.572523417541866, "grad_norm": 3.929089069366455, "learning_rate": 4.8448481407891e-05, "loss": 0.003491602838039398, "step": 181690 }, { "epoch": 51.57536190746523, "grad_norm": 0.0563458576798439, "learning_rate": 4.8445642917967644e-05, "loss": 0.0023972855880856512, "step": 181700 }, { "epoch": 51.57820039738859, "grad_norm": 0.12576255202293396, "learning_rate": 4.844280442804428e-05, "loss": 0.0010721514001488686, "step": 181710 }, { "epoch": 51.58103888731195, "grad_norm": 4.1002044677734375, "learning_rate": 4.843996593812093e-05, "loss": 0.0019088437780737876, "step": 181720 }, { "epoch": 51.58387737723531, "grad_norm": 0.015928000211715698, "learning_rate": 4.843712744819756e-05, "loss": 0.005492843687534332, "step": 181730 }, { "epoch": 51.586715867158674, "grad_norm": 0.03169090300798416, "learning_rate": 4.8434288958274196e-05, "loss": 0.0011778144165873528, "step": 181740 }, { "epoch": 51.58955435708203, "grad_norm": 0.2942458987236023, "learning_rate": 4.8431450468350844e-05, "loss": 0.0011663420125842094, "step": 181750 }, { "epoch": 51.59239284700539, "grad_norm": 1.3005199432373047, "learning_rate": 4.842861197842748e-05, "loss": 0.0008841041475534439, "step": 181760 }, { "epoch": 51.595231336928755, "grad_norm": 0.0034293527714908123, "learning_rate": 4.842577348850412e-05, "loss": 0.0009131275117397308, "step": 181770 }, { "epoch": 51.59806982685212, "grad_norm": 0.07655858993530273, "learning_rate": 4.842293499858076e-05, "loss": 0.0035119153559207915, "step": 181780 }, { "epoch": 51.600908316775474, "grad_norm": 0.06944380700588226, "learning_rate": 4.8420096508657396e-05, "loss": 0.00036933254450559615, "step": 181790 }, { "epoch": 51.60374680669884, "grad_norm": 0.03487740457057953, "learning_rate": 4.841725801873404e-05, "loss": 0.0047565590590238575, "step": 181800 }, { "epoch": 51.6065852966222, "grad_norm": 0.009979678317904472, "learning_rate": 4.841441952881067e-05, "loss": 0.0004262872040271759, "step": 181810 }, { "epoch": 51.609423786545555, "grad_norm": 0.024565527215600014, "learning_rate": 4.8411581038887314e-05, "loss": 0.0004484517499804497, "step": 181820 }, { "epoch": 51.61226227646892, "grad_norm": 0.1225619912147522, "learning_rate": 4.8408742548963955e-05, "loss": 0.0006640886887907982, "step": 181830 }, { "epoch": 51.61510076639228, "grad_norm": 0.604719877243042, "learning_rate": 4.840590405904059e-05, "loss": 0.005428457260131836, "step": 181840 }, { "epoch": 51.61793925631564, "grad_norm": 0.7308973670005798, "learning_rate": 4.840306556911723e-05, "loss": 0.0015622902661561965, "step": 181850 }, { "epoch": 51.620777746239, "grad_norm": 0.024829300120472908, "learning_rate": 4.840022707919387e-05, "loss": 0.0025354070588946343, "step": 181860 }, { "epoch": 51.62361623616236, "grad_norm": 0.0035279912408441305, "learning_rate": 4.839738858927051e-05, "loss": 0.004454202204942703, "step": 181870 }, { "epoch": 51.626454726085726, "grad_norm": 0.08412385731935501, "learning_rate": 4.8394550099347155e-05, "loss": 0.0013804910704493523, "step": 181880 }, { "epoch": 51.62929321600908, "grad_norm": 0.01418780442327261, "learning_rate": 4.839171160942379e-05, "loss": 0.0006354525685310364, "step": 181890 }, { "epoch": 51.632131705932444, "grad_norm": 0.07605573534965515, "learning_rate": 4.8388873119500424e-05, "loss": 0.00044759437441825866, "step": 181900 }, { "epoch": 51.63497019585581, "grad_norm": 0.12307585775852203, "learning_rate": 4.8386034629577066e-05, "loss": 0.00023594163358211517, "step": 181910 }, { "epoch": 51.63780868577916, "grad_norm": 0.05307338386774063, "learning_rate": 4.838319613965371e-05, "loss": 0.000559818185865879, "step": 181920 }, { "epoch": 51.640647175702526, "grad_norm": 0.0594932921230793, "learning_rate": 4.838035764973035e-05, "loss": 0.0003274247050285339, "step": 181930 }, { "epoch": 51.64348566562589, "grad_norm": 0.8616644740104675, "learning_rate": 4.837751915980698e-05, "loss": 0.00025785136967897414, "step": 181940 }, { "epoch": 51.646324155549244, "grad_norm": 0.002087725093588233, "learning_rate": 4.8374680669883624e-05, "loss": 7.887687534093856e-05, "step": 181950 }, { "epoch": 51.64916264547261, "grad_norm": 5.163238048553467, "learning_rate": 4.8371842179960266e-05, "loss": 0.0034507390111684797, "step": 181960 }, { "epoch": 51.65200113539597, "grad_norm": 0.39698436856269836, "learning_rate": 4.83690036900369e-05, "loss": 0.006439575552940368, "step": 181970 }, { "epoch": 51.65483962531933, "grad_norm": 0.17869263887405396, "learning_rate": 4.836616520011354e-05, "loss": 0.0023475361987948416, "step": 181980 }, { "epoch": 51.65767811524269, "grad_norm": 0.012788558378815651, "learning_rate": 4.836332671019018e-05, "loss": 0.0005406951531767845, "step": 181990 }, { "epoch": 51.66051660516605, "grad_norm": 0.023373225703835487, "learning_rate": 4.836048822026682e-05, "loss": 0.00042643025517463684, "step": 182000 }, { "epoch": 51.66051660516605, "eval_accuracy": 0.9828320722324665, "eval_loss": 0.07272430509328842, "eval_runtime": 45.2592, "eval_samples_per_second": 347.487, "eval_steps_per_second": 5.435, "step": 182000 }, { "epoch": 51.663355095089415, "grad_norm": 0.7884178757667542, "learning_rate": 4.835764973034346e-05, "loss": 0.0019462250173091888, "step": 182010 }, { "epoch": 51.66619358501277, "grad_norm": 0.07478602975606918, "learning_rate": 4.83548112404201e-05, "loss": 0.0009512431919574737, "step": 182020 }, { "epoch": 51.66903207493613, "grad_norm": 0.13866542279720306, "learning_rate": 4.8351972750496735e-05, "loss": 0.005365577340126037, "step": 182030 }, { "epoch": 51.671870564859496, "grad_norm": 0.6749101281166077, "learning_rate": 4.8349134260573376e-05, "loss": 0.004835335910320282, "step": 182040 }, { "epoch": 51.67470905478285, "grad_norm": 3.6102569103240967, "learning_rate": 4.834629577065002e-05, "loss": 0.001209740899503231, "step": 182050 }, { "epoch": 51.677547544706215, "grad_norm": 0.11554107069969177, "learning_rate": 4.834345728072666e-05, "loss": 0.0006127974018454551, "step": 182060 }, { "epoch": 51.68038603462958, "grad_norm": 2.7747321128845215, "learning_rate": 4.8340618790803294e-05, "loss": 0.0011236954480409623, "step": 182070 }, { "epoch": 51.68322452455294, "grad_norm": 0.35607287287712097, "learning_rate": 4.8337780300879935e-05, "loss": 0.0009669063612818718, "step": 182080 }, { "epoch": 51.6860630144763, "grad_norm": 0.015576550737023354, "learning_rate": 4.8334941810956576e-05, "loss": 0.0036664292216300963, "step": 182090 }, { "epoch": 51.68890150439966, "grad_norm": 0.024622738361358643, "learning_rate": 4.833210332103321e-05, "loss": 0.006873185187578202, "step": 182100 }, { "epoch": 51.69173999432302, "grad_norm": 0.05521416291594505, "learning_rate": 4.832926483110985e-05, "loss": 0.0017897427082061768, "step": 182110 }, { "epoch": 51.69457848424638, "grad_norm": 0.754984974861145, "learning_rate": 4.8326426341186494e-05, "loss": 0.002592265233397484, "step": 182120 }, { "epoch": 51.69741697416974, "grad_norm": 0.16624091565608978, "learning_rate": 4.832358785126313e-05, "loss": 0.0005876729264855385, "step": 182130 }, { "epoch": 51.700255464093104, "grad_norm": 7.201970100402832, "learning_rate": 4.832074936133977e-05, "loss": 0.004020648822188377, "step": 182140 }, { "epoch": 51.70309395401647, "grad_norm": 0.3087092638015747, "learning_rate": 4.831791087141641e-05, "loss": 0.0013429127633571626, "step": 182150 }, { "epoch": 51.70593244393982, "grad_norm": 0.010258601047098637, "learning_rate": 4.8315072381493046e-05, "loss": 0.006605461239814758, "step": 182160 }, { "epoch": 51.708770933863185, "grad_norm": 0.023155706003308296, "learning_rate": 4.831223389156969e-05, "loss": 0.004018225520849228, "step": 182170 }, { "epoch": 51.71160942378655, "grad_norm": 0.021818215027451515, "learning_rate": 4.830939540164633e-05, "loss": 0.0015736553817987443, "step": 182180 }, { "epoch": 51.714447913709904, "grad_norm": 2.6289544105529785, "learning_rate": 4.830655691172297e-05, "loss": 0.0009970763698220254, "step": 182190 }, { "epoch": 51.71728640363327, "grad_norm": 11.049707412719727, "learning_rate": 4.8303718421799605e-05, "loss": 0.0038177695125341414, "step": 182200 }, { "epoch": 51.72012489355663, "grad_norm": 0.1516902595758438, "learning_rate": 4.830087993187624e-05, "loss": 0.0007951620966196061, "step": 182210 }, { "epoch": 51.722963383479986, "grad_norm": 0.03143930435180664, "learning_rate": 4.829804144195289e-05, "loss": 0.0031747549772262575, "step": 182220 }, { "epoch": 51.72580187340335, "grad_norm": 0.01598392426967621, "learning_rate": 4.829520295202952e-05, "loss": 0.0008220134302973747, "step": 182230 }, { "epoch": 51.72864036332671, "grad_norm": 2.6038875579833984, "learning_rate": 4.829236446210616e-05, "loss": 0.002396497130393982, "step": 182240 }, { "epoch": 51.731478853250074, "grad_norm": 0.05570011958479881, "learning_rate": 4.8289525972182805e-05, "loss": 0.003613603860139847, "step": 182250 }, { "epoch": 51.73431734317343, "grad_norm": 3.8909778594970703, "learning_rate": 4.828668748225944e-05, "loss": 0.001528846099972725, "step": 182260 }, { "epoch": 51.73715583309679, "grad_norm": 0.07329601049423218, "learning_rate": 4.828384899233608e-05, "loss": 0.001365932822227478, "step": 182270 }, { "epoch": 51.739994323020156, "grad_norm": 0.26005417108535767, "learning_rate": 4.828101050241272e-05, "loss": 0.00030077137053012847, "step": 182280 }, { "epoch": 51.74283281294351, "grad_norm": 0.02545754425227642, "learning_rate": 4.8278172012489357e-05, "loss": 0.0026941444724798203, "step": 182290 }, { "epoch": 51.745671302866874, "grad_norm": 0.1750079244375229, "learning_rate": 4.8275333522566e-05, "loss": 0.0074586614966392515, "step": 182300 }, { "epoch": 51.74850979279024, "grad_norm": 0.010293406434357166, "learning_rate": 4.827249503264263e-05, "loss": 0.0012641586363315582, "step": 182310 }, { "epoch": 51.75134828271359, "grad_norm": 0.007529971189796925, "learning_rate": 4.8269656542719274e-05, "loss": 0.00046210847795009614, "step": 182320 }, { "epoch": 51.754186772636956, "grad_norm": 0.02943962626159191, "learning_rate": 4.8266818052795915e-05, "loss": 0.0004169780761003494, "step": 182330 }, { "epoch": 51.75702526256032, "grad_norm": 0.044608499854803085, "learning_rate": 4.826397956287255e-05, "loss": 0.0001300206407904625, "step": 182340 }, { "epoch": 51.75986375248368, "grad_norm": 0.5301635265350342, "learning_rate": 4.82611410729492e-05, "loss": 0.0021256355568766592, "step": 182350 }, { "epoch": 51.76270224240704, "grad_norm": 0.07393375039100647, "learning_rate": 4.825830258302583e-05, "loss": 0.0021904367953538896, "step": 182360 }, { "epoch": 51.7655407323304, "grad_norm": 0.04221870005130768, "learning_rate": 4.825546409310247e-05, "loss": 0.0010221535339951515, "step": 182370 }, { "epoch": 51.76837922225376, "grad_norm": 0.1656215637922287, "learning_rate": 4.8252625603179115e-05, "loss": 0.0016012663021683694, "step": 182380 }, { "epoch": 51.77121771217712, "grad_norm": 7.8316521644592285, "learning_rate": 4.824978711325575e-05, "loss": 0.006246248632669449, "step": 182390 }, { "epoch": 51.77405620210048, "grad_norm": 0.052908916026353836, "learning_rate": 4.824694862333239e-05, "loss": 0.00024696104228496554, "step": 182400 }, { "epoch": 51.776894692023845, "grad_norm": 0.15995943546295166, "learning_rate": 4.8244110133409026e-05, "loss": 0.003679049015045166, "step": 182410 }, { "epoch": 51.7797331819472, "grad_norm": 4.762759685516357, "learning_rate": 4.824127164348567e-05, "loss": 0.0010506832972168923, "step": 182420 }, { "epoch": 51.78257167187056, "grad_norm": 0.03239697962999344, "learning_rate": 4.823843315356231e-05, "loss": 0.0008323505520820617, "step": 182430 }, { "epoch": 51.785410161793926, "grad_norm": 0.14124150574207306, "learning_rate": 4.823559466363894e-05, "loss": 0.0031724303960800173, "step": 182440 }, { "epoch": 51.78824865171729, "grad_norm": 7.413044452667236, "learning_rate": 4.8232756173715585e-05, "loss": 0.008572220802307129, "step": 182450 }, { "epoch": 51.791087141640645, "grad_norm": 0.04794827848672867, "learning_rate": 4.8229917683792226e-05, "loss": 0.008319950848817825, "step": 182460 }, { "epoch": 51.79392563156401, "grad_norm": 0.09469295293092728, "learning_rate": 4.822707919386886e-05, "loss": 0.0007368423044681549, "step": 182470 }, { "epoch": 51.79676412148737, "grad_norm": 0.2503780424594879, "learning_rate": 4.822424070394551e-05, "loss": 0.004118544608354568, "step": 182480 }, { "epoch": 51.79960261141073, "grad_norm": 0.019550221040844917, "learning_rate": 4.8221402214022143e-05, "loss": 0.0030051158741116525, "step": 182490 }, { "epoch": 51.80244110133409, "grad_norm": 0.018396612256765366, "learning_rate": 4.821856372409878e-05, "loss": 0.003979958221316337, "step": 182500 }, { "epoch": 51.80244110133409, "eval_accuracy": 0.9804158453614803, "eval_loss": 0.07946901023387909, "eval_runtime": 42.3289, "eval_samples_per_second": 371.543, "eval_steps_per_second": 5.812, "step": 182500 }, { "epoch": 51.80527959125745, "grad_norm": 0.00527881970629096, "learning_rate": 4.821572523417542e-05, "loss": 0.006006637588143349, "step": 182510 }, { "epoch": 51.808118081180815, "grad_norm": 0.002578652696684003, "learning_rate": 4.821288674425206e-05, "loss": 0.003195345401763916, "step": 182520 }, { "epoch": 51.81095657110417, "grad_norm": 0.22165748476982117, "learning_rate": 4.82100482543287e-05, "loss": 0.0032996371388435363, "step": 182530 }, { "epoch": 51.813795061027534, "grad_norm": 0.024906687438488007, "learning_rate": 4.820720976440534e-05, "loss": 0.0006811190396547318, "step": 182540 }, { "epoch": 51.8166335509509, "grad_norm": 0.004656089935451746, "learning_rate": 4.820437127448198e-05, "loss": 0.0020625103265047074, "step": 182550 }, { "epoch": 51.81947204087425, "grad_norm": 0.017659589648246765, "learning_rate": 4.820153278455862e-05, "loss": 0.007335825264453888, "step": 182560 }, { "epoch": 51.822310530797616, "grad_norm": 0.022357851266860962, "learning_rate": 4.8198694294635254e-05, "loss": 0.002836567722260952, "step": 182570 }, { "epoch": 51.82514902072098, "grad_norm": 0.02990630641579628, "learning_rate": 4.8195855804711895e-05, "loss": 0.0004434786736965179, "step": 182580 }, { "epoch": 51.827987510644334, "grad_norm": 1.2513577938079834, "learning_rate": 4.819301731478854e-05, "loss": 0.0010381106287240982, "step": 182590 }, { "epoch": 51.8308260005677, "grad_norm": 0.0348358154296875, "learning_rate": 4.819017882486517e-05, "loss": 0.004919740930199623, "step": 182600 }, { "epoch": 51.83366449049106, "grad_norm": 9.877551078796387, "learning_rate": 4.818734033494181e-05, "loss": 0.010529439896345139, "step": 182610 }, { "epoch": 51.83650298041442, "grad_norm": 0.014960972592234612, "learning_rate": 4.8184501845018454e-05, "loss": 0.0005695225670933723, "step": 182620 }, { "epoch": 51.83934147033778, "grad_norm": 0.016016079112887383, "learning_rate": 4.818166335509509e-05, "loss": 0.0010597143322229386, "step": 182630 }, { "epoch": 51.84217996026114, "grad_norm": 0.044424377381801605, "learning_rate": 4.817882486517173e-05, "loss": 0.0028307121247053146, "step": 182640 }, { "epoch": 51.845018450184504, "grad_norm": 0.13467425107955933, "learning_rate": 4.817598637524837e-05, "loss": 0.0011819468811154366, "step": 182650 }, { "epoch": 51.84785694010786, "grad_norm": 0.26799890398979187, "learning_rate": 4.817314788532501e-05, "loss": 0.002586286887526512, "step": 182660 }, { "epoch": 51.85069543003122, "grad_norm": 5.093226432800293, "learning_rate": 4.817030939540165e-05, "loss": 0.0023472214117646216, "step": 182670 }, { "epoch": 51.853533919954586, "grad_norm": 6.831262588500977, "learning_rate": 4.816747090547829e-05, "loss": 0.003980296850204468, "step": 182680 }, { "epoch": 51.85637240987794, "grad_norm": 0.8201576471328735, "learning_rate": 4.816463241555493e-05, "loss": 0.002818104811012745, "step": 182690 }, { "epoch": 51.859210899801305, "grad_norm": 0.21095213294029236, "learning_rate": 4.8161793925631565e-05, "loss": 0.001989252492785454, "step": 182700 }, { "epoch": 51.86204938972467, "grad_norm": 0.16685543954372406, "learning_rate": 4.8158955435708206e-05, "loss": 0.006299319863319397, "step": 182710 }, { "epoch": 51.86488787964803, "grad_norm": 0.13686378300189972, "learning_rate": 4.815611694578485e-05, "loss": 0.0028212105855345724, "step": 182720 }, { "epoch": 51.867726369571386, "grad_norm": 1.100784182548523, "learning_rate": 4.815327845586148e-05, "loss": 0.001128813624382019, "step": 182730 }, { "epoch": 51.87056485949475, "grad_norm": 0.003611004212871194, "learning_rate": 4.8150439965938124e-05, "loss": 0.0031026175245642664, "step": 182740 }, { "epoch": 51.87340334941811, "grad_norm": 0.0023195436224341393, "learning_rate": 4.8147601476014765e-05, "loss": 0.010812245309352875, "step": 182750 }, { "epoch": 51.87624183934147, "grad_norm": 0.19556090235710144, "learning_rate": 4.81447629860914e-05, "loss": 0.007228392362594605, "step": 182760 }, { "epoch": 51.87908032926483, "grad_norm": 0.11013808101415634, "learning_rate": 4.814192449616804e-05, "loss": 0.0006971884518861771, "step": 182770 }, { "epoch": 51.88191881918819, "grad_norm": 0.0009893561946228147, "learning_rate": 4.813908600624468e-05, "loss": 0.00033116452395915986, "step": 182780 }, { "epoch": 51.88475730911155, "grad_norm": 0.014332827180624008, "learning_rate": 4.813624751632132e-05, "loss": 0.0024618688970804215, "step": 182790 }, { "epoch": 51.88759579903491, "grad_norm": 0.047759342938661575, "learning_rate": 4.813340902639796e-05, "loss": 0.0004285367205739021, "step": 182800 }, { "epoch": 51.890434288958275, "grad_norm": 0.03728952631354332, "learning_rate": 4.813057053647459e-05, "loss": 0.0003861796110868454, "step": 182810 }, { "epoch": 51.89327277888164, "grad_norm": 0.2007187157869339, "learning_rate": 4.812773204655124e-05, "loss": 0.0009088754653930664, "step": 182820 }, { "epoch": 51.896111268804994, "grad_norm": 0.1826172024011612, "learning_rate": 4.8124893556627876e-05, "loss": 0.0014579374343156814, "step": 182830 }, { "epoch": 51.89894975872836, "grad_norm": 0.004612017888575792, "learning_rate": 4.812205506670451e-05, "loss": 0.00017700716853141785, "step": 182840 }, { "epoch": 51.90178824865172, "grad_norm": 0.006769486702978611, "learning_rate": 4.811921657678116e-05, "loss": 0.00024742018431425097, "step": 182850 }, { "epoch": 51.904626738575075, "grad_norm": 0.010468934662640095, "learning_rate": 4.811637808685779e-05, "loss": 0.0026016848161816595, "step": 182860 }, { "epoch": 51.90746522849844, "grad_norm": 0.0382649302482605, "learning_rate": 4.8113539596934434e-05, "loss": 0.00017380863428115845, "step": 182870 }, { "epoch": 51.9103037184218, "grad_norm": 0.25346145033836365, "learning_rate": 4.8110701107011076e-05, "loss": 0.0015886861830949784, "step": 182880 }, { "epoch": 51.913142208345164, "grad_norm": 0.33072468638420105, "learning_rate": 4.810786261708771e-05, "loss": 0.00037891734391450884, "step": 182890 }, { "epoch": 51.91598069826852, "grad_norm": 0.2194923609495163, "learning_rate": 4.810502412716435e-05, "loss": 0.0015631748363375663, "step": 182900 }, { "epoch": 51.91881918819188, "grad_norm": 0.026827184483408928, "learning_rate": 4.8102185637240986e-05, "loss": 0.0009382400661706924, "step": 182910 }, { "epoch": 51.921657678115245, "grad_norm": 0.426723837852478, "learning_rate": 4.809934714731763e-05, "loss": 0.0017225667834281921, "step": 182920 }, { "epoch": 51.9244961680386, "grad_norm": 5.990427494049072, "learning_rate": 4.809650865739427e-05, "loss": 0.0029346687719225884, "step": 182930 }, { "epoch": 51.927334657961964, "grad_norm": 0.028409680351614952, "learning_rate": 4.8093670167470904e-05, "loss": 0.006483998894691467, "step": 182940 }, { "epoch": 51.93017314788533, "grad_norm": 0.042898260056972504, "learning_rate": 4.809083167754755e-05, "loss": 0.0015363164246082305, "step": 182950 }, { "epoch": 51.93301163780868, "grad_norm": 2.6698052883148193, "learning_rate": 4.8087993187624186e-05, "loss": 0.0018748490139842034, "step": 182960 }, { "epoch": 51.935850127732046, "grad_norm": 0.03998745232820511, "learning_rate": 4.808515469770082e-05, "loss": 0.0006909798830747604, "step": 182970 }, { "epoch": 51.93868861765541, "grad_norm": 0.41016194224357605, "learning_rate": 4.808231620777747e-05, "loss": 0.0015818437561392785, "step": 182980 }, { "epoch": 51.94152710757877, "grad_norm": 0.1739124357700348, "learning_rate": 4.8079477717854104e-05, "loss": 0.0008472014218568802, "step": 182990 }, { "epoch": 51.94436559750213, "grad_norm": 0.051039665937423706, "learning_rate": 4.8076639227930745e-05, "loss": 0.0007738806307315826, "step": 183000 }, { "epoch": 51.94436559750213, "eval_accuracy": 0.9812424492910282, "eval_loss": 0.07570843398571014, "eval_runtime": 74.0203, "eval_samples_per_second": 212.469, "eval_steps_per_second": 3.323, "step": 183000 }, { "epoch": 51.94720408742549, "grad_norm": 0.023053161799907684, "learning_rate": 4.8073800738007386e-05, "loss": 0.010083813965320588, "step": 183010 }, { "epoch": 51.95004257734885, "grad_norm": 0.015481492504477501, "learning_rate": 4.807096224808402e-05, "loss": 0.0006303193047642707, "step": 183020 }, { "epoch": 51.95288106727221, "grad_norm": 0.0030392282642424107, "learning_rate": 4.806812375816066e-05, "loss": 0.001028667949140072, "step": 183030 }, { "epoch": 51.95571955719557, "grad_norm": 0.002299404935911298, "learning_rate": 4.80652852682373e-05, "loss": 0.0005664043128490448, "step": 183040 }, { "epoch": 51.958558047118935, "grad_norm": 0.059475917369127274, "learning_rate": 4.806244677831394e-05, "loss": 0.0003152988851070404, "step": 183050 }, { "epoch": 51.96139653704229, "grad_norm": 0.0576646625995636, "learning_rate": 4.805960828839058e-05, "loss": 0.00022358857095241546, "step": 183060 }, { "epoch": 51.96423502696565, "grad_norm": 0.013105339370667934, "learning_rate": 4.8056769798467214e-05, "loss": 0.0011486023664474486, "step": 183070 }, { "epoch": 51.967073516889016, "grad_norm": 0.12514479458332062, "learning_rate": 4.805393130854386e-05, "loss": 0.0018512846902012825, "step": 183080 }, { "epoch": 51.96991200681238, "grad_norm": 0.12476160377264023, "learning_rate": 4.80510928186205e-05, "loss": 0.0001902146264910698, "step": 183090 }, { "epoch": 51.972750496735735, "grad_norm": 2.653707504272461, "learning_rate": 4.804825432869713e-05, "loss": 0.0007322093471884728, "step": 183100 }, { "epoch": 51.9755889866591, "grad_norm": 0.005530076567083597, "learning_rate": 4.804541583877378e-05, "loss": 0.00043660365045070646, "step": 183110 }, { "epoch": 51.97842747658246, "grad_norm": 0.5781853795051575, "learning_rate": 4.8042577348850414e-05, "loss": 0.0010422071442008018, "step": 183120 }, { "epoch": 51.981265966505816, "grad_norm": 0.01880376972258091, "learning_rate": 4.8039738858927056e-05, "loss": 0.0002622125670313835, "step": 183130 }, { "epoch": 51.98410445642918, "grad_norm": 0.00287848268635571, "learning_rate": 4.803690036900369e-05, "loss": 0.003766665980219841, "step": 183140 }, { "epoch": 51.98694294635254, "grad_norm": 0.03158586099743843, "learning_rate": 4.803406187908033e-05, "loss": 0.00026955567300319673, "step": 183150 }, { "epoch": 51.9897814362759, "grad_norm": 0.012008865363895893, "learning_rate": 4.803122338915697e-05, "loss": 0.002704116329550743, "step": 183160 }, { "epoch": 51.99261992619926, "grad_norm": 0.8109577298164368, "learning_rate": 4.802838489923361e-05, "loss": 0.007718692719936371, "step": 183170 }, { "epoch": 51.995458416122624, "grad_norm": 0.45362287759780884, "learning_rate": 4.802554640931025e-05, "loss": 0.0007435338571667671, "step": 183180 }, { "epoch": 51.99829690604599, "grad_norm": 0.6717060208320618, "learning_rate": 4.802270791938689e-05, "loss": 0.0018449293449521065, "step": 183190 }, { "epoch": 52.00113539596934, "grad_norm": 0.7840215563774109, "learning_rate": 4.8019869429463525e-05, "loss": 0.0013685999438166618, "step": 183200 }, { "epoch": 52.003973885892705, "grad_norm": 2.3906126022338867, "learning_rate": 4.8017030939540167e-05, "loss": 0.003042948246002197, "step": 183210 }, { "epoch": 52.00681237581607, "grad_norm": 0.10775148123502731, "learning_rate": 4.801419244961681e-05, "loss": 0.00025636907666921617, "step": 183220 }, { "epoch": 52.009650865739424, "grad_norm": 0.03595561906695366, "learning_rate": 4.801135395969344e-05, "loss": 0.0004290280863642693, "step": 183230 }, { "epoch": 52.01248935566279, "grad_norm": 0.2629640996456146, "learning_rate": 4.8008515469770084e-05, "loss": 0.0017379781231284142, "step": 183240 }, { "epoch": 52.01532784558615, "grad_norm": 0.005831924732774496, "learning_rate": 4.8005676979846725e-05, "loss": 0.0005435153841972351, "step": 183250 }, { "epoch": 52.018166335509505, "grad_norm": 0.06222304329276085, "learning_rate": 4.800283848992336e-05, "loss": 0.00011239852756261826, "step": 183260 }, { "epoch": 52.02100482543287, "grad_norm": 0.021376721560955048, "learning_rate": 4.8e-05, "loss": 0.0020376395434141157, "step": 183270 }, { "epoch": 52.02384331535623, "grad_norm": 0.178311288356781, "learning_rate": 4.799716151007664e-05, "loss": 0.004099814221262932, "step": 183280 }, { "epoch": 52.026681805279594, "grad_norm": 0.13934306800365448, "learning_rate": 4.7994323020153284e-05, "loss": 0.0008940955623984337, "step": 183290 }, { "epoch": 52.02952029520295, "grad_norm": 3.914332389831543, "learning_rate": 4.799148453022992e-05, "loss": 0.0007819535210728645, "step": 183300 }, { "epoch": 52.03235878512631, "grad_norm": 1.1078356504440308, "learning_rate": 4.798864604030656e-05, "loss": 0.0040706761181354524, "step": 183310 }, { "epoch": 52.035197275049676, "grad_norm": 0.012901118956506252, "learning_rate": 4.79858075503832e-05, "loss": 0.00036161206662654877, "step": 183320 }, { "epoch": 52.03803576497303, "grad_norm": 0.020278966054320335, "learning_rate": 4.7982969060459836e-05, "loss": 0.0005346981808543206, "step": 183330 }, { "epoch": 52.040874254896394, "grad_norm": 0.08660177886486053, "learning_rate": 4.798013057053648e-05, "loss": 0.0065281249582767485, "step": 183340 }, { "epoch": 52.04371274481976, "grad_norm": 0.09925329685211182, "learning_rate": 4.797729208061312e-05, "loss": 0.004941812157630921, "step": 183350 }, { "epoch": 52.04655123474312, "grad_norm": 0.5745363831520081, "learning_rate": 4.797445359068975e-05, "loss": 0.002034327946603298, "step": 183360 }, { "epoch": 52.049389724666476, "grad_norm": 0.10146824270486832, "learning_rate": 4.7971615100766395e-05, "loss": 0.0012022057548165322, "step": 183370 }, { "epoch": 52.05222821458984, "grad_norm": 0.004010516684502363, "learning_rate": 4.7968776610843036e-05, "loss": 0.00039030611515045166, "step": 183380 }, { "epoch": 52.0550667045132, "grad_norm": 0.12974506616592407, "learning_rate": 4.796593812091967e-05, "loss": 0.0020153256133198737, "step": 183390 }, { "epoch": 52.05790519443656, "grad_norm": 0.0011988189071416855, "learning_rate": 4.796309963099631e-05, "loss": 0.0003418654203414917, "step": 183400 }, { "epoch": 52.06074368435992, "grad_norm": 0.017536131665110588, "learning_rate": 4.796026114107295e-05, "loss": 0.0002381395548582077, "step": 183410 }, { "epoch": 52.06358217428328, "grad_norm": 0.03707705810666084, "learning_rate": 4.7957422651149595e-05, "loss": 0.0006319796666502953, "step": 183420 }, { "epoch": 52.06642066420664, "grad_norm": 0.07376475632190704, "learning_rate": 4.795458416122623e-05, "loss": 0.0012795748189091682, "step": 183430 }, { "epoch": 52.06925915413, "grad_norm": 0.005596641451120377, "learning_rate": 4.7951745671302864e-05, "loss": 0.00017790589481592178, "step": 183440 }, { "epoch": 52.072097644053365, "grad_norm": 12.776966094970703, "learning_rate": 4.794890718137951e-05, "loss": 0.009705387800931931, "step": 183450 }, { "epoch": 52.07493613397673, "grad_norm": 0.00790331419557333, "learning_rate": 4.794606869145615e-05, "loss": 0.00045639947056770326, "step": 183460 }, { "epoch": 52.07777462390008, "grad_norm": 0.0350353866815567, "learning_rate": 4.794323020153279e-05, "loss": 0.0003215586766600609, "step": 183470 }, { "epoch": 52.080613113823446, "grad_norm": 0.003290105378255248, "learning_rate": 4.794039171160943e-05, "loss": 0.0003226153552532196, "step": 183480 }, { "epoch": 52.08345160374681, "grad_norm": 0.007869471795856953, "learning_rate": 4.7937553221686064e-05, "loss": 0.003339795395731926, "step": 183490 }, { "epoch": 52.086290093670165, "grad_norm": 0.12339749187231064, "learning_rate": 4.7934714731762705e-05, "loss": 0.0006091877818107605, "step": 183500 }, { "epoch": 52.086290093670165, "eval_accuracy": 0.982069053220576, "eval_loss": 0.0696084201335907, "eval_runtime": 56.6502, "eval_samples_per_second": 277.616, "eval_steps_per_second": 4.342, "step": 183500 }, { "epoch": 52.08912858359353, "grad_norm": 0.017078295350074768, "learning_rate": 4.793187624183935e-05, "loss": 0.0006214754655957222, "step": 183510 }, { "epoch": 52.09196707351689, "grad_norm": 0.003656325163319707, "learning_rate": 4.792903775191598e-05, "loss": 0.001353026181459427, "step": 183520 }, { "epoch": 52.09480556344025, "grad_norm": 3.3040997982025146, "learning_rate": 4.792619926199262e-05, "loss": 0.0015522636473178863, "step": 183530 }, { "epoch": 52.09764405336361, "grad_norm": 0.03432783484458923, "learning_rate": 4.792336077206926e-05, "loss": 0.00023975782096385956, "step": 183540 }, { "epoch": 52.10048254328697, "grad_norm": 0.027481090277433395, "learning_rate": 4.7920522282145905e-05, "loss": 0.00466746911406517, "step": 183550 }, { "epoch": 52.103321033210335, "grad_norm": 0.03833684325218201, "learning_rate": 4.791768379222254e-05, "loss": 0.00019819680601358415, "step": 183560 }, { "epoch": 52.10615952313369, "grad_norm": 0.05154190957546234, "learning_rate": 4.7914845302299175e-05, "loss": 0.0002051621675491333, "step": 183570 }, { "epoch": 52.108998013057054, "grad_norm": 1.4828730821609497, "learning_rate": 4.791200681237582e-05, "loss": 0.0011456266045570373, "step": 183580 }, { "epoch": 52.11183650298042, "grad_norm": 0.15601906180381775, "learning_rate": 4.790916832245246e-05, "loss": 0.0002102842554450035, "step": 183590 }, { "epoch": 52.11467499290377, "grad_norm": 0.009348904713988304, "learning_rate": 4.79063298325291e-05, "loss": 0.0008832301944494247, "step": 183600 }, { "epoch": 52.117513482827135, "grad_norm": 0.06245242431759834, "learning_rate": 4.790349134260574e-05, "loss": 0.0005089951679110527, "step": 183610 }, { "epoch": 52.1203519727505, "grad_norm": 0.025054970756173134, "learning_rate": 4.7900652852682375e-05, "loss": 0.0004680335521697998, "step": 183620 }, { "epoch": 52.123190462673854, "grad_norm": 0.0798400267958641, "learning_rate": 4.7897814362759016e-05, "loss": 0.004537589848041534, "step": 183630 }, { "epoch": 52.12602895259722, "grad_norm": 0.02349098213016987, "learning_rate": 4.789497587283565e-05, "loss": 0.0002725990489125252, "step": 183640 }, { "epoch": 52.12886744252058, "grad_norm": 0.059369947761297226, "learning_rate": 4.789213738291229e-05, "loss": 0.00024754628539085387, "step": 183650 }, { "epoch": 52.13170593244394, "grad_norm": 1.0507161617279053, "learning_rate": 4.7889298892988934e-05, "loss": 0.00311678908765316, "step": 183660 }, { "epoch": 52.1345444223673, "grad_norm": 0.004871797282248735, "learning_rate": 4.788646040306557e-05, "loss": 0.0007206084206700325, "step": 183670 }, { "epoch": 52.13738291229066, "grad_norm": 0.00604652427136898, "learning_rate": 4.788362191314221e-05, "loss": 0.00047660656273365023, "step": 183680 }, { "epoch": 52.140221402214024, "grad_norm": 0.018387725576758385, "learning_rate": 4.788078342321885e-05, "loss": 0.00013447534292936324, "step": 183690 }, { "epoch": 52.14305989213738, "grad_norm": 0.004038989078253508, "learning_rate": 4.7877944933295485e-05, "loss": 0.004372154176235199, "step": 183700 }, { "epoch": 52.14589838206074, "grad_norm": 0.06221847981214523, "learning_rate": 4.7875106443372134e-05, "loss": 0.0031285438686609267, "step": 183710 }, { "epoch": 52.148736871984106, "grad_norm": 0.005162859335541725, "learning_rate": 4.787226795344877e-05, "loss": 0.02333822250366211, "step": 183720 }, { "epoch": 52.15157536190747, "grad_norm": 0.06779542565345764, "learning_rate": 4.78694294635254e-05, "loss": 0.00097927488386631, "step": 183730 }, { "epoch": 52.154413851830824, "grad_norm": 0.1632527858018875, "learning_rate": 4.7866590973602044e-05, "loss": 0.002234739065170288, "step": 183740 }, { "epoch": 52.15725234175419, "grad_norm": 0.6769421696662903, "learning_rate": 4.7863752483678686e-05, "loss": 0.000937696360051632, "step": 183750 }, { "epoch": 52.16009083167755, "grad_norm": 0.004241309594362974, "learning_rate": 4.786091399375533e-05, "loss": 0.001364721916615963, "step": 183760 }, { "epoch": 52.162929321600906, "grad_norm": 0.10594318807125092, "learning_rate": 4.785807550383196e-05, "loss": 0.009427264332771301, "step": 183770 }, { "epoch": 52.16576781152427, "grad_norm": 0.013491901569068432, "learning_rate": 4.78552370139086e-05, "loss": 0.0005375385284423828, "step": 183780 }, { "epoch": 52.16860630144763, "grad_norm": 0.015053408220410347, "learning_rate": 4.7852398523985244e-05, "loss": 0.0013850804418325425, "step": 183790 }, { "epoch": 52.17144479137099, "grad_norm": 7.4695048332214355, "learning_rate": 4.784956003406188e-05, "loss": 0.0034277193248271943, "step": 183800 }, { "epoch": 52.17428328129435, "grad_norm": 0.00849482137709856, "learning_rate": 4.784672154413852e-05, "loss": 0.0015832044184207917, "step": 183810 }, { "epoch": 52.17712177121771, "grad_norm": 0.03237693011760712, "learning_rate": 4.784388305421516e-05, "loss": 0.0006552934646606445, "step": 183820 }, { "epoch": 52.179960261141076, "grad_norm": 0.06718684732913971, "learning_rate": 4.7841044564291796e-05, "loss": 0.0005317624658346176, "step": 183830 }, { "epoch": 52.18279875106443, "grad_norm": 0.04982101544737816, "learning_rate": 4.783820607436844e-05, "loss": 0.00016460716724395752, "step": 183840 }, { "epoch": 52.185637240987795, "grad_norm": 0.49056529998779297, "learning_rate": 4.783536758444508e-05, "loss": 0.0013139504939317704, "step": 183850 }, { "epoch": 52.18847573091116, "grad_norm": 0.015722157433629036, "learning_rate": 4.7832529094521714e-05, "loss": 0.003331059217453003, "step": 183860 }, { "epoch": 52.19131422083451, "grad_norm": 0.0020256356801837683, "learning_rate": 4.7829690604598355e-05, "loss": 0.00025677569210529326, "step": 183870 }, { "epoch": 52.194152710757876, "grad_norm": 0.006960750557482243, "learning_rate": 4.7826852114674996e-05, "loss": 0.0017177574336528778, "step": 183880 }, { "epoch": 52.19699120068124, "grad_norm": 0.015765776857733727, "learning_rate": 4.782401362475164e-05, "loss": 0.0021311162039637566, "step": 183890 }, { "epoch": 52.199829690604595, "grad_norm": 0.05999329313635826, "learning_rate": 4.782117513482827e-05, "loss": 6.730761379003525e-05, "step": 183900 }, { "epoch": 52.20266818052796, "grad_norm": 0.022580599412322044, "learning_rate": 4.7818336644904914e-05, "loss": 0.00021012239158153535, "step": 183910 }, { "epoch": 52.20550667045132, "grad_norm": 0.9093426465988159, "learning_rate": 4.7815498154981555e-05, "loss": 0.000881659984588623, "step": 183920 }, { "epoch": 52.208345160374684, "grad_norm": 0.23184449970722198, "learning_rate": 4.781265966505819e-05, "loss": 0.00018871929496526717, "step": 183930 }, { "epoch": 52.21118365029804, "grad_norm": 0.03048865497112274, "learning_rate": 4.780982117513483e-05, "loss": 0.00033024083822965624, "step": 183940 }, { "epoch": 52.2140221402214, "grad_norm": 13.118938446044922, "learning_rate": 4.780698268521147e-05, "loss": 0.009487903118133545, "step": 183950 }, { "epoch": 52.216860630144765, "grad_norm": 0.1810690015554428, "learning_rate": 4.780414419528811e-05, "loss": 0.0017492646351456643, "step": 183960 }, { "epoch": 52.21969912006812, "grad_norm": 0.39298906922340393, "learning_rate": 4.780130570536475e-05, "loss": 0.00047609135508537294, "step": 183970 }, { "epoch": 52.222537609991484, "grad_norm": 0.01205577701330185, "learning_rate": 4.779846721544139e-05, "loss": 0.00016761962324380876, "step": 183980 }, { "epoch": 52.22537609991485, "grad_norm": 0.7468529939651489, "learning_rate": 4.7795628725518024e-05, "loss": 0.006904216855764389, "step": 183990 }, { "epoch": 52.2282145898382, "grad_norm": 0.012832017615437508, "learning_rate": 4.7792790235594666e-05, "loss": 0.0001812005415558815, "step": 184000 }, { "epoch": 52.2282145898382, "eval_accuracy": 0.9825777325618363, "eval_loss": 0.06806888431310654, "eval_runtime": 106.9847, "eval_samples_per_second": 147.002, "eval_steps_per_second": 2.299, "step": 184000 }, { "epoch": 52.231053079761566, "grad_norm": 0.0043248445726931095, "learning_rate": 4.778995174567131e-05, "loss": 9.124968200922013e-05, "step": 184010 }, { "epoch": 52.23389156968493, "grad_norm": 0.0213554035872221, "learning_rate": 4.778711325574794e-05, "loss": 0.0003329625353217125, "step": 184020 }, { "epoch": 52.23673005960829, "grad_norm": 0.12562333047389984, "learning_rate": 4.778427476582458e-05, "loss": 0.0008242309093475341, "step": 184030 }, { "epoch": 52.23956854953165, "grad_norm": 0.015277755446732044, "learning_rate": 4.778143627590122e-05, "loss": 0.0001319458708167076, "step": 184040 }, { "epoch": 52.24240703945501, "grad_norm": 0.1721145212650299, "learning_rate": 4.7778597785977866e-05, "loss": 0.010550407320261001, "step": 184050 }, { "epoch": 52.24524552937837, "grad_norm": 0.9174314141273499, "learning_rate": 4.77757592960545e-05, "loss": 0.0010347871109843254, "step": 184060 }, { "epoch": 52.24808401930173, "grad_norm": 0.18096257746219635, "learning_rate": 4.777292080613114e-05, "loss": 0.0011155826970934868, "step": 184070 }, { "epoch": 52.25092250922509, "grad_norm": 0.5854184627532959, "learning_rate": 4.777008231620778e-05, "loss": 0.00026478935033082964, "step": 184080 }, { "epoch": 52.253760999148454, "grad_norm": 0.5008165240287781, "learning_rate": 4.776724382628442e-05, "loss": 0.00021296609193086625, "step": 184090 }, { "epoch": 52.25659948907182, "grad_norm": 0.0035491364542394876, "learning_rate": 4.776440533636106e-05, "loss": 0.00020412635058164597, "step": 184100 }, { "epoch": 52.25943797899517, "grad_norm": 0.11606127768754959, "learning_rate": 4.77615668464377e-05, "loss": 0.0034789588302373887, "step": 184110 }, { "epoch": 52.262276468918536, "grad_norm": 0.040278226137161255, "learning_rate": 4.7758728356514335e-05, "loss": 8.278395980596543e-05, "step": 184120 }, { "epoch": 52.2651149588419, "grad_norm": 0.005022560711950064, "learning_rate": 4.7755889866590976e-05, "loss": 0.00024641696363687516, "step": 184130 }, { "epoch": 52.267953448765255, "grad_norm": 0.0018367163138464093, "learning_rate": 4.775305137666761e-05, "loss": 0.00010818988084793091, "step": 184140 }, { "epoch": 52.27079193868862, "grad_norm": 0.03352326527237892, "learning_rate": 4.775021288674425e-05, "loss": 0.00026732552796602247, "step": 184150 }, { "epoch": 52.27363042861198, "grad_norm": 0.01596655510365963, "learning_rate": 4.7747374396820894e-05, "loss": 6.169527769088745e-05, "step": 184160 }, { "epoch": 52.276468918535336, "grad_norm": 0.0025674826465547085, "learning_rate": 4.774453590689753e-05, "loss": 0.00093032605946064, "step": 184170 }, { "epoch": 52.2793074084587, "grad_norm": 0.008025125600397587, "learning_rate": 4.7741697416974177e-05, "loss": 0.0003178466111421585, "step": 184180 }, { "epoch": 52.28214589838206, "grad_norm": 0.017277689650654793, "learning_rate": 4.773885892705081e-05, "loss": 9.203609079122543e-05, "step": 184190 }, { "epoch": 52.284984388305425, "grad_norm": 0.06670355796813965, "learning_rate": 4.7736020437127446e-05, "loss": 7.944796234369278e-05, "step": 184200 }, { "epoch": 52.28782287822878, "grad_norm": 0.0073315477930009365, "learning_rate": 4.7733181947204094e-05, "loss": 7.488057017326354e-05, "step": 184210 }, { "epoch": 52.29066136815214, "grad_norm": 0.0009059662697836757, "learning_rate": 4.773034345728073e-05, "loss": 0.0007936956360936164, "step": 184220 }, { "epoch": 52.293499858075506, "grad_norm": 0.04336453974246979, "learning_rate": 4.772750496735737e-05, "loss": 0.0028879331424832345, "step": 184230 }, { "epoch": 52.29633834799886, "grad_norm": 0.01494014635682106, "learning_rate": 4.7724666477434005e-05, "loss": 0.0007667064666748047, "step": 184240 }, { "epoch": 52.299176837922225, "grad_norm": 0.0355023518204689, "learning_rate": 4.7721827987510646e-05, "loss": 0.0001322224736213684, "step": 184250 }, { "epoch": 52.30201532784559, "grad_norm": 0.08121570944786072, "learning_rate": 4.771898949758729e-05, "loss": 0.0005963105708360672, "step": 184260 }, { "epoch": 52.304853817768944, "grad_norm": 1.0450962781906128, "learning_rate": 4.771615100766392e-05, "loss": 0.0009920289739966393, "step": 184270 }, { "epoch": 52.30769230769231, "grad_norm": 0.022549988701939583, "learning_rate": 4.771331251774056e-05, "loss": 0.013432863354682922, "step": 184280 }, { "epoch": 52.31053079761567, "grad_norm": 0.007575531490147114, "learning_rate": 4.7710474027817205e-05, "loss": 0.0013511812314391137, "step": 184290 }, { "epoch": 52.31336928753903, "grad_norm": 0.02227567322552204, "learning_rate": 4.770763553789384e-05, "loss": 0.0019323872402310371, "step": 184300 }, { "epoch": 52.31620777746239, "grad_norm": 0.14419783651828766, "learning_rate": 4.770479704797049e-05, "loss": 0.0025687044486403464, "step": 184310 }, { "epoch": 52.31904626738575, "grad_norm": 0.06773044168949127, "learning_rate": 4.770195855804712e-05, "loss": 0.000589117780327797, "step": 184320 }, { "epoch": 52.321884757309114, "grad_norm": 0.12194810807704926, "learning_rate": 4.7699120068123757e-05, "loss": 0.0002095252275466919, "step": 184330 }, { "epoch": 52.32472324723247, "grad_norm": 1.0040738582611084, "learning_rate": 4.7696281578200405e-05, "loss": 0.0006311120465397835, "step": 184340 }, { "epoch": 52.32756173715583, "grad_norm": 0.02569996565580368, "learning_rate": 4.769344308827704e-05, "loss": 0.005300191789865493, "step": 184350 }, { "epoch": 52.330400227079195, "grad_norm": 8.529300689697266, "learning_rate": 4.769060459835368e-05, "loss": 0.0024118976667523383, "step": 184360 }, { "epoch": 52.33323871700255, "grad_norm": 0.16017886996269226, "learning_rate": 4.7687766108430315e-05, "loss": 0.009991241246461868, "step": 184370 }, { "epoch": 52.336077206925914, "grad_norm": 0.1596096009016037, "learning_rate": 4.768492761850696e-05, "loss": 0.0002748483791947365, "step": 184380 }, { "epoch": 52.33891569684928, "grad_norm": 2.616983652114868, "learning_rate": 4.76820891285836e-05, "loss": 0.0017140893265604972, "step": 184390 }, { "epoch": 52.34175418677264, "grad_norm": 4.4687581062316895, "learning_rate": 4.767925063866023e-05, "loss": 0.0009400740265846253, "step": 184400 }, { "epoch": 52.344592676695996, "grad_norm": 0.2547582685947418, "learning_rate": 4.7676412148736874e-05, "loss": 0.0003706773743033409, "step": 184410 }, { "epoch": 52.34743116661936, "grad_norm": 0.012724380940198898, "learning_rate": 4.7673573658813515e-05, "loss": 0.00030427146703004837, "step": 184420 }, { "epoch": 52.35026965654272, "grad_norm": 0.23443737626075745, "learning_rate": 4.767073516889015e-05, "loss": 0.0004746554419398308, "step": 184430 }, { "epoch": 52.35310814646608, "grad_norm": 0.00622439943253994, "learning_rate": 4.766789667896679e-05, "loss": 0.0001987004652619362, "step": 184440 }, { "epoch": 52.35594663638944, "grad_norm": 0.0015576742589473724, "learning_rate": 4.766505818904343e-05, "loss": 0.0018286088481545449, "step": 184450 }, { "epoch": 52.3587851263128, "grad_norm": 0.025734001770615578, "learning_rate": 4.766221969912007e-05, "loss": 0.0004335755482316017, "step": 184460 }, { "epoch": 52.36162361623616, "grad_norm": 0.045438531786203384, "learning_rate": 4.765938120919671e-05, "loss": 0.00035518594086170197, "step": 184470 }, { "epoch": 52.36446210615952, "grad_norm": 0.4304017722606659, "learning_rate": 4.765654271927335e-05, "loss": 0.001335160993039608, "step": 184480 }, { "epoch": 52.367300596082885, "grad_norm": 0.011848308145999908, "learning_rate": 4.7653704229349985e-05, "loss": 0.0008830053731799126, "step": 184490 }, { "epoch": 52.37013908600625, "grad_norm": 0.0103300791233778, "learning_rate": 4.7650865739426626e-05, "loss": 0.0003408946096897125, "step": 184500 }, { "epoch": 52.37013908600625, "eval_accuracy": 0.9835315063266993, "eval_loss": 0.06384346634149551, "eval_runtime": 37.8972, "eval_samples_per_second": 414.991, "eval_steps_per_second": 6.491, "step": 184500 }, { "epoch": 52.3729775759296, "grad_norm": 0.08436889201402664, "learning_rate": 4.764802724950327e-05, "loss": 0.00470757856965065, "step": 184510 }, { "epoch": 52.375816065852966, "grad_norm": 0.005073889158666134, "learning_rate": 4.764518875957991e-05, "loss": 0.0009677173569798469, "step": 184520 }, { "epoch": 52.37865455577633, "grad_norm": 4.295240879058838, "learning_rate": 4.764235026965654e-05, "loss": 0.005196299776434899, "step": 184530 }, { "epoch": 52.381493045699685, "grad_norm": 2.1085891723632812, "learning_rate": 4.7639511779733185e-05, "loss": 0.0003208408132195473, "step": 184540 }, { "epoch": 52.38433153562305, "grad_norm": 0.1466059684753418, "learning_rate": 4.7636673289809826e-05, "loss": 0.00017032865434885025, "step": 184550 }, { "epoch": 52.38717002554641, "grad_norm": 16.578598022460938, "learning_rate": 4.763383479988646e-05, "loss": 0.012964838743209839, "step": 184560 }, { "epoch": 52.39000851546977, "grad_norm": 0.00908625777810812, "learning_rate": 4.76309963099631e-05, "loss": 0.0011850520968437195, "step": 184570 }, { "epoch": 52.39284700539313, "grad_norm": 2.613308906555176, "learning_rate": 4.7628157820039743e-05, "loss": 0.005955878645181656, "step": 184580 }, { "epoch": 52.39568549531649, "grad_norm": 0.18991047143936157, "learning_rate": 4.762531933011638e-05, "loss": 0.00017120689153671265, "step": 184590 }, { "epoch": 52.398523985239855, "grad_norm": 0.643483579158783, "learning_rate": 4.762248084019302e-05, "loss": 0.0003484288230538368, "step": 184600 }, { "epoch": 52.40136247516321, "grad_norm": 0.2979111969470978, "learning_rate": 4.761964235026966e-05, "loss": 0.00018183253705501557, "step": 184610 }, { "epoch": 52.404200965086574, "grad_norm": 0.0015211221762001514, "learning_rate": 4.7616803860346295e-05, "loss": 0.00024641342461109164, "step": 184620 }, { "epoch": 52.40703945500994, "grad_norm": 0.009303228929638863, "learning_rate": 4.761396537042294e-05, "loss": 0.0002114957198500633, "step": 184630 }, { "epoch": 52.40987794493329, "grad_norm": 0.004578766878694296, "learning_rate": 4.761112688049958e-05, "loss": 0.000435197539627552, "step": 184640 }, { "epoch": 52.412716434856655, "grad_norm": 0.17287836968898773, "learning_rate": 4.760828839057622e-05, "loss": 0.00025967396795749663, "step": 184650 }, { "epoch": 52.41555492478002, "grad_norm": 0.004629199393093586, "learning_rate": 4.7605449900652854e-05, "loss": 0.004730609059333801, "step": 184660 }, { "epoch": 52.41839341470338, "grad_norm": 0.3702903389930725, "learning_rate": 4.760261141072949e-05, "loss": 0.0001554638147354126, "step": 184670 }, { "epoch": 52.42123190462674, "grad_norm": 0.9320998787879944, "learning_rate": 4.759977292080614e-05, "loss": 0.000699804536998272, "step": 184680 }, { "epoch": 52.4240703945501, "grad_norm": 0.04080924019217491, "learning_rate": 4.759721827987511e-05, "loss": 0.01109718084335327, "step": 184690 }, { "epoch": 52.42690888447346, "grad_norm": 3.0499937534332275, "learning_rate": 4.759437978995175e-05, "loss": 0.00508180633187294, "step": 184700 }, { "epoch": 52.42974737439682, "grad_norm": 0.07820963114500046, "learning_rate": 4.7591541300028386e-05, "loss": 0.00033747870475053787, "step": 184710 }, { "epoch": 52.43258586432018, "grad_norm": 0.5173702836036682, "learning_rate": 4.758870281010503e-05, "loss": 0.0007862845435738563, "step": 184720 }, { "epoch": 52.435424354243544, "grad_norm": 0.014134817756712437, "learning_rate": 4.758586432018167e-05, "loss": 0.0012783078476786614, "step": 184730 }, { "epoch": 52.4382628441669, "grad_norm": 0.030186433345079422, "learning_rate": 4.7583025830258303e-05, "loss": 0.0006146647036075592, "step": 184740 }, { "epoch": 52.44110133409026, "grad_norm": 0.6354906558990479, "learning_rate": 4.7580187340334945e-05, "loss": 0.0006116906180977822, "step": 184750 }, { "epoch": 52.443939824013626, "grad_norm": 0.019607210531830788, "learning_rate": 4.7577348850411586e-05, "loss": 0.00034982673823833463, "step": 184760 }, { "epoch": 52.44677831393699, "grad_norm": 0.17200075089931488, "learning_rate": 4.757451036048822e-05, "loss": 0.0016802186146378517, "step": 184770 }, { "epoch": 52.449616803860344, "grad_norm": 0.026145966723561287, "learning_rate": 4.757167187056486e-05, "loss": 0.0003431329503655434, "step": 184780 }, { "epoch": 52.45245529378371, "grad_norm": 0.0066056386567652225, "learning_rate": 4.7568833380641503e-05, "loss": 0.0007304895669221878, "step": 184790 }, { "epoch": 52.45529378370707, "grad_norm": 0.3325738310813904, "learning_rate": 4.756599489071814e-05, "loss": 0.002580810710787773, "step": 184800 }, { "epoch": 52.458132273630426, "grad_norm": 6.547428607940674, "learning_rate": 4.756315640079478e-05, "loss": 0.0022193849086761474, "step": 184810 }, { "epoch": 52.46097076355379, "grad_norm": 0.2524906396865845, "learning_rate": 4.756031791087142e-05, "loss": 0.0010090451687574387, "step": 184820 }, { "epoch": 52.46380925347715, "grad_norm": 0.07337880879640579, "learning_rate": 4.755747942094806e-05, "loss": 0.00038292575627565386, "step": 184830 }, { "epoch": 52.46664774340051, "grad_norm": 0.20942217111587524, "learning_rate": 4.75546409310247e-05, "loss": 0.004025715216994285, "step": 184840 }, { "epoch": 52.46948623332387, "grad_norm": 0.23255206644535065, "learning_rate": 4.755180244110133e-05, "loss": 0.010403183847665786, "step": 184850 }, { "epoch": 52.47232472324723, "grad_norm": 0.15190580487251282, "learning_rate": 4.754896395117798e-05, "loss": 0.00018272195011377336, "step": 184860 }, { "epoch": 52.475163213170596, "grad_norm": 0.138035848736763, "learning_rate": 4.7546125461254614e-05, "loss": 0.0018741974607110023, "step": 184870 }, { "epoch": 52.47800170309395, "grad_norm": 0.0386057011783123, "learning_rate": 4.7543286971331256e-05, "loss": 0.0012192757800221442, "step": 184880 }, { "epoch": 52.480840193017315, "grad_norm": 0.11216206103563309, "learning_rate": 4.75404484814079e-05, "loss": 0.002339411899447441, "step": 184890 }, { "epoch": 52.48367868294068, "grad_norm": 0.13699647784233093, "learning_rate": 4.753760999148453e-05, "loss": 0.0006658989936113357, "step": 184900 }, { "epoch": 52.48651717286403, "grad_norm": 0.06419777870178223, "learning_rate": 4.753477150156117e-05, "loss": 0.006701837480068207, "step": 184910 }, { "epoch": 52.489355662787396, "grad_norm": 7.316105365753174, "learning_rate": 4.7531933011637814e-05, "loss": 0.0026676153764128687, "step": 184920 }, { "epoch": 52.49219415271076, "grad_norm": 0.3441883325576782, "learning_rate": 4.752909452171445e-05, "loss": 0.0003369420766830444, "step": 184930 }, { "epoch": 52.49503264263412, "grad_norm": 0.0033262751530855894, "learning_rate": 4.752625603179109e-05, "loss": 0.0033982042223215103, "step": 184940 }, { "epoch": 52.49787113255748, "grad_norm": 0.04805893450975418, "learning_rate": 4.7523417541867725e-05, "loss": 0.00930016189813614, "step": 184950 }, { "epoch": 52.50070962248084, "grad_norm": 0.8275559544563293, "learning_rate": 4.7520579051944366e-05, "loss": 0.0008690839633345604, "step": 184960 }, { "epoch": 52.503548112404204, "grad_norm": 0.03153613209724426, "learning_rate": 4.751774056202101e-05, "loss": 0.0009365875273942947, "step": 184970 }, { "epoch": 52.50638660232756, "grad_norm": 6.822403907775879, "learning_rate": 4.751490207209764e-05, "loss": 0.002045642212033272, "step": 184980 }, { "epoch": 52.50922509225092, "grad_norm": 0.181460440158844, "learning_rate": 4.751206358217429e-05, "loss": 0.0005075916647911072, "step": 184990 }, { "epoch": 52.512063582174285, "grad_norm": 2.6467738151550293, "learning_rate": 4.7509225092250925e-05, "loss": 0.0006339099258184433, "step": 185000 }, { "epoch": 52.512063582174285, "eval_accuracy": 0.9806066001144529, "eval_loss": 0.08013539761304855, "eval_runtime": 45.7148, "eval_samples_per_second": 344.024, "eval_steps_per_second": 5.381, "step": 185000 }, { "epoch": 52.51490207209764, "grad_norm": 0.030762406066060066, "learning_rate": 4.750638660232756e-05, "loss": 0.002031848765909672, "step": 185010 }, { "epoch": 52.517740562021004, "grad_norm": 0.013095525093376637, "learning_rate": 4.750354811240421e-05, "loss": 0.019938762485980987, "step": 185020 }, { "epoch": 52.52057905194437, "grad_norm": 0.060633450746536255, "learning_rate": 4.750070962248084e-05, "loss": 0.010321494191884995, "step": 185030 }, { "epoch": 52.52341754186773, "grad_norm": 13.981313705444336, "learning_rate": 4.7497871132557484e-05, "loss": 0.003101210668683052, "step": 185040 }, { "epoch": 52.526256031791085, "grad_norm": 0.005972123704850674, "learning_rate": 4.749503264263412e-05, "loss": 0.0040931783616542814, "step": 185050 }, { "epoch": 52.52909452171445, "grad_norm": 0.0034314750228077173, "learning_rate": 4.749219415271076e-05, "loss": 0.008468184620141983, "step": 185060 }, { "epoch": 52.53193301163781, "grad_norm": 0.03933482989668846, "learning_rate": 4.74893556627874e-05, "loss": 0.0004971964284777641, "step": 185070 }, { "epoch": 52.53477150156117, "grad_norm": 0.011860634200274944, "learning_rate": 4.7486517172864036e-05, "loss": 0.0004571028053760529, "step": 185080 }, { "epoch": 52.53760999148453, "grad_norm": 0.019094044342637062, "learning_rate": 4.748367868294068e-05, "loss": 0.0008944451808929443, "step": 185090 }, { "epoch": 52.54044848140789, "grad_norm": 0.039611201733350754, "learning_rate": 4.748084019301732e-05, "loss": 0.00971439927816391, "step": 185100 }, { "epoch": 52.54328697133125, "grad_norm": 0.052465371787548065, "learning_rate": 4.747800170309395e-05, "loss": 0.00026902593672275545, "step": 185110 }, { "epoch": 52.54612546125461, "grad_norm": 0.7376463413238525, "learning_rate": 4.74751632131706e-05, "loss": 0.0020393943414092064, "step": 185120 }, { "epoch": 52.548963951177974, "grad_norm": 0.08190733939409256, "learning_rate": 4.7472324723247236e-05, "loss": 0.002291416563093662, "step": 185130 }, { "epoch": 52.55180244110134, "grad_norm": 0.05860094726085663, "learning_rate": 4.746948623332387e-05, "loss": 0.0025515470653772352, "step": 185140 }, { "epoch": 52.55464093102469, "grad_norm": 0.28907090425491333, "learning_rate": 4.746664774340051e-05, "loss": 0.0028783993795514105, "step": 185150 }, { "epoch": 52.557479420948056, "grad_norm": 0.010185535065829754, "learning_rate": 4.746380925347715e-05, "loss": 0.0006204321980476379, "step": 185160 }, { "epoch": 52.56031791087142, "grad_norm": 0.10022414475679398, "learning_rate": 4.7460970763553794e-05, "loss": 0.0004094896838068962, "step": 185170 }, { "epoch": 52.563156400794774, "grad_norm": 0.007032406982034445, "learning_rate": 4.745813227363043e-05, "loss": 0.00322813093662262, "step": 185180 }, { "epoch": 52.56599489071814, "grad_norm": 0.09951729327440262, "learning_rate": 4.745529378370707e-05, "loss": 0.006086259707808494, "step": 185190 }, { "epoch": 52.5688333806415, "grad_norm": 0.08119599521160126, "learning_rate": 4.745245529378371e-05, "loss": 0.0008624441921710968, "step": 185200 }, { "epoch": 52.571671870564856, "grad_norm": 0.014908366836607456, "learning_rate": 4.7449616803860346e-05, "loss": 0.00033486410975456236, "step": 185210 }, { "epoch": 52.57451036048822, "grad_norm": 0.034076374024152756, "learning_rate": 4.744677831393699e-05, "loss": 0.00022605080157518388, "step": 185220 }, { "epoch": 52.57734885041158, "grad_norm": 0.013906584121286869, "learning_rate": 4.744393982401363e-05, "loss": 0.0020087478682398797, "step": 185230 }, { "epoch": 52.580187340334945, "grad_norm": 0.13396890461444855, "learning_rate": 4.7441101334090264e-05, "loss": 0.00038505829870700837, "step": 185240 }, { "epoch": 52.5830258302583, "grad_norm": 0.05336097627878189, "learning_rate": 4.7438262844166905e-05, "loss": 0.0008391117677092552, "step": 185250 }, { "epoch": 52.58586432018166, "grad_norm": 0.5209197402000427, "learning_rate": 4.7435424354243546e-05, "loss": 0.0010123031213879585, "step": 185260 }, { "epoch": 52.588702810105026, "grad_norm": 0.0570748969912529, "learning_rate": 4.743258586432018e-05, "loss": 0.0030731000006198883, "step": 185270 }, { "epoch": 52.59154130002838, "grad_norm": 0.11349531263113022, "learning_rate": 4.742974737439682e-05, "loss": 0.0019340431317687035, "step": 185280 }, { "epoch": 52.594379789951745, "grad_norm": 0.49392661452293396, "learning_rate": 4.7426908884473464e-05, "loss": 0.00028217807412147524, "step": 185290 }, { "epoch": 52.59721827987511, "grad_norm": 5.472712993621826, "learning_rate": 4.7424070394550105e-05, "loss": 0.0018920617178082467, "step": 185300 }, { "epoch": 52.60005676979847, "grad_norm": 0.14974389970302582, "learning_rate": 4.742123190462674e-05, "loss": 0.00013523474335670472, "step": 185310 }, { "epoch": 52.60289525972183, "grad_norm": 0.6442527770996094, "learning_rate": 4.741839341470338e-05, "loss": 0.0013882743194699287, "step": 185320 }, { "epoch": 52.60573374964519, "grad_norm": 0.75871741771698, "learning_rate": 4.741555492478002e-05, "loss": 0.005942117422819138, "step": 185330 }, { "epoch": 52.60857223956855, "grad_norm": 0.05675385519862175, "learning_rate": 4.741271643485666e-05, "loss": 0.003350488469004631, "step": 185340 }, { "epoch": 52.61141072949191, "grad_norm": 0.601360559463501, "learning_rate": 4.74098779449333e-05, "loss": 0.013979756832122802, "step": 185350 }, { "epoch": 52.61424921941527, "grad_norm": 1.3336950540542603, "learning_rate": 4.740703945500994e-05, "loss": 0.0018587268888950348, "step": 185360 }, { "epoch": 52.617087709338634, "grad_norm": 0.3688671886920929, "learning_rate": 4.7404200965086574e-05, "loss": 0.0012560855597257615, "step": 185370 }, { "epoch": 52.61992619926199, "grad_norm": 0.3484702706336975, "learning_rate": 4.7401362475163216e-05, "loss": 0.0013102907687425613, "step": 185380 }, { "epoch": 52.62276468918535, "grad_norm": 0.028412790969014168, "learning_rate": 4.739852398523986e-05, "loss": 0.0004403680562973022, "step": 185390 }, { "epoch": 52.625603179108715, "grad_norm": 0.013050766661763191, "learning_rate": 4.739568549531649e-05, "loss": 0.0008750434964895249, "step": 185400 }, { "epoch": 52.62844166903208, "grad_norm": 0.12130068242549896, "learning_rate": 4.739284700539313e-05, "loss": 0.000410991907119751, "step": 185410 }, { "epoch": 52.631280158955434, "grad_norm": 0.049021463841199875, "learning_rate": 4.7390008515469775e-05, "loss": 0.002135651558637619, "step": 185420 }, { "epoch": 52.6341186488788, "grad_norm": 12.463687896728516, "learning_rate": 4.738717002554641e-05, "loss": 0.0039816610515117645, "step": 185430 }, { "epoch": 52.63695713880216, "grad_norm": 0.10678695142269135, "learning_rate": 4.738433153562305e-05, "loss": 0.004211234301328659, "step": 185440 }, { "epoch": 52.639795628725516, "grad_norm": 0.021233148872852325, "learning_rate": 4.7381493045699685e-05, "loss": 0.0005090618506073952, "step": 185450 }, { "epoch": 52.64263411864888, "grad_norm": 7.264071941375732, "learning_rate": 4.737865455577633e-05, "loss": 0.004531967639923096, "step": 185460 }, { "epoch": 52.64547260857224, "grad_norm": 0.03256160765886307, "learning_rate": 4.737581606585297e-05, "loss": 0.0011610127985477448, "step": 185470 }, { "epoch": 52.6483110984956, "grad_norm": 0.01882196217775345, "learning_rate": 4.73729775759296e-05, "loss": 0.0006921608000993729, "step": 185480 }, { "epoch": 52.65114958841896, "grad_norm": 0.015317656099796295, "learning_rate": 4.737013908600625e-05, "loss": 0.0014936970546841621, "step": 185490 }, { "epoch": 52.65398807834232, "grad_norm": 0.1119641587138176, "learning_rate": 4.7367300596082885e-05, "loss": 0.0029028395190835, "step": 185500 }, { "epoch": 52.65398807834232, "eval_accuracy": 0.9809245247027405, "eval_loss": 0.0759124681353569, "eval_runtime": 56.3642, "eval_samples_per_second": 279.024, "eval_steps_per_second": 4.364, "step": 185500 }, { "epoch": 52.656826568265686, "grad_norm": 0.15363039076328278, "learning_rate": 4.736446210615953e-05, "loss": 0.0009171552956104279, "step": 185510 }, { "epoch": 52.65966505818904, "grad_norm": 0.07287100702524185, "learning_rate": 4.736162361623617e-05, "loss": 0.0008209219202399253, "step": 185520 }, { "epoch": 52.662503548112404, "grad_norm": 0.014530165120959282, "learning_rate": 4.73587851263128e-05, "loss": 0.000656149536371231, "step": 185530 }, { "epoch": 52.66534203803577, "grad_norm": 0.06363289803266525, "learning_rate": 4.7355946636389444e-05, "loss": 0.00034210365265607834, "step": 185540 }, { "epoch": 52.66818052795912, "grad_norm": 0.12391212582588196, "learning_rate": 4.735310814646608e-05, "loss": 0.000950322113931179, "step": 185550 }, { "epoch": 52.671019017882486, "grad_norm": 5.89609432220459, "learning_rate": 4.735026965654272e-05, "loss": 0.0030615219846367838, "step": 185560 }, { "epoch": 52.67385750780585, "grad_norm": 0.10568337887525558, "learning_rate": 4.734743116661936e-05, "loss": 0.0006315674632787705, "step": 185570 }, { "epoch": 52.676695997729205, "grad_norm": 0.10646640509366989, "learning_rate": 4.7344592676695996e-05, "loss": 0.0005443288013339042, "step": 185580 }, { "epoch": 52.67953448765257, "grad_norm": 2.1660890579223633, "learning_rate": 4.7341754186772644e-05, "loss": 0.0012855898588895799, "step": 185590 }, { "epoch": 52.68237297757593, "grad_norm": 0.019104113802313805, "learning_rate": 4.733891569684928e-05, "loss": 0.000501888245344162, "step": 185600 }, { "epoch": 52.68521146749929, "grad_norm": 0.03360052406787872, "learning_rate": 4.733607720692591e-05, "loss": 0.00048347990959882734, "step": 185610 }, { "epoch": 52.68804995742265, "grad_norm": 3.6863536834716797, "learning_rate": 4.733323871700256e-05, "loss": 0.00961771011352539, "step": 185620 }, { "epoch": 52.69088844734601, "grad_norm": 0.05326869338750839, "learning_rate": 4.7330400227079196e-05, "loss": 0.015971855819225313, "step": 185630 }, { "epoch": 52.693726937269375, "grad_norm": 0.09450916200876236, "learning_rate": 4.732756173715584e-05, "loss": 0.004592132940888405, "step": 185640 }, { "epoch": 52.69656542719273, "grad_norm": 0.08915892988443375, "learning_rate": 4.732472324723247e-05, "loss": 0.004639739170670509, "step": 185650 }, { "epoch": 52.69940391711609, "grad_norm": 6.140329837799072, "learning_rate": 4.732188475730911e-05, "loss": 0.001669386960566044, "step": 185660 }, { "epoch": 52.702242407039456, "grad_norm": 0.029488248750567436, "learning_rate": 4.7319046267385755e-05, "loss": 0.00014698877930641174, "step": 185670 }, { "epoch": 52.70508089696281, "grad_norm": 0.04185565933585167, "learning_rate": 4.731620777746239e-05, "loss": 0.004308617860078812, "step": 185680 }, { "epoch": 52.707919386886175, "grad_norm": 0.7924416065216064, "learning_rate": 4.731336928753903e-05, "loss": 0.0002957461401820183, "step": 185690 }, { "epoch": 52.71075787680954, "grad_norm": 14.000730514526367, "learning_rate": 4.731053079761567e-05, "loss": 0.005313391238451004, "step": 185700 }, { "epoch": 52.7135963667329, "grad_norm": 0.11984480172395706, "learning_rate": 4.730769230769231e-05, "loss": 0.0002447478473186493, "step": 185710 }, { "epoch": 52.71643485665626, "grad_norm": 0.2539322078227997, "learning_rate": 4.7304853817768955e-05, "loss": 0.0002592472359538078, "step": 185720 }, { "epoch": 52.71927334657962, "grad_norm": 0.05835887789726257, "learning_rate": 4.730201532784559e-05, "loss": 0.0007873373106122017, "step": 185730 }, { "epoch": 52.72211183650298, "grad_norm": 10.232585906982422, "learning_rate": 4.7299176837922224e-05, "loss": 0.007895128428936004, "step": 185740 }, { "epoch": 52.72495032642634, "grad_norm": 0.001091522048227489, "learning_rate": 4.7296338347998865e-05, "loss": 0.005472860485315323, "step": 185750 }, { "epoch": 52.7277888163497, "grad_norm": 0.012160783633589745, "learning_rate": 4.729349985807551e-05, "loss": 0.00023736972361803054, "step": 185760 }, { "epoch": 52.730627306273064, "grad_norm": 1.2207210063934326, "learning_rate": 4.729066136815215e-05, "loss": 0.0060168102383613585, "step": 185770 }, { "epoch": 52.73346579619643, "grad_norm": 2.3763766288757324, "learning_rate": 4.728782287822878e-05, "loss": 0.008423832058906556, "step": 185780 }, { "epoch": 52.73630428611978, "grad_norm": 16.365976333618164, "learning_rate": 4.7284984388305424e-05, "loss": 0.007823915779590606, "step": 185790 }, { "epoch": 52.739142776043145, "grad_norm": 0.15590150654315948, "learning_rate": 4.7282145898382065e-05, "loss": 0.000490293838083744, "step": 185800 }, { "epoch": 52.74198126596651, "grad_norm": 0.019560817629098892, "learning_rate": 4.72793074084587e-05, "loss": 9.958986192941666e-05, "step": 185810 }, { "epoch": 52.744819755889864, "grad_norm": 0.058778103440999985, "learning_rate": 4.727646891853534e-05, "loss": 0.0004149764776229858, "step": 185820 }, { "epoch": 52.74765824581323, "grad_norm": 0.012627260759472847, "learning_rate": 4.727363042861198e-05, "loss": 0.002369004860520363, "step": 185830 }, { "epoch": 52.75049673573659, "grad_norm": 3.0031826496124268, "learning_rate": 4.727079193868862e-05, "loss": 0.0034863315522670746, "step": 185840 }, { "epoch": 52.753335225659946, "grad_norm": 0.1005353033542633, "learning_rate": 4.726795344876526e-05, "loss": 0.0003981897607445717, "step": 185850 }, { "epoch": 52.75617371558331, "grad_norm": 1.9412529468536377, "learning_rate": 4.72651149588419e-05, "loss": 0.0013687789440155029, "step": 185860 }, { "epoch": 52.75901220550667, "grad_norm": 0.13644549250602722, "learning_rate": 4.7262276468918535e-05, "loss": 0.00038100965321063995, "step": 185870 }, { "epoch": 52.761850695430034, "grad_norm": 15.228586196899414, "learning_rate": 4.7259437978995176e-05, "loss": 0.0063567541539669035, "step": 185880 }, { "epoch": 52.76468918535339, "grad_norm": 0.12208762019872665, "learning_rate": 4.725659948907182e-05, "loss": 0.000528690405189991, "step": 185890 }, { "epoch": 52.76752767527675, "grad_norm": 0.44312846660614014, "learning_rate": 4.725376099914845e-05, "loss": 0.012482520192861557, "step": 185900 }, { "epoch": 52.770366165200116, "grad_norm": 8.12544059753418, "learning_rate": 4.7250922509225094e-05, "loss": 0.0022784382104873656, "step": 185910 }, { "epoch": 52.77320465512347, "grad_norm": 0.0068513681180775166, "learning_rate": 4.7248084019301735e-05, "loss": 0.006821810454130173, "step": 185920 }, { "epoch": 52.776043145046835, "grad_norm": 0.011704165488481522, "learning_rate": 4.7245245529378376e-05, "loss": 0.00030885413289070127, "step": 185930 }, { "epoch": 52.7788816349702, "grad_norm": 0.050474848598241806, "learning_rate": 4.724240703945501e-05, "loss": 0.001341392658650875, "step": 185940 }, { "epoch": 52.78172012489355, "grad_norm": 0.0969727411866188, "learning_rate": 4.7239568549531645e-05, "loss": 0.00021304208785295488, "step": 185950 }, { "epoch": 52.784558614816916, "grad_norm": 0.0031421114690601826, "learning_rate": 4.7236730059608294e-05, "loss": 0.000364525243639946, "step": 185960 }, { "epoch": 52.78739710474028, "grad_norm": 5.721691131591797, "learning_rate": 4.723389156968493e-05, "loss": 0.002230835147202015, "step": 185970 }, { "epoch": 52.79023559466364, "grad_norm": 0.029415858909487724, "learning_rate": 4.723105307976157e-05, "loss": 0.00021257977932691573, "step": 185980 }, { "epoch": 52.793074084587, "grad_norm": 0.224853977560997, "learning_rate": 4.722821458983821e-05, "loss": 0.00020468980073928833, "step": 185990 }, { "epoch": 52.79591257451036, "grad_norm": 0.18704254925251007, "learning_rate": 4.7225376099914846e-05, "loss": 0.00012830812484025954, "step": 186000 }, { "epoch": 52.79591257451036, "eval_accuracy": 0.9835315063266993, "eval_loss": 0.06570006906986237, "eval_runtime": 55.9142, "eval_samples_per_second": 281.27, "eval_steps_per_second": 4.4, "step": 186000 }, { "epoch": 52.79875106443372, "grad_norm": 0.016635598614811897, "learning_rate": 4.722253760999149e-05, "loss": 0.00022850316017866134, "step": 186010 }, { "epoch": 52.80158955435708, "grad_norm": 1.873695731163025, "learning_rate": 4.721969912006813e-05, "loss": 0.0011407041922211647, "step": 186020 }, { "epoch": 52.80442804428044, "grad_norm": 0.04785872995853424, "learning_rate": 4.721686063014476e-05, "loss": 0.00041925553232431413, "step": 186030 }, { "epoch": 52.807266534203805, "grad_norm": 0.0027702387887984514, "learning_rate": 4.7214022140221404e-05, "loss": 0.001776890829205513, "step": 186040 }, { "epoch": 52.81010502412717, "grad_norm": 0.010779018513858318, "learning_rate": 4.7211183650298046e-05, "loss": 0.00012733843177556992, "step": 186050 }, { "epoch": 52.812943514050524, "grad_norm": 0.02788318321108818, "learning_rate": 4.720834516037469e-05, "loss": 0.0077476367354393, "step": 186060 }, { "epoch": 52.81578200397389, "grad_norm": 0.024189719930291176, "learning_rate": 4.720550667045132e-05, "loss": 0.000574318878352642, "step": 186070 }, { "epoch": 52.81862049389725, "grad_norm": 0.07144264131784439, "learning_rate": 4.7202668180527956e-05, "loss": 0.0003532513976097107, "step": 186080 }, { "epoch": 52.821458983820605, "grad_norm": 0.25176456570625305, "learning_rate": 4.7199829690604604e-05, "loss": 0.0005218753591179848, "step": 186090 }, { "epoch": 52.82429747374397, "grad_norm": 0.002251802943646908, "learning_rate": 4.719699120068124e-05, "loss": 0.0010892942547798157, "step": 186100 }, { "epoch": 52.82713596366733, "grad_norm": 3.5346882343292236, "learning_rate": 4.719415271075788e-05, "loss": 0.0008361693471670151, "step": 186110 }, { "epoch": 52.82997445359069, "grad_norm": 0.005217205733060837, "learning_rate": 4.719131422083452e-05, "loss": 0.0004434758797287941, "step": 186120 }, { "epoch": 52.83281294351405, "grad_norm": 1.3556163311004639, "learning_rate": 4.7188475730911156e-05, "loss": 0.00172855481505394, "step": 186130 }, { "epoch": 52.83565143343741, "grad_norm": 0.07533110678195953, "learning_rate": 4.71856372409878e-05, "loss": 0.0002267155796289444, "step": 186140 }, { "epoch": 52.838489923360775, "grad_norm": 0.00495623704046011, "learning_rate": 4.718279875106444e-05, "loss": 0.00018960684537887572, "step": 186150 }, { "epoch": 52.84132841328413, "grad_norm": 0.00418949406594038, "learning_rate": 4.7179960261141074e-05, "loss": 0.0015968812629580497, "step": 186160 }, { "epoch": 52.844166903207494, "grad_norm": 6.000545978546143, "learning_rate": 4.7177121771217715e-05, "loss": 0.001214885152876377, "step": 186170 }, { "epoch": 52.84700539313086, "grad_norm": 0.008242501877248287, "learning_rate": 4.717428328129435e-05, "loss": 0.0013046879321336746, "step": 186180 }, { "epoch": 52.84984388305421, "grad_norm": 0.432036817073822, "learning_rate": 4.7171444791371e-05, "loss": 0.0007231449708342552, "step": 186190 }, { "epoch": 52.852682372977576, "grad_norm": 2.154263734817505, "learning_rate": 4.716860630144763e-05, "loss": 0.0004988312721252441, "step": 186200 }, { "epoch": 52.85552086290094, "grad_norm": 0.042576976120471954, "learning_rate": 4.716576781152427e-05, "loss": 0.007776612043380737, "step": 186210 }, { "epoch": 52.858359352824294, "grad_norm": 2.2684245109558105, "learning_rate": 4.7162929321600915e-05, "loss": 0.0051719781011343, "step": 186220 }, { "epoch": 52.86119784274766, "grad_norm": 0.19413770735263824, "learning_rate": 4.716009083167755e-05, "loss": 0.0028795111924409867, "step": 186230 }, { "epoch": 52.86403633267102, "grad_norm": 0.33108654618263245, "learning_rate": 4.715725234175419e-05, "loss": 0.0022456936538219454, "step": 186240 }, { "epoch": 52.86687482259438, "grad_norm": 0.05616806447505951, "learning_rate": 4.715441385183083e-05, "loss": 0.007983347028493881, "step": 186250 }, { "epoch": 52.86971331251774, "grad_norm": 0.018311521038413048, "learning_rate": 4.715157536190747e-05, "loss": 0.0031798869371414185, "step": 186260 }, { "epoch": 52.8725518024411, "grad_norm": 0.7872881293296814, "learning_rate": 4.714873687198411e-05, "loss": 0.003210112452507019, "step": 186270 }, { "epoch": 52.875390292364465, "grad_norm": 0.26887574791908264, "learning_rate": 4.714589838206074e-05, "loss": 0.0008098864927887917, "step": 186280 }, { "epoch": 52.87822878228782, "grad_norm": 0.09680979698896408, "learning_rate": 4.7143059892137384e-05, "loss": 0.004866090416908264, "step": 186290 }, { "epoch": 52.88106727221118, "grad_norm": 0.3520856499671936, "learning_rate": 4.7140221402214026e-05, "loss": 0.0005068710073828697, "step": 186300 }, { "epoch": 52.883905762134546, "grad_norm": 0.02826826088130474, "learning_rate": 4.713738291229066e-05, "loss": 0.022502207756042482, "step": 186310 }, { "epoch": 52.8867442520579, "grad_norm": 0.02133527211844921, "learning_rate": 4.71345444223673e-05, "loss": 0.0017921241000294686, "step": 186320 }, { "epoch": 52.889582741981265, "grad_norm": 0.011763508431613445, "learning_rate": 4.713170593244394e-05, "loss": 0.002256706170737743, "step": 186330 }, { "epoch": 52.89242123190463, "grad_norm": 0.0240392554551363, "learning_rate": 4.712886744252058e-05, "loss": 0.0004987785592675209, "step": 186340 }, { "epoch": 52.89525972182799, "grad_norm": 0.030537404119968414, "learning_rate": 4.7126028952597226e-05, "loss": 0.00019188448786735534, "step": 186350 }, { "epoch": 52.898098211751346, "grad_norm": 0.026784107089042664, "learning_rate": 4.712319046267386e-05, "loss": 0.003177011013031006, "step": 186360 }, { "epoch": 52.90093670167471, "grad_norm": 0.09901857376098633, "learning_rate": 4.7120351972750495e-05, "loss": 0.0004964919760823249, "step": 186370 }, { "epoch": 52.90377519159807, "grad_norm": 0.05348668247461319, "learning_rate": 4.7117513482827136e-05, "loss": 0.0004711730405688286, "step": 186380 }, { "epoch": 52.90661368152143, "grad_norm": 0.9497933983802795, "learning_rate": 4.711467499290378e-05, "loss": 0.00022356007248163222, "step": 186390 }, { "epoch": 52.90945217144479, "grad_norm": 0.056874942034482956, "learning_rate": 4.711183650298042e-05, "loss": 0.0009997865185141564, "step": 186400 }, { "epoch": 52.912290661368154, "grad_norm": 0.006967104040086269, "learning_rate": 4.7108998013057054e-05, "loss": 0.001365843415260315, "step": 186410 }, { "epoch": 52.91512915129151, "grad_norm": 4.5024614334106445, "learning_rate": 4.7106159523133695e-05, "loss": 0.004343995824456215, "step": 186420 }, { "epoch": 52.91796764121487, "grad_norm": 0.055281419306993484, "learning_rate": 4.7103321033210337e-05, "loss": 0.0032986804842948915, "step": 186430 }, { "epoch": 52.920806131138235, "grad_norm": 1.401594877243042, "learning_rate": 4.710048254328697e-05, "loss": 0.009373389184474945, "step": 186440 }, { "epoch": 52.9236446210616, "grad_norm": 0.2038656324148178, "learning_rate": 4.709764405336361e-05, "loss": 0.0003214653581380844, "step": 186450 }, { "epoch": 52.926483110984954, "grad_norm": 0.05418647825717926, "learning_rate": 4.7094805563440254e-05, "loss": 0.005343636125326156, "step": 186460 }, { "epoch": 52.92932160090832, "grad_norm": 0.014542952179908752, "learning_rate": 4.709196707351689e-05, "loss": 0.012939165532588958, "step": 186470 }, { "epoch": 52.93216009083168, "grad_norm": 0.025729529559612274, "learning_rate": 4.708912858359353e-05, "loss": 0.0006884654983878135, "step": 186480 }, { "epoch": 52.934998580755035, "grad_norm": 0.08771073818206787, "learning_rate": 4.708629009367017e-05, "loss": 0.0005303770303726196, "step": 186490 }, { "epoch": 52.9378370706784, "grad_norm": 0.004762848373502493, "learning_rate": 4.7083451603746806e-05, "loss": 0.0013032592833042145, "step": 186500 }, { "epoch": 52.9378370706784, "eval_accuracy": 0.9834679214090418, "eval_loss": 0.07127892225980759, "eval_runtime": 44.5559, "eval_samples_per_second": 352.972, "eval_steps_per_second": 5.521, "step": 186500 }, { "epoch": 52.94067556060176, "grad_norm": 0.22384561598300934, "learning_rate": 4.708061311382345e-05, "loss": 0.0007330948486924171, "step": 186510 }, { "epoch": 52.943514050525124, "grad_norm": 0.006382892373949289, "learning_rate": 4.707777462390009e-05, "loss": 0.0018702585250139237, "step": 186520 }, { "epoch": 52.94635254044848, "grad_norm": 9.266935348510742, "learning_rate": 4.707493613397673e-05, "loss": 0.0025921519845724106, "step": 186530 }, { "epoch": 52.94919103037184, "grad_norm": 0.02377576008439064, "learning_rate": 4.7072097644053365e-05, "loss": 0.011337600648403168, "step": 186540 }, { "epoch": 52.952029520295206, "grad_norm": 0.2910000681877136, "learning_rate": 4.7069259154130006e-05, "loss": 0.0010868718847632409, "step": 186550 }, { "epoch": 52.95486801021856, "grad_norm": 0.036833204329013824, "learning_rate": 4.706642066420665e-05, "loss": 0.004318244755268097, "step": 186560 }, { "epoch": 52.957706500141924, "grad_norm": 0.04249200597405434, "learning_rate": 4.706358217428328e-05, "loss": 0.0008792255073785781, "step": 186570 }, { "epoch": 52.96054499006529, "grad_norm": 0.0377897284924984, "learning_rate": 4.706074368435992e-05, "loss": 0.0011346610262989998, "step": 186580 }, { "epoch": 52.96338347998864, "grad_norm": 0.3222760856151581, "learning_rate": 4.7057905194436565e-05, "loss": 0.0032673828303813935, "step": 186590 }, { "epoch": 52.966221969912006, "grad_norm": 0.0549791119992733, "learning_rate": 4.70550667045132e-05, "loss": 0.004845529422163963, "step": 186600 }, { "epoch": 52.96906045983537, "grad_norm": 0.05527309328317642, "learning_rate": 4.705222821458984e-05, "loss": 0.000865054689347744, "step": 186610 }, { "epoch": 52.97189894975873, "grad_norm": 0.05705593153834343, "learning_rate": 4.704938972466648e-05, "loss": 0.0016386115923523903, "step": 186620 }, { "epoch": 52.97473743968209, "grad_norm": 0.04857281222939491, "learning_rate": 4.704655123474312e-05, "loss": 0.0006119724363088608, "step": 186630 }, { "epoch": 52.97757592960545, "grad_norm": 0.01844656653702259, "learning_rate": 4.704371274481976e-05, "loss": 0.0003347449004650116, "step": 186640 }, { "epoch": 52.98041441952881, "grad_norm": 0.19077634811401367, "learning_rate": 4.70408742548964e-05, "loss": 0.004211200773715973, "step": 186650 }, { "epoch": 52.98325290945217, "grad_norm": 0.3203301429748535, "learning_rate": 4.703803576497304e-05, "loss": 0.012954367697238922, "step": 186660 }, { "epoch": 52.98609139937553, "grad_norm": 0.10439122468233109, "learning_rate": 4.7035197275049675e-05, "loss": 0.002789392322301865, "step": 186670 }, { "epoch": 52.988929889298895, "grad_norm": 8.618298530578613, "learning_rate": 4.703235878512631e-05, "loss": 0.0031829245388507845, "step": 186680 }, { "epoch": 52.99176837922225, "grad_norm": 0.013003617525100708, "learning_rate": 4.702952029520296e-05, "loss": 0.003096919506788254, "step": 186690 }, { "epoch": 52.99460686914561, "grad_norm": 0.0056255897507071495, "learning_rate": 4.702668180527959e-05, "loss": 0.000544399581849575, "step": 186700 }, { "epoch": 52.997445359068976, "grad_norm": 0.22839057445526123, "learning_rate": 4.7023843315356234e-05, "loss": 0.004065190628170967, "step": 186710 }, { "epoch": 53.00028384899234, "grad_norm": 0.10586441308259964, "learning_rate": 4.7021004825432875e-05, "loss": 0.0009351364336907863, "step": 186720 }, { "epoch": 53.003122338915695, "grad_norm": 3.5694642066955566, "learning_rate": 4.701816633550951e-05, "loss": 0.0024863604456186296, "step": 186730 }, { "epoch": 53.00596082883906, "grad_norm": 0.13441750407218933, "learning_rate": 4.701532784558615e-05, "loss": 0.008861520886421203, "step": 186740 }, { "epoch": 53.00879931876242, "grad_norm": 0.24031712114810944, "learning_rate": 4.701248935566279e-05, "loss": 0.018238991498947144, "step": 186750 }, { "epoch": 53.01163780868578, "grad_norm": 0.03259945288300514, "learning_rate": 4.700965086573943e-05, "loss": 0.0008318010717630386, "step": 186760 }, { "epoch": 53.01447629860914, "grad_norm": 4.942612171173096, "learning_rate": 4.700681237581607e-05, "loss": 0.004444971308112144, "step": 186770 }, { "epoch": 53.0173147885325, "grad_norm": 3.7449097633361816, "learning_rate": 4.70039738858927e-05, "loss": 0.0020644424483180048, "step": 186780 }, { "epoch": 53.02015327845586, "grad_norm": 0.48760169744491577, "learning_rate": 4.7001135395969345e-05, "loss": 0.002169320546090603, "step": 186790 }, { "epoch": 53.02299176837922, "grad_norm": 0.0005498233949765563, "learning_rate": 4.6998296906045986e-05, "loss": 0.005770296603441238, "step": 186800 }, { "epoch": 53.025830258302584, "grad_norm": 0.4825044274330139, "learning_rate": 4.699545841612262e-05, "loss": 0.0022848298773169517, "step": 186810 }, { "epoch": 53.02866874822595, "grad_norm": 0.008722949773073196, "learning_rate": 4.699261992619927e-05, "loss": 0.005037836730480194, "step": 186820 }, { "epoch": 53.0315072381493, "grad_norm": 1.23482346534729, "learning_rate": 4.6989781436275903e-05, "loss": 0.0007717903703451156, "step": 186830 }, { "epoch": 53.034345728072665, "grad_norm": 0.022619714960455894, "learning_rate": 4.698694294635254e-05, "loss": 0.0007439568638801574, "step": 186840 }, { "epoch": 53.03718421799603, "grad_norm": 0.023453237488865852, "learning_rate": 4.6984104456429186e-05, "loss": 0.0003047434613108635, "step": 186850 }, { "epoch": 53.040022707919384, "grad_norm": 0.004586334340274334, "learning_rate": 4.698126596650582e-05, "loss": 0.01631496995687485, "step": 186860 }, { "epoch": 53.04286119784275, "grad_norm": 0.19145101308822632, "learning_rate": 4.697842747658246e-05, "loss": 0.000773053802549839, "step": 186870 }, { "epoch": 53.04569968776611, "grad_norm": 0.0029816182795912027, "learning_rate": 4.69755889866591e-05, "loss": 0.0001376960426568985, "step": 186880 }, { "epoch": 53.04853817768947, "grad_norm": 0.024958837777376175, "learning_rate": 4.697275049673574e-05, "loss": 0.00016320738941431046, "step": 186890 }, { "epoch": 53.05137666761283, "grad_norm": 0.05181051790714264, "learning_rate": 4.696991200681238e-05, "loss": 0.00019502788782119752, "step": 186900 }, { "epoch": 53.05421515753619, "grad_norm": 0.039821576327085495, "learning_rate": 4.6967073516889014e-05, "loss": 0.0005204916000366211, "step": 186910 }, { "epoch": 53.057053647459554, "grad_norm": 0.046234603971242905, "learning_rate": 4.6964235026965656e-05, "loss": 0.002110708877444267, "step": 186920 }, { "epoch": 53.05989213738291, "grad_norm": 0.004494619555771351, "learning_rate": 4.69613965370423e-05, "loss": 0.00018595606088638305, "step": 186930 }, { "epoch": 53.06273062730627, "grad_norm": 0.09560330957174301, "learning_rate": 4.695855804711893e-05, "loss": 0.001325339823961258, "step": 186940 }, { "epoch": 53.065569117229636, "grad_norm": 0.09573312848806381, "learning_rate": 4.695571955719558e-05, "loss": 0.0005349483340978623, "step": 186950 }, { "epoch": 53.06840760715299, "grad_norm": 0.036516133695840836, "learning_rate": 4.6952881067272214e-05, "loss": 0.0029218288138508795, "step": 186960 }, { "epoch": 53.071246097076354, "grad_norm": 0.1750776171684265, "learning_rate": 4.695004257734885e-05, "loss": 0.003328179568052292, "step": 186970 }, { "epoch": 53.07408458699972, "grad_norm": 0.01753259263932705, "learning_rate": 4.694720408742549e-05, "loss": 0.000519055686891079, "step": 186980 }, { "epoch": 53.07692307692308, "grad_norm": 0.49962496757507324, "learning_rate": 4.694436559750213e-05, "loss": 0.0035759113729000092, "step": 186990 }, { "epoch": 53.079761566846436, "grad_norm": 0.235210120677948, "learning_rate": 4.694152710757877e-05, "loss": 0.0014968534931540489, "step": 187000 }, { "epoch": 53.079761566846436, "eval_accuracy": 0.9783811279964393, "eval_loss": 0.09238829463720322, "eval_runtime": 128.4895, "eval_samples_per_second": 122.399, "eval_steps_per_second": 1.915, "step": 187000 }, { "epoch": 53.0826000567698, "grad_norm": 0.0455547459423542, "learning_rate": 4.693868861765541e-05, "loss": 0.002153477445244789, "step": 187010 }, { "epoch": 53.08543854669316, "grad_norm": 5.205793380737305, "learning_rate": 4.693585012773205e-05, "loss": 0.001475325971841812, "step": 187020 }, { "epoch": 53.08827703661652, "grad_norm": 0.008025826886296272, "learning_rate": 4.693301163780869e-05, "loss": 0.0011707143858075142, "step": 187030 }, { "epoch": 53.09111552653988, "grad_norm": 0.10294656455516815, "learning_rate": 4.6930173147885325e-05, "loss": 0.006253089755773544, "step": 187040 }, { "epoch": 53.09395401646324, "grad_norm": 0.04103533551096916, "learning_rate": 4.6927334657961966e-05, "loss": 0.0005041202530264854, "step": 187050 }, { "epoch": 53.0967925063866, "grad_norm": 0.08236850798130035, "learning_rate": 4.692449616803861e-05, "loss": 0.01273840367794037, "step": 187060 }, { "epoch": 53.09963099630996, "grad_norm": 0.06922140717506409, "learning_rate": 4.692165767811524e-05, "loss": 0.0004552677273750305, "step": 187070 }, { "epoch": 53.102469486233325, "grad_norm": 0.06653392314910889, "learning_rate": 4.6918819188191884e-05, "loss": 0.005298956483602524, "step": 187080 }, { "epoch": 53.10530797615669, "grad_norm": 1.9301447868347168, "learning_rate": 4.6915980698268525e-05, "loss": 0.002005183883011341, "step": 187090 }, { "epoch": 53.10814646608004, "grad_norm": 0.04383683577179909, "learning_rate": 4.691314220834516e-05, "loss": 0.009712012112140655, "step": 187100 }, { "epoch": 53.110984956003406, "grad_norm": 0.0017118080286309123, "learning_rate": 4.69103037184218e-05, "loss": 0.0004103686660528183, "step": 187110 }, { "epoch": 53.11382344592677, "grad_norm": 0.013888606801629066, "learning_rate": 4.690746522849844e-05, "loss": 0.00380571186542511, "step": 187120 }, { "epoch": 53.116661935850125, "grad_norm": 0.014326425269246101, "learning_rate": 4.6904626738575084e-05, "loss": 0.0004034256562590599, "step": 187130 }, { "epoch": 53.11950042577349, "grad_norm": 1.5326790809631348, "learning_rate": 4.690178824865172e-05, "loss": 0.0012012403458356857, "step": 187140 }, { "epoch": 53.12233891569685, "grad_norm": 0.80279141664505, "learning_rate": 4.689894975872836e-05, "loss": 0.0007030868902802468, "step": 187150 }, { "epoch": 53.12517740562021, "grad_norm": 0.07523872703313828, "learning_rate": 4.6896111268805e-05, "loss": 0.016684500873088835, "step": 187160 }, { "epoch": 53.12801589554357, "grad_norm": 7.314498424530029, "learning_rate": 4.6893272778881636e-05, "loss": 0.0030850108712911608, "step": 187170 }, { "epoch": 53.13085438546693, "grad_norm": 0.08760488033294678, "learning_rate": 4.689043428895828e-05, "loss": 0.0028865717351436613, "step": 187180 }, { "epoch": 53.133692875390295, "grad_norm": 4.005886554718018, "learning_rate": 4.688759579903492e-05, "loss": 0.0010461851954460144, "step": 187190 }, { "epoch": 53.13653136531365, "grad_norm": 0.012314197607338428, "learning_rate": 4.688475730911155e-05, "loss": 0.006471996009349823, "step": 187200 }, { "epoch": 53.139369855237014, "grad_norm": 0.003432242199778557, "learning_rate": 4.6881918819188194e-05, "loss": 0.0003795234486460686, "step": 187210 }, { "epoch": 53.14220834516038, "grad_norm": 3.476226329803467, "learning_rate": 4.6879080329264836e-05, "loss": 0.003907265514135361, "step": 187220 }, { "epoch": 53.14504683508373, "grad_norm": 12.92143726348877, "learning_rate": 4.687624183934147e-05, "loss": 0.003264114260673523, "step": 187230 }, { "epoch": 53.147885325007096, "grad_norm": 0.2656165063381195, "learning_rate": 4.687340334941811e-05, "loss": 0.00024516694247722626, "step": 187240 }, { "epoch": 53.15072381493046, "grad_norm": 0.5326970815658569, "learning_rate": 4.687056485949475e-05, "loss": 0.0017795601859688758, "step": 187250 }, { "epoch": 53.15356230485382, "grad_norm": 0.024588854983448982, "learning_rate": 4.686772636957139e-05, "loss": 0.003328936919569969, "step": 187260 }, { "epoch": 53.15640079477718, "grad_norm": 0.2559671998023987, "learning_rate": 4.686488787964803e-05, "loss": 0.003309999406337738, "step": 187270 }, { "epoch": 53.15923928470054, "grad_norm": 0.07732243835926056, "learning_rate": 4.6862049389724664e-05, "loss": 0.0002770077437162399, "step": 187280 }, { "epoch": 53.1620777746239, "grad_norm": 0.14040511846542358, "learning_rate": 4.685921089980131e-05, "loss": 0.00014907475560903548, "step": 187290 }, { "epoch": 53.16491626454726, "grad_norm": 0.04558083415031433, "learning_rate": 4.6856372409877946e-05, "loss": 0.00020583607256412505, "step": 187300 }, { "epoch": 53.16775475447062, "grad_norm": 0.01783425733447075, "learning_rate": 4.685353391995458e-05, "loss": 0.0002909122034907341, "step": 187310 }, { "epoch": 53.170593244393984, "grad_norm": 0.025508081540465355, "learning_rate": 4.685069543003123e-05, "loss": 0.00013495534658432008, "step": 187320 }, { "epoch": 53.17343173431734, "grad_norm": 0.04516150429844856, "learning_rate": 4.6847856940107864e-05, "loss": 9.752120822668076e-05, "step": 187330 }, { "epoch": 53.1762702242407, "grad_norm": 0.023202084004878998, "learning_rate": 4.6845018450184505e-05, "loss": 0.0002689015120267868, "step": 187340 }, { "epoch": 53.179108714164066, "grad_norm": 0.002763382624834776, "learning_rate": 4.6842179960261147e-05, "loss": 0.00013608280569314956, "step": 187350 }, { "epoch": 53.18194720408743, "grad_norm": 0.4175342321395874, "learning_rate": 4.683934147033778e-05, "loss": 0.0015945982187986374, "step": 187360 }, { "epoch": 53.184785694010785, "grad_norm": 0.1638878583908081, "learning_rate": 4.683650298041442e-05, "loss": 0.0002814119681715965, "step": 187370 }, { "epoch": 53.18762418393415, "grad_norm": 0.22549423575401306, "learning_rate": 4.6833664490491064e-05, "loss": 0.0002527840435504913, "step": 187380 }, { "epoch": 53.19046267385751, "grad_norm": 0.048118144273757935, "learning_rate": 4.68308260005677e-05, "loss": 0.00015174541622400283, "step": 187390 }, { "epoch": 53.193301163780866, "grad_norm": 0.05120893940329552, "learning_rate": 4.682798751064434e-05, "loss": 0.00015716049820184707, "step": 187400 }, { "epoch": 53.19613965370423, "grad_norm": 0.03949708119034767, "learning_rate": 4.6825149020720974e-05, "loss": 0.0023840010166168214, "step": 187410 }, { "epoch": 53.19897814362759, "grad_norm": 0.02788456529378891, "learning_rate": 4.682231053079762e-05, "loss": 0.0006977297365665436, "step": 187420 }, { "epoch": 53.20181663355095, "grad_norm": 0.07698244601488113, "learning_rate": 4.681947204087426e-05, "loss": 0.00046371836215257644, "step": 187430 }, { "epoch": 53.20465512347431, "grad_norm": 0.010936486534774303, "learning_rate": 4.681663355095089e-05, "loss": 0.0031691744923591616, "step": 187440 }, { "epoch": 53.20749361339767, "grad_norm": 0.019852809607982635, "learning_rate": 4.681379506102754e-05, "loss": 0.00012412108480930328, "step": 187450 }, { "epoch": 53.210332103321036, "grad_norm": 0.021687312051653862, "learning_rate": 4.6810956571104175e-05, "loss": 0.0002482214942574501, "step": 187460 }, { "epoch": 53.21317059324439, "grad_norm": 0.00891370140016079, "learning_rate": 4.6808118081180816e-05, "loss": 0.00013768523931503295, "step": 187470 }, { "epoch": 53.216009083167755, "grad_norm": 0.27394160628318787, "learning_rate": 4.680527959125746e-05, "loss": 0.00039762165397405624, "step": 187480 }, { "epoch": 53.21884757309112, "grad_norm": 0.023942217230796814, "learning_rate": 4.680244110133409e-05, "loss": 0.00015432331711053848, "step": 187490 }, { "epoch": 53.221686063014474, "grad_norm": 1.0118012428283691, "learning_rate": 4.679960261141073e-05, "loss": 0.001561463065445423, "step": 187500 }, { "epoch": 53.221686063014474, "eval_accuracy": 0.9849939594328225, "eval_loss": 0.05925201252102852, "eval_runtime": 118.2469, "eval_samples_per_second": 133.001, "eval_steps_per_second": 2.08, "step": 187500 }, { "epoch": 53.22452455293784, "grad_norm": 0.02988619916141033, "learning_rate": 4.679676412148737e-05, "loss": 8.819717913866043e-05, "step": 187510 }, { "epoch": 53.2273630428612, "grad_norm": 0.012011836282908916, "learning_rate": 4.679392563156401e-05, "loss": 0.00012664198875427245, "step": 187520 }, { "epoch": 53.230201532784555, "grad_norm": 0.04556143656373024, "learning_rate": 4.679108714164065e-05, "loss": 0.001404605619609356, "step": 187530 }, { "epoch": 53.23304002270792, "grad_norm": 1.7296466827392578, "learning_rate": 4.6788248651717285e-05, "loss": 0.0004912916570901871, "step": 187540 }, { "epoch": 53.23587851263128, "grad_norm": 5.337174892425537, "learning_rate": 4.678541016179393e-05, "loss": 0.002336675859987736, "step": 187550 }, { "epoch": 53.238717002554644, "grad_norm": 0.4727272391319275, "learning_rate": 4.678257167187057e-05, "loss": 0.00025420226156711576, "step": 187560 }, { "epoch": 53.241555492478, "grad_norm": 0.07775087654590607, "learning_rate": 4.67797331819472e-05, "loss": 0.002784893475472927, "step": 187570 }, { "epoch": 53.24439398240136, "grad_norm": 0.0056413086131215096, "learning_rate": 4.677689469202385e-05, "loss": 0.00020097997039556504, "step": 187580 }, { "epoch": 53.247232472324725, "grad_norm": 0.047702476382255554, "learning_rate": 4.6774056202100485e-05, "loss": 0.00019128508865833281, "step": 187590 }, { "epoch": 53.25007096224808, "grad_norm": 1.140633463859558, "learning_rate": 4.677121771217713e-05, "loss": 0.000377008318901062, "step": 187600 }, { "epoch": 53.252909452171444, "grad_norm": 0.02369578741490841, "learning_rate": 4.676837922225376e-05, "loss": 0.00043811649084091187, "step": 187610 }, { "epoch": 53.25574794209481, "grad_norm": 0.12477339804172516, "learning_rate": 4.67655407323304e-05, "loss": 0.0016305247321724893, "step": 187620 }, { "epoch": 53.25858643201816, "grad_norm": 0.0860651433467865, "learning_rate": 4.6762702242407044e-05, "loss": 0.0005150493234395981, "step": 187630 }, { "epoch": 53.261424921941526, "grad_norm": 0.03224341943860054, "learning_rate": 4.675986375248368e-05, "loss": 0.0006749993190169334, "step": 187640 }, { "epoch": 53.26426341186489, "grad_norm": 0.10986166447401047, "learning_rate": 4.675702526256032e-05, "loss": 0.0019448798149824143, "step": 187650 }, { "epoch": 53.26710190178825, "grad_norm": 1.31785249710083, "learning_rate": 4.675418677263696e-05, "loss": 0.006024711579084396, "step": 187660 }, { "epoch": 53.26994039171161, "grad_norm": 0.11332842707633972, "learning_rate": 4.6751348282713596e-05, "loss": 0.00016470272094011308, "step": 187670 }, { "epoch": 53.27277888163497, "grad_norm": 0.33314409852027893, "learning_rate": 4.674850979279024e-05, "loss": 0.00016911569982767106, "step": 187680 }, { "epoch": 53.27561737155833, "grad_norm": 0.019298072904348373, "learning_rate": 4.674567130286688e-05, "loss": 9.400825947523118e-05, "step": 187690 }, { "epoch": 53.27845586148169, "grad_norm": 0.019401831552386284, "learning_rate": 4.674283281294351e-05, "loss": 0.00015515629202127456, "step": 187700 }, { "epoch": 53.28129435140505, "grad_norm": 0.39260560274124146, "learning_rate": 4.6739994323020155e-05, "loss": 0.00025101993232965467, "step": 187710 }, { "epoch": 53.284132841328415, "grad_norm": 0.01163747999817133, "learning_rate": 4.6737155833096796e-05, "loss": 4.220269620418549e-05, "step": 187720 }, { "epoch": 53.28697133125178, "grad_norm": 0.04830078035593033, "learning_rate": 4.673431734317343e-05, "loss": 0.00025393590331077574, "step": 187730 }, { "epoch": 53.28980982117513, "grad_norm": 0.9773651957511902, "learning_rate": 4.673147885325007e-05, "loss": 0.0007449511438608169, "step": 187740 }, { "epoch": 53.292648311098496, "grad_norm": 0.00918722152709961, "learning_rate": 4.6728640363326713e-05, "loss": 0.0006824519485235214, "step": 187750 }, { "epoch": 53.29548680102186, "grad_norm": 0.025700319558382034, "learning_rate": 4.6725801873403355e-05, "loss": 0.001888452097773552, "step": 187760 }, { "epoch": 53.298325290945215, "grad_norm": 2.394583225250244, "learning_rate": 4.672296338347999e-05, "loss": 0.011365123838186265, "step": 187770 }, { "epoch": 53.30116378086858, "grad_norm": 0.39612913131713867, "learning_rate": 4.672012489355663e-05, "loss": 0.010001448541879654, "step": 187780 }, { "epoch": 53.30400227079194, "grad_norm": 0.017643805593252182, "learning_rate": 4.671728640363327e-05, "loss": 0.0014145616441965103, "step": 187790 }, { "epoch": 53.306840760715296, "grad_norm": 0.01471593789756298, "learning_rate": 4.671444791370991e-05, "loss": 0.008844371140003204, "step": 187800 }, { "epoch": 53.30967925063866, "grad_norm": 0.11483939737081528, "learning_rate": 4.671160942378655e-05, "loss": 0.0001341402530670166, "step": 187810 }, { "epoch": 53.31251774056202, "grad_norm": 0.01329687051475048, "learning_rate": 4.670877093386319e-05, "loss": 0.00011230576783418655, "step": 187820 }, { "epoch": 53.315356230485385, "grad_norm": 0.6274718046188354, "learning_rate": 4.6705932443939824e-05, "loss": 0.00041721779853105546, "step": 187830 }, { "epoch": 53.31819472040874, "grad_norm": 0.03246826305985451, "learning_rate": 4.6703093954016465e-05, "loss": 0.0008873851969838142, "step": 187840 }, { "epoch": 53.321033210332104, "grad_norm": 0.005229183007031679, "learning_rate": 4.670025546409311e-05, "loss": 0.00020716376602649688, "step": 187850 }, { "epoch": 53.32387170025547, "grad_norm": 6.031008243560791, "learning_rate": 4.669741697416974e-05, "loss": 0.001963845640420914, "step": 187860 }, { "epoch": 53.32671019017882, "grad_norm": 0.4927484691143036, "learning_rate": 4.669457848424638e-05, "loss": 0.00019630920141935348, "step": 187870 }, { "epoch": 53.329548680102185, "grad_norm": 0.36427029967308044, "learning_rate": 4.6691739994323024e-05, "loss": 0.0005524918437004089, "step": 187880 }, { "epoch": 53.33238717002555, "grad_norm": 0.17655245959758759, "learning_rate": 4.6688901504399666e-05, "loss": 0.0014188252389431, "step": 187890 }, { "epoch": 53.335225659948904, "grad_norm": 0.030030881986021996, "learning_rate": 4.66860630144763e-05, "loss": 0.0006216535344719887, "step": 187900 }, { "epoch": 53.33806414987227, "grad_norm": 0.055812545120716095, "learning_rate": 4.6683224524552935e-05, "loss": 0.0011462874710559845, "step": 187910 }, { "epoch": 53.34090263979563, "grad_norm": 0.2547707259654999, "learning_rate": 4.668038603462958e-05, "loss": 0.000880153477191925, "step": 187920 }, { "epoch": 53.34374112971899, "grad_norm": 0.9504252672195435, "learning_rate": 4.667754754470622e-05, "loss": 0.000940871424973011, "step": 187930 }, { "epoch": 53.34657961964235, "grad_norm": 0.1663336157798767, "learning_rate": 4.667470905478286e-05, "loss": 0.0027346517890691757, "step": 187940 }, { "epoch": 53.34941810956571, "grad_norm": 0.014806026592850685, "learning_rate": 4.66718705648595e-05, "loss": 0.00031978823244571686, "step": 187950 }, { "epoch": 53.352256599489074, "grad_norm": 0.007465897593647242, "learning_rate": 4.6669032074936135e-05, "loss": 0.0005019059404730797, "step": 187960 }, { "epoch": 53.35509508941243, "grad_norm": 0.028884125873446465, "learning_rate": 4.6666193585012776e-05, "loss": 0.00014516673982143403, "step": 187970 }, { "epoch": 53.35793357933579, "grad_norm": 0.04816771671175957, "learning_rate": 4.666335509508942e-05, "loss": 0.00391453206539154, "step": 187980 }, { "epoch": 53.360772069259156, "grad_norm": 0.028463082388043404, "learning_rate": 4.666051660516605e-05, "loss": 0.010102176666259765, "step": 187990 }, { "epoch": 53.36361055918251, "grad_norm": 0.014756247401237488, "learning_rate": 4.6657678115242694e-05, "loss": 0.00036175251007080077, "step": 188000 }, { "epoch": 53.36361055918251, "eval_accuracy": 0.9840401856679596, "eval_loss": 0.06733973324298859, "eval_runtime": 61.6004, "eval_samples_per_second": 255.307, "eval_steps_per_second": 3.993, "step": 188000 }, { "epoch": 53.366449049105874, "grad_norm": 0.002362135797739029, "learning_rate": 4.665483962531933e-05, "loss": 0.00010406747460365296, "step": 188010 }, { "epoch": 53.36928753902924, "grad_norm": 0.021062392741441727, "learning_rate": 4.665200113539597e-05, "loss": 9.870119392871857e-05, "step": 188020 }, { "epoch": 53.3721260289526, "grad_norm": 0.014436857774853706, "learning_rate": 4.664916264547261e-05, "loss": 0.0003952670842409134, "step": 188030 }, { "epoch": 53.374964518875956, "grad_norm": 0.0040365648455917835, "learning_rate": 4.6646324155549246e-05, "loss": 0.0024524275213479996, "step": 188040 }, { "epoch": 53.37780300879932, "grad_norm": 0.3052312731742859, "learning_rate": 4.6643485665625894e-05, "loss": 0.0053890891373157505, "step": 188050 }, { "epoch": 53.38064149872268, "grad_norm": 0.02250695414841175, "learning_rate": 4.664064717570253e-05, "loss": 0.00029052626341581346, "step": 188060 }, { "epoch": 53.38347998864604, "grad_norm": 0.025006385520100594, "learning_rate": 4.663780868577917e-05, "loss": 0.004940115660429001, "step": 188070 }, { "epoch": 53.3863184785694, "grad_norm": 0.2868189811706543, "learning_rate": 4.663497019585581e-05, "loss": 0.0016174403950572013, "step": 188080 }, { "epoch": 53.38915696849276, "grad_norm": 0.05843624845147133, "learning_rate": 4.6632131705932446e-05, "loss": 0.001669490709900856, "step": 188090 }, { "epoch": 53.391995458416126, "grad_norm": 0.739940881729126, "learning_rate": 4.662929321600909e-05, "loss": 0.007591456919908523, "step": 188100 }, { "epoch": 53.39483394833948, "grad_norm": 0.015648458153009415, "learning_rate": 4.662645472608572e-05, "loss": 0.005195115879178047, "step": 188110 }, { "epoch": 53.397672438262845, "grad_norm": 0.138453409075737, "learning_rate": 4.662361623616236e-05, "loss": 0.000683356449007988, "step": 188120 }, { "epoch": 53.40051092818621, "grad_norm": 0.007345383055508137, "learning_rate": 4.6620777746239004e-05, "loss": 0.00045944247394800184, "step": 188130 }, { "epoch": 53.40334941810956, "grad_norm": 0.4334501624107361, "learning_rate": 4.661793925631564e-05, "loss": 0.00023753903806209565, "step": 188140 }, { "epoch": 53.406187908032926, "grad_norm": 0.009778575040400028, "learning_rate": 4.661510076639228e-05, "loss": 0.00010264776647090912, "step": 188150 }, { "epoch": 53.40902639795629, "grad_norm": 0.006799999158829451, "learning_rate": 4.661226227646892e-05, "loss": 0.000853295810520649, "step": 188160 }, { "epoch": 53.411864887879645, "grad_norm": 0.017865866422653198, "learning_rate": 4.6609423786545556e-05, "loss": 0.001032237708568573, "step": 188170 }, { "epoch": 53.41470337780301, "grad_norm": 2.3811047077178955, "learning_rate": 4.6606585296622204e-05, "loss": 0.0007740719243884087, "step": 188180 }, { "epoch": 53.41754186772637, "grad_norm": 0.13975439965724945, "learning_rate": 4.660374680669884e-05, "loss": 0.0003060916438698769, "step": 188190 }, { "epoch": 53.420380357649734, "grad_norm": 0.0864756777882576, "learning_rate": 4.6600908316775474e-05, "loss": 0.022727657854557038, "step": 188200 }, { "epoch": 53.42321884757309, "grad_norm": 0.00862489640712738, "learning_rate": 4.6598069826852115e-05, "loss": 0.0006693905219435692, "step": 188210 }, { "epoch": 53.42605733749645, "grad_norm": 0.09340302646160126, "learning_rate": 4.6595231336928756e-05, "loss": 0.002372365817427635, "step": 188220 }, { "epoch": 53.428895827419815, "grad_norm": 1.5941171646118164, "learning_rate": 4.65923928470054e-05, "loss": 0.000690428726375103, "step": 188230 }, { "epoch": 53.43173431734317, "grad_norm": 0.0165711622685194, "learning_rate": 4.658955435708203e-05, "loss": 0.000351598858833313, "step": 188240 }, { "epoch": 53.434572807266534, "grad_norm": 0.01784406416118145, "learning_rate": 4.6586715867158674e-05, "loss": 0.00017346274107694627, "step": 188250 }, { "epoch": 53.4374112971899, "grad_norm": 0.11245511472225189, "learning_rate": 4.6583877377235315e-05, "loss": 0.0018258634954690933, "step": 188260 }, { "epoch": 53.44024978711325, "grad_norm": 0.009684822522103786, "learning_rate": 4.658103888731195e-05, "loss": 0.0006762983277440072, "step": 188270 }, { "epoch": 53.443088277036615, "grad_norm": 0.016440827399492264, "learning_rate": 4.657820039738859e-05, "loss": 0.00021547023206949233, "step": 188280 }, { "epoch": 53.44592676695998, "grad_norm": 0.03228893131017685, "learning_rate": 4.657536190746523e-05, "loss": 0.0002432066947221756, "step": 188290 }, { "epoch": 53.44876525688334, "grad_norm": 0.21194733679294586, "learning_rate": 4.657252341754187e-05, "loss": 0.00011107511818408966, "step": 188300 }, { "epoch": 53.4516037468067, "grad_norm": 0.01628873683512211, "learning_rate": 4.656968492761851e-05, "loss": 0.003270375728607178, "step": 188310 }, { "epoch": 53.45444223673006, "grad_norm": 0.011467044241726398, "learning_rate": 4.656684643769515e-05, "loss": 0.0012820634990930557, "step": 188320 }, { "epoch": 53.45728072665342, "grad_norm": 0.36325937509536743, "learning_rate": 4.6564007947771784e-05, "loss": 0.00022352952510118484, "step": 188330 }, { "epoch": 53.46011921657678, "grad_norm": 0.26970982551574707, "learning_rate": 4.6561169457848426e-05, "loss": 0.0006003664806485176, "step": 188340 }, { "epoch": 53.46295770650014, "grad_norm": 1.4866703748703003, "learning_rate": 4.655833096792507e-05, "loss": 0.00039685480296611784, "step": 188350 }, { "epoch": 53.465796196423504, "grad_norm": 1.838560700416565, "learning_rate": 4.655549247800171e-05, "loss": 0.018084269762039185, "step": 188360 }, { "epoch": 53.46863468634686, "grad_norm": 0.03863668069243431, "learning_rate": 4.655265398807834e-05, "loss": 0.0004861561581492424, "step": 188370 }, { "epoch": 53.47147317627022, "grad_norm": 0.1273009181022644, "learning_rate": 4.6549815498154985e-05, "loss": 0.000776715949177742, "step": 188380 }, { "epoch": 53.474311666193586, "grad_norm": 0.008449741639196873, "learning_rate": 4.6546977008231626e-05, "loss": 0.0026535138487815855, "step": 188390 }, { "epoch": 53.47715015611695, "grad_norm": 0.20691630244255066, "learning_rate": 4.654413851830826e-05, "loss": 0.005734062194824219, "step": 188400 }, { "epoch": 53.479988646040304, "grad_norm": 0.024800224229693413, "learning_rate": 4.65413000283849e-05, "loss": 0.00015778467059135437, "step": 188410 }, { "epoch": 53.48282713596367, "grad_norm": 0.35931167006492615, "learning_rate": 4.653846153846154e-05, "loss": 0.0038161687552928925, "step": 188420 }, { "epoch": 53.48566562588703, "grad_norm": 0.2691011130809784, "learning_rate": 4.653562304853818e-05, "loss": 0.0005351599305868149, "step": 188430 }, { "epoch": 53.488504115810386, "grad_norm": 0.02181868627667427, "learning_rate": 4.653278455861482e-05, "loss": 0.007279783487319946, "step": 188440 }, { "epoch": 53.49134260573375, "grad_norm": 0.011045647785067558, "learning_rate": 4.652994606869146e-05, "loss": 0.0003476545214653015, "step": 188450 }, { "epoch": 53.49418109565711, "grad_norm": 7.658235549926758, "learning_rate": 4.6527107578768095e-05, "loss": 0.002877158112823963, "step": 188460 }, { "epoch": 53.497019585580475, "grad_norm": 0.056653331965208054, "learning_rate": 4.6524269088844737e-05, "loss": 0.0006744461134076118, "step": 188470 }, { "epoch": 53.49985807550383, "grad_norm": 0.10741715133190155, "learning_rate": 4.652143059892138e-05, "loss": 0.005366486310958862, "step": 188480 }, { "epoch": 53.50269656542719, "grad_norm": 0.025944605469703674, "learning_rate": 4.651859210899801e-05, "loss": 0.0005361743271350861, "step": 188490 }, { "epoch": 53.505535055350556, "grad_norm": 2.3596608638763428, "learning_rate": 4.6515753619074654e-05, "loss": 0.0017124230042099952, "step": 188500 }, { "epoch": 53.505535055350556, "eval_accuracy": 0.9836586761620144, "eval_loss": 0.06914114207029343, "eval_runtime": 111.994, "eval_samples_per_second": 140.427, "eval_steps_per_second": 2.197, "step": 188500 }, { "epoch": 53.50837354527391, "grad_norm": 0.25518837571144104, "learning_rate": 4.651291512915129e-05, "loss": 0.003102273680269718, "step": 188510 }, { "epoch": 53.511212035197275, "grad_norm": 0.03356488421559334, "learning_rate": 4.651007663922794e-05, "loss": 0.00016869958490133284, "step": 188520 }, { "epoch": 53.51405052512064, "grad_norm": 0.03591328486800194, "learning_rate": 4.650723814930457e-05, "loss": 0.0009223559871315956, "step": 188530 }, { "epoch": 53.51688901504399, "grad_norm": 0.11283807456493378, "learning_rate": 4.6504399659381206e-05, "loss": 0.0019323019310832023, "step": 188540 }, { "epoch": 53.519727504967356, "grad_norm": 1.4982914924621582, "learning_rate": 4.6501561169457854e-05, "loss": 0.006526142358779907, "step": 188550 }, { "epoch": 53.52256599489072, "grad_norm": 0.14462275803089142, "learning_rate": 4.649872267953449e-05, "loss": 0.00030662957578897476, "step": 188560 }, { "epoch": 53.52540448481408, "grad_norm": 0.0013245915761217475, "learning_rate": 4.649588418961113e-05, "loss": 0.0005568539723753929, "step": 188570 }, { "epoch": 53.52824297473744, "grad_norm": 0.3340049386024475, "learning_rate": 4.649304569968777e-05, "loss": 0.0008017299696803093, "step": 188580 }, { "epoch": 53.5310814646608, "grad_norm": 0.036781392991542816, "learning_rate": 4.6490207209764406e-05, "loss": 0.00021450649946928025, "step": 188590 }, { "epoch": 53.533919954584164, "grad_norm": 0.028521856293082237, "learning_rate": 4.648736871984105e-05, "loss": 0.00025430116802453994, "step": 188600 }, { "epoch": 53.53675844450752, "grad_norm": 0.08726565539836884, "learning_rate": 4.648453022991768e-05, "loss": 0.002336626872420311, "step": 188610 }, { "epoch": 53.53959693443088, "grad_norm": 0.05426472797989845, "learning_rate": 4.648169173999432e-05, "loss": 0.0035942602902650834, "step": 188620 }, { "epoch": 53.542435424354245, "grad_norm": 0.025068264454603195, "learning_rate": 4.6478853250070965e-05, "loss": 0.0003999641165137291, "step": 188630 }, { "epoch": 53.5452739142776, "grad_norm": 0.03102622926235199, "learning_rate": 4.64760147601476e-05, "loss": 0.0010203199461102485, "step": 188640 }, { "epoch": 53.548112404200964, "grad_norm": 0.02043749950826168, "learning_rate": 4.647317627022425e-05, "loss": 0.000161115825176239, "step": 188650 }, { "epoch": 53.55095089412433, "grad_norm": 2.6071410179138184, "learning_rate": 4.647033778030088e-05, "loss": 0.0004925996065139771, "step": 188660 }, { "epoch": 53.55378938404769, "grad_norm": 3.1965599060058594, "learning_rate": 4.646749929037752e-05, "loss": 0.0006359394639730454, "step": 188670 }, { "epoch": 53.556627873971046, "grad_norm": 0.04042806476354599, "learning_rate": 4.6464660800454165e-05, "loss": 0.0003685103729367256, "step": 188680 }, { "epoch": 53.55946636389441, "grad_norm": 0.27232491970062256, "learning_rate": 4.64618223105308e-05, "loss": 0.0016743825748562814, "step": 188690 }, { "epoch": 53.56230485381777, "grad_norm": 12.554143905639648, "learning_rate": 4.645898382060744e-05, "loss": 0.006694009900093079, "step": 188700 }, { "epoch": 53.56514334374113, "grad_norm": 0.05743815004825592, "learning_rate": 4.645614533068408e-05, "loss": 0.00028624869883060456, "step": 188710 }, { "epoch": 53.56798183366449, "grad_norm": 0.09228169173002243, "learning_rate": 4.645330684076072e-05, "loss": 0.003185214102268219, "step": 188720 }, { "epoch": 53.57082032358785, "grad_norm": 5.082214832305908, "learning_rate": 4.64507521998297e-05, "loss": 0.008340996503829957, "step": 188730 }, { "epoch": 53.57365881351121, "grad_norm": 0.04445622116327286, "learning_rate": 4.644791370990633e-05, "loss": 0.00025195274502038957, "step": 188740 }, { "epoch": 53.57649730343457, "grad_norm": 0.019493453204631805, "learning_rate": 4.644507521998297e-05, "loss": 0.00020657330751419067, "step": 188750 }, { "epoch": 53.579335793357934, "grad_norm": 0.010866685770452023, "learning_rate": 4.6442236730059614e-05, "loss": 0.0015641864389181136, "step": 188760 }, { "epoch": 53.5821742832813, "grad_norm": 0.06877195835113525, "learning_rate": 4.643939824013625e-05, "loss": 0.00018914341926574708, "step": 188770 }, { "epoch": 53.58501277320465, "grad_norm": 0.45607632398605347, "learning_rate": 4.643655975021289e-05, "loss": 0.011614413559436798, "step": 188780 }, { "epoch": 53.587851263128016, "grad_norm": 0.052936818450689316, "learning_rate": 4.6433721260289525e-05, "loss": 0.0008688030764460563, "step": 188790 }, { "epoch": 53.59068975305138, "grad_norm": 16.607521057128906, "learning_rate": 4.6430882770366166e-05, "loss": 0.007515013217926025, "step": 188800 }, { "epoch": 53.593528242974735, "grad_norm": 0.010201999917626381, "learning_rate": 4.642804428044281e-05, "loss": 0.00011437442153692246, "step": 188810 }, { "epoch": 53.5963667328981, "grad_norm": 0.8180213570594788, "learning_rate": 4.642520579051944e-05, "loss": 0.0037769585847854613, "step": 188820 }, { "epoch": 53.59920522282146, "grad_norm": 0.11196282505989075, "learning_rate": 4.642236730059609e-05, "loss": 0.0003972774371504784, "step": 188830 }, { "epoch": 53.602043712744816, "grad_norm": 0.0016172754112631083, "learning_rate": 4.6419528810672725e-05, "loss": 0.0002013387158513069, "step": 188840 }, { "epoch": 53.60488220266818, "grad_norm": 0.0313153974711895, "learning_rate": 4.641669032074936e-05, "loss": 0.0008866207674145699, "step": 188850 }, { "epoch": 53.60772069259154, "grad_norm": 0.11963639408349991, "learning_rate": 4.641385183082601e-05, "loss": 0.00012674536556005477, "step": 188860 }, { "epoch": 53.610559182514905, "grad_norm": 0.004625859670341015, "learning_rate": 4.641101334090264e-05, "loss": 0.001459287479519844, "step": 188870 }, { "epoch": 53.61339767243826, "grad_norm": 0.03420775756239891, "learning_rate": 4.640845869997162e-05, "loss": 0.007251472771167755, "step": 188880 }, { "epoch": 53.61623616236162, "grad_norm": 0.02444108948111534, "learning_rate": 4.6405620210048257e-05, "loss": 0.004582945257425308, "step": 188890 }, { "epoch": 53.619074652284986, "grad_norm": 0.007924526929855347, "learning_rate": 4.640278172012489e-05, "loss": 0.00257747620344162, "step": 188900 }, { "epoch": 53.62191314220834, "grad_norm": 0.02250070311129093, "learning_rate": 4.639994323020154e-05, "loss": 0.0013906260952353477, "step": 188910 }, { "epoch": 53.624751632131705, "grad_norm": 0.0807199701666832, "learning_rate": 4.6397104740278174e-05, "loss": 0.00035603605210781096, "step": 188920 }, { "epoch": 53.62759012205507, "grad_norm": 0.05773930251598358, "learning_rate": 4.6394266250354815e-05, "loss": 0.0027982551604509355, "step": 188930 }, { "epoch": 53.63042861197843, "grad_norm": 0.08366863429546356, "learning_rate": 4.639142776043146e-05, "loss": 0.0008898945525288582, "step": 188940 }, { "epoch": 53.63326710190179, "grad_norm": 0.04832851514220238, "learning_rate": 4.638858927050809e-05, "loss": 0.00023361165076494217, "step": 188950 }, { "epoch": 53.63610559182515, "grad_norm": 0.11983712017536163, "learning_rate": 4.638575078058473e-05, "loss": 0.0007010813802480698, "step": 188960 }, { "epoch": 53.63894408174851, "grad_norm": 0.04947201907634735, "learning_rate": 4.638291229066137e-05, "loss": 0.00029759518802165987, "step": 188970 }, { "epoch": 53.64178257167187, "grad_norm": 0.7470290064811707, "learning_rate": 4.638007380073801e-05, "loss": 0.0012236377224326134, "step": 188980 }, { "epoch": 53.64462106159523, "grad_norm": 3.655963659286499, "learning_rate": 4.637723531081465e-05, "loss": 0.0011040594428777695, "step": 188990 }, { "epoch": 53.647459551518594, "grad_norm": 0.0073951841332018375, "learning_rate": 4.6374396820891285e-05, "loss": 0.001299828290939331, "step": 189000 }, { "epoch": 53.647459551518594, "eval_accuracy": 0.9811788643733707, "eval_loss": 0.0775727927684784, "eval_runtime": 49.3771, "eval_samples_per_second": 318.508, "eval_steps_per_second": 4.982, "step": 189000 }, { "epoch": 53.65029804144195, "grad_norm": 1.3927415609359741, "learning_rate": 4.6371558330967926e-05, "loss": 0.0038806650787591933, "step": 189010 }, { "epoch": 53.65313653136531, "grad_norm": 0.03957657143473625, "learning_rate": 4.636871984104457e-05, "loss": 0.0006381155923008919, "step": 189020 }, { "epoch": 53.655975021288675, "grad_norm": 0.014065622352063656, "learning_rate": 4.63658813511212e-05, "loss": 0.014051984250545501, "step": 189030 }, { "epoch": 53.65881351121204, "grad_norm": 0.059735607355833054, "learning_rate": 4.636304286119785e-05, "loss": 0.006496728956699371, "step": 189040 }, { "epoch": 53.661652001135394, "grad_norm": 9.729701042175293, "learning_rate": 4.6360204371274485e-05, "loss": 0.01654416471719742, "step": 189050 }, { "epoch": 53.66449049105876, "grad_norm": 0.13694733381271362, "learning_rate": 4.635736588135112e-05, "loss": 0.0005797401070594787, "step": 189060 }, { "epoch": 53.66732898098212, "grad_norm": 1.5456346273422241, "learning_rate": 4.635452739142776e-05, "loss": 0.0006385134533047676, "step": 189070 }, { "epoch": 53.670167470905476, "grad_norm": 13.094619750976562, "learning_rate": 4.63516889015044e-05, "loss": 0.00872587263584137, "step": 189080 }, { "epoch": 53.67300596082884, "grad_norm": 0.02735893614590168, "learning_rate": 4.6348850411581043e-05, "loss": 0.00016879700124263763, "step": 189090 }, { "epoch": 53.6758444507522, "grad_norm": 0.22320513427257538, "learning_rate": 4.634601192165768e-05, "loss": 0.005682355910539627, "step": 189100 }, { "epoch": 53.67868294067556, "grad_norm": 0.06337951123714447, "learning_rate": 4.634317343173432e-05, "loss": 0.004494468867778778, "step": 189110 }, { "epoch": 53.68152143059892, "grad_norm": 4.399172306060791, "learning_rate": 4.634033494181096e-05, "loss": 0.0020140230655670164, "step": 189120 }, { "epoch": 53.68435992052228, "grad_norm": 4.932733058929443, "learning_rate": 4.6337496451887595e-05, "loss": 0.001331835612654686, "step": 189130 }, { "epoch": 53.687198410445646, "grad_norm": 0.0740760862827301, "learning_rate": 4.633465796196424e-05, "loss": 0.00020419564098119735, "step": 189140 }, { "epoch": 53.690036900369, "grad_norm": 15.768267631530762, "learning_rate": 4.633181947204088e-05, "loss": 0.011903670430183411, "step": 189150 }, { "epoch": 53.692875390292365, "grad_norm": 8.402745246887207, "learning_rate": 4.632898098211751e-05, "loss": 0.002484548091888428, "step": 189160 }, { "epoch": 53.69571388021573, "grad_norm": 0.03605884686112404, "learning_rate": 4.6326142492194154e-05, "loss": 0.0022983988747000694, "step": 189170 }, { "epoch": 53.69855237013908, "grad_norm": 0.018025709316134453, "learning_rate": 4.6323304002270795e-05, "loss": 0.00129899512976408, "step": 189180 }, { "epoch": 53.701390860062446, "grad_norm": 0.21212810277938843, "learning_rate": 4.632046551234743e-05, "loss": 0.001265138015151024, "step": 189190 }, { "epoch": 53.70422934998581, "grad_norm": 0.016666138544678688, "learning_rate": 4.631762702242407e-05, "loss": 0.0001245807856321335, "step": 189200 }, { "epoch": 53.70706783990917, "grad_norm": 0.00994082074612379, "learning_rate": 4.631478853250071e-05, "loss": 0.00040070544928312303, "step": 189210 }, { "epoch": 53.70990632983253, "grad_norm": 0.013446049764752388, "learning_rate": 4.6311950042577354e-05, "loss": 0.00033213049173355105, "step": 189220 }, { "epoch": 53.71274481975589, "grad_norm": 0.4340798556804657, "learning_rate": 4.630911155265399e-05, "loss": 0.0002443775534629822, "step": 189230 }, { "epoch": 53.71558330967925, "grad_norm": 0.034121282398700714, "learning_rate": 4.630627306273063e-05, "loss": 0.00024076905101537703, "step": 189240 }, { "epoch": 53.71842179960261, "grad_norm": 0.004237177781760693, "learning_rate": 4.630343457280727e-05, "loss": 0.0005504956468939781, "step": 189250 }, { "epoch": 53.72126028952597, "grad_norm": 0.17546981573104858, "learning_rate": 4.6300596082883906e-05, "loss": 0.0002904798835515976, "step": 189260 }, { "epoch": 53.724098779449335, "grad_norm": 0.3269042670726776, "learning_rate": 4.629775759296055e-05, "loss": 0.0007888283580541611, "step": 189270 }, { "epoch": 53.72693726937269, "grad_norm": 0.02296910434961319, "learning_rate": 4.629491910303719e-05, "loss": 0.0007970215752720833, "step": 189280 }, { "epoch": 53.729775759296054, "grad_norm": 0.20274607837200165, "learning_rate": 4.6292080613113823e-05, "loss": 0.0005033746361732483, "step": 189290 }, { "epoch": 53.73261424921942, "grad_norm": 0.04692333564162254, "learning_rate": 4.6289242123190465e-05, "loss": 0.0012872109189629554, "step": 189300 }, { "epoch": 53.73545273914278, "grad_norm": 0.17105957865715027, "learning_rate": 4.6286403633267106e-05, "loss": 0.00012218989431858062, "step": 189310 }, { "epoch": 53.738291229066135, "grad_norm": 1.056795597076416, "learning_rate": 4.628356514334374e-05, "loss": 0.001434808410704136, "step": 189320 }, { "epoch": 53.7411297189895, "grad_norm": 0.02653801068663597, "learning_rate": 4.628072665342038e-05, "loss": 0.00525999441742897, "step": 189330 }, { "epoch": 53.74396820891286, "grad_norm": 0.0201258584856987, "learning_rate": 4.6277888163497024e-05, "loss": 0.001682044565677643, "step": 189340 }, { "epoch": 53.74680669883622, "grad_norm": 0.045010969042778015, "learning_rate": 4.6275049673573665e-05, "loss": 0.00039555523544549943, "step": 189350 }, { "epoch": 53.74964518875958, "grad_norm": 0.063954196870327, "learning_rate": 4.62722111836503e-05, "loss": 0.0002050209790468216, "step": 189360 }, { "epoch": 53.75248367868294, "grad_norm": 0.011089172214269638, "learning_rate": 4.6269372693726934e-05, "loss": 0.00020633284002542497, "step": 189370 }, { "epoch": 53.7553221686063, "grad_norm": 0.010972362942993641, "learning_rate": 4.626653420380358e-05, "loss": 0.0004662448540329933, "step": 189380 }, { "epoch": 53.75816065852966, "grad_norm": 0.09469294548034668, "learning_rate": 4.626369571388022e-05, "loss": 0.0004078341647982597, "step": 189390 }, { "epoch": 53.760999148453024, "grad_norm": 0.013310414738953114, "learning_rate": 4.626085722395686e-05, "loss": 0.000128716416656971, "step": 189400 }, { "epoch": 53.76383763837639, "grad_norm": 0.03290676698088646, "learning_rate": 4.62580187340335e-05, "loss": 0.0002382025122642517, "step": 189410 }, { "epoch": 53.76667612829974, "grad_norm": 0.011461338959634304, "learning_rate": 4.6255180244110134e-05, "loss": 0.0007005201652646065, "step": 189420 }, { "epoch": 53.769514618223106, "grad_norm": 0.02186189591884613, "learning_rate": 4.6252341754186776e-05, "loss": 0.0005362752825021743, "step": 189430 }, { "epoch": 53.77235310814647, "grad_norm": 8.91739273071289, "learning_rate": 4.624950326426342e-05, "loss": 0.002043979614973068, "step": 189440 }, { "epoch": 53.775191598069824, "grad_norm": 0.8442699313163757, "learning_rate": 4.624666477434005e-05, "loss": 0.00030569247901439666, "step": 189450 }, { "epoch": 53.77803008799319, "grad_norm": 0.36396926641464233, "learning_rate": 4.624382628441669e-05, "loss": 0.004608438163995743, "step": 189460 }, { "epoch": 53.78086857791655, "grad_norm": 0.6856091022491455, "learning_rate": 4.6240987794493334e-05, "loss": 0.0005191555246710777, "step": 189470 }, { "epoch": 53.783707067839906, "grad_norm": 4.493940830230713, "learning_rate": 4.623814930456997e-05, "loss": 0.004829376935958862, "step": 189480 }, { "epoch": 53.78654555776327, "grad_norm": 0.3273265063762665, "learning_rate": 4.623531081464661e-05, "loss": 0.0003159357234835625, "step": 189490 }, { "epoch": 53.78938404768663, "grad_norm": 0.005045661702752113, "learning_rate": 4.6232472324723245e-05, "loss": 0.00018638167530298232, "step": 189500 }, { "epoch": 53.78938404768663, "eval_accuracy": 0.981941883385261, "eval_loss": 0.0759425014257431, "eval_runtime": 54.6446, "eval_samples_per_second": 287.805, "eval_steps_per_second": 4.502, "step": 189500 }, { "epoch": 53.792222537609995, "grad_norm": 0.7646417617797852, "learning_rate": 4.622963383479989e-05, "loss": 0.0012225838378071784, "step": 189510 }, { "epoch": 53.79506102753335, "grad_norm": 0.003901326796039939, "learning_rate": 4.622679534487653e-05, "loss": 0.011194638162851333, "step": 189520 }, { "epoch": 53.79789951745671, "grad_norm": 0.2750509977340698, "learning_rate": 4.622395685495316e-05, "loss": 0.0004677917808294296, "step": 189530 }, { "epoch": 53.800738007380076, "grad_norm": 0.016233665868639946, "learning_rate": 4.622111836502981e-05, "loss": 0.0006508365273475647, "step": 189540 }, { "epoch": 53.80357649730343, "grad_norm": 0.01918305829167366, "learning_rate": 4.6218279875106445e-05, "loss": 0.001082516461610794, "step": 189550 }, { "epoch": 53.806414987226795, "grad_norm": 0.04892564192414284, "learning_rate": 4.6215441385183086e-05, "loss": 0.00013590939342975617, "step": 189560 }, { "epoch": 53.80925347715016, "grad_norm": 0.3098970651626587, "learning_rate": 4.621260289525973e-05, "loss": 0.000802270695567131, "step": 189570 }, { "epoch": 53.81209196707351, "grad_norm": 0.009372008964419365, "learning_rate": 4.620976440533636e-05, "loss": 0.0002748388797044754, "step": 189580 }, { "epoch": 53.814930456996876, "grad_norm": 0.005947744008153677, "learning_rate": 4.6206925915413004e-05, "loss": 0.001145431585609913, "step": 189590 }, { "epoch": 53.81776894692024, "grad_norm": 0.011961015872657299, "learning_rate": 4.620408742548964e-05, "loss": 0.0005663910880684853, "step": 189600 }, { "epoch": 53.8206074368436, "grad_norm": 0.04851677268743515, "learning_rate": 4.620124893556628e-05, "loss": 0.00020267479121685028, "step": 189610 }, { "epoch": 53.82344592676696, "grad_norm": 0.08980961889028549, "learning_rate": 4.619841044564292e-05, "loss": 0.00037792790681123734, "step": 189620 }, { "epoch": 53.82628441669032, "grad_norm": 0.0191870778799057, "learning_rate": 4.6195571955719556e-05, "loss": 0.00018060076981782914, "step": 189630 }, { "epoch": 53.829122906613684, "grad_norm": 0.01977810636162758, "learning_rate": 4.6192733465796204e-05, "loss": 0.0006695149466395378, "step": 189640 }, { "epoch": 53.83196139653704, "grad_norm": 0.01802624762058258, "learning_rate": 4.618989497587284e-05, "loss": 0.007672140002250671, "step": 189650 }, { "epoch": 53.8347998864604, "grad_norm": 7.520473480224609, "learning_rate": 4.618705648594947e-05, "loss": 0.002672400325536728, "step": 189660 }, { "epoch": 53.837638376383765, "grad_norm": 3.229665756225586, "learning_rate": 4.618421799602612e-05, "loss": 0.0005695356056094169, "step": 189670 }, { "epoch": 53.84047686630713, "grad_norm": 0.008979198522865772, "learning_rate": 4.6181379506102756e-05, "loss": 0.00029690954834222795, "step": 189680 }, { "epoch": 53.843315356230484, "grad_norm": 0.9789970517158508, "learning_rate": 4.61785410161794e-05, "loss": 0.00039329100400209427, "step": 189690 }, { "epoch": 53.84615384615385, "grad_norm": 0.3081745505332947, "learning_rate": 4.617570252625603e-05, "loss": 0.00046401582658290864, "step": 189700 }, { "epoch": 53.84899233607721, "grad_norm": 0.08751548826694489, "learning_rate": 4.617286403633267e-05, "loss": 0.0008253386244177818, "step": 189710 }, { "epoch": 53.851830826000565, "grad_norm": 0.055416129529476166, "learning_rate": 4.6170025546409315e-05, "loss": 0.003929581120610237, "step": 189720 }, { "epoch": 53.85466931592393, "grad_norm": 0.685020387172699, "learning_rate": 4.616718705648595e-05, "loss": 0.0036735877394676207, "step": 189730 }, { "epoch": 53.85750780584729, "grad_norm": 0.004805266857147217, "learning_rate": 4.616434856656259e-05, "loss": 0.006885233521461487, "step": 189740 }, { "epoch": 53.86034629577065, "grad_norm": 0.004988999105989933, "learning_rate": 4.616151007663923e-05, "loss": 0.00013206098228693008, "step": 189750 }, { "epoch": 53.86318478569401, "grad_norm": 0.028466112911701202, "learning_rate": 4.6158671586715866e-05, "loss": 0.0011451246216893196, "step": 189760 }, { "epoch": 53.86602327561737, "grad_norm": 19.573701858520508, "learning_rate": 4.6155833096792515e-05, "loss": 0.009526769816875457, "step": 189770 }, { "epoch": 53.868861765540736, "grad_norm": 0.0124558350071311, "learning_rate": 4.615299460686915e-05, "loss": 0.0013864412903785706, "step": 189780 }, { "epoch": 53.87170025546409, "grad_norm": 0.3003636598587036, "learning_rate": 4.6150156116945784e-05, "loss": 0.000785125233232975, "step": 189790 }, { "epoch": 53.874538745387454, "grad_norm": 0.18566519021987915, "learning_rate": 4.6147317627022425e-05, "loss": 0.00025735348463058473, "step": 189800 }, { "epoch": 53.87737723531082, "grad_norm": 0.04423654079437256, "learning_rate": 4.6144479137099067e-05, "loss": 0.0007343351840972901, "step": 189810 }, { "epoch": 53.88021572523417, "grad_norm": 2.541562795639038, "learning_rate": 4.614164064717571e-05, "loss": 0.0015449332073330878, "step": 189820 }, { "epoch": 53.883054215157536, "grad_norm": 0.495269238948822, "learning_rate": 4.613880215725234e-05, "loss": 0.005017444491386414, "step": 189830 }, { "epoch": 53.8858927050809, "grad_norm": 0.1418406218290329, "learning_rate": 4.6135963667328984e-05, "loss": 0.0007180899381637573, "step": 189840 }, { "epoch": 53.888731195004254, "grad_norm": 0.03608325123786926, "learning_rate": 4.6133125177405625e-05, "loss": 0.000430634617805481, "step": 189850 }, { "epoch": 53.89156968492762, "grad_norm": 0.03547391667962074, "learning_rate": 4.613028668748226e-05, "loss": 0.0003783361986279488, "step": 189860 }, { "epoch": 53.89440817485098, "grad_norm": 0.11277720332145691, "learning_rate": 4.61274481975589e-05, "loss": 0.003684426844120026, "step": 189870 }, { "epoch": 53.89724666477434, "grad_norm": 0.017689798027276993, "learning_rate": 4.612460970763554e-05, "loss": 0.001893717236816883, "step": 189880 }, { "epoch": 53.9000851546977, "grad_norm": 0.01183159090578556, "learning_rate": 4.612177121771218e-05, "loss": 0.0002439316362142563, "step": 189890 }, { "epoch": 53.90292364462106, "grad_norm": 0.024794356897473335, "learning_rate": 4.611893272778882e-05, "loss": 0.0034011371433734896, "step": 189900 }, { "epoch": 53.905762134544425, "grad_norm": 2.690617799758911, "learning_rate": 4.611609423786546e-05, "loss": 0.001027100719511509, "step": 189910 }, { "epoch": 53.90860062446778, "grad_norm": 0.023632042109966278, "learning_rate": 4.6113255747942095e-05, "loss": 0.0006873574107885361, "step": 189920 }, { "epoch": 53.91143911439114, "grad_norm": 0.19372133910655975, "learning_rate": 4.6110417258018736e-05, "loss": 0.0007862523198127747, "step": 189930 }, { "epoch": 53.914277604314506, "grad_norm": 0.010311619378626347, "learning_rate": 4.610757876809538e-05, "loss": 0.0003518274053931236, "step": 189940 }, { "epoch": 53.91711609423786, "grad_norm": 0.6795199513435364, "learning_rate": 4.610474027817201e-05, "loss": 0.000753210298717022, "step": 189950 }, { "epoch": 53.919954584161225, "grad_norm": 0.061086464673280716, "learning_rate": 4.610190178824865e-05, "loss": 0.00013076532632112504, "step": 189960 }, { "epoch": 53.92279307408459, "grad_norm": 0.003132045967504382, "learning_rate": 4.6099063298325295e-05, "loss": 0.00012506972998380662, "step": 189970 }, { "epoch": 53.92563156400795, "grad_norm": 0.35549265146255493, "learning_rate": 4.6096224808401936e-05, "loss": 0.000584195926785469, "step": 189980 }, { "epoch": 53.928470053931306, "grad_norm": 0.4577452540397644, "learning_rate": 4.609338631847857e-05, "loss": 0.00029350966215133665, "step": 189990 }, { "epoch": 53.93130854385467, "grad_norm": 0.007941840216517448, "learning_rate": 4.6090547828555205e-05, "loss": 0.00011797063052654266, "step": 190000 }, { "epoch": 53.93130854385467, "eval_accuracy": 0.9845488650092198, "eval_loss": 0.06604253500699997, "eval_runtime": 43.9305, "eval_samples_per_second": 357.997, "eval_steps_per_second": 5.6, "step": 190000 }, { "epoch": 53.93414703377803, "grad_norm": 0.08684831112623215, "learning_rate": 4.608770933863185e-05, "loss": 0.00034168194979429246, "step": 190010 }, { "epoch": 53.93698552370139, "grad_norm": 0.033112332224845886, "learning_rate": 4.608487084870849e-05, "loss": 0.0003241602331399918, "step": 190020 }, { "epoch": 53.93982401362475, "grad_norm": 2.74444842338562, "learning_rate": 4.608203235878513e-05, "loss": 0.0008616261184215546, "step": 190030 }, { "epoch": 53.942662503548114, "grad_norm": 0.2000969648361206, "learning_rate": 4.607919386886177e-05, "loss": 0.0002275114879012108, "step": 190040 }, { "epoch": 53.94550099347148, "grad_norm": 0.014845119789242744, "learning_rate": 4.6076355378938405e-05, "loss": 0.0007760105654597282, "step": 190050 }, { "epoch": 53.94833948339483, "grad_norm": 2.4375336170196533, "learning_rate": 4.607351688901505e-05, "loss": 0.0036192499101161957, "step": 190060 }, { "epoch": 53.951177973318195, "grad_norm": 1.7013068199157715, "learning_rate": 4.607067839909169e-05, "loss": 0.002633309178054333, "step": 190070 }, { "epoch": 53.95401646324156, "grad_norm": 3.2091610431671143, "learning_rate": 4.606783990916832e-05, "loss": 0.005106598883867264, "step": 190080 }, { "epoch": 53.956854953164914, "grad_norm": 0.1653066873550415, "learning_rate": 4.6065001419244964e-05, "loss": 0.00021071024239063262, "step": 190090 }, { "epoch": 53.95969344308828, "grad_norm": 0.08675233274698257, "learning_rate": 4.60621629293216e-05, "loss": 0.000677373819053173, "step": 190100 }, { "epoch": 53.96253193301164, "grad_norm": 0.07476390153169632, "learning_rate": 4.605932443939825e-05, "loss": 0.0004682142287492752, "step": 190110 }, { "epoch": 53.965370422934996, "grad_norm": 0.00535902613773942, "learning_rate": 4.605648594947488e-05, "loss": 0.00016221366822719575, "step": 190120 }, { "epoch": 53.96820891285836, "grad_norm": 0.041251055896282196, "learning_rate": 4.6053647459551516e-05, "loss": 0.002545905485749245, "step": 190130 }, { "epoch": 53.97104740278172, "grad_norm": 0.012506509199738503, "learning_rate": 4.6050808969628164e-05, "loss": 0.0015868714079260826, "step": 190140 }, { "epoch": 53.973885892705084, "grad_norm": 0.0027558444999158382, "learning_rate": 4.60479704797048e-05, "loss": 0.0003585612401366234, "step": 190150 }, { "epoch": 53.97672438262844, "grad_norm": 0.24469693005084991, "learning_rate": 4.604513198978144e-05, "loss": 0.009033334255218507, "step": 190160 }, { "epoch": 53.9795628725518, "grad_norm": 0.21822616457939148, "learning_rate": 4.604229349985808e-05, "loss": 0.0017407020553946496, "step": 190170 }, { "epoch": 53.982401362475166, "grad_norm": 0.010096813552081585, "learning_rate": 4.6039455009934716e-05, "loss": 0.00013115257024765015, "step": 190180 }, { "epoch": 53.98523985239852, "grad_norm": 0.04978295415639877, "learning_rate": 4.603661652001136e-05, "loss": 0.0005077039822936058, "step": 190190 }, { "epoch": 53.988078342321884, "grad_norm": 0.0049904691986739635, "learning_rate": 4.603377803008799e-05, "loss": 0.0031947784125804903, "step": 190200 }, { "epoch": 53.99091683224525, "grad_norm": 0.016691233962774277, "learning_rate": 4.6030939540164633e-05, "loss": 0.0010430768132209777, "step": 190210 }, { "epoch": 53.9937553221686, "grad_norm": 0.010783550329506397, "learning_rate": 4.6028101050241275e-05, "loss": 0.0009651875123381614, "step": 190220 }, { "epoch": 53.996593812091966, "grad_norm": 0.013921806588768959, "learning_rate": 4.602526256031791e-05, "loss": 0.00047653224319219587, "step": 190230 }, { "epoch": 53.99943230201533, "grad_norm": 0.01157322432845831, "learning_rate": 4.602242407039455e-05, "loss": 0.0015597226098179816, "step": 190240 }, { "epoch": 54.00227079193869, "grad_norm": 0.37584367394447327, "learning_rate": 4.601958558047119e-05, "loss": 0.0011652464047074318, "step": 190250 }, { "epoch": 54.00510928186205, "grad_norm": 0.0033412885386496782, "learning_rate": 4.601674709054783e-05, "loss": 0.004041979461908341, "step": 190260 }, { "epoch": 54.00794777178541, "grad_norm": 0.04609382897615433, "learning_rate": 4.6013908600624475e-05, "loss": 0.0001924131065607071, "step": 190270 }, { "epoch": 54.01078626170877, "grad_norm": 0.17642465233802795, "learning_rate": 4.601107011070111e-05, "loss": 0.0002402154728770256, "step": 190280 }, { "epoch": 54.01362475163213, "grad_norm": 0.3360649049282074, "learning_rate": 4.600823162077775e-05, "loss": 0.0002730850130319595, "step": 190290 }, { "epoch": 54.01646324155549, "grad_norm": 0.017280366271734238, "learning_rate": 4.6005393130854385e-05, "loss": 0.00036724042147397997, "step": 190300 }, { "epoch": 54.019301731478855, "grad_norm": 0.08880309760570526, "learning_rate": 4.600255464093103e-05, "loss": 0.00544855073094368, "step": 190310 }, { "epoch": 54.02214022140221, "grad_norm": 0.014564997516572475, "learning_rate": 4.599971615100767e-05, "loss": 0.0012991923838853836, "step": 190320 }, { "epoch": 54.02497871132557, "grad_norm": 0.06647034734487534, "learning_rate": 4.59968776610843e-05, "loss": 0.00244639553129673, "step": 190330 }, { "epoch": 54.027817201248936, "grad_norm": 0.1527172476053238, "learning_rate": 4.5994039171160944e-05, "loss": 0.0003997540101408958, "step": 190340 }, { "epoch": 54.0306556911723, "grad_norm": 0.0013088710838928819, "learning_rate": 4.5991200681237586e-05, "loss": 9.585507214069367e-05, "step": 190350 }, { "epoch": 54.033494181095655, "grad_norm": 0.6106647253036499, "learning_rate": 4.598836219131422e-05, "loss": 0.0006642861291766167, "step": 190360 }, { "epoch": 54.03633267101902, "grad_norm": 0.0022176390048116446, "learning_rate": 4.598552370139086e-05, "loss": 0.002499566972255707, "step": 190370 }, { "epoch": 54.03917116094238, "grad_norm": 0.02502485364675522, "learning_rate": 4.59826852114675e-05, "loss": 0.00034857653081417085, "step": 190380 }, { "epoch": 54.04200965086574, "grad_norm": 0.004255208186805248, "learning_rate": 4.597984672154414e-05, "loss": 0.00017508473247289657, "step": 190390 }, { "epoch": 54.0448481407891, "grad_norm": 0.016944624483585358, "learning_rate": 4.597700823162078e-05, "loss": 0.00026632528752088545, "step": 190400 }, { "epoch": 54.04768663071246, "grad_norm": 0.18637984991073608, "learning_rate": 4.597416974169742e-05, "loss": 0.002970102429389954, "step": 190410 }, { "epoch": 54.050525120635825, "grad_norm": 0.027776256203651428, "learning_rate": 4.5971331251774055e-05, "loss": 0.00016451403498649598, "step": 190420 }, { "epoch": 54.05336361055918, "grad_norm": 0.2083183377981186, "learning_rate": 4.5968492761850696e-05, "loss": 0.00015344861894845964, "step": 190430 }, { "epoch": 54.056202100482544, "grad_norm": 0.019516758620738983, "learning_rate": 4.596565427192734e-05, "loss": 0.0003059186041355133, "step": 190440 }, { "epoch": 54.05904059040591, "grad_norm": 0.3191641867160797, "learning_rate": 4.596281578200398e-05, "loss": 0.0025381851941347123, "step": 190450 }, { "epoch": 54.06187908032926, "grad_norm": 0.001976362429559231, "learning_rate": 4.5959977292080614e-05, "loss": 0.0009740570560097695, "step": 190460 }, { "epoch": 54.064717570252625, "grad_norm": 15.624918937683105, "learning_rate": 4.5957138802157255e-05, "loss": 0.007219599932432175, "step": 190470 }, { "epoch": 54.06755606017599, "grad_norm": 0.21728704869747162, "learning_rate": 4.5954300312233896e-05, "loss": 0.0002430493012070656, "step": 190480 }, { "epoch": 54.070394550099344, "grad_norm": 0.04083319380879402, "learning_rate": 4.595146182231053e-05, "loss": 0.0022603927180171015, "step": 190490 }, { "epoch": 54.07323304002271, "grad_norm": 0.21584297716617584, "learning_rate": 4.594862333238717e-05, "loss": 0.013448812067508698, "step": 190500 }, { "epoch": 54.07323304002271, "eval_accuracy": 0.9806066001144529, "eval_loss": 0.07817097008228302, "eval_runtime": 42.0569, "eval_samples_per_second": 373.946, "eval_steps_per_second": 5.849, "step": 190500 }, { "epoch": 54.07607152994607, "grad_norm": 0.04507270082831383, "learning_rate": 4.5945784842463814e-05, "loss": 0.0035927526652812956, "step": 190510 }, { "epoch": 54.07891001986943, "grad_norm": 0.030005767941474915, "learning_rate": 4.594294635254045e-05, "loss": 0.00028201248496770857, "step": 190520 }, { "epoch": 54.08174850979279, "grad_norm": 0.008833479136228561, "learning_rate": 4.594010786261709e-05, "loss": 9.588170796632766e-05, "step": 190530 }, { "epoch": 54.08458699971615, "grad_norm": 0.013142202980816364, "learning_rate": 4.593726937269373e-05, "loss": 0.0003481704741716385, "step": 190540 }, { "epoch": 54.087425489639514, "grad_norm": 0.003511840710416436, "learning_rate": 4.5934430882770366e-05, "loss": 8.17909836769104e-05, "step": 190550 }, { "epoch": 54.09026397956287, "grad_norm": 0.004576374311000109, "learning_rate": 4.593159239284701e-05, "loss": 0.0008722925558686256, "step": 190560 }, { "epoch": 54.09310246948623, "grad_norm": 0.020194001495838165, "learning_rate": 4.592875390292365e-05, "loss": 0.0021860605105757714, "step": 190570 }, { "epoch": 54.095940959409596, "grad_norm": 0.10217925906181335, "learning_rate": 4.592591541300029e-05, "loss": 0.0018097557127475738, "step": 190580 }, { "epoch": 54.09877944933295, "grad_norm": 0.0870048999786377, "learning_rate": 4.5923076923076924e-05, "loss": 0.00021804794669151307, "step": 190590 }, { "epoch": 54.101617939256315, "grad_norm": 0.05358836054801941, "learning_rate": 4.592023843315356e-05, "loss": 0.00037942826747894287, "step": 190600 }, { "epoch": 54.10445642917968, "grad_norm": 0.026259899139404297, "learning_rate": 4.591739994323021e-05, "loss": 0.0008878653869032859, "step": 190610 }, { "epoch": 54.10729491910304, "grad_norm": 0.011564427986741066, "learning_rate": 4.591456145330684e-05, "loss": 0.0002399969846010208, "step": 190620 }, { "epoch": 54.110133409026396, "grad_norm": 0.04251236096024513, "learning_rate": 4.591172296338348e-05, "loss": 0.0011945299804210663, "step": 190630 }, { "epoch": 54.11297189894976, "grad_norm": 0.020655237138271332, "learning_rate": 4.5908884473460124e-05, "loss": 0.0012996621429920197, "step": 190640 }, { "epoch": 54.11581038887312, "grad_norm": 0.026955291628837585, "learning_rate": 4.590604598353676e-05, "loss": 0.0003188159316778183, "step": 190650 }, { "epoch": 54.11864887879648, "grad_norm": 0.012146062217652798, "learning_rate": 4.59032074936134e-05, "loss": 0.0002993375062942505, "step": 190660 }, { "epoch": 54.12148736871984, "grad_norm": 0.016726054251194, "learning_rate": 4.590036900369004e-05, "loss": 0.0007043056190013886, "step": 190670 }, { "epoch": 54.1243258586432, "grad_norm": 0.036568533629179, "learning_rate": 4.5897530513766676e-05, "loss": 0.00035764239728450773, "step": 190680 }, { "epoch": 54.12716434856656, "grad_norm": 0.0213422030210495, "learning_rate": 4.589469202384332e-05, "loss": 0.0005284907296299934, "step": 190690 }, { "epoch": 54.13000283848992, "grad_norm": 0.017272479832172394, "learning_rate": 4.589185353391995e-05, "loss": 0.0018841285258531571, "step": 190700 }, { "epoch": 54.132841328413285, "grad_norm": 0.06840751320123672, "learning_rate": 4.5889015043996594e-05, "loss": 0.00534508191049099, "step": 190710 }, { "epoch": 54.13567981833665, "grad_norm": 0.06482769548892975, "learning_rate": 4.5886176554073235e-05, "loss": 0.005528939515352249, "step": 190720 }, { "epoch": 54.138518308260004, "grad_norm": 14.253637313842773, "learning_rate": 4.588333806414987e-05, "loss": 0.00868128016591072, "step": 190730 }, { "epoch": 54.14135679818337, "grad_norm": 0.17376567423343658, "learning_rate": 4.588049957422652e-05, "loss": 0.0011878704652190208, "step": 190740 }, { "epoch": 54.14419528810673, "grad_norm": 0.09293340891599655, "learning_rate": 4.587766108430315e-05, "loss": 0.0036683909595012663, "step": 190750 }, { "epoch": 54.147033778030085, "grad_norm": 0.009795104153454304, "learning_rate": 4.587482259437979e-05, "loss": 0.0010532809421420098, "step": 190760 }, { "epoch": 54.14987226795345, "grad_norm": 10.001526832580566, "learning_rate": 4.5871984104456435e-05, "loss": 0.007667312771081925, "step": 190770 }, { "epoch": 54.15271075787681, "grad_norm": 0.024048693478107452, "learning_rate": 4.586914561453307e-05, "loss": 0.008118594437837601, "step": 190780 }, { "epoch": 54.15554924780017, "grad_norm": 1.2737699747085571, "learning_rate": 4.586630712460971e-05, "loss": 0.0011240888386964798, "step": 190790 }, { "epoch": 54.15838773772353, "grad_norm": 9.929186820983887, "learning_rate": 4.586346863468635e-05, "loss": 0.007573813199996948, "step": 190800 }, { "epoch": 54.16122622764689, "grad_norm": 0.01637445017695427, "learning_rate": 4.586063014476299e-05, "loss": 0.0009152283892035484, "step": 190810 }, { "epoch": 54.164064717570255, "grad_norm": 0.09937369078397751, "learning_rate": 4.585779165483963e-05, "loss": 0.00044987481087446215, "step": 190820 }, { "epoch": 54.16690320749361, "grad_norm": 0.09698852896690369, "learning_rate": 4.585495316491626e-05, "loss": 0.0006941083818674087, "step": 190830 }, { "epoch": 54.169741697416974, "grad_norm": 0.054464906454086304, "learning_rate": 4.5852114674992905e-05, "loss": 0.00022445842623710632, "step": 190840 }, { "epoch": 54.17258018734034, "grad_norm": 0.3711312711238861, "learning_rate": 4.5849276185069546e-05, "loss": 0.0007343536242842674, "step": 190850 }, { "epoch": 54.17541867726369, "grad_norm": 0.01842070184648037, "learning_rate": 4.584643769514618e-05, "loss": 7.618293166160584e-05, "step": 190860 }, { "epoch": 54.178257167187056, "grad_norm": 0.0014800382778048515, "learning_rate": 4.584359920522283e-05, "loss": 0.0004112543538212776, "step": 190870 }, { "epoch": 54.18109565711042, "grad_norm": 7.100272178649902, "learning_rate": 4.584076071529946e-05, "loss": 0.0014778519049286841, "step": 190880 }, { "epoch": 54.18393414703378, "grad_norm": 0.055788662284612656, "learning_rate": 4.58379222253761e-05, "loss": 0.0002687355503439903, "step": 190890 }, { "epoch": 54.18677263695714, "grad_norm": 1.2506282329559326, "learning_rate": 4.5835083735452746e-05, "loss": 0.004138866066932678, "step": 190900 }, { "epoch": 54.1896111268805, "grad_norm": 0.08004114776849747, "learning_rate": 4.583224524552938e-05, "loss": 0.0014463644474744797, "step": 190910 }, { "epoch": 54.19244961680386, "grad_norm": 0.029444565996527672, "learning_rate": 4.582940675560602e-05, "loss": 0.005632150545716285, "step": 190920 }, { "epoch": 54.19528810672722, "grad_norm": 0.4362070858478546, "learning_rate": 4.5826568265682657e-05, "loss": 0.0004934361204504967, "step": 190930 }, { "epoch": 54.19812659665058, "grad_norm": 2.0412962436676025, "learning_rate": 4.58237297757593e-05, "loss": 0.0022028712555766106, "step": 190940 }, { "epoch": 54.200965086573945, "grad_norm": 0.15375511348247528, "learning_rate": 4.582089128583594e-05, "loss": 0.002274056151509285, "step": 190950 }, { "epoch": 54.2038035764973, "grad_norm": 0.6254417300224304, "learning_rate": 4.5818052795912574e-05, "loss": 0.000630655325949192, "step": 190960 }, { "epoch": 54.20664206642066, "grad_norm": 0.08883338421583176, "learning_rate": 4.5815214305989215e-05, "loss": 0.0003741692751646042, "step": 190970 }, { "epoch": 54.209480556344026, "grad_norm": 0.004453449510037899, "learning_rate": 4.581237581606586e-05, "loss": 0.0003468029201030731, "step": 190980 }, { "epoch": 54.21231904626739, "grad_norm": 0.06574394553899765, "learning_rate": 4.580953732614249e-05, "loss": 0.0009920930489897728, "step": 190990 }, { "epoch": 54.215157536190745, "grad_norm": 0.006683845538645983, "learning_rate": 4.580669883621914e-05, "loss": 0.0014210812747478484, "step": 191000 }, { "epoch": 54.215157536190745, "eval_accuracy": 0.9824505627265213, "eval_loss": 0.07033862173557281, "eval_runtime": 40.7718, "eval_samples_per_second": 385.732, "eval_steps_per_second": 6.034, "step": 191000 }, { "epoch": 54.21799602611411, "grad_norm": 0.04931987076997757, "learning_rate": 4.5803860346295774e-05, "loss": 0.003402175009250641, "step": 191010 }, { "epoch": 54.22083451603747, "grad_norm": 0.12875889241695404, "learning_rate": 4.580102185637241e-05, "loss": 0.0016660764813423157, "step": 191020 }, { "epoch": 54.223673005960826, "grad_norm": 0.014981526881456375, "learning_rate": 4.579818336644905e-05, "loss": 0.003883081674575806, "step": 191030 }, { "epoch": 54.22651149588419, "grad_norm": 0.029408229514956474, "learning_rate": 4.579534487652569e-05, "loss": 0.00014332812279462814, "step": 191040 }, { "epoch": 54.22934998580755, "grad_norm": 0.04651094600558281, "learning_rate": 4.579250638660233e-05, "loss": 0.001131337508559227, "step": 191050 }, { "epoch": 54.23218847573091, "grad_norm": 0.1428709626197815, "learning_rate": 4.578966789667897e-05, "loss": 0.0002713773399591446, "step": 191060 }, { "epoch": 54.23502696565427, "grad_norm": 0.04517003521323204, "learning_rate": 4.578682940675561e-05, "loss": 0.0015693699941039085, "step": 191070 }, { "epoch": 54.237865455577634, "grad_norm": 0.006915591657161713, "learning_rate": 4.578399091683225e-05, "loss": 0.00841061696410179, "step": 191080 }, { "epoch": 54.240703945501, "grad_norm": 0.022629378363490105, "learning_rate": 4.5781152426908885e-05, "loss": 0.0006859462708234787, "step": 191090 }, { "epoch": 54.24354243542435, "grad_norm": 1.4509940147399902, "learning_rate": 4.5778313936985526e-05, "loss": 0.0023778043687343597, "step": 191100 }, { "epoch": 54.246380925347715, "grad_norm": 0.007038708310574293, "learning_rate": 4.577547544706217e-05, "loss": 0.0005652736872434616, "step": 191110 }, { "epoch": 54.24921941527108, "grad_norm": 0.005898010917007923, "learning_rate": 4.57726369571388e-05, "loss": 0.0007489627227187156, "step": 191120 }, { "epoch": 54.252057905194434, "grad_norm": 0.6912152171134949, "learning_rate": 4.5769798467215443e-05, "loss": 0.0006272399798035622, "step": 191130 }, { "epoch": 54.2548963951178, "grad_norm": 0.19058503210544586, "learning_rate": 4.5766959977292085e-05, "loss": 0.0034085988998413088, "step": 191140 }, { "epoch": 54.25773488504116, "grad_norm": 0.0036099208518862724, "learning_rate": 4.576412148736872e-05, "loss": 0.000684843398630619, "step": 191150 }, { "epoch": 54.260573374964515, "grad_norm": 0.37888476252555847, "learning_rate": 4.576128299744536e-05, "loss": 0.0005179587751626968, "step": 191160 }, { "epoch": 54.26341186488788, "grad_norm": 3.251241683959961, "learning_rate": 4.5758444507522e-05, "loss": 0.0017612941563129425, "step": 191170 }, { "epoch": 54.26625035481124, "grad_norm": 0.009697280824184418, "learning_rate": 4.575560601759864e-05, "loss": 0.0008218657225370407, "step": 191180 }, { "epoch": 54.269088844734604, "grad_norm": 0.02537047304213047, "learning_rate": 4.575276752767528e-05, "loss": 0.0001945171505212784, "step": 191190 }, { "epoch": 54.27192733465796, "grad_norm": 0.11907897144556046, "learning_rate": 4.574992903775192e-05, "loss": 0.00019554737955331802, "step": 191200 }, { "epoch": 54.27476582458132, "grad_norm": 0.014189692214131355, "learning_rate": 4.574709054782856e-05, "loss": 0.0008047556504607201, "step": 191210 }, { "epoch": 54.277604314504686, "grad_norm": 0.004385105334222317, "learning_rate": 4.5744252057905195e-05, "loss": 0.0003991687670350075, "step": 191220 }, { "epoch": 54.28044280442804, "grad_norm": 0.039046045392751694, "learning_rate": 4.574141356798183e-05, "loss": 0.0007608089596033097, "step": 191230 }, { "epoch": 54.283281294351404, "grad_norm": 0.3058881163597107, "learning_rate": 4.573857507805848e-05, "loss": 0.00048110298812389376, "step": 191240 }, { "epoch": 54.28611978427477, "grad_norm": 0.10003606230020523, "learning_rate": 4.573573658813511e-05, "loss": 0.0005426384508609772, "step": 191250 }, { "epoch": 54.28895827419813, "grad_norm": 0.01898346096277237, "learning_rate": 4.5732898098211754e-05, "loss": 0.001714981161057949, "step": 191260 }, { "epoch": 54.291796764121486, "grad_norm": 2.1546316146850586, "learning_rate": 4.5730059608288396e-05, "loss": 0.0007059680297970772, "step": 191270 }, { "epoch": 54.29463525404485, "grad_norm": 0.007196248974651098, "learning_rate": 4.572722111836503e-05, "loss": 0.00036553777754306794, "step": 191280 }, { "epoch": 54.29747374396821, "grad_norm": 0.07236552238464355, "learning_rate": 4.572438262844167e-05, "loss": 0.004900218546390533, "step": 191290 }, { "epoch": 54.30031223389157, "grad_norm": 0.053825486451387405, "learning_rate": 4.572154413851831e-05, "loss": 0.000808263011276722, "step": 191300 }, { "epoch": 54.30315072381493, "grad_norm": 0.09848487377166748, "learning_rate": 4.571870564859495e-05, "loss": 0.0003578662872314453, "step": 191310 }, { "epoch": 54.30598921373829, "grad_norm": 0.05222589150071144, "learning_rate": 4.571586715867159e-05, "loss": 0.0011047899723052978, "step": 191320 }, { "epoch": 54.30882770366165, "grad_norm": 0.6093037128448486, "learning_rate": 4.5713028668748223e-05, "loss": 0.0014565475285053253, "step": 191330 }, { "epoch": 54.31166619358501, "grad_norm": 0.04963597282767296, "learning_rate": 4.571019017882487e-05, "loss": 0.0013636587187647819, "step": 191340 }, { "epoch": 54.314504683508375, "grad_norm": 0.08743149787187576, "learning_rate": 4.5707351688901506e-05, "loss": 0.0019537238404154778, "step": 191350 }, { "epoch": 54.31734317343174, "grad_norm": 0.015919100493192673, "learning_rate": 4.570451319897814e-05, "loss": 0.0008477829396724701, "step": 191360 }, { "epoch": 54.32018166335509, "grad_norm": 0.11332909017801285, "learning_rate": 4.570167470905479e-05, "loss": 0.0006107047200202941, "step": 191370 }, { "epoch": 54.323020153278456, "grad_norm": 0.04986986517906189, "learning_rate": 4.5698836219131424e-05, "loss": 0.0005971897393465042, "step": 191380 }, { "epoch": 54.32585864320182, "grad_norm": 0.06060997396707535, "learning_rate": 4.5695997729208065e-05, "loss": 0.002334235608577728, "step": 191390 }, { "epoch": 54.328697133125175, "grad_norm": 0.02502197027206421, "learning_rate": 4.5693159239284706e-05, "loss": 0.0022453505545854567, "step": 191400 }, { "epoch": 54.33153562304854, "grad_norm": 0.06753949075937271, "learning_rate": 4.569032074936134e-05, "loss": 0.0018291622400283813, "step": 191410 }, { "epoch": 54.3343741129719, "grad_norm": 0.01085540559142828, "learning_rate": 4.568748225943798e-05, "loss": 0.007162697613239288, "step": 191420 }, { "epoch": 54.33721260289526, "grad_norm": 0.1285451203584671, "learning_rate": 4.568464376951462e-05, "loss": 0.0005025934427976608, "step": 191430 }, { "epoch": 54.34005109281862, "grad_norm": 0.07590620219707489, "learning_rate": 4.568180527959126e-05, "loss": 0.00019328538328409195, "step": 191440 }, { "epoch": 54.34288958274198, "grad_norm": 0.024827685207128525, "learning_rate": 4.56789667896679e-05, "loss": 0.0006661336869001388, "step": 191450 }, { "epoch": 54.345728072665345, "grad_norm": 0.03126230463385582, "learning_rate": 4.5676128299744534e-05, "loss": 0.00035784058272838595, "step": 191460 }, { "epoch": 54.3485665625887, "grad_norm": 0.05159525200724602, "learning_rate": 4.567328980982118e-05, "loss": 0.0029566904529929163, "step": 191470 }, { "epoch": 54.351405052512064, "grad_norm": 0.0022437029983848333, "learning_rate": 4.567045131989782e-05, "loss": 0.000482875294983387, "step": 191480 }, { "epoch": 54.35424354243543, "grad_norm": 0.1233314722776413, "learning_rate": 4.566761282997445e-05, "loss": 0.0001694338396191597, "step": 191490 }, { "epoch": 54.35708203235878, "grad_norm": 0.007723743095993996, "learning_rate": 4.56647743400511e-05, "loss": 0.0016968781128525734, "step": 191500 }, { "epoch": 54.35708203235878, "eval_accuracy": 0.9841673555032746, "eval_loss": 0.06096944585442543, "eval_runtime": 47.7065, "eval_samples_per_second": 329.662, "eval_steps_per_second": 5.157, "step": 191500 }, { "epoch": 54.359920522282145, "grad_norm": 0.004273984115570784, "learning_rate": 4.5661935850127734e-05, "loss": 0.0002090759575366974, "step": 191510 }, { "epoch": 54.36275901220551, "grad_norm": 0.012817904353141785, "learning_rate": 4.5659097360204376e-05, "loss": 0.0002112004905939102, "step": 191520 }, { "epoch": 54.365597502128864, "grad_norm": 0.013240176253020763, "learning_rate": 4.565625887028101e-05, "loss": 7.420387119054795e-05, "step": 191530 }, { "epoch": 54.36843599205223, "grad_norm": 8.263883590698242, "learning_rate": 4.565342038035765e-05, "loss": 0.0026922281831502914, "step": 191540 }, { "epoch": 54.37127448197559, "grad_norm": 0.007833377458155155, "learning_rate": 4.565058189043429e-05, "loss": 0.00024509299546480177, "step": 191550 }, { "epoch": 54.37411297189895, "grad_norm": 0.014611168764531612, "learning_rate": 4.564774340051093e-05, "loss": 0.002987583726644516, "step": 191560 }, { "epoch": 54.37695146182231, "grad_norm": 0.011737599037587643, "learning_rate": 4.564490491058757e-05, "loss": 0.0009190520271658897, "step": 191570 }, { "epoch": 54.37978995174567, "grad_norm": 0.012013218365609646, "learning_rate": 4.564206642066421e-05, "loss": 0.0001667054370045662, "step": 191580 }, { "epoch": 54.382628441669034, "grad_norm": 0.024258917197585106, "learning_rate": 4.5639227930740845e-05, "loss": 0.0002763865515589714, "step": 191590 }, { "epoch": 54.38546693159239, "grad_norm": 0.9652913212776184, "learning_rate": 4.5636389440817486e-05, "loss": 0.00038535892963409424, "step": 191600 }, { "epoch": 54.38830542151575, "grad_norm": 0.16906869411468506, "learning_rate": 4.563355095089413e-05, "loss": 0.0004235204309225082, "step": 191610 }, { "epoch": 54.391143911439116, "grad_norm": 0.05568309873342514, "learning_rate": 4.563071246097076e-05, "loss": 0.0016136720776557922, "step": 191620 }, { "epoch": 54.39398240136248, "grad_norm": 0.02753007411956787, "learning_rate": 4.5627873971047404e-05, "loss": 0.00044361911714077, "step": 191630 }, { "epoch": 54.396820891285834, "grad_norm": 0.03895726054906845, "learning_rate": 4.5625035481124045e-05, "loss": 0.0001481626182794571, "step": 191640 }, { "epoch": 54.3996593812092, "grad_norm": 0.013151643797755241, "learning_rate": 4.562219699120068e-05, "loss": 0.0001843247562646866, "step": 191650 }, { "epoch": 54.40249787113256, "grad_norm": 0.4053748548030853, "learning_rate": 4.561935850127732e-05, "loss": 0.001143617369234562, "step": 191660 }, { "epoch": 54.405336361055916, "grad_norm": 0.0418759323656559, "learning_rate": 4.561652001135396e-05, "loss": 0.005377231538295746, "step": 191670 }, { "epoch": 54.40817485097928, "grad_norm": 0.0037808562628924847, "learning_rate": 4.5613681521430604e-05, "loss": 0.00025658551603555677, "step": 191680 }, { "epoch": 54.41101334090264, "grad_norm": 0.009715386666357517, "learning_rate": 4.561084303150724e-05, "loss": 0.00097472183406353, "step": 191690 }, { "epoch": 54.413851830826, "grad_norm": 4.065836429595947, "learning_rate": 4.560800454158388e-05, "loss": 0.0008434167131781578, "step": 191700 }, { "epoch": 54.41669032074936, "grad_norm": 0.03886028379201889, "learning_rate": 4.560516605166052e-05, "loss": 0.0013890322297811508, "step": 191710 }, { "epoch": 54.41952881067272, "grad_norm": 0.03206910938024521, "learning_rate": 4.5602327561737156e-05, "loss": 0.0008830081671476365, "step": 191720 }, { "epoch": 54.422367300596086, "grad_norm": 0.5488797426223755, "learning_rate": 4.55994890718138e-05, "loss": 0.0018126962706446648, "step": 191730 }, { "epoch": 54.42520579051944, "grad_norm": 0.0393313430249691, "learning_rate": 4.559665058189044e-05, "loss": 0.0015893135219812392, "step": 191740 }, { "epoch": 54.428044280442805, "grad_norm": 0.026315603405237198, "learning_rate": 4.559381209196707e-05, "loss": 0.000504329614341259, "step": 191750 }, { "epoch": 54.43088277036617, "grad_norm": 0.10197784006595612, "learning_rate": 4.5590973602043714e-05, "loss": 0.0030629925429821016, "step": 191760 }, { "epoch": 54.43372126028952, "grad_norm": 0.033348679542541504, "learning_rate": 4.5588135112120356e-05, "loss": 0.0012871906161308288, "step": 191770 }, { "epoch": 54.436559750212886, "grad_norm": 0.0039787269197404385, "learning_rate": 4.558529662219699e-05, "loss": 0.0008902078494429589, "step": 191780 }, { "epoch": 54.43939824013625, "grad_norm": 0.7328409552574158, "learning_rate": 4.558245813227363e-05, "loss": 0.0005648959428071976, "step": 191790 }, { "epoch": 54.442236730059605, "grad_norm": 0.03885439783334732, "learning_rate": 4.557961964235027e-05, "loss": 0.0003925073891878128, "step": 191800 }, { "epoch": 54.44507521998297, "grad_norm": 0.011614996939897537, "learning_rate": 4.5576781152426915e-05, "loss": 0.0001165175810456276, "step": 191810 }, { "epoch": 54.44791370990633, "grad_norm": 0.04014132544398308, "learning_rate": 4.557394266250355e-05, "loss": 0.00021155867725610732, "step": 191820 }, { "epoch": 54.450752199829694, "grad_norm": 0.006200533825904131, "learning_rate": 4.5571104172580184e-05, "loss": 0.0012742673978209495, "step": 191830 }, { "epoch": 54.45359068975305, "grad_norm": 0.010838789865374565, "learning_rate": 4.556826568265683e-05, "loss": 0.001877022720873356, "step": 191840 }, { "epoch": 54.45642917967641, "grad_norm": 0.3556594252586365, "learning_rate": 4.5565711041725805e-05, "loss": 0.0064496234059333805, "step": 191850 }, { "epoch": 54.459267669599775, "grad_norm": 0.0027635330334305763, "learning_rate": 4.5562872551802446e-05, "loss": 0.00023214109241962433, "step": 191860 }, { "epoch": 54.46210615952313, "grad_norm": 0.3707144260406494, "learning_rate": 4.556003406187908e-05, "loss": 0.00030602272599935534, "step": 191870 }, { "epoch": 54.464944649446494, "grad_norm": 0.0381397120654583, "learning_rate": 4.555719557195572e-05, "loss": 0.0002945391461253166, "step": 191880 }, { "epoch": 54.46778313936986, "grad_norm": 0.00830460712313652, "learning_rate": 4.5554357082032364e-05, "loss": 0.00022785216569900512, "step": 191890 }, { "epoch": 54.47062162929321, "grad_norm": 0.030947253108024597, "learning_rate": 4.5551518592109e-05, "loss": 0.0001427600160241127, "step": 191900 }, { "epoch": 54.473460119216575, "grad_norm": 0.1472376137971878, "learning_rate": 4.554868010218564e-05, "loss": 0.00018120408058166503, "step": 191910 }, { "epoch": 54.47629860913994, "grad_norm": 0.031989723443984985, "learning_rate": 4.554584161226228e-05, "loss": 9.629260748624801e-05, "step": 191920 }, { "epoch": 54.4791370990633, "grad_norm": 0.04816126450896263, "learning_rate": 4.5543003122338916e-05, "loss": 0.00014143176376819612, "step": 191930 }, { "epoch": 54.48197558898666, "grad_norm": 0.01253377553075552, "learning_rate": 4.554016463241556e-05, "loss": 0.00028610434383153916, "step": 191940 }, { "epoch": 54.48481407891002, "grad_norm": 0.07632718980312347, "learning_rate": 4.55373261424922e-05, "loss": 0.00015967898070812225, "step": 191950 }, { "epoch": 54.48765256883338, "grad_norm": 0.04659533128142357, "learning_rate": 4.553448765256883e-05, "loss": 6.907153874635696e-05, "step": 191960 }, { "epoch": 54.49049105875674, "grad_norm": 0.2823970913887024, "learning_rate": 4.5531649162645475e-05, "loss": 0.001120796613395214, "step": 191970 }, { "epoch": 54.4933295486801, "grad_norm": 0.03533518314361572, "learning_rate": 4.5528810672722116e-05, "loss": 0.002152743563055992, "step": 191980 }, { "epoch": 54.496168038603464, "grad_norm": 0.010019311681389809, "learning_rate": 4.552597218279876e-05, "loss": 0.00019864998757839202, "step": 191990 }, { "epoch": 54.49900652852683, "grad_norm": 0.09484206140041351, "learning_rate": 4.552313369287539e-05, "loss": 8.710883557796478e-05, "step": 192000 }, { "epoch": 54.49900652852683, "eval_accuracy": 0.983976600750302, "eval_loss": 0.06531690806150436, "eval_runtime": 51.9822, "eval_samples_per_second": 302.546, "eval_steps_per_second": 4.732, "step": 192000 }, { "epoch": 54.50184501845018, "grad_norm": 0.014974432997405529, "learning_rate": 4.5520295202952026e-05, "loss": 0.00014268960803747176, "step": 192010 }, { "epoch": 54.504683508373546, "grad_norm": 0.006548742763698101, "learning_rate": 4.5517456713028675e-05, "loss": 6.0793571174144744e-05, "step": 192020 }, { "epoch": 54.50752199829691, "grad_norm": 0.015164359472692013, "learning_rate": 4.551461822310531e-05, "loss": 8.090361952781678e-05, "step": 192030 }, { "epoch": 54.510360488220265, "grad_norm": 0.15551909804344177, "learning_rate": 4.551177973318195e-05, "loss": 0.00014599543064832687, "step": 192040 }, { "epoch": 54.51319897814363, "grad_norm": 0.005726775620132685, "learning_rate": 4.550894124325859e-05, "loss": 4.8694200813770294e-05, "step": 192050 }, { "epoch": 54.51603746806699, "grad_norm": 0.005713941063731909, "learning_rate": 4.5506102753335227e-05, "loss": 0.00048201102763414385, "step": 192060 }, { "epoch": 54.518875957990346, "grad_norm": 0.05574870482087135, "learning_rate": 4.550326426341187e-05, "loss": 0.004895606637001037, "step": 192070 }, { "epoch": 54.52171444791371, "grad_norm": 0.12417971342802048, "learning_rate": 4.550042577348851e-05, "loss": 0.0008541252464056015, "step": 192080 }, { "epoch": 54.52455293783707, "grad_norm": 0.01127287931740284, "learning_rate": 4.5497587283565144e-05, "loss": 0.007612506300210953, "step": 192090 }, { "epoch": 54.527391427760435, "grad_norm": 0.007544501684606075, "learning_rate": 4.5494748793641785e-05, "loss": 0.0008919462561607361, "step": 192100 }, { "epoch": 54.53022991768379, "grad_norm": 0.26456084847450256, "learning_rate": 4.549191030371842e-05, "loss": 0.000513894110918045, "step": 192110 }, { "epoch": 54.53306840760715, "grad_norm": 8.569132804870605, "learning_rate": 4.548907181379506e-05, "loss": 0.0026318751275539397, "step": 192120 }, { "epoch": 54.535906897530516, "grad_norm": 0.003394099185243249, "learning_rate": 4.54862333238717e-05, "loss": 0.003697022795677185, "step": 192130 }, { "epoch": 54.53874538745387, "grad_norm": 0.08504726737737656, "learning_rate": 4.548339483394834e-05, "loss": 0.004259496554732322, "step": 192140 }, { "epoch": 54.541583877377235, "grad_norm": 0.3000730872154236, "learning_rate": 4.5480556344024985e-05, "loss": 0.00020918548107147218, "step": 192150 }, { "epoch": 54.5444223673006, "grad_norm": 0.06770108640193939, "learning_rate": 4.547771785410162e-05, "loss": 0.0002525376155972481, "step": 192160 }, { "epoch": 54.547260857223954, "grad_norm": 0.010367273353040218, "learning_rate": 4.5474879364178255e-05, "loss": 0.0016673695296049117, "step": 192170 }, { "epoch": 54.55009934714732, "grad_norm": 0.17157067358493805, "learning_rate": 4.54720408742549e-05, "loss": 0.003189980983734131, "step": 192180 }, { "epoch": 54.55293783707068, "grad_norm": 0.15250824391841888, "learning_rate": 4.546920238433154e-05, "loss": 9.948145598173141e-05, "step": 192190 }, { "epoch": 54.55577632699404, "grad_norm": 0.023107493296265602, "learning_rate": 4.546636389440818e-05, "loss": 8.7742879986763e-05, "step": 192200 }, { "epoch": 54.5586148169174, "grad_norm": 0.013622024096548557, "learning_rate": 4.546352540448481e-05, "loss": 0.00013998225331306456, "step": 192210 }, { "epoch": 54.56145330684076, "grad_norm": 0.045190803706645966, "learning_rate": 4.5460686914561455e-05, "loss": 0.00030763652175664903, "step": 192220 }, { "epoch": 54.564291796764124, "grad_norm": 0.03588143363595009, "learning_rate": 4.5457848424638096e-05, "loss": 0.0003246372565627098, "step": 192230 }, { "epoch": 54.56713028668748, "grad_norm": 0.016458630561828613, "learning_rate": 4.545500993471473e-05, "loss": 0.00039087049663066864, "step": 192240 }, { "epoch": 54.56996877661084, "grad_norm": 0.09335679560899734, "learning_rate": 4.545217144479137e-05, "loss": 0.0004141833633184433, "step": 192250 }, { "epoch": 54.572807266534205, "grad_norm": 0.009585287421941757, "learning_rate": 4.544933295486801e-05, "loss": 0.00012166332453489304, "step": 192260 }, { "epoch": 54.57564575645756, "grad_norm": 0.008399480022490025, "learning_rate": 4.544649446494465e-05, "loss": 0.0009557755663990974, "step": 192270 }, { "epoch": 54.578484246380924, "grad_norm": 0.2989215552806854, "learning_rate": 4.5443655975021296e-05, "loss": 0.00032878555357456206, "step": 192280 }, { "epoch": 54.58132273630429, "grad_norm": 0.057394783943891525, "learning_rate": 4.544081748509793e-05, "loss": 0.00024279970675706864, "step": 192290 }, { "epoch": 54.58416122622765, "grad_norm": 0.222061425447464, "learning_rate": 4.5437978995174565e-05, "loss": 0.0005757447332143783, "step": 192300 }, { "epoch": 54.586999716151006, "grad_norm": 0.02650044485926628, "learning_rate": 4.543514050525121e-05, "loss": 0.0007023854181170463, "step": 192310 }, { "epoch": 54.58983820607437, "grad_norm": 0.12348424643278122, "learning_rate": 4.543230201532785e-05, "loss": 0.00294289905577898, "step": 192320 }, { "epoch": 54.59267669599773, "grad_norm": 0.020416170358657837, "learning_rate": 4.542946352540449e-05, "loss": 0.0003790454939007759, "step": 192330 }, { "epoch": 54.59551518592109, "grad_norm": 0.054905153810977936, "learning_rate": 4.5426625035481124e-05, "loss": 0.0001643965020775795, "step": 192340 }, { "epoch": 54.59835367584445, "grad_norm": 0.02084040641784668, "learning_rate": 4.5423786545557765e-05, "loss": 0.0002922751009464264, "step": 192350 }, { "epoch": 54.60119216576781, "grad_norm": 0.13534702360630035, "learning_rate": 4.542094805563441e-05, "loss": 0.0017129583284258842, "step": 192360 }, { "epoch": 54.604030655691176, "grad_norm": 0.08847290277481079, "learning_rate": 4.541810956571104e-05, "loss": 0.0003975868225097656, "step": 192370 }, { "epoch": 54.60686914561453, "grad_norm": 0.1993718147277832, "learning_rate": 4.541527107578768e-05, "loss": 0.000529254786670208, "step": 192380 }, { "epoch": 54.609707635537895, "grad_norm": 0.029777465388178825, "learning_rate": 4.5412432585864324e-05, "loss": 0.0005756299942731857, "step": 192390 }, { "epoch": 54.61254612546126, "grad_norm": 12.226848602294922, "learning_rate": 4.540959409594096e-05, "loss": 0.006296759098768234, "step": 192400 }, { "epoch": 54.61538461538461, "grad_norm": 0.046338196843862534, "learning_rate": 4.540675560601761e-05, "loss": 0.003153657540678978, "step": 192410 }, { "epoch": 54.618223105307976, "grad_norm": 0.1336543709039688, "learning_rate": 4.540391711609424e-05, "loss": 0.0011418418958783149, "step": 192420 }, { "epoch": 54.62106159523134, "grad_norm": 0.02257530204951763, "learning_rate": 4.5401078626170876e-05, "loss": 0.0022623199969530106, "step": 192430 }, { "epoch": 54.623900085154695, "grad_norm": 0.09807053208351135, "learning_rate": 4.539824013624752e-05, "loss": 0.0002616344019770622, "step": 192440 }, { "epoch": 54.62673857507806, "grad_norm": 0.015347742475569248, "learning_rate": 4.539540164632416e-05, "loss": 0.0019212989136576653, "step": 192450 }, { "epoch": 54.62957706500142, "grad_norm": 0.039031773805618286, "learning_rate": 4.53925631564008e-05, "loss": 0.00025289487093687056, "step": 192460 }, { "epoch": 54.63241555492478, "grad_norm": 0.0020961700938642025, "learning_rate": 4.5389724666477435e-05, "loss": 0.0007729107514023781, "step": 192470 }, { "epoch": 54.63525404484814, "grad_norm": 0.10191962867975235, "learning_rate": 4.5386886176554076e-05, "loss": 0.0022242872044444086, "step": 192480 }, { "epoch": 54.6380925347715, "grad_norm": 0.01481013372540474, "learning_rate": 4.538404768663072e-05, "loss": 0.0003489537164568901, "step": 192490 }, { "epoch": 54.640931024694865, "grad_norm": 0.051633570343256, "learning_rate": 4.538120919670735e-05, "loss": 0.0004135670140385628, "step": 192500 }, { "epoch": 54.640931024694865, "eval_accuracy": 0.9828320722324665, "eval_loss": 0.07021360844373703, "eval_runtime": 63.7195, "eval_samples_per_second": 246.816, "eval_steps_per_second": 3.861, "step": 192500 }, { "epoch": 54.64376951461822, "grad_norm": 0.026447851210832596, "learning_rate": 4.5378370706783994e-05, "loss": 0.007306606322526932, "step": 192510 }, { "epoch": 54.646608004541584, "grad_norm": 0.03945891186594963, "learning_rate": 4.5375532216860635e-05, "loss": 0.00027496572583913804, "step": 192520 }, { "epoch": 54.64944649446495, "grad_norm": 0.01646108366549015, "learning_rate": 4.537269372693727e-05, "loss": 0.007318487018346786, "step": 192530 }, { "epoch": 54.6522849843883, "grad_norm": 0.49642083048820496, "learning_rate": 4.536985523701391e-05, "loss": 0.00024075936526060104, "step": 192540 }, { "epoch": 54.655123474311665, "grad_norm": 0.013612660579383373, "learning_rate": 4.536701674709055e-05, "loss": 0.0003137364983558655, "step": 192550 }, { "epoch": 54.65796196423503, "grad_norm": 0.006490636151283979, "learning_rate": 4.536417825716719e-05, "loss": 0.0001771150156855583, "step": 192560 }, { "epoch": 54.66080045415839, "grad_norm": 0.010489491745829582, "learning_rate": 4.536133976724383e-05, "loss": 0.0034479141235351563, "step": 192570 }, { "epoch": 54.66363894408175, "grad_norm": 0.011069044470787048, "learning_rate": 4.535850127732047e-05, "loss": 0.004225600883364678, "step": 192580 }, { "epoch": 54.66647743400511, "grad_norm": 0.04690450802445412, "learning_rate": 4.5355662787397104e-05, "loss": 0.0007904181256890297, "step": 192590 }, { "epoch": 54.66931592392847, "grad_norm": 0.04783828184008598, "learning_rate": 4.5352824297473746e-05, "loss": 0.0008823482319712638, "step": 192600 }, { "epoch": 54.67215441385183, "grad_norm": 0.05413959547877312, "learning_rate": 4.534998580755039e-05, "loss": 0.0034518718719482424, "step": 192610 }, { "epoch": 54.67499290377519, "grad_norm": 0.006609982345253229, "learning_rate": 4.534714731762703e-05, "loss": 0.006613720953464508, "step": 192620 }, { "epoch": 54.677831393698554, "grad_norm": 0.3625205159187317, "learning_rate": 4.534430882770366e-05, "loss": 0.002170359343290329, "step": 192630 }, { "epoch": 54.68066988362191, "grad_norm": 0.4748673439025879, "learning_rate": 4.53414703377803e-05, "loss": 0.0031913887709379197, "step": 192640 }, { "epoch": 54.68350837354527, "grad_norm": 1.7756963968276978, "learning_rate": 4.5338631847856946e-05, "loss": 0.00728776752948761, "step": 192650 }, { "epoch": 54.686346863468636, "grad_norm": 0.06127496808767319, "learning_rate": 4.533579335793358e-05, "loss": 0.0016968544572591782, "step": 192660 }, { "epoch": 54.689185353392, "grad_norm": 0.01937936618924141, "learning_rate": 4.533295486801022e-05, "loss": 0.0011773353442549705, "step": 192670 }, { "epoch": 54.692023843315354, "grad_norm": 6.818027496337891, "learning_rate": 4.533011637808686e-05, "loss": 0.003615030646324158, "step": 192680 }, { "epoch": 54.69486233323872, "grad_norm": 0.03067547269165516, "learning_rate": 4.53272778881635e-05, "loss": 0.0006062835454940796, "step": 192690 }, { "epoch": 54.69770082316208, "grad_norm": 0.9392488598823547, "learning_rate": 4.532443939824014e-05, "loss": 0.0009191339835524559, "step": 192700 }, { "epoch": 54.700539313085436, "grad_norm": 0.022312453016638756, "learning_rate": 4.532160090831678e-05, "loss": 0.00026782508939504623, "step": 192710 }, { "epoch": 54.7033778030088, "grad_norm": 0.023149389773607254, "learning_rate": 4.5318762418393415e-05, "loss": 0.002413647808134556, "step": 192720 }, { "epoch": 54.70621629293216, "grad_norm": 0.11487707495689392, "learning_rate": 4.5315923928470056e-05, "loss": 0.000509045459330082, "step": 192730 }, { "epoch": 54.70905478285552, "grad_norm": 0.030542129650712013, "learning_rate": 4.531308543854669e-05, "loss": 0.0010950924828648568, "step": 192740 }, { "epoch": 54.71189327277888, "grad_norm": 0.7014197707176208, "learning_rate": 4.531024694862334e-05, "loss": 0.006592617928981781, "step": 192750 }, { "epoch": 54.71473176270224, "grad_norm": 0.007684205658733845, "learning_rate": 4.5307408458699974e-05, "loss": 0.0004914011806249619, "step": 192760 }, { "epoch": 54.717570252625606, "grad_norm": 0.07414848357439041, "learning_rate": 4.530456996877661e-05, "loss": 0.004444437474012375, "step": 192770 }, { "epoch": 54.72040874254896, "grad_norm": 0.5024648904800415, "learning_rate": 4.5301731478853256e-05, "loss": 0.0004521669819951057, "step": 192780 }, { "epoch": 54.723247232472325, "grad_norm": 0.01349195558577776, "learning_rate": 4.529889298892989e-05, "loss": 0.00015091951936483384, "step": 192790 }, { "epoch": 54.72608572239569, "grad_norm": 0.012229151092469692, "learning_rate": 4.529605449900653e-05, "loss": 0.0014611920341849327, "step": 192800 }, { "epoch": 54.72892421231904, "grad_norm": 0.9194833040237427, "learning_rate": 4.5293216009083174e-05, "loss": 0.0003559555858373642, "step": 192810 }, { "epoch": 54.731762702242406, "grad_norm": 0.030960332602262497, "learning_rate": 4.529037751915981e-05, "loss": 0.00020516477525234222, "step": 192820 }, { "epoch": 54.73460119216577, "grad_norm": 0.02009507082402706, "learning_rate": 4.528753902923645e-05, "loss": 0.0006173742935061455, "step": 192830 }, { "epoch": 54.73743968208913, "grad_norm": 0.07110659033060074, "learning_rate": 4.5284700539313084e-05, "loss": 0.004573377221822739, "step": 192840 }, { "epoch": 54.74027817201249, "grad_norm": 0.10026941448450089, "learning_rate": 4.5281862049389726e-05, "loss": 0.0008718408644199371, "step": 192850 }, { "epoch": 54.74311666193585, "grad_norm": 0.011367810890078545, "learning_rate": 4.527902355946637e-05, "loss": 0.014231880009174348, "step": 192860 }, { "epoch": 54.745955151859214, "grad_norm": 0.24491985142230988, "learning_rate": 4.5276185069543e-05, "loss": 0.0007977534085512161, "step": 192870 }, { "epoch": 54.74879364178257, "grad_norm": 0.0846492126584053, "learning_rate": 4.527334657961965e-05, "loss": 0.0008064338937401772, "step": 192880 }, { "epoch": 54.75163213170593, "grad_norm": 1.692842960357666, "learning_rate": 4.5270508089696284e-05, "loss": 0.0011630089953541755, "step": 192890 }, { "epoch": 54.754470621629295, "grad_norm": 0.04640423133969307, "learning_rate": 4.526766959977292e-05, "loss": 0.0010259930044412614, "step": 192900 }, { "epoch": 54.75730911155265, "grad_norm": 1.484537959098816, "learning_rate": 4.526483110984957e-05, "loss": 0.001083962619304657, "step": 192910 }, { "epoch": 54.760147601476014, "grad_norm": 0.10233692079782486, "learning_rate": 4.52619926199262e-05, "loss": 0.006503915786743164, "step": 192920 }, { "epoch": 54.76298609139938, "grad_norm": 0.0806490033864975, "learning_rate": 4.525915413000284e-05, "loss": 0.00099781583994627, "step": 192930 }, { "epoch": 54.76582458132274, "grad_norm": 0.052941881120204926, "learning_rate": 4.525631564007948e-05, "loss": 0.0007001793012022973, "step": 192940 }, { "epoch": 54.768663071246095, "grad_norm": 0.04108201712369919, "learning_rate": 4.525347715015612e-05, "loss": 0.007110331952571869, "step": 192950 }, { "epoch": 54.77150156116946, "grad_norm": 0.017093345522880554, "learning_rate": 4.525063866023276e-05, "loss": 0.00173615962266922, "step": 192960 }, { "epoch": 54.77434005109282, "grad_norm": 0.2874625623226166, "learning_rate": 4.5247800170309395e-05, "loss": 0.0003387702628970146, "step": 192970 }, { "epoch": 54.77717854101618, "grad_norm": 0.07851719856262207, "learning_rate": 4.5244961680386037e-05, "loss": 0.0010233411565423011, "step": 192980 }, { "epoch": 54.78001703093954, "grad_norm": 0.048444319516420364, "learning_rate": 4.524212319046268e-05, "loss": 0.0005053101107478142, "step": 192990 }, { "epoch": 54.7828555208629, "grad_norm": 0.36564844846725464, "learning_rate": 4.523928470053931e-05, "loss": 0.0027918003499507902, "step": 193000 }, { "epoch": 54.7828555208629, "eval_accuracy": 0.9813696191263432, "eval_loss": 0.07791388034820557, "eval_runtime": 123.6567, "eval_samples_per_second": 127.183, "eval_steps_per_second": 1.989, "step": 193000 }, { "epoch": 54.78569401078626, "grad_norm": 0.07282327115535736, "learning_rate": 4.5236446210615954e-05, "loss": 0.0008751580491662026, "step": 193010 }, { "epoch": 54.78853250070962, "grad_norm": 0.015067698433995247, "learning_rate": 4.5233607720692595e-05, "loss": 0.0006235426291823388, "step": 193020 }, { "epoch": 54.791370990632984, "grad_norm": 0.02771715074777603, "learning_rate": 4.523076923076923e-05, "loss": 0.0008682716637849808, "step": 193030 }, { "epoch": 54.79420948055635, "grad_norm": 0.06607253849506378, "learning_rate": 4.522793074084587e-05, "loss": 0.00020210817456245422, "step": 193040 }, { "epoch": 54.7970479704797, "grad_norm": 0.021404970437288284, "learning_rate": 4.522509225092251e-05, "loss": 0.0026617640629410743, "step": 193050 }, { "epoch": 54.799886460403066, "grad_norm": 0.13448166847229004, "learning_rate": 4.522225376099915e-05, "loss": 0.001213696226477623, "step": 193060 }, { "epoch": 54.80272495032643, "grad_norm": 0.056227896362543106, "learning_rate": 4.521941527107579e-05, "loss": 0.0003352692350745201, "step": 193070 }, { "epoch": 54.805563440249784, "grad_norm": 0.1455516517162323, "learning_rate": 4.521657678115243e-05, "loss": 0.001643851399421692, "step": 193080 }, { "epoch": 54.80840193017315, "grad_norm": 16.862165451049805, "learning_rate": 4.521373829122907e-05, "loss": 0.012758849561214447, "step": 193090 }, { "epoch": 54.81124042009651, "grad_norm": 0.8537924289703369, "learning_rate": 4.5210899801305706e-05, "loss": 0.000396021269261837, "step": 193100 }, { "epoch": 54.814078910019866, "grad_norm": 2.2322587966918945, "learning_rate": 4.520806131138235e-05, "loss": 0.0012009207159280777, "step": 193110 }, { "epoch": 54.81691739994323, "grad_norm": 0.04395675286650658, "learning_rate": 4.520522282145899e-05, "loss": 0.0032121822237968444, "step": 193120 }, { "epoch": 54.81975588986659, "grad_norm": 10.017614364624023, "learning_rate": 4.520238433153562e-05, "loss": 0.0037251286208629607, "step": 193130 }, { "epoch": 54.822594379789955, "grad_norm": 0.12523691356182098, "learning_rate": 4.5199545841612265e-05, "loss": 0.001602901890873909, "step": 193140 }, { "epoch": 54.82543286971331, "grad_norm": 0.3749733865261078, "learning_rate": 4.5196707351688906e-05, "loss": 0.0017090650275349618, "step": 193150 }, { "epoch": 54.82827135963667, "grad_norm": 0.02219538204371929, "learning_rate": 4.519386886176554e-05, "loss": 0.0018239933997392654, "step": 193160 }, { "epoch": 54.831109849560036, "grad_norm": 0.6341480612754822, "learning_rate": 4.519103037184218e-05, "loss": 0.0012517523020505906, "step": 193170 }, { "epoch": 54.83394833948339, "grad_norm": 0.09828321635723114, "learning_rate": 4.518819188191882e-05, "loss": 0.00018946006894111634, "step": 193180 }, { "epoch": 54.836786829406755, "grad_norm": 0.07595732063055038, "learning_rate": 4.518535339199546e-05, "loss": 0.006517069041728973, "step": 193190 }, { "epoch": 54.83962531933012, "grad_norm": 0.1677674949169159, "learning_rate": 4.51825149020721e-05, "loss": 0.0014323435723781585, "step": 193200 }, { "epoch": 54.84246380925348, "grad_norm": 13.786569595336914, "learning_rate": 4.517967641214874e-05, "loss": 0.0079066701233387, "step": 193210 }, { "epoch": 54.845302299176836, "grad_norm": 0.049374986439943314, "learning_rate": 4.517683792222538e-05, "loss": 0.0008757615461945534, "step": 193220 }, { "epoch": 54.8481407891002, "grad_norm": 9.628390312194824, "learning_rate": 4.517399943230202e-05, "loss": 0.004345757886767387, "step": 193230 }, { "epoch": 54.85097927902356, "grad_norm": 4.783392906188965, "learning_rate": 4.517116094237865e-05, "loss": 0.0033275045454502104, "step": 193240 }, { "epoch": 54.85381776894692, "grad_norm": 0.46303942799568176, "learning_rate": 4.51683224524553e-05, "loss": 0.012067306786775589, "step": 193250 }, { "epoch": 54.85665625887028, "grad_norm": 0.07849337160587311, "learning_rate": 4.5165483962531934e-05, "loss": 0.0010042322799563407, "step": 193260 }, { "epoch": 54.859494748793644, "grad_norm": 0.08947447687387466, "learning_rate": 4.5162645472608575e-05, "loss": 0.0016044985502958297, "step": 193270 }, { "epoch": 54.862333238717, "grad_norm": 0.547167956829071, "learning_rate": 4.515980698268522e-05, "loss": 0.0031228775158524515, "step": 193280 }, { "epoch": 54.86517172864036, "grad_norm": 0.1142302006483078, "learning_rate": 4.515696849276185e-05, "loss": 0.013638591766357422, "step": 193290 }, { "epoch": 54.868010218563725, "grad_norm": 0.04046625643968582, "learning_rate": 4.515413000283849e-05, "loss": 0.000415848009288311, "step": 193300 }, { "epoch": 54.87084870848709, "grad_norm": 0.009270045906305313, "learning_rate": 4.5151291512915134e-05, "loss": 0.004253911226987839, "step": 193310 }, { "epoch": 54.873687198410444, "grad_norm": 0.003636718960478902, "learning_rate": 4.514845302299177e-05, "loss": 0.003195042908191681, "step": 193320 }, { "epoch": 54.87652568833381, "grad_norm": 0.016791725531220436, "learning_rate": 4.514561453306841e-05, "loss": 0.0005133112892508507, "step": 193330 }, { "epoch": 54.87936417825717, "grad_norm": 15.84851360321045, "learning_rate": 4.5142776043145045e-05, "loss": 0.004341410100460052, "step": 193340 }, { "epoch": 54.882202668180526, "grad_norm": 0.2078407257795334, "learning_rate": 4.513993755322169e-05, "loss": 0.004577413946390152, "step": 193350 }, { "epoch": 54.88504115810389, "grad_norm": 0.5252276062965393, "learning_rate": 4.513709906329833e-05, "loss": 0.008568103611469268, "step": 193360 }, { "epoch": 54.88787964802725, "grad_norm": 0.05660800263285637, "learning_rate": 4.513426057337496e-05, "loss": 0.010044850409030914, "step": 193370 }, { "epoch": 54.89071813795061, "grad_norm": 1.0048794746398926, "learning_rate": 4.513142208345161e-05, "loss": 0.00044487398117780684, "step": 193380 }, { "epoch": 54.89355662787397, "grad_norm": 0.016351627185940742, "learning_rate": 4.5128583593528245e-05, "loss": 0.0037177994847297667, "step": 193390 }, { "epoch": 54.89639511779733, "grad_norm": 0.008379078470170498, "learning_rate": 4.5125745103604886e-05, "loss": 0.00015638917684555054, "step": 193400 }, { "epoch": 54.899233607720696, "grad_norm": 1.1532753705978394, "learning_rate": 4.512290661368153e-05, "loss": 0.00041521433740854263, "step": 193410 }, { "epoch": 54.90207209764405, "grad_norm": 0.006749431136995554, "learning_rate": 4.512006812375816e-05, "loss": 0.0001915993168950081, "step": 193420 }, { "epoch": 54.904910587567414, "grad_norm": 0.03899946063756943, "learning_rate": 4.5117229633834804e-05, "loss": 0.0001774173229932785, "step": 193430 }, { "epoch": 54.90774907749078, "grad_norm": 0.016806945204734802, "learning_rate": 4.511439114391144e-05, "loss": 0.00029223430901765824, "step": 193440 }, { "epoch": 54.91058756741413, "grad_norm": 0.0846453458070755, "learning_rate": 4.511155265398808e-05, "loss": 0.0004778679460287094, "step": 193450 }, { "epoch": 54.913426057337496, "grad_norm": 0.011756057851016521, "learning_rate": 4.510871416406472e-05, "loss": 0.0004795258864760399, "step": 193460 }, { "epoch": 54.91626454726086, "grad_norm": 0.21275174617767334, "learning_rate": 4.5105875674141355e-05, "loss": 0.0003373933956027031, "step": 193470 }, { "epoch": 54.919103037184215, "grad_norm": 0.012381155975162983, "learning_rate": 4.5103037184218e-05, "loss": 9.984038770198822e-05, "step": 193480 }, { "epoch": 54.92194152710758, "grad_norm": 0.023949021473526955, "learning_rate": 4.510019869429464e-05, "loss": 0.00025176461786031724, "step": 193490 }, { "epoch": 54.92478001703094, "grad_norm": 0.010240057483315468, "learning_rate": 4.509736020437127e-05, "loss": 0.0001477416604757309, "step": 193500 }, { "epoch": 54.92478001703094, "eval_accuracy": 0.9844216951739048, "eval_loss": 0.060648854821920395, "eval_runtime": 50.22, "eval_samples_per_second": 313.162, "eval_steps_per_second": 4.898, "step": 193500 }, { "epoch": 54.9276185069543, "grad_norm": 0.0029364549554884434, "learning_rate": 4.509452171444792e-05, "loss": 0.00010594557970762253, "step": 193510 }, { "epoch": 54.93045699687766, "grad_norm": 0.48801547288894653, "learning_rate": 4.5091683224524556e-05, "loss": 0.00016595590859651564, "step": 193520 }, { "epoch": 54.93329548680102, "grad_norm": 0.009567868895828724, "learning_rate": 4.508884473460119e-05, "loss": 8.490364998579026e-05, "step": 193530 }, { "epoch": 54.936133976724385, "grad_norm": 0.01280547771602869, "learning_rate": 4.508600624467783e-05, "loss": 6.430447101593017e-05, "step": 193540 }, { "epoch": 54.93897246664774, "grad_norm": 0.006894399877637625, "learning_rate": 4.508316775475447e-05, "loss": 0.00014150701463222504, "step": 193550 }, { "epoch": 54.9418109565711, "grad_norm": 0.003998226951807737, "learning_rate": 4.5080329264831114e-05, "loss": 7.05065205693245e-05, "step": 193560 }, { "epoch": 54.944649446494466, "grad_norm": 0.0030835410580039024, "learning_rate": 4.507749077490775e-05, "loss": 6.752889603376389e-05, "step": 193570 }, { "epoch": 54.94748793641783, "grad_norm": 0.009466051124036312, "learning_rate": 4.507465228498439e-05, "loss": 7.875710725784302e-05, "step": 193580 }, { "epoch": 54.950326426341185, "grad_norm": 0.16595634818077087, "learning_rate": 4.507181379506103e-05, "loss": 0.0005288667976856232, "step": 193590 }, { "epoch": 54.95316491626455, "grad_norm": 0.006605510599911213, "learning_rate": 4.5068975305137666e-05, "loss": 8.78952443599701e-05, "step": 193600 }, { "epoch": 54.95600340618791, "grad_norm": 0.02147243358194828, "learning_rate": 4.506613681521431e-05, "loss": 0.0018711099401116371, "step": 193610 }, { "epoch": 54.95884189611127, "grad_norm": 0.4459642171859741, "learning_rate": 4.506329832529095e-05, "loss": 0.0003830503672361374, "step": 193620 }, { "epoch": 54.96168038603463, "grad_norm": 0.01584130898118019, "learning_rate": 4.5060459835367584e-05, "loss": 0.00015764720737934112, "step": 193630 }, { "epoch": 54.96451887595799, "grad_norm": 0.00454764673486352, "learning_rate": 4.5057621345444225e-05, "loss": 0.00022390000522136687, "step": 193640 }, { "epoch": 54.96735736588135, "grad_norm": 0.21951782703399658, "learning_rate": 4.5054782855520866e-05, "loss": 0.00028665363788604736, "step": 193650 }, { "epoch": 54.97019585580471, "grad_norm": 0.5783572793006897, "learning_rate": 4.50519443655975e-05, "loss": 0.0004273321479558945, "step": 193660 }, { "epoch": 54.973034345728074, "grad_norm": 0.06291798502206802, "learning_rate": 4.504910587567414e-05, "loss": 0.0003999127075076103, "step": 193670 }, { "epoch": 54.97587283565144, "grad_norm": 0.02423013001680374, "learning_rate": 4.5046267385750784e-05, "loss": 0.0012881342321634293, "step": 193680 }, { "epoch": 54.97871132557479, "grad_norm": 0.014246251434087753, "learning_rate": 4.5043428895827425e-05, "loss": 0.00030828025192022326, "step": 193690 }, { "epoch": 54.981549815498155, "grad_norm": 0.13229256868362427, "learning_rate": 4.504059040590406e-05, "loss": 0.002072133496403694, "step": 193700 }, { "epoch": 54.98438830542152, "grad_norm": 0.014490113593637943, "learning_rate": 4.50377519159807e-05, "loss": 0.00221199132502079, "step": 193710 }, { "epoch": 54.987226795344874, "grad_norm": 0.004114766139537096, "learning_rate": 4.503491342605734e-05, "loss": 0.004036936163902283, "step": 193720 }, { "epoch": 54.99006528526824, "grad_norm": 13.052156448364258, "learning_rate": 4.503207493613398e-05, "loss": 0.007613097131252289, "step": 193730 }, { "epoch": 54.9929037751916, "grad_norm": 11.507161140441895, "learning_rate": 4.502923644621062e-05, "loss": 0.011925514042377471, "step": 193740 }, { "epoch": 54.995742265114956, "grad_norm": 12.441472053527832, "learning_rate": 4.502639795628726e-05, "loss": 0.011107388138771056, "step": 193750 }, { "epoch": 54.99858075503832, "grad_norm": 0.28441256284713745, "learning_rate": 4.5023559466363894e-05, "loss": 0.007498999685049057, "step": 193760 }, { "epoch": 55.00141924496168, "grad_norm": 14.98615837097168, "learning_rate": 4.5020720976440536e-05, "loss": 0.029174339771270753, "step": 193770 }, { "epoch": 55.004257734885044, "grad_norm": 0.010947538539767265, "learning_rate": 4.501788248651718e-05, "loss": 0.0005444569513201713, "step": 193780 }, { "epoch": 55.0070962248084, "grad_norm": 0.006670959293842316, "learning_rate": 4.501504399659381e-05, "loss": 0.00034816805273294447, "step": 193790 }, { "epoch": 55.00993471473176, "grad_norm": 0.09836491197347641, "learning_rate": 4.501220550667045e-05, "loss": 0.0002043910324573517, "step": 193800 }, { "epoch": 55.012773204655126, "grad_norm": 0.05367155745625496, "learning_rate": 4.5009367016747094e-05, "loss": 0.0001995135098695755, "step": 193810 }, { "epoch": 55.01561169457848, "grad_norm": 0.060508571565151215, "learning_rate": 4.5006528526823736e-05, "loss": 0.0005565008148550987, "step": 193820 }, { "epoch": 55.018450184501845, "grad_norm": 0.005995527841150761, "learning_rate": 4.500369003690037e-05, "loss": 0.00012349318712949752, "step": 193830 }, { "epoch": 55.02128867442521, "grad_norm": 0.00795137882232666, "learning_rate": 4.500085154697701e-05, "loss": 0.0008497383445501327, "step": 193840 }, { "epoch": 55.02412716434856, "grad_norm": 0.2261267900466919, "learning_rate": 4.499801305705365e-05, "loss": 0.00033513419330120085, "step": 193850 }, { "epoch": 55.026965654271926, "grad_norm": 0.008271649479866028, "learning_rate": 4.499517456713029e-05, "loss": 8.713733404874802e-05, "step": 193860 }, { "epoch": 55.02980414419529, "grad_norm": 0.020922981202602386, "learning_rate": 4.499233607720693e-05, "loss": 0.0003995819017291069, "step": 193870 }, { "epoch": 55.03264263411865, "grad_norm": 0.02507244050502777, "learning_rate": 4.498949758728357e-05, "loss": 7.437765598297119e-05, "step": 193880 }, { "epoch": 55.03548112404201, "grad_norm": 0.01941041462123394, "learning_rate": 4.4986659097360205e-05, "loss": 8.326955139636993e-05, "step": 193890 }, { "epoch": 55.03831961396537, "grad_norm": 0.0035607642494142056, "learning_rate": 4.4983820607436846e-05, "loss": 0.0033524058759212493, "step": 193900 }, { "epoch": 55.04115810388873, "grad_norm": 0.1526203751564026, "learning_rate": 4.498098211751349e-05, "loss": 0.001467418484389782, "step": 193910 }, { "epoch": 55.04399659381209, "grad_norm": 0.009348146617412567, "learning_rate": 4.497814362759012e-05, "loss": 0.0008956918492913247, "step": 193920 }, { "epoch": 55.04683508373545, "grad_norm": 0.040458135306835175, "learning_rate": 4.4975305137666764e-05, "loss": 0.0006689468398690223, "step": 193930 }, { "epoch": 55.049673573658815, "grad_norm": 0.02106056921184063, "learning_rate": 4.4972466647743405e-05, "loss": 0.012070161104202271, "step": 193940 }, { "epoch": 55.05251206358217, "grad_norm": 0.00886416807770729, "learning_rate": 4.496962815782004e-05, "loss": 0.0008742667734622956, "step": 193950 }, { "epoch": 55.055350553505534, "grad_norm": 0.005946963559836149, "learning_rate": 4.496678966789668e-05, "loss": 0.000319034606218338, "step": 193960 }, { "epoch": 55.0581890434289, "grad_norm": 0.14058421552181244, "learning_rate": 4.4963951177973316e-05, "loss": 0.00033539235591888427, "step": 193970 }, { "epoch": 55.06102753335226, "grad_norm": 0.011808240786194801, "learning_rate": 4.4961112688049964e-05, "loss": 0.004423485696315765, "step": 193980 }, { "epoch": 55.063866023275615, "grad_norm": 0.011796864680945873, "learning_rate": 4.49582741981266e-05, "loss": 0.0009531263262033462, "step": 193990 }, { "epoch": 55.06670451319898, "grad_norm": 0.005793310701847076, "learning_rate": 4.495543570820323e-05, "loss": 0.0005064135417342186, "step": 194000 }, { "epoch": 55.06670451319898, "eval_accuracy": 0.9840401856679596, "eval_loss": 0.07058624178171158, "eval_runtime": 144.6508, "eval_samples_per_second": 108.724, "eval_steps_per_second": 1.701, "step": 194000 }, { "epoch": 55.06954300312234, "grad_norm": 0.0543113648891449, "learning_rate": 4.495259721827988e-05, "loss": 0.00016110278666019439, "step": 194010 }, { "epoch": 55.0723814930457, "grad_norm": 0.19397801160812378, "learning_rate": 4.4949758728356516e-05, "loss": 0.00013835672289133071, "step": 194020 }, { "epoch": 55.07521998296906, "grad_norm": 0.0168585367500782, "learning_rate": 4.494692023843316e-05, "loss": 0.0004998091608285904, "step": 194030 }, { "epoch": 55.07805847289242, "grad_norm": 0.015994206070899963, "learning_rate": 4.49440817485098e-05, "loss": 0.002047915756702423, "step": 194040 }, { "epoch": 55.080896962815785, "grad_norm": 0.0034099791664630175, "learning_rate": 4.494124325858643e-05, "loss": 0.0005460130050778389, "step": 194050 }, { "epoch": 55.08373545273914, "grad_norm": 0.003760315477848053, "learning_rate": 4.4938404768663075e-05, "loss": 0.001231636479496956, "step": 194060 }, { "epoch": 55.086573942662504, "grad_norm": 0.009898156858980656, "learning_rate": 4.493556627873971e-05, "loss": 0.0001534741371870041, "step": 194070 }, { "epoch": 55.08941243258587, "grad_norm": 0.8528783321380615, "learning_rate": 4.493272778881635e-05, "loss": 0.00040370672941207885, "step": 194080 }, { "epoch": 55.09225092250922, "grad_norm": 0.15557129681110382, "learning_rate": 4.492988929889299e-05, "loss": 0.00011458229273557663, "step": 194090 }, { "epoch": 55.095089412432586, "grad_norm": 0.048124413937330246, "learning_rate": 4.4927050808969627e-05, "loss": 0.000593630038201809, "step": 194100 }, { "epoch": 55.09792790235595, "grad_norm": 0.01416081003844738, "learning_rate": 4.4924212319046275e-05, "loss": 0.00012620501220226288, "step": 194110 }, { "epoch": 55.100766392279304, "grad_norm": 0.1446279138326645, "learning_rate": 4.492137382912291e-05, "loss": 0.003388427197933197, "step": 194120 }, { "epoch": 55.10360488220267, "grad_norm": 0.04279107227921486, "learning_rate": 4.4918535339199544e-05, "loss": 0.00031034965068101885, "step": 194130 }, { "epoch": 55.10644337212603, "grad_norm": 0.006195713300257921, "learning_rate": 4.491569684927619e-05, "loss": 0.00013695787638425826, "step": 194140 }, { "epoch": 55.10928186204939, "grad_norm": 0.0017665316117927432, "learning_rate": 4.491285835935283e-05, "loss": 0.0007924292236566544, "step": 194150 }, { "epoch": 55.11212035197275, "grad_norm": 0.03964250907301903, "learning_rate": 4.491001986942947e-05, "loss": 0.0001645892858505249, "step": 194160 }, { "epoch": 55.11495884189611, "grad_norm": 0.017500892281532288, "learning_rate": 4.49071813795061e-05, "loss": 0.00019539706408977509, "step": 194170 }, { "epoch": 55.117797331819474, "grad_norm": 0.01378620695322752, "learning_rate": 4.4904342889582744e-05, "loss": 8.381810039281845e-05, "step": 194180 }, { "epoch": 55.12063582174283, "grad_norm": 0.010054513812065125, "learning_rate": 4.4901504399659385e-05, "loss": 7.642339915037155e-05, "step": 194190 }, { "epoch": 55.12347431166619, "grad_norm": 0.044801611453294754, "learning_rate": 4.489866590973602e-05, "loss": 7.643792778253556e-05, "step": 194200 }, { "epoch": 55.126312801589556, "grad_norm": 0.006907267030328512, "learning_rate": 4.489582741981266e-05, "loss": 0.00016278345137834548, "step": 194210 }, { "epoch": 55.12915129151291, "grad_norm": 0.0011941120028495789, "learning_rate": 4.48929889298893e-05, "loss": 5.816332995891571e-05, "step": 194220 }, { "epoch": 55.131989781436275, "grad_norm": 0.004232687875628471, "learning_rate": 4.489015043996594e-05, "loss": 9.866766631603242e-05, "step": 194230 }, { "epoch": 55.13482827135964, "grad_norm": 0.004292620345950127, "learning_rate": 4.488731195004258e-05, "loss": 5.722232162952423e-05, "step": 194240 }, { "epoch": 55.137666761283, "grad_norm": 0.000960996956564486, "learning_rate": 4.488447346011922e-05, "loss": 0.00011157840490341186, "step": 194250 }, { "epoch": 55.140505251206356, "grad_norm": 0.002416253322735429, "learning_rate": 4.4881634970195855e-05, "loss": 5.664415657520294e-05, "step": 194260 }, { "epoch": 55.14334374112972, "grad_norm": 0.00601982744410634, "learning_rate": 4.4878796480272496e-05, "loss": 7.307417690753937e-05, "step": 194270 }, { "epoch": 55.14618223105308, "grad_norm": 0.10597673803567886, "learning_rate": 4.487595799034914e-05, "loss": 0.00013429764658212662, "step": 194280 }, { "epoch": 55.14902072097644, "grad_norm": 0.0030332484748214483, "learning_rate": 4.487311950042578e-05, "loss": 4.39763069152832e-05, "step": 194290 }, { "epoch": 55.1518592108998, "grad_norm": 0.012339373119175434, "learning_rate": 4.487028101050241e-05, "loss": 0.003155343234539032, "step": 194300 }, { "epoch": 55.154697700823164, "grad_norm": 0.022807439789175987, "learning_rate": 4.4867442520579055e-05, "loss": 4.879608750343323e-05, "step": 194310 }, { "epoch": 55.15753619074652, "grad_norm": 0.007771359756588936, "learning_rate": 4.4864604030655696e-05, "loss": 6.777960807085038e-05, "step": 194320 }, { "epoch": 55.16037468066988, "grad_norm": 0.005733709316700697, "learning_rate": 4.486176554073233e-05, "loss": 0.0006851799786090851, "step": 194330 }, { "epoch": 55.163213170593245, "grad_norm": 0.004121336620301008, "learning_rate": 4.485892705080897e-05, "loss": 0.0006191020831465722, "step": 194340 }, { "epoch": 55.16605166051661, "grad_norm": 6.585513591766357, "learning_rate": 4.4856088560885613e-05, "loss": 0.0038505196571350098, "step": 194350 }, { "epoch": 55.168890150439964, "grad_norm": 0.00261095748282969, "learning_rate": 4.485325007096225e-05, "loss": 0.00010057073086500168, "step": 194360 }, { "epoch": 55.17172864036333, "grad_norm": 0.0012909005163237453, "learning_rate": 4.485041158103889e-05, "loss": 0.0021119413897395132, "step": 194370 }, { "epoch": 55.17456713028669, "grad_norm": 0.026270683854818344, "learning_rate": 4.484757309111553e-05, "loss": 0.0023670105263590814, "step": 194380 }, { "epoch": 55.177405620210045, "grad_norm": 0.0018299805233255029, "learning_rate": 4.4844734601192165e-05, "loss": 0.008427438139915467, "step": 194390 }, { "epoch": 55.18024411013341, "grad_norm": 0.10378177464008331, "learning_rate": 4.484189611126881e-05, "loss": 0.00011174045503139495, "step": 194400 }, { "epoch": 55.18308260005677, "grad_norm": 0.07555242627859116, "learning_rate": 4.483905762134545e-05, "loss": 0.0002757763490080833, "step": 194410 }, { "epoch": 55.185921089980134, "grad_norm": 0.007339188829064369, "learning_rate": 4.483621913142208e-05, "loss": 0.00011574998497962952, "step": 194420 }, { "epoch": 55.18875957990349, "grad_norm": 0.0625089704990387, "learning_rate": 4.4833380641498724e-05, "loss": 0.0005650853738188744, "step": 194430 }, { "epoch": 55.19159806982685, "grad_norm": 0.053970616310834885, "learning_rate": 4.4830542151575366e-05, "loss": 7.87077471613884e-05, "step": 194440 }, { "epoch": 55.194436559750216, "grad_norm": 0.03237726166844368, "learning_rate": 4.482770366165201e-05, "loss": 0.01740296185016632, "step": 194450 }, { "epoch": 55.19727504967357, "grad_norm": 0.027087340131402016, "learning_rate": 4.482486517172864e-05, "loss": 0.0012054037302732467, "step": 194460 }, { "epoch": 55.200113539596934, "grad_norm": 0.005346999503672123, "learning_rate": 4.4822026681805276e-05, "loss": 0.000575941614806652, "step": 194470 }, { "epoch": 55.2029520295203, "grad_norm": 0.010538638569414616, "learning_rate": 4.4819188191881924e-05, "loss": 0.0005863642320036889, "step": 194480 }, { "epoch": 55.20579051944365, "grad_norm": 0.0055944654159247875, "learning_rate": 4.481634970195856e-05, "loss": 0.00024870671331882476, "step": 194490 }, { "epoch": 55.208629009367016, "grad_norm": 0.022144179791212082, "learning_rate": 4.48135112120352e-05, "loss": 0.000275726243853569, "step": 194500 }, { "epoch": 55.208629009367016, "eval_accuracy": 0.9841673555032746, "eval_loss": 0.07032251358032227, "eval_runtime": 48.5581, "eval_samples_per_second": 323.88, "eval_steps_per_second": 5.066, "step": 194500 }, { "epoch": 55.21146749929038, "grad_norm": 0.1267208755016327, "learning_rate": 4.481067272211184e-05, "loss": 0.0003151200711727142, "step": 194510 }, { "epoch": 55.21430598921374, "grad_norm": 0.08362055569887161, "learning_rate": 4.4807834232188476e-05, "loss": 0.001182916946709156, "step": 194520 }, { "epoch": 55.2171444791371, "grad_norm": 0.022572599351406097, "learning_rate": 4.480499574226512e-05, "loss": 0.0025451799854636194, "step": 194530 }, { "epoch": 55.21998296906046, "grad_norm": 0.1020248681306839, "learning_rate": 4.480215725234176e-05, "loss": 0.00016140770167112352, "step": 194540 }, { "epoch": 55.22282145898382, "grad_norm": 0.0419151671230793, "learning_rate": 4.4799318762418394e-05, "loss": 0.0003430729731917381, "step": 194550 }, { "epoch": 55.22565994890718, "grad_norm": 0.26805004477500916, "learning_rate": 4.4796480272495035e-05, "loss": 0.005532246455550194, "step": 194560 }, { "epoch": 55.22849843883054, "grad_norm": 0.004316349048167467, "learning_rate": 4.479364178257167e-05, "loss": 0.0011755036190152167, "step": 194570 }, { "epoch": 55.231336928753905, "grad_norm": 0.01991913840174675, "learning_rate": 4.479080329264832e-05, "loss": 0.006624486297369003, "step": 194580 }, { "epoch": 55.23417541867726, "grad_norm": 0.09438136219978333, "learning_rate": 4.478796480272495e-05, "loss": 0.00014286991208791732, "step": 194590 }, { "epoch": 55.23701390860062, "grad_norm": 0.1726161241531372, "learning_rate": 4.478512631280159e-05, "loss": 0.0008210491389036179, "step": 194600 }, { "epoch": 55.239852398523986, "grad_norm": 0.390165776014328, "learning_rate": 4.4782287822878235e-05, "loss": 0.00031985342502593994, "step": 194610 }, { "epoch": 55.24269088844735, "grad_norm": 0.5847591161727905, "learning_rate": 4.477944933295487e-05, "loss": 0.00016013886779546737, "step": 194620 }, { "epoch": 55.245529378370705, "grad_norm": 0.08853617310523987, "learning_rate": 4.477661084303151e-05, "loss": 0.0008459886536002159, "step": 194630 }, { "epoch": 55.24836786829407, "grad_norm": 0.22771918773651123, "learning_rate": 4.477377235310815e-05, "loss": 0.0008335186168551445, "step": 194640 }, { "epoch": 55.25120635821743, "grad_norm": 1.9578229188919067, "learning_rate": 4.477093386318479e-05, "loss": 0.000704212486743927, "step": 194650 }, { "epoch": 55.254044848140786, "grad_norm": 0.05577240139245987, "learning_rate": 4.476809537326143e-05, "loss": 0.0016048729419708252, "step": 194660 }, { "epoch": 55.25688333806415, "grad_norm": 4.391153812408447, "learning_rate": 4.476525688333806e-05, "loss": 0.004118426144123078, "step": 194670 }, { "epoch": 55.25972182798751, "grad_norm": 0.04717998951673508, "learning_rate": 4.4762418393414704e-05, "loss": 0.0013465330004692078, "step": 194680 }, { "epoch": 55.26256031791087, "grad_norm": 0.10761284828186035, "learning_rate": 4.4759579903491346e-05, "loss": 0.00024326369166374206, "step": 194690 }, { "epoch": 55.26539880783423, "grad_norm": 0.5529335737228394, "learning_rate": 4.475674141356798e-05, "loss": 0.00041960831731557846, "step": 194700 }, { "epoch": 55.268237297757594, "grad_norm": 7.546971321105957, "learning_rate": 4.475390292364462e-05, "loss": 0.00722113847732544, "step": 194710 }, { "epoch": 55.27107578768096, "grad_norm": 1.4880553483963013, "learning_rate": 4.475106443372126e-05, "loss": 0.002985895797610283, "step": 194720 }, { "epoch": 55.27391427760431, "grad_norm": 0.02400212362408638, "learning_rate": 4.47482259437979e-05, "loss": 0.002765130251646042, "step": 194730 }, { "epoch": 55.276752767527675, "grad_norm": 0.5492367148399353, "learning_rate": 4.4745387453874546e-05, "loss": 0.004885012656450272, "step": 194740 }, { "epoch": 55.27959125745104, "grad_norm": 0.005639590322971344, "learning_rate": 4.474254896395118e-05, "loss": 0.001282562129199505, "step": 194750 }, { "epoch": 55.282429747374394, "grad_norm": 0.036139264702796936, "learning_rate": 4.4739710474027815e-05, "loss": 0.0017902815714478492, "step": 194760 }, { "epoch": 55.28526823729776, "grad_norm": 0.22221988439559937, "learning_rate": 4.4736871984104456e-05, "loss": 0.0009003894403576851, "step": 194770 }, { "epoch": 55.28810672722112, "grad_norm": 0.7639148235321045, "learning_rate": 4.47340334941811e-05, "loss": 0.006559325754642487, "step": 194780 }, { "epoch": 55.29094521714448, "grad_norm": 2.7254087924957275, "learning_rate": 4.473119500425774e-05, "loss": 0.002880456857383251, "step": 194790 }, { "epoch": 55.29378370706784, "grad_norm": 0.021778477355837822, "learning_rate": 4.4728356514334374e-05, "loss": 0.0050011370331048965, "step": 194800 }, { "epoch": 55.2966221969912, "grad_norm": 2.0462985038757324, "learning_rate": 4.4725518024411015e-05, "loss": 0.004820814728736878, "step": 194810 }, { "epoch": 55.299460686914564, "grad_norm": 0.06999637186527252, "learning_rate": 4.4722679534487656e-05, "loss": 0.014174938201904297, "step": 194820 }, { "epoch": 55.30229917683792, "grad_norm": 0.6805114150047302, "learning_rate": 4.471984104456429e-05, "loss": 0.0006172116845846176, "step": 194830 }, { "epoch": 55.30513766676128, "grad_norm": 0.04292476177215576, "learning_rate": 4.471700255464093e-05, "loss": 0.00014025941491127014, "step": 194840 }, { "epoch": 55.307976156684646, "grad_norm": 0.3916357457637787, "learning_rate": 4.4714164064717574e-05, "loss": 0.0018750939518213273, "step": 194850 }, { "epoch": 55.310814646608, "grad_norm": 0.028374608606100082, "learning_rate": 4.471132557479421e-05, "loss": 0.0034442182630300524, "step": 194860 }, { "epoch": 55.313653136531364, "grad_norm": 0.2437649965286255, "learning_rate": 4.470848708487085e-05, "loss": 0.0037118948996067045, "step": 194870 }, { "epoch": 55.31649162645473, "grad_norm": 0.038879092782735825, "learning_rate": 4.470564859494749e-05, "loss": 0.004486935585737229, "step": 194880 }, { "epoch": 55.31933011637809, "grad_norm": 0.017185673117637634, "learning_rate": 4.4702810105024126e-05, "loss": 0.007956235110759735, "step": 194890 }, { "epoch": 55.322168606301446, "grad_norm": 0.03125578165054321, "learning_rate": 4.469997161510077e-05, "loss": 0.0007298648357391358, "step": 194900 }, { "epoch": 55.32500709622481, "grad_norm": 18.30401611328125, "learning_rate": 4.469713312517741e-05, "loss": 0.016490407288074493, "step": 194910 }, { "epoch": 55.32784558614817, "grad_norm": 0.5029742121696472, "learning_rate": 4.469429463525405e-05, "loss": 0.0018033750355243682, "step": 194920 }, { "epoch": 55.33068407607153, "grad_norm": 1.7405054569244385, "learning_rate": 4.4691456145330684e-05, "loss": 0.003883466124534607, "step": 194930 }, { "epoch": 55.33352256599489, "grad_norm": 0.2938055396080017, "learning_rate": 4.4688617655407326e-05, "loss": 0.0013112051412463189, "step": 194940 }, { "epoch": 55.33636105591825, "grad_norm": 0.036934979259967804, "learning_rate": 4.468577916548397e-05, "loss": 0.0007288826629519463, "step": 194950 }, { "epoch": 55.33919954584161, "grad_norm": 12.580159187316895, "learning_rate": 4.46829406755606e-05, "loss": 0.004124692082405091, "step": 194960 }, { "epoch": 55.34203803576497, "grad_norm": 0.13031607866287231, "learning_rate": 4.468010218563724e-05, "loss": 0.004049822688102722, "step": 194970 }, { "epoch": 55.344876525688335, "grad_norm": 2.974677085876465, "learning_rate": 4.4677547544706216e-05, "loss": 0.007550232112407684, "step": 194980 }, { "epoch": 55.3477150156117, "grad_norm": 0.027909034863114357, "learning_rate": 4.467470905478286e-05, "loss": 0.0017822872847318648, "step": 194990 }, { "epoch": 55.35055350553505, "grad_norm": 0.3736148476600647, "learning_rate": 4.46718705648595e-05, "loss": 0.003762286901473999, "step": 195000 }, { "epoch": 55.35055350553505, "eval_accuracy": 0.9822598079735487, "eval_loss": 0.0765436440706253, "eval_runtime": 54.8785, "eval_samples_per_second": 286.578, "eval_steps_per_second": 4.483, "step": 195000 }, { "epoch": 55.353391995458416, "grad_norm": 0.023757915943861008, "learning_rate": 4.4669032074936134e-05, "loss": 0.00037448890507221224, "step": 195010 }, { "epoch": 55.35623048538178, "grad_norm": 0.042442694306373596, "learning_rate": 4.4666193585012775e-05, "loss": 0.00419374592602253, "step": 195020 }, { "epoch": 55.359068975305135, "grad_norm": 0.013135523535311222, "learning_rate": 4.4663355095089416e-05, "loss": 0.00021758526563644409, "step": 195030 }, { "epoch": 55.3619074652285, "grad_norm": 0.02325598895549774, "learning_rate": 4.466051660516605e-05, "loss": 0.00014998335391283035, "step": 195040 }, { "epoch": 55.36474595515186, "grad_norm": 0.07941745966672897, "learning_rate": 4.465767811524269e-05, "loss": 0.002583748660981655, "step": 195050 }, { "epoch": 55.36758444507522, "grad_norm": 0.015485280193388462, "learning_rate": 4.4654839625319334e-05, "loss": 0.0003481073305010796, "step": 195060 }, { "epoch": 55.37042293499858, "grad_norm": 0.5656043887138367, "learning_rate": 4.465200113539597e-05, "loss": 0.001855204626917839, "step": 195070 }, { "epoch": 55.37326142492194, "grad_norm": 0.0023959644604474306, "learning_rate": 4.464916264547261e-05, "loss": 0.0005655964836478233, "step": 195080 }, { "epoch": 55.376099914845305, "grad_norm": 0.4102248251438141, "learning_rate": 4.464632415554925e-05, "loss": 0.0003956679254770279, "step": 195090 }, { "epoch": 55.37893840476866, "grad_norm": 0.04580170661211014, "learning_rate": 4.464348566562589e-05, "loss": 0.0006984058767557144, "step": 195100 }, { "epoch": 55.381776894692024, "grad_norm": 0.01211943756788969, "learning_rate": 4.464064717570253e-05, "loss": 0.0005425501614809037, "step": 195110 }, { "epoch": 55.38461538461539, "grad_norm": 0.020317403599619865, "learning_rate": 4.463780868577917e-05, "loss": 0.0010394573211669922, "step": 195120 }, { "epoch": 55.38745387453874, "grad_norm": 0.5168998837471008, "learning_rate": 4.463497019585581e-05, "loss": 0.0021314308047294616, "step": 195130 }, { "epoch": 55.390292364462105, "grad_norm": 0.01044557150453329, "learning_rate": 4.4632131705932444e-05, "loss": 0.0013287633657455445, "step": 195140 }, { "epoch": 55.39313085438547, "grad_norm": 0.025776289403438568, "learning_rate": 4.4629293216009086e-05, "loss": 4.3985247611999514e-05, "step": 195150 }, { "epoch": 55.395969344308824, "grad_norm": 0.11383198201656342, "learning_rate": 4.462645472608573e-05, "loss": 0.00017238818109035491, "step": 195160 }, { "epoch": 55.39880783423219, "grad_norm": 0.0010264507727697492, "learning_rate": 4.462361623616236e-05, "loss": 0.0003459060564637184, "step": 195170 }, { "epoch": 55.40164632415555, "grad_norm": 0.007911855354905128, "learning_rate": 4.4620777746239e-05, "loss": 0.00039393678307533266, "step": 195180 }, { "epoch": 55.40448481407891, "grad_norm": 0.02232002280652523, "learning_rate": 4.4617939256315645e-05, "loss": 0.010775182396173477, "step": 195190 }, { "epoch": 55.40732330400227, "grad_norm": 0.10130492597818375, "learning_rate": 4.461510076639228e-05, "loss": 0.011732381582260133, "step": 195200 }, { "epoch": 55.41016179392563, "grad_norm": 0.007879002951085567, "learning_rate": 4.461226227646892e-05, "loss": 0.0033590834587812423, "step": 195210 }, { "epoch": 55.413000283848994, "grad_norm": 0.020494816824793816, "learning_rate": 4.460942378654556e-05, "loss": 0.0003485189750790596, "step": 195220 }, { "epoch": 55.41583877377235, "grad_norm": 0.010163416154682636, "learning_rate": 4.4606585296622197e-05, "loss": 0.003259432315826416, "step": 195230 }, { "epoch": 55.41867726369571, "grad_norm": 0.21580550074577332, "learning_rate": 4.460374680669884e-05, "loss": 0.0020579153671860693, "step": 195240 }, { "epoch": 55.421515753619076, "grad_norm": 1.6976350545883179, "learning_rate": 4.460090831677547e-05, "loss": 0.0004126805812120438, "step": 195250 }, { "epoch": 55.42435424354244, "grad_norm": 0.01854328252375126, "learning_rate": 4.459806982685212e-05, "loss": 0.016163945198059082, "step": 195260 }, { "epoch": 55.427192733465795, "grad_norm": 0.023635270074009895, "learning_rate": 4.4595231336928755e-05, "loss": 0.0015143029391765594, "step": 195270 }, { "epoch": 55.43003122338916, "grad_norm": 0.0947963297367096, "learning_rate": 4.459239284700539e-05, "loss": 0.0007087217643857002, "step": 195280 }, { "epoch": 55.43286971331252, "grad_norm": 0.016464749351143837, "learning_rate": 4.458955435708204e-05, "loss": 0.0009771136566996575, "step": 195290 }, { "epoch": 55.435708203235876, "grad_norm": 0.0026092552579939365, "learning_rate": 4.458671586715867e-05, "loss": 0.008669708669185639, "step": 195300 }, { "epoch": 55.43854669315924, "grad_norm": 0.015170138329267502, "learning_rate": 4.4583877377235314e-05, "loss": 0.00036119110882282257, "step": 195310 }, { "epoch": 55.4413851830826, "grad_norm": 0.024473780766129494, "learning_rate": 4.4581038887311955e-05, "loss": 0.0055530272424221035, "step": 195320 }, { "epoch": 55.44422367300596, "grad_norm": 0.20320990681648254, "learning_rate": 4.457820039738859e-05, "loss": 0.0015645332634449006, "step": 195330 }, { "epoch": 55.44706216292932, "grad_norm": 0.07925903052091599, "learning_rate": 4.457536190746523e-05, "loss": 0.00044185109436511993, "step": 195340 }, { "epoch": 55.44990065285268, "grad_norm": 0.019409658387303352, "learning_rate": 4.4572523417541866e-05, "loss": 0.008825599402189254, "step": 195350 }, { "epoch": 55.452739142776046, "grad_norm": 0.009372581727802753, "learning_rate": 4.456968492761851e-05, "loss": 0.00035075489431619645, "step": 195360 }, { "epoch": 55.4555776326994, "grad_norm": 0.03951925039291382, "learning_rate": 4.456684643769515e-05, "loss": 0.00040906909853219984, "step": 195370 }, { "epoch": 55.458416122622765, "grad_norm": 0.014313840307295322, "learning_rate": 4.456400794777178e-05, "loss": 0.00014572571963071822, "step": 195380 }, { "epoch": 55.46125461254613, "grad_norm": 0.3686244487762451, "learning_rate": 4.456116945784843e-05, "loss": 0.0010678844526410102, "step": 195390 }, { "epoch": 55.464093102469484, "grad_norm": 0.008845273405313492, "learning_rate": 4.4558330967925066e-05, "loss": 7.430259138345719e-05, "step": 195400 }, { "epoch": 55.46693159239285, "grad_norm": 0.009329921565949917, "learning_rate": 4.45554924780017e-05, "loss": 0.015455906093120576, "step": 195410 }, { "epoch": 55.46977008231621, "grad_norm": 0.00841832347214222, "learning_rate": 4.455265398807835e-05, "loss": 0.0009765218943357468, "step": 195420 }, { "epoch": 55.472608572239565, "grad_norm": 0.0644708201289177, "learning_rate": 4.454981549815498e-05, "loss": 0.0008693991228938102, "step": 195430 }, { "epoch": 55.47544706216293, "grad_norm": 1.2290140390396118, "learning_rate": 4.4546977008231625e-05, "loss": 0.00392555445432663, "step": 195440 }, { "epoch": 55.47828555208629, "grad_norm": 0.027846165001392365, "learning_rate": 4.4544138518308266e-05, "loss": 0.0005967730656266212, "step": 195450 }, { "epoch": 55.481124042009654, "grad_norm": 0.06370195746421814, "learning_rate": 4.45413000283849e-05, "loss": 0.0007418407127261162, "step": 195460 }, { "epoch": 55.48396253193301, "grad_norm": 0.10293885320425034, "learning_rate": 4.453846153846154e-05, "loss": 0.014577478170394897, "step": 195470 }, { "epoch": 55.48680102185637, "grad_norm": 0.5985180139541626, "learning_rate": 4.453562304853818e-05, "loss": 0.0006462845951318741, "step": 195480 }, { "epoch": 55.489639511779735, "grad_norm": 0.8853315114974976, "learning_rate": 4.453278455861482e-05, "loss": 0.0004788957536220551, "step": 195490 }, { "epoch": 55.49247800170309, "grad_norm": 0.012526878155767918, "learning_rate": 4.452994606869146e-05, "loss": 0.00021288841962814332, "step": 195500 }, { "epoch": 55.49247800170309, "eval_accuracy": 0.9832135817384117, "eval_loss": 0.0677262470126152, "eval_runtime": 51.2973, "eval_samples_per_second": 306.585, "eval_steps_per_second": 4.796, "step": 195500 }, { "epoch": 55.495316491626454, "grad_norm": 0.004277139902114868, "learning_rate": 4.4527107578768094e-05, "loss": 0.0037710674107074736, "step": 195510 }, { "epoch": 55.49815498154982, "grad_norm": 0.016556253656744957, "learning_rate": 4.452426908884474e-05, "loss": 7.80690461397171e-05, "step": 195520 }, { "epoch": 55.50099347147318, "grad_norm": 0.03364359959959984, "learning_rate": 4.452143059892138e-05, "loss": 0.004102124273777008, "step": 195530 }, { "epoch": 55.503831961396536, "grad_norm": 0.053162332624197006, "learning_rate": 4.451859210899801e-05, "loss": 0.0003471529111266136, "step": 195540 }, { "epoch": 55.5066704513199, "grad_norm": 0.022733818739652634, "learning_rate": 4.451575361907466e-05, "loss": 0.0006468323990702629, "step": 195550 }, { "epoch": 55.50950894124326, "grad_norm": 0.2758643329143524, "learning_rate": 4.4512915129151294e-05, "loss": 0.0004995429888367652, "step": 195560 }, { "epoch": 55.51234743116662, "grad_norm": 0.20630759000778198, "learning_rate": 4.4510076639227935e-05, "loss": 0.001885942742228508, "step": 195570 }, { "epoch": 55.51518592108998, "grad_norm": 0.1526523381471634, "learning_rate": 4.450723814930457e-05, "loss": 0.006911285221576691, "step": 195580 }, { "epoch": 55.51802441101334, "grad_norm": 0.053299497812986374, "learning_rate": 4.450439965938121e-05, "loss": 0.0003657367080450058, "step": 195590 }, { "epoch": 55.5208629009367, "grad_norm": 0.43112602829933167, "learning_rate": 4.450156116945785e-05, "loss": 0.00254063680768013, "step": 195600 }, { "epoch": 55.52370139086006, "grad_norm": 0.05777968093752861, "learning_rate": 4.449872267953449e-05, "loss": 0.0007688671350479126, "step": 195610 }, { "epoch": 55.526539880783425, "grad_norm": 0.029163138940930367, "learning_rate": 4.449588418961113e-05, "loss": 0.0016445601359009743, "step": 195620 }, { "epoch": 55.52937837070679, "grad_norm": 0.001979414140805602, "learning_rate": 4.449304569968777e-05, "loss": 0.003008873201906681, "step": 195630 }, { "epoch": 55.53221686063014, "grad_norm": 0.5818972587585449, "learning_rate": 4.4490207209764405e-05, "loss": 0.0004702882841229439, "step": 195640 }, { "epoch": 55.535055350553506, "grad_norm": 0.08175569772720337, "learning_rate": 4.4487368719841046e-05, "loss": 0.000550004467368126, "step": 195650 }, { "epoch": 55.53789384047687, "grad_norm": 0.8184710741043091, "learning_rate": 4.448453022991769e-05, "loss": 0.0003374312072992325, "step": 195660 }, { "epoch": 55.540732330400225, "grad_norm": 0.018583355471491814, "learning_rate": 4.448169173999432e-05, "loss": 9.783394634723663e-05, "step": 195670 }, { "epoch": 55.54357082032359, "grad_norm": 0.006773182190954685, "learning_rate": 4.4478853250070964e-05, "loss": 0.000791669636964798, "step": 195680 }, { "epoch": 55.54640931024695, "grad_norm": 0.060331933200359344, "learning_rate": 4.4476014760147605e-05, "loss": 0.00018601585179567337, "step": 195690 }, { "epoch": 55.549247800170306, "grad_norm": 0.0629505142569542, "learning_rate": 4.447317627022424e-05, "loss": 0.0001453833654522896, "step": 195700 }, { "epoch": 55.55208629009367, "grad_norm": 0.012749810703098774, "learning_rate": 4.447033778030088e-05, "loss": 0.0011129019781947135, "step": 195710 }, { "epoch": 55.55492478001703, "grad_norm": 0.010168912820518017, "learning_rate": 4.446749929037752e-05, "loss": 0.0005175001919269562, "step": 195720 }, { "epoch": 55.557763269940395, "grad_norm": 0.17671087384223938, "learning_rate": 4.4464660800454164e-05, "loss": 0.0007771745324134827, "step": 195730 }, { "epoch": 55.56060175986375, "grad_norm": 0.22832632064819336, "learning_rate": 4.44618223105308e-05, "loss": 0.0003432411700487137, "step": 195740 }, { "epoch": 55.563440249787114, "grad_norm": 0.025459526106715202, "learning_rate": 4.445898382060744e-05, "loss": 0.0011042848229408265, "step": 195750 }, { "epoch": 55.56627873971048, "grad_norm": 0.05860445648431778, "learning_rate": 4.445614533068408e-05, "loss": 0.0005949411541223526, "step": 195760 }, { "epoch": 55.56911722963383, "grad_norm": 0.25145792961120605, "learning_rate": 4.4453306840760716e-05, "loss": 0.00017481371760368346, "step": 195770 }, { "epoch": 55.571955719557195, "grad_norm": 0.0038857050240039825, "learning_rate": 4.445046835083736e-05, "loss": 0.0002806277945637703, "step": 195780 }, { "epoch": 55.57479420948056, "grad_norm": 0.07402122765779495, "learning_rate": 4.4447629860914e-05, "loss": 0.0004888132214546203, "step": 195790 }, { "epoch": 55.577632699403914, "grad_norm": 0.01219688355922699, "learning_rate": 4.444479137099063e-05, "loss": 0.00025747641921043397, "step": 195800 }, { "epoch": 55.58047118932728, "grad_norm": 0.0343656912446022, "learning_rate": 4.4441952881067274e-05, "loss": 0.007895450294017791, "step": 195810 }, { "epoch": 55.58330967925064, "grad_norm": 0.440844863653183, "learning_rate": 4.4439114391143916e-05, "loss": 0.006470087170600891, "step": 195820 }, { "epoch": 55.586148169174, "grad_norm": 1.339097023010254, "learning_rate": 4.443627590122055e-05, "loss": 0.00956808477640152, "step": 195830 }, { "epoch": 55.58898665909736, "grad_norm": 0.18000143766403198, "learning_rate": 4.443343741129719e-05, "loss": 0.0010519662871956825, "step": 195840 }, { "epoch": 55.59182514902072, "grad_norm": 0.5184979438781738, "learning_rate": 4.443059892137383e-05, "loss": 0.0012176375836133956, "step": 195850 }, { "epoch": 55.594663638944084, "grad_norm": 3.0974104404449463, "learning_rate": 4.4427760431450474e-05, "loss": 0.014044804871082306, "step": 195860 }, { "epoch": 55.59750212886744, "grad_norm": 0.012973460368812084, "learning_rate": 4.442492194152711e-05, "loss": 0.0002634810283780098, "step": 195870 }, { "epoch": 55.6003406187908, "grad_norm": 0.004745359066873789, "learning_rate": 4.4422083451603744e-05, "loss": 0.0009549122303724289, "step": 195880 }, { "epoch": 55.603179108714166, "grad_norm": 0.009191228076815605, "learning_rate": 4.441924496168039e-05, "loss": 0.0004607627168297768, "step": 195890 }, { "epoch": 55.60601759863752, "grad_norm": 0.01231689564883709, "learning_rate": 4.4416406471757026e-05, "loss": 0.00027777738869190217, "step": 195900 }, { "epoch": 55.608856088560884, "grad_norm": 0.04279909282922745, "learning_rate": 4.441356798183367e-05, "loss": 0.0007174272090196609, "step": 195910 }, { "epoch": 55.61169457848425, "grad_norm": 0.0185853261500597, "learning_rate": 4.441072949191031e-05, "loss": 0.00015005096793174744, "step": 195920 }, { "epoch": 55.61453306840761, "grad_norm": 0.03333339840173721, "learning_rate": 4.4407891001986944e-05, "loss": 0.0001995038241147995, "step": 195930 }, { "epoch": 55.617371558330966, "grad_norm": 0.795214056968689, "learning_rate": 4.4405052512063585e-05, "loss": 0.00036151353269815446, "step": 195940 }, { "epoch": 55.62021004825433, "grad_norm": 0.00816095620393753, "learning_rate": 4.4402214022140226e-05, "loss": 0.00010197665542364121, "step": 195950 }, { "epoch": 55.62304853817769, "grad_norm": 0.4671464264392853, "learning_rate": 4.439937553221686e-05, "loss": 0.0017955891788005828, "step": 195960 }, { "epoch": 55.62588702810105, "grad_norm": 0.1585160791873932, "learning_rate": 4.43965370422935e-05, "loss": 0.00027977656573057174, "step": 195970 }, { "epoch": 55.62872551802441, "grad_norm": 0.16842274367809296, "learning_rate": 4.439369855237014e-05, "loss": 0.0001930026337504387, "step": 195980 }, { "epoch": 55.63156400794777, "grad_norm": 0.016167251393198967, "learning_rate": 4.4390860062446785e-05, "loss": 0.00023186393082141876, "step": 195990 }, { "epoch": 55.634402497871136, "grad_norm": 0.13466334342956543, "learning_rate": 4.438802157252342e-05, "loss": 0.0014369046315550805, "step": 196000 }, { "epoch": 55.634402497871136, "eval_accuracy": 0.9816875437146309, "eval_loss": 0.07295652478933334, "eval_runtime": 43.7596, "eval_samples_per_second": 359.395, "eval_steps_per_second": 5.622, "step": 196000 }, { "epoch": 55.63724098779449, "grad_norm": 0.014339206740260124, "learning_rate": 4.4385183082600054e-05, "loss": 0.0024510690942406653, "step": 196010 }, { "epoch": 55.640079477717855, "grad_norm": 0.5333653092384338, "learning_rate": 4.43823445926767e-05, "loss": 0.006219533830881118, "step": 196020 }, { "epoch": 55.64291796764122, "grad_norm": 10.6636381149292, "learning_rate": 4.437950610275334e-05, "loss": 0.018538498878479005, "step": 196030 }, { "epoch": 55.64575645756457, "grad_norm": 0.038209520280361176, "learning_rate": 4.437666761282998e-05, "loss": 0.0005813699215650558, "step": 196040 }, { "epoch": 55.648594947487936, "grad_norm": 0.7077522873878479, "learning_rate": 4.437382912290662e-05, "loss": 0.0027345724403858183, "step": 196050 }, { "epoch": 55.6514334374113, "grad_norm": 0.08470551669597626, "learning_rate": 4.4370990632983254e-05, "loss": 0.00038325246423482896, "step": 196060 }, { "epoch": 55.654271927334655, "grad_norm": 0.3651424050331116, "learning_rate": 4.4368152143059896e-05, "loss": 0.0006201427429914474, "step": 196070 }, { "epoch": 55.65711041725802, "grad_norm": 4.159010887145996, "learning_rate": 4.436531365313653e-05, "loss": 0.0014470621943473815, "step": 196080 }, { "epoch": 55.65994890718138, "grad_norm": 0.017386550083756447, "learning_rate": 4.436247516321317e-05, "loss": 0.0005401700735092163, "step": 196090 }, { "epoch": 55.66278739710474, "grad_norm": 0.4847719371318817, "learning_rate": 4.435963667328981e-05, "loss": 0.0005089074373245239, "step": 196100 }, { "epoch": 55.6656258870281, "grad_norm": 0.025495506823062897, "learning_rate": 4.435679818336645e-05, "loss": 0.00014065206050872803, "step": 196110 }, { "epoch": 55.66846437695146, "grad_norm": 0.007550947368144989, "learning_rate": 4.435395969344309e-05, "loss": 0.00026952717453241346, "step": 196120 }, { "epoch": 55.671302866874825, "grad_norm": 0.08512173593044281, "learning_rate": 4.435112120351973e-05, "loss": 0.00017672460526227952, "step": 196130 }, { "epoch": 55.67414135679818, "grad_norm": 0.015879260376095772, "learning_rate": 4.4348282713596365e-05, "loss": 0.0002460937947034836, "step": 196140 }, { "epoch": 55.676979846721544, "grad_norm": 0.24115817248821259, "learning_rate": 4.434544422367301e-05, "loss": 0.00023420080542564393, "step": 196150 }, { "epoch": 55.67981833664491, "grad_norm": 0.004243307281285524, "learning_rate": 4.434260573374965e-05, "loss": 0.00012163519859313965, "step": 196160 }, { "epoch": 55.68265682656826, "grad_norm": 0.09291132539510727, "learning_rate": 4.433976724382628e-05, "loss": 0.0011517027392983436, "step": 196170 }, { "epoch": 55.685495316491625, "grad_norm": 0.021319884806871414, "learning_rate": 4.4336928753902924e-05, "loss": 0.0011739104986190796, "step": 196180 }, { "epoch": 55.68833380641499, "grad_norm": 0.08847673237323761, "learning_rate": 4.4334090263979565e-05, "loss": 0.0007363289594650268, "step": 196190 }, { "epoch": 55.69117229633835, "grad_norm": 0.10774095356464386, "learning_rate": 4.4331251774056207e-05, "loss": 0.0006121618673205376, "step": 196200 }, { "epoch": 55.69401078626171, "grad_norm": 0.4457453489303589, "learning_rate": 4.432841328413284e-05, "loss": 0.0011502973735332488, "step": 196210 }, { "epoch": 55.69684927618507, "grad_norm": 1.9655834436416626, "learning_rate": 4.432557479420948e-05, "loss": 0.0016965391114354134, "step": 196220 }, { "epoch": 55.69968776610843, "grad_norm": 0.8238658308982849, "learning_rate": 4.4322736304286124e-05, "loss": 0.016632293164730073, "step": 196230 }, { "epoch": 55.70252625603179, "grad_norm": 0.02391999587416649, "learning_rate": 4.431989781436276e-05, "loss": 0.00027452707290649415, "step": 196240 }, { "epoch": 55.70536474595515, "grad_norm": 0.00316080404445529, "learning_rate": 4.43170593244394e-05, "loss": 0.002337997406721115, "step": 196250 }, { "epoch": 55.708203235878514, "grad_norm": 0.04705654829740524, "learning_rate": 4.431422083451604e-05, "loss": 0.0036091230809688567, "step": 196260 }, { "epoch": 55.71104172580187, "grad_norm": 0.1685914397239685, "learning_rate": 4.4311382344592676e-05, "loss": 0.0005366230383515358, "step": 196270 }, { "epoch": 55.71388021572523, "grad_norm": 0.0018450048519298434, "learning_rate": 4.430854385466932e-05, "loss": 0.0006835192441940307, "step": 196280 }, { "epoch": 55.716718705648596, "grad_norm": 1.0380218029022217, "learning_rate": 4.430570536474596e-05, "loss": 0.0005008533596992492, "step": 196290 }, { "epoch": 55.71955719557196, "grad_norm": 0.006679201498627663, "learning_rate": 4.430286687482259e-05, "loss": 0.002300572209060192, "step": 196300 }, { "epoch": 55.722395685495314, "grad_norm": 13.155611038208008, "learning_rate": 4.4300028384899235e-05, "loss": 0.007824069261550904, "step": 196310 }, { "epoch": 55.72523417541868, "grad_norm": 0.03588593006134033, "learning_rate": 4.4297189894975876e-05, "loss": 0.0006073309108614921, "step": 196320 }, { "epoch": 55.72807266534204, "grad_norm": 0.02677471935749054, "learning_rate": 4.429435140505252e-05, "loss": 0.0003228556364774704, "step": 196330 }, { "epoch": 55.730911155265396, "grad_norm": 0.11674758791923523, "learning_rate": 4.429151291512915e-05, "loss": 0.003599432110786438, "step": 196340 }, { "epoch": 55.73374964518876, "grad_norm": 2.3775994777679443, "learning_rate": 4.428867442520579e-05, "loss": 0.0011542508378624915, "step": 196350 }, { "epoch": 55.73658813511212, "grad_norm": 22.034006118774414, "learning_rate": 4.4285835935282435e-05, "loss": 0.005841448903083801, "step": 196360 }, { "epoch": 55.739426625035485, "grad_norm": 0.020805599167943, "learning_rate": 4.428299744535907e-05, "loss": 0.0015375517308712006, "step": 196370 }, { "epoch": 55.74226511495884, "grad_norm": 0.004847255535423756, "learning_rate": 4.428015895543571e-05, "loss": 7.790438830852508e-05, "step": 196380 }, { "epoch": 55.7451036048822, "grad_norm": 0.11942699551582336, "learning_rate": 4.427732046551235e-05, "loss": 0.0002386104315519333, "step": 196390 }, { "epoch": 55.747942094805566, "grad_norm": 0.03265266865491867, "learning_rate": 4.427448197558899e-05, "loss": 0.0008118521422147751, "step": 196400 }, { "epoch": 55.75078058472892, "grad_norm": 0.04501074552536011, "learning_rate": 4.427164348566563e-05, "loss": 0.0003537330776453018, "step": 196410 }, { "epoch": 55.753619074652285, "grad_norm": 0.31819894909858704, "learning_rate": 4.426880499574227e-05, "loss": 0.00017409808933734893, "step": 196420 }, { "epoch": 55.75645756457565, "grad_norm": 0.018366165459156036, "learning_rate": 4.4265966505818904e-05, "loss": 0.0002445012331008911, "step": 196430 }, { "epoch": 55.759296054499, "grad_norm": 0.03831537067890167, "learning_rate": 4.4263128015895545e-05, "loss": 0.000222061388194561, "step": 196440 }, { "epoch": 55.762134544422366, "grad_norm": 0.009816695004701614, "learning_rate": 4.426028952597219e-05, "loss": 0.00022650305181741713, "step": 196450 }, { "epoch": 55.76497303434573, "grad_norm": 0.05060211569070816, "learning_rate": 4.425745103604883e-05, "loss": 0.0002873556688427925, "step": 196460 }, { "epoch": 55.76781152426909, "grad_norm": 0.033522047102451324, "learning_rate": 4.425461254612546e-05, "loss": 0.00023836139589548112, "step": 196470 }, { "epoch": 55.77065001419245, "grad_norm": 0.013183526694774628, "learning_rate": 4.42517740562021e-05, "loss": 0.0003687251359224319, "step": 196480 }, { "epoch": 55.77348850411581, "grad_norm": 0.015441070310771465, "learning_rate": 4.4248935566278745e-05, "loss": 0.017087490856647493, "step": 196490 }, { "epoch": 55.776326994039174, "grad_norm": 0.015289708971977234, "learning_rate": 4.424609707635538e-05, "loss": 0.0008404096588492393, "step": 196500 }, { "epoch": 55.776326994039174, "eval_accuracy": 0.9804158453614803, "eval_loss": 0.07951135188341141, "eval_runtime": 53.0355, "eval_samples_per_second": 296.537, "eval_steps_per_second": 4.638, "step": 196500 }, { "epoch": 55.77916548396253, "grad_norm": 0.021557379513978958, "learning_rate": 4.424325858643202e-05, "loss": 0.002095990814268589, "step": 196510 }, { "epoch": 55.78200397388589, "grad_norm": 0.0063910819590091705, "learning_rate": 4.424042009650866e-05, "loss": 0.0003084756433963776, "step": 196520 }, { "epoch": 55.784842463809255, "grad_norm": 0.002911107847467065, "learning_rate": 4.42375816065853e-05, "loss": 0.0007454650476574898, "step": 196530 }, { "epoch": 55.78768095373261, "grad_norm": 0.3421761691570282, "learning_rate": 4.423474311666194e-05, "loss": 0.0033546369522809982, "step": 196540 }, { "epoch": 55.790519443655974, "grad_norm": 0.12013914436101913, "learning_rate": 4.423190462673858e-05, "loss": 0.005099375545978546, "step": 196550 }, { "epoch": 55.79335793357934, "grad_norm": 0.047236714512109756, "learning_rate": 4.4229066136815215e-05, "loss": 0.00012446828186511992, "step": 196560 }, { "epoch": 55.7961964235027, "grad_norm": 0.003503880463540554, "learning_rate": 4.4226227646891856e-05, "loss": 0.000690830871462822, "step": 196570 }, { "epoch": 55.799034913426055, "grad_norm": 0.41110873222351074, "learning_rate": 4.422338915696849e-05, "loss": 0.00042043942958116534, "step": 196580 }, { "epoch": 55.80187340334942, "grad_norm": 0.013458204455673695, "learning_rate": 4.422055066704513e-05, "loss": 0.0007535373792052269, "step": 196590 }, { "epoch": 55.80471189327278, "grad_norm": 0.0062384894117712975, "learning_rate": 4.4217712177121773e-05, "loss": 0.006015152111649514, "step": 196600 }, { "epoch": 55.80755038319614, "grad_norm": 0.031165968626737595, "learning_rate": 4.421487368719841e-05, "loss": 0.0009334642440080642, "step": 196610 }, { "epoch": 55.8103888731195, "grad_norm": 0.17681315541267395, "learning_rate": 4.4212035197275056e-05, "loss": 0.0009269390255212783, "step": 196620 }, { "epoch": 55.81322736304286, "grad_norm": 0.06713367998600006, "learning_rate": 4.420919670735169e-05, "loss": 0.00017329324036836625, "step": 196630 }, { "epoch": 55.81606585296622, "grad_norm": 0.00806956272572279, "learning_rate": 4.4206358217428325e-05, "loss": 0.0012783430516719817, "step": 196640 }, { "epoch": 55.81890434288958, "grad_norm": 0.07543978095054626, "learning_rate": 4.4203519727504974e-05, "loss": 0.00019868072122335433, "step": 196650 }, { "epoch": 55.821742832812944, "grad_norm": 0.5272852778434753, "learning_rate": 4.420068123758161e-05, "loss": 0.0007408400997519493, "step": 196660 }, { "epoch": 55.82458132273631, "grad_norm": 1.1839450597763062, "learning_rate": 4.419784274765825e-05, "loss": 0.0008226940408349037, "step": 196670 }, { "epoch": 55.82741981265966, "grad_norm": 0.10093621164560318, "learning_rate": 4.4195004257734884e-05, "loss": 6.655901670455932e-05, "step": 196680 }, { "epoch": 55.830258302583026, "grad_norm": 0.015493909828364849, "learning_rate": 4.4192165767811526e-05, "loss": 0.0024442946538329124, "step": 196690 }, { "epoch": 55.83309679250639, "grad_norm": 0.012169396504759789, "learning_rate": 4.418932727788817e-05, "loss": 0.0017765145748853683, "step": 196700 }, { "epoch": 55.835935282429745, "grad_norm": 0.03588740527629852, "learning_rate": 4.41864887879648e-05, "loss": 9.153615683317184e-05, "step": 196710 }, { "epoch": 55.83877377235311, "grad_norm": 1.4265331029891968, "learning_rate": 4.418365029804144e-05, "loss": 0.0005217345431447029, "step": 196720 }, { "epoch": 55.84161226227647, "grad_norm": 0.05049467459321022, "learning_rate": 4.4180811808118084e-05, "loss": 0.0001984803006052971, "step": 196730 }, { "epoch": 55.84445075219983, "grad_norm": 0.08259903639554977, "learning_rate": 4.417797331819472e-05, "loss": 0.00282609760761261, "step": 196740 }, { "epoch": 55.84728924212319, "grad_norm": 0.03974974527955055, "learning_rate": 4.417513482827137e-05, "loss": 0.0001546451821923256, "step": 196750 }, { "epoch": 55.85012773204655, "grad_norm": 0.01480604987591505, "learning_rate": 4.4172296338348e-05, "loss": 0.00018654093146324157, "step": 196760 }, { "epoch": 55.852966221969915, "grad_norm": 0.00953599065542221, "learning_rate": 4.4169457848424636e-05, "loss": 0.0003697158768773079, "step": 196770 }, { "epoch": 55.85580471189327, "grad_norm": 0.0982782170176506, "learning_rate": 4.4166619358501284e-05, "loss": 0.0001032814383506775, "step": 196780 }, { "epoch": 55.85864320181663, "grad_norm": 0.014882607385516167, "learning_rate": 4.416378086857792e-05, "loss": 0.000308988057076931, "step": 196790 }, { "epoch": 55.861481691739996, "grad_norm": 0.018242990598082542, "learning_rate": 4.416094237865456e-05, "loss": 0.003091609477996826, "step": 196800 }, { "epoch": 55.86432018166335, "grad_norm": 0.025441473349928856, "learning_rate": 4.4158103888731195e-05, "loss": 0.0005348697304725647, "step": 196810 }, { "epoch": 55.867158671586715, "grad_norm": 0.041567008942365646, "learning_rate": 4.4155265398807836e-05, "loss": 0.0003047395497560501, "step": 196820 }, { "epoch": 55.86999716151008, "grad_norm": 0.04043574631214142, "learning_rate": 4.415242690888448e-05, "loss": 0.0020376672968268395, "step": 196830 }, { "epoch": 55.87283565143344, "grad_norm": 0.0655561089515686, "learning_rate": 4.414958841896111e-05, "loss": 0.00045767873525619506, "step": 196840 }, { "epoch": 55.8756741413568, "grad_norm": 0.07422874122858047, "learning_rate": 4.4146749929037754e-05, "loss": 0.0004155099391937256, "step": 196850 }, { "epoch": 55.87851263128016, "grad_norm": 0.03921515867114067, "learning_rate": 4.4143911439114395e-05, "loss": 0.00015900880098342895, "step": 196860 }, { "epoch": 55.88135112120352, "grad_norm": 5.791692733764648, "learning_rate": 4.414107294919103e-05, "loss": 0.0012139400467276573, "step": 196870 }, { "epoch": 55.88418961112688, "grad_norm": 0.02007434330880642, "learning_rate": 4.413823445926768e-05, "loss": 0.00012686308473348619, "step": 196880 }, { "epoch": 55.88702810105024, "grad_norm": 0.0481533482670784, "learning_rate": 4.413539596934431e-05, "loss": 0.0013536466285586357, "step": 196890 }, { "epoch": 55.889866590973604, "grad_norm": 0.05327156186103821, "learning_rate": 4.413255747942095e-05, "loss": 0.00020508263260126113, "step": 196900 }, { "epoch": 55.89270508089696, "grad_norm": 22.54557228088379, "learning_rate": 4.412971898949759e-05, "loss": 0.0059399716556072235, "step": 196910 }, { "epoch": 55.89554357082032, "grad_norm": 0.2598065733909607, "learning_rate": 4.412688049957423e-05, "loss": 0.0019242463633418084, "step": 196920 }, { "epoch": 55.898382060743685, "grad_norm": 0.2713048458099365, "learning_rate": 4.412404200965087e-05, "loss": 0.0005001241341233253, "step": 196930 }, { "epoch": 55.90122055066705, "grad_norm": 0.027322858572006226, "learning_rate": 4.4121203519727506e-05, "loss": 0.00014628414064645767, "step": 196940 }, { "epoch": 55.904059040590404, "grad_norm": 0.0010179419768974185, "learning_rate": 4.411836502980415e-05, "loss": 0.00427926704287529, "step": 196950 }, { "epoch": 55.90689753051377, "grad_norm": 1.9189693927764893, "learning_rate": 4.411552653988079e-05, "loss": 0.0011025862768292427, "step": 196960 }, { "epoch": 55.90973602043713, "grad_norm": 0.013417189009487629, "learning_rate": 4.411268804995742e-05, "loss": 0.001301615685224533, "step": 196970 }, { "epoch": 55.912574510360486, "grad_norm": 0.017411360517144203, "learning_rate": 4.4109849560034064e-05, "loss": 0.0005613379180431366, "step": 196980 }, { "epoch": 55.91541300028385, "grad_norm": 0.009184901602566242, "learning_rate": 4.4107011070110706e-05, "loss": 0.00019912850111722947, "step": 196990 }, { "epoch": 55.91825149020721, "grad_norm": 1.672029972076416, "learning_rate": 4.410417258018734e-05, "loss": 0.0004966497421264648, "step": 197000 }, { "epoch": 55.91825149020721, "eval_accuracy": 0.9842945253385896, "eval_loss": 0.06399284303188324, "eval_runtime": 40.4073, "eval_samples_per_second": 389.212, "eval_steps_per_second": 6.088, "step": 197000 }, { "epoch": 55.92108998013057, "grad_norm": 0.013070494867861271, "learning_rate": 4.410133409026398e-05, "loss": 6.25256448984146e-05, "step": 197010 }, { "epoch": 55.92392847005393, "grad_norm": 0.030026718974113464, "learning_rate": 4.409849560034062e-05, "loss": 0.00021469853818416594, "step": 197020 }, { "epoch": 55.92676695997729, "grad_norm": 0.7906685471534729, "learning_rate": 4.409565711041726e-05, "loss": 0.0007855705916881561, "step": 197030 }, { "epoch": 55.929605449900656, "grad_norm": 0.01074166502803564, "learning_rate": 4.40928186204939e-05, "loss": 0.0001175273209810257, "step": 197040 }, { "epoch": 55.93244393982401, "grad_norm": 0.13740628957748413, "learning_rate": 4.408998013057054e-05, "loss": 0.0004767082631587982, "step": 197050 }, { "epoch": 55.935282429747375, "grad_norm": 0.01692841574549675, "learning_rate": 4.4087141640647175e-05, "loss": 0.0011439187452197074, "step": 197060 }, { "epoch": 55.93812091967074, "grad_norm": 0.025303488597273827, "learning_rate": 4.4084303150723816e-05, "loss": 0.001860872469842434, "step": 197070 }, { "epoch": 55.94095940959409, "grad_norm": 6.001023292541504, "learning_rate": 4.408146466080046e-05, "loss": 0.0012865569442510605, "step": 197080 }, { "epoch": 55.943797899517456, "grad_norm": 2.399824380874634, "learning_rate": 4.40786261708771e-05, "loss": 0.00044643934816122053, "step": 197090 }, { "epoch": 55.94663638944082, "grad_norm": 0.010929466225206852, "learning_rate": 4.4075787680953734e-05, "loss": 0.0002171289175748825, "step": 197100 }, { "epoch": 55.949474879364175, "grad_norm": 0.014746841043233871, "learning_rate": 4.407294919103037e-05, "loss": 0.00023623388260602952, "step": 197110 }, { "epoch": 55.95231336928754, "grad_norm": 0.135015070438385, "learning_rate": 4.4070110701107017e-05, "loss": 0.0011438841000199317, "step": 197120 }, { "epoch": 55.9551518592109, "grad_norm": 0.0007474495214410126, "learning_rate": 4.406727221118365e-05, "loss": 0.0003593720495700836, "step": 197130 }, { "epoch": 55.95799034913426, "grad_norm": 0.020238656550645828, "learning_rate": 4.406443372126029e-05, "loss": 0.000325227715075016, "step": 197140 }, { "epoch": 55.96082883905762, "grad_norm": 0.07050474733114243, "learning_rate": 4.4061595231336934e-05, "loss": 0.00032708123326301574, "step": 197150 }, { "epoch": 55.96366732898098, "grad_norm": 0.005509276408702135, "learning_rate": 4.405875674141357e-05, "loss": 0.00015748478472232819, "step": 197160 }, { "epoch": 55.966505818904345, "grad_norm": 0.034700796008110046, "learning_rate": 4.405591825149021e-05, "loss": 0.0005282644182443619, "step": 197170 }, { "epoch": 55.9693443088277, "grad_norm": 0.03109235316514969, "learning_rate": 4.405307976156685e-05, "loss": 9.878724813461304e-05, "step": 197180 }, { "epoch": 55.972182798751064, "grad_norm": 11.959446907043457, "learning_rate": 4.4050241271643486e-05, "loss": 0.002447402477264404, "step": 197190 }, { "epoch": 55.97502128867443, "grad_norm": 0.01258369255810976, "learning_rate": 4.404740278172013e-05, "loss": 0.0007269816473126411, "step": 197200 }, { "epoch": 55.97785977859779, "grad_norm": 0.02710784040391445, "learning_rate": 4.404456429179676e-05, "loss": 0.0003687527030706406, "step": 197210 }, { "epoch": 55.980698268521145, "grad_norm": 0.005270322784781456, "learning_rate": 4.404172580187341e-05, "loss": 0.004638302326202393, "step": 197220 }, { "epoch": 55.98353675844451, "grad_norm": 0.005701661109924316, "learning_rate": 4.4038887311950045e-05, "loss": 0.0004634050652384758, "step": 197230 }, { "epoch": 55.98637524836787, "grad_norm": 0.011613554321229458, "learning_rate": 4.403604882202668e-05, "loss": 0.0004680270329117775, "step": 197240 }, { "epoch": 55.98921373829123, "grad_norm": 0.01729651540517807, "learning_rate": 4.403321033210333e-05, "loss": 9.647104889154434e-05, "step": 197250 }, { "epoch": 55.99205222821459, "grad_norm": 0.01139920111745596, "learning_rate": 4.403037184217996e-05, "loss": 0.00766889825463295, "step": 197260 }, { "epoch": 55.99489071813795, "grad_norm": 0.7092458605766296, "learning_rate": 4.40275333522566e-05, "loss": 0.0006305910646915436, "step": 197270 }, { "epoch": 55.99772920806131, "grad_norm": 0.007414695806801319, "learning_rate": 4.4024694862333245e-05, "loss": 0.00022681504487991332, "step": 197280 }, { "epoch": 56.00056769798467, "grad_norm": 0.004047184716910124, "learning_rate": 4.402185637240988e-05, "loss": 0.00032158833928406236, "step": 197290 }, { "epoch": 56.003406187908034, "grad_norm": 0.009253040887415409, "learning_rate": 4.401901788248652e-05, "loss": 6.005112081766129e-05, "step": 197300 }, { "epoch": 56.0062446778314, "grad_norm": 0.025909705087542534, "learning_rate": 4.4016179392563155e-05, "loss": 0.0008279070258140564, "step": 197310 }, { "epoch": 56.00908316775475, "grad_norm": 0.005667968187481165, "learning_rate": 4.4013340902639797e-05, "loss": 0.001235099509358406, "step": 197320 }, { "epoch": 56.011921657678116, "grad_norm": 0.009245979599654675, "learning_rate": 4.401050241271644e-05, "loss": 0.000119108147919178, "step": 197330 }, { "epoch": 56.01476014760148, "grad_norm": 0.1123918890953064, "learning_rate": 4.400766392279307e-05, "loss": 0.001118985190987587, "step": 197340 }, { "epoch": 56.017598637524834, "grad_norm": 0.04747099429368973, "learning_rate": 4.400482543286972e-05, "loss": 0.002441955730319023, "step": 197350 }, { "epoch": 56.0204371274482, "grad_norm": 0.07664189487695694, "learning_rate": 4.4001986942946355e-05, "loss": 0.005145514011383056, "step": 197360 }, { "epoch": 56.02327561737156, "grad_norm": 0.015117737464606762, "learning_rate": 4.399914845302299e-05, "loss": 0.0042982656508684155, "step": 197370 }, { "epoch": 56.026114107294916, "grad_norm": 0.8702237010002136, "learning_rate": 4.399630996309964e-05, "loss": 0.0012946177273988723, "step": 197380 }, { "epoch": 56.02895259721828, "grad_norm": 0.580944299697876, "learning_rate": 4.399347147317627e-05, "loss": 0.0012264838442206383, "step": 197390 }, { "epoch": 56.03179108714164, "grad_norm": 0.01528135221451521, "learning_rate": 4.3990632983252914e-05, "loss": 0.000802108459174633, "step": 197400 }, { "epoch": 56.034629577065004, "grad_norm": 0.009059195406734943, "learning_rate": 4.398779449332955e-05, "loss": 0.0008625952526926994, "step": 197410 }, { "epoch": 56.03746806698836, "grad_norm": 0.0288576427847147, "learning_rate": 4.398495600340619e-05, "loss": 0.0015732022002339363, "step": 197420 }, { "epoch": 56.04030655691172, "grad_norm": 10.090566635131836, "learning_rate": 4.398211751348283e-05, "loss": 0.010475891083478928, "step": 197430 }, { "epoch": 56.043145046835086, "grad_norm": 0.369086891412735, "learning_rate": 4.3979279023559466e-05, "loss": 0.0007627198472619056, "step": 197440 }, { "epoch": 56.04598353675844, "grad_norm": 0.024036051705479622, "learning_rate": 4.397644053363611e-05, "loss": 0.00023680441081523894, "step": 197450 }, { "epoch": 56.048822026681805, "grad_norm": 0.02544509992003441, "learning_rate": 4.397360204371275e-05, "loss": 0.0009199738502502442, "step": 197460 }, { "epoch": 56.05166051660517, "grad_norm": 0.011269173584878445, "learning_rate": 4.397076355378938e-05, "loss": 0.00019003953784704208, "step": 197470 }, { "epoch": 56.05449900652852, "grad_norm": 0.17354752123355865, "learning_rate": 4.3967925063866025e-05, "loss": 0.00048138517886400225, "step": 197480 }, { "epoch": 56.057337496451886, "grad_norm": 0.003213202813640237, "learning_rate": 4.3965086573942666e-05, "loss": 0.00034799259155988694, "step": 197490 }, { "epoch": 56.06017598637525, "grad_norm": 0.012725956737995148, "learning_rate": 4.39622480840193e-05, "loss": 0.015676915645599365, "step": 197500 }, { "epoch": 56.06017598637525, "eval_accuracy": 0.9823233928912062, "eval_loss": 0.07227962464094162, "eval_runtime": 53.0777, "eval_samples_per_second": 296.301, "eval_steps_per_second": 4.635, "step": 197500 }, { "epoch": 56.06301447629861, "grad_norm": 0.02387797273695469, "learning_rate": 4.395940959409594e-05, "loss": 9.156521409749984e-05, "step": 197510 }, { "epoch": 56.06585296622197, "grad_norm": 0.002848807256668806, "learning_rate": 4.3956571104172583e-05, "loss": 0.0018209893256425857, "step": 197520 }, { "epoch": 56.06869145614533, "grad_norm": 0.0051090409979224205, "learning_rate": 4.395373261424922e-05, "loss": 0.00037751253694295883, "step": 197530 }, { "epoch": 56.071529946068694, "grad_norm": 0.04605802148580551, "learning_rate": 4.395089412432586e-05, "loss": 0.00295895803719759, "step": 197540 }, { "epoch": 56.07436843599205, "grad_norm": 0.03325134888291359, "learning_rate": 4.39480556344025e-05, "loss": 7.930789142847061e-05, "step": 197550 }, { "epoch": 56.07720692591541, "grad_norm": 0.07746249437332153, "learning_rate": 4.394521714447914e-05, "loss": 0.021711333096027373, "step": 197560 }, { "epoch": 56.080045415838775, "grad_norm": 0.03724145516753197, "learning_rate": 4.394237865455578e-05, "loss": 0.00033968277275562284, "step": 197570 }, { "epoch": 56.08288390576214, "grad_norm": 0.030269049108028412, "learning_rate": 4.393954016463242e-05, "loss": 0.00022076070308685303, "step": 197580 }, { "epoch": 56.085722395685494, "grad_norm": 0.0013174881460145116, "learning_rate": 4.393670167470906e-05, "loss": 6.210058927536011e-05, "step": 197590 }, { "epoch": 56.08856088560886, "grad_norm": 0.0035779543686658144, "learning_rate": 4.3933863184785694e-05, "loss": 0.00016096308827400206, "step": 197600 }, { "epoch": 56.09139937553222, "grad_norm": 0.032865334302186966, "learning_rate": 4.3931024694862335e-05, "loss": 0.004288946837186813, "step": 197610 }, { "epoch": 56.094237865455575, "grad_norm": 1.352529525756836, "learning_rate": 4.392818620493898e-05, "loss": 0.00027949400246143343, "step": 197620 }, { "epoch": 56.09707635537894, "grad_norm": 0.014028088189661503, "learning_rate": 4.392534771501561e-05, "loss": 0.00047229863703250884, "step": 197630 }, { "epoch": 56.0999148453023, "grad_norm": 0.012866310775279999, "learning_rate": 4.392250922509225e-05, "loss": 0.00013956781476736068, "step": 197640 }, { "epoch": 56.10275333522566, "grad_norm": 0.4325336217880249, "learning_rate": 4.3919670735168894e-05, "loss": 0.00019534621387720107, "step": 197650 }, { "epoch": 56.10559182514902, "grad_norm": 0.048562150448560715, "learning_rate": 4.391683224524553e-05, "loss": 0.00045220162719488143, "step": 197660 }, { "epoch": 56.10843031507238, "grad_norm": 0.016927283257246017, "learning_rate": 4.391399375532217e-05, "loss": 7.749814540147781e-05, "step": 197670 }, { "epoch": 56.111268804995746, "grad_norm": 0.053159262984991074, "learning_rate": 4.391115526539881e-05, "loss": 0.0065431766211986545, "step": 197680 }, { "epoch": 56.1141072949191, "grad_norm": 0.008588004857301712, "learning_rate": 4.390831677547545e-05, "loss": 0.00015869643539190292, "step": 197690 }, { "epoch": 56.116945784842464, "grad_norm": 0.4942217171192169, "learning_rate": 4.390547828555209e-05, "loss": 9.814538061618805e-05, "step": 197700 }, { "epoch": 56.11978427476583, "grad_norm": 0.019606545567512512, "learning_rate": 4.390263979562872e-05, "loss": 0.00010349992662668228, "step": 197710 }, { "epoch": 56.12262276468918, "grad_norm": 0.022320793941617012, "learning_rate": 4.389980130570537e-05, "loss": 0.00013157818466424942, "step": 197720 }, { "epoch": 56.125461254612546, "grad_norm": 0.13051168620586395, "learning_rate": 4.3896962815782005e-05, "loss": 7.764790207147599e-05, "step": 197730 }, { "epoch": 56.12829974453591, "grad_norm": 11.348395347595215, "learning_rate": 4.3894124325858646e-05, "loss": 0.0026571515947580336, "step": 197740 }, { "epoch": 56.131138234459264, "grad_norm": 0.005375501699745655, "learning_rate": 4.389128583593529e-05, "loss": 5.179531872272492e-05, "step": 197750 }, { "epoch": 56.13397672438263, "grad_norm": 0.04252206161618233, "learning_rate": 4.388844734601192e-05, "loss": 0.001125026121735573, "step": 197760 }, { "epoch": 56.13681521430599, "grad_norm": 0.01104933675378561, "learning_rate": 4.3885608856088564e-05, "loss": 0.0003015339374542236, "step": 197770 }, { "epoch": 56.13965370422935, "grad_norm": 0.007121171336621046, "learning_rate": 4.3882770366165205e-05, "loss": 0.0002878652885556221, "step": 197780 }, { "epoch": 56.14249219415271, "grad_norm": 0.06426697224378586, "learning_rate": 4.387993187624184e-05, "loss": 0.0013051675632596016, "step": 197790 }, { "epoch": 56.14533068407607, "grad_norm": 0.0019965036772191525, "learning_rate": 4.387709338631848e-05, "loss": 0.00029746666550636294, "step": 197800 }, { "epoch": 56.148169173999435, "grad_norm": 0.0030009886249899864, "learning_rate": 4.3874254896395116e-05, "loss": 0.0005005041137337684, "step": 197810 }, { "epoch": 56.15100766392279, "grad_norm": 0.0326855331659317, "learning_rate": 4.3871416406471764e-05, "loss": 0.00042383652180433273, "step": 197820 }, { "epoch": 56.15384615384615, "grad_norm": 0.26934391260147095, "learning_rate": 4.38685779165484e-05, "loss": 0.0022258009761571885, "step": 197830 }, { "epoch": 56.156684643769516, "grad_norm": 0.006958479061722755, "learning_rate": 4.386573942662503e-05, "loss": 0.0007209533825516701, "step": 197840 }, { "epoch": 56.15952313369287, "grad_norm": 0.9007774591445923, "learning_rate": 4.386290093670168e-05, "loss": 0.0009369507431983947, "step": 197850 }, { "epoch": 56.162361623616235, "grad_norm": 0.4588879644870758, "learning_rate": 4.3860062446778316e-05, "loss": 0.0002367960289120674, "step": 197860 }, { "epoch": 56.1652001135396, "grad_norm": 0.00490093557164073, "learning_rate": 4.385722395685496e-05, "loss": 0.00032670535147190095, "step": 197870 }, { "epoch": 56.16803860346296, "grad_norm": 0.09726215898990631, "learning_rate": 4.38543854669316e-05, "loss": 0.00024436693638563156, "step": 197880 }, { "epoch": 56.170877093386316, "grad_norm": 0.0032983613200485706, "learning_rate": 4.385154697700823e-05, "loss": 0.003728875517845154, "step": 197890 }, { "epoch": 56.17371558330968, "grad_norm": 0.019366420805454254, "learning_rate": 4.3848708487084874e-05, "loss": 0.0002840593457221985, "step": 197900 }, { "epoch": 56.17655407323304, "grad_norm": 3.601414680480957, "learning_rate": 4.384586999716151e-05, "loss": 0.0025863759219646454, "step": 197910 }, { "epoch": 56.1793925631564, "grad_norm": 0.15890124440193176, "learning_rate": 4.384303150723815e-05, "loss": 0.0002547942101955414, "step": 197920 }, { "epoch": 56.18223105307976, "grad_norm": 0.07824277877807617, "learning_rate": 4.384019301731479e-05, "loss": 0.001334497518837452, "step": 197930 }, { "epoch": 56.185069543003124, "grad_norm": 0.09588097780942917, "learning_rate": 4.3837354527391426e-05, "loss": 0.0003628060221672058, "step": 197940 }, { "epoch": 56.18790803292649, "grad_norm": 0.02037093974649906, "learning_rate": 4.383451603746807e-05, "loss": 9.552240371704101e-05, "step": 197950 }, { "epoch": 56.19074652284984, "grad_norm": 0.0030250591225922108, "learning_rate": 4.383167754754471e-05, "loss": 0.0006094848737120629, "step": 197960 }, { "epoch": 56.193585012773205, "grad_norm": 0.10411802679300308, "learning_rate": 4.3828839057621344e-05, "loss": 0.0006388088688254357, "step": 197970 }, { "epoch": 56.19642350269657, "grad_norm": 0.06297773122787476, "learning_rate": 4.382600056769799e-05, "loss": 0.0009738199412822723, "step": 197980 }, { "epoch": 56.199261992619924, "grad_norm": 0.046721383929252625, "learning_rate": 4.3823162077774626e-05, "loss": 0.0007610851898789406, "step": 197990 }, { "epoch": 56.20210048254329, "grad_norm": 0.283433735370636, "learning_rate": 4.382032358785126e-05, "loss": 0.0015446040779352188, "step": 198000 }, { "epoch": 56.20210048254329, "eval_accuracy": 0.9813060342086857, "eval_loss": 0.07756064087152481, "eval_runtime": 50.207, "eval_samples_per_second": 313.243, "eval_steps_per_second": 4.9, "step": 198000 }, { "epoch": 56.20493897246665, "grad_norm": 0.040324028581380844, "learning_rate": 4.38174850979279e-05, "loss": 0.0036095619201660157, "step": 198010 }, { "epoch": 56.207777462390005, "grad_norm": 0.5701713562011719, "learning_rate": 4.3814930456996876e-05, "loss": 0.013895155489444732, "step": 198020 }, { "epoch": 56.21061595231337, "grad_norm": 0.06704879552125931, "learning_rate": 4.3812091967073524e-05, "loss": 0.0008170841261744499, "step": 198030 }, { "epoch": 56.21345444223673, "grad_norm": 0.0019485317170619965, "learning_rate": 4.380925347715016e-05, "loss": 0.0005928898230195045, "step": 198040 }, { "epoch": 56.216292932160094, "grad_norm": 9.3867769241333, "learning_rate": 4.380641498722679e-05, "loss": 0.009047240018844604, "step": 198050 }, { "epoch": 56.21913142208345, "grad_norm": 0.1362396627664566, "learning_rate": 4.380357649730344e-05, "loss": 0.009828491508960724, "step": 198060 }, { "epoch": 56.22196991200681, "grad_norm": 0.007184769958257675, "learning_rate": 4.3800738007380076e-05, "loss": 0.0010289020836353302, "step": 198070 }, { "epoch": 56.224808401930176, "grad_norm": 0.01400736067444086, "learning_rate": 4.379789951745672e-05, "loss": 0.0002689924091100693, "step": 198080 }, { "epoch": 56.22764689185353, "grad_norm": 0.1349758356809616, "learning_rate": 4.379506102753335e-05, "loss": 0.0014778459444642067, "step": 198090 }, { "epoch": 56.230485381776894, "grad_norm": 0.006347889080643654, "learning_rate": 4.379222253760999e-05, "loss": 0.00023510623723268508, "step": 198100 }, { "epoch": 56.23332387170026, "grad_norm": 0.04468465596437454, "learning_rate": 4.3789384047686634e-05, "loss": 0.0021865693852305412, "step": 198110 }, { "epoch": 56.23616236162361, "grad_norm": 0.010460597462952137, "learning_rate": 4.378654555776327e-05, "loss": 0.005195742100477218, "step": 198120 }, { "epoch": 56.239000851546976, "grad_norm": 0.044905442744493484, "learning_rate": 4.378370706783991e-05, "loss": 0.00018957778811454773, "step": 198130 }, { "epoch": 56.24183934147034, "grad_norm": 0.018778353929519653, "learning_rate": 4.378086857791655e-05, "loss": 0.00011121649295091629, "step": 198140 }, { "epoch": 56.2446778313937, "grad_norm": 0.05801999941468239, "learning_rate": 4.3778030087993186e-05, "loss": 0.0004779709503054619, "step": 198150 }, { "epoch": 56.24751632131706, "grad_norm": 0.03863867372274399, "learning_rate": 4.3775191598069834e-05, "loss": 8.60210508108139e-05, "step": 198160 }, { "epoch": 56.25035481124042, "grad_norm": 0.007144803646951914, "learning_rate": 4.377235310814647e-05, "loss": 0.00010152440518140793, "step": 198170 }, { "epoch": 56.25319330116378, "grad_norm": 0.021687950938940048, "learning_rate": 4.3769514618223104e-05, "loss": 0.0026099288836121557, "step": 198180 }, { "epoch": 56.25603179108714, "grad_norm": 0.013937327079474926, "learning_rate": 4.3766676128299745e-05, "loss": 0.0001999272033572197, "step": 198190 }, { "epoch": 56.2588702810105, "grad_norm": 0.012533076107501984, "learning_rate": 4.3763837638376386e-05, "loss": 6.430838257074357e-05, "step": 198200 }, { "epoch": 56.261708770933865, "grad_norm": 0.7791585922241211, "learning_rate": 4.376099914845303e-05, "loss": 0.0003641251474618912, "step": 198210 }, { "epoch": 56.26454726085722, "grad_norm": 0.024072812870144844, "learning_rate": 4.375816065852966e-05, "loss": 0.0003377761691808701, "step": 198220 }, { "epoch": 56.26738575078058, "grad_norm": 0.008122351951897144, "learning_rate": 4.3755322168606304e-05, "loss": 0.0002026386559009552, "step": 198230 }, { "epoch": 56.270224240703946, "grad_norm": 0.647007167339325, "learning_rate": 4.3752483678682945e-05, "loss": 0.0005246389657258988, "step": 198240 }, { "epoch": 56.27306273062731, "grad_norm": 1.055873990058899, "learning_rate": 4.374964518875958e-05, "loss": 0.0009849017485976219, "step": 198250 }, { "epoch": 56.275901220550665, "grad_norm": 0.037737298756837845, "learning_rate": 4.374680669883622e-05, "loss": 0.0004978859797120094, "step": 198260 }, { "epoch": 56.27873971047403, "grad_norm": 0.1933894157409668, "learning_rate": 4.374396820891286e-05, "loss": 0.004610618948936463, "step": 198270 }, { "epoch": 56.28157820039739, "grad_norm": 0.022102730348706245, "learning_rate": 4.37411297189895e-05, "loss": 0.0009102772921323777, "step": 198280 }, { "epoch": 56.28441669032075, "grad_norm": 0.28972136974334717, "learning_rate": 4.373829122906614e-05, "loss": 0.000493527390062809, "step": 198290 }, { "epoch": 56.28725518024411, "grad_norm": 1.0753470659255981, "learning_rate": 4.373545273914278e-05, "loss": 0.00045156851410865784, "step": 198300 }, { "epoch": 56.29009367016747, "grad_norm": 0.013276035897433758, "learning_rate": 4.3732614249219414e-05, "loss": 0.0025663917884230615, "step": 198310 }, { "epoch": 56.29293216009083, "grad_norm": 0.2333613783121109, "learning_rate": 4.3729775759296056e-05, "loss": 0.0011578183621168137, "step": 198320 }, { "epoch": 56.29577065001419, "grad_norm": 0.34334176778793335, "learning_rate": 4.37269372693727e-05, "loss": 0.008874938637018204, "step": 198330 }, { "epoch": 56.298609139937554, "grad_norm": 0.011193474754691124, "learning_rate": 4.372409877944933e-05, "loss": 0.0005197791382670402, "step": 198340 }, { "epoch": 56.30144762986092, "grad_norm": 0.7480949759483337, "learning_rate": 4.372126028952597e-05, "loss": 0.0006563054397702217, "step": 198350 }, { "epoch": 56.30428611978427, "grad_norm": 1.7625528573989868, "learning_rate": 4.3718421799602615e-05, "loss": 0.0008338218554854393, "step": 198360 }, { "epoch": 56.307124609707635, "grad_norm": 0.021232260391116142, "learning_rate": 4.3715583309679256e-05, "loss": 0.00042540766298770905, "step": 198370 }, { "epoch": 56.309963099631, "grad_norm": 0.00040469819214195013, "learning_rate": 4.371274481975589e-05, "loss": 0.0020660631358623504, "step": 198380 }, { "epoch": 56.312801589554354, "grad_norm": 23.069496154785156, "learning_rate": 4.3709906329832525e-05, "loss": 0.014421385526657105, "step": 198390 }, { "epoch": 56.31564007947772, "grad_norm": 0.16604621708393097, "learning_rate": 4.370706783990917e-05, "loss": 0.0020814986899495127, "step": 198400 }, { "epoch": 56.31847856940108, "grad_norm": 0.12649494409561157, "learning_rate": 4.370422934998581e-05, "loss": 0.013648708164691926, "step": 198410 }, { "epoch": 56.32131705932444, "grad_norm": 0.09572067111730576, "learning_rate": 4.370139086006245e-05, "loss": 0.004460952058434486, "step": 198420 }, { "epoch": 56.3241555492478, "grad_norm": 5.0249552726745605, "learning_rate": 4.369855237013909e-05, "loss": 0.00225492212921381, "step": 198430 }, { "epoch": 56.32699403917116, "grad_norm": 0.2561461329460144, "learning_rate": 4.3695713880215725e-05, "loss": 0.0004267960786819458, "step": 198440 }, { "epoch": 56.329832529094524, "grad_norm": 0.03057241067290306, "learning_rate": 4.3692875390292367e-05, "loss": 0.00270828977227211, "step": 198450 }, { "epoch": 56.33267101901788, "grad_norm": 0.011159952729940414, "learning_rate": 4.369003690036901e-05, "loss": 0.0033027440309524535, "step": 198460 }, { "epoch": 56.33550950894124, "grad_norm": 0.02049381472170353, "learning_rate": 4.368719841044564e-05, "loss": 0.00024381019175052644, "step": 198470 }, { "epoch": 56.338347998864606, "grad_norm": 0.7302871942520142, "learning_rate": 4.3684359920522284e-05, "loss": 0.009684424102306365, "step": 198480 }, { "epoch": 56.34118648878796, "grad_norm": 0.0023115789517760277, "learning_rate": 4.3681521430598925e-05, "loss": 0.003991736471652985, "step": 198490 }, { "epoch": 56.344024978711325, "grad_norm": 0.23720011115074158, "learning_rate": 4.367868294067557e-05, "loss": 0.0002777343615889549, "step": 198500 }, { "epoch": 56.344024978711325, "eval_accuracy": 0.9845488650092198, "eval_loss": 0.06592947244644165, "eval_runtime": 39.3992, "eval_samples_per_second": 399.17, "eval_steps_per_second": 6.244, "step": 198500 }, { "epoch": 56.34686346863469, "grad_norm": 0.01698381081223488, "learning_rate": 4.36758444507522e-05, "loss": 0.00016094893217086793, "step": 198510 }, { "epoch": 56.34970195855805, "grad_norm": 0.01419064961373806, "learning_rate": 4.3673005960828836e-05, "loss": 0.00010805632919073105, "step": 198520 }, { "epoch": 56.352540448481406, "grad_norm": 0.08597785979509354, "learning_rate": 4.3670167470905484e-05, "loss": 0.00012591406702995301, "step": 198530 }, { "epoch": 56.35537893840477, "grad_norm": 0.8376737236976624, "learning_rate": 4.366732898098212e-05, "loss": 0.00018195323646068572, "step": 198540 }, { "epoch": 56.35821742832813, "grad_norm": 0.08579262346029282, "learning_rate": 4.366449049105876e-05, "loss": 0.000404508039355278, "step": 198550 }, { "epoch": 56.36105591825149, "grad_norm": 0.04165482893586159, "learning_rate": 4.36616520011354e-05, "loss": 0.0002170450985431671, "step": 198560 }, { "epoch": 56.36389440817485, "grad_norm": 0.05170481279492378, "learning_rate": 4.3658813511212036e-05, "loss": 0.0005578046664595604, "step": 198570 }, { "epoch": 56.36673289809821, "grad_norm": 0.037261903285980225, "learning_rate": 4.365597502128868e-05, "loss": 0.0025792524218559267, "step": 198580 }, { "epoch": 56.36957138802157, "grad_norm": 0.07815025001764297, "learning_rate": 4.365313653136532e-05, "loss": 0.00010724719613790512, "step": 198590 }, { "epoch": 56.37240987794493, "grad_norm": 2.0016610622406006, "learning_rate": 4.365029804144195e-05, "loss": 0.0005327804014086723, "step": 198600 }, { "epoch": 56.375248367868295, "grad_norm": 0.004031897056847811, "learning_rate": 4.3647459551518595e-05, "loss": 0.0005017200484871865, "step": 198610 }, { "epoch": 56.37808685779166, "grad_norm": 0.1919809728860855, "learning_rate": 4.364462106159523e-05, "loss": 0.00013600457459688186, "step": 198620 }, { "epoch": 56.380925347715014, "grad_norm": 0.12485166639089584, "learning_rate": 4.364178257167188e-05, "loss": 0.00011360030621290206, "step": 198630 }, { "epoch": 56.38376383763838, "grad_norm": 0.23635388910770416, "learning_rate": 4.363894408174851e-05, "loss": 0.001718786545097828, "step": 198640 }, { "epoch": 56.38660232756174, "grad_norm": 1.006447672843933, "learning_rate": 4.363610559182515e-05, "loss": 0.00020026247948408128, "step": 198650 }, { "epoch": 56.389440817485095, "grad_norm": 6.545494079589844, "learning_rate": 4.3633267101901795e-05, "loss": 0.001499214768409729, "step": 198660 }, { "epoch": 56.39227930740846, "grad_norm": 0.005413380451500416, "learning_rate": 4.363042861197843e-05, "loss": 0.00038654375821352006, "step": 198670 }, { "epoch": 56.39511779733182, "grad_norm": 0.002532422775402665, "learning_rate": 4.362759012205507e-05, "loss": 8.463878184556961e-05, "step": 198680 }, { "epoch": 56.39795628725518, "grad_norm": 5.662610054016113, "learning_rate": 4.362475163213171e-05, "loss": 0.0010285697877407073, "step": 198690 }, { "epoch": 56.40079477717854, "grad_norm": 0.06452569365501404, "learning_rate": 4.362191314220835e-05, "loss": 0.00013581868261098863, "step": 198700 }, { "epoch": 56.4036332671019, "grad_norm": 0.05370039865374565, "learning_rate": 4.361907465228499e-05, "loss": 0.0027631957083940504, "step": 198710 }, { "epoch": 56.406471757025265, "grad_norm": 0.010466009378433228, "learning_rate": 4.361623616236162e-05, "loss": 0.0005453035235404968, "step": 198720 }, { "epoch": 56.40931024694862, "grad_norm": 0.1499733328819275, "learning_rate": 4.3613397672438264e-05, "loss": 0.00036234892904758454, "step": 198730 }, { "epoch": 56.412148736871984, "grad_norm": 6.272953987121582, "learning_rate": 4.3610559182514905e-05, "loss": 0.008430622518062592, "step": 198740 }, { "epoch": 56.41498722679535, "grad_norm": 0.027178198099136353, "learning_rate": 4.360772069259154e-05, "loss": 0.0035271108150482177, "step": 198750 }, { "epoch": 56.4178257167187, "grad_norm": 0.00719047337770462, "learning_rate": 4.360488220266818e-05, "loss": 0.00043502785265445707, "step": 198760 }, { "epoch": 56.420664206642066, "grad_norm": 0.013827892951667309, "learning_rate": 4.360204371274482e-05, "loss": 0.0016874663531780242, "step": 198770 }, { "epoch": 56.42350269656543, "grad_norm": 0.08861231803894043, "learning_rate": 4.359920522282146e-05, "loss": 0.0002082947641611099, "step": 198780 }, { "epoch": 56.42634118648879, "grad_norm": 0.2823312282562256, "learning_rate": 4.3596366732898106e-05, "loss": 0.0003139341250061989, "step": 198790 }, { "epoch": 56.42917967641215, "grad_norm": 0.029464656487107277, "learning_rate": 4.359352824297474e-05, "loss": 0.00020407363772392272, "step": 198800 }, { "epoch": 56.43201816633551, "grad_norm": 0.007261758670210838, "learning_rate": 4.3590689753051375e-05, "loss": 0.00025793910026550293, "step": 198810 }, { "epoch": 56.43485665625887, "grad_norm": 0.010170351713895798, "learning_rate": 4.3587851263128016e-05, "loss": 0.0002931633964180946, "step": 198820 }, { "epoch": 56.43769514618223, "grad_norm": 0.0014401975786313415, "learning_rate": 4.358501277320466e-05, "loss": 0.0004221079871058464, "step": 198830 }, { "epoch": 56.44053363610559, "grad_norm": 0.056493185460567474, "learning_rate": 4.35821742832813e-05, "loss": 0.0007908696308732033, "step": 198840 }, { "epoch": 56.443372126028954, "grad_norm": 6.408132076263428, "learning_rate": 4.3579335793357933e-05, "loss": 0.005444823950529099, "step": 198850 }, { "epoch": 56.44621061595231, "grad_norm": 0.4299863576889038, "learning_rate": 4.3576497303434575e-05, "loss": 0.016877003014087677, "step": 198860 }, { "epoch": 56.44904910587567, "grad_norm": 0.01961735263466835, "learning_rate": 4.3573658813511216e-05, "loss": 0.014478686451911926, "step": 198870 }, { "epoch": 56.451887595799036, "grad_norm": 0.11180971562862396, "learning_rate": 4.357082032358785e-05, "loss": 0.007451574504375458, "step": 198880 }, { "epoch": 56.4547260857224, "grad_norm": 0.13737852871418, "learning_rate": 4.356798183366449e-05, "loss": 0.0005698934197425843, "step": 198890 }, { "epoch": 56.457564575645755, "grad_norm": 0.39626994729042053, "learning_rate": 4.3565143343741134e-05, "loss": 0.002464568428695202, "step": 198900 }, { "epoch": 56.46040306556912, "grad_norm": 0.0034873613622039557, "learning_rate": 4.356230485381777e-05, "loss": 0.0035219117999076843, "step": 198910 }, { "epoch": 56.46324155549248, "grad_norm": 0.028882957994937897, "learning_rate": 4.355946636389441e-05, "loss": 0.00138271301984787, "step": 198920 }, { "epoch": 56.466080045415836, "grad_norm": 2.6362719535827637, "learning_rate": 4.355662787397105e-05, "loss": 0.0007344469428062439, "step": 198930 }, { "epoch": 56.4689185353392, "grad_norm": 0.004396883305162191, "learning_rate": 4.3553789384047686e-05, "loss": 0.002502652443945408, "step": 198940 }, { "epoch": 56.47175702526256, "grad_norm": 0.057465262711048126, "learning_rate": 4.355095089412433e-05, "loss": 0.003564237430691719, "step": 198950 }, { "epoch": 56.47459551518592, "grad_norm": 0.01413268968462944, "learning_rate": 4.354811240420097e-05, "loss": 0.01172259971499443, "step": 198960 }, { "epoch": 56.47743400510928, "grad_norm": 0.14697550237178802, "learning_rate": 4.354527391427761e-05, "loss": 0.006959168612957001, "step": 198970 }, { "epoch": 56.480272495032644, "grad_norm": 0.7091041803359985, "learning_rate": 4.3542435424354244e-05, "loss": 0.0021712034940719604, "step": 198980 }, { "epoch": 56.48311098495601, "grad_norm": 0.6777627468109131, "learning_rate": 4.3539596934430886e-05, "loss": 0.0017945442348718643, "step": 198990 }, { "epoch": 56.48594947487936, "grad_norm": 0.31705334782600403, "learning_rate": 4.353675844450753e-05, "loss": 0.0017508290708065032, "step": 199000 }, { "epoch": 56.48594947487936, "eval_accuracy": 0.9825777325618363, "eval_loss": 0.06830704212188721, "eval_runtime": 35.9899, "eval_samples_per_second": 436.984, "eval_steps_per_second": 6.835, "step": 199000 }, { "epoch": 56.488787964802725, "grad_norm": 0.7580232620239258, "learning_rate": 4.353391995458416e-05, "loss": 0.0002487834542989731, "step": 199010 }, { "epoch": 56.49162645472609, "grad_norm": 1.494709849357605, "learning_rate": 4.35310814646608e-05, "loss": 0.0012490354478359223, "step": 199020 }, { "epoch": 56.494464944649444, "grad_norm": 0.9608729481697083, "learning_rate": 4.3528242974737444e-05, "loss": 0.0019204750657081603, "step": 199030 }, { "epoch": 56.49730343457281, "grad_norm": 0.002025889465585351, "learning_rate": 4.352540448481408e-05, "loss": 0.00025028735399246216, "step": 199040 }, { "epoch": 56.50014192449617, "grad_norm": 0.23473705351352692, "learning_rate": 4.352256599489072e-05, "loss": 0.00259595587849617, "step": 199050 }, { "epoch": 56.502980414419525, "grad_norm": 0.3453466296195984, "learning_rate": 4.351972750496736e-05, "loss": 0.00042348168790340424, "step": 199060 }, { "epoch": 56.50581890434289, "grad_norm": 0.4433499872684479, "learning_rate": 4.3516889015043996e-05, "loss": 0.00018043164163827896, "step": 199070 }, { "epoch": 56.50865739426625, "grad_norm": 0.017926214262843132, "learning_rate": 4.351405052512064e-05, "loss": 0.000590030662715435, "step": 199080 }, { "epoch": 56.511495884189614, "grad_norm": 0.028024615719914436, "learning_rate": 4.351121203519728e-05, "loss": 0.000547175295650959, "step": 199090 }, { "epoch": 56.51433437411297, "grad_norm": 0.15873108804225922, "learning_rate": 4.350837354527392e-05, "loss": 0.0017322143539786339, "step": 199100 }, { "epoch": 56.51717286403633, "grad_norm": 5.815763473510742, "learning_rate": 4.3505535055350555e-05, "loss": 0.001539510302245617, "step": 199110 }, { "epoch": 56.520011353959696, "grad_norm": 0.14462104439735413, "learning_rate": 4.350269656542719e-05, "loss": 0.0010532913729548454, "step": 199120 }, { "epoch": 56.52284984388305, "grad_norm": 0.019481955096125603, "learning_rate": 4.349985807550384e-05, "loss": 0.0003230711445212364, "step": 199130 }, { "epoch": 56.525688333806414, "grad_norm": 0.009224030189216137, "learning_rate": 4.349701958558047e-05, "loss": 0.0016349175944924354, "step": 199140 }, { "epoch": 56.52852682372978, "grad_norm": 0.08401194959878922, "learning_rate": 4.3494181095657114e-05, "loss": 0.00022901613265275956, "step": 199150 }, { "epoch": 56.53136531365314, "grad_norm": 0.08383073657751083, "learning_rate": 4.3491342605733755e-05, "loss": 0.00040029529482126235, "step": 199160 }, { "epoch": 56.534203803576496, "grad_norm": 0.037908297032117844, "learning_rate": 4.348850411581039e-05, "loss": 8.65323469042778e-05, "step": 199170 }, { "epoch": 56.53704229349986, "grad_norm": 0.09869197010993958, "learning_rate": 4.348566562588703e-05, "loss": 0.007505869865417481, "step": 199180 }, { "epoch": 56.53988078342322, "grad_norm": 0.07585539668798447, "learning_rate": 4.348282713596367e-05, "loss": 8.370112627744674e-05, "step": 199190 }, { "epoch": 56.54271927334658, "grad_norm": 0.006048263516277075, "learning_rate": 4.347998864604031e-05, "loss": 0.0003254033625125885, "step": 199200 }, { "epoch": 56.54555776326994, "grad_norm": 0.10684408247470856, "learning_rate": 4.347715015611695e-05, "loss": 0.00014068856835365296, "step": 199210 }, { "epoch": 56.5483962531933, "grad_norm": 0.0018447859911248088, "learning_rate": 4.347431166619358e-05, "loss": 8.904039859771728e-05, "step": 199220 }, { "epoch": 56.55123474311666, "grad_norm": 0.055188730359077454, "learning_rate": 4.3471473176270224e-05, "loss": 7.038861513137817e-05, "step": 199230 }, { "epoch": 56.55407323304002, "grad_norm": 0.00783497653901577, "learning_rate": 4.3468634686346866e-05, "loss": 0.00048712007701396943, "step": 199240 }, { "epoch": 56.556911722963385, "grad_norm": 0.11453283578157425, "learning_rate": 4.34657961964235e-05, "loss": 0.0001227518543601036, "step": 199250 }, { "epoch": 56.55975021288675, "grad_norm": 0.0072301351465284824, "learning_rate": 4.346295770650015e-05, "loss": 0.00015524663031101226, "step": 199260 }, { "epoch": 56.5625887028101, "grad_norm": 0.1348278969526291, "learning_rate": 4.346011921657678e-05, "loss": 0.00018224604427814485, "step": 199270 }, { "epoch": 56.565427192733466, "grad_norm": 0.12652865052223206, "learning_rate": 4.345728072665342e-05, "loss": 0.001503993570804596, "step": 199280 }, { "epoch": 56.56826568265683, "grad_norm": 0.033712901175022125, "learning_rate": 4.3454442236730066e-05, "loss": 7.401928305625916e-05, "step": 199290 }, { "epoch": 56.571104172580185, "grad_norm": 0.0083811916410923, "learning_rate": 4.34516037468067e-05, "loss": 0.00013444386422634125, "step": 199300 }, { "epoch": 56.57394266250355, "grad_norm": 0.0017204801551997662, "learning_rate": 4.344876525688334e-05, "loss": 3.1299889087677e-05, "step": 199310 }, { "epoch": 56.57678115242691, "grad_norm": 0.014109092764556408, "learning_rate": 4.3445926766959976e-05, "loss": 0.00019370447844266892, "step": 199320 }, { "epoch": 56.579619642350266, "grad_norm": 0.02321114018559456, "learning_rate": 4.344308827703662e-05, "loss": 0.00042491015046834945, "step": 199330 }, { "epoch": 56.58245813227363, "grad_norm": 0.06641009449958801, "learning_rate": 4.344024978711326e-05, "loss": 0.0008592942729592323, "step": 199340 }, { "epoch": 56.58529662219699, "grad_norm": 0.6306858062744141, "learning_rate": 4.3437411297189894e-05, "loss": 0.00041872691363096235, "step": 199350 }, { "epoch": 56.588135112120355, "grad_norm": 0.023022178560495377, "learning_rate": 4.3434572807266535e-05, "loss": 0.00011380072683095933, "step": 199360 }, { "epoch": 56.59097360204371, "grad_norm": 0.010813006199896336, "learning_rate": 4.3431734317343177e-05, "loss": 0.005287303030490876, "step": 199370 }, { "epoch": 56.593812091967074, "grad_norm": 0.5762266516685486, "learning_rate": 4.342889582741981e-05, "loss": 0.00035679060965776443, "step": 199380 }, { "epoch": 56.59665058189044, "grad_norm": 0.2447841614484787, "learning_rate": 4.342605733749646e-05, "loss": 0.0009618932381272316, "step": 199390 }, { "epoch": 56.59948907181379, "grad_norm": 0.018287120386958122, "learning_rate": 4.3423218847573094e-05, "loss": 0.011832577735185623, "step": 199400 }, { "epoch": 56.602327561737155, "grad_norm": 0.016764072701334953, "learning_rate": 4.342038035764973e-05, "loss": 0.00023435000330209732, "step": 199410 }, { "epoch": 56.60516605166052, "grad_norm": 0.003438618965446949, "learning_rate": 4.341754186772637e-05, "loss": 0.0007936196401715278, "step": 199420 }, { "epoch": 56.608004541583874, "grad_norm": 0.14809226989746094, "learning_rate": 4.341470337780301e-05, "loss": 0.003323240578174591, "step": 199430 }, { "epoch": 56.61084303150724, "grad_norm": 0.012182911857962608, "learning_rate": 4.341186488787965e-05, "loss": 0.002789340354502201, "step": 199440 }, { "epoch": 56.6136815214306, "grad_norm": 0.06500353664159775, "learning_rate": 4.340902639795629e-05, "loss": 0.0006333533674478531, "step": 199450 }, { "epoch": 56.61652001135396, "grad_norm": 0.08630966395139694, "learning_rate": 4.340618790803293e-05, "loss": 0.0031152496114373205, "step": 199460 }, { "epoch": 56.61935850127732, "grad_norm": 0.011027457192540169, "learning_rate": 4.340334941810957e-05, "loss": 0.000574139878153801, "step": 199470 }, { "epoch": 56.62219699120068, "grad_norm": 0.17931634187698364, "learning_rate": 4.3400510928186205e-05, "loss": 0.0008697371929883956, "step": 199480 }, { "epoch": 56.625035481124044, "grad_norm": 0.009929285384714603, "learning_rate": 4.3397672438262846e-05, "loss": 0.00046685840934515, "step": 199490 }, { "epoch": 56.6278739710474, "grad_norm": 0.011975632049143314, "learning_rate": 4.339483394833949e-05, "loss": 0.0026724863797426223, "step": 199500 }, { "epoch": 56.6278739710474, "eval_accuracy": 0.982895657150124, "eval_loss": 0.063777394592762, "eval_runtime": 35.7084, "eval_samples_per_second": 440.428, "eval_steps_per_second": 6.889, "step": 199500 }, { "epoch": 56.63071246097076, "grad_norm": 0.11640800535678864, "learning_rate": 4.339199545841612e-05, "loss": 0.0004205886274576187, "step": 199510 }, { "epoch": 56.633550950894126, "grad_norm": 0.24400685727596283, "learning_rate": 4.338915696849276e-05, "loss": 0.0009060099720954895, "step": 199520 }, { "epoch": 56.63638944081748, "grad_norm": 3.31264066696167, "learning_rate": 4.3386318478569405e-05, "loss": 0.0008352629840373993, "step": 199530 }, { "epoch": 56.639227930740844, "grad_norm": 0.029967524111270905, "learning_rate": 4.338347998864604e-05, "loss": 0.00037363562732934953, "step": 199540 }, { "epoch": 56.64206642066421, "grad_norm": 0.02359953336417675, "learning_rate": 4.338064149872268e-05, "loss": 0.0005130000412464142, "step": 199550 }, { "epoch": 56.64490491058757, "grad_norm": 0.027882883325219154, "learning_rate": 4.337780300879932e-05, "loss": 0.0011762922629714012, "step": 199560 }, { "epoch": 56.647743400510926, "grad_norm": 0.1444571167230606, "learning_rate": 4.337496451887596e-05, "loss": 0.0006865881383419037, "step": 199570 }, { "epoch": 56.65058189043429, "grad_norm": 0.024608729407191277, "learning_rate": 4.33721260289526e-05, "loss": 0.00011908356100320816, "step": 199580 }, { "epoch": 56.65342038035765, "grad_norm": 2.1310789585113525, "learning_rate": 4.336928753902924e-05, "loss": 0.0020233446732163428, "step": 199590 }, { "epoch": 56.65625887028101, "grad_norm": 0.011480499990284443, "learning_rate": 4.336644904910588e-05, "loss": 0.0007751427590847015, "step": 199600 }, { "epoch": 56.65909736020437, "grad_norm": 0.6367864012718201, "learning_rate": 4.3363610559182515e-05, "loss": 0.002490927651524544, "step": 199610 }, { "epoch": 56.66193585012773, "grad_norm": 1.9555631875991821, "learning_rate": 4.336077206925916e-05, "loss": 0.000517759844660759, "step": 199620 }, { "epoch": 56.664774340051096, "grad_norm": 0.08103771507740021, "learning_rate": 4.33579335793358e-05, "loss": 0.002174799330532551, "step": 199630 }, { "epoch": 56.66761282997445, "grad_norm": 0.004641574341803789, "learning_rate": 4.335509508941243e-05, "loss": 0.0016210844740271568, "step": 199640 }, { "epoch": 56.670451319897815, "grad_norm": 0.010158021934330463, "learning_rate": 4.3352256599489074e-05, "loss": 0.0018592678010463715, "step": 199650 }, { "epoch": 56.67328980982118, "grad_norm": 0.6768555045127869, "learning_rate": 4.3349418109565715e-05, "loss": 0.003335777297616005, "step": 199660 }, { "epoch": 56.67612829974453, "grad_norm": 0.022731993347406387, "learning_rate": 4.334657961964235e-05, "loss": 0.0006267581135034561, "step": 199670 }, { "epoch": 56.678966789667896, "grad_norm": 7.034890651702881, "learning_rate": 4.334374112971899e-05, "loss": 0.015464025735855102, "step": 199680 }, { "epoch": 56.68180527959126, "grad_norm": 0.049622759222984314, "learning_rate": 4.334090263979563e-05, "loss": 0.00037515275180339814, "step": 199690 }, { "epoch": 56.684643769514615, "grad_norm": 0.14981511235237122, "learning_rate": 4.333806414987227e-05, "loss": 0.0009472079575061799, "step": 199700 }, { "epoch": 56.68748225943798, "grad_norm": 0.0028830745723098516, "learning_rate": 4.333522565994891e-05, "loss": 0.0014258000999689102, "step": 199710 }, { "epoch": 56.69032074936134, "grad_norm": 0.021452048793435097, "learning_rate": 4.333238717002555e-05, "loss": 0.0026443036273121833, "step": 199720 }, { "epoch": 56.693159239284704, "grad_norm": 0.06119934096932411, "learning_rate": 4.332954868010219e-05, "loss": 0.0005479343235492706, "step": 199730 }, { "epoch": 56.69599772920806, "grad_norm": 0.03288640081882477, "learning_rate": 4.3326710190178826e-05, "loss": 0.00045134928077459333, "step": 199740 }, { "epoch": 56.69883621913142, "grad_norm": 0.475544273853302, "learning_rate": 4.332387170025546e-05, "loss": 0.0015074798837304115, "step": 199750 }, { "epoch": 56.701674709054785, "grad_norm": 0.014434571377933025, "learning_rate": 4.332103321033211e-05, "loss": 9.583588689565659e-05, "step": 199760 }, { "epoch": 56.70451319897814, "grad_norm": 0.03247644752264023, "learning_rate": 4.3318194720408743e-05, "loss": 0.006158362329006195, "step": 199770 }, { "epoch": 56.707351688901504, "grad_norm": 1.827043056488037, "learning_rate": 4.3315356230485385e-05, "loss": 0.0008493786677718163, "step": 199780 }, { "epoch": 56.71019017882487, "grad_norm": 0.38556936383247375, "learning_rate": 4.3312517740562026e-05, "loss": 0.0068342223763465885, "step": 199790 }, { "epoch": 56.71302866874822, "grad_norm": 0.008059491403400898, "learning_rate": 4.330967925063866e-05, "loss": 0.0008750209584832191, "step": 199800 }, { "epoch": 56.715867158671585, "grad_norm": 0.32461878657341003, "learning_rate": 4.33068407607153e-05, "loss": 0.0005095094442367553, "step": 199810 }, { "epoch": 56.71870564859495, "grad_norm": 0.01566697657108307, "learning_rate": 4.3304002270791944e-05, "loss": 0.0035647470504045486, "step": 199820 }, { "epoch": 56.72154413851831, "grad_norm": 0.03677720949053764, "learning_rate": 4.330116378086858e-05, "loss": 0.000707155279815197, "step": 199830 }, { "epoch": 56.72438262844167, "grad_norm": 6.725507736206055, "learning_rate": 4.329832529094522e-05, "loss": 0.002322092652320862, "step": 199840 }, { "epoch": 56.72722111836503, "grad_norm": 1.4971303939819336, "learning_rate": 4.3295486801021854e-05, "loss": 0.0024478621780872345, "step": 199850 }, { "epoch": 56.73005960828839, "grad_norm": 0.02997642196714878, "learning_rate": 4.32926483110985e-05, "loss": 0.0007483255118131638, "step": 199860 }, { "epoch": 56.73289809821175, "grad_norm": 0.8515902757644653, "learning_rate": 4.328980982117514e-05, "loss": 0.0026931781321763992, "step": 199870 }, { "epoch": 56.73573658813511, "grad_norm": 1.2296746969223022, "learning_rate": 4.328697133125177e-05, "loss": 0.003604254126548767, "step": 199880 }, { "epoch": 56.738575078058474, "grad_norm": 0.18580405414104462, "learning_rate": 4.328413284132842e-05, "loss": 0.0011319568380713462, "step": 199890 }, { "epoch": 56.74141356798184, "grad_norm": 0.13477645814418793, "learning_rate": 4.3281294351405054e-05, "loss": 0.0004250314086675644, "step": 199900 }, { "epoch": 56.74425205790519, "grad_norm": 0.204545795917511, "learning_rate": 4.3278455861481696e-05, "loss": 0.0004265321418642998, "step": 199910 }, { "epoch": 56.747090547828556, "grad_norm": 5.774175643920898, "learning_rate": 4.327561737155834e-05, "loss": 0.00424254760146141, "step": 199920 }, { "epoch": 56.74992903775192, "grad_norm": 0.04098029434680939, "learning_rate": 4.327277888163497e-05, "loss": 0.001560477539896965, "step": 199930 }, { "epoch": 56.752767527675275, "grad_norm": 4.046591281890869, "learning_rate": 4.326994039171161e-05, "loss": 0.0024779697880148886, "step": 199940 }, { "epoch": 56.75560601759864, "grad_norm": 0.078312948346138, "learning_rate": 4.326710190178825e-05, "loss": 0.0006954576820135117, "step": 199950 }, { "epoch": 56.758444507522, "grad_norm": 0.026832997798919678, "learning_rate": 4.326426341186489e-05, "loss": 0.00737585574388504, "step": 199960 }, { "epoch": 56.761282997445356, "grad_norm": 0.3384014666080475, "learning_rate": 4.326142492194153e-05, "loss": 0.0029466770589351653, "step": 199970 }, { "epoch": 56.76412148736872, "grad_norm": 0.03117465041577816, "learning_rate": 4.3258586432018165e-05, "loss": 0.0035546086728572845, "step": 199980 }, { "epoch": 56.76695997729208, "grad_norm": 0.008736362680792809, "learning_rate": 4.325574794209481e-05, "loss": 0.0008653711527585983, "step": 199990 }, { "epoch": 56.769798467215445, "grad_norm": 0.6964974403381348, "learning_rate": 4.325290945217145e-05, "loss": 0.0008676620200276375, "step": 200000 }, { "epoch": 56.769798467215445, "eval_accuracy": 0.9808609397850829, "eval_loss": 0.08367916196584702, "eval_runtime": 51.4277, "eval_samples_per_second": 305.808, "eval_steps_per_second": 4.783, "step": 200000 }, { "epoch": 56.7726369571388, "grad_norm": 0.1706821173429489, "learning_rate": 4.325007096224808e-05, "loss": 0.0025394454598426817, "step": 200010 }, { "epoch": 56.77547544706216, "grad_norm": 3.6748669147491455, "learning_rate": 4.324723247232473e-05, "loss": 0.0013019038364291192, "step": 200020 }, { "epoch": 56.778313936985526, "grad_norm": 0.023154284805059433, "learning_rate": 4.3244393982401365e-05, "loss": 0.003214021027088165, "step": 200030 }, { "epoch": 56.78115242690888, "grad_norm": 1.0456935167312622, "learning_rate": 4.3241555492478006e-05, "loss": 0.0008925687521696091, "step": 200040 }, { "epoch": 56.783990916832245, "grad_norm": 0.37819960713386536, "learning_rate": 4.323871700255464e-05, "loss": 0.0010249588638544082, "step": 200050 }, { "epoch": 56.78682940675561, "grad_norm": 0.1639273464679718, "learning_rate": 4.323587851263128e-05, "loss": 0.0003880353644490242, "step": 200060 }, { "epoch": 56.789667896678964, "grad_norm": 0.06470564752817154, "learning_rate": 4.3233040022707924e-05, "loss": 0.0037177935242652892, "step": 200070 }, { "epoch": 56.79250638660233, "grad_norm": 0.33975890278816223, "learning_rate": 4.323020153278456e-05, "loss": 0.0005210373550653457, "step": 200080 }, { "epoch": 56.79534487652569, "grad_norm": 0.06619832664728165, "learning_rate": 4.32273630428612e-05, "loss": 0.00013055838644504547, "step": 200090 }, { "epoch": 56.79818336644905, "grad_norm": 0.008256138302385807, "learning_rate": 4.322452455293784e-05, "loss": 0.00015029869973659515, "step": 200100 }, { "epoch": 56.80102185637241, "grad_norm": 0.0017131813801825047, "learning_rate": 4.3221686063014476e-05, "loss": 0.00012066774070262909, "step": 200110 }, { "epoch": 56.80386034629577, "grad_norm": 0.0742904469370842, "learning_rate": 4.321884757309112e-05, "loss": 7.301494479179382e-05, "step": 200120 }, { "epoch": 56.806698836219134, "grad_norm": 0.041040439158678055, "learning_rate": 4.321600908316776e-05, "loss": 0.00028288979083299635, "step": 200130 }, { "epoch": 56.80953732614249, "grad_norm": 0.28698205947875977, "learning_rate": 4.321317059324439e-05, "loss": 0.0003218604251742363, "step": 200140 }, { "epoch": 56.81237581606585, "grad_norm": 0.006039248779416084, "learning_rate": 4.3210332103321034e-05, "loss": 0.00017606131732463836, "step": 200150 }, { "epoch": 56.815214305989215, "grad_norm": 0.4734894633293152, "learning_rate": 4.3207493613397676e-05, "loss": 0.0004800194874405861, "step": 200160 }, { "epoch": 56.81805279591257, "grad_norm": 0.013547956012189388, "learning_rate": 4.320465512347431e-05, "loss": 0.005441297590732574, "step": 200170 }, { "epoch": 56.820891285835934, "grad_norm": 0.009182598441839218, "learning_rate": 4.320181663355095e-05, "loss": 0.002172587625682354, "step": 200180 }, { "epoch": 56.8237297757593, "grad_norm": 0.05193159729242325, "learning_rate": 4.319897814362759e-05, "loss": 8.548181504011154e-05, "step": 200190 }, { "epoch": 56.82656826568266, "grad_norm": 0.4383801519870758, "learning_rate": 4.3196139653704234e-05, "loss": 0.006369280070066452, "step": 200200 }, { "epoch": 56.829406755606016, "grad_norm": 1.0028343200683594, "learning_rate": 4.319330116378087e-05, "loss": 0.0002981286495923996, "step": 200210 }, { "epoch": 56.83224524552938, "grad_norm": 0.1442606896162033, "learning_rate": 4.319046267385751e-05, "loss": 0.00019937139004468917, "step": 200220 }, { "epoch": 56.83508373545274, "grad_norm": 0.019408011808991432, "learning_rate": 4.318762418393415e-05, "loss": 0.0001628084108233452, "step": 200230 }, { "epoch": 56.8379222253761, "grad_norm": 0.023822767660021782, "learning_rate": 4.3184785694010786e-05, "loss": 9.926743805408478e-05, "step": 200240 }, { "epoch": 56.84076071529946, "grad_norm": 0.03614043444395065, "learning_rate": 4.318194720408743e-05, "loss": 0.0011742867529392243, "step": 200250 }, { "epoch": 56.84359920522282, "grad_norm": 0.6682681441307068, "learning_rate": 4.317910871416407e-05, "loss": 0.016282905638217927, "step": 200260 }, { "epoch": 56.84643769514618, "grad_norm": 0.08008457720279694, "learning_rate": 4.3176270224240704e-05, "loss": 0.007689002901315689, "step": 200270 }, { "epoch": 56.84927618506954, "grad_norm": 5.503469944000244, "learning_rate": 4.3173431734317345e-05, "loss": 0.004398864880204201, "step": 200280 }, { "epoch": 56.852114674992904, "grad_norm": 0.7031262516975403, "learning_rate": 4.3170593244393986e-05, "loss": 0.0026428531855344772, "step": 200290 }, { "epoch": 56.85495316491627, "grad_norm": 0.1836603879928589, "learning_rate": 4.316775475447062e-05, "loss": 0.002237423323094845, "step": 200300 }, { "epoch": 56.85779165483962, "grad_norm": 0.03509273752570152, "learning_rate": 4.316491626454726e-05, "loss": 0.0010780271142721177, "step": 200310 }, { "epoch": 56.860630144762986, "grad_norm": 0.13877004384994507, "learning_rate": 4.3162077774623904e-05, "loss": 0.0005738180130720139, "step": 200320 }, { "epoch": 56.86346863468635, "grad_norm": 0.005741424858570099, "learning_rate": 4.3159239284700545e-05, "loss": 0.0038068048655986787, "step": 200330 }, { "epoch": 56.866307124609705, "grad_norm": 0.10125653445720673, "learning_rate": 4.315640079477718e-05, "loss": 0.0007321091368794441, "step": 200340 }, { "epoch": 56.86914561453307, "grad_norm": 0.05097721144556999, "learning_rate": 4.3153562304853814e-05, "loss": 0.0022936612367630005, "step": 200350 }, { "epoch": 56.87198410445643, "grad_norm": 10.116399765014648, "learning_rate": 4.3151007663922794e-05, "loss": 0.009990173578262328, "step": 200360 }, { "epoch": 56.87482259437979, "grad_norm": 0.03138645365834236, "learning_rate": 4.3148169173999436e-05, "loss": 0.0007095998153090477, "step": 200370 }, { "epoch": 56.87766108430315, "grad_norm": 0.0377257876098156, "learning_rate": 4.314533068407608e-05, "loss": 0.007134775817394257, "step": 200380 }, { "epoch": 56.88049957422651, "grad_norm": 0.0061553469859063625, "learning_rate": 4.314249219415271e-05, "loss": 0.007331055402755737, "step": 200390 }, { "epoch": 56.883338064149875, "grad_norm": 1.7728322744369507, "learning_rate": 4.313965370422935e-05, "loss": 0.0016033312305808068, "step": 200400 }, { "epoch": 56.88617655407323, "grad_norm": 0.10951589047908783, "learning_rate": 4.3136815214305994e-05, "loss": 0.0003247272223234177, "step": 200410 }, { "epoch": 56.889015043996594, "grad_norm": 0.02065885439515114, "learning_rate": 4.313397672438263e-05, "loss": 0.002904365956783295, "step": 200420 }, { "epoch": 56.89185353391996, "grad_norm": 7.979493141174316, "learning_rate": 4.313113823445927e-05, "loss": 0.001221560686826706, "step": 200430 }, { "epoch": 56.89469202384331, "grad_norm": 2.3958699703216553, "learning_rate": 4.312829974453591e-05, "loss": 0.0011310806497931481, "step": 200440 }, { "epoch": 56.897530513766675, "grad_norm": 0.3112446367740631, "learning_rate": 4.3125461254612546e-05, "loss": 0.007407684624195099, "step": 200450 }, { "epoch": 56.90036900369004, "grad_norm": 0.019388319924473763, "learning_rate": 4.312262276468919e-05, "loss": 0.004316274076700211, "step": 200460 }, { "epoch": 56.9032074936134, "grad_norm": 0.1271466761827469, "learning_rate": 4.311978427476583e-05, "loss": 0.0003315458074212074, "step": 200470 }, { "epoch": 56.90604598353676, "grad_norm": 0.11441940069198608, "learning_rate": 4.3116945784842464e-05, "loss": 0.00035340115427970884, "step": 200480 }, { "epoch": 56.90888447346012, "grad_norm": 0.005213042255491018, "learning_rate": 4.3114107294919105e-05, "loss": 0.004474176838994026, "step": 200490 }, { "epoch": 56.91172296338348, "grad_norm": 0.703421413898468, "learning_rate": 4.3111268804995747e-05, "loss": 0.0023689158260822296, "step": 200500 }, { "epoch": 56.91172296338348, "eval_accuracy": 0.9800343358555351, "eval_loss": 0.0815652385354042, "eval_runtime": 45.0238, "eval_samples_per_second": 349.304, "eval_steps_per_second": 5.464, "step": 200500 }, { "epoch": 56.91456145330684, "grad_norm": 0.09579836577177048, "learning_rate": 4.310843031507239e-05, "loss": 0.0013414554297924042, "step": 200510 }, { "epoch": 56.9173999432302, "grad_norm": 0.04338997229933739, "learning_rate": 4.310559182514902e-05, "loss": 0.000565536506474018, "step": 200520 }, { "epoch": 56.920238433153564, "grad_norm": 0.28845223784446716, "learning_rate": 4.310275333522566e-05, "loss": 0.008166631311178207, "step": 200530 }, { "epoch": 56.92307692307692, "grad_norm": 0.045670900493860245, "learning_rate": 4.3099914845302305e-05, "loss": 0.01460760533809662, "step": 200540 }, { "epoch": 56.92591541300028, "grad_norm": 0.02651914395391941, "learning_rate": 4.309707635537894e-05, "loss": 0.00046152807772159576, "step": 200550 }, { "epoch": 56.928753902923646, "grad_norm": 0.007876810617744923, "learning_rate": 4.309423786545558e-05, "loss": 0.004584544524550438, "step": 200560 }, { "epoch": 56.93159239284701, "grad_norm": 0.10283250361680984, "learning_rate": 4.309139937553222e-05, "loss": 0.0004092622548341751, "step": 200570 }, { "epoch": 56.934430882770364, "grad_norm": 8.55689811706543, "learning_rate": 4.308856088560886e-05, "loss": 0.002316378988325596, "step": 200580 }, { "epoch": 56.93726937269373, "grad_norm": 0.09818337112665176, "learning_rate": 4.30857223956855e-05, "loss": 0.00024046394973993302, "step": 200590 }, { "epoch": 56.94010786261709, "grad_norm": 0.9809251427650452, "learning_rate": 4.308288390576214e-05, "loss": 0.00055483840405941, "step": 200600 }, { "epoch": 56.942946352540446, "grad_norm": 0.4181215167045593, "learning_rate": 4.3080045415838775e-05, "loss": 0.0013261465355753899, "step": 200610 }, { "epoch": 56.94578484246381, "grad_norm": 0.04423905164003372, "learning_rate": 4.3077206925915416e-05, "loss": 0.000170099176466465, "step": 200620 }, { "epoch": 56.94862333238717, "grad_norm": 0.05593624711036682, "learning_rate": 4.307436843599205e-05, "loss": 0.0003826586529612541, "step": 200630 }, { "epoch": 56.95146182231053, "grad_norm": 0.0013284011511132121, "learning_rate": 4.307152994606869e-05, "loss": 0.0003876818343997002, "step": 200640 }, { "epoch": 56.95430031223389, "grad_norm": 0.004681653808802366, "learning_rate": 4.306869145614533e-05, "loss": 0.00015777777880430222, "step": 200650 }, { "epoch": 56.95713880215725, "grad_norm": 0.11444778740406036, "learning_rate": 4.306585296622197e-05, "loss": 0.00017657820135354995, "step": 200660 }, { "epoch": 56.959977292080616, "grad_norm": 0.3551996648311615, "learning_rate": 4.3063014476298616e-05, "loss": 0.0009276337921619415, "step": 200670 }, { "epoch": 56.96281578200397, "grad_norm": 0.005282767117023468, "learning_rate": 4.306017598637525e-05, "loss": 0.0017676973715424539, "step": 200680 }, { "epoch": 56.965654271927335, "grad_norm": 0.12482593953609467, "learning_rate": 4.3057337496451885e-05, "loss": 0.0014751743525266646, "step": 200690 }, { "epoch": 56.9684927618507, "grad_norm": 0.04914768412709236, "learning_rate": 4.305449900652853e-05, "loss": 0.003935389965772629, "step": 200700 }, { "epoch": 56.97133125177405, "grad_norm": 0.08920599520206451, "learning_rate": 4.305166051660517e-05, "loss": 0.0015912353992462158, "step": 200710 }, { "epoch": 56.974169741697416, "grad_norm": 0.005471900105476379, "learning_rate": 4.304882202668181e-05, "loss": 0.0001653013750910759, "step": 200720 }, { "epoch": 56.97700823162078, "grad_norm": 0.021977510303258896, "learning_rate": 4.3045983536758444e-05, "loss": 0.0017232919111847878, "step": 200730 }, { "epoch": 56.97984672154414, "grad_norm": 0.006676674820482731, "learning_rate": 4.3043145046835085e-05, "loss": 0.0012032924219965934, "step": 200740 }, { "epoch": 56.9826852114675, "grad_norm": 1.5218604803085327, "learning_rate": 4.304030655691173e-05, "loss": 0.0031338103115558626, "step": 200750 }, { "epoch": 56.98552370139086, "grad_norm": 0.10124434530735016, "learning_rate": 4.303746806698836e-05, "loss": 0.0029994828626513483, "step": 200760 }, { "epoch": 56.98836219131422, "grad_norm": 0.003475987119600177, "learning_rate": 4.3034629577065e-05, "loss": 0.0001700516790151596, "step": 200770 }, { "epoch": 56.99120068123758, "grad_norm": 0.316010981798172, "learning_rate": 4.3031791087141644e-05, "loss": 0.0007312305271625519, "step": 200780 }, { "epoch": 56.99403917116094, "grad_norm": 0.3799320161342621, "learning_rate": 4.302895259721828e-05, "loss": 0.0038750495761632918, "step": 200790 }, { "epoch": 56.996877661084305, "grad_norm": 0.008212137036025524, "learning_rate": 4.302611410729493e-05, "loss": 0.0008768554776906967, "step": 200800 }, { "epoch": 56.99971615100766, "grad_norm": 0.002965626074001193, "learning_rate": 4.302327561737156e-05, "loss": 6.837639957666398e-05, "step": 200810 }, { "epoch": 57.002554640931024, "grad_norm": 0.029711272567510605, "learning_rate": 4.3020437127448196e-05, "loss": 0.0002678976161405444, "step": 200820 }, { "epoch": 57.00539313085439, "grad_norm": 4.153948783874512, "learning_rate": 4.301759863752484e-05, "loss": 0.0007315918803215027, "step": 200830 }, { "epoch": 57.00823162077775, "grad_norm": 0.00037227434222586453, "learning_rate": 4.301476014760148e-05, "loss": 0.00013035200536251068, "step": 200840 }, { "epoch": 57.011070110701105, "grad_norm": 0.012201223522424698, "learning_rate": 4.301192165767812e-05, "loss": 0.0012736352160573005, "step": 200850 }, { "epoch": 57.01390860062447, "grad_norm": 0.014203613623976707, "learning_rate": 4.3009083167754755e-05, "loss": 0.00011913739144802094, "step": 200860 }, { "epoch": 57.01674709054783, "grad_norm": 0.015201122500002384, "learning_rate": 4.3006244677831396e-05, "loss": 0.00012543313205242156, "step": 200870 }, { "epoch": 57.01958558047119, "grad_norm": 0.008547620847821236, "learning_rate": 4.300340618790804e-05, "loss": 0.0010377511382102967, "step": 200880 }, { "epoch": 57.02242407039455, "grad_norm": 0.06460034102201462, "learning_rate": 4.300056769798467e-05, "loss": 0.00026095546782016753, "step": 200890 }, { "epoch": 57.02526256031791, "grad_norm": 0.27102944254875183, "learning_rate": 4.2997729208061313e-05, "loss": 0.0001531612128019333, "step": 200900 }, { "epoch": 57.02810105024127, "grad_norm": 0.00961413886398077, "learning_rate": 4.2994890718137955e-05, "loss": 0.00023345835506916046, "step": 200910 }, { "epoch": 57.03093954016463, "grad_norm": 0.9001064896583557, "learning_rate": 4.299205222821459e-05, "loss": 0.0002249995246529579, "step": 200920 }, { "epoch": 57.033778030087994, "grad_norm": 0.18222272396087646, "learning_rate": 4.298921373829123e-05, "loss": 0.00012836344540119172, "step": 200930 }, { "epoch": 57.03661652001136, "grad_norm": 0.36295464634895325, "learning_rate": 4.298637524836787e-05, "loss": 0.0014009634032845497, "step": 200940 }, { "epoch": 57.03945500993471, "grad_norm": 0.003224053652957082, "learning_rate": 4.298353675844451e-05, "loss": 8.787307888269424e-05, "step": 200950 }, { "epoch": 57.042293499858076, "grad_norm": 0.04130338877439499, "learning_rate": 4.298069826852115e-05, "loss": 7.38043338060379e-05, "step": 200960 }, { "epoch": 57.04513198978144, "grad_norm": 0.0008734855800867081, "learning_rate": 4.297785977859779e-05, "loss": 0.0004138397052884102, "step": 200970 }, { "epoch": 57.047970479704794, "grad_norm": 0.021267658099532127, "learning_rate": 4.2975021288674424e-05, "loss": 0.003424660861492157, "step": 200980 }, { "epoch": 57.05080896962816, "grad_norm": 0.08898667246103287, "learning_rate": 4.2972182798751065e-05, "loss": 0.0001392429694533348, "step": 200990 }, { "epoch": 57.05364745955152, "grad_norm": 0.05716047063469887, "learning_rate": 4.296934430882771e-05, "loss": 0.006430525332689285, "step": 201000 }, { "epoch": 57.05364745955152, "eval_accuracy": 0.9845488650092198, "eval_loss": 0.06281840801239014, "eval_runtime": 58.5096, "eval_samples_per_second": 268.793, "eval_steps_per_second": 4.204, "step": 201000 }, { "epoch": 57.056485949474876, "grad_norm": 0.07290830463171005, "learning_rate": 4.296650581890435e-05, "loss": 8.849166333675384e-05, "step": 201010 }, { "epoch": 57.05932443939824, "grad_norm": 0.040279045701026917, "learning_rate": 4.296366732898098e-05, "loss": 0.010915316641330719, "step": 201020 }, { "epoch": 57.0621629293216, "grad_norm": 0.07359074056148529, "learning_rate": 4.2960828839057624e-05, "loss": 0.0017316166311502456, "step": 201030 }, { "epoch": 57.065001419244965, "grad_norm": 0.004268597811460495, "learning_rate": 4.2957990349134266e-05, "loss": 0.00030737817287445067, "step": 201040 }, { "epoch": 57.06783990916832, "grad_norm": 0.001961461966857314, "learning_rate": 4.29551518592109e-05, "loss": 0.00012709330767393112, "step": 201050 }, { "epoch": 57.07067839909168, "grad_norm": 0.007153013721108437, "learning_rate": 4.295231336928754e-05, "loss": 0.00026391968131065366, "step": 201060 }, { "epoch": 57.073516889015046, "grad_norm": 0.49170634150505066, "learning_rate": 4.294947487936418e-05, "loss": 0.0002552220597863197, "step": 201070 }, { "epoch": 57.0763553789384, "grad_norm": 0.6579737067222595, "learning_rate": 4.294663638944082e-05, "loss": 0.00022227577865123748, "step": 201080 }, { "epoch": 57.079193868861765, "grad_norm": 0.3129804730415344, "learning_rate": 4.294379789951746e-05, "loss": 0.00127479899674654, "step": 201090 }, { "epoch": 57.08203235878513, "grad_norm": 0.637331485748291, "learning_rate": 4.29409594095941e-05, "loss": 0.003283052891492844, "step": 201100 }, { "epoch": 57.08487084870849, "grad_norm": 0.0196132343262434, "learning_rate": 4.2938120919670735e-05, "loss": 0.00011466443538665771, "step": 201110 }, { "epoch": 57.087709338631846, "grad_norm": 0.04640167951583862, "learning_rate": 4.2935282429747376e-05, "loss": 0.00010603759437799454, "step": 201120 }, { "epoch": 57.09054782855521, "grad_norm": 0.03560611605644226, "learning_rate": 4.293244393982401e-05, "loss": 0.00021868478506803513, "step": 201130 }, { "epoch": 57.09338631847857, "grad_norm": 7.757505893707275, "learning_rate": 4.292960544990066e-05, "loss": 0.006375628709793091, "step": 201140 }, { "epoch": 57.09622480840193, "grad_norm": 0.0743345096707344, "learning_rate": 4.2926766959977294e-05, "loss": 0.0005938911810517312, "step": 201150 }, { "epoch": 57.09906329832529, "grad_norm": 0.005905576050281525, "learning_rate": 4.292392847005393e-05, "loss": 0.00033413376659154894, "step": 201160 }, { "epoch": 57.101901788248654, "grad_norm": 0.006386006250977516, "learning_rate": 4.2921089980130576e-05, "loss": 0.00043900925666093824, "step": 201170 }, { "epoch": 57.10474027817201, "grad_norm": 0.00635431706905365, "learning_rate": 4.291825149020721e-05, "loss": 0.00033421330153942107, "step": 201180 }, { "epoch": 57.10757876809537, "grad_norm": 0.019685273990035057, "learning_rate": 4.291541300028385e-05, "loss": 0.00010615866631269455, "step": 201190 }, { "epoch": 57.110417258018735, "grad_norm": 0.029806938022375107, "learning_rate": 4.2912574510360494e-05, "loss": 0.003922153264284134, "step": 201200 }, { "epoch": 57.1132557479421, "grad_norm": 0.023373447358608246, "learning_rate": 4.290973602043713e-05, "loss": 9.944438934326172e-05, "step": 201210 }, { "epoch": 57.116094237865454, "grad_norm": 0.014387930743396282, "learning_rate": 4.290689753051377e-05, "loss": 0.000286964513361454, "step": 201220 }, { "epoch": 57.11893272778882, "grad_norm": 0.014653053134679794, "learning_rate": 4.2904059040590404e-05, "loss": 0.0003304433077573776, "step": 201230 }, { "epoch": 57.12177121771218, "grad_norm": 0.17957618832588196, "learning_rate": 4.2901220550667046e-05, "loss": 0.0021234450861811636, "step": 201240 }, { "epoch": 57.124609707635535, "grad_norm": 0.003915113862603903, "learning_rate": 4.289838206074369e-05, "loss": 0.0004284324124455452, "step": 201250 }, { "epoch": 57.1274481975589, "grad_norm": 0.07087022811174393, "learning_rate": 4.289554357082032e-05, "loss": 0.0005905637517571449, "step": 201260 }, { "epoch": 57.13028668748226, "grad_norm": 0.14239192008972168, "learning_rate": 4.289270508089697e-05, "loss": 0.0019050652161240577, "step": 201270 }, { "epoch": 57.13312517740562, "grad_norm": 0.019674915820360184, "learning_rate": 4.2889866590973604e-05, "loss": 6.719194352626801e-05, "step": 201280 }, { "epoch": 57.13596366732898, "grad_norm": 0.00526432367041707, "learning_rate": 4.288702810105024e-05, "loss": 0.0002195999026298523, "step": 201290 }, { "epoch": 57.13880215725234, "grad_norm": 0.11082307249307632, "learning_rate": 4.288418961112689e-05, "loss": 0.00012285225093364716, "step": 201300 }, { "epoch": 57.141640647175706, "grad_norm": 1.583146333694458, "learning_rate": 4.288135112120352e-05, "loss": 0.00032080747187137606, "step": 201310 }, { "epoch": 57.14447913709906, "grad_norm": 0.18969035148620605, "learning_rate": 4.287851263128016e-05, "loss": 0.0003559675067663193, "step": 201320 }, { "epoch": 57.147317627022424, "grad_norm": 0.005999284330755472, "learning_rate": 4.28756741413568e-05, "loss": 0.0007981948554515838, "step": 201330 }, { "epoch": 57.15015611694579, "grad_norm": 0.0063325283117592335, "learning_rate": 4.287283565143344e-05, "loss": 0.0007188208401203156, "step": 201340 }, { "epoch": 57.15299460686914, "grad_norm": 0.12646055221557617, "learning_rate": 4.286999716151008e-05, "loss": 0.0015875644981861114, "step": 201350 }, { "epoch": 57.155833096792506, "grad_norm": 0.21422812342643738, "learning_rate": 4.2867158671586715e-05, "loss": 0.00036078561097383497, "step": 201360 }, { "epoch": 57.15867158671587, "grad_norm": 0.011001108214259148, "learning_rate": 4.2864320181663356e-05, "loss": 0.00933220386505127, "step": 201370 }, { "epoch": 57.161510076639225, "grad_norm": 0.31107664108276367, "learning_rate": 4.286148169174e-05, "loss": 0.006809338182210922, "step": 201380 }, { "epoch": 57.16434856656259, "grad_norm": 0.230112224817276, "learning_rate": 4.285864320181663e-05, "loss": 0.0007543271407485008, "step": 201390 }, { "epoch": 57.16718705648595, "grad_norm": 0.041146062314510345, "learning_rate": 4.2855804711893274e-05, "loss": 0.005023331567645073, "step": 201400 }, { "epoch": 57.17002554640931, "grad_norm": 0.7397584915161133, "learning_rate": 4.2852966221969915e-05, "loss": 0.0007960952818393708, "step": 201410 }, { "epoch": 57.17286403633267, "grad_norm": 1.065293550491333, "learning_rate": 4.285012773204655e-05, "loss": 0.00022749416530132294, "step": 201420 }, { "epoch": 57.17570252625603, "grad_norm": 0.07659462839365005, "learning_rate": 4.28472892421232e-05, "loss": 0.0006978109478950501, "step": 201430 }, { "epoch": 57.178541016179395, "grad_norm": 13.511161804199219, "learning_rate": 4.284445075219983e-05, "loss": 0.01055871695280075, "step": 201440 }, { "epoch": 57.18137950610275, "grad_norm": 3.6397624015808105, "learning_rate": 4.284161226227647e-05, "loss": 0.001404930092394352, "step": 201450 }, { "epoch": 57.18421799602611, "grad_norm": 0.2197936624288559, "learning_rate": 4.283877377235311e-05, "loss": 0.0007232187315821647, "step": 201460 }, { "epoch": 57.187056485949476, "grad_norm": 0.01118815690279007, "learning_rate": 4.283593528242975e-05, "loss": 0.0007627811282873154, "step": 201470 }, { "epoch": 57.18989497587283, "grad_norm": 0.19808273017406464, "learning_rate": 4.283309679250639e-05, "loss": 0.0009289281442761421, "step": 201480 }, { "epoch": 57.192733465796195, "grad_norm": 0.043053630739450455, "learning_rate": 4.2830258302583026e-05, "loss": 0.0010351356118917464, "step": 201490 }, { "epoch": 57.19557195571956, "grad_norm": 0.2403254210948944, "learning_rate": 4.282741981265967e-05, "loss": 0.007270152121782303, "step": 201500 }, { "epoch": 57.19557195571956, "eval_accuracy": 0.9843581102562472, "eval_loss": 0.06742896139621735, "eval_runtime": 42.285, "eval_samples_per_second": 371.929, "eval_steps_per_second": 5.818, "step": 201500 }, { "epoch": 57.19841044564292, "grad_norm": 0.07544822245836258, "learning_rate": 4.282458132273631e-05, "loss": 0.00020829495042562485, "step": 201510 }, { "epoch": 57.20124893556628, "grad_norm": 0.0026839138008654118, "learning_rate": 4.282174283281294e-05, "loss": 0.0006943210959434509, "step": 201520 }, { "epoch": 57.20408742548964, "grad_norm": 0.035866688936948776, "learning_rate": 4.2818904342889584e-05, "loss": 0.00010775960981845856, "step": 201530 }, { "epoch": 57.206925915413, "grad_norm": 0.022545672953128815, "learning_rate": 4.2816065852966226e-05, "loss": 9.339079260826111e-05, "step": 201540 }, { "epoch": 57.20976440533636, "grad_norm": 0.006996192038059235, "learning_rate": 4.281322736304286e-05, "loss": 0.00021772757172584535, "step": 201550 }, { "epoch": 57.21260289525972, "grad_norm": 0.2948511242866516, "learning_rate": 4.28103888731195e-05, "loss": 0.0001642892137169838, "step": 201560 }, { "epoch": 57.215441385183084, "grad_norm": 0.02468504011631012, "learning_rate": 4.280755038319614e-05, "loss": 0.0001254696398973465, "step": 201570 }, { "epoch": 57.21827987510645, "grad_norm": 0.2790483236312866, "learning_rate": 4.280471189327278e-05, "loss": 0.00014106445014476775, "step": 201580 }, { "epoch": 57.2211183650298, "grad_norm": 0.008327087387442589, "learning_rate": 4.280187340334942e-05, "loss": 0.00012128371745347976, "step": 201590 }, { "epoch": 57.223956854953165, "grad_norm": 0.1296684890985489, "learning_rate": 4.279903491342606e-05, "loss": 0.00015028417110443115, "step": 201600 }, { "epoch": 57.22679534487653, "grad_norm": 0.01670212671160698, "learning_rate": 4.27961964235027e-05, "loss": 5.4542161524295804e-05, "step": 201610 }, { "epoch": 57.229633834799884, "grad_norm": 0.014974592253565788, "learning_rate": 4.2793357933579337e-05, "loss": 4.7219172120094297e-05, "step": 201620 }, { "epoch": 57.23247232472325, "grad_norm": 0.020902510732412338, "learning_rate": 4.279051944365598e-05, "loss": 9.612776339054107e-05, "step": 201630 }, { "epoch": 57.23531081464661, "grad_norm": 0.0071930717676877975, "learning_rate": 4.278768095373262e-05, "loss": 0.00010871700942516327, "step": 201640 }, { "epoch": 57.238149304569966, "grad_norm": 0.12967967987060547, "learning_rate": 4.2784842463809254e-05, "loss": 0.00043224263936281204, "step": 201650 }, { "epoch": 57.24098779449333, "grad_norm": 1.6192128658294678, "learning_rate": 4.2782003973885895e-05, "loss": 0.0003606241196393967, "step": 201660 }, { "epoch": 57.24382628441669, "grad_norm": 0.026432834565639496, "learning_rate": 4.277916548396254e-05, "loss": 0.00021165739744901657, "step": 201670 }, { "epoch": 57.246664774340054, "grad_norm": 0.2798638939857483, "learning_rate": 4.277632699403917e-05, "loss": 0.00012375134974718094, "step": 201680 }, { "epoch": 57.24950326426341, "grad_norm": 0.18089522421360016, "learning_rate": 4.277348850411581e-05, "loss": 0.000828101672232151, "step": 201690 }, { "epoch": 57.25234175418677, "grad_norm": 0.012409855611622334, "learning_rate": 4.2770650014192454e-05, "loss": 0.00010203570127487183, "step": 201700 }, { "epoch": 57.255180244110136, "grad_norm": 0.038507770746946335, "learning_rate": 4.276781152426909e-05, "loss": 0.0006305454298853874, "step": 201710 }, { "epoch": 57.25801873403349, "grad_norm": 0.003644531359896064, "learning_rate": 4.276497303434573e-05, "loss": 0.00038850419223308564, "step": 201720 }, { "epoch": 57.260857223956855, "grad_norm": 0.16383840143680573, "learning_rate": 4.276213454442237e-05, "loss": 0.0002361226826906204, "step": 201730 }, { "epoch": 57.26369571388022, "grad_norm": 0.18557105958461761, "learning_rate": 4.275929605449901e-05, "loss": 8.786842226982117e-05, "step": 201740 }, { "epoch": 57.26653420380357, "grad_norm": 0.02999189868569374, "learning_rate": 4.275645756457565e-05, "loss": 0.0001741703599691391, "step": 201750 }, { "epoch": 57.269372693726936, "grad_norm": 1.6525449752807617, "learning_rate": 4.275361907465228e-05, "loss": 0.000302065908908844, "step": 201760 }, { "epoch": 57.2722111836503, "grad_norm": 0.0016988072311505675, "learning_rate": 4.275078058472893e-05, "loss": 0.00036756470799446104, "step": 201770 }, { "epoch": 57.27504967357366, "grad_norm": 1.2683590650558472, "learning_rate": 4.2747942094805565e-05, "loss": 0.0003636457026004791, "step": 201780 }, { "epoch": 57.27788816349702, "grad_norm": 0.03589041903614998, "learning_rate": 4.2745103604882206e-05, "loss": 6.347484886646271e-05, "step": 201790 }, { "epoch": 57.28072665342038, "grad_norm": 0.11035144329071045, "learning_rate": 4.274226511495885e-05, "loss": 0.0012046631425619125, "step": 201800 }, { "epoch": 57.28356514334374, "grad_norm": 0.16670335829257965, "learning_rate": 4.273942662503548e-05, "loss": 0.00023599416017532348, "step": 201810 }, { "epoch": 57.2864036332671, "grad_norm": 0.11020839214324951, "learning_rate": 4.273658813511212e-05, "loss": 0.00014796964824199678, "step": 201820 }, { "epoch": 57.28924212319046, "grad_norm": 0.011358107440173626, "learning_rate": 4.2733749645188765e-05, "loss": 7.457099854946136e-05, "step": 201830 }, { "epoch": 57.292080613113825, "grad_norm": 0.0012794059002771974, "learning_rate": 4.27309111552654e-05, "loss": 0.00011978521943092346, "step": 201840 }, { "epoch": 57.29491910303718, "grad_norm": 0.0016433285782113671, "learning_rate": 4.272807266534204e-05, "loss": 5.955416709184647e-05, "step": 201850 }, { "epoch": 57.297757592960544, "grad_norm": 0.002181311370804906, "learning_rate": 4.2725234175418675e-05, "loss": 0.00011012926697731018, "step": 201860 }, { "epoch": 57.30059608288391, "grad_norm": 0.020553365349769592, "learning_rate": 4.272239568549532e-05, "loss": 5.409307777881622e-05, "step": 201870 }, { "epoch": 57.30343457280727, "grad_norm": 0.007502020336687565, "learning_rate": 4.271955719557196e-05, "loss": 0.00014439802616834642, "step": 201880 }, { "epoch": 57.306273062730625, "grad_norm": 0.003330540144816041, "learning_rate": 4.271671870564859e-05, "loss": 9.281598031520843e-05, "step": 201890 }, { "epoch": 57.30911155265399, "grad_norm": 4.09855842590332, "learning_rate": 4.271388021572524e-05, "loss": 0.0007676547393202782, "step": 201900 }, { "epoch": 57.31195004257735, "grad_norm": 0.01825573667883873, "learning_rate": 4.2711041725801875e-05, "loss": 0.002604599855840206, "step": 201910 }, { "epoch": 57.31478853250071, "grad_norm": 0.03389079123735428, "learning_rate": 4.270820323587851e-05, "loss": 0.000426616333425045, "step": 201920 }, { "epoch": 57.31762702242407, "grad_norm": 0.03197309747338295, "learning_rate": 4.270536474595516e-05, "loss": 0.0009400257840752602, "step": 201930 }, { "epoch": 57.32046551234743, "grad_norm": 0.008016006089746952, "learning_rate": 4.270252625603179e-05, "loss": 0.001008567214012146, "step": 201940 }, { "epoch": 57.323304002270795, "grad_norm": 0.04109169542789459, "learning_rate": 4.2699687766108434e-05, "loss": 0.00019933618605136872, "step": 201950 }, { "epoch": 57.32614249219415, "grad_norm": 0.0032593763899058104, "learning_rate": 4.269684927618507e-05, "loss": 0.00022899433970451354, "step": 201960 }, { "epoch": 57.328980982117514, "grad_norm": 1.257613182067871, "learning_rate": 4.269401078626171e-05, "loss": 0.0006715236231684685, "step": 201970 }, { "epoch": 57.33181947204088, "grad_norm": 0.028476690873503685, "learning_rate": 4.269117229633835e-05, "loss": 0.00021584015339612961, "step": 201980 }, { "epoch": 57.33465796196423, "grad_norm": 0.024882683530449867, "learning_rate": 4.2688333806414986e-05, "loss": 0.012250572443008423, "step": 201990 }, { "epoch": 57.337496451887596, "grad_norm": 0.0430910661816597, "learning_rate": 4.268549531649163e-05, "loss": 0.0002593357115983963, "step": 202000 }, { "epoch": 57.337496451887596, "eval_accuracy": 0.9830864119030965, "eval_loss": 0.07221687585115433, "eval_runtime": 42.1693, "eval_samples_per_second": 372.949, "eval_steps_per_second": 5.834, "step": 202000 }, { "epoch": 57.34033494181096, "grad_norm": 0.03818212449550629, "learning_rate": 4.268265682656827e-05, "loss": 0.00025797877460718153, "step": 202010 }, { "epoch": 57.343173431734314, "grad_norm": 0.09961555153131485, "learning_rate": 4.2679818336644903e-05, "loss": 0.00032104030251502993, "step": 202020 }, { "epoch": 57.34601192165768, "grad_norm": 0.21914798021316528, "learning_rate": 4.267697984672155e-05, "loss": 0.0024338413029909136, "step": 202030 }, { "epoch": 57.34885041158104, "grad_norm": 0.012010221369564533, "learning_rate": 4.2674141356798186e-05, "loss": 0.00381704717874527, "step": 202040 }, { "epoch": 57.3516889015044, "grad_norm": 0.1183546707034111, "learning_rate": 4.267130286687482e-05, "loss": 0.002022194303572178, "step": 202050 }, { "epoch": 57.35452739142776, "grad_norm": 0.028977159410715103, "learning_rate": 4.266846437695146e-05, "loss": 0.00046722926199436186, "step": 202060 }, { "epoch": 57.35736588135112, "grad_norm": 0.013260328210890293, "learning_rate": 4.2665625887028104e-05, "loss": 0.004720559716224671, "step": 202070 }, { "epoch": 57.360204371274484, "grad_norm": 3.7666850090026855, "learning_rate": 4.2662787397104745e-05, "loss": 0.001383223757147789, "step": 202080 }, { "epoch": 57.36304286119784, "grad_norm": 0.007220523431897163, "learning_rate": 4.265994890718138e-05, "loss": 0.0005866976454854012, "step": 202090 }, { "epoch": 57.3658813511212, "grad_norm": 0.04792432859539986, "learning_rate": 4.265711041725802e-05, "loss": 0.00032220762223005294, "step": 202100 }, { "epoch": 57.368719841044566, "grad_norm": 0.04054882749915123, "learning_rate": 4.265427192733466e-05, "loss": 0.0004513842985033989, "step": 202110 }, { "epoch": 57.37155833096792, "grad_norm": 0.14124515652656555, "learning_rate": 4.26514334374113e-05, "loss": 0.0008384145796298981, "step": 202120 }, { "epoch": 57.374396820891285, "grad_norm": 0.009454109705984592, "learning_rate": 4.264859494748794e-05, "loss": 0.0003635600209236145, "step": 202130 }, { "epoch": 57.37723531081465, "grad_norm": 0.00787662249058485, "learning_rate": 4.264575645756458e-05, "loss": 0.0005184954032301902, "step": 202140 }, { "epoch": 57.38007380073801, "grad_norm": 0.42431268095970154, "learning_rate": 4.2642917967641214e-05, "loss": 0.00023828279227018357, "step": 202150 }, { "epoch": 57.382912290661366, "grad_norm": 0.30335333943367004, "learning_rate": 4.2640079477717856e-05, "loss": 0.00025163684040308, "step": 202160 }, { "epoch": 57.38575078058473, "grad_norm": 0.004182441625744104, "learning_rate": 4.26372409877945e-05, "loss": 0.0005083303898572922, "step": 202170 }, { "epoch": 57.38858927050809, "grad_norm": 0.052665285766124725, "learning_rate": 4.263440249787113e-05, "loss": 0.00018539130687713623, "step": 202180 }, { "epoch": 57.39142776043145, "grad_norm": 0.11523208022117615, "learning_rate": 4.263156400794777e-05, "loss": 0.0003767373040318489, "step": 202190 }, { "epoch": 57.39426625035481, "grad_norm": 0.6007826328277588, "learning_rate": 4.2628725518024414e-05, "loss": 0.000745423324406147, "step": 202200 }, { "epoch": 57.39710474027817, "grad_norm": 10.529060363769531, "learning_rate": 4.2625887028101056e-05, "loss": 0.006010232865810395, "step": 202210 }, { "epoch": 57.39994323020153, "grad_norm": 1.8219443559646606, "learning_rate": 4.262304853817769e-05, "loss": 0.001402055099606514, "step": 202220 }, { "epoch": 57.40278172012489, "grad_norm": 3.974405288696289, "learning_rate": 4.262021004825433e-05, "loss": 0.0008996794000267983, "step": 202230 }, { "epoch": 57.405620210048255, "grad_norm": 0.017327891662716866, "learning_rate": 4.261737155833097e-05, "loss": 7.73504376411438e-05, "step": 202240 }, { "epoch": 57.40845869997162, "grad_norm": 0.09815216064453125, "learning_rate": 4.261453306840761e-05, "loss": 0.000499354675412178, "step": 202250 }, { "epoch": 57.411297189894974, "grad_norm": 0.009789010509848595, "learning_rate": 4.261169457848425e-05, "loss": 0.0003216588869690895, "step": 202260 }, { "epoch": 57.41413567981834, "grad_norm": 0.09584695100784302, "learning_rate": 4.260885608856089e-05, "loss": 0.00019048284739255905, "step": 202270 }, { "epoch": 57.4169741697417, "grad_norm": 0.033562690019607544, "learning_rate": 4.2606017598637525e-05, "loss": 0.00028961077332496644, "step": 202280 }, { "epoch": 57.419812659665055, "grad_norm": 0.015964601188898087, "learning_rate": 4.2603179108714166e-05, "loss": 0.0013031614944338799, "step": 202290 }, { "epoch": 57.42265114958842, "grad_norm": 0.014594079926609993, "learning_rate": 4.260034061879081e-05, "loss": 0.0015088697895407678, "step": 202300 }, { "epoch": 57.42548963951178, "grad_norm": 0.004304548725485802, "learning_rate": 4.259750212886744e-05, "loss": 0.0009609611704945564, "step": 202310 }, { "epoch": 57.428328129435144, "grad_norm": 0.3653506338596344, "learning_rate": 4.2594663638944084e-05, "loss": 0.0002501487731933594, "step": 202320 }, { "epoch": 57.4311666193585, "grad_norm": 4.366530895233154, "learning_rate": 4.2591825149020725e-05, "loss": 0.0010052198544144631, "step": 202330 }, { "epoch": 57.43400510928186, "grad_norm": 0.08402373641729355, "learning_rate": 4.258898665909736e-05, "loss": 0.00018087625503540039, "step": 202340 }, { "epoch": 57.436843599205226, "grad_norm": 1.4592713117599487, "learning_rate": 4.2586148169174e-05, "loss": 0.0013895301148295403, "step": 202350 }, { "epoch": 57.43968208912858, "grad_norm": 0.007248336914926767, "learning_rate": 4.2583309679250636e-05, "loss": 0.00016669072210788726, "step": 202360 }, { "epoch": 57.442520579051944, "grad_norm": 0.08554715663194656, "learning_rate": 4.2580471189327284e-05, "loss": 0.0020168283954262733, "step": 202370 }, { "epoch": 57.44535906897531, "grad_norm": 0.01099064014852047, "learning_rate": 4.257763269940392e-05, "loss": 0.00028739701956510545, "step": 202380 }, { "epoch": 57.44819755889866, "grad_norm": 0.1976761668920517, "learning_rate": 4.257479420948055e-05, "loss": 0.00018488690257072448, "step": 202390 }, { "epoch": 57.451036048822026, "grad_norm": 0.013988872058689594, "learning_rate": 4.25719557195572e-05, "loss": 0.008494452387094498, "step": 202400 }, { "epoch": 57.45387453874539, "grad_norm": 0.6067211627960205, "learning_rate": 4.2569117229633836e-05, "loss": 0.0007396716624498368, "step": 202410 }, { "epoch": 57.45671302866875, "grad_norm": 1.4673258066177368, "learning_rate": 4.256627873971048e-05, "loss": 0.0005999203771352768, "step": 202420 }, { "epoch": 57.45955151859211, "grad_norm": 0.028604434803128242, "learning_rate": 4.256344024978712e-05, "loss": 0.0002831580117344856, "step": 202430 }, { "epoch": 57.46239000851547, "grad_norm": 0.005595603957772255, "learning_rate": 4.256060175986375e-05, "loss": 0.0002671662718057632, "step": 202440 }, { "epoch": 57.46522849843883, "grad_norm": 7.103536605834961, "learning_rate": 4.2557763269940394e-05, "loss": 0.0037362542003393172, "step": 202450 }, { "epoch": 57.46806698836219, "grad_norm": 0.0361204594373703, "learning_rate": 4.255492478001703e-05, "loss": 0.0010911200195550918, "step": 202460 }, { "epoch": 57.47090547828555, "grad_norm": 0.07851731032133102, "learning_rate": 4.255208629009367e-05, "loss": 0.0020420026034116746, "step": 202470 }, { "epoch": 57.473743968208915, "grad_norm": 0.007717506028711796, "learning_rate": 4.254924780017031e-05, "loss": 0.002081305533647537, "step": 202480 }, { "epoch": 57.47658245813227, "grad_norm": 0.0015903301537036896, "learning_rate": 4.2546409310246946e-05, "loss": 0.00026048868894577024, "step": 202490 }, { "epoch": 57.47942094805563, "grad_norm": 4.0723419189453125, "learning_rate": 4.2543570820323595e-05, "loss": 0.0018754133954644204, "step": 202500 }, { "epoch": 57.47942094805563, "eval_accuracy": 0.9841673555032746, "eval_loss": 0.06699483096599579, "eval_runtime": 39.1207, "eval_samples_per_second": 402.012, "eval_steps_per_second": 6.288, "step": 202500 }, { "epoch": 57.482259437978996, "grad_norm": 0.006239387206733227, "learning_rate": 4.254073233040023e-05, "loss": 0.0008057959377765656, "step": 202510 }, { "epoch": 57.48509792790236, "grad_norm": 0.2653391659259796, "learning_rate": 4.2537893840476864e-05, "loss": 0.004606819897890091, "step": 202520 }, { "epoch": 57.487936417825715, "grad_norm": 0.03986141458153725, "learning_rate": 4.253505535055351e-05, "loss": 0.0019304567947983741, "step": 202530 }, { "epoch": 57.49077490774908, "grad_norm": 0.04033580794930458, "learning_rate": 4.2532216860630146e-05, "loss": 0.00398004911839962, "step": 202540 }, { "epoch": 57.49361339767244, "grad_norm": 0.005047224927693605, "learning_rate": 4.252937837070679e-05, "loss": 0.0007926151156425476, "step": 202550 }, { "epoch": 57.496451887595796, "grad_norm": 0.007797031197696924, "learning_rate": 4.252653988078342e-05, "loss": 0.00016236267983913422, "step": 202560 }, { "epoch": 57.49929037751916, "grad_norm": 0.01340277586132288, "learning_rate": 4.2523701390860064e-05, "loss": 0.0013141483068466187, "step": 202570 }, { "epoch": 57.50212886744252, "grad_norm": 0.09334974735975266, "learning_rate": 4.2520862900936705e-05, "loss": 0.004511886462569237, "step": 202580 }, { "epoch": 57.50496735736588, "grad_norm": 0.028889870271086693, "learning_rate": 4.251802441101334e-05, "loss": 0.00010369867086410523, "step": 202590 }, { "epoch": 57.50780584728924, "grad_norm": 0.10473625361919403, "learning_rate": 4.251518592108998e-05, "loss": 0.0006894109770655632, "step": 202600 }, { "epoch": 57.510644337212604, "grad_norm": 0.015022397972643375, "learning_rate": 4.251234743116662e-05, "loss": 0.0009259266778826713, "step": 202610 }, { "epoch": 57.51348282713597, "grad_norm": 0.012310651130974293, "learning_rate": 4.250950894124326e-05, "loss": 0.0019141806289553643, "step": 202620 }, { "epoch": 57.51632131705932, "grad_norm": 0.012752238661050797, "learning_rate": 4.2506670451319905e-05, "loss": 0.0008115783333778381, "step": 202630 }, { "epoch": 57.519159806982685, "grad_norm": 0.005237864330410957, "learning_rate": 4.250383196139654e-05, "loss": 0.0004089314490556717, "step": 202640 }, { "epoch": 57.52199829690605, "grad_norm": 0.5531271696090698, "learning_rate": 4.2500993471473175e-05, "loss": 0.0009759750217199325, "step": 202650 }, { "epoch": 57.524836786829404, "grad_norm": 2.1256115436553955, "learning_rate": 4.2498154981549816e-05, "loss": 0.0010030793026089668, "step": 202660 }, { "epoch": 57.52767527675277, "grad_norm": 0.05811072140932083, "learning_rate": 4.249531649162646e-05, "loss": 0.0015679756179451942, "step": 202670 }, { "epoch": 57.53051376667613, "grad_norm": 0.02122645080089569, "learning_rate": 4.24924780017031e-05, "loss": 0.0011475820094347, "step": 202680 }, { "epoch": 57.533352256599485, "grad_norm": 0.10629257559776306, "learning_rate": 4.248963951177973e-05, "loss": 0.00033167395740747453, "step": 202690 }, { "epoch": 57.53619074652285, "grad_norm": 0.02959279529750347, "learning_rate": 4.2486801021856375e-05, "loss": 0.0006155923008918762, "step": 202700 }, { "epoch": 57.53902923644621, "grad_norm": 0.0034460085444152355, "learning_rate": 4.2483962531933016e-05, "loss": 0.0004671933129429817, "step": 202710 }, { "epoch": 57.541867726369574, "grad_norm": 0.17742371559143066, "learning_rate": 4.248112404200965e-05, "loss": 0.0010149618610739708, "step": 202720 }, { "epoch": 57.54470621629293, "grad_norm": 0.31097930669784546, "learning_rate": 4.247828555208629e-05, "loss": 0.0076976239681243895, "step": 202730 }, { "epoch": 57.54754470621629, "grad_norm": 1.7786834239959717, "learning_rate": 4.247544706216293e-05, "loss": 0.0007954105734825134, "step": 202740 }, { "epoch": 57.550383196139656, "grad_norm": 0.02021031640470028, "learning_rate": 4.247260857223957e-05, "loss": 0.007048764079809189, "step": 202750 }, { "epoch": 57.55322168606301, "grad_norm": 2.5728049278259277, "learning_rate": 4.246977008231621e-05, "loss": 0.010003305971622467, "step": 202760 }, { "epoch": 57.556060175986374, "grad_norm": 1.270110011100769, "learning_rate": 4.246693159239285e-05, "loss": 0.008660735934972763, "step": 202770 }, { "epoch": 57.55889866590974, "grad_norm": 0.0041773454286158085, "learning_rate": 4.2464093102469485e-05, "loss": 0.002781110256910324, "step": 202780 }, { "epoch": 57.5617371558331, "grad_norm": 19.425798416137695, "learning_rate": 4.246125461254613e-05, "loss": 0.008257406949996948, "step": 202790 }, { "epoch": 57.564575645756456, "grad_norm": 9.35728645324707, "learning_rate": 4.245841612262277e-05, "loss": 0.004942653700709343, "step": 202800 }, { "epoch": 57.56741413567982, "grad_norm": 5.300628185272217, "learning_rate": 4.24555776326994e-05, "loss": 0.0031262658536434173, "step": 202810 }, { "epoch": 57.57025262560318, "grad_norm": 0.9297796487808228, "learning_rate": 4.2452739142776044e-05, "loss": 0.002285885252058506, "step": 202820 }, { "epoch": 57.57309111552654, "grad_norm": 2.7575302124023438, "learning_rate": 4.2449900652852685e-05, "loss": 0.0013570643961429596, "step": 202830 }, { "epoch": 57.5759296054499, "grad_norm": 0.109645314514637, "learning_rate": 4.244706216292933e-05, "loss": 0.0009970953688025475, "step": 202840 }, { "epoch": 57.57876809537326, "grad_norm": 0.10115757584571838, "learning_rate": 4.244422367300596e-05, "loss": 0.00013093184679746628, "step": 202850 }, { "epoch": 57.58160658529662, "grad_norm": 0.12285703420639038, "learning_rate": 4.24413851830826e-05, "loss": 0.0028287680819630624, "step": 202860 }, { "epoch": 57.58444507521998, "grad_norm": 0.043122414499521255, "learning_rate": 4.2438546693159244e-05, "loss": 0.01212909072637558, "step": 202870 }, { "epoch": 57.587283565143345, "grad_norm": 0.015341403894126415, "learning_rate": 4.243570820323588e-05, "loss": 0.0034239225089550017, "step": 202880 }, { "epoch": 57.59012205506671, "grad_norm": 0.2726324796676636, "learning_rate": 4.243286971331252e-05, "loss": 0.0002864183858036995, "step": 202890 }, { "epoch": 57.59296054499006, "grad_norm": 0.14105606079101562, "learning_rate": 4.243003122338916e-05, "loss": 0.00023011360317468644, "step": 202900 }, { "epoch": 57.595799034913426, "grad_norm": 0.6390876173973083, "learning_rate": 4.2427192733465796e-05, "loss": 0.0004533145576715469, "step": 202910 }, { "epoch": 57.59863752483679, "grad_norm": 0.0020715859718620777, "learning_rate": 4.242435424354244e-05, "loss": 0.0006755325943231582, "step": 202920 }, { "epoch": 57.601476014760145, "grad_norm": 0.1919023096561432, "learning_rate": 4.242151575361908e-05, "loss": 0.00015293098986148834, "step": 202930 }, { "epoch": 57.60431450468351, "grad_norm": 0.36147764325141907, "learning_rate": 4.241867726369571e-05, "loss": 0.00017043258994817734, "step": 202940 }, { "epoch": 57.60715299460687, "grad_norm": 0.03040226548910141, "learning_rate": 4.2415838773772355e-05, "loss": 0.00017628241330385207, "step": 202950 }, { "epoch": 57.60999148453023, "grad_norm": 0.0040245456621050835, "learning_rate": 4.2413000283848996e-05, "loss": 0.0002841118723154068, "step": 202960 }, { "epoch": 57.61282997445359, "grad_norm": 0.08078529685735703, "learning_rate": 4.241016179392564e-05, "loss": 0.0005389606580138206, "step": 202970 }, { "epoch": 57.61566846437695, "grad_norm": 0.021495318040251732, "learning_rate": 4.240732330400227e-05, "loss": 0.00018292833119630813, "step": 202980 }, { "epoch": 57.618506954300315, "grad_norm": 0.029706167057156563, "learning_rate": 4.240448481407891e-05, "loss": 0.0005054650828242302, "step": 202990 }, { "epoch": 57.62134544422367, "grad_norm": 0.01108373049646616, "learning_rate": 4.2401646324155555e-05, "loss": 0.0030164815485477447, "step": 203000 }, { "epoch": 57.62134544422367, "eval_accuracy": 0.9836586761620144, "eval_loss": 0.07125885039567947, "eval_runtime": 37.6284, "eval_samples_per_second": 417.955, "eval_steps_per_second": 6.538, "step": 203000 }, { "epoch": 57.624183934147034, "grad_norm": 0.015712600201368332, "learning_rate": 4.239880783423219e-05, "loss": 0.00026429686695337293, "step": 203010 }, { "epoch": 57.6270224240704, "grad_norm": 0.7159900665283203, "learning_rate": 4.239596934430883e-05, "loss": 0.0005341419950127601, "step": 203020 }, { "epoch": 57.62986091399375, "grad_norm": 0.08473460376262665, "learning_rate": 4.239313085438547e-05, "loss": 0.0005368592217564582, "step": 203030 }, { "epoch": 57.632699403917115, "grad_norm": 0.6088956594467163, "learning_rate": 4.239029236446211e-05, "loss": 0.000510069914162159, "step": 203040 }, { "epoch": 57.63553789384048, "grad_norm": 0.029369834810495377, "learning_rate": 4.238745387453875e-05, "loss": 0.0013591691851615906, "step": 203050 }, { "epoch": 57.63837638376384, "grad_norm": 0.786099374294281, "learning_rate": 4.238461538461539e-05, "loss": 0.00042926836758852007, "step": 203060 }, { "epoch": 57.6412148736872, "grad_norm": 0.4283079504966736, "learning_rate": 4.238206074368436e-05, "loss": 0.004772941768169403, "step": 203070 }, { "epoch": 57.64405336361056, "grad_norm": 0.005562254227697849, "learning_rate": 4.2379222253761004e-05, "loss": 0.00036058593541383744, "step": 203080 }, { "epoch": 57.64689185353392, "grad_norm": 0.08390610665082932, "learning_rate": 4.237638376383764e-05, "loss": 0.00019658226519823075, "step": 203090 }, { "epoch": 57.64973034345728, "grad_norm": 0.06524210423231125, "learning_rate": 4.237354527391428e-05, "loss": 0.00018549319356679915, "step": 203100 }, { "epoch": 57.65256883338064, "grad_norm": 0.09091628342866898, "learning_rate": 4.237070678399092e-05, "loss": 0.00017567910254001617, "step": 203110 }, { "epoch": 57.655407323304004, "grad_norm": 0.005736266728490591, "learning_rate": 4.2367868294067556e-05, "loss": 0.0004236845299601555, "step": 203120 }, { "epoch": 57.65824581322736, "grad_norm": 0.03188026323914528, "learning_rate": 4.23650298041442e-05, "loss": 0.00021999720484018327, "step": 203130 }, { "epoch": 57.66108430315072, "grad_norm": 0.058857034891843796, "learning_rate": 4.236219131422084e-05, "loss": 0.00019069388508796692, "step": 203140 }, { "epoch": 57.663922793074086, "grad_norm": 0.07989774644374847, "learning_rate": 4.235935282429748e-05, "loss": 0.00021582040935754775, "step": 203150 }, { "epoch": 57.66676128299745, "grad_norm": 0.01233550813049078, "learning_rate": 4.2356514334374115e-05, "loss": 4.250742495059967e-05, "step": 203160 }, { "epoch": 57.669599772920805, "grad_norm": 0.07226639240980148, "learning_rate": 4.235367584445075e-05, "loss": 7.202010601758957e-05, "step": 203170 }, { "epoch": 57.67243826284417, "grad_norm": 0.014648223295807838, "learning_rate": 4.23508373545274e-05, "loss": 0.00024143811315298082, "step": 203180 }, { "epoch": 57.67527675276753, "grad_norm": 0.006735254544764757, "learning_rate": 4.234799886460403e-05, "loss": 0.00014146212488412856, "step": 203190 }, { "epoch": 57.678115242690886, "grad_norm": 5.762024879455566, "learning_rate": 4.2345160374680673e-05, "loss": 0.001677645742893219, "step": 203200 }, { "epoch": 57.68095373261425, "grad_norm": 0.01090710237622261, "learning_rate": 4.2342321884757315e-05, "loss": 0.0001121658831834793, "step": 203210 }, { "epoch": 57.68379222253761, "grad_norm": 0.08700192719697952, "learning_rate": 4.233948339483395e-05, "loss": 0.0005599252879619598, "step": 203220 }, { "epoch": 57.68663071246097, "grad_norm": 0.1482071429491043, "learning_rate": 4.233664490491059e-05, "loss": 0.00010366290807723999, "step": 203230 }, { "epoch": 57.68946920238433, "grad_norm": 0.14142973721027374, "learning_rate": 4.233380641498723e-05, "loss": 0.000244341604411602, "step": 203240 }, { "epoch": 57.69230769230769, "grad_norm": 0.021300548687577248, "learning_rate": 4.233096792506387e-05, "loss": 0.0002038903534412384, "step": 203250 }, { "epoch": 57.695146182231056, "grad_norm": 0.002170736202970147, "learning_rate": 4.232812943514051e-05, "loss": 0.0008296594023704529, "step": 203260 }, { "epoch": 57.69798467215441, "grad_norm": 0.07954013347625732, "learning_rate": 4.232529094521714e-05, "loss": 0.0009273150935769081, "step": 203270 }, { "epoch": 57.700823162077775, "grad_norm": 0.13100074231624603, "learning_rate": 4.2322452455293784e-05, "loss": 0.009188656508922578, "step": 203280 }, { "epoch": 57.70366165200114, "grad_norm": 0.004010801203548908, "learning_rate": 4.2319613965370426e-05, "loss": 0.00020463839173316954, "step": 203290 }, { "epoch": 57.706500141924494, "grad_norm": 0.07685814797878265, "learning_rate": 4.231677547544706e-05, "loss": 0.003975810110569, "step": 203300 }, { "epoch": 57.70933863184786, "grad_norm": 15.143173217773438, "learning_rate": 4.231393698552371e-05, "loss": 0.004408398270606994, "step": 203310 }, { "epoch": 57.71217712177122, "grad_norm": 0.00857956800609827, "learning_rate": 4.231109849560034e-05, "loss": 0.00015250295400619506, "step": 203320 }, { "epoch": 57.715015611694575, "grad_norm": 0.019729234278202057, "learning_rate": 4.230826000567698e-05, "loss": 0.0003301497548818588, "step": 203330 }, { "epoch": 57.71785410161794, "grad_norm": 0.0028597244527190924, "learning_rate": 4.2305421515753626e-05, "loss": 0.00028910897672176363, "step": 203340 }, { "epoch": 57.7206925915413, "grad_norm": 0.7940860390663147, "learning_rate": 4.230258302583026e-05, "loss": 0.0005207385867834092, "step": 203350 }, { "epoch": 57.723531081464664, "grad_norm": 0.016808509826660156, "learning_rate": 4.22997445359069e-05, "loss": 0.0002978844568133354, "step": 203360 }, { "epoch": 57.72636957138802, "grad_norm": 0.016991982236504555, "learning_rate": 4.2296906045983536e-05, "loss": 0.00047624781727790834, "step": 203370 }, { "epoch": 57.72920806131138, "grad_norm": 0.027873851358890533, "learning_rate": 4.229406755606018e-05, "loss": 0.00012164190411567688, "step": 203380 }, { "epoch": 57.732046551234745, "grad_norm": 0.04021799564361572, "learning_rate": 4.229122906613682e-05, "loss": 0.00014433357864618301, "step": 203390 }, { "epoch": 57.7348850411581, "grad_norm": 0.2537989318370819, "learning_rate": 4.2288390576213454e-05, "loss": 0.00020823907107114792, "step": 203400 }, { "epoch": 57.737723531081464, "grad_norm": 0.013828873634338379, "learning_rate": 4.2285552086290095e-05, "loss": 0.0001482456922531128, "step": 203410 }, { "epoch": 57.74056202100483, "grad_norm": 0.7408146262168884, "learning_rate": 4.2282713596366736e-05, "loss": 0.0013576384633779525, "step": 203420 }, { "epoch": 57.74340051092818, "grad_norm": 0.004475674591958523, "learning_rate": 4.227987510644337e-05, "loss": 0.0015964491292834281, "step": 203430 }, { "epoch": 57.746239000851546, "grad_norm": 15.331304550170898, "learning_rate": 4.227703661652002e-05, "loss": 0.008864612132310868, "step": 203440 }, { "epoch": 57.74907749077491, "grad_norm": 0.08740225434303284, "learning_rate": 4.2274198126596654e-05, "loss": 0.0005367999896407127, "step": 203450 }, { "epoch": 57.75191598069827, "grad_norm": 0.009458662010729313, "learning_rate": 4.227135963667329e-05, "loss": 9.627286344766617e-05, "step": 203460 }, { "epoch": 57.75475447062163, "grad_norm": 0.1032077819108963, "learning_rate": 4.226852114674993e-05, "loss": 0.000304223969578743, "step": 203470 }, { "epoch": 57.75759296054499, "grad_norm": 0.2992275059223175, "learning_rate": 4.226568265682657e-05, "loss": 0.00047272369265556337, "step": 203480 }, { "epoch": 57.76043145046835, "grad_norm": 0.027655070647597313, "learning_rate": 4.226284416690321e-05, "loss": 0.0016506019979715346, "step": 203490 }, { "epoch": 57.76326994039171, "grad_norm": 0.017761556431651115, "learning_rate": 4.226000567697985e-05, "loss": 0.00017746072262525558, "step": 203500 }, { "epoch": 57.76326994039171, "eval_accuracy": 0.9839130158326445, "eval_loss": 0.06675492972135544, "eval_runtime": 46.2182, "eval_samples_per_second": 340.277, "eval_steps_per_second": 5.323, "step": 203500 }, { "epoch": 57.76610843031507, "grad_norm": 0.45094048976898193, "learning_rate": 4.225716718705649e-05, "loss": 0.0004664303734898567, "step": 203510 }, { "epoch": 57.768946920238434, "grad_norm": 0.18102285265922546, "learning_rate": 4.225432869713313e-05, "loss": 0.00017662160098552704, "step": 203520 }, { "epoch": 57.7717854101618, "grad_norm": 0.02508150041103363, "learning_rate": 4.2251490207209764e-05, "loss": 0.001357259787619114, "step": 203530 }, { "epoch": 57.77462390008515, "grad_norm": 0.03552192822098732, "learning_rate": 4.2248651717286406e-05, "loss": 0.0019998835399746896, "step": 203540 }, { "epoch": 57.777462390008516, "grad_norm": 0.020072611048817635, "learning_rate": 4.224581322736305e-05, "loss": 0.0010696835815906525, "step": 203550 }, { "epoch": 57.78030087993188, "grad_norm": 0.051287829875946045, "learning_rate": 4.224297473743968e-05, "loss": 0.00022164154797792434, "step": 203560 }, { "epoch": 57.783139369855235, "grad_norm": 0.008236619643867016, "learning_rate": 4.224013624751632e-05, "loss": 0.00030083488672971725, "step": 203570 }, { "epoch": 57.7859778597786, "grad_norm": 0.052282288670539856, "learning_rate": 4.2237297757592964e-05, "loss": 0.0030819982290267943, "step": 203580 }, { "epoch": 57.78881634970196, "grad_norm": 0.07736941426992416, "learning_rate": 4.22344592676696e-05, "loss": 0.01344061940908432, "step": 203590 }, { "epoch": 57.791654839625316, "grad_norm": 0.034134067595005035, "learning_rate": 4.223162077774624e-05, "loss": 0.00015809591859579087, "step": 203600 }, { "epoch": 57.79449332954868, "grad_norm": 0.06208336353302002, "learning_rate": 4.222878228782288e-05, "loss": 0.001151539571583271, "step": 203610 }, { "epoch": 57.79733181947204, "grad_norm": 0.06317959725856781, "learning_rate": 4.222594379789952e-05, "loss": 0.00010071750730276108, "step": 203620 }, { "epoch": 57.800170309395405, "grad_norm": 0.09918051213026047, "learning_rate": 4.222310530797616e-05, "loss": 0.0005573194473981857, "step": 203630 }, { "epoch": 57.80300879931876, "grad_norm": 0.01522153615951538, "learning_rate": 4.22202668180528e-05, "loss": 0.0013016566634178161, "step": 203640 }, { "epoch": 57.805847289242124, "grad_norm": 1.429951786994934, "learning_rate": 4.221742832812944e-05, "loss": 0.00030874814838171004, "step": 203650 }, { "epoch": 57.80868577916549, "grad_norm": 0.00763853220269084, "learning_rate": 4.2214589838206075e-05, "loss": 0.00015529301017522812, "step": 203660 }, { "epoch": 57.81152426908884, "grad_norm": 0.009625235572457314, "learning_rate": 4.2211751348282716e-05, "loss": 0.00700499564409256, "step": 203670 }, { "epoch": 57.814362759012205, "grad_norm": 0.08778926730155945, "learning_rate": 4.220891285835936e-05, "loss": 0.001986737176775932, "step": 203680 }, { "epoch": 57.81720124893557, "grad_norm": 0.08397795259952545, "learning_rate": 4.220607436843599e-05, "loss": 0.0007229529321193696, "step": 203690 }, { "epoch": 57.820039738858924, "grad_norm": 0.006949938368052244, "learning_rate": 4.2203235878512634e-05, "loss": 9.281337261199952e-05, "step": 203700 }, { "epoch": 57.82287822878229, "grad_norm": 0.0993160605430603, "learning_rate": 4.2200397388589275e-05, "loss": 0.006746414303779602, "step": 203710 }, { "epoch": 57.82571671870565, "grad_norm": 2.083768606185913, "learning_rate": 4.219755889866591e-05, "loss": 0.0008499689400196075, "step": 203720 }, { "epoch": 57.82855520862901, "grad_norm": 0.02971445769071579, "learning_rate": 4.219472040874255e-05, "loss": 9.366553276777268e-05, "step": 203730 }, { "epoch": 57.83139369855237, "grad_norm": 3.050022840499878, "learning_rate": 4.219188191881919e-05, "loss": 0.0009065745398402214, "step": 203740 }, { "epoch": 57.83423218847573, "grad_norm": 0.005243698135018349, "learning_rate": 4.218904342889583e-05, "loss": 0.00506092756986618, "step": 203750 }, { "epoch": 57.837070678399094, "grad_norm": 0.010429153218865395, "learning_rate": 4.218620493897247e-05, "loss": 0.001139805279672146, "step": 203760 }, { "epoch": 57.83990916832245, "grad_norm": 0.014278383925557137, "learning_rate": 4.21833664490491e-05, "loss": 0.00032329782843589785, "step": 203770 }, { "epoch": 57.84274765824581, "grad_norm": 0.30297014117240906, "learning_rate": 4.218052795912575e-05, "loss": 0.0002933071926236153, "step": 203780 }, { "epoch": 57.845586148169176, "grad_norm": 0.015834655612707138, "learning_rate": 4.2177689469202386e-05, "loss": 0.0013052146881818772, "step": 203790 }, { "epoch": 57.84842463809253, "grad_norm": 0.018174616619944572, "learning_rate": 4.217485097927902e-05, "loss": 0.00021954327821731566, "step": 203800 }, { "epoch": 57.851263128015894, "grad_norm": 0.22122371196746826, "learning_rate": 4.217201248935567e-05, "loss": 0.006826517730951309, "step": 203810 }, { "epoch": 57.85410161793926, "grad_norm": 0.06126975268125534, "learning_rate": 4.21691739994323e-05, "loss": 0.0046892926096916195, "step": 203820 }, { "epoch": 57.85694010786262, "grad_norm": 2.5355236530303955, "learning_rate": 4.2166335509508945e-05, "loss": 0.0006275711581110955, "step": 203830 }, { "epoch": 57.859778597785976, "grad_norm": 9.399971961975098, "learning_rate": 4.2163497019585586e-05, "loss": 0.0016947995871305465, "step": 203840 }, { "epoch": 57.86261708770934, "grad_norm": 0.015573473647236824, "learning_rate": 4.216065852966222e-05, "loss": 0.0036891497671604156, "step": 203850 }, { "epoch": 57.8654555776327, "grad_norm": 0.02475043758749962, "learning_rate": 4.215782003973886e-05, "loss": 0.004684619605541229, "step": 203860 }, { "epoch": 57.86829406755606, "grad_norm": 0.12593543529510498, "learning_rate": 4.2154981549815497e-05, "loss": 0.0008407196030020714, "step": 203870 }, { "epoch": 57.87113255747942, "grad_norm": 0.2400837391614914, "learning_rate": 4.215214305989214e-05, "loss": 0.002679497003555298, "step": 203880 }, { "epoch": 57.87397104740278, "grad_norm": 0.0179639533162117, "learning_rate": 4.214930456996878e-05, "loss": 0.0014939969405531883, "step": 203890 }, { "epoch": 57.876809537326146, "grad_norm": 0.5118404626846313, "learning_rate": 4.2146466080045414e-05, "loss": 0.00020773597061634063, "step": 203900 }, { "epoch": 57.8796480272495, "grad_norm": 0.14373400807380676, "learning_rate": 4.214362759012206e-05, "loss": 0.00010704528540372849, "step": 203910 }, { "epoch": 57.882486517172865, "grad_norm": 0.007298670243471861, "learning_rate": 4.21407891001987e-05, "loss": 0.0008625131100416184, "step": 203920 }, { "epoch": 57.88532500709623, "grad_norm": 0.4244762659072876, "learning_rate": 4.213795061027533e-05, "loss": 0.000239652581512928, "step": 203930 }, { "epoch": 57.88816349701958, "grad_norm": 0.7429831027984619, "learning_rate": 4.213511212035198e-05, "loss": 0.003645344823598862, "step": 203940 }, { "epoch": 57.891001986942946, "grad_norm": 4.380570411682129, "learning_rate": 4.2132273630428614e-05, "loss": 0.0016321493312716484, "step": 203950 }, { "epoch": 57.89384047686631, "grad_norm": 0.01716320589184761, "learning_rate": 4.2129435140505255e-05, "loss": 0.0002130618318915367, "step": 203960 }, { "epoch": 57.896678966789665, "grad_norm": 0.2513074278831482, "learning_rate": 4.212659665058189e-05, "loss": 0.00024190787225961686, "step": 203970 }, { "epoch": 57.89951745671303, "grad_norm": 0.13880851864814758, "learning_rate": 4.212375816065853e-05, "loss": 0.00026567578315734864, "step": 203980 }, { "epoch": 57.90235594663639, "grad_norm": 0.033847980201244354, "learning_rate": 4.212091967073517e-05, "loss": 0.0003432856872677803, "step": 203990 }, { "epoch": 57.90519443655975, "grad_norm": 0.015934962779283524, "learning_rate": 4.211808118081181e-05, "loss": 0.0005915133282542228, "step": 204000 }, { "epoch": 57.90519443655975, "eval_accuracy": 0.9827049023971514, "eval_loss": 0.06984560191631317, "eval_runtime": 49.1707, "eval_samples_per_second": 319.845, "eval_steps_per_second": 5.003, "step": 204000 }, { "epoch": 57.90803292648311, "grad_norm": 0.009725905954837799, "learning_rate": 4.211524269088845e-05, "loss": 0.014453896880149841, "step": 204010 }, { "epoch": 57.91087141640647, "grad_norm": 0.006037051323801279, "learning_rate": 4.211240420096509e-05, "loss": 0.006601151823997497, "step": 204020 }, { "epoch": 57.913709906329835, "grad_norm": 0.03662240877747536, "learning_rate": 4.2109565711041725e-05, "loss": 0.01169254183769226, "step": 204030 }, { "epoch": 57.91654839625319, "grad_norm": 0.05253510922193527, "learning_rate": 4.210672722111837e-05, "loss": 0.001469518058001995, "step": 204040 }, { "epoch": 57.919386886176554, "grad_norm": 0.0061020925641059875, "learning_rate": 4.210388873119501e-05, "loss": 0.00041989516466856003, "step": 204050 }, { "epoch": 57.92222537609992, "grad_norm": 0.013830959796905518, "learning_rate": 4.210105024127164e-05, "loss": 0.00465863049030304, "step": 204060 }, { "epoch": 57.92506386602327, "grad_norm": 0.0124836266040802, "learning_rate": 4.209821175134828e-05, "loss": 0.0031386643648147585, "step": 204070 }, { "epoch": 57.927902355946635, "grad_norm": 6.5296549797058105, "learning_rate": 4.2095373261424925e-05, "loss": 0.0028921248391270637, "step": 204080 }, { "epoch": 57.93074084587, "grad_norm": 0.008454585447907448, "learning_rate": 4.2092534771501566e-05, "loss": 0.00012193936854600906, "step": 204090 }, { "epoch": 57.93357933579336, "grad_norm": 0.024127807468175888, "learning_rate": 4.20896962815782e-05, "loss": 0.00019901115447282792, "step": 204100 }, { "epoch": 57.93641782571672, "grad_norm": 0.010838376358151436, "learning_rate": 4.208685779165484e-05, "loss": 0.00010701734572649002, "step": 204110 }, { "epoch": 57.93925631564008, "grad_norm": 0.44912129640579224, "learning_rate": 4.2084019301731483e-05, "loss": 0.00017727818340063095, "step": 204120 }, { "epoch": 57.94209480556344, "grad_norm": 0.007221502717584372, "learning_rate": 4.208118081180812e-05, "loss": 0.00022831875830888748, "step": 204130 }, { "epoch": 57.9449332954868, "grad_norm": 0.021240737289190292, "learning_rate": 4.207834232188476e-05, "loss": 0.0010749265551567079, "step": 204140 }, { "epoch": 57.94777178541016, "grad_norm": 0.01344070490449667, "learning_rate": 4.20755038319614e-05, "loss": 0.00027344990521669386, "step": 204150 }, { "epoch": 57.950610275333524, "grad_norm": 0.04256868362426758, "learning_rate": 4.2072665342038035e-05, "loss": 0.0006231257691979408, "step": 204160 }, { "epoch": 57.95344876525688, "grad_norm": 0.04533788189291954, "learning_rate": 4.206982685211468e-05, "loss": 0.00012459829449653625, "step": 204170 }, { "epoch": 57.95628725518024, "grad_norm": 0.08262239396572113, "learning_rate": 4.206698836219132e-05, "loss": 0.002105170302093029, "step": 204180 }, { "epoch": 57.959125745103606, "grad_norm": 0.3402833938598633, "learning_rate": 4.206414987226795e-05, "loss": 0.0004901552572846412, "step": 204190 }, { "epoch": 57.96196423502697, "grad_norm": 0.0179071556776762, "learning_rate": 4.2061311382344594e-05, "loss": 0.0027936911210417747, "step": 204200 }, { "epoch": 57.964802724950324, "grad_norm": 0.012182429432868958, "learning_rate": 4.2058472892421236e-05, "loss": 0.0066946461796760556, "step": 204210 }, { "epoch": 57.96764121487369, "grad_norm": 0.04969494044780731, "learning_rate": 4.205563440249787e-05, "loss": 0.00020987670868635177, "step": 204220 }, { "epoch": 57.97047970479705, "grad_norm": 1.097992181777954, "learning_rate": 4.205279591257451e-05, "loss": 0.0003207802772521973, "step": 204230 }, { "epoch": 57.973318194720406, "grad_norm": 0.0022251338232308626, "learning_rate": 4.204995742265115e-05, "loss": 0.0008688408881425858, "step": 204240 }, { "epoch": 57.97615668464377, "grad_norm": 0.047018587589263916, "learning_rate": 4.2047118932727794e-05, "loss": 0.002119891718029976, "step": 204250 }, { "epoch": 57.97899517456713, "grad_norm": 0.7453769445419312, "learning_rate": 4.204428044280443e-05, "loss": 0.002792319282889366, "step": 204260 }, { "epoch": 57.981833664490495, "grad_norm": 0.022239092737436295, "learning_rate": 4.2041441952881063e-05, "loss": 0.0003782855346798897, "step": 204270 }, { "epoch": 57.98467215441385, "grad_norm": 0.24206700921058655, "learning_rate": 4.203860346295771e-05, "loss": 0.0020780937746167185, "step": 204280 }, { "epoch": 57.98751064433721, "grad_norm": 2.3478569984436035, "learning_rate": 4.2035764973034346e-05, "loss": 0.0003379948437213898, "step": 204290 }, { "epoch": 57.990349134260576, "grad_norm": 0.017278388142585754, "learning_rate": 4.203292648311099e-05, "loss": 0.0005168667063117027, "step": 204300 }, { "epoch": 57.99318762418393, "grad_norm": 0.04423820599913597, "learning_rate": 4.203008799318763e-05, "loss": 0.006134023517370224, "step": 204310 }, { "epoch": 57.996026114107295, "grad_norm": 0.48115724325180054, "learning_rate": 4.2027249503264264e-05, "loss": 0.00038707200437784197, "step": 204320 }, { "epoch": 57.99886460403066, "grad_norm": 0.3982360064983368, "learning_rate": 4.2024411013340905e-05, "loss": 0.00014454834163188935, "step": 204330 }, { "epoch": 58.00170309395401, "grad_norm": 0.014609374105930328, "learning_rate": 4.2021572523417546e-05, "loss": 0.00484803095459938, "step": 204340 }, { "epoch": 58.004541583877376, "grad_norm": 0.004720091354101896, "learning_rate": 4.201873403349418e-05, "loss": 0.0011188682168722154, "step": 204350 }, { "epoch": 58.00738007380074, "grad_norm": 0.04944813624024391, "learning_rate": 4.201589554357082e-05, "loss": 0.008696547150611878, "step": 204360 }, { "epoch": 58.0102185637241, "grad_norm": 0.04251408204436302, "learning_rate": 4.201305705364746e-05, "loss": 0.0001962706446647644, "step": 204370 }, { "epoch": 58.01305705364746, "grad_norm": 0.0041936528868973255, "learning_rate": 4.2010218563724105e-05, "loss": 0.0004284117370843887, "step": 204380 }, { "epoch": 58.01589554357082, "grad_norm": 5.875022888183594, "learning_rate": 4.200738007380074e-05, "loss": 0.0045247931033372876, "step": 204390 }, { "epoch": 58.018734033494184, "grad_norm": 0.09876479208469391, "learning_rate": 4.2004541583877374e-05, "loss": 0.001708426885306835, "step": 204400 }, { "epoch": 58.02157252341754, "grad_norm": 0.2148202806711197, "learning_rate": 4.200170309395402e-05, "loss": 0.006711287796497345, "step": 204410 }, { "epoch": 58.0244110133409, "grad_norm": 0.056353602558374405, "learning_rate": 4.199886460403066e-05, "loss": 0.00024582128971815107, "step": 204420 }, { "epoch": 58.027249503264265, "grad_norm": 0.03376562520861626, "learning_rate": 4.19960261141073e-05, "loss": 0.0003696959465742111, "step": 204430 }, { "epoch": 58.03008799318762, "grad_norm": 0.01804240234196186, "learning_rate": 4.199318762418394e-05, "loss": 0.0006461232900619507, "step": 204440 }, { "epoch": 58.032926483110984, "grad_norm": 2.7686243057250977, "learning_rate": 4.1990349134260574e-05, "loss": 0.008172833919525146, "step": 204450 }, { "epoch": 58.03576497303435, "grad_norm": 0.012485534884035587, "learning_rate": 4.1987510644337216e-05, "loss": 0.00032588206231594087, "step": 204460 }, { "epoch": 58.03860346295771, "grad_norm": 0.7644084095954895, "learning_rate": 4.198467215441386e-05, "loss": 0.00040119923651218416, "step": 204470 }, { "epoch": 58.041441952881065, "grad_norm": 0.007969729602336884, "learning_rate": 4.198183366449049e-05, "loss": 8.038021624088288e-05, "step": 204480 }, { "epoch": 58.04428044280443, "grad_norm": 0.01118133682757616, "learning_rate": 4.197899517456713e-05, "loss": 0.0006231948733329773, "step": 204490 }, { "epoch": 58.04711893272779, "grad_norm": 0.003313856665045023, "learning_rate": 4.197615668464377e-05, "loss": 0.0013464104384183883, "step": 204500 }, { "epoch": 58.04711893272779, "eval_accuracy": 0.981941883385261, "eval_loss": 0.07285170257091522, "eval_runtime": 38.4084, "eval_samples_per_second": 409.468, "eval_steps_per_second": 6.405, "step": 204500 }, { "epoch": 58.04995742265115, "grad_norm": 0.007787053007632494, "learning_rate": 4.1973318194720416e-05, "loss": 0.0017691485583782196, "step": 204510 }, { "epoch": 58.05279591257451, "grad_norm": 0.014412426389753819, "learning_rate": 4.197047970479705e-05, "loss": 0.00014282409101724626, "step": 204520 }, { "epoch": 58.05563440249787, "grad_norm": 0.01822320930659771, "learning_rate": 4.1967641214873685e-05, "loss": 0.003937309980392456, "step": 204530 }, { "epoch": 58.05847289242123, "grad_norm": 0.0028957969043403864, "learning_rate": 4.196480272495033e-05, "loss": 0.0010397227481007576, "step": 204540 }, { "epoch": 58.06131138234459, "grad_norm": 0.05166057124733925, "learning_rate": 4.196196423502697e-05, "loss": 0.0002224663272500038, "step": 204550 }, { "epoch": 58.064149872267954, "grad_norm": 0.019763192161917686, "learning_rate": 4.195912574510361e-05, "loss": 0.0011306118220090866, "step": 204560 }, { "epoch": 58.06698836219132, "grad_norm": 0.5204048156738281, "learning_rate": 4.195628725518025e-05, "loss": 0.010461674630641937, "step": 204570 }, { "epoch": 58.06982685211467, "grad_norm": 0.8840888738632202, "learning_rate": 4.1953448765256885e-05, "loss": 0.006362476199865341, "step": 204580 }, { "epoch": 58.072665342038036, "grad_norm": 0.22537623345851898, "learning_rate": 4.1950610275333526e-05, "loss": 0.003700114041566849, "step": 204590 }, { "epoch": 58.0755038319614, "grad_norm": 0.018554173409938812, "learning_rate": 4.194777178541016e-05, "loss": 0.0006221882998943329, "step": 204600 }, { "epoch": 58.078342321884755, "grad_norm": 0.1935230940580368, "learning_rate": 4.19449332954868e-05, "loss": 0.004090043902397156, "step": 204610 }, { "epoch": 58.08118081180812, "grad_norm": 0.03689482435584068, "learning_rate": 4.1942094805563444e-05, "loss": 0.00032245200127363206, "step": 204620 }, { "epoch": 58.08401930173148, "grad_norm": 0.005835431627929211, "learning_rate": 4.193925631564008e-05, "loss": 0.0006609981879591942, "step": 204630 }, { "epoch": 58.086857791654836, "grad_norm": 0.09874115884304047, "learning_rate": 4.193641782571672e-05, "loss": 7.767584174871445e-05, "step": 204640 }, { "epoch": 58.0896962815782, "grad_norm": 0.017990795895457268, "learning_rate": 4.193357933579336e-05, "loss": 0.00010504424571990966, "step": 204650 }, { "epoch": 58.09253477150156, "grad_norm": 1.0317108631134033, "learning_rate": 4.1930740845869996e-05, "loss": 0.00081552155315876, "step": 204660 }, { "epoch": 58.095373261424925, "grad_norm": 0.007205036003142595, "learning_rate": 4.1927902355946644e-05, "loss": 0.0015722692012786865, "step": 204670 }, { "epoch": 58.09821175134828, "grad_norm": 0.02487705647945404, "learning_rate": 4.192506386602328e-05, "loss": 0.0006824120879173279, "step": 204680 }, { "epoch": 58.10105024127164, "grad_norm": 0.08504097908735275, "learning_rate": 4.192222537609991e-05, "loss": 0.0016058724373579024, "step": 204690 }, { "epoch": 58.103888731195006, "grad_norm": 0.01226204913109541, "learning_rate": 4.1919386886176554e-05, "loss": 0.003087632916867733, "step": 204700 }, { "epoch": 58.10672722111836, "grad_norm": 0.8652583360671997, "learning_rate": 4.1916548396253196e-05, "loss": 0.012092893570661544, "step": 204710 }, { "epoch": 58.109565711041725, "grad_norm": 0.21987271308898926, "learning_rate": 4.191370990632984e-05, "loss": 0.00038542896509170533, "step": 204720 }, { "epoch": 58.11240420096509, "grad_norm": 0.06906081736087799, "learning_rate": 4.191087141640647e-05, "loss": 0.008192746341228485, "step": 204730 }, { "epoch": 58.11524269088845, "grad_norm": 2.061520576477051, "learning_rate": 4.190803292648311e-05, "loss": 0.0015306895598769187, "step": 204740 }, { "epoch": 58.11808118081181, "grad_norm": 0.6251250505447388, "learning_rate": 4.1905194436559755e-05, "loss": 0.0002821385860443115, "step": 204750 }, { "epoch": 58.12091967073517, "grad_norm": 0.052021417766809464, "learning_rate": 4.190235594663639e-05, "loss": 0.001352747529745102, "step": 204760 }, { "epoch": 58.12375816065853, "grad_norm": 0.007577709387987852, "learning_rate": 4.189951745671303e-05, "loss": 0.0008208589628338814, "step": 204770 }, { "epoch": 58.12659665058189, "grad_norm": 0.42864522337913513, "learning_rate": 4.189667896678967e-05, "loss": 0.0008409891277551651, "step": 204780 }, { "epoch": 58.12943514050525, "grad_norm": 2.4764277935028076, "learning_rate": 4.1893840476866306e-05, "loss": 0.0006575148552656173, "step": 204790 }, { "epoch": 58.132273630428614, "grad_norm": 0.009276926517486572, "learning_rate": 4.189100198694295e-05, "loss": 0.00019526053220033645, "step": 204800 }, { "epoch": 58.13511212035197, "grad_norm": 0.06408967077732086, "learning_rate": 4.188816349701959e-05, "loss": 0.002093321643769741, "step": 204810 }, { "epoch": 58.13795061027533, "grad_norm": 0.5544347763061523, "learning_rate": 4.1885325007096224e-05, "loss": 0.0023248100653290747, "step": 204820 }, { "epoch": 58.140789100198695, "grad_norm": 0.10763656347990036, "learning_rate": 4.1882486517172865e-05, "loss": 0.005840480700135231, "step": 204830 }, { "epoch": 58.14362759012206, "grad_norm": 0.0037287890445441008, "learning_rate": 4.1879648027249507e-05, "loss": 0.0009956980124115945, "step": 204840 }, { "epoch": 58.146466080045414, "grad_norm": 0.07216931879520416, "learning_rate": 4.187680953732615e-05, "loss": 0.0005460694432258606, "step": 204850 }, { "epoch": 58.14930456996878, "grad_norm": 0.0033426196314394474, "learning_rate": 4.187397104740278e-05, "loss": 0.0004671970382332802, "step": 204860 }, { "epoch": 58.15214305989214, "grad_norm": 0.03497885540127754, "learning_rate": 4.1871132557479424e-05, "loss": 0.0004595711827278137, "step": 204870 }, { "epoch": 58.154981549815496, "grad_norm": 0.004295795690268278, "learning_rate": 4.1868294067556065e-05, "loss": 0.00015981979668140412, "step": 204880 }, { "epoch": 58.15782003973886, "grad_norm": 0.2138180136680603, "learning_rate": 4.18654555776327e-05, "loss": 0.001340038515627384, "step": 204890 }, { "epoch": 58.16065852966222, "grad_norm": 0.05375741794705391, "learning_rate": 4.186261708770934e-05, "loss": 0.0003528803586959839, "step": 204900 }, { "epoch": 58.16349701958558, "grad_norm": 0.01251355279237032, "learning_rate": 4.185977859778598e-05, "loss": 0.0002212468534708023, "step": 204910 }, { "epoch": 58.16633550950894, "grad_norm": 0.03464754670858383, "learning_rate": 4.185694010786262e-05, "loss": 0.00016168579459190368, "step": 204920 }, { "epoch": 58.1691739994323, "grad_norm": 0.020966041833162308, "learning_rate": 4.185410161793926e-05, "loss": 0.00010698251426219941, "step": 204930 }, { "epoch": 58.172012489355666, "grad_norm": 0.009227864444255829, "learning_rate": 4.18512631280159e-05, "loss": 7.316544651985169e-05, "step": 204940 }, { "epoch": 58.17485097927902, "grad_norm": 0.012709129601716995, "learning_rate": 4.1848424638092535e-05, "loss": 0.00025119241327047347, "step": 204950 }, { "epoch": 58.177689469202384, "grad_norm": 1.4369302988052368, "learning_rate": 4.1845586148169176e-05, "loss": 0.00032895617187023163, "step": 204960 }, { "epoch": 58.18052795912575, "grad_norm": 0.02272634394466877, "learning_rate": 4.184274765824582e-05, "loss": 0.0003662155941128731, "step": 204970 }, { "epoch": 58.1833664490491, "grad_norm": 0.03162050619721413, "learning_rate": 4.183990916832245e-05, "loss": 0.00028894301503896714, "step": 204980 }, { "epoch": 58.186204938972466, "grad_norm": 1.1812448501586914, "learning_rate": 4.183707067839909e-05, "loss": 0.00035430155694484713, "step": 204990 }, { "epoch": 58.18904342889583, "grad_norm": 0.01792343147099018, "learning_rate": 4.183423218847573e-05, "loss": 0.0036519087851047518, "step": 205000 }, { "epoch": 58.18904342889583, "eval_accuracy": 0.9834043364913843, "eval_loss": 0.0674113780260086, "eval_runtime": 44.0965, "eval_samples_per_second": 356.649, "eval_steps_per_second": 5.579, "step": 205000 }, { "epoch": 58.191881918819185, "grad_norm": 0.029925743117928505, "learning_rate": 4.1831393698552376e-05, "loss": 0.0005703367292881012, "step": 205010 }, { "epoch": 58.19472040874255, "grad_norm": 0.0093675022944808, "learning_rate": 4.182855520862901e-05, "loss": 0.00038388129323720934, "step": 205020 }, { "epoch": 58.19755889866591, "grad_norm": 0.017377253621816635, "learning_rate": 4.182571671870565e-05, "loss": 0.0003394393250346184, "step": 205030 }, { "epoch": 58.20039738858927, "grad_norm": 0.013774787075817585, "learning_rate": 4.1822878228782293e-05, "loss": 0.0005149845033884049, "step": 205040 }, { "epoch": 58.20323587851263, "grad_norm": 0.23435036838054657, "learning_rate": 4.182003973885893e-05, "loss": 0.0007537446916103363, "step": 205050 }, { "epoch": 58.20607436843599, "grad_norm": 0.09139774739742279, "learning_rate": 4.181720124893557e-05, "loss": 0.0023949107155203818, "step": 205060 }, { "epoch": 58.208912858359355, "grad_norm": 0.03301401063799858, "learning_rate": 4.181436275901221e-05, "loss": 0.005078157782554627, "step": 205070 }, { "epoch": 58.21175134828271, "grad_norm": 0.002391914138570428, "learning_rate": 4.181180811808118e-05, "loss": 0.017702288925647736, "step": 205080 }, { "epoch": 58.214589838206074, "grad_norm": 0.2635616958141327, "learning_rate": 4.1808969628157825e-05, "loss": 0.00217257309705019, "step": 205090 }, { "epoch": 58.21742832812944, "grad_norm": 0.007323818746954203, "learning_rate": 4.180613113823446e-05, "loss": 0.009561610221862794, "step": 205100 }, { "epoch": 58.2202668180528, "grad_norm": 0.06545212119817734, "learning_rate": 4.18032926483111e-05, "loss": 0.00486823171377182, "step": 205110 }, { "epoch": 58.223105307976155, "grad_norm": 0.23934005200862885, "learning_rate": 4.180045415838774e-05, "loss": 0.0007272385060787201, "step": 205120 }, { "epoch": 58.22594379789952, "grad_norm": 0.4647933840751648, "learning_rate": 4.179761566846438e-05, "loss": 0.0010395411401987076, "step": 205130 }, { "epoch": 58.22878228782288, "grad_norm": 2.2753896713256836, "learning_rate": 4.179477717854102e-05, "loss": 0.00046726260334253313, "step": 205140 }, { "epoch": 58.23162077774624, "grad_norm": 1.7255712747573853, "learning_rate": 4.179193868861766e-05, "loss": 0.0009852660819888114, "step": 205150 }, { "epoch": 58.2344592676696, "grad_norm": 0.16914959251880646, "learning_rate": 4.1789100198694295e-05, "loss": 0.00690164566040039, "step": 205160 }, { "epoch": 58.23729775759296, "grad_norm": 0.02531939372420311, "learning_rate": 4.1786261708770936e-05, "loss": 0.00035314764827489855, "step": 205170 }, { "epoch": 58.24013624751632, "grad_norm": 12.328615188598633, "learning_rate": 4.178342321884757e-05, "loss": 0.008323120325803757, "step": 205180 }, { "epoch": 58.24297473743968, "grad_norm": 1.2588225603103638, "learning_rate": 4.178058472892422e-05, "loss": 0.0021214336156845093, "step": 205190 }, { "epoch": 58.245813227363044, "grad_norm": 0.005272527690976858, "learning_rate": 4.177774623900085e-05, "loss": 0.00035147611051797865, "step": 205200 }, { "epoch": 58.24865171728641, "grad_norm": 0.23246152698993683, "learning_rate": 4.177490774907749e-05, "loss": 0.0010761380195617675, "step": 205210 }, { "epoch": 58.25149020720976, "grad_norm": 0.18662580847740173, "learning_rate": 4.1772069259154136e-05, "loss": 0.0077549397945404054, "step": 205220 }, { "epoch": 58.254328697133126, "grad_norm": 0.05780060216784477, "learning_rate": 4.176923076923077e-05, "loss": 0.0002190183848142624, "step": 205230 }, { "epoch": 58.25716718705649, "grad_norm": 0.007901027798652649, "learning_rate": 4.176639227930741e-05, "loss": 0.0007043356075882911, "step": 205240 }, { "epoch": 58.260005676979844, "grad_norm": 0.11251160502433777, "learning_rate": 4.1763553789384053e-05, "loss": 0.00048215072602033617, "step": 205250 }, { "epoch": 58.26284416690321, "grad_norm": 0.16683025658130646, "learning_rate": 4.176071529946069e-05, "loss": 0.00021763816475868226, "step": 205260 }, { "epoch": 58.26568265682657, "grad_norm": 0.01084259431809187, "learning_rate": 4.175787680953733e-05, "loss": 0.00030349288135766983, "step": 205270 }, { "epoch": 58.268521146749926, "grad_norm": 0.06273460388183594, "learning_rate": 4.1755038319613964e-05, "loss": 0.0006123093888163566, "step": 205280 }, { "epoch": 58.27135963667329, "grad_norm": 0.04946700111031532, "learning_rate": 4.1752199829690605e-05, "loss": 0.0005493922159075737, "step": 205290 }, { "epoch": 58.27419812659665, "grad_norm": 0.0479201041162014, "learning_rate": 4.174936133976725e-05, "loss": 0.0008614597842097283, "step": 205300 }, { "epoch": 58.277036616520014, "grad_norm": 2.6028575897216797, "learning_rate": 4.174652284984388e-05, "loss": 0.0034229643642902374, "step": 205310 }, { "epoch": 58.27987510644337, "grad_norm": 0.0597568079829216, "learning_rate": 4.174368435992053e-05, "loss": 0.0013471111655235291, "step": 205320 }, { "epoch": 58.28271359636673, "grad_norm": 0.03462396562099457, "learning_rate": 4.1740845869997164e-05, "loss": 0.0010512370616197586, "step": 205330 }, { "epoch": 58.285552086290096, "grad_norm": 0.09809529781341553, "learning_rate": 4.17380073800738e-05, "loss": 0.0017828481271862985, "step": 205340 }, { "epoch": 58.28839057621345, "grad_norm": 0.28974446654319763, "learning_rate": 4.173516889015045e-05, "loss": 0.00272450614720583, "step": 205350 }, { "epoch": 58.291229066136815, "grad_norm": 2.663470506668091, "learning_rate": 4.173233040022708e-05, "loss": 0.0015726488083600998, "step": 205360 }, { "epoch": 58.29406755606018, "grad_norm": 0.009592766873538494, "learning_rate": 4.172949191030372e-05, "loss": 0.0005734635517001152, "step": 205370 }, { "epoch": 58.29690604598353, "grad_norm": 0.46052566170692444, "learning_rate": 4.172665342038036e-05, "loss": 0.00017244555056095123, "step": 205380 }, { "epoch": 58.299744535906896, "grad_norm": 0.00583167327567935, "learning_rate": 4.1723814930457e-05, "loss": 0.0027358245104551317, "step": 205390 }, { "epoch": 58.30258302583026, "grad_norm": 0.004385358653962612, "learning_rate": 4.172097644053364e-05, "loss": 0.0009457256644964218, "step": 205400 }, { "epoch": 58.30542151575362, "grad_norm": 0.021628104150295258, "learning_rate": 4.1718137950610275e-05, "loss": 0.0058965303003788, "step": 205410 }, { "epoch": 58.30826000567698, "grad_norm": 2.812877655029297, "learning_rate": 4.1715299460686916e-05, "loss": 0.0034448686987161637, "step": 205420 }, { "epoch": 58.31109849560034, "grad_norm": 0.16251885890960693, "learning_rate": 4.171246097076356e-05, "loss": 0.0002921389415860176, "step": 205430 }, { "epoch": 58.3139369855237, "grad_norm": 0.0036625878419727087, "learning_rate": 4.170962248084019e-05, "loss": 0.005190795660018921, "step": 205440 }, { "epoch": 58.31677547544706, "grad_norm": 0.013199378736317158, "learning_rate": 4.1706783990916833e-05, "loss": 0.0015552449971437455, "step": 205450 }, { "epoch": 58.31961396537042, "grad_norm": 3.2538769245147705, "learning_rate": 4.1703945500993475e-05, "loss": 0.0018636001273989677, "step": 205460 }, { "epoch": 58.322452455293785, "grad_norm": 0.004262113012373447, "learning_rate": 4.170110701107011e-05, "loss": 0.00024249088019132615, "step": 205470 }, { "epoch": 58.32529094521715, "grad_norm": 0.014740459620952606, "learning_rate": 4.169826852114675e-05, "loss": 0.0009349964559078217, "step": 205480 }, { "epoch": 58.328129435140504, "grad_norm": 0.49443376064300537, "learning_rate": 4.169543003122339e-05, "loss": 0.0002319561317563057, "step": 205490 }, { "epoch": 58.33096792506387, "grad_norm": 0.04594168812036514, "learning_rate": 4.169259154130003e-05, "loss": 0.00014371126890182494, "step": 205500 }, { "epoch": 58.33096792506387, "eval_accuracy": 0.9828320722324665, "eval_loss": 0.07338529825210571, "eval_runtime": 37.9833, "eval_samples_per_second": 414.05, "eval_steps_per_second": 6.477, "step": 205500 }, { "epoch": 58.33380641498723, "grad_norm": 0.05919140204787254, "learning_rate": 4.168975305137667e-05, "loss": 0.0001548275351524353, "step": 205510 }, { "epoch": 58.336644904910585, "grad_norm": 0.08505497127771378, "learning_rate": 4.168691456145331e-05, "loss": 0.00016193781048059463, "step": 205520 }, { "epoch": 58.33948339483395, "grad_norm": 0.020746586844325066, "learning_rate": 4.168407607152995e-05, "loss": 0.0013565123081207275, "step": 205530 }, { "epoch": 58.34232188475731, "grad_norm": 0.017156919464468956, "learning_rate": 4.1681237581606586e-05, "loss": 0.0004105160012841225, "step": 205540 }, { "epoch": 58.34516037468067, "grad_norm": 0.15528810024261475, "learning_rate": 4.167839909168323e-05, "loss": 0.0003421945497393608, "step": 205550 }, { "epoch": 58.34799886460403, "grad_norm": 2.7749853134155273, "learning_rate": 4.167556060175987e-05, "loss": 0.0012890396639704705, "step": 205560 }, { "epoch": 58.35083735452739, "grad_norm": 0.007283724844455719, "learning_rate": 4.16727221118365e-05, "loss": 0.0001368766650557518, "step": 205570 }, { "epoch": 58.353675844450756, "grad_norm": 0.4202253818511963, "learning_rate": 4.1669883621913144e-05, "loss": 0.0017622077837586402, "step": 205580 }, { "epoch": 58.35651433437411, "grad_norm": 0.05958292633295059, "learning_rate": 4.1667045131989786e-05, "loss": 0.0002933496609330177, "step": 205590 }, { "epoch": 58.359352824297474, "grad_norm": 0.027468958869576454, "learning_rate": 4.166420664206642e-05, "loss": 0.00016287006437778472, "step": 205600 }, { "epoch": 58.36219131422084, "grad_norm": 0.1755456179380417, "learning_rate": 4.166136815214306e-05, "loss": 0.0018975432962179185, "step": 205610 }, { "epoch": 58.36502980414419, "grad_norm": 0.0024679331108927727, "learning_rate": 4.16585296622197e-05, "loss": 0.00013727582991123198, "step": 205620 }, { "epoch": 58.367868294067556, "grad_norm": 0.04784143716096878, "learning_rate": 4.165569117229634e-05, "loss": 0.00011996291577816009, "step": 205630 }, { "epoch": 58.37070678399092, "grad_norm": 0.06402751058340073, "learning_rate": 4.165285268237298e-05, "loss": 0.0005816943943500519, "step": 205640 }, { "epoch": 58.373545273914274, "grad_norm": 0.021792611107230186, "learning_rate": 4.165001419244962e-05, "loss": 0.00014340169727802276, "step": 205650 }, { "epoch": 58.37638376383764, "grad_norm": 0.02513652667403221, "learning_rate": 4.164717570252626e-05, "loss": 0.00010726060718297958, "step": 205660 }, { "epoch": 58.379222253761, "grad_norm": 0.03312306106090546, "learning_rate": 4.1644337212602896e-05, "loss": 0.00028573386371135714, "step": 205670 }, { "epoch": 58.38206074368436, "grad_norm": 0.7505316734313965, "learning_rate": 4.164149872267953e-05, "loss": 0.0004943571984767914, "step": 205680 }, { "epoch": 58.38489923360772, "grad_norm": 0.011143319308757782, "learning_rate": 4.163866023275618e-05, "loss": 0.0004932396113872528, "step": 205690 }, { "epoch": 58.38773772353108, "grad_norm": 0.029147887602448463, "learning_rate": 4.1635821742832814e-05, "loss": 0.00018346235156059266, "step": 205700 }, { "epoch": 58.390576213454445, "grad_norm": 0.036241937428712845, "learning_rate": 4.1632983252909455e-05, "loss": 7.183216512203216e-05, "step": 205710 }, { "epoch": 58.3934147033778, "grad_norm": 0.003327962476760149, "learning_rate": 4.1630144762986096e-05, "loss": 0.0001353394240140915, "step": 205720 }, { "epoch": 58.39625319330116, "grad_norm": 0.010289386846125126, "learning_rate": 4.162730627306273e-05, "loss": 4.100259393453598e-05, "step": 205730 }, { "epoch": 58.399091683224526, "grad_norm": 0.005497752223163843, "learning_rate": 4.162446778313937e-05, "loss": 0.00015054550021886826, "step": 205740 }, { "epoch": 58.40193017314788, "grad_norm": 0.04654363542795181, "learning_rate": 4.1621629293216014e-05, "loss": 0.00017995182424783706, "step": 205750 }, { "epoch": 58.404768663071245, "grad_norm": 0.005712355021387339, "learning_rate": 4.161879080329265e-05, "loss": 0.0002234259620308876, "step": 205760 }, { "epoch": 58.40760715299461, "grad_norm": 0.005517516750842333, "learning_rate": 4.161595231336929e-05, "loss": 0.00219841692596674, "step": 205770 }, { "epoch": 58.41044564291797, "grad_norm": 0.044593267142772675, "learning_rate": 4.1613113823445924e-05, "loss": 0.0009855015203356742, "step": 205780 }, { "epoch": 58.413284132841326, "grad_norm": 0.07433732599020004, "learning_rate": 4.161027533352257e-05, "loss": 8.283816277980805e-05, "step": 205790 }, { "epoch": 58.41612262276469, "grad_norm": 0.006761065684258938, "learning_rate": 4.160743684359921e-05, "loss": 0.00017592646181583405, "step": 205800 }, { "epoch": 58.41896111268805, "grad_norm": 0.021525848656892776, "learning_rate": 4.160459835367584e-05, "loss": 0.000332818366587162, "step": 205810 }, { "epoch": 58.42179960261141, "grad_norm": 0.06596382707357407, "learning_rate": 4.160175986375249e-05, "loss": 0.00010679271072149277, "step": 205820 }, { "epoch": 58.42463809253477, "grad_norm": 0.008186429738998413, "learning_rate": 4.1598921373829124e-05, "loss": 0.0013776861131191253, "step": 205830 }, { "epoch": 58.427476582458134, "grad_norm": 0.04325934499502182, "learning_rate": 4.1596082883905766e-05, "loss": 0.0001565564423799515, "step": 205840 }, { "epoch": 58.43031507238149, "grad_norm": 0.007758022751659155, "learning_rate": 4.159324439398241e-05, "loss": 0.009622807800769805, "step": 205850 }, { "epoch": 58.43315356230485, "grad_norm": 0.0008436045609414577, "learning_rate": 4.159040590405904e-05, "loss": 5.627274513244629e-05, "step": 205860 }, { "epoch": 58.435992052228215, "grad_norm": 0.003909075167030096, "learning_rate": 4.158756741413568e-05, "loss": 0.0007440011948347091, "step": 205870 }, { "epoch": 58.43883054215158, "grad_norm": 0.037505462765693665, "learning_rate": 4.158472892421232e-05, "loss": 0.0005415501073002815, "step": 205880 }, { "epoch": 58.441669032074934, "grad_norm": 0.10548684746026993, "learning_rate": 4.158189043428896e-05, "loss": 0.00022433362901210784, "step": 205890 }, { "epoch": 58.4445075219983, "grad_norm": 0.011826513335108757, "learning_rate": 4.15790519443656e-05, "loss": 0.0002094617113471031, "step": 205900 }, { "epoch": 58.44734601192166, "grad_norm": 0.02385350875556469, "learning_rate": 4.1576213454442235e-05, "loss": 0.0005576690658926964, "step": 205910 }, { "epoch": 58.450184501845015, "grad_norm": 0.005970241501927376, "learning_rate": 4.1573374964518876e-05, "loss": 0.001280825212597847, "step": 205920 }, { "epoch": 58.45302299176838, "grad_norm": 0.44238701462745667, "learning_rate": 4.157053647459552e-05, "loss": 0.00025926437228918076, "step": 205930 }, { "epoch": 58.45586148169174, "grad_norm": 0.0065049282275140285, "learning_rate": 4.156769798467215e-05, "loss": 0.0038384430110454558, "step": 205940 }, { "epoch": 58.458699971615104, "grad_norm": 0.05856052786111832, "learning_rate": 4.15648594947488e-05, "loss": 0.00023126956075429915, "step": 205950 }, { "epoch": 58.46153846153846, "grad_norm": 0.006132196635007858, "learning_rate": 4.1562021004825435e-05, "loss": 0.006654073297977447, "step": 205960 }, { "epoch": 58.46437695146182, "grad_norm": 0.0008627644274383783, "learning_rate": 4.155918251490207e-05, "loss": 0.006358840316534042, "step": 205970 }, { "epoch": 58.467215441385186, "grad_norm": 0.008628240786492825, "learning_rate": 4.155634402497871e-05, "loss": 0.0006374139338731766, "step": 205980 }, { "epoch": 58.47005393130854, "grad_norm": 0.053890977054834366, "learning_rate": 4.155350553505535e-05, "loss": 0.0009001288563013077, "step": 205990 }, { "epoch": 58.472892421231904, "grad_norm": 0.03054971434175968, "learning_rate": 4.1550667045131994e-05, "loss": 0.0003484591841697693, "step": 206000 }, { "epoch": 58.472892421231904, "eval_accuracy": 0.981941883385261, "eval_loss": 0.07506387680768967, "eval_runtime": 40.7188, "eval_samples_per_second": 386.234, "eval_steps_per_second": 6.041, "step": 206000 }, { "epoch": 58.47573091115527, "grad_norm": 0.012647801078855991, "learning_rate": 4.154782855520863e-05, "loss": 0.00047483965754508973, "step": 206010 }, { "epoch": 58.47856940107862, "grad_norm": 0.13962577283382416, "learning_rate": 4.154499006528527e-05, "loss": 0.000630873441696167, "step": 206020 }, { "epoch": 58.481407891001986, "grad_norm": 0.06640667468309402, "learning_rate": 4.154215157536191e-05, "loss": 0.0001446818932890892, "step": 206030 }, { "epoch": 58.48424638092535, "grad_norm": 0.1520245373249054, "learning_rate": 4.1539313085438546e-05, "loss": 0.00024001076817512513, "step": 206040 }, { "epoch": 58.48708487084871, "grad_norm": 0.0598839595913887, "learning_rate": 4.153647459551519e-05, "loss": 0.00016199573874473572, "step": 206050 }, { "epoch": 58.48992336077207, "grad_norm": 0.043329961597919464, "learning_rate": 4.153363610559183e-05, "loss": 0.00013883523643016816, "step": 206060 }, { "epoch": 58.49276185069543, "grad_norm": 0.04808885231614113, "learning_rate": 4.153079761566846e-05, "loss": 0.0001732330769300461, "step": 206070 }, { "epoch": 58.49560034061879, "grad_norm": 0.11921722441911697, "learning_rate": 4.1527959125745105e-05, "loss": 0.00012607965618371964, "step": 206080 }, { "epoch": 58.49843883054215, "grad_norm": 0.07622722536325455, "learning_rate": 4.1525120635821746e-05, "loss": 7.968265563249589e-05, "step": 206090 }, { "epoch": 58.50127732046551, "grad_norm": 0.008238743990659714, "learning_rate": 4.152228214589838e-05, "loss": 0.0011409003287553786, "step": 206100 }, { "epoch": 58.504115810388875, "grad_norm": 0.16532908380031586, "learning_rate": 4.151944365597502e-05, "loss": 0.0023183852434158327, "step": 206110 }, { "epoch": 58.50695430031223, "grad_norm": 0.4173431396484375, "learning_rate": 4.151660516605166e-05, "loss": 0.0006516281515359879, "step": 206120 }, { "epoch": 58.50979279023559, "grad_norm": 0.05585500970482826, "learning_rate": 4.1513766676128305e-05, "loss": 0.00011192765086889266, "step": 206130 }, { "epoch": 58.512631280158956, "grad_norm": 0.06118696182966232, "learning_rate": 4.151092818620494e-05, "loss": 0.00030275266617536546, "step": 206140 }, { "epoch": 58.51546977008232, "grad_norm": 0.004585965070873499, "learning_rate": 4.150808969628158e-05, "loss": 3.874916583299637e-05, "step": 206150 }, { "epoch": 58.518308260005675, "grad_norm": 0.022876981645822525, "learning_rate": 4.150525120635822e-05, "loss": 0.012019737809896468, "step": 206160 }, { "epoch": 58.52114674992904, "grad_norm": 0.011714717373251915, "learning_rate": 4.150241271643486e-05, "loss": 7.792580872774124e-05, "step": 206170 }, { "epoch": 58.5239852398524, "grad_norm": 0.09019877761602402, "learning_rate": 4.14995742265115e-05, "loss": 0.0001373184844851494, "step": 206180 }, { "epoch": 58.52682372977576, "grad_norm": 0.8618117570877075, "learning_rate": 4.149673573658814e-05, "loss": 0.0003401866182684898, "step": 206190 }, { "epoch": 58.52966221969912, "grad_norm": 0.031998831778764725, "learning_rate": 4.1493897246664774e-05, "loss": 0.0006629014387726784, "step": 206200 }, { "epoch": 58.53250070962248, "grad_norm": 1.8911199569702148, "learning_rate": 4.1491058756741415e-05, "loss": 0.0009357692673802376, "step": 206210 }, { "epoch": 58.535339199545845, "grad_norm": 0.06340353935956955, "learning_rate": 4.148822026681806e-05, "loss": 0.0002682209014892578, "step": 206220 }, { "epoch": 58.5381776894692, "grad_norm": 0.0025399047881364822, "learning_rate": 4.148538177689469e-05, "loss": 0.00014417357742786409, "step": 206230 }, { "epoch": 58.541016179392564, "grad_norm": 0.04785706847906113, "learning_rate": 4.148254328697133e-05, "loss": 0.00043162815272808076, "step": 206240 }, { "epoch": 58.54385466931593, "grad_norm": 0.04280814900994301, "learning_rate": 4.1479704797047974e-05, "loss": 0.00010405741631984711, "step": 206250 }, { "epoch": 58.54669315923928, "grad_norm": 0.007211896125227213, "learning_rate": 4.1476866307124615e-05, "loss": 0.0001559469848871231, "step": 206260 }, { "epoch": 58.549531649162645, "grad_norm": 0.41072937846183777, "learning_rate": 4.147402781720125e-05, "loss": 0.0007785046473145485, "step": 206270 }, { "epoch": 58.55237013908601, "grad_norm": 0.03221600875258446, "learning_rate": 4.147118932727789e-05, "loss": 0.0005664357915520668, "step": 206280 }, { "epoch": 58.555208629009364, "grad_norm": 0.014113855548202991, "learning_rate": 4.146835083735453e-05, "loss": 0.001332275941967964, "step": 206290 }, { "epoch": 58.55804711893273, "grad_norm": 0.04967552050948143, "learning_rate": 4.146551234743117e-05, "loss": 0.00012233182787895202, "step": 206300 }, { "epoch": 58.56088560885609, "grad_norm": 0.06412232667207718, "learning_rate": 4.146267385750781e-05, "loss": 0.0007674882188439369, "step": 206310 }, { "epoch": 58.56372409877945, "grad_norm": 0.03517277538776398, "learning_rate": 4.145983536758445e-05, "loss": 0.00015060361474752426, "step": 206320 }, { "epoch": 58.56656258870281, "grad_norm": 0.1897529512643814, "learning_rate": 4.1456996877661085e-05, "loss": 0.00016882847994565965, "step": 206330 }, { "epoch": 58.56940107862617, "grad_norm": 0.02464669942855835, "learning_rate": 4.1454158387737726e-05, "loss": 0.0003928510472178459, "step": 206340 }, { "epoch": 58.572239568549534, "grad_norm": 4.697910785675049, "learning_rate": 4.145131989781437e-05, "loss": 0.0013181466609239578, "step": 206350 }, { "epoch": 58.57507805847289, "grad_norm": 13.180850982666016, "learning_rate": 4.1448481407891e-05, "loss": 0.005881599336862564, "step": 206360 }, { "epoch": 58.57791654839625, "grad_norm": 0.08839767426252365, "learning_rate": 4.1445642917967643e-05, "loss": 0.0006431523710489273, "step": 206370 }, { "epoch": 58.580755038319616, "grad_norm": 0.028471684083342552, "learning_rate": 4.1442804428044285e-05, "loss": 0.0032212164252996443, "step": 206380 }, { "epoch": 58.58359352824297, "grad_norm": 0.33601465821266174, "learning_rate": 4.143996593812092e-05, "loss": 0.0011606188490986824, "step": 206390 }, { "epoch": 58.586432018166335, "grad_norm": 0.8924002051353455, "learning_rate": 4.143712744819756e-05, "loss": 0.01631752699613571, "step": 206400 }, { "epoch": 58.5892705080897, "grad_norm": 0.18819598853588104, "learning_rate": 4.1434288958274195e-05, "loss": 0.00024470705538988114, "step": 206410 }, { "epoch": 58.59210899801306, "grad_norm": 0.020847603678703308, "learning_rate": 4.1431450468350844e-05, "loss": 0.0004959924146533013, "step": 206420 }, { "epoch": 58.594947487936416, "grad_norm": 0.016195915639400482, "learning_rate": 4.142861197842748e-05, "loss": 0.0023323871195316316, "step": 206430 }, { "epoch": 58.59778597785978, "grad_norm": 0.0031910387333482504, "learning_rate": 4.142577348850411e-05, "loss": 0.0021481698378920553, "step": 206440 }, { "epoch": 58.60062446778314, "grad_norm": 0.15850377082824707, "learning_rate": 4.142293499858076e-05, "loss": 0.0005995191633701325, "step": 206450 }, { "epoch": 58.6034629577065, "grad_norm": 1.5459582805633545, "learning_rate": 4.1420096508657395e-05, "loss": 0.0009465213865041733, "step": 206460 }, { "epoch": 58.60630144762986, "grad_norm": 0.0877140462398529, "learning_rate": 4.141725801873404e-05, "loss": 0.005752123519778251, "step": 206470 }, { "epoch": 58.60913993755322, "grad_norm": 0.018954943865537643, "learning_rate": 4.141441952881068e-05, "loss": 0.0015621120110154152, "step": 206480 }, { "epoch": 58.61197842747658, "grad_norm": 0.010974126867949963, "learning_rate": 4.141158103888731e-05, "loss": 0.0002536749467253685, "step": 206490 }, { "epoch": 58.61481691739994, "grad_norm": 0.07330929487943649, "learning_rate": 4.1408742548963954e-05, "loss": 0.0002752089872956276, "step": 206500 }, { "epoch": 58.61481691739994, "eval_accuracy": 0.982069053220576, "eval_loss": 0.07377365231513977, "eval_runtime": 52.724, "eval_samples_per_second": 298.289, "eval_steps_per_second": 4.666, "step": 206500 }, { "epoch": 58.617655407323305, "grad_norm": 0.024090440943837166, "learning_rate": 4.140590405904059e-05, "loss": 0.0011633003130555152, "step": 206510 }, { "epoch": 58.62049389724667, "grad_norm": 0.08180319517850876, "learning_rate": 4.140306556911723e-05, "loss": 0.002426164597272873, "step": 206520 }, { "epoch": 58.623332387170024, "grad_norm": 0.25393521785736084, "learning_rate": 4.140022707919387e-05, "loss": 0.003691478818655014, "step": 206530 }, { "epoch": 58.62617087709339, "grad_norm": 0.028325993567705154, "learning_rate": 4.1397388589270506e-05, "loss": 0.007782790064811707, "step": 206540 }, { "epoch": 58.62900936701675, "grad_norm": 0.0044400980696082115, "learning_rate": 4.1394550099347154e-05, "loss": 0.00041911378502845764, "step": 206550 }, { "epoch": 58.631847856940105, "grad_norm": 0.1866627186536789, "learning_rate": 4.139171160942379e-05, "loss": 0.0004887344315648079, "step": 206560 }, { "epoch": 58.63468634686347, "grad_norm": 1.3670369386672974, "learning_rate": 4.1388873119500424e-05, "loss": 0.002914123050868511, "step": 206570 }, { "epoch": 58.63752483678683, "grad_norm": 0.8975334763526917, "learning_rate": 4.138603462957707e-05, "loss": 0.002093156985938549, "step": 206580 }, { "epoch": 58.64036332671019, "grad_norm": 0.048118721693754196, "learning_rate": 4.1383196139653706e-05, "loss": 0.0032589659094810486, "step": 206590 }, { "epoch": 58.64320181663355, "grad_norm": 0.007556712254881859, "learning_rate": 4.138035764973035e-05, "loss": 0.0022525111213326453, "step": 206600 }, { "epoch": 58.64604030655691, "grad_norm": 0.03205880895256996, "learning_rate": 4.137751915980698e-05, "loss": 0.0014811836183071137, "step": 206610 }, { "epoch": 58.648878796480275, "grad_norm": 0.004895390011370182, "learning_rate": 4.1374680669883624e-05, "loss": 0.0007096046581864357, "step": 206620 }, { "epoch": 58.65171728640363, "grad_norm": 0.5038222074508667, "learning_rate": 4.1371842179960265e-05, "loss": 0.003775915503501892, "step": 206630 }, { "epoch": 58.654555776326994, "grad_norm": 0.0027992359828203917, "learning_rate": 4.13690036900369e-05, "loss": 0.005871494114398956, "step": 206640 }, { "epoch": 58.65739426625036, "grad_norm": 0.1297023892402649, "learning_rate": 4.136616520011354e-05, "loss": 0.0008082563057541847, "step": 206650 }, { "epoch": 58.66023275617371, "grad_norm": 0.023692414164543152, "learning_rate": 4.136332671019018e-05, "loss": 0.0012805145233869553, "step": 206660 }, { "epoch": 58.663071246097076, "grad_norm": 0.04678185284137726, "learning_rate": 4.136048822026682e-05, "loss": 0.0008598867803812027, "step": 206670 }, { "epoch": 58.66590973602044, "grad_norm": 0.08608536422252655, "learning_rate": 4.1357649730343465e-05, "loss": 0.00024086758494377137, "step": 206680 }, { "epoch": 58.6687482259438, "grad_norm": 0.043327104300260544, "learning_rate": 4.13548112404201e-05, "loss": 0.00028334371745586395, "step": 206690 }, { "epoch": 58.67158671586716, "grad_norm": 0.6518345475196838, "learning_rate": 4.1351972750496734e-05, "loss": 0.0015212493017315865, "step": 206700 }, { "epoch": 58.67442520579052, "grad_norm": 0.10125099867582321, "learning_rate": 4.1349134260573376e-05, "loss": 0.0003245124593377113, "step": 206710 }, { "epoch": 58.67726369571388, "grad_norm": 0.05355486646294594, "learning_rate": 4.134629577065002e-05, "loss": 0.0008546985685825348, "step": 206720 }, { "epoch": 58.68010218563724, "grad_norm": 0.009921795688569546, "learning_rate": 4.134345728072666e-05, "loss": 0.00016415789723396302, "step": 206730 }, { "epoch": 58.6829406755606, "grad_norm": 0.08284489065408707, "learning_rate": 4.134061879080329e-05, "loss": 0.000462493859231472, "step": 206740 }, { "epoch": 58.685779165483964, "grad_norm": 1.9619470834732056, "learning_rate": 4.1337780300879934e-05, "loss": 0.00043160580098629, "step": 206750 }, { "epoch": 58.68861765540732, "grad_norm": 0.031323954463005066, "learning_rate": 4.1334941810956576e-05, "loss": 0.000316205807030201, "step": 206760 }, { "epoch": 58.69145614533068, "grad_norm": 5.80590295791626, "learning_rate": 4.133210332103321e-05, "loss": 0.004509701952338218, "step": 206770 }, { "epoch": 58.694294635254046, "grad_norm": 0.020485520362854004, "learning_rate": 4.132926483110985e-05, "loss": 0.00043350253254175185, "step": 206780 }, { "epoch": 58.69713312517741, "grad_norm": 0.5521941184997559, "learning_rate": 4.132642634118649e-05, "loss": 0.0003230936825275421, "step": 206790 }, { "epoch": 58.699971615100765, "grad_norm": 0.033075690269470215, "learning_rate": 4.132358785126313e-05, "loss": 0.0005178937688469886, "step": 206800 }, { "epoch": 58.70281010502413, "grad_norm": 0.15201885998249054, "learning_rate": 4.132074936133977e-05, "loss": 0.00017072837799787522, "step": 206810 }, { "epoch": 58.70564859494749, "grad_norm": 2.0626463890075684, "learning_rate": 4.131791087141641e-05, "loss": 0.0003743628039956093, "step": 206820 }, { "epoch": 58.708487084870846, "grad_norm": 4.198628902435303, "learning_rate": 4.1315072381493045e-05, "loss": 0.000531979650259018, "step": 206830 }, { "epoch": 58.71132557479421, "grad_norm": 0.003813875140622258, "learning_rate": 4.1312233891569686e-05, "loss": 0.001281389594078064, "step": 206840 }, { "epoch": 58.71416406471757, "grad_norm": 0.5690349340438843, "learning_rate": 4.130939540164633e-05, "loss": 0.0002711476758122444, "step": 206850 }, { "epoch": 58.71700255464093, "grad_norm": 0.021884460002183914, "learning_rate": 4.130655691172296e-05, "loss": 0.003585262596607208, "step": 206860 }, { "epoch": 58.71984104456429, "grad_norm": 0.11941570043563843, "learning_rate": 4.1303718421799604e-05, "loss": 0.0013854298740625381, "step": 206870 }, { "epoch": 58.72267953448765, "grad_norm": 0.021600404754281044, "learning_rate": 4.1300879931876245e-05, "loss": 0.002559078112244606, "step": 206880 }, { "epoch": 58.725518024411016, "grad_norm": 0.1441032886505127, "learning_rate": 4.1298041441952887e-05, "loss": 0.00023447908461093903, "step": 206890 }, { "epoch": 58.72835651433437, "grad_norm": 0.014068868942558765, "learning_rate": 4.129520295202952e-05, "loss": 0.0017884679138660431, "step": 206900 }, { "epoch": 58.731195004257735, "grad_norm": 0.06894334405660629, "learning_rate": 4.1292364462106156e-05, "loss": 0.0034451112151145935, "step": 206910 }, { "epoch": 58.7340334941811, "grad_norm": 5.602851390838623, "learning_rate": 4.1289525972182804e-05, "loss": 0.011946853995323182, "step": 206920 }, { "epoch": 58.736871984104454, "grad_norm": 0.14715395867824554, "learning_rate": 4.128668748225944e-05, "loss": 0.007113057374954224, "step": 206930 }, { "epoch": 58.73971047402782, "grad_norm": 2.63448429107666, "learning_rate": 4.128384899233608e-05, "loss": 0.003221502900123596, "step": 206940 }, { "epoch": 58.74254896395118, "grad_norm": 0.07270588725805283, "learning_rate": 4.128101050241272e-05, "loss": 0.00596543550491333, "step": 206950 }, { "epoch": 58.745387453874535, "grad_norm": 0.0013790101511403918, "learning_rate": 4.1278172012489356e-05, "loss": 0.0014792768284678458, "step": 206960 }, { "epoch": 58.7482259437979, "grad_norm": 0.010601082816720009, "learning_rate": 4.1275333522566e-05, "loss": 0.00046185553073883054, "step": 206970 }, { "epoch": 58.75106443372126, "grad_norm": 0.00454025249928236, "learning_rate": 4.127249503264264e-05, "loss": 0.02258116900920868, "step": 206980 }, { "epoch": 58.753902923644624, "grad_norm": 0.020931657403707504, "learning_rate": 4.126965654271927e-05, "loss": 0.008192752301692963, "step": 206990 }, { "epoch": 58.75674141356798, "grad_norm": 0.06214379146695137, "learning_rate": 4.1266818052795915e-05, "loss": 0.00044973473995923996, "step": 207000 }, { "epoch": 58.75674141356798, "eval_accuracy": 0.9800343358555351, "eval_loss": 0.08271778374910355, "eval_runtime": 43.9858, "eval_samples_per_second": 357.547, "eval_steps_per_second": 5.593, "step": 207000 }, { "epoch": 58.75957990349134, "grad_norm": 0.011215495876967907, "learning_rate": 4.126397956287255e-05, "loss": 0.001125599630177021, "step": 207010 }, { "epoch": 58.762418393414706, "grad_norm": 0.08337889611721039, "learning_rate": 4.12611410729492e-05, "loss": 0.0005610628053545952, "step": 207020 }, { "epoch": 58.76525688333806, "grad_norm": 0.07996755093336105, "learning_rate": 4.125830258302583e-05, "loss": 0.0004415825009346008, "step": 207030 }, { "epoch": 58.768095373261424, "grad_norm": 0.14194105565547943, "learning_rate": 4.1255464093102466e-05, "loss": 0.00206023920327425, "step": 207040 }, { "epoch": 58.77093386318479, "grad_norm": 0.007965769618749619, "learning_rate": 4.1252625603179115e-05, "loss": 0.0007494391873478889, "step": 207050 }, { "epoch": 58.77377235310815, "grad_norm": 0.08076447248458862, "learning_rate": 4.124978711325575e-05, "loss": 0.001026405580341816, "step": 207060 }, { "epoch": 58.776610843031506, "grad_norm": 0.013795719482004642, "learning_rate": 4.124694862333239e-05, "loss": 0.0009989034384489059, "step": 207070 }, { "epoch": 58.77944933295487, "grad_norm": 0.0820017009973526, "learning_rate": 4.124411013340903e-05, "loss": 0.00019229073077440262, "step": 207080 }, { "epoch": 58.78228782287823, "grad_norm": 0.005801402032375336, "learning_rate": 4.1241271643485667e-05, "loss": 0.00026163067668676375, "step": 207090 }, { "epoch": 58.78512631280159, "grad_norm": 0.006656254641711712, "learning_rate": 4.123843315356231e-05, "loss": 0.000263395719230175, "step": 207100 }, { "epoch": 58.78796480272495, "grad_norm": 0.028442703187465668, "learning_rate": 4.123559466363894e-05, "loss": 0.006219441443681717, "step": 207110 }, { "epoch": 58.79080329264831, "grad_norm": 0.42356812953948975, "learning_rate": 4.1232756173715584e-05, "loss": 0.002117685228586197, "step": 207120 }, { "epoch": 58.79364178257167, "grad_norm": 0.03114050067961216, "learning_rate": 4.1229917683792225e-05, "loss": 0.001658092439174652, "step": 207130 }, { "epoch": 58.79648027249503, "grad_norm": 0.18057580292224884, "learning_rate": 4.122707919386886e-05, "loss": 0.0002351248636841774, "step": 207140 }, { "epoch": 58.799318762418395, "grad_norm": 0.017805159091949463, "learning_rate": 4.122424070394551e-05, "loss": 0.0011510802432894707, "step": 207150 }, { "epoch": 58.80215725234176, "grad_norm": 0.008851089514791965, "learning_rate": 4.122140221402214e-05, "loss": 0.0076877005398273465, "step": 207160 }, { "epoch": 58.80499574226511, "grad_norm": 0.006798402406275272, "learning_rate": 4.121856372409878e-05, "loss": 0.00012288745492696762, "step": 207170 }, { "epoch": 58.807834232188476, "grad_norm": 0.00334836239926517, "learning_rate": 4.1215725234175425e-05, "loss": 0.0006169736385345459, "step": 207180 }, { "epoch": 58.81067272211184, "grad_norm": 2.538172960281372, "learning_rate": 4.121288674425206e-05, "loss": 0.0003510594367980957, "step": 207190 }, { "epoch": 58.813511212035195, "grad_norm": 2.227835178375244, "learning_rate": 4.12100482543287e-05, "loss": 0.0007921164855360985, "step": 207200 }, { "epoch": 58.81634970195856, "grad_norm": 0.10649516433477402, "learning_rate": 4.1207209764405336e-05, "loss": 0.00032348614186048506, "step": 207210 }, { "epoch": 58.81918819188192, "grad_norm": 0.19342844188213348, "learning_rate": 4.120437127448198e-05, "loss": 0.0003549730405211449, "step": 207220 }, { "epoch": 58.822026681805276, "grad_norm": 0.017077716067433357, "learning_rate": 4.120153278455862e-05, "loss": 0.0004759186878800392, "step": 207230 }, { "epoch": 58.82486517172864, "grad_norm": 0.0092593589797616, "learning_rate": 4.119869429463525e-05, "loss": 0.00045069810003042223, "step": 207240 }, { "epoch": 58.827703661652, "grad_norm": 0.0019929378759115934, "learning_rate": 4.1195855804711895e-05, "loss": 0.003890169784426689, "step": 207250 }, { "epoch": 58.830542151575365, "grad_norm": 1.412842869758606, "learning_rate": 4.1193017314788536e-05, "loss": 0.0005001129582524299, "step": 207260 }, { "epoch": 58.83338064149872, "grad_norm": 0.03516707941889763, "learning_rate": 4.119017882486517e-05, "loss": 0.00015340577811002732, "step": 207270 }, { "epoch": 58.836219131422084, "grad_norm": 0.04004595801234245, "learning_rate": 4.118734033494181e-05, "loss": 4.783011972904205e-05, "step": 207280 }, { "epoch": 58.83905762134545, "grad_norm": 0.019372455775737762, "learning_rate": 4.1184501845018453e-05, "loss": 0.00023420676589012147, "step": 207290 }, { "epoch": 58.8418961112688, "grad_norm": 0.007115007843822241, "learning_rate": 4.118166335509509e-05, "loss": 0.00032247137278318405, "step": 207300 }, { "epoch": 58.844734601192165, "grad_norm": 0.0035189581103622913, "learning_rate": 4.117882486517173e-05, "loss": 4.299208521842957e-05, "step": 207310 }, { "epoch": 58.84757309111553, "grad_norm": 0.013859681785106659, "learning_rate": 4.117598637524837e-05, "loss": 0.0001862362027168274, "step": 207320 }, { "epoch": 58.850411581038884, "grad_norm": 0.606306791305542, "learning_rate": 4.1173147885325005e-05, "loss": 0.00023835692554712294, "step": 207330 }, { "epoch": 58.85325007096225, "grad_norm": 0.8677060008049011, "learning_rate": 4.117030939540165e-05, "loss": 0.0002740915864706039, "step": 207340 }, { "epoch": 58.85608856088561, "grad_norm": 0.00976644828915596, "learning_rate": 4.116747090547829e-05, "loss": 0.00011562667787075042, "step": 207350 }, { "epoch": 58.85892705080897, "grad_norm": 0.0007254530792124569, "learning_rate": 4.116463241555493e-05, "loss": 0.0001583786681294441, "step": 207360 }, { "epoch": 58.86176554073233, "grad_norm": 0.014670968055725098, "learning_rate": 4.1161793925631564e-05, "loss": 6.184410303831101e-05, "step": 207370 }, { "epoch": 58.86460403065569, "grad_norm": 0.04349493235349655, "learning_rate": 4.1158955435708205e-05, "loss": 5.266182124614715e-05, "step": 207380 }, { "epoch": 58.867442520579054, "grad_norm": 0.005031864158809185, "learning_rate": 4.115611694578485e-05, "loss": 4.4737569987773894e-05, "step": 207390 }, { "epoch": 58.87028101050241, "grad_norm": 0.3102908730506897, "learning_rate": 4.115327845586148e-05, "loss": 0.003684881329536438, "step": 207400 }, { "epoch": 58.87311950042577, "grad_norm": 0.00519011914730072, "learning_rate": 4.115043996593812e-05, "loss": 0.000642918050289154, "step": 207410 }, { "epoch": 58.875957990349136, "grad_norm": 0.00187978136818856, "learning_rate": 4.1147601476014764e-05, "loss": 9.214002639055253e-05, "step": 207420 }, { "epoch": 58.8787964802725, "grad_norm": 1.4677621126174927, "learning_rate": 4.11447629860914e-05, "loss": 0.00030038487166166304, "step": 207430 }, { "epoch": 58.881634970195854, "grad_norm": 0.012847566045820713, "learning_rate": 4.114192449616804e-05, "loss": 0.00014824923127889633, "step": 207440 }, { "epoch": 58.88447346011922, "grad_norm": 0.014515170827507973, "learning_rate": 4.113908600624468e-05, "loss": 0.00011200513690710068, "step": 207450 }, { "epoch": 58.88731195004258, "grad_norm": 0.05517411604523659, "learning_rate": 4.1136247516321316e-05, "loss": 8.637607097625733e-05, "step": 207460 }, { "epoch": 58.890150439965936, "grad_norm": 0.26656943559646606, "learning_rate": 4.113340902639796e-05, "loss": 0.00020764917135238648, "step": 207470 }, { "epoch": 58.8929889298893, "grad_norm": 0.026403483003377914, "learning_rate": 4.11305705364746e-05, "loss": 0.00015253443270921708, "step": 207480 }, { "epoch": 58.89582741981266, "grad_norm": 0.11446212232112885, "learning_rate": 4.112773204655124e-05, "loss": 0.0026875279843807222, "step": 207490 }, { "epoch": 58.89866590973602, "grad_norm": 0.025181716307997704, "learning_rate": 4.1124893556627875e-05, "loss": 0.001032467931509018, "step": 207500 }, { "epoch": 58.89866590973602, "eval_accuracy": 0.9844852800915623, "eval_loss": 0.06441488862037659, "eval_runtime": 59.0705, "eval_samples_per_second": 266.241, "eval_steps_per_second": 4.165, "step": 207500 }, { "epoch": 58.90150439965938, "grad_norm": 0.4447009861469269, "learning_rate": 4.1122055066704516e-05, "loss": 0.0008323101326823234, "step": 207510 }, { "epoch": 58.90434288958274, "grad_norm": 0.09048935025930405, "learning_rate": 4.111921657678116e-05, "loss": 0.00018766894936561584, "step": 207520 }, { "epoch": 58.907181379506106, "grad_norm": 0.03563573211431503, "learning_rate": 4.111637808685779e-05, "loss": 0.0032196260988712313, "step": 207530 }, { "epoch": 58.91001986942946, "grad_norm": 0.005942624993622303, "learning_rate": 4.1113539596934434e-05, "loss": 4.162117838859558e-05, "step": 207540 }, { "epoch": 58.912858359352825, "grad_norm": 0.020423535257577896, "learning_rate": 4.1110701107011075e-05, "loss": 0.0016193719580769538, "step": 207550 }, { "epoch": 58.91569684927619, "grad_norm": 0.02044607698917389, "learning_rate": 4.110786261708771e-05, "loss": 0.00015349499881267548, "step": 207560 }, { "epoch": 58.91853533919954, "grad_norm": 0.03962383419275284, "learning_rate": 4.110502412716435e-05, "loss": 0.00046864952892065046, "step": 207570 }, { "epoch": 58.921373829122906, "grad_norm": 0.002598236780613661, "learning_rate": 4.110218563724099e-05, "loss": 0.00018177088350057602, "step": 207580 }, { "epoch": 58.92421231904627, "grad_norm": 0.678168535232544, "learning_rate": 4.109934714731763e-05, "loss": 0.0018678674474358558, "step": 207590 }, { "epoch": 58.927050808969625, "grad_norm": 0.0030501338187605143, "learning_rate": 4.109650865739427e-05, "loss": 0.00030171405524015427, "step": 207600 }, { "epoch": 58.92988929889299, "grad_norm": 0.023160558193922043, "learning_rate": 4.109367016747091e-05, "loss": 0.00016614124178886412, "step": 207610 }, { "epoch": 58.93272778881635, "grad_norm": 0.004084189422428608, "learning_rate": 4.109083167754755e-05, "loss": 0.0010272720828652382, "step": 207620 }, { "epoch": 58.935566278739714, "grad_norm": 0.012795711867511272, "learning_rate": 4.1087993187624186e-05, "loss": 0.00015019606798887252, "step": 207630 }, { "epoch": 58.93840476866307, "grad_norm": 0.06707807630300522, "learning_rate": 4.108515469770082e-05, "loss": 0.00019864104688167573, "step": 207640 }, { "epoch": 58.94124325858643, "grad_norm": 0.24083751440048218, "learning_rate": 4.108231620777747e-05, "loss": 0.0008563788607716561, "step": 207650 }, { "epoch": 58.944081748509795, "grad_norm": 0.024753738194704056, "learning_rate": 4.10794777178541e-05, "loss": 0.00030694641172885896, "step": 207660 }, { "epoch": 58.94692023843315, "grad_norm": 0.44913995265960693, "learning_rate": 4.1076639227930744e-05, "loss": 0.0019171556457877159, "step": 207670 }, { "epoch": 58.949758728356514, "grad_norm": 0.020342528820037842, "learning_rate": 4.1073800738007386e-05, "loss": 0.0009683417156338692, "step": 207680 }, { "epoch": 58.95259721827988, "grad_norm": 0.0607452318072319, "learning_rate": 4.107096224808402e-05, "loss": 0.0001538306474685669, "step": 207690 }, { "epoch": 58.95543570820323, "grad_norm": 0.0048987530171871185, "learning_rate": 4.106812375816066e-05, "loss": 0.0029213326051831245, "step": 207700 }, { "epoch": 58.958274198126595, "grad_norm": 2.0909080505371094, "learning_rate": 4.10652852682373e-05, "loss": 0.00045837592333555224, "step": 207710 }, { "epoch": 58.96111268804996, "grad_norm": 0.6607640385627747, "learning_rate": 4.106244677831394e-05, "loss": 0.0009275771677494049, "step": 207720 }, { "epoch": 58.96395117797332, "grad_norm": 0.025543328374624252, "learning_rate": 4.105960828839058e-05, "loss": 0.00022105854004621505, "step": 207730 }, { "epoch": 58.96678966789668, "grad_norm": 0.007326331455260515, "learning_rate": 4.1056769798467214e-05, "loss": 0.0007075611501932144, "step": 207740 }, { "epoch": 58.96962815782004, "grad_norm": 0.004673543851822615, "learning_rate": 4.1053931308543855e-05, "loss": 0.000606379471719265, "step": 207750 }, { "epoch": 58.9724666477434, "grad_norm": 0.02461967058479786, "learning_rate": 4.1051092818620496e-05, "loss": 0.0019239088520407676, "step": 207760 }, { "epoch": 58.97530513766676, "grad_norm": 0.04310738667845726, "learning_rate": 4.104825432869713e-05, "loss": 0.00038965474814176557, "step": 207770 }, { "epoch": 58.97814362759012, "grad_norm": 13.762035369873047, "learning_rate": 4.104541583877378e-05, "loss": 0.002920476160943508, "step": 207780 }, { "epoch": 58.980982117513484, "grad_norm": 0.045173339545726776, "learning_rate": 4.1042577348850414e-05, "loss": 0.00024058986455202102, "step": 207790 }, { "epoch": 58.98382060743684, "grad_norm": 0.003350113518536091, "learning_rate": 4.103973885892705e-05, "loss": 0.0006956655532121658, "step": 207800 }, { "epoch": 58.9866590973602, "grad_norm": 0.03454151749610901, "learning_rate": 4.1036900369003696e-05, "loss": 0.00010771621018648147, "step": 207810 }, { "epoch": 58.989497587283566, "grad_norm": 0.0011264195200055838, "learning_rate": 4.103406187908033e-05, "loss": 0.004323891550302506, "step": 207820 }, { "epoch": 58.99233607720693, "grad_norm": 0.026170099154114723, "learning_rate": 4.103122338915697e-05, "loss": 0.0001048501580953598, "step": 207830 }, { "epoch": 58.995174567130285, "grad_norm": 0.5732447504997253, "learning_rate": 4.102838489923361e-05, "loss": 0.00050810556858778, "step": 207840 }, { "epoch": 58.99801305705365, "grad_norm": 0.06431736052036285, "learning_rate": 4.102554640931025e-05, "loss": 0.00034429971128702164, "step": 207850 }, { "epoch": 59.00085154697701, "grad_norm": 0.5130406618118286, "learning_rate": 4.102270791938689e-05, "loss": 0.000189088168554008, "step": 207860 }, { "epoch": 59.003690036900366, "grad_norm": 0.015134327113628387, "learning_rate": 4.1019869429463524e-05, "loss": 6.875377148389817e-05, "step": 207870 }, { "epoch": 59.00652852682373, "grad_norm": 0.017836684361100197, "learning_rate": 4.1017030939540166e-05, "loss": 0.00010348483920097351, "step": 207880 }, { "epoch": 59.00936701674709, "grad_norm": 0.631777822971344, "learning_rate": 4.101419244961681e-05, "loss": 0.0001931793987751007, "step": 207890 }, { "epoch": 59.012205506670455, "grad_norm": 0.048391908407211304, "learning_rate": 4.101135395969344e-05, "loss": 0.0002501444891095161, "step": 207900 }, { "epoch": 59.01504399659381, "grad_norm": 0.1129768043756485, "learning_rate": 4.100851546977009e-05, "loss": 0.00035223737359046936, "step": 207910 }, { "epoch": 59.01788248651717, "grad_norm": 0.02360677719116211, "learning_rate": 4.1005676979846725e-05, "loss": 0.00026530921459197996, "step": 207920 }, { "epoch": 59.020720976440536, "grad_norm": 0.36158010363578796, "learning_rate": 4.100283848992336e-05, "loss": 0.000365702249109745, "step": 207930 }, { "epoch": 59.02355946636389, "grad_norm": 0.0030775372870266438, "learning_rate": 4.1e-05, "loss": 0.0008199496194720268, "step": 207940 }, { "epoch": 59.026397956287255, "grad_norm": 0.21586450934410095, "learning_rate": 4.099716151007664e-05, "loss": 0.0003287043422460556, "step": 207950 }, { "epoch": 59.02923644621062, "grad_norm": 0.014518966898322105, "learning_rate": 4.099432302015328e-05, "loss": 0.00013194046914577484, "step": 207960 }, { "epoch": 59.032074936133974, "grad_norm": 0.029857616871595383, "learning_rate": 4.099148453022992e-05, "loss": 0.0024063028395175934, "step": 207970 }, { "epoch": 59.03491342605734, "grad_norm": 0.0065198722295463085, "learning_rate": 4.098864604030656e-05, "loss": 0.00021703410893678666, "step": 207980 }, { "epoch": 59.0377519159807, "grad_norm": 9.542771339416504, "learning_rate": 4.09858075503832e-05, "loss": 0.001573118194937706, "step": 207990 }, { "epoch": 59.04059040590406, "grad_norm": 0.8779779672622681, "learning_rate": 4.0982969060459835e-05, "loss": 0.00021855756640434265, "step": 208000 }, { "epoch": 59.04059040590406, "eval_accuracy": 0.9826413174794939, "eval_loss": 0.07320564240217209, "eval_runtime": 43.6084, "eval_samples_per_second": 360.642, "eval_steps_per_second": 5.641, "step": 208000 }, { "epoch": 59.04342889582742, "grad_norm": 0.013837386853992939, "learning_rate": 4.0980130570536477e-05, "loss": 0.0036223024129867555, "step": 208010 }, { "epoch": 59.04626738575078, "grad_norm": 0.026340560987591743, "learning_rate": 4.097729208061312e-05, "loss": 0.00039888676255941393, "step": 208020 }, { "epoch": 59.049105875674144, "grad_norm": 0.024918390437960625, "learning_rate": 4.097445359068975e-05, "loss": 0.00015136376023292542, "step": 208030 }, { "epoch": 59.0519443655975, "grad_norm": 11.004297256469727, "learning_rate": 4.0971615100766394e-05, "loss": 0.0020657243207097054, "step": 208040 }, { "epoch": 59.05478285552086, "grad_norm": 0.24577820301055908, "learning_rate": 4.0968776610843035e-05, "loss": 0.006358386576175689, "step": 208050 }, { "epoch": 59.057621345444225, "grad_norm": 0.003685550531372428, "learning_rate": 4.096593812091967e-05, "loss": 0.00020489878952503204, "step": 208060 }, { "epoch": 59.06045983536758, "grad_norm": 0.05219616740942001, "learning_rate": 4.096309963099631e-05, "loss": 0.00010627955198287964, "step": 208070 }, { "epoch": 59.063298325290944, "grad_norm": 0.045587748289108276, "learning_rate": 4.096026114107295e-05, "loss": 7.346980273723603e-05, "step": 208080 }, { "epoch": 59.06613681521431, "grad_norm": 0.10569508373737335, "learning_rate": 4.0957422651149594e-05, "loss": 0.00011331252753734588, "step": 208090 }, { "epoch": 59.06897530513767, "grad_norm": 0.07571864873170853, "learning_rate": 4.095458416122623e-05, "loss": 0.00013066139072179795, "step": 208100 }, { "epoch": 59.071813795061026, "grad_norm": 0.00626629963517189, "learning_rate": 4.095174567130287e-05, "loss": 0.00014532022178173065, "step": 208110 }, { "epoch": 59.07465228498439, "grad_norm": 16.804384231567383, "learning_rate": 4.094890718137951e-05, "loss": 0.012627130746841431, "step": 208120 }, { "epoch": 59.07749077490775, "grad_norm": 11.031539916992188, "learning_rate": 4.0946068691456146e-05, "loss": 0.002651475742459297, "step": 208130 }, { "epoch": 59.08032926483111, "grad_norm": 0.024239132180809975, "learning_rate": 4.094323020153279e-05, "loss": 0.00045367646962404253, "step": 208140 }, { "epoch": 59.08316775475447, "grad_norm": 0.0465371236205101, "learning_rate": 4.094039171160943e-05, "loss": 0.0001005532220005989, "step": 208150 }, { "epoch": 59.08600624467783, "grad_norm": 0.020679030567407608, "learning_rate": 4.093755322168606e-05, "loss": 7.835458964109421e-05, "step": 208160 }, { "epoch": 59.08884473460119, "grad_norm": 0.013292450457811356, "learning_rate": 4.0934714731762705e-05, "loss": 0.00010971222072839737, "step": 208170 }, { "epoch": 59.09168322452455, "grad_norm": 0.03834826126694679, "learning_rate": 4.0931876241839346e-05, "loss": 0.000372038409113884, "step": 208180 }, { "epoch": 59.094521714447914, "grad_norm": 0.01719238981604576, "learning_rate": 4.092903775191598e-05, "loss": 0.002553468942642212, "step": 208190 }, { "epoch": 59.09736020437128, "grad_norm": 0.4009615480899811, "learning_rate": 4.092619926199262e-05, "loss": 0.00015432965010404588, "step": 208200 }, { "epoch": 59.10019869429463, "grad_norm": 0.12766209244728088, "learning_rate": 4.092336077206926e-05, "loss": 0.00019216015934944152, "step": 208210 }, { "epoch": 59.103037184217996, "grad_norm": 0.007929329760372639, "learning_rate": 4.09205222821459e-05, "loss": 9.78708267211914e-05, "step": 208220 }, { "epoch": 59.10587567414136, "grad_norm": 0.04059099406003952, "learning_rate": 4.091768379222254e-05, "loss": 7.702801376581192e-05, "step": 208230 }, { "epoch": 59.108714164064715, "grad_norm": 0.11360719054937363, "learning_rate": 4.0914845302299174e-05, "loss": 7.335003465414047e-05, "step": 208240 }, { "epoch": 59.11155265398808, "grad_norm": 0.04538987949490547, "learning_rate": 4.091200681237582e-05, "loss": 0.00011540781706571579, "step": 208250 }, { "epoch": 59.11439114391144, "grad_norm": 0.01351894997060299, "learning_rate": 4.090916832245246e-05, "loss": 3.505591303110123e-05, "step": 208260 }, { "epoch": 59.1172296338348, "grad_norm": 0.0935216173529625, "learning_rate": 4.090632983252909e-05, "loss": 4.791878163814545e-05, "step": 208270 }, { "epoch": 59.12006812375816, "grad_norm": 0.007126193959265947, "learning_rate": 4.090349134260574e-05, "loss": 5.090758204460144e-05, "step": 208280 }, { "epoch": 59.12290661368152, "grad_norm": 0.005976926535367966, "learning_rate": 4.0900652852682374e-05, "loss": 4.2195618152618405e-05, "step": 208290 }, { "epoch": 59.125745103604885, "grad_norm": 0.0073175448924303055, "learning_rate": 4.0897814362759015e-05, "loss": 0.0003720199689269066, "step": 208300 }, { "epoch": 59.12858359352824, "grad_norm": 0.0009284274419769645, "learning_rate": 4.089497587283566e-05, "loss": 0.0001853879541158676, "step": 208310 }, { "epoch": 59.131422083451604, "grad_norm": 0.045125965029001236, "learning_rate": 4.089213738291229e-05, "loss": 6.751082837581634e-05, "step": 208320 }, { "epoch": 59.134260573374966, "grad_norm": 0.001259294105693698, "learning_rate": 4.088929889298893e-05, "loss": 0.00031258407980203626, "step": 208330 }, { "epoch": 59.13709906329832, "grad_norm": 0.018535463139414787, "learning_rate": 4.088646040306557e-05, "loss": 0.000224187970161438, "step": 208340 }, { "epoch": 59.139937553221685, "grad_norm": 0.33951762318611145, "learning_rate": 4.088362191314221e-05, "loss": 0.0001232227310538292, "step": 208350 }, { "epoch": 59.14277604314505, "grad_norm": 0.004870286211371422, "learning_rate": 4.088078342321885e-05, "loss": 5.0273910164833066e-05, "step": 208360 }, { "epoch": 59.14561453306841, "grad_norm": 0.0022467963863164186, "learning_rate": 4.0877944933295485e-05, "loss": 2.7806498110294342e-05, "step": 208370 }, { "epoch": 59.14845302299177, "grad_norm": 0.017092138528823853, "learning_rate": 4.087510644337213e-05, "loss": 3.9726123213768004e-05, "step": 208380 }, { "epoch": 59.15129151291513, "grad_norm": 0.012495877221226692, "learning_rate": 4.087226795344877e-05, "loss": 0.0036077313125133514, "step": 208390 }, { "epoch": 59.15413000283849, "grad_norm": 0.01968565210700035, "learning_rate": 4.08694294635254e-05, "loss": 0.0005574259907007217, "step": 208400 }, { "epoch": 59.15696849276185, "grad_norm": 0.00507538765668869, "learning_rate": 4.086659097360205e-05, "loss": 0.0002204321324825287, "step": 208410 }, { "epoch": 59.15980698268521, "grad_norm": 0.11241290718317032, "learning_rate": 4.0863752483678685e-05, "loss": 0.0006449256092309952, "step": 208420 }, { "epoch": 59.162645472608574, "grad_norm": 0.05606142431497574, "learning_rate": 4.0860913993755326e-05, "loss": 0.0004172854125499725, "step": 208430 }, { "epoch": 59.16548396253193, "grad_norm": 0.009968576952815056, "learning_rate": 4.085807550383196e-05, "loss": 0.00018044449388980864, "step": 208440 }, { "epoch": 59.16832245245529, "grad_norm": 0.5391976833343506, "learning_rate": 4.08552370139086e-05, "loss": 0.0002049693837761879, "step": 208450 }, { "epoch": 59.171160942378656, "grad_norm": 0.007235636934638023, "learning_rate": 4.0852398523985244e-05, "loss": 0.00011760946363210678, "step": 208460 }, { "epoch": 59.17399943230202, "grad_norm": 0.12323497235774994, "learning_rate": 4.084956003406188e-05, "loss": 0.0002536924555897713, "step": 208470 }, { "epoch": 59.176837922225374, "grad_norm": 0.012095142155885696, "learning_rate": 4.084672154413852e-05, "loss": 0.0001299440860748291, "step": 208480 }, { "epoch": 59.17967641214874, "grad_norm": 0.07805734872817993, "learning_rate": 4.084388305421516e-05, "loss": 0.00013174451887607575, "step": 208490 }, { "epoch": 59.1825149020721, "grad_norm": 0.006247860379517078, "learning_rate": 4.0841044564291795e-05, "loss": 5.1802210509777066e-05, "step": 208500 }, { "epoch": 59.1825149020721, "eval_accuracy": 0.9851211292681376, "eval_loss": 0.061026688665151596, "eval_runtime": 51.5538, "eval_samples_per_second": 305.06, "eval_steps_per_second": 4.772, "step": 208500 }, { "epoch": 59.185353391995456, "grad_norm": 0.004596065729856491, "learning_rate": 4.0838206074368444e-05, "loss": 9.623132646083832e-05, "step": 208510 }, { "epoch": 59.18819188191882, "grad_norm": 1.6705896854400635, "learning_rate": 4.083536758444508e-05, "loss": 0.0011702582240104675, "step": 208520 }, { "epoch": 59.19103037184218, "grad_norm": 0.021618010476231575, "learning_rate": 4.083252909452171e-05, "loss": 0.013897106051445007, "step": 208530 }, { "epoch": 59.19386886176554, "grad_norm": 0.02614051103591919, "learning_rate": 4.0829690604598354e-05, "loss": 0.0005759980529546737, "step": 208540 }, { "epoch": 59.1967073516889, "grad_norm": 0.03865274041891098, "learning_rate": 4.0826852114674996e-05, "loss": 0.00036140959709882734, "step": 208550 }, { "epoch": 59.19954584161226, "grad_norm": 0.050611138343811035, "learning_rate": 4.082401362475164e-05, "loss": 0.0002586979418992996, "step": 208560 }, { "epoch": 59.202384331535626, "grad_norm": 12.844073295593262, "learning_rate": 4.082117513482827e-05, "loss": 0.0034250572323799135, "step": 208570 }, { "epoch": 59.20522282145898, "grad_norm": 1.2342175245285034, "learning_rate": 4.081833664490491e-05, "loss": 0.019642660021781923, "step": 208580 }, { "epoch": 59.208061311382345, "grad_norm": 0.008083652704954147, "learning_rate": 4.0815498154981554e-05, "loss": 0.004543268680572509, "step": 208590 }, { "epoch": 59.21089980130571, "grad_norm": 0.6903533339500427, "learning_rate": 4.081265966505819e-05, "loss": 0.002797786332666874, "step": 208600 }, { "epoch": 59.21373829122906, "grad_norm": 7.586381435394287, "learning_rate": 4.080982117513483e-05, "loss": 0.0009785499423742294, "step": 208610 }, { "epoch": 59.216576781152426, "grad_norm": 0.9337437152862549, "learning_rate": 4.080698268521147e-05, "loss": 0.003929135203361511, "step": 208620 }, { "epoch": 59.21941527107579, "grad_norm": 0.004562055226415396, "learning_rate": 4.0804144195288106e-05, "loss": 0.0005863303318619728, "step": 208630 }, { "epoch": 59.22225376099915, "grad_norm": 0.021296486258506775, "learning_rate": 4.080130570536475e-05, "loss": 0.0002712370827794075, "step": 208640 }, { "epoch": 59.22509225092251, "grad_norm": 0.5584021210670471, "learning_rate": 4.079846721544139e-05, "loss": 0.014972594380378724, "step": 208650 }, { "epoch": 59.22793074084587, "grad_norm": 0.11860156804323196, "learning_rate": 4.0795628725518024e-05, "loss": 0.0002619178965687752, "step": 208660 }, { "epoch": 59.23076923076923, "grad_norm": 12.495570182800293, "learning_rate": 4.0792790235594665e-05, "loss": 0.003148600459098816, "step": 208670 }, { "epoch": 59.23360772069259, "grad_norm": 1.8243142366409302, "learning_rate": 4.0789951745671306e-05, "loss": 0.0013840319588780403, "step": 208680 }, { "epoch": 59.23644621061595, "grad_norm": 0.015378271229565144, "learning_rate": 4.078711325574794e-05, "loss": 0.0027985829859972, "step": 208690 }, { "epoch": 59.239284700539315, "grad_norm": 0.20122399926185608, "learning_rate": 4.078427476582458e-05, "loss": 0.011848890781402588, "step": 208700 }, { "epoch": 59.24212319046267, "grad_norm": 1.5269813537597656, "learning_rate": 4.0781436275901224e-05, "loss": 0.00045494977384805677, "step": 208710 }, { "epoch": 59.244961680386034, "grad_norm": 0.12247257679700851, "learning_rate": 4.0778597785977865e-05, "loss": 0.0006377946585416794, "step": 208720 }, { "epoch": 59.2478001703094, "grad_norm": 0.073937326669693, "learning_rate": 4.07757592960545e-05, "loss": 0.003839216381311417, "step": 208730 }, { "epoch": 59.25063866023276, "grad_norm": 0.046129047870635986, "learning_rate": 4.077292080613114e-05, "loss": 0.00032760854810476303, "step": 208740 }, { "epoch": 59.253477150156115, "grad_norm": 0.07898551970720291, "learning_rate": 4.077008231620778e-05, "loss": 0.00029379799962043763, "step": 208750 }, { "epoch": 59.25631564007948, "grad_norm": 0.8155443072319031, "learning_rate": 4.076724382628442e-05, "loss": 0.0016493124887347222, "step": 208760 }, { "epoch": 59.25915413000284, "grad_norm": 0.9851663708686829, "learning_rate": 4.076440533636106e-05, "loss": 0.0021877747029066088, "step": 208770 }, { "epoch": 59.2619926199262, "grad_norm": 0.13184547424316406, "learning_rate": 4.07615668464377e-05, "loss": 0.0022490015253424644, "step": 208780 }, { "epoch": 59.26483110984956, "grad_norm": 0.030161168426275253, "learning_rate": 4.0758728356514334e-05, "loss": 0.00021853111684322357, "step": 208790 }, { "epoch": 59.26766959977292, "grad_norm": 0.006075549405068159, "learning_rate": 4.0755889866590976e-05, "loss": 0.003928828239440918, "step": 208800 }, { "epoch": 59.27050808969628, "grad_norm": 0.023158565163612366, "learning_rate": 4.075305137666762e-05, "loss": 0.0001693442463874817, "step": 208810 }, { "epoch": 59.27334657961964, "grad_norm": 0.004996785428375006, "learning_rate": 4.075021288674425e-05, "loss": 0.001147199235856533, "step": 208820 }, { "epoch": 59.276185069543004, "grad_norm": 0.07731928676366806, "learning_rate": 4.074737439682089e-05, "loss": 0.00019136350601911545, "step": 208830 }, { "epoch": 59.27902355946637, "grad_norm": 0.01631273329257965, "learning_rate": 4.0744535906897534e-05, "loss": 0.0009206283837556839, "step": 208840 }, { "epoch": 59.28186204938972, "grad_norm": 1.5962164402008057, "learning_rate": 4.0741697416974176e-05, "loss": 0.00045950878411531447, "step": 208850 }, { "epoch": 59.284700539313086, "grad_norm": 0.3322031497955322, "learning_rate": 4.073885892705081e-05, "loss": 0.006980029493570327, "step": 208860 }, { "epoch": 59.28753902923645, "grad_norm": 0.02163473330438137, "learning_rate": 4.0736020437127445e-05, "loss": 0.0007492845878005027, "step": 208870 }, { "epoch": 59.290377519159804, "grad_norm": 0.06263180077075958, "learning_rate": 4.073318194720409e-05, "loss": 0.0035546287894248963, "step": 208880 }, { "epoch": 59.29321600908317, "grad_norm": 0.1559010148048401, "learning_rate": 4.073034345728073e-05, "loss": 0.00043515749275684356, "step": 208890 }, { "epoch": 59.29605449900653, "grad_norm": 0.032513488084077835, "learning_rate": 4.072750496735737e-05, "loss": 0.0009453052654862404, "step": 208900 }, { "epoch": 59.298892988929886, "grad_norm": 0.03644878417253494, "learning_rate": 4.072466647743401e-05, "loss": 0.00022212602198123932, "step": 208910 }, { "epoch": 59.30173147885325, "grad_norm": 0.5239670872688293, "learning_rate": 4.0721827987510645e-05, "loss": 0.0027746308594942095, "step": 208920 }, { "epoch": 59.30456996877661, "grad_norm": 0.011013501323759556, "learning_rate": 4.0718989497587287e-05, "loss": 0.00016819443553686141, "step": 208930 }, { "epoch": 59.307408458699975, "grad_norm": 0.3261617124080658, "learning_rate": 4.071615100766393e-05, "loss": 0.000497373379766941, "step": 208940 }, { "epoch": 59.31024694862333, "grad_norm": 0.05601045861840248, "learning_rate": 4.071331251774056e-05, "loss": 0.00047509651631116867, "step": 208950 }, { "epoch": 59.31308543854669, "grad_norm": 6.6335225105285645, "learning_rate": 4.0710474027817204e-05, "loss": 0.0018949199467897416, "step": 208960 }, { "epoch": 59.315923928470056, "grad_norm": 0.1517571359872818, "learning_rate": 4.070763553789384e-05, "loss": 0.004727303981781006, "step": 208970 }, { "epoch": 59.31876241839341, "grad_norm": 0.10796132683753967, "learning_rate": 4.070479704797048e-05, "loss": 0.0003145255148410797, "step": 208980 }, { "epoch": 59.321600908316775, "grad_norm": 0.00427694246172905, "learning_rate": 4.070195855804712e-05, "loss": 0.0031417056918144224, "step": 208990 }, { "epoch": 59.32443939824014, "grad_norm": 1.1105939149856567, "learning_rate": 4.0699120068123756e-05, "loss": 0.0005503922700881958, "step": 209000 }, { "epoch": 59.32443939824014, "eval_accuracy": 0.9832771666560692, "eval_loss": 0.06800995022058487, "eval_runtime": 51.7676, "eval_samples_per_second": 303.8, "eval_steps_per_second": 4.752, "step": 209000 }, { "epoch": 59.32727788816349, "grad_norm": 2.3050549030303955, "learning_rate": 4.0696281578200404e-05, "loss": 0.0005675625056028366, "step": 209010 }, { "epoch": 59.330116378086856, "grad_norm": 0.09174767881631851, "learning_rate": 4.069344308827704e-05, "loss": 9.496845304965973e-05, "step": 209020 }, { "epoch": 59.33295486801022, "grad_norm": 0.04311138391494751, "learning_rate": 4.069060459835368e-05, "loss": 0.0008425207808613778, "step": 209030 }, { "epoch": 59.33579335793358, "grad_norm": 0.14760245382785797, "learning_rate": 4.068776610843032e-05, "loss": 0.00339864045381546, "step": 209040 }, { "epoch": 59.33863184785694, "grad_norm": 0.010105427354574203, "learning_rate": 4.0684927618506956e-05, "loss": 0.0008387994021177292, "step": 209050 }, { "epoch": 59.3414703377803, "grad_norm": 0.0008430539164692163, "learning_rate": 4.06820891285836e-05, "loss": 0.0012360725551843644, "step": 209060 }, { "epoch": 59.344308827703664, "grad_norm": 0.05451909825205803, "learning_rate": 4.067925063866023e-05, "loss": 0.0017375240102410316, "step": 209070 }, { "epoch": 59.34714731762702, "grad_norm": 2.9144349098205566, "learning_rate": 4.067641214873687e-05, "loss": 0.002154899574816227, "step": 209080 }, { "epoch": 59.34998580755038, "grad_norm": 0.03187168762087822, "learning_rate": 4.0673573658813515e-05, "loss": 0.0005174780264496804, "step": 209090 }, { "epoch": 59.352824297473745, "grad_norm": 0.06277491897344589, "learning_rate": 4.067073516889015e-05, "loss": 0.010179243981838226, "step": 209100 }, { "epoch": 59.35566278739711, "grad_norm": 1.8055312633514404, "learning_rate": 4.066789667896679e-05, "loss": 0.0017857408151030541, "step": 209110 }, { "epoch": 59.358501277320464, "grad_norm": 0.24745240807533264, "learning_rate": 4.066505818904343e-05, "loss": 0.0002473166212439537, "step": 209120 }, { "epoch": 59.36133976724383, "grad_norm": 0.02266867645084858, "learning_rate": 4.0662219699120067e-05, "loss": 0.000246022455394268, "step": 209130 }, { "epoch": 59.36417825716719, "grad_norm": 0.33099204301834106, "learning_rate": 4.0659381209196715e-05, "loss": 0.004586952179670334, "step": 209140 }, { "epoch": 59.367016747090545, "grad_norm": 0.030950693413615227, "learning_rate": 4.065654271927335e-05, "loss": 0.00027334969490766525, "step": 209150 }, { "epoch": 59.36985523701391, "grad_norm": 0.006041825283318758, "learning_rate": 4.0653704229349984e-05, "loss": 0.0015481488779187202, "step": 209160 }, { "epoch": 59.37269372693727, "grad_norm": 0.152247354388237, "learning_rate": 4.0651149588418964e-05, "loss": 0.006775186955928802, "step": 209170 }, { "epoch": 59.37553221686063, "grad_norm": 0.08080053329467773, "learning_rate": 4.06483110984956e-05, "loss": 0.0013595422729849815, "step": 209180 }, { "epoch": 59.37837070678399, "grad_norm": 0.12253227084875107, "learning_rate": 4.064547260857225e-05, "loss": 0.0028357407078146934, "step": 209190 }, { "epoch": 59.38120919670735, "grad_norm": 0.006574396509677172, "learning_rate": 4.064263411864888e-05, "loss": 0.0005186280235648155, "step": 209200 }, { "epoch": 59.384047686630716, "grad_norm": 5.578652858734131, "learning_rate": 4.0639795628725516e-05, "loss": 0.003120157681405544, "step": 209210 }, { "epoch": 59.38688617655407, "grad_norm": 0.03376499563455582, "learning_rate": 4.0636957138802164e-05, "loss": 0.006442315131425858, "step": 209220 }, { "epoch": 59.389724666477434, "grad_norm": 0.3002016544342041, "learning_rate": 4.06341186488788e-05, "loss": 0.0380127876996994, "step": 209230 }, { "epoch": 59.3925631564008, "grad_norm": 0.26126664876937866, "learning_rate": 4.063128015895544e-05, "loss": 0.0004123944789171219, "step": 209240 }, { "epoch": 59.39540164632415, "grad_norm": 0.006406567059457302, "learning_rate": 4.0628441669032075e-05, "loss": 0.002198541350662708, "step": 209250 }, { "epoch": 59.398240136247516, "grad_norm": 0.08050786703824997, "learning_rate": 4.0625603179108716e-05, "loss": 0.0004383603110909462, "step": 209260 }, { "epoch": 59.40107862617088, "grad_norm": 0.0024966660421341658, "learning_rate": 4.062276468918536e-05, "loss": 0.005590541660785675, "step": 209270 }, { "epoch": 59.403917116094235, "grad_norm": 0.005818074569106102, "learning_rate": 4.061992619926199e-05, "loss": 0.0011248691007494926, "step": 209280 }, { "epoch": 59.4067556060176, "grad_norm": 0.0034512002021074295, "learning_rate": 4.061708770933863e-05, "loss": 9.277760982513427e-05, "step": 209290 }, { "epoch": 59.40959409594096, "grad_norm": 1.3204978704452515, "learning_rate": 4.0614249219415275e-05, "loss": 0.0021379608660936356, "step": 209300 }, { "epoch": 59.41243258586432, "grad_norm": 10.567334175109863, "learning_rate": 4.061141072949191e-05, "loss": 0.005088707804679871, "step": 209310 }, { "epoch": 59.41527107578768, "grad_norm": 0.003972927574068308, "learning_rate": 4.060857223956856e-05, "loss": 0.0005264928564429283, "step": 209320 }, { "epoch": 59.41810956571104, "grad_norm": 0.13195107877254486, "learning_rate": 4.060573374964519e-05, "loss": 0.003124219924211502, "step": 209330 }, { "epoch": 59.420948055634405, "grad_norm": 0.24458910524845123, "learning_rate": 4.0602895259721827e-05, "loss": 0.0016252171248197556, "step": 209340 }, { "epoch": 59.42378654555776, "grad_norm": 13.875762939453125, "learning_rate": 4.060005676979847e-05, "loss": 0.0025946320965886116, "step": 209350 }, { "epoch": 59.42662503548112, "grad_norm": 0.09595219045877457, "learning_rate": 4.059721827987511e-05, "loss": 0.0003556860610842705, "step": 209360 }, { "epoch": 59.429463525404486, "grad_norm": 0.05400305613875389, "learning_rate": 4.059437978995175e-05, "loss": 0.0005359511822462081, "step": 209370 }, { "epoch": 59.43230201532784, "grad_norm": 0.21772778034210205, "learning_rate": 4.0591541300028385e-05, "loss": 0.00044602546840906144, "step": 209380 }, { "epoch": 59.435140505251205, "grad_norm": 0.0038484970573335886, "learning_rate": 4.058870281010503e-05, "loss": 0.001323869451880455, "step": 209390 }, { "epoch": 59.43797899517457, "grad_norm": 0.008350418880581856, "learning_rate": 4.058586432018167e-05, "loss": 0.0018163010478019713, "step": 209400 }, { "epoch": 59.44081748509793, "grad_norm": 0.19818513095378876, "learning_rate": 4.05830258302583e-05, "loss": 0.00012485329061746598, "step": 209410 }, { "epoch": 59.44365597502129, "grad_norm": 0.9925462603569031, "learning_rate": 4.0580187340334944e-05, "loss": 0.0014838926494121552, "step": 209420 }, { "epoch": 59.44649446494465, "grad_norm": 0.1155548021197319, "learning_rate": 4.0577348850411585e-05, "loss": 0.0012501237913966178, "step": 209430 }, { "epoch": 59.44933295486801, "grad_norm": 0.057228703051805496, "learning_rate": 4.057451036048822e-05, "loss": 6.333757191896438e-05, "step": 209440 }, { "epoch": 59.45217144479137, "grad_norm": 0.0033313785679638386, "learning_rate": 4.057167187056486e-05, "loss": 0.0002046743407845497, "step": 209450 }, { "epoch": 59.45500993471473, "grad_norm": 0.059183429926633835, "learning_rate": 4.05688333806415e-05, "loss": 0.00042438264936208727, "step": 209460 }, { "epoch": 59.457848424638094, "grad_norm": 0.057143405079841614, "learning_rate": 4.056599489071814e-05, "loss": 0.0008848967030644417, "step": 209470 }, { "epoch": 59.46068691456146, "grad_norm": 2.8670618534088135, "learning_rate": 4.056315640079478e-05, "loss": 0.0008901610970497131, "step": 209480 }, { "epoch": 59.46352540448481, "grad_norm": 0.012700549326837063, "learning_rate": 4.056031791087142e-05, "loss": 0.00028976406902074813, "step": 209490 }, { "epoch": 59.466363894408175, "grad_norm": 0.033958278596401215, "learning_rate": 4.0557479420948055e-05, "loss": 8.33595171570778e-05, "step": 209500 }, { "epoch": 59.466363894408175, "eval_accuracy": 0.9856933935270554, "eval_loss": 0.058904979377985, "eval_runtime": 46.606, "eval_samples_per_second": 337.446, "eval_steps_per_second": 5.278, "step": 209500 }, { "epoch": 59.46920238433154, "grad_norm": 0.026906389743089676, "learning_rate": 4.0554640931024696e-05, "loss": 0.0008737413212656975, "step": 209510 }, { "epoch": 59.472040874254894, "grad_norm": 0.030644169077277184, "learning_rate": 4.055180244110134e-05, "loss": 0.00027681440114974977, "step": 209520 }, { "epoch": 59.47487936417826, "grad_norm": 0.057465389370918274, "learning_rate": 4.054896395117798e-05, "loss": 0.0003419315442442894, "step": 209530 }, { "epoch": 59.47771785410162, "grad_norm": 0.025262445211410522, "learning_rate": 4.0546125461254613e-05, "loss": 0.0007949858903884887, "step": 209540 }, { "epoch": 59.480556344024976, "grad_norm": 0.003866976359859109, "learning_rate": 4.054328697133125e-05, "loss": 7.033906877040863e-05, "step": 209550 }, { "epoch": 59.48339483394834, "grad_norm": 8.875951766967773, "learning_rate": 4.0540448481407896e-05, "loss": 0.005482359975576401, "step": 209560 }, { "epoch": 59.4862333238717, "grad_norm": 0.09175582975149155, "learning_rate": 4.053760999148453e-05, "loss": 0.0005606308579444886, "step": 209570 }, { "epoch": 59.489071813795064, "grad_norm": 0.008105779998004436, "learning_rate": 4.053477150156117e-05, "loss": 0.0006829682737588882, "step": 209580 }, { "epoch": 59.49191030371842, "grad_norm": 0.01195067260414362, "learning_rate": 4.0531933011637814e-05, "loss": 0.0001885896548628807, "step": 209590 }, { "epoch": 59.49474879364178, "grad_norm": 0.011962617747485638, "learning_rate": 4.052909452171445e-05, "loss": 0.00011696498841047287, "step": 209600 }, { "epoch": 59.497587283565146, "grad_norm": 0.006526998244225979, "learning_rate": 4.052625603179109e-05, "loss": 0.00024119522422552108, "step": 209610 }, { "epoch": 59.5004257734885, "grad_norm": 0.0019405940547585487, "learning_rate": 4.052341754186773e-05, "loss": 0.0001097414642572403, "step": 209620 }, { "epoch": 59.503264263411864, "grad_norm": 0.8814250230789185, "learning_rate": 4.0520579051944365e-05, "loss": 0.00042445771396160126, "step": 209630 }, { "epoch": 59.50610275333523, "grad_norm": 0.3282906413078308, "learning_rate": 4.051774056202101e-05, "loss": 0.0008623134344816208, "step": 209640 }, { "epoch": 59.50894124325858, "grad_norm": 0.09421703964471817, "learning_rate": 4.051490207209764e-05, "loss": 0.00010571796447038651, "step": 209650 }, { "epoch": 59.511779733181946, "grad_norm": 0.034750353544950485, "learning_rate": 4.051206358217429e-05, "loss": 0.0003120275214314461, "step": 209660 }, { "epoch": 59.51461822310531, "grad_norm": 0.18998248875141144, "learning_rate": 4.0509225092250924e-05, "loss": 0.0003042345866560936, "step": 209670 }, { "epoch": 59.51745671302867, "grad_norm": 0.15742367506027222, "learning_rate": 4.050638660232756e-05, "loss": 0.0003263920545578003, "step": 209680 }, { "epoch": 59.52029520295203, "grad_norm": 0.004634558223187923, "learning_rate": 4.050354811240421e-05, "loss": 0.0009516969323158265, "step": 209690 }, { "epoch": 59.52313369287539, "grad_norm": 0.0008833525935187936, "learning_rate": 4.050070962248084e-05, "loss": 0.00014888085424900055, "step": 209700 }, { "epoch": 59.52597218279875, "grad_norm": 0.038599926978349686, "learning_rate": 4.049787113255748e-05, "loss": 0.0003020020201802254, "step": 209710 }, { "epoch": 59.52881067272211, "grad_norm": 0.9685364961624146, "learning_rate": 4.0495032642634124e-05, "loss": 0.0013916177675127983, "step": 209720 }, { "epoch": 59.53164916264547, "grad_norm": 0.01601978950202465, "learning_rate": 4.049219415271076e-05, "loss": 0.0015367278829216957, "step": 209730 }, { "epoch": 59.534487652568835, "grad_norm": 0.037056345492601395, "learning_rate": 4.04893556627874e-05, "loss": 0.001572825014591217, "step": 209740 }, { "epoch": 59.53732614249219, "grad_norm": 0.006103822495788336, "learning_rate": 4.0486517172864035e-05, "loss": 0.0033022671937942504, "step": 209750 }, { "epoch": 59.540164632415554, "grad_norm": 0.1327001303434372, "learning_rate": 4.0483678682940676e-05, "loss": 0.0029650839045643807, "step": 209760 }, { "epoch": 59.54300312233892, "grad_norm": 0.42782828211784363, "learning_rate": 4.048084019301732e-05, "loss": 0.0004030119627714157, "step": 209770 }, { "epoch": 59.54584161226228, "grad_norm": 0.8482135534286499, "learning_rate": 4.047800170309395e-05, "loss": 0.0010706692934036254, "step": 209780 }, { "epoch": 59.548680102185635, "grad_norm": 0.0036592399701476097, "learning_rate": 4.04751632131706e-05, "loss": 0.0022724892944097517, "step": 209790 }, { "epoch": 59.551518592109, "grad_norm": 0.015785710886120796, "learning_rate": 4.0472324723247235e-05, "loss": 0.0008964033797383309, "step": 209800 }, { "epoch": 59.55435708203236, "grad_norm": 0.019166043028235435, "learning_rate": 4.046948623332387e-05, "loss": 0.00025486350059509275, "step": 209810 }, { "epoch": 59.55719557195572, "grad_norm": 0.04115685075521469, "learning_rate": 4.046664774340052e-05, "loss": 8.680447936058045e-05, "step": 209820 }, { "epoch": 59.56003406187908, "grad_norm": 0.00789150595664978, "learning_rate": 4.046380925347715e-05, "loss": 0.0014266639947891236, "step": 209830 }, { "epoch": 59.56287255180244, "grad_norm": 0.004335254430770874, "learning_rate": 4.0460970763553794e-05, "loss": 0.0004758477210998535, "step": 209840 }, { "epoch": 59.565711041725805, "grad_norm": 0.01617295667529106, "learning_rate": 4.045813227363043e-05, "loss": 0.00014093201607465745, "step": 209850 }, { "epoch": 59.56854953164916, "grad_norm": 0.06810112297534943, "learning_rate": 4.045529378370707e-05, "loss": 0.0002751961350440979, "step": 209860 }, { "epoch": 59.571388021572524, "grad_norm": 0.31053897738456726, "learning_rate": 4.045245529378371e-05, "loss": 0.002346888929605484, "step": 209870 }, { "epoch": 59.57422651149589, "grad_norm": 0.24291442334651947, "learning_rate": 4.0449616803860346e-05, "loss": 0.0002107728272676468, "step": 209880 }, { "epoch": 59.57706500141924, "grad_norm": 0.11660916358232498, "learning_rate": 4.044677831393699e-05, "loss": 0.00015582125633955, "step": 209890 }, { "epoch": 59.579903491342606, "grad_norm": 0.36715808510780334, "learning_rate": 4.044393982401363e-05, "loss": 0.00016945768147706984, "step": 209900 }, { "epoch": 59.58274198126597, "grad_norm": 0.0062269121408462524, "learning_rate": 4.044110133409026e-05, "loss": 0.0024847712367773054, "step": 209910 }, { "epoch": 59.585580471189324, "grad_norm": 0.07008063048124313, "learning_rate": 4.0438262844166904e-05, "loss": 0.0012739406898617744, "step": 209920 }, { "epoch": 59.58841896111269, "grad_norm": 0.033344682306051254, "learning_rate": 4.0435424354243546e-05, "loss": 0.003394139185547829, "step": 209930 }, { "epoch": 59.59125745103605, "grad_norm": 0.013995596207678318, "learning_rate": 4.043258586432018e-05, "loss": 0.0051654942333698274, "step": 209940 }, { "epoch": 59.59409594095941, "grad_norm": 3.794924736022949, "learning_rate": 4.042974737439682e-05, "loss": 0.011932146549224854, "step": 209950 }, { "epoch": 59.59693443088277, "grad_norm": 0.02480378746986389, "learning_rate": 4.0427192733465795e-05, "loss": 0.010939006507396699, "step": 209960 }, { "epoch": 59.59977292080613, "grad_norm": 0.01926366798579693, "learning_rate": 4.0424354243542436e-05, "loss": 0.00017244983464479447, "step": 209970 }, { "epoch": 59.602611410729494, "grad_norm": 0.026686491444706917, "learning_rate": 4.042151575361908e-05, "loss": 0.0022194307297468186, "step": 209980 }, { "epoch": 59.60544990065285, "grad_norm": 1.2721641063690186, "learning_rate": 4.041867726369571e-05, "loss": 0.0020581422373652457, "step": 209990 }, { "epoch": 59.60828839057621, "grad_norm": 0.04786679148674011, "learning_rate": 4.041583877377236e-05, "loss": 0.00042538512498140334, "step": 210000 }, { "epoch": 59.60828839057621, "eval_accuracy": 0.984103770585617, "eval_loss": 0.06459614634513855, "eval_runtime": 42.8621, "eval_samples_per_second": 366.921, "eval_steps_per_second": 5.739, "step": 210000 }, { "epoch": 59.611126880499576, "grad_norm": 0.015447904355823994, "learning_rate": 4.0413000283848995e-05, "loss": 0.00047887489199638367, "step": 210010 }, { "epoch": 59.61396537042293, "grad_norm": 0.025573546066880226, "learning_rate": 4.041016179392563e-05, "loss": 0.00022531170397996903, "step": 210020 }, { "epoch": 59.616803860346295, "grad_norm": 0.0032345035579055548, "learning_rate": 4.040732330400227e-05, "loss": 0.0001589260995388031, "step": 210030 }, { "epoch": 59.61964235026966, "grad_norm": 0.04299372807145119, "learning_rate": 4.040448481407891e-05, "loss": 0.00010633394122123718, "step": 210040 }, { "epoch": 59.62248084019302, "grad_norm": 0.0539042130112648, "learning_rate": 4.0401646324155554e-05, "loss": 0.00014725737273693084, "step": 210050 }, { "epoch": 59.625319330116376, "grad_norm": 3.57108998298645, "learning_rate": 4.039880783423219e-05, "loss": 0.0008285531774163246, "step": 210060 }, { "epoch": 59.62815782003974, "grad_norm": 0.006800636649131775, "learning_rate": 4.039596934430883e-05, "loss": 8.221380412578582e-05, "step": 210070 }, { "epoch": 59.6309963099631, "grad_norm": 0.018361855298280716, "learning_rate": 4.039313085438547e-05, "loss": 0.0003800084814429283, "step": 210080 }, { "epoch": 59.63383479988646, "grad_norm": 0.004929245915263891, "learning_rate": 4.0390292364462106e-05, "loss": 0.0005758322775363922, "step": 210090 }, { "epoch": 59.63667328980982, "grad_norm": 0.5332697033882141, "learning_rate": 4.038745387453875e-05, "loss": 0.000209902785718441, "step": 210100 }, { "epoch": 59.63951177973318, "grad_norm": 0.04607729986310005, "learning_rate": 4.038461538461539e-05, "loss": 0.00019071847200393676, "step": 210110 }, { "epoch": 59.64235026965654, "grad_norm": 0.06580177694559097, "learning_rate": 4.038177689469202e-05, "loss": 0.00044355448335409164, "step": 210120 }, { "epoch": 59.6451887595799, "grad_norm": 0.003099102294072509, "learning_rate": 4.0378938404768664e-05, "loss": 0.0006796270608901978, "step": 210130 }, { "epoch": 59.648027249503265, "grad_norm": 0.0022461882326751947, "learning_rate": 4.0376099914845306e-05, "loss": 3.063436597585678e-05, "step": 210140 }, { "epoch": 59.65086573942663, "grad_norm": 0.016202697530388832, "learning_rate": 4.037326142492194e-05, "loss": 0.0007920237258076668, "step": 210150 }, { "epoch": 59.653704229349984, "grad_norm": 11.176751136779785, "learning_rate": 4.037042293499858e-05, "loss": 0.0031495191156864165, "step": 210160 }, { "epoch": 59.65654271927335, "grad_norm": 5.805490493774414, "learning_rate": 4.036758444507522e-05, "loss": 0.0016501041129231453, "step": 210170 }, { "epoch": 59.65938120919671, "grad_norm": 0.05522219464182854, "learning_rate": 4.0364745955151864e-05, "loss": 0.0006545023992657661, "step": 210180 }, { "epoch": 59.662219699120065, "grad_norm": 0.008759953081607819, "learning_rate": 4.03619074652285e-05, "loss": 0.00018653273582458496, "step": 210190 }, { "epoch": 59.66505818904343, "grad_norm": 0.05629510059952736, "learning_rate": 4.035906897530514e-05, "loss": 0.0010196978226304054, "step": 210200 }, { "epoch": 59.66789667896679, "grad_norm": 0.00825746450573206, "learning_rate": 4.035623048538178e-05, "loss": 0.0015587979927659036, "step": 210210 }, { "epoch": 59.67073516889015, "grad_norm": 0.039079416543245316, "learning_rate": 4.0353391995458416e-05, "loss": 0.00017795450985431672, "step": 210220 }, { "epoch": 59.67357365881351, "grad_norm": 0.04754771292209625, "learning_rate": 4.035055350553506e-05, "loss": 0.001348208449780941, "step": 210230 }, { "epoch": 59.67641214873687, "grad_norm": 0.16308680176734924, "learning_rate": 4.03477150156117e-05, "loss": 0.0018207317218184472, "step": 210240 }, { "epoch": 59.679250638660235, "grad_norm": 0.026084255427122116, "learning_rate": 4.0344876525688334e-05, "loss": 0.00012580323964357377, "step": 210250 }, { "epoch": 59.68208912858359, "grad_norm": 6.039462566375732, "learning_rate": 4.0342038035764975e-05, "loss": 0.0007340487092733384, "step": 210260 }, { "epoch": 59.684927618506954, "grad_norm": 0.012714739888906479, "learning_rate": 4.0339199545841616e-05, "loss": 0.0006600255146622658, "step": 210270 }, { "epoch": 59.68776610843032, "grad_norm": 0.0740511566400528, "learning_rate": 4.033636105591825e-05, "loss": 9.971484541893005e-05, "step": 210280 }, { "epoch": 59.69060459835367, "grad_norm": 0.008175455965101719, "learning_rate": 4.033352256599489e-05, "loss": 0.0009753864258527755, "step": 210290 }, { "epoch": 59.693443088277036, "grad_norm": 0.009145496413111687, "learning_rate": 4.0330684076071534e-05, "loss": 0.00048306528478860857, "step": 210300 }, { "epoch": 59.6962815782004, "grad_norm": 0.051245320588350296, "learning_rate": 4.0327845586148175e-05, "loss": 0.00035928599536418917, "step": 210310 }, { "epoch": 59.69912006812376, "grad_norm": 0.004784907680004835, "learning_rate": 4.032500709622481e-05, "loss": 0.00014443453401327133, "step": 210320 }, { "epoch": 59.70195855804712, "grad_norm": 0.07438329607248306, "learning_rate": 4.0322168606301444e-05, "loss": 0.0002257928252220154, "step": 210330 }, { "epoch": 59.70479704797048, "grad_norm": 0.014108735136687756, "learning_rate": 4.031933011637809e-05, "loss": 0.006265962123870849, "step": 210340 }, { "epoch": 59.70763553789384, "grad_norm": 1.6176137924194336, "learning_rate": 4.031649162645473e-05, "loss": 0.00038754288107156754, "step": 210350 }, { "epoch": 59.7104740278172, "grad_norm": 0.03425375744700432, "learning_rate": 4.031365313653137e-05, "loss": 0.007917419821023942, "step": 210360 }, { "epoch": 59.71331251774056, "grad_norm": 0.2555633783340454, "learning_rate": 4.031081464660801e-05, "loss": 0.00016409289091825485, "step": 210370 }, { "epoch": 59.716151007663925, "grad_norm": 0.023549320176243782, "learning_rate": 4.0307976156684645e-05, "loss": 0.001745769940316677, "step": 210380 }, { "epoch": 59.71898949758728, "grad_norm": 4.26236629486084, "learning_rate": 4.0305137666761286e-05, "loss": 0.0008392773568630218, "step": 210390 }, { "epoch": 59.72182798751064, "grad_norm": 13.710404396057129, "learning_rate": 4.030229917683793e-05, "loss": 0.006067518517374993, "step": 210400 }, { "epoch": 59.724666477434006, "grad_norm": 0.016028761863708496, "learning_rate": 4.029946068691456e-05, "loss": 0.00022939201444387435, "step": 210410 }, { "epoch": 59.72750496735737, "grad_norm": 0.006967571098357439, "learning_rate": 4.02966221969912e-05, "loss": 0.0010559726506471634, "step": 210420 }, { "epoch": 59.730343457280725, "grad_norm": 0.030907010659575462, "learning_rate": 4.029378370706784e-05, "loss": 0.005079259723424911, "step": 210430 }, { "epoch": 59.73318194720409, "grad_norm": 0.01959521323442459, "learning_rate": 4.029094521714448e-05, "loss": 0.00043913386762142184, "step": 210440 }, { "epoch": 59.73602043712745, "grad_norm": 0.016544360667467117, "learning_rate": 4.028810672722112e-05, "loss": 0.00023094844073057175, "step": 210450 }, { "epoch": 59.738858927050806, "grad_norm": 0.4420657753944397, "learning_rate": 4.0285268237297755e-05, "loss": 0.00019466932862997056, "step": 210460 }, { "epoch": 59.74169741697417, "grad_norm": 0.021600710228085518, "learning_rate": 4.02824297473744e-05, "loss": 9.876731783151627e-05, "step": 210470 }, { "epoch": 59.74453590689753, "grad_norm": 0.043795835226774216, "learning_rate": 4.027959125745104e-05, "loss": 6.838440895080566e-05, "step": 210480 }, { "epoch": 59.74737439682089, "grad_norm": 0.006226823665201664, "learning_rate": 4.027675276752767e-05, "loss": 0.00022675953805446626, "step": 210490 }, { "epoch": 59.75021288674425, "grad_norm": 0.04934493824839592, "learning_rate": 4.027391427760432e-05, "loss": 0.0002881461754441261, "step": 210500 }, { "epoch": 59.75021288674425, "eval_accuracy": 0.9828320722324665, "eval_loss": 0.07708508521318436, "eval_runtime": 49.1611, "eval_samples_per_second": 319.908, "eval_steps_per_second": 5.004, "step": 210500 }, { "epoch": 59.753051376667614, "grad_norm": 0.047501228749752045, "learning_rate": 4.0271075787680955e-05, "loss": 0.000873025506734848, "step": 210510 }, { "epoch": 59.75588986659098, "grad_norm": 0.0260254368185997, "learning_rate": 4.02682372977576e-05, "loss": 0.0014946965500712394, "step": 210520 }, { "epoch": 59.75872835651433, "grad_norm": 0.026270005851984024, "learning_rate": 4.026539880783423e-05, "loss": 0.004414426535367966, "step": 210530 }, { "epoch": 59.761566846437695, "grad_norm": 0.0211776252835989, "learning_rate": 4.026256031791087e-05, "loss": 0.0001258648931980133, "step": 210540 }, { "epoch": 59.76440533636106, "grad_norm": 0.005121561698615551, "learning_rate": 4.0259721827987514e-05, "loss": 0.00045231878757476804, "step": 210550 }, { "epoch": 59.767243826284414, "grad_norm": 0.03876017406582832, "learning_rate": 4.025688333806415e-05, "loss": 0.0017632003873586655, "step": 210560 }, { "epoch": 59.77008231620778, "grad_norm": 0.013244347646832466, "learning_rate": 4.025404484814079e-05, "loss": 0.0020803909748792647, "step": 210570 }, { "epoch": 59.77292080613114, "grad_norm": 0.010953772813081741, "learning_rate": 4.025120635821743e-05, "loss": 0.00032911486923694613, "step": 210580 }, { "epoch": 59.7757592960545, "grad_norm": 0.08209783583879471, "learning_rate": 4.0248367868294066e-05, "loss": 0.009014500677585602, "step": 210590 }, { "epoch": 59.77859778597786, "grad_norm": 0.1986963152885437, "learning_rate": 4.0245529378370714e-05, "loss": 0.00025404058396816254, "step": 210600 }, { "epoch": 59.78143627590122, "grad_norm": 0.007274511270225048, "learning_rate": 4.024269088844735e-05, "loss": 0.0003824206069111824, "step": 210610 }, { "epoch": 59.784274765824584, "grad_norm": 0.019795170053839684, "learning_rate": 4.023985239852398e-05, "loss": 0.00011840462684631348, "step": 210620 }, { "epoch": 59.78711325574794, "grad_norm": 0.05097790062427521, "learning_rate": 4.0237013908600625e-05, "loss": 0.00021423306316137314, "step": 210630 }, { "epoch": 59.7899517456713, "grad_norm": 0.026979275047779083, "learning_rate": 4.0234175418677266e-05, "loss": 0.0002631392329931259, "step": 210640 }, { "epoch": 59.792790235594666, "grad_norm": 0.010032949037849903, "learning_rate": 4.023133692875391e-05, "loss": 0.0005153145641088486, "step": 210650 }, { "epoch": 59.79562872551802, "grad_norm": 0.003798228921368718, "learning_rate": 4.022849843883054e-05, "loss": 9.913258254528046e-05, "step": 210660 }, { "epoch": 59.798467215441384, "grad_norm": 0.02813803032040596, "learning_rate": 4.022565994890718e-05, "loss": 0.0011233502998948097, "step": 210670 }, { "epoch": 59.80130570536475, "grad_norm": 0.018761225044727325, "learning_rate": 4.0222821458983825e-05, "loss": 0.0017597028985619546, "step": 210680 }, { "epoch": 59.80414419528811, "grad_norm": 0.04273398220539093, "learning_rate": 4.021998296906046e-05, "loss": 0.00010440349578857421, "step": 210690 }, { "epoch": 59.806982685211466, "grad_norm": 0.2622203528881073, "learning_rate": 4.02171444791371e-05, "loss": 0.0010709809139370918, "step": 210700 }, { "epoch": 59.80982117513483, "grad_norm": 0.31841719150543213, "learning_rate": 4.021430598921374e-05, "loss": 0.0003669349476695061, "step": 210710 }, { "epoch": 59.81265966505819, "grad_norm": 4.808688640594482, "learning_rate": 4.021146749929038e-05, "loss": 0.0010280989110469819, "step": 210720 }, { "epoch": 59.81549815498155, "grad_norm": 0.7483634352684021, "learning_rate": 4.020862900936702e-05, "loss": 0.0005241613835096359, "step": 210730 }, { "epoch": 59.81833664490491, "grad_norm": 0.011925162747502327, "learning_rate": 4.020579051944366e-05, "loss": 0.0002148820087313652, "step": 210740 }, { "epoch": 59.82117513482827, "grad_norm": 0.249178946018219, "learning_rate": 4.0202952029520294e-05, "loss": 0.00011719521135091781, "step": 210750 }, { "epoch": 59.82401362475163, "grad_norm": 0.006705583073198795, "learning_rate": 4.0200113539596935e-05, "loss": 0.00017497334629297258, "step": 210760 }, { "epoch": 59.82685211467499, "grad_norm": 0.5271938443183899, "learning_rate": 4.019727504967358e-05, "loss": 0.0005043981596827507, "step": 210770 }, { "epoch": 59.829690604598355, "grad_norm": 0.052666075527668, "learning_rate": 4.019443655975022e-05, "loss": 0.001373761147260666, "step": 210780 }, { "epoch": 59.83252909452172, "grad_norm": 0.1852218359708786, "learning_rate": 4.019159806982685e-05, "loss": 0.002062247134745121, "step": 210790 }, { "epoch": 59.83536758444507, "grad_norm": 0.07324817031621933, "learning_rate": 4.0188759579903494e-05, "loss": 0.00019755661487579345, "step": 210800 }, { "epoch": 59.838206074368436, "grad_norm": 0.010061172768473625, "learning_rate": 4.0185921089980136e-05, "loss": 0.00047082845121622087, "step": 210810 }, { "epoch": 59.8410445642918, "grad_norm": 0.0613379068672657, "learning_rate": 4.018308260005677e-05, "loss": 7.816236466169357e-05, "step": 210820 }, { "epoch": 59.843883054215155, "grad_norm": 0.5641124248504639, "learning_rate": 4.018024411013341e-05, "loss": 0.0002775399014353752, "step": 210830 }, { "epoch": 59.84672154413852, "grad_norm": 0.1299961656332016, "learning_rate": 4.017740562021005e-05, "loss": 0.0003475060686469078, "step": 210840 }, { "epoch": 59.84956003406188, "grad_norm": 0.06207150220870972, "learning_rate": 4.017456713028669e-05, "loss": 0.00029970817267894746, "step": 210850 }, { "epoch": 59.85239852398524, "grad_norm": 0.021081140264868736, "learning_rate": 4.017172864036333e-05, "loss": 0.0005849167704582214, "step": 210860 }, { "epoch": 59.8552370139086, "grad_norm": 0.08063389360904694, "learning_rate": 4.016889015043997e-05, "loss": 0.0008012996986508369, "step": 210870 }, { "epoch": 59.85807550383196, "grad_norm": 0.03809959441423416, "learning_rate": 4.0166051660516605e-05, "loss": 0.0012021102011203766, "step": 210880 }, { "epoch": 59.860913993755325, "grad_norm": 0.002306652721017599, "learning_rate": 4.0163213170593246e-05, "loss": 0.0004895903170108795, "step": 210890 }, { "epoch": 59.86375248367868, "grad_norm": 0.0027965912595391273, "learning_rate": 4.016037468066989e-05, "loss": 0.012453050166368485, "step": 210900 }, { "epoch": 59.866590973602044, "grad_norm": 0.0605337917804718, "learning_rate": 4.015753619074652e-05, "loss": 0.00020292233675718309, "step": 210910 }, { "epoch": 59.86942946352541, "grad_norm": 0.05769046023488045, "learning_rate": 4.0154697700823164e-05, "loss": 0.0019155282527208329, "step": 210920 }, { "epoch": 59.87226795344876, "grad_norm": 0.008247571997344494, "learning_rate": 4.0151859210899805e-05, "loss": 0.0007664173841476441, "step": 210930 }, { "epoch": 59.875106443372125, "grad_norm": 17.77686882019043, "learning_rate": 4.0149020720976446e-05, "loss": 0.005630841851234436, "step": 210940 }, { "epoch": 59.87794493329549, "grad_norm": 0.004648994654417038, "learning_rate": 4.014618223105308e-05, "loss": 0.0003875220194458961, "step": 210950 }, { "epoch": 59.880783423218844, "grad_norm": 0.0019209394231438637, "learning_rate": 4.0143343741129715e-05, "loss": 9.198654443025589e-05, "step": 210960 }, { "epoch": 59.88362191314221, "grad_norm": 0.0416763573884964, "learning_rate": 4.0140505251206364e-05, "loss": 0.00029029715806245805, "step": 210970 }, { "epoch": 59.88646040306557, "grad_norm": 0.03155788779258728, "learning_rate": 4.0137666761283e-05, "loss": 0.012865766882896423, "step": 210980 }, { "epoch": 59.88929889298893, "grad_norm": 0.0812373012304306, "learning_rate": 4.013482827135964e-05, "loss": 0.00020758211612701417, "step": 210990 }, { "epoch": 59.89213738291229, "grad_norm": 0.013183712027966976, "learning_rate": 4.013198978143628e-05, "loss": 0.00010536834597587585, "step": 211000 }, { "epoch": 59.89213738291229, "eval_accuracy": 0.9837858459973294, "eval_loss": 0.06623616069555283, "eval_runtime": 49.7998, "eval_samples_per_second": 315.804, "eval_steps_per_second": 4.94, "step": 211000 }, { "epoch": 59.89497587283565, "grad_norm": 0.28924357891082764, "learning_rate": 4.0129151291512916e-05, "loss": 0.0006390308961272239, "step": 211010 }, { "epoch": 59.897814362759014, "grad_norm": 0.2971698045730591, "learning_rate": 4.012631280158956e-05, "loss": 0.00018947385251522064, "step": 211020 }, { "epoch": 59.90065285268237, "grad_norm": 0.0074814301915466785, "learning_rate": 4.01234743116662e-05, "loss": 0.0008505212143063545, "step": 211030 }, { "epoch": 59.90349134260573, "grad_norm": 0.5590770840644836, "learning_rate": 4.012063582174283e-05, "loss": 0.011995954811573029, "step": 211040 }, { "epoch": 59.906329832529096, "grad_norm": 0.042872242629528046, "learning_rate": 4.0117797331819474e-05, "loss": 0.0003357740119099617, "step": 211050 }, { "epoch": 59.90916832245246, "grad_norm": 0.007372106425464153, "learning_rate": 4.011495884189611e-05, "loss": 0.0001923808827996254, "step": 211060 }, { "epoch": 59.912006812375814, "grad_norm": 19.612497329711914, "learning_rate": 4.011212035197276e-05, "loss": 0.008224330097436904, "step": 211070 }, { "epoch": 59.91484530229918, "grad_norm": 0.03895479440689087, "learning_rate": 4.010928186204939e-05, "loss": 0.00016968157142400743, "step": 211080 }, { "epoch": 59.91768379222254, "grad_norm": 0.17549452185630798, "learning_rate": 4.0106443372126026e-05, "loss": 0.0009268904104828835, "step": 211090 }, { "epoch": 59.920522282145896, "grad_norm": 0.5387036800384521, "learning_rate": 4.0103604882202674e-05, "loss": 0.00012839734554290772, "step": 211100 }, { "epoch": 59.92336077206926, "grad_norm": 0.01617310754954815, "learning_rate": 4.010076639227931e-05, "loss": 0.0039011090993881226, "step": 211110 }, { "epoch": 59.92619926199262, "grad_norm": 0.02556696906685829, "learning_rate": 4.009792790235595e-05, "loss": 0.0006923109292984008, "step": 211120 }, { "epoch": 59.92903775191598, "grad_norm": 0.10463695973157883, "learning_rate": 4.009508941243259e-05, "loss": 0.00046621710062026976, "step": 211130 }, { "epoch": 59.93187624183934, "grad_norm": 0.09827682375907898, "learning_rate": 4.0092250922509226e-05, "loss": 0.0006188038736581802, "step": 211140 }, { "epoch": 59.9347147317627, "grad_norm": 0.3243017792701721, "learning_rate": 4.008941243258587e-05, "loss": 0.0059475615620613095, "step": 211150 }, { "epoch": 59.937553221686066, "grad_norm": 0.245350182056427, "learning_rate": 4.00865739426625e-05, "loss": 0.0018785659223794938, "step": 211160 }, { "epoch": 59.94039171160942, "grad_norm": 6.560412883758545, "learning_rate": 4.0083735452739144e-05, "loss": 0.0009349018335342407, "step": 211170 }, { "epoch": 59.943230201532785, "grad_norm": 0.008826768025755882, "learning_rate": 4.0080896962815785e-05, "loss": 0.00014126691967248918, "step": 211180 }, { "epoch": 59.94606869145615, "grad_norm": 0.01769791543483734, "learning_rate": 4.007805847289242e-05, "loss": 0.00014474354684352876, "step": 211190 }, { "epoch": 59.948907181379504, "grad_norm": 2.01171875, "learning_rate": 4.007521998296906e-05, "loss": 0.0003966676071286201, "step": 211200 }, { "epoch": 59.95174567130287, "grad_norm": 0.043366171419620514, "learning_rate": 4.00723814930457e-05, "loss": 0.0001292165368795395, "step": 211210 }, { "epoch": 59.95458416122623, "grad_norm": 0.05338599532842636, "learning_rate": 4.006954300312234e-05, "loss": 0.004366294294595718, "step": 211220 }, { "epoch": 59.957422651149585, "grad_norm": 0.0406644381582737, "learning_rate": 4.0066704513198985e-05, "loss": 0.00024694427847862246, "step": 211230 }, { "epoch": 59.96026114107295, "grad_norm": 0.2649052143096924, "learning_rate": 4.006386602327562e-05, "loss": 0.006828048080205917, "step": 211240 }, { "epoch": 59.96309963099631, "grad_norm": 0.003307137405499816, "learning_rate": 4.006102753335226e-05, "loss": 0.00023749321699142456, "step": 211250 }, { "epoch": 59.965938120919674, "grad_norm": 0.013107714243233204, "learning_rate": 4.0058189043428896e-05, "loss": 0.00024860557168722154, "step": 211260 }, { "epoch": 59.96877661084303, "grad_norm": 0.0023221303708851337, "learning_rate": 4.005535055350554e-05, "loss": 0.0006845733150839806, "step": 211270 }, { "epoch": 59.97161510076639, "grad_norm": 5.905044078826904, "learning_rate": 4.005251206358218e-05, "loss": 0.002938707172870636, "step": 211280 }, { "epoch": 59.974453590689755, "grad_norm": 0.007921175099909306, "learning_rate": 4.004967357365881e-05, "loss": 0.00025388021022081374, "step": 211290 }, { "epoch": 59.97729208061311, "grad_norm": 0.023220928385853767, "learning_rate": 4.0046835083735454e-05, "loss": 0.00010630488395690918, "step": 211300 }, { "epoch": 59.980130570536474, "grad_norm": 0.6433682441711426, "learning_rate": 4.0043996593812096e-05, "loss": 0.0059087049216032025, "step": 211310 }, { "epoch": 59.98296906045984, "grad_norm": 0.011476395651698112, "learning_rate": 4.004115810388873e-05, "loss": 0.0001728970557451248, "step": 211320 }, { "epoch": 59.98580755038319, "grad_norm": 5.1097493171691895, "learning_rate": 4.003831961396537e-05, "loss": 0.0014769097790122033, "step": 211330 }, { "epoch": 59.988646040306556, "grad_norm": 0.043365076184272766, "learning_rate": 4.003548112404201e-05, "loss": 0.001399499736726284, "step": 211340 }, { "epoch": 59.99148453022992, "grad_norm": 0.03213369846343994, "learning_rate": 4.003264263411865e-05, "loss": 0.0003226403146982193, "step": 211350 }, { "epoch": 59.99432302015328, "grad_norm": 0.0014793562004342675, "learning_rate": 4.002980414419529e-05, "loss": 0.002831130102276802, "step": 211360 }, { "epoch": 59.99716151007664, "grad_norm": 0.0007799636223353446, "learning_rate": 4.002696565427193e-05, "loss": 0.002318821661174297, "step": 211370 }, { "epoch": 60.0, "grad_norm": 0.010991208255290985, "learning_rate": 4.0024127164348565e-05, "loss": 3.370088816154748e-05, "step": 211380 }, { "epoch": 60.00283848992336, "grad_norm": 0.026767943054437637, "learning_rate": 4.0021288674425207e-05, "loss": 0.001680317148566246, "step": 211390 }, { "epoch": 60.00567697984672, "grad_norm": 0.13352073729038239, "learning_rate": 4.001845018450185e-05, "loss": 0.00018309373408555984, "step": 211400 }, { "epoch": 60.00851546977008, "grad_norm": 0.009162954986095428, "learning_rate": 4.001561169457849e-05, "loss": 0.0007371518760919571, "step": 211410 }, { "epoch": 60.011353959693444, "grad_norm": 0.12033513188362122, "learning_rate": 4.0012773204655124e-05, "loss": 7.536336779594422e-05, "step": 211420 }, { "epoch": 60.01419244961681, "grad_norm": 0.003172724274918437, "learning_rate": 4.0009934714731765e-05, "loss": 6.837155669927597e-05, "step": 211430 }, { "epoch": 60.01703093954016, "grad_norm": 0.16391775012016296, "learning_rate": 4.0007096224808407e-05, "loss": 0.002206452749669552, "step": 211440 }, { "epoch": 60.019869429463526, "grad_norm": 0.27258020639419556, "learning_rate": 4.000425773488504e-05, "loss": 0.00010881554335355759, "step": 211450 }, { "epoch": 60.02270791938689, "grad_norm": 0.00591244176030159, "learning_rate": 4.000141924496168e-05, "loss": 6.704181432724e-05, "step": 211460 }, { "epoch": 60.025546409310245, "grad_norm": 0.0021940649021416903, "learning_rate": 3.9998580755038324e-05, "loss": 0.00011674538254737853, "step": 211470 }, { "epoch": 60.02838489923361, "grad_norm": 0.016538552939891815, "learning_rate": 3.999574226511496e-05, "loss": 0.00010307040065526962, "step": 211480 }, { "epoch": 60.03122338915697, "grad_norm": 0.039475709199905396, "learning_rate": 3.99929037751916e-05, "loss": 6.884671747684479e-05, "step": 211490 }, { "epoch": 60.034061879080326, "grad_norm": 0.0032733792904764414, "learning_rate": 3.999006528526824e-05, "loss": 3.2367929816246035e-05, "step": 211500 }, { "epoch": 60.034061879080326, "eval_accuracy": 0.9848667895975075, "eval_loss": 0.060948558151721954, "eval_runtime": 53.9296, "eval_samples_per_second": 291.621, "eval_steps_per_second": 4.562, "step": 211500 }, { "epoch": 60.03690036900369, "grad_norm": 0.08333180099725723, "learning_rate": 3.9987226795344876e-05, "loss": 7.370878010988235e-05, "step": 211510 }, { "epoch": 60.03973885892705, "grad_norm": 0.003095032647252083, "learning_rate": 3.998438830542152e-05, "loss": 6.972029805183411e-05, "step": 211520 }, { "epoch": 60.042577348850415, "grad_norm": 0.005878860130906105, "learning_rate": 3.998154981549816e-05, "loss": 0.00010675713419914246, "step": 211530 }, { "epoch": 60.04541583877377, "grad_norm": 0.005596591159701347, "learning_rate": 3.99787113255748e-05, "loss": 4.643909633159637e-05, "step": 211540 }, { "epoch": 60.04825432869713, "grad_norm": 0.017724383622407913, "learning_rate": 3.9975872835651435e-05, "loss": 0.005855198204517365, "step": 211550 }, { "epoch": 60.051092818620496, "grad_norm": 0.07526236772537231, "learning_rate": 3.997303434572807e-05, "loss": 8.684564381837845e-05, "step": 211560 }, { "epoch": 60.05393130854385, "grad_norm": 0.008497546426951885, "learning_rate": 3.997019585580472e-05, "loss": 0.0038025252521038055, "step": 211570 }, { "epoch": 60.056769798467215, "grad_norm": 0.005576486699283123, "learning_rate": 3.996735736588135e-05, "loss": 4.453882575035095e-05, "step": 211580 }, { "epoch": 60.05960828839058, "grad_norm": 0.1235622763633728, "learning_rate": 3.996451887595799e-05, "loss": 0.0002044636756181717, "step": 211590 }, { "epoch": 60.062446778313934, "grad_norm": 0.005634631495922804, "learning_rate": 3.9961680386034635e-05, "loss": 0.00011747777462005616, "step": 211600 }, { "epoch": 60.0652852682373, "grad_norm": 0.006744015030562878, "learning_rate": 3.995884189611127e-05, "loss": 0.00013620518147945405, "step": 211610 }, { "epoch": 60.06812375816066, "grad_norm": 0.15454912185668945, "learning_rate": 3.995600340618791e-05, "loss": 0.00029403213411569593, "step": 211620 }, { "epoch": 60.07096224808402, "grad_norm": 0.5112202167510986, "learning_rate": 3.995316491626455e-05, "loss": 0.009015548229217529, "step": 211630 }, { "epoch": 60.07380073800738, "grad_norm": 0.07801752537488937, "learning_rate": 3.995032642634119e-05, "loss": 0.002673209831118584, "step": 211640 }, { "epoch": 60.07663922793074, "grad_norm": 0.393461674451828, "learning_rate": 3.994748793641783e-05, "loss": 0.0008581424131989479, "step": 211650 }, { "epoch": 60.079477717854104, "grad_norm": 0.027619944885373116, "learning_rate": 3.994464944649446e-05, "loss": 5.6906230747699736e-05, "step": 211660 }, { "epoch": 60.08231620777746, "grad_norm": 0.05443212389945984, "learning_rate": 3.9941810956571104e-05, "loss": 0.0027154933661222456, "step": 211670 }, { "epoch": 60.08515469770082, "grad_norm": 0.012250023894011974, "learning_rate": 3.9938972466647745e-05, "loss": 0.00238351933658123, "step": 211680 }, { "epoch": 60.087993187624186, "grad_norm": 0.04887106269598007, "learning_rate": 3.993613397672438e-05, "loss": 0.0020824361592531202, "step": 211690 }, { "epoch": 60.09083167754754, "grad_norm": 0.029552146792411804, "learning_rate": 3.993329548680103e-05, "loss": 8.28620046377182e-05, "step": 211700 }, { "epoch": 60.093670167470904, "grad_norm": 0.3733278512954712, "learning_rate": 3.993045699687766e-05, "loss": 0.0002599136903882027, "step": 211710 }, { "epoch": 60.09650865739427, "grad_norm": 0.04744645580649376, "learning_rate": 3.99276185069543e-05, "loss": 0.00015619974583387376, "step": 211720 }, { "epoch": 60.09934714731763, "grad_norm": 0.0030681106727570295, "learning_rate": 3.9924780017030945e-05, "loss": 0.006231621652841568, "step": 211730 }, { "epoch": 60.102185637240986, "grad_norm": 8.875021934509277, "learning_rate": 3.992194152710758e-05, "loss": 0.0024746071547269823, "step": 211740 }, { "epoch": 60.10502412716435, "grad_norm": 1.6084117889404297, "learning_rate": 3.991910303718422e-05, "loss": 0.007599867880344391, "step": 211750 }, { "epoch": 60.10786261708771, "grad_norm": 0.8438500165939331, "learning_rate": 3.9916264547260856e-05, "loss": 0.004552953690290451, "step": 211760 }, { "epoch": 60.11070110701107, "grad_norm": 0.09593651443719864, "learning_rate": 3.99134260573375e-05, "loss": 0.0006748326122760773, "step": 211770 }, { "epoch": 60.11353959693443, "grad_norm": 0.1369381994009018, "learning_rate": 3.991058756741414e-05, "loss": 0.0018650606274604798, "step": 211780 }, { "epoch": 60.11637808685779, "grad_norm": 0.20985642075538635, "learning_rate": 3.9907749077490773e-05, "loss": 0.006280989199876785, "step": 211790 }, { "epoch": 60.119216576781156, "grad_norm": 0.0010590087622404099, "learning_rate": 3.9904910587567415e-05, "loss": 0.004740633442997933, "step": 211800 }, { "epoch": 60.12205506670451, "grad_norm": 0.2266254872083664, "learning_rate": 3.9902072097644056e-05, "loss": 0.0075053729116916655, "step": 211810 }, { "epoch": 60.124893556627875, "grad_norm": 0.005999004002660513, "learning_rate": 3.989923360772069e-05, "loss": 0.00711791068315506, "step": 211820 }, { "epoch": 60.12773204655124, "grad_norm": 0.006154725793749094, "learning_rate": 3.989639511779734e-05, "loss": 0.0006235135719180107, "step": 211830 }, { "epoch": 60.13057053647459, "grad_norm": 7.755245208740234, "learning_rate": 3.9893556627873974e-05, "loss": 0.003603578358888626, "step": 211840 }, { "epoch": 60.133409026397956, "grad_norm": 0.10767850279808044, "learning_rate": 3.989071813795061e-05, "loss": 0.00024323891848325728, "step": 211850 }, { "epoch": 60.13624751632132, "grad_norm": 0.038033220916986465, "learning_rate": 3.988787964802725e-05, "loss": 0.00041391607373952866, "step": 211860 }, { "epoch": 60.139086006244675, "grad_norm": 0.01381654292345047, "learning_rate": 3.988504115810389e-05, "loss": 0.0004308277741074562, "step": 211870 }, { "epoch": 60.14192449616804, "grad_norm": 0.020964283496141434, "learning_rate": 3.988220266818053e-05, "loss": 0.00031476058065891264, "step": 211880 }, { "epoch": 60.1447629860914, "grad_norm": 0.17476144433021545, "learning_rate": 3.987936417825717e-05, "loss": 0.000702284649014473, "step": 211890 }, { "epoch": 60.14760147601476, "grad_norm": 0.018786799162626266, "learning_rate": 3.987652568833381e-05, "loss": 0.00040723979473114016, "step": 211900 }, { "epoch": 60.15043996593812, "grad_norm": 0.7711598873138428, "learning_rate": 3.987368719841045e-05, "loss": 0.002673698402941227, "step": 211910 }, { "epoch": 60.15327845586148, "grad_norm": 0.5119607448577881, "learning_rate": 3.9870848708487084e-05, "loss": 0.0004138389602303505, "step": 211920 }, { "epoch": 60.156116945784845, "grad_norm": 0.2739642858505249, "learning_rate": 3.9868010218563726e-05, "loss": 0.00020728427916765212, "step": 211930 }, { "epoch": 60.1589554357082, "grad_norm": 0.005416936241090298, "learning_rate": 3.986517172864037e-05, "loss": 0.0023627204820513724, "step": 211940 }, { "epoch": 60.161793925631564, "grad_norm": 0.05311058089137077, "learning_rate": 3.9862333238717e-05, "loss": 0.0006171122193336486, "step": 211950 }, { "epoch": 60.16463241555493, "grad_norm": 0.057205162942409515, "learning_rate": 3.985949474879364e-05, "loss": 0.0025084838271141052, "step": 211960 }, { "epoch": 60.16747090547828, "grad_norm": 0.10606003552675247, "learning_rate": 3.9856656258870284e-05, "loss": 0.0007351776584982872, "step": 211970 }, { "epoch": 60.170309395401645, "grad_norm": 1.2796502113342285, "learning_rate": 3.985381776894692e-05, "loss": 0.0013513611629605293, "step": 211980 }, { "epoch": 60.17314788532501, "grad_norm": 0.06100192666053772, "learning_rate": 3.985097927902356e-05, "loss": 0.0009834714233875274, "step": 211990 }, { "epoch": 60.17598637524837, "grad_norm": 0.04597804695367813, "learning_rate": 3.98481407891002e-05, "loss": 0.0030307872220873833, "step": 212000 }, { "epoch": 60.17598637524837, "eval_accuracy": 0.9820054683029186, "eval_loss": 0.07369047403335571, "eval_runtime": 41.3849, "eval_samples_per_second": 380.018, "eval_steps_per_second": 5.944, "step": 212000 }, { "epoch": 60.17882486517173, "grad_norm": 0.03173098713159561, "learning_rate": 3.984530229917684e-05, "loss": 0.0006128091365098954, "step": 212010 }, { "epoch": 60.18166335509509, "grad_norm": 3.2473950386047363, "learning_rate": 3.984246380925348e-05, "loss": 0.0013378188014030457, "step": 212020 }, { "epoch": 60.18450184501845, "grad_norm": 0.12000750005245209, "learning_rate": 3.983962531933012e-05, "loss": 0.00022695884108543397, "step": 212030 }, { "epoch": 60.18734033494181, "grad_norm": 0.04735637456178665, "learning_rate": 3.983678682940676e-05, "loss": 0.0024904211983084678, "step": 212040 }, { "epoch": 60.19017882486517, "grad_norm": 0.03069641813635826, "learning_rate": 3.9833948339483395e-05, "loss": 0.0009209020063281059, "step": 212050 }, { "epoch": 60.193017314788534, "grad_norm": 0.01586238294839859, "learning_rate": 3.9831109849560036e-05, "loss": 0.005956894159317017, "step": 212060 }, { "epoch": 60.19585580471189, "grad_norm": 0.21611036360263824, "learning_rate": 3.982827135963668e-05, "loss": 0.00029775314033031464, "step": 212070 }, { "epoch": 60.19869429463525, "grad_norm": 0.10711481422185898, "learning_rate": 3.982543286971331e-05, "loss": 0.00025357510894536973, "step": 212080 }, { "epoch": 60.201532784558616, "grad_norm": 0.009639152325689793, "learning_rate": 3.9822594379789954e-05, "loss": 0.0010482540354132652, "step": 212090 }, { "epoch": 60.20437127448198, "grad_norm": 0.24615854024887085, "learning_rate": 3.9819755889866595e-05, "loss": 0.0002727033570408821, "step": 212100 }, { "epoch": 60.207209764405334, "grad_norm": 0.002861861838027835, "learning_rate": 3.981691739994323e-05, "loss": 0.010339881479740142, "step": 212110 }, { "epoch": 60.2100482543287, "grad_norm": 0.007371113635599613, "learning_rate": 3.981407891001987e-05, "loss": 0.00015680454671382905, "step": 212120 }, { "epoch": 60.21288674425206, "grad_norm": 0.044957563281059265, "learning_rate": 3.981124042009651e-05, "loss": 0.0003361782059073448, "step": 212130 }, { "epoch": 60.215725234175416, "grad_norm": 0.021893000230193138, "learning_rate": 3.980840193017315e-05, "loss": 0.007991394400596619, "step": 212140 }, { "epoch": 60.21856372409878, "grad_norm": 0.12670545279979706, "learning_rate": 3.980556344024979e-05, "loss": 0.0049216743558645245, "step": 212150 }, { "epoch": 60.22140221402214, "grad_norm": 0.002751643769443035, "learning_rate": 3.980272495032643e-05, "loss": 0.00012104548513889313, "step": 212160 }, { "epoch": 60.2242407039455, "grad_norm": 0.03987631946802139, "learning_rate": 3.979988646040307e-05, "loss": 0.006974906474351883, "step": 212170 }, { "epoch": 60.22707919386886, "grad_norm": 0.18805108964443207, "learning_rate": 3.9797047970479706e-05, "loss": 0.0002377023920416832, "step": 212180 }, { "epoch": 60.22991768379222, "grad_norm": 0.029305772855877876, "learning_rate": 3.979420948055634e-05, "loss": 0.0003559805452823639, "step": 212190 }, { "epoch": 60.232756173715586, "grad_norm": 0.021374380216002464, "learning_rate": 3.979137099063299e-05, "loss": 0.00021773930639028548, "step": 212200 }, { "epoch": 60.23559466363894, "grad_norm": 4.740433216094971, "learning_rate": 3.978853250070962e-05, "loss": 0.004634656384587288, "step": 212210 }, { "epoch": 60.238433153562305, "grad_norm": 0.008489562198519707, "learning_rate": 3.9785694010786264e-05, "loss": 0.0002492103725671768, "step": 212220 }, { "epoch": 60.24127164348567, "grad_norm": 0.28650179505348206, "learning_rate": 3.9782855520862906e-05, "loss": 0.006513088196516037, "step": 212230 }, { "epoch": 60.24411013340902, "grad_norm": 8.41603946685791, "learning_rate": 3.978001703093954e-05, "loss": 0.003095368854701519, "step": 212240 }, { "epoch": 60.246948623332386, "grad_norm": 0.36297813057899475, "learning_rate": 3.977717854101618e-05, "loss": 0.0006263529881834984, "step": 212250 }, { "epoch": 60.24978711325575, "grad_norm": 0.011391627602279186, "learning_rate": 3.977434005109282e-05, "loss": 0.0004589211195707321, "step": 212260 }, { "epoch": 60.25262560317911, "grad_norm": 0.024777693673968315, "learning_rate": 3.977150156116946e-05, "loss": 0.008516910672187804, "step": 212270 }, { "epoch": 60.25546409310247, "grad_norm": 0.14799749851226807, "learning_rate": 3.97686630712461e-05, "loss": 0.008817258477210998, "step": 212280 }, { "epoch": 60.25830258302583, "grad_norm": 0.07922844588756561, "learning_rate": 3.9765824581322734e-05, "loss": 0.004270610213279724, "step": 212290 }, { "epoch": 60.261141072949194, "grad_norm": 0.002668931381776929, "learning_rate": 3.976298609139938e-05, "loss": 0.007077110558748245, "step": 212300 }, { "epoch": 60.26397956287255, "grad_norm": 0.25558459758758545, "learning_rate": 3.9760147601476016e-05, "loss": 0.0015852119773626327, "step": 212310 }, { "epoch": 60.26681805279591, "grad_norm": 0.1825808733701706, "learning_rate": 3.975730911155265e-05, "loss": 0.006506072729825974, "step": 212320 }, { "epoch": 60.269656542719275, "grad_norm": 3.6982338428497314, "learning_rate": 3.97544706216293e-05, "loss": 0.0020778125151991846, "step": 212330 }, { "epoch": 60.27249503264263, "grad_norm": 0.123346246778965, "learning_rate": 3.9751632131705934e-05, "loss": 0.00015386193990707397, "step": 212340 }, { "epoch": 60.275333522565994, "grad_norm": 0.024068539962172508, "learning_rate": 3.9748793641782575e-05, "loss": 0.004615358263254166, "step": 212350 }, { "epoch": 60.27817201248936, "grad_norm": 0.02783055044710636, "learning_rate": 3.9745955151859217e-05, "loss": 0.0002046896144747734, "step": 212360 }, { "epoch": 60.28101050241272, "grad_norm": 0.20249062776565552, "learning_rate": 3.974311666193585e-05, "loss": 0.0016035359352827073, "step": 212370 }, { "epoch": 60.283848992336075, "grad_norm": 0.0015400421107187867, "learning_rate": 3.974027817201249e-05, "loss": 0.00030311383306980133, "step": 212380 }, { "epoch": 60.28668748225944, "grad_norm": 0.007303687743842602, "learning_rate": 3.973743968208913e-05, "loss": 0.00023809317499399186, "step": 212390 }, { "epoch": 60.2895259721828, "grad_norm": 0.18284624814987183, "learning_rate": 3.973460119216577e-05, "loss": 0.002119842730462551, "step": 212400 }, { "epoch": 60.29236446210616, "grad_norm": 0.012710212729871273, "learning_rate": 3.973176270224241e-05, "loss": 0.0002568347379565239, "step": 212410 }, { "epoch": 60.29520295202952, "grad_norm": 0.0434952937066555, "learning_rate": 3.9728924212319044e-05, "loss": 0.0028581660240888595, "step": 212420 }, { "epoch": 60.29804144195288, "grad_norm": 0.27677181363105774, "learning_rate": 3.972608572239569e-05, "loss": 0.0003568863496184349, "step": 212430 }, { "epoch": 60.30087993187624, "grad_norm": 0.03731871023774147, "learning_rate": 3.972324723247233e-05, "loss": 0.0018178654834628105, "step": 212440 }, { "epoch": 60.3037184217996, "grad_norm": 0.020549559965729713, "learning_rate": 3.972040874254896e-05, "loss": 0.0002761168405413628, "step": 212450 }, { "epoch": 60.306556911722964, "grad_norm": 0.1276530921459198, "learning_rate": 3.971757025262561e-05, "loss": 0.0005835961550474167, "step": 212460 }, { "epoch": 60.30939540164633, "grad_norm": 0.0029632558580487967, "learning_rate": 3.9714731762702245e-05, "loss": 0.0011969443410634994, "step": 212470 }, { "epoch": 60.31223389156968, "grad_norm": 0.031773701310157776, "learning_rate": 3.9711893272778886e-05, "loss": 0.002903798408806324, "step": 212480 }, { "epoch": 60.315072381493046, "grad_norm": 0.049988530576229095, "learning_rate": 3.970905478285552e-05, "loss": 0.0003855705261230469, "step": 212490 }, { "epoch": 60.31791087141641, "grad_norm": 0.0515836663544178, "learning_rate": 3.970621629293216e-05, "loss": 0.00014488231390714646, "step": 212500 }, { "epoch": 60.31791087141641, "eval_accuracy": 0.9840401856679596, "eval_loss": 0.06775567680597305, "eval_runtime": 39.9802, "eval_samples_per_second": 393.37, "eval_steps_per_second": 6.153, "step": 212500 }, { "epoch": 60.320749361339765, "grad_norm": 0.009554225951433182, "learning_rate": 3.97033778030088e-05, "loss": 0.00010433457791805267, "step": 212510 }, { "epoch": 60.32358785126313, "grad_norm": 0.11526530236005783, "learning_rate": 3.970053931308544e-05, "loss": 9.222757071256638e-05, "step": 212520 }, { "epoch": 60.32642634118649, "grad_norm": 0.011600814759731293, "learning_rate": 3.969770082316208e-05, "loss": 0.00026355013251304625, "step": 212530 }, { "epoch": 60.329264831109846, "grad_norm": 0.037375982850790024, "learning_rate": 3.969486233323872e-05, "loss": 0.00010185372084379197, "step": 212540 }, { "epoch": 60.33210332103321, "grad_norm": 0.00511297257617116, "learning_rate": 3.9692023843315355e-05, "loss": 0.0005006628111004829, "step": 212550 }, { "epoch": 60.33494181095657, "grad_norm": 0.012043802067637444, "learning_rate": 3.9689185353392e-05, "loss": 0.00029445458203554155, "step": 212560 }, { "epoch": 60.337780300879935, "grad_norm": 0.011465472169220448, "learning_rate": 3.968634686346864e-05, "loss": 0.0001866759732365608, "step": 212570 }, { "epoch": 60.34061879080329, "grad_norm": 0.300385981798172, "learning_rate": 3.968350837354527e-05, "loss": 0.00010404791682958603, "step": 212580 }, { "epoch": 60.34345728072665, "grad_norm": 0.008204919286072254, "learning_rate": 3.9680669883621914e-05, "loss": 0.0001692861318588257, "step": 212590 }, { "epoch": 60.346295770650016, "grad_norm": 0.0020606147591024637, "learning_rate": 3.9677831393698555e-05, "loss": 0.00022705979645252228, "step": 212600 }, { "epoch": 60.34913426057337, "grad_norm": 0.002358651952818036, "learning_rate": 3.967499290377519e-05, "loss": 0.0001716088503599167, "step": 212610 }, { "epoch": 60.351972750496735, "grad_norm": 0.009669364430010319, "learning_rate": 3.967215441385183e-05, "loss": 0.00015949569642543794, "step": 212620 }, { "epoch": 60.3548112404201, "grad_norm": 0.10240329056978226, "learning_rate": 3.966931592392847e-05, "loss": 0.000335659459233284, "step": 212630 }, { "epoch": 60.35764973034346, "grad_norm": 0.06683558970689774, "learning_rate": 3.9666477434005114e-05, "loss": 0.00019307881593704223, "step": 212640 }, { "epoch": 60.36048822026682, "grad_norm": 0.11259228736162186, "learning_rate": 3.966363894408175e-05, "loss": 0.0003532428294420242, "step": 212650 }, { "epoch": 60.36332671019018, "grad_norm": 0.01091963890939951, "learning_rate": 3.966080045415839e-05, "loss": 7.384754717350006e-05, "step": 212660 }, { "epoch": 60.36616520011354, "grad_norm": 0.06079873815178871, "learning_rate": 3.965796196423503e-05, "loss": 0.0019289782270789147, "step": 212670 }, { "epoch": 60.3690036900369, "grad_norm": 0.04085516929626465, "learning_rate": 3.9655123474311666e-05, "loss": 0.00012993812561035156, "step": 212680 }, { "epoch": 60.37184217996026, "grad_norm": 0.07402053475379944, "learning_rate": 3.965228498438831e-05, "loss": 9.128283709287643e-05, "step": 212690 }, { "epoch": 60.374680669883624, "grad_norm": 0.024295996874570847, "learning_rate": 3.964944649446495e-05, "loss": 5.838684737682343e-05, "step": 212700 }, { "epoch": 60.37751915980698, "grad_norm": 0.02222076989710331, "learning_rate": 3.964660800454158e-05, "loss": 0.00016742870211601258, "step": 212710 }, { "epoch": 60.38035764973034, "grad_norm": 0.012124808505177498, "learning_rate": 3.9643769514618225e-05, "loss": 0.0027657948434352876, "step": 212720 }, { "epoch": 60.383196139653705, "grad_norm": 0.1403605043888092, "learning_rate": 3.9640931024694866e-05, "loss": 0.0013705506920814515, "step": 212730 }, { "epoch": 60.38603462957707, "grad_norm": 1.0123380422592163, "learning_rate": 3.96380925347715e-05, "loss": 0.0014742378145456315, "step": 212740 }, { "epoch": 60.388873119500424, "grad_norm": 1.6932437419891357, "learning_rate": 3.963525404484814e-05, "loss": 0.0007838778197765351, "step": 212750 }, { "epoch": 60.39171160942379, "grad_norm": 0.08223035931587219, "learning_rate": 3.9632415554924783e-05, "loss": 0.0001398816704750061, "step": 212760 }, { "epoch": 60.39455009934715, "grad_norm": 0.07912920415401459, "learning_rate": 3.9629577065001425e-05, "loss": 6.663594394922257e-05, "step": 212770 }, { "epoch": 60.397388589270506, "grad_norm": 0.01673322357237339, "learning_rate": 3.962673857507806e-05, "loss": 0.0009954029694199563, "step": 212780 }, { "epoch": 60.40022707919387, "grad_norm": 0.02815474569797516, "learning_rate": 3.9623900085154694e-05, "loss": 6.127860397100448e-05, "step": 212790 }, { "epoch": 60.40306556911723, "grad_norm": 0.00808800756931305, "learning_rate": 3.962106159523134e-05, "loss": 0.0002549702301621437, "step": 212800 }, { "epoch": 60.40590405904059, "grad_norm": 0.04017455503344536, "learning_rate": 3.961822310530798e-05, "loss": 0.00024487953633069994, "step": 212810 }, { "epoch": 60.40874254896395, "grad_norm": 0.010161147452890873, "learning_rate": 3.961538461538462e-05, "loss": 0.00012553911656141282, "step": 212820 }, { "epoch": 60.41158103888731, "grad_norm": 0.005441841669380665, "learning_rate": 3.961254612546126e-05, "loss": 0.00021953284740447999, "step": 212830 }, { "epoch": 60.414419528810676, "grad_norm": 0.02748204953968525, "learning_rate": 3.9609707635537894e-05, "loss": 7.799919694662094e-05, "step": 212840 }, { "epoch": 60.41725801873403, "grad_norm": 0.005837460979819298, "learning_rate": 3.9606869145614536e-05, "loss": 3.552455455064773e-05, "step": 212850 }, { "epoch": 60.420096508657394, "grad_norm": 0.005937260575592518, "learning_rate": 3.960403065569118e-05, "loss": 0.0005227418616414071, "step": 212860 }, { "epoch": 60.42293499858076, "grad_norm": 0.1617201864719391, "learning_rate": 3.960119216576781e-05, "loss": 0.000337611511349678, "step": 212870 }, { "epoch": 60.42577348850411, "grad_norm": 0.05012418329715729, "learning_rate": 3.959835367584445e-05, "loss": 0.014780962467193603, "step": 212880 }, { "epoch": 60.428611978427476, "grad_norm": 0.006646816153079271, "learning_rate": 3.959551518592109e-05, "loss": 0.005425352603197098, "step": 212890 }, { "epoch": 60.43145046835084, "grad_norm": 17.30858612060547, "learning_rate": 3.9592676695997736e-05, "loss": 0.00393594540655613, "step": 212900 }, { "epoch": 60.434288958274195, "grad_norm": 0.06989622116088867, "learning_rate": 3.958983820607437e-05, "loss": 0.00021839775145053862, "step": 212910 }, { "epoch": 60.43712744819756, "grad_norm": 0.03129958733916283, "learning_rate": 3.9586999716151005e-05, "loss": 0.00020412523299455642, "step": 212920 }, { "epoch": 60.43996593812092, "grad_norm": 0.011874311603605747, "learning_rate": 3.958416122622765e-05, "loss": 8.548032492399216e-05, "step": 212930 }, { "epoch": 60.44280442804428, "grad_norm": 0.06468351185321808, "learning_rate": 3.958132273630429e-05, "loss": 4.236679524183273e-05, "step": 212940 }, { "epoch": 60.44564291796764, "grad_norm": 0.17168624699115753, "learning_rate": 3.957848424638093e-05, "loss": 0.00018309224396944045, "step": 212950 }, { "epoch": 60.448481407891, "grad_norm": 0.0013270537601783872, "learning_rate": 3.957564575645757e-05, "loss": 0.00013394262641668319, "step": 212960 }, { "epoch": 60.451319897814365, "grad_norm": 0.021746791899204254, "learning_rate": 3.9572807266534205e-05, "loss": 0.0005394915118813514, "step": 212970 }, { "epoch": 60.45415838773772, "grad_norm": 0.005612494423985481, "learning_rate": 3.9569968776610846e-05, "loss": 0.00035475660115480423, "step": 212980 }, { "epoch": 60.45699687766108, "grad_norm": 0.3112824559211731, "learning_rate": 3.956713028668748e-05, "loss": 8.608829230070115e-05, "step": 212990 }, { "epoch": 60.459835367584446, "grad_norm": 0.0039004371501505375, "learning_rate": 3.956429179676412e-05, "loss": 3.140978515148163e-05, "step": 213000 }, { "epoch": 60.459835367584446, "eval_accuracy": 0.9831499968207541, "eval_loss": 0.06976000964641571, "eval_runtime": 43.913, "eval_samples_per_second": 358.14, "eval_steps_per_second": 5.602, "step": 213000 }, { "epoch": 60.46267385750781, "grad_norm": 0.027713239192962646, "learning_rate": 3.9561453306840764e-05, "loss": 0.00013099517673254013, "step": 213010 }, { "epoch": 60.465512347431165, "grad_norm": 0.36507532000541687, "learning_rate": 3.95586148169174e-05, "loss": 0.0002602305263280869, "step": 213020 }, { "epoch": 60.46835083735453, "grad_norm": 0.12986885011196136, "learning_rate": 3.955577632699404e-05, "loss": 0.00036572236567735674, "step": 213030 }, { "epoch": 60.47118932727789, "grad_norm": 0.036873675882816315, "learning_rate": 3.955293783707068e-05, "loss": 0.00010195504873991012, "step": 213040 }, { "epoch": 60.47402781720125, "grad_norm": 0.010244925506412983, "learning_rate": 3.9550099347147316e-05, "loss": 0.00015963073819875718, "step": 213050 }, { "epoch": 60.47686630712461, "grad_norm": 0.1484581083059311, "learning_rate": 3.9547260857223964e-05, "loss": 0.00024148300290107727, "step": 213060 }, { "epoch": 60.47970479704797, "grad_norm": 0.01764155551791191, "learning_rate": 3.95444223673006e-05, "loss": 0.0021985484287142753, "step": 213070 }, { "epoch": 60.48254328697133, "grad_norm": 0.032153449952602386, "learning_rate": 3.954158387737723e-05, "loss": 8.634906262159348e-05, "step": 213080 }, { "epoch": 60.48538177689469, "grad_norm": 0.6058706641197205, "learning_rate": 3.9538745387453874e-05, "loss": 0.0002772077918052673, "step": 213090 }, { "epoch": 60.488220266818054, "grad_norm": 8.43421745300293, "learning_rate": 3.9535906897530516e-05, "loss": 0.0033555418252944947, "step": 213100 }, { "epoch": 60.49105875674142, "grad_norm": 0.07822538167238235, "learning_rate": 3.953306840760716e-05, "loss": 0.00014041606336832047, "step": 213110 }, { "epoch": 60.49389724666477, "grad_norm": 0.07423517107963562, "learning_rate": 3.953022991768379e-05, "loss": 0.00011880286037921906, "step": 213120 }, { "epoch": 60.496735736588136, "grad_norm": 0.029012804850935936, "learning_rate": 3.952739142776043e-05, "loss": 0.00017169862985610962, "step": 213130 }, { "epoch": 60.4995742265115, "grad_norm": 0.006618664599955082, "learning_rate": 3.9524552937837074e-05, "loss": 8.489079773426056e-05, "step": 213140 }, { "epoch": 60.502412716434854, "grad_norm": 0.008407771587371826, "learning_rate": 3.952171444791371e-05, "loss": 0.00031046029180288316, "step": 213150 }, { "epoch": 60.50525120635822, "grad_norm": 0.5362502336502075, "learning_rate": 3.951887595799035e-05, "loss": 0.00018949024379253388, "step": 213160 }, { "epoch": 60.50808969628158, "grad_norm": 0.13874736428260803, "learning_rate": 3.951603746806699e-05, "loss": 0.00043400786817073824, "step": 213170 }, { "epoch": 60.510928186204936, "grad_norm": 0.010938040912151337, "learning_rate": 3.9513198978143626e-05, "loss": 0.0011048583313822747, "step": 213180 }, { "epoch": 60.5137666761283, "grad_norm": 0.0024654387962073088, "learning_rate": 3.951036048822027e-05, "loss": 0.00025408826768398286, "step": 213190 }, { "epoch": 60.51660516605166, "grad_norm": 0.013264494948089123, "learning_rate": 3.950752199829691e-05, "loss": 0.0009010830894112587, "step": 213200 }, { "epoch": 60.519443655975024, "grad_norm": 0.08584606647491455, "learning_rate": 3.9504683508373544e-05, "loss": 0.0008974960073828697, "step": 213210 }, { "epoch": 60.52228214589838, "grad_norm": 0.006678230129182339, "learning_rate": 3.9501845018450185e-05, "loss": 0.007195340842008591, "step": 213220 }, { "epoch": 60.52512063582174, "grad_norm": 0.05627494677901268, "learning_rate": 3.9499006528526826e-05, "loss": 0.0008947083726525307, "step": 213230 }, { "epoch": 60.527959125745106, "grad_norm": 4.47646951675415, "learning_rate": 3.949616803860347e-05, "loss": 0.004413256421685219, "step": 213240 }, { "epoch": 60.53079761566846, "grad_norm": 0.006455734837800264, "learning_rate": 3.94933295486801e-05, "loss": 0.0021255116909742354, "step": 213250 }, { "epoch": 60.533636105591825, "grad_norm": 1.8666061162948608, "learning_rate": 3.9490491058756744e-05, "loss": 0.0005376311019062996, "step": 213260 }, { "epoch": 60.53647459551519, "grad_norm": 0.04592534154653549, "learning_rate": 3.9487652568833385e-05, "loss": 0.00027424246072769163, "step": 213270 }, { "epoch": 60.53931308543854, "grad_norm": 0.08362006396055222, "learning_rate": 3.948481407891002e-05, "loss": 0.0018646182492375375, "step": 213280 }, { "epoch": 60.542151575361906, "grad_norm": 0.007867248728871346, "learning_rate": 3.948197558898666e-05, "loss": 0.00011161286383867263, "step": 213290 }, { "epoch": 60.54499006528527, "grad_norm": 0.013029779307544231, "learning_rate": 3.94791370990633e-05, "loss": 0.0003301061689853668, "step": 213300 }, { "epoch": 60.54782855520863, "grad_norm": 0.00488699646666646, "learning_rate": 3.947629860913994e-05, "loss": 0.0003067810088396072, "step": 213310 }, { "epoch": 60.55066704513199, "grad_norm": 0.20604939758777618, "learning_rate": 3.947346011921658e-05, "loss": 0.0038337018340826035, "step": 213320 }, { "epoch": 60.55350553505535, "grad_norm": 0.06271491199731827, "learning_rate": 3.947062162929322e-05, "loss": 0.00670449361205101, "step": 213330 }, { "epoch": 60.55634402497871, "grad_norm": 0.10441937297582626, "learning_rate": 3.9467783139369854e-05, "loss": 0.004537772387266159, "step": 213340 }, { "epoch": 60.55918251490207, "grad_norm": 0.02274416573345661, "learning_rate": 3.9464944649446496e-05, "loss": 0.0003182008862495422, "step": 213350 }, { "epoch": 60.56202100482543, "grad_norm": 0.04895923286676407, "learning_rate": 3.946210615952314e-05, "loss": 0.00023002400994300842, "step": 213360 }, { "epoch": 60.564859494748795, "grad_norm": 0.2800334393978119, "learning_rate": 3.945926766959978e-05, "loss": 0.0005412871018052101, "step": 213370 }, { "epoch": 60.56769798467215, "grad_norm": 9.507326126098633, "learning_rate": 3.945642917967641e-05, "loss": 0.0017484812065958976, "step": 213380 }, { "epoch": 60.570536474595514, "grad_norm": 0.07042920589447021, "learning_rate": 3.945359068975305e-05, "loss": 0.00015174411237239838, "step": 213390 }, { "epoch": 60.57337496451888, "grad_norm": 0.01811349391937256, "learning_rate": 3.9450752199829696e-05, "loss": 0.0001971626654267311, "step": 213400 }, { "epoch": 60.57621345444224, "grad_norm": 0.1376662701368332, "learning_rate": 3.944791370990633e-05, "loss": 0.0007120246067643165, "step": 213410 }, { "epoch": 60.579051944365595, "grad_norm": 0.0027710176073014736, "learning_rate": 3.944507521998297e-05, "loss": 0.0010569386184215545, "step": 213420 }, { "epoch": 60.58189043428896, "grad_norm": 0.005536798387765884, "learning_rate": 3.944223673005961e-05, "loss": 0.0004207141697406769, "step": 213430 }, { "epoch": 60.58472892421232, "grad_norm": 0.01830754056572914, "learning_rate": 3.943939824013625e-05, "loss": 0.0006772620603442192, "step": 213440 }, { "epoch": 60.58756741413568, "grad_norm": 0.0075127724558115005, "learning_rate": 3.943655975021289e-05, "loss": 0.0008020395413041115, "step": 213450 }, { "epoch": 60.59040590405904, "grad_norm": 0.21454578638076782, "learning_rate": 3.943372126028953e-05, "loss": 0.003213921934366226, "step": 213460 }, { "epoch": 60.5932443939824, "grad_norm": 0.00993023905903101, "learning_rate": 3.9430882770366165e-05, "loss": 0.0009991761296987533, "step": 213470 }, { "epoch": 60.596082883905765, "grad_norm": 0.04017363861203194, "learning_rate": 3.9428044280442807e-05, "loss": 0.0017138129100203515, "step": 213480 }, { "epoch": 60.59892137382912, "grad_norm": 0.009501003660261631, "learning_rate": 3.942520579051945e-05, "loss": 0.0013861291110515595, "step": 213490 }, { "epoch": 60.601759863752484, "grad_norm": 16.14926528930664, "learning_rate": 3.942236730059608e-05, "loss": 0.004032497853040695, "step": 213500 }, { "epoch": 60.601759863752484, "eval_accuracy": 0.9785718827494119, "eval_loss": 0.09985098242759705, "eval_runtime": 42.3919, "eval_samples_per_second": 370.991, "eval_steps_per_second": 5.803, "step": 213500 }, { "epoch": 60.60459835367585, "grad_norm": 7.589154243469238, "learning_rate": 3.9419528810672724e-05, "loss": 0.0031957700848579407, "step": 213510 }, { "epoch": 60.6074368435992, "grad_norm": 8.4204740524292, "learning_rate": 3.941669032074936e-05, "loss": 0.0022195495665073397, "step": 213520 }, { "epoch": 60.610275333522566, "grad_norm": 0.10247217118740082, "learning_rate": 3.941385183082601e-05, "loss": 0.0013987571001052856, "step": 213530 }, { "epoch": 60.61311382344593, "grad_norm": 0.08348966389894485, "learning_rate": 3.941101334090264e-05, "loss": 0.0005657609552145004, "step": 213540 }, { "epoch": 60.615952313369284, "grad_norm": 0.0226101316511631, "learning_rate": 3.9408174850979276e-05, "loss": 9.591113775968552e-05, "step": 213550 }, { "epoch": 60.61879080329265, "grad_norm": 6.891611576080322, "learning_rate": 3.9405336361055924e-05, "loss": 0.0012139821425080299, "step": 213560 }, { "epoch": 60.62162929321601, "grad_norm": 0.025423843413591385, "learning_rate": 3.940249787113256e-05, "loss": 6.5586157143116e-05, "step": 213570 }, { "epoch": 60.62446778313937, "grad_norm": 0.05327266827225685, "learning_rate": 3.93996593812092e-05, "loss": 0.0001389913260936737, "step": 213580 }, { "epoch": 60.62730627306273, "grad_norm": 0.003603239543735981, "learning_rate": 3.939682089128584e-05, "loss": 0.0016888437792658805, "step": 213590 }, { "epoch": 60.63014476298609, "grad_norm": 1.3959593772888184, "learning_rate": 3.9393982401362476e-05, "loss": 0.0005242545157670975, "step": 213600 }, { "epoch": 60.632983252909455, "grad_norm": 2.272453546524048, "learning_rate": 3.939114391143912e-05, "loss": 0.0011830855160951614, "step": 213610 }, { "epoch": 60.63582174283281, "grad_norm": 0.024510780349373817, "learning_rate": 3.938830542151575e-05, "loss": 0.00019221454858779906, "step": 213620 }, { "epoch": 60.63866023275617, "grad_norm": 0.05360046774148941, "learning_rate": 3.938546693159239e-05, "loss": 0.000289197638630867, "step": 213630 }, { "epoch": 60.641498722679536, "grad_norm": 0.004090407397598028, "learning_rate": 3.9382628441669035e-05, "loss": 4.865135997533798e-05, "step": 213640 }, { "epoch": 60.64433721260289, "grad_norm": 0.04379843920469284, "learning_rate": 3.937978995174567e-05, "loss": 7.324516773223876e-05, "step": 213650 }, { "epoch": 60.647175702526255, "grad_norm": 0.0037707637529820204, "learning_rate": 3.937695146182232e-05, "loss": 0.00017963312566280366, "step": 213660 }, { "epoch": 60.65001419244962, "grad_norm": 0.005855944473296404, "learning_rate": 3.937411297189895e-05, "loss": 0.00024197418242692946, "step": 213670 }, { "epoch": 60.65285268237298, "grad_norm": 0.1711408793926239, "learning_rate": 3.937127448197559e-05, "loss": 0.00017292201519012452, "step": 213680 }, { "epoch": 60.655691172296336, "grad_norm": 0.03890613839030266, "learning_rate": 3.9368435992052235e-05, "loss": 0.00012836474925279618, "step": 213690 }, { "epoch": 60.6585296622197, "grad_norm": 0.06858834624290466, "learning_rate": 3.936559750212887e-05, "loss": 7.84158706665039e-05, "step": 213700 }, { "epoch": 60.66136815214306, "grad_norm": 0.053603801876306534, "learning_rate": 3.936275901220551e-05, "loss": 0.00011983644217252731, "step": 213710 }, { "epoch": 60.66420664206642, "grad_norm": 0.02404879406094551, "learning_rate": 3.9359920522282145e-05, "loss": 7.991846650838852e-05, "step": 213720 }, { "epoch": 60.66704513198978, "grad_norm": 0.007489291485399008, "learning_rate": 3.935708203235879e-05, "loss": 6.472878158092499e-05, "step": 213730 }, { "epoch": 60.669883621913144, "grad_norm": 0.0032912094611674547, "learning_rate": 3.935424354243543e-05, "loss": 4.141125828027725e-05, "step": 213740 }, { "epoch": 60.67272211183651, "grad_norm": 0.015239782631397247, "learning_rate": 3.935140505251206e-05, "loss": 0.00011171642690896988, "step": 213750 }, { "epoch": 60.67556060175986, "grad_norm": 1.4667319059371948, "learning_rate": 3.9348566562588704e-05, "loss": 0.0004573000594973564, "step": 213760 }, { "epoch": 60.678399091683225, "grad_norm": 0.06814137101173401, "learning_rate": 3.9345728072665345e-05, "loss": 0.00023435931652784347, "step": 213770 }, { "epoch": 60.68123758160659, "grad_norm": 0.021272627636790276, "learning_rate": 3.934288958274198e-05, "loss": 7.00363889336586e-05, "step": 213780 }, { "epoch": 60.684076071529944, "grad_norm": 0.006358006503432989, "learning_rate": 3.934005109281863e-05, "loss": 0.00013403985649347306, "step": 213790 }, { "epoch": 60.68691456145331, "grad_norm": 0.022890321910381317, "learning_rate": 3.933721260289526e-05, "loss": 5.2519142627716064e-05, "step": 213800 }, { "epoch": 60.68975305137667, "grad_norm": 0.014407371170818806, "learning_rate": 3.93343741129719e-05, "loss": 5.1997974514961244e-05, "step": 213810 }, { "epoch": 60.692591541300025, "grad_norm": 0.09076531231403351, "learning_rate": 3.933153562304854e-05, "loss": 6.105620414018632e-05, "step": 213820 }, { "epoch": 60.69543003122339, "grad_norm": 0.028136372566223145, "learning_rate": 3.932869713312518e-05, "loss": 0.0001226704567670822, "step": 213830 }, { "epoch": 60.69826852114675, "grad_norm": 0.05572478473186493, "learning_rate": 3.932585864320182e-05, "loss": 0.0002755040302872658, "step": 213840 }, { "epoch": 60.701107011070114, "grad_norm": 0.015281359665095806, "learning_rate": 3.9323020153278456e-05, "loss": 0.00015835370868444443, "step": 213850 }, { "epoch": 60.70394550099347, "grad_norm": 0.003551091765984893, "learning_rate": 3.93201816633551e-05, "loss": 8.477568626403808e-05, "step": 213860 }, { "epoch": 60.70678399091683, "grad_norm": 0.06654667109251022, "learning_rate": 3.931734317343174e-05, "loss": 6.596017628908157e-05, "step": 213870 }, { "epoch": 60.709622480840196, "grad_norm": 0.004910016432404518, "learning_rate": 3.9314504683508373e-05, "loss": 4.2749568819999695e-05, "step": 213880 }, { "epoch": 60.71246097076355, "grad_norm": 0.0038989705499261618, "learning_rate": 3.9311666193585015e-05, "loss": 2.2003985941410063e-05, "step": 213890 }, { "epoch": 60.715299460686914, "grad_norm": 0.005172279663383961, "learning_rate": 3.9308827703661656e-05, "loss": 4.288423806428909e-05, "step": 213900 }, { "epoch": 60.71813795061028, "grad_norm": 0.0016926905373111367, "learning_rate": 3.930598921373829e-05, "loss": 0.0004381213337182999, "step": 213910 }, { "epoch": 60.72097644053363, "grad_norm": 0.009772086516022682, "learning_rate": 3.930315072381493e-05, "loss": 8.118636906147003e-05, "step": 213920 }, { "epoch": 60.723814930456996, "grad_norm": 0.002740483032539487, "learning_rate": 3.9300312233891574e-05, "loss": 4.38714399933815e-05, "step": 213930 }, { "epoch": 60.72665342038036, "grad_norm": 0.012693853117525578, "learning_rate": 3.929747374396821e-05, "loss": 0.00010142028331756592, "step": 213940 }, { "epoch": 60.72949191030372, "grad_norm": 0.001311756786890328, "learning_rate": 3.929463525404485e-05, "loss": 6.233993917703629e-05, "step": 213950 }, { "epoch": 60.73233040022708, "grad_norm": 0.04803023114800453, "learning_rate": 3.929179676412149e-05, "loss": 0.0002736864611506462, "step": 213960 }, { "epoch": 60.73516889015044, "grad_norm": 0.04728729650378227, "learning_rate": 3.9288958274198126e-05, "loss": 0.00014943201094865798, "step": 213970 }, { "epoch": 60.7380073800738, "grad_norm": 0.005316759925335646, "learning_rate": 3.928611978427477e-05, "loss": 0.0001321239396929741, "step": 213980 }, { "epoch": 60.74084586999716, "grad_norm": 0.04122626781463623, "learning_rate": 3.928328129435141e-05, "loss": 8.235163986682891e-05, "step": 213990 }, { "epoch": 60.74368435992052, "grad_norm": 0.09815623611211777, "learning_rate": 3.928044280442805e-05, "loss": 7.819365710020066e-05, "step": 214000 }, { "epoch": 60.74368435992052, "eval_accuracy": 0.9853118840211101, "eval_loss": 0.06109338626265526, "eval_runtime": 37.8816, "eval_samples_per_second": 415.162, "eval_steps_per_second": 6.494, "step": 214000 }, { "epoch": 60.746522849843885, "grad_norm": 0.0049326312728226185, "learning_rate": 3.9277604314504684e-05, "loss": 0.0016204085201025008, "step": 214010 }, { "epoch": 60.74936133976724, "grad_norm": 0.27567338943481445, "learning_rate": 3.927476582458132e-05, "loss": 0.0032246023416519164, "step": 214020 }, { "epoch": 60.7521998296906, "grad_norm": 0.8196862936019897, "learning_rate": 3.927192733465797e-05, "loss": 0.0004229970276355743, "step": 214030 }, { "epoch": 60.755038319613966, "grad_norm": 2.45080304145813, "learning_rate": 3.92690888447346e-05, "loss": 0.0004660528153181076, "step": 214040 }, { "epoch": 60.75787680953733, "grad_norm": 0.6160638332366943, "learning_rate": 3.926625035481124e-05, "loss": 0.00385577529668808, "step": 214050 }, { "epoch": 60.760715299460685, "grad_norm": 0.06213375926017761, "learning_rate": 3.9263411864887884e-05, "loss": 0.0002661839127540588, "step": 214060 }, { "epoch": 60.76355378938405, "grad_norm": 0.008007527329027653, "learning_rate": 3.926057337496452e-05, "loss": 0.002098855935037136, "step": 214070 }, { "epoch": 60.76639227930741, "grad_norm": 0.004609308205544949, "learning_rate": 3.925773488504116e-05, "loss": 0.0010818982496857642, "step": 214080 }, { "epoch": 60.76923076923077, "grad_norm": 0.11919214576482773, "learning_rate": 3.92548963951178e-05, "loss": 0.012578770518302917, "step": 214090 }, { "epoch": 60.77206925915413, "grad_norm": 0.005242976825684309, "learning_rate": 3.9252057905194436e-05, "loss": 0.00020882152020931243, "step": 214100 }, { "epoch": 60.77490774907749, "grad_norm": 0.01809353567659855, "learning_rate": 3.924921941527108e-05, "loss": 0.0003528118133544922, "step": 214110 }, { "epoch": 60.77774623900085, "grad_norm": 0.22800622880458832, "learning_rate": 3.924638092534771e-05, "loss": 0.0008832832798361778, "step": 214120 }, { "epoch": 60.78058472892421, "grad_norm": 0.030691100284457207, "learning_rate": 3.924354243542436e-05, "loss": 0.00012846067547798157, "step": 214130 }, { "epoch": 60.783423218847574, "grad_norm": 0.04583543539047241, "learning_rate": 3.9240703945500995e-05, "loss": 7.88819044828415e-05, "step": 214140 }, { "epoch": 60.78626170877094, "grad_norm": 0.0020648364443331957, "learning_rate": 3.923786545557763e-05, "loss": 0.0001112859696149826, "step": 214150 }, { "epoch": 60.78910019869429, "grad_norm": 0.016999807208776474, "learning_rate": 3.923502696565428e-05, "loss": 0.008213987201452255, "step": 214160 }, { "epoch": 60.791938688617655, "grad_norm": 0.051119931042194366, "learning_rate": 3.923218847573091e-05, "loss": 0.0027355792000889776, "step": 214170 }, { "epoch": 60.79477717854102, "grad_norm": 0.0019608137663453817, "learning_rate": 3.9229349985807554e-05, "loss": 0.0005192324519157409, "step": 214180 }, { "epoch": 60.797615668464374, "grad_norm": 0.001971855526790023, "learning_rate": 3.9226511495884195e-05, "loss": 0.0002391798421740532, "step": 214190 }, { "epoch": 60.80045415838774, "grad_norm": Infinity, "learning_rate": 3.922367300596083e-05, "loss": 0.002001088485121727, "step": 214200 }, { "epoch": 60.8032926483111, "grad_norm": 0.028501739725470543, "learning_rate": 3.922111836502981e-05, "loss": 0.008277299255132676, "step": 214210 }, { "epoch": 60.80613113823446, "grad_norm": 0.19357866048812866, "learning_rate": 3.9218279875106444e-05, "loss": 0.0034284427762031554, "step": 214220 }, { "epoch": 60.80896962815782, "grad_norm": 0.43592140078544617, "learning_rate": 3.9215441385183086e-05, "loss": 0.006696957349777222, "step": 214230 }, { "epoch": 60.81180811808118, "grad_norm": 0.9685027599334717, "learning_rate": 3.921260289525973e-05, "loss": 0.0024936754256486894, "step": 214240 }, { "epoch": 60.814646608004544, "grad_norm": 0.03901457414031029, "learning_rate": 3.920976440533636e-05, "loss": 0.012036330997943878, "step": 214250 }, { "epoch": 60.8174850979279, "grad_norm": 0.009747853502631187, "learning_rate": 3.9206925915413e-05, "loss": 0.005760134384036064, "step": 214260 }, { "epoch": 60.82032358785126, "grad_norm": 0.12057897448539734, "learning_rate": 3.9204087425489644e-05, "loss": 0.0007894383743405342, "step": 214270 }, { "epoch": 60.823162077774626, "grad_norm": 0.016391726210713387, "learning_rate": 3.920124893556628e-05, "loss": 0.00027350913733243943, "step": 214280 }, { "epoch": 60.82600056769798, "grad_norm": 0.00799722783267498, "learning_rate": 3.919841044564292e-05, "loss": 0.004377474635839462, "step": 214290 }, { "epoch": 60.828839057621344, "grad_norm": 0.002651076763868332, "learning_rate": 3.9195571955719555e-05, "loss": 0.0006714869290590286, "step": 214300 }, { "epoch": 60.83167754754471, "grad_norm": 0.01936710998415947, "learning_rate": 3.91927334657962e-05, "loss": 0.00018442682921886443, "step": 214310 }, { "epoch": 60.83451603746807, "grad_norm": 0.06819567829370499, "learning_rate": 3.918989497587284e-05, "loss": 6.263256072998047e-05, "step": 214320 }, { "epoch": 60.837354527391426, "grad_norm": 0.011429853737354279, "learning_rate": 3.918705648594947e-05, "loss": 0.0012111302465200424, "step": 214330 }, { "epoch": 60.84019301731479, "grad_norm": 1.5303922891616821, "learning_rate": 3.918421799602612e-05, "loss": 0.005402206256985664, "step": 214340 }, { "epoch": 60.84303150723815, "grad_norm": 0.1577722579240799, "learning_rate": 3.9181379506102755e-05, "loss": 0.0016438424587249755, "step": 214350 }, { "epoch": 60.84586999716151, "grad_norm": 1.7209906578063965, "learning_rate": 3.9178541016179396e-05, "loss": 0.000574771873652935, "step": 214360 }, { "epoch": 60.84870848708487, "grad_norm": 0.0161050446331501, "learning_rate": 3.917570252625604e-05, "loss": 0.0026112135499715807, "step": 214370 }, { "epoch": 60.85154697700823, "grad_norm": 9.054632186889648, "learning_rate": 3.917286403633267e-05, "loss": 0.005801300704479218, "step": 214380 }, { "epoch": 60.85438546693159, "grad_norm": 0.15926186740398407, "learning_rate": 3.9170025546409314e-05, "loss": 0.00045921728014945983, "step": 214390 }, { "epoch": 60.85722395685495, "grad_norm": 0.09307624399662018, "learning_rate": 3.916718705648595e-05, "loss": 0.00024261530488729478, "step": 214400 }, { "epoch": 60.860062446778315, "grad_norm": 0.009230637922883034, "learning_rate": 3.916463241555492e-05, "loss": 0.005041534826159477, "step": 214410 }, { "epoch": 60.86290093670168, "grad_norm": 0.018956491723656654, "learning_rate": 3.916179392563157e-05, "loss": 0.0027298407629132273, "step": 214420 }, { "epoch": 60.865739426625034, "grad_norm": 0.04751438647508621, "learning_rate": 3.9158955435708204e-05, "loss": 0.0002383926883339882, "step": 214430 }, { "epoch": 60.868577916548396, "grad_norm": 0.07192785292863846, "learning_rate": 3.9156116945784846e-05, "loss": 0.000675847940146923, "step": 214440 }, { "epoch": 60.87141640647176, "grad_norm": 0.22184371948242188, "learning_rate": 3.915327845586149e-05, "loss": 0.0023702658712863923, "step": 214450 }, { "epoch": 60.874254896395115, "grad_norm": 0.015536140650510788, "learning_rate": 3.915043996593812e-05, "loss": 0.00032811351120471955, "step": 214460 }, { "epoch": 60.87709338631848, "grad_norm": 0.05188475921750069, "learning_rate": 3.914760147601476e-05, "loss": 0.0010587837547063827, "step": 214470 }, { "epoch": 60.87993187624184, "grad_norm": 0.009839038364589214, "learning_rate": 3.91447629860914e-05, "loss": 0.00017779916524887084, "step": 214480 }, { "epoch": 60.8827703661652, "grad_norm": 0.016059735789895058, "learning_rate": 3.914192449616804e-05, "loss": 0.0033363498747348785, "step": 214490 }, { "epoch": 60.88560885608856, "grad_norm": 0.00807732529938221, "learning_rate": 3.913908600624468e-05, "loss": 0.0003398362547159195, "step": 214500 }, { "epoch": 60.88560885608856, "eval_accuracy": 0.9813060342086857, "eval_loss": 0.07727200537919998, "eval_runtime": 38.6027, "eval_samples_per_second": 407.406, "eval_steps_per_second": 6.373, "step": 214500 }, { "epoch": 60.88844734601192, "grad_norm": 0.0032324541825801134, "learning_rate": 3.9136247516321315e-05, "loss": 0.0011986933648586273, "step": 214510 }, { "epoch": 60.891285835935285, "grad_norm": 0.0695023462176323, "learning_rate": 3.913340902639796e-05, "loss": 0.00035083796828985216, "step": 214520 }, { "epoch": 60.89412432585864, "grad_norm": 0.004108366556465626, "learning_rate": 3.91305705364746e-05, "loss": 0.00016593411564826965, "step": 214530 }, { "epoch": 60.896962815782004, "grad_norm": 0.03189703822135925, "learning_rate": 3.912773204655123e-05, "loss": 0.0006124056875705719, "step": 214540 }, { "epoch": 60.89980130570537, "grad_norm": 0.1569475680589676, "learning_rate": 3.912489355662788e-05, "loss": 0.00014022961258888244, "step": 214550 }, { "epoch": 60.90263979562872, "grad_norm": 0.05162340775132179, "learning_rate": 3.9122055066704515e-05, "loss": 0.0004428057000041008, "step": 214560 }, { "epoch": 60.905478285552086, "grad_norm": 0.1312742531299591, "learning_rate": 3.9119216576781156e-05, "loss": 0.00025519374758005143, "step": 214570 }, { "epoch": 60.90831677547545, "grad_norm": 0.07650591433048248, "learning_rate": 3.911637808685779e-05, "loss": 0.0003030255436897278, "step": 214580 }, { "epoch": 60.91115526539881, "grad_norm": 0.1988525241613388, "learning_rate": 3.911353959693443e-05, "loss": 0.00042311791330575945, "step": 214590 }, { "epoch": 60.91399375532217, "grad_norm": 0.0033305208198726177, "learning_rate": 3.9110701107011074e-05, "loss": 0.00018441397696733475, "step": 214600 }, { "epoch": 60.91683224524553, "grad_norm": 0.0038043742533773184, "learning_rate": 3.910786261708771e-05, "loss": 0.0005435789003968239, "step": 214610 }, { "epoch": 60.91967073516889, "grad_norm": 0.006570891477167606, "learning_rate": 3.910502412716435e-05, "loss": 0.0009411407634615898, "step": 214620 }, { "epoch": 60.92250922509225, "grad_norm": 0.08531960844993591, "learning_rate": 3.910218563724099e-05, "loss": 0.004060876369476318, "step": 214630 }, { "epoch": 60.92534771501561, "grad_norm": 0.4852198362350464, "learning_rate": 3.9099347147317626e-05, "loss": 0.0019150320440530777, "step": 214640 }, { "epoch": 60.928186204938974, "grad_norm": 1.3271325826644897, "learning_rate": 3.9096508657394274e-05, "loss": 0.0044267624616622925, "step": 214650 }, { "epoch": 60.93102469486233, "grad_norm": 0.07036394625902176, "learning_rate": 3.909367016747091e-05, "loss": 0.0010359497740864755, "step": 214660 }, { "epoch": 60.93386318478569, "grad_norm": 0.026635250076651573, "learning_rate": 3.909083167754754e-05, "loss": 0.000828481838107109, "step": 214670 }, { "epoch": 60.936701674709056, "grad_norm": 0.2706918716430664, "learning_rate": 3.9087993187624184e-05, "loss": 0.0018626751378178596, "step": 214680 }, { "epoch": 60.93954016463242, "grad_norm": 1.4671199321746826, "learning_rate": 3.9085154697700826e-05, "loss": 0.00035799350589513777, "step": 214690 }, { "epoch": 60.942378654555775, "grad_norm": 0.023988468572497368, "learning_rate": 3.908231620777747e-05, "loss": 0.00015800036489963532, "step": 214700 }, { "epoch": 60.94521714447914, "grad_norm": 0.7037288546562195, "learning_rate": 3.90794777178541e-05, "loss": 0.0007278760895133018, "step": 214710 }, { "epoch": 60.9480556344025, "grad_norm": 7.891810417175293, "learning_rate": 3.907663922793074e-05, "loss": 0.0011647429317235946, "step": 214720 }, { "epoch": 60.950894124325856, "grad_norm": 0.004888218827545643, "learning_rate": 3.9073800738007385e-05, "loss": 0.0004091056063771248, "step": 214730 }, { "epoch": 60.95373261424922, "grad_norm": 0.6219050884246826, "learning_rate": 3.907096224808402e-05, "loss": 0.004478273540735244, "step": 214740 }, { "epoch": 60.95657110417258, "grad_norm": 0.06777628511190414, "learning_rate": 3.906812375816066e-05, "loss": 0.0005618946626782418, "step": 214750 }, { "epoch": 60.95940959409594, "grad_norm": 0.1236407607793808, "learning_rate": 3.90652852682373e-05, "loss": 0.0009548692032694817, "step": 214760 }, { "epoch": 60.9622480840193, "grad_norm": 0.012657816521823406, "learning_rate": 3.9062446778313936e-05, "loss": 0.002930684760212898, "step": 214770 }, { "epoch": 60.96508657394266, "grad_norm": 1.3123105764389038, "learning_rate": 3.905960828839058e-05, "loss": 0.0037126872688531876, "step": 214780 }, { "epoch": 60.967925063866026, "grad_norm": 0.02060435339808464, "learning_rate": 3.905676979846722e-05, "loss": 0.000907297246158123, "step": 214790 }, { "epoch": 60.97076355378938, "grad_norm": 0.46831580996513367, "learning_rate": 3.9053931308543854e-05, "loss": 0.00033249631524086, "step": 214800 }, { "epoch": 60.973602043712745, "grad_norm": 0.010587045922875404, "learning_rate": 3.9051092818620495e-05, "loss": 0.001715429686009884, "step": 214810 }, { "epoch": 60.97644053363611, "grad_norm": 0.018059758469462395, "learning_rate": 3.9048254328697137e-05, "loss": 0.003363357111811638, "step": 214820 }, { "epoch": 60.979279023559464, "grad_norm": 0.0015196171589195728, "learning_rate": 3.904541583877377e-05, "loss": 0.0020876646041870115, "step": 214830 }, { "epoch": 60.98211751348283, "grad_norm": 0.043469589203596115, "learning_rate": 3.904257734885041e-05, "loss": 0.0005063993856310844, "step": 214840 }, { "epoch": 60.98495600340619, "grad_norm": 1.184860348701477, "learning_rate": 3.9039738858927054e-05, "loss": 0.001190374605357647, "step": 214850 }, { "epoch": 60.987794493329545, "grad_norm": 0.5626295208930969, "learning_rate": 3.9036900369003695e-05, "loss": 0.004050831496715546, "step": 214860 }, { "epoch": 60.99063298325291, "grad_norm": 1.385031819343567, "learning_rate": 3.903406187908033e-05, "loss": 0.0063080862164497375, "step": 214870 }, { "epoch": 60.99347147317627, "grad_norm": 1.1080315113067627, "learning_rate": 3.9031223389156965e-05, "loss": 0.0005013003945350647, "step": 214880 }, { "epoch": 60.996309963099634, "grad_norm": 0.004428665153682232, "learning_rate": 3.902838489923361e-05, "loss": 0.00033535640686750413, "step": 214890 }, { "epoch": 60.99914845302299, "grad_norm": 0.1291419118642807, "learning_rate": 3.902554640931025e-05, "loss": 0.0038044698536396026, "step": 214900 }, { "epoch": 61.00198694294635, "grad_norm": 0.1498725712299347, "learning_rate": 3.902270791938689e-05, "loss": 0.00013120411895215512, "step": 214910 }, { "epoch": 61.004825432869715, "grad_norm": 0.03043907880783081, "learning_rate": 3.901986942946353e-05, "loss": 8.495915681123733e-05, "step": 214920 }, { "epoch": 61.00766392279307, "grad_norm": 0.881397545337677, "learning_rate": 3.9017030939540165e-05, "loss": 0.0002869585528969765, "step": 214930 }, { "epoch": 61.010502412716434, "grad_norm": 0.04029855877161026, "learning_rate": 3.9014192449616806e-05, "loss": 0.00018610171973705293, "step": 214940 }, { "epoch": 61.0133409026398, "grad_norm": 0.011123456060886383, "learning_rate": 3.901135395969345e-05, "loss": 0.0011523345485329628, "step": 214950 }, { "epoch": 61.01617939256316, "grad_norm": 0.11260335892438889, "learning_rate": 3.900851546977008e-05, "loss": 0.0013758089393377305, "step": 214960 }, { "epoch": 61.019017882486516, "grad_norm": 0.5116572380065918, "learning_rate": 3.900567697984672e-05, "loss": 0.0003701748326420784, "step": 214970 }, { "epoch": 61.02185637240988, "grad_norm": 0.042852580547332764, "learning_rate": 3.900283848992336e-05, "loss": 0.00015088431537151336, "step": 214980 }, { "epoch": 61.02469486233324, "grad_norm": 0.123707115650177, "learning_rate": 3.9000000000000006e-05, "loss": 0.0004040120169520378, "step": 214990 }, { "epoch": 61.0275333522566, "grad_norm": 0.01099108625203371, "learning_rate": 3.899716151007664e-05, "loss": 8.413847535848618e-05, "step": 215000 }, { "epoch": 61.0275333522566, "eval_accuracy": 0.9813060342086857, "eval_loss": 0.07073196768760681, "eval_runtime": 54.0212, "eval_samples_per_second": 291.127, "eval_steps_per_second": 4.554, "step": 215000 }, { "epoch": 61.03037184217996, "grad_norm": 0.12391441315412521, "learning_rate": 3.8994323020153275e-05, "loss": 0.00023634284734725953, "step": 215010 }, { "epoch": 61.03321033210332, "grad_norm": 0.009317745454609394, "learning_rate": 3.8991484530229923e-05, "loss": 0.00025455765426158906, "step": 215020 }, { "epoch": 61.03604882202668, "grad_norm": 0.0018357901135459542, "learning_rate": 3.898864604030656e-05, "loss": 0.0002290545031428337, "step": 215030 }, { "epoch": 61.03888731195004, "grad_norm": 0.11898523569107056, "learning_rate": 3.89858075503832e-05, "loss": 0.00015061218291521073, "step": 215040 }, { "epoch": 61.041725801873405, "grad_norm": 0.21542401611804962, "learning_rate": 3.898296906045984e-05, "loss": 0.00034370776265859606, "step": 215050 }, { "epoch": 61.04456429179677, "grad_norm": 0.00450526550412178, "learning_rate": 3.8980130570536475e-05, "loss": 0.00034912750124931334, "step": 215060 }, { "epoch": 61.04740278172012, "grad_norm": 0.015182388946413994, "learning_rate": 3.897729208061312e-05, "loss": 0.004097013920545578, "step": 215070 }, { "epoch": 61.050241271643486, "grad_norm": 0.2846381664276123, "learning_rate": 3.897445359068975e-05, "loss": 0.00031242202967405317, "step": 215080 }, { "epoch": 61.05307976156685, "grad_norm": 0.010971024632453918, "learning_rate": 3.897161510076639e-05, "loss": 0.00012094732373952865, "step": 215090 }, { "epoch": 61.055918251490205, "grad_norm": 0.03594818338751793, "learning_rate": 3.8968776610843034e-05, "loss": 0.00014012251049280166, "step": 215100 }, { "epoch": 61.05875674141357, "grad_norm": 0.03885524719953537, "learning_rate": 3.896593812091967e-05, "loss": 0.0002576014026999474, "step": 215110 }, { "epoch": 61.06159523133693, "grad_norm": 0.013603350147604942, "learning_rate": 3.896309963099632e-05, "loss": 0.0003244200721383095, "step": 215120 }, { "epoch": 61.064433721260286, "grad_norm": 0.022890528663992882, "learning_rate": 3.896026114107295e-05, "loss": 0.0004895105957984924, "step": 215130 }, { "epoch": 61.06727221118365, "grad_norm": 1.5457838773727417, "learning_rate": 3.8957422651149586e-05, "loss": 0.004196691885590553, "step": 215140 }, { "epoch": 61.07011070110701, "grad_norm": 0.5067186951637268, "learning_rate": 3.8954584161226234e-05, "loss": 0.0019452473148703574, "step": 215150 }, { "epoch": 61.072949191030375, "grad_norm": 0.004964408464729786, "learning_rate": 3.895174567130287e-05, "loss": 0.0013672558590769768, "step": 215160 }, { "epoch": 61.07578768095373, "grad_norm": 0.010705806314945221, "learning_rate": 3.894890718137951e-05, "loss": 0.00020797420293092728, "step": 215170 }, { "epoch": 61.078626170877094, "grad_norm": 0.09905841201543808, "learning_rate": 3.8946068691456145e-05, "loss": 0.00011631976813077926, "step": 215180 }, { "epoch": 61.08146466080046, "grad_norm": 0.02323761209845543, "learning_rate": 3.8943230201532786e-05, "loss": 0.00015430301427841187, "step": 215190 }, { "epoch": 61.08430315072381, "grad_norm": 0.00807412900030613, "learning_rate": 3.894039171160943e-05, "loss": 6.875321269035339e-05, "step": 215200 }, { "epoch": 61.087141640647175, "grad_norm": 0.005061401519924402, "learning_rate": 3.893755322168606e-05, "loss": 0.00025942418724298475, "step": 215210 }, { "epoch": 61.08998013057054, "grad_norm": 0.2141081541776657, "learning_rate": 3.8934714731762703e-05, "loss": 0.0028090190142393114, "step": 215220 }, { "epoch": 61.092818620493894, "grad_norm": 0.028612401336431503, "learning_rate": 3.8931876241839345e-05, "loss": 0.0008803550153970718, "step": 215230 }, { "epoch": 61.09565711041726, "grad_norm": 0.013393620029091835, "learning_rate": 3.892903775191598e-05, "loss": 0.015657249093055724, "step": 215240 }, { "epoch": 61.09849560034062, "grad_norm": 0.010087399743497372, "learning_rate": 3.892619926199262e-05, "loss": 0.005004753172397613, "step": 215250 }, { "epoch": 61.10133409026398, "grad_norm": 0.3855314552783966, "learning_rate": 3.892336077206926e-05, "loss": 0.0012413475662469864, "step": 215260 }, { "epoch": 61.10417258018734, "grad_norm": 0.11419558525085449, "learning_rate": 3.89205222821459e-05, "loss": 0.0004665607586503029, "step": 215270 }, { "epoch": 61.1070110701107, "grad_norm": 0.19930075109004974, "learning_rate": 3.891768379222254e-05, "loss": 0.00018326118588447572, "step": 215280 }, { "epoch": 61.109849560034064, "grad_norm": 0.004669708665460348, "learning_rate": 3.891484530229918e-05, "loss": 0.004916428029537201, "step": 215290 }, { "epoch": 61.11268804995742, "grad_norm": 0.3113437294960022, "learning_rate": 3.8912006812375814e-05, "loss": 0.0006924215704202652, "step": 215300 }, { "epoch": 61.11552653988078, "grad_norm": 0.03652893379330635, "learning_rate": 3.8909168322452456e-05, "loss": 0.0011399997398257255, "step": 215310 }, { "epoch": 61.118365029804146, "grad_norm": 3.314131021499634, "learning_rate": 3.89063298325291e-05, "loss": 0.0006290558725595474, "step": 215320 }, { "epoch": 61.1212035197275, "grad_norm": 0.016053009778261185, "learning_rate": 3.890349134260574e-05, "loss": 0.00035249758511781695, "step": 215330 }, { "epoch": 61.124042009650864, "grad_norm": 0.0007908826228231192, "learning_rate": 3.890065285268237e-05, "loss": 0.00019604992121458054, "step": 215340 }, { "epoch": 61.12688049957423, "grad_norm": 0.03593110293149948, "learning_rate": 3.8897814362759014e-05, "loss": 0.0001709846779704094, "step": 215350 }, { "epoch": 61.12971898949759, "grad_norm": 0.011880837380886078, "learning_rate": 3.8894975872835656e-05, "loss": 0.00024815481156110765, "step": 215360 }, { "epoch": 61.132557479420946, "grad_norm": 0.02126435749232769, "learning_rate": 3.889213738291229e-05, "loss": 8.38443636894226e-05, "step": 215370 }, { "epoch": 61.13539596934431, "grad_norm": 0.007819142192602158, "learning_rate": 3.888929889298893e-05, "loss": 0.00022143293172121047, "step": 215380 }, { "epoch": 61.13823445926767, "grad_norm": 0.0029165251180529594, "learning_rate": 3.888646040306557e-05, "loss": 0.000197545625269413, "step": 215390 }, { "epoch": 61.14107294919103, "grad_norm": 0.008181850425899029, "learning_rate": 3.888362191314221e-05, "loss": 5.994942039251327e-05, "step": 215400 }, { "epoch": 61.14391143911439, "grad_norm": 0.06722583621740341, "learning_rate": 3.888078342321885e-05, "loss": 0.000561847910284996, "step": 215410 }, { "epoch": 61.14674992903775, "grad_norm": 0.007385158445686102, "learning_rate": 3.887794493329549e-05, "loss": 0.0006197553128004074, "step": 215420 }, { "epoch": 61.149588418961116, "grad_norm": 1.8156729936599731, "learning_rate": 3.8875106443372125e-05, "loss": 0.00030370093882083895, "step": 215430 }, { "epoch": 61.15242690888447, "grad_norm": 0.08110841363668442, "learning_rate": 3.8872267953448766e-05, "loss": 8.12208279967308e-05, "step": 215440 }, { "epoch": 61.155265398807835, "grad_norm": 0.7762265801429749, "learning_rate": 3.886942946352541e-05, "loss": 0.0012097785249352456, "step": 215450 }, { "epoch": 61.1581038887312, "grad_norm": 0.015430431813001633, "learning_rate": 3.886659097360205e-05, "loss": 0.0013276351615786552, "step": 215460 }, { "epoch": 61.16094237865455, "grad_norm": 0.49032366275787354, "learning_rate": 3.8863752483678684e-05, "loss": 0.00041142087429761886, "step": 215470 }, { "epoch": 61.163780868577916, "grad_norm": 0.02148260548710823, "learning_rate": 3.886091399375532e-05, "loss": 9.067114442586898e-05, "step": 215480 }, { "epoch": 61.16661935850128, "grad_norm": 0.05194777995347977, "learning_rate": 3.8858075503831966e-05, "loss": 0.00010352563112974167, "step": 215490 }, { "epoch": 61.169457848424635, "grad_norm": 0.010391764342784882, "learning_rate": 3.88552370139086e-05, "loss": 0.001569850742816925, "step": 215500 }, { "epoch": 61.169457848424635, "eval_accuracy": 0.9832135817384117, "eval_loss": 0.06862149387598038, "eval_runtime": 41.7246, "eval_samples_per_second": 376.924, "eval_steps_per_second": 5.896, "step": 215500 }, { "epoch": 61.172296338348, "grad_norm": 0.005528543144464493, "learning_rate": 3.885239852398524e-05, "loss": 0.0016857944428920746, "step": 215510 }, { "epoch": 61.17513482827136, "grad_norm": 5.670773029327393, "learning_rate": 3.8849560034061884e-05, "loss": 0.0021122708916664124, "step": 215520 }, { "epoch": 61.177973318194724, "grad_norm": 7.369215965270996, "learning_rate": 3.884672154413852e-05, "loss": 0.002021433413028717, "step": 215530 }, { "epoch": 61.18081180811808, "grad_norm": 0.06666296720504761, "learning_rate": 3.884388305421516e-05, "loss": 0.0011505555361509324, "step": 215540 }, { "epoch": 61.18365029804144, "grad_norm": 0.044483110308647156, "learning_rate": 3.88410445642918e-05, "loss": 0.006643933057785034, "step": 215550 }, { "epoch": 61.186488787964805, "grad_norm": 0.1988459825515747, "learning_rate": 3.8838206074368436e-05, "loss": 0.00035329964011907575, "step": 215560 }, { "epoch": 61.18932727788816, "grad_norm": 0.005654084030538797, "learning_rate": 3.883536758444508e-05, "loss": 0.0002487311139702797, "step": 215570 }, { "epoch": 61.192165767811524, "grad_norm": 0.013037003576755524, "learning_rate": 3.883252909452172e-05, "loss": 0.00046612098813056944, "step": 215580 }, { "epoch": 61.19500425773489, "grad_norm": 0.626462459564209, "learning_rate": 3.882969060459836e-05, "loss": 0.0003325032070279121, "step": 215590 }, { "epoch": 61.19784274765824, "grad_norm": 2.4352073669433594, "learning_rate": 3.8826852114674994e-05, "loss": 0.0006537212058901787, "step": 215600 }, { "epoch": 61.200681237581605, "grad_norm": 0.007228936534374952, "learning_rate": 3.882401362475163e-05, "loss": 0.0004022585228085518, "step": 215610 }, { "epoch": 61.20351972750497, "grad_norm": 0.002235027262941003, "learning_rate": 3.882117513482828e-05, "loss": 0.00013683214783668518, "step": 215620 }, { "epoch": 61.20635821742833, "grad_norm": 0.02026335895061493, "learning_rate": 3.881833664490491e-05, "loss": 0.0003580186516046524, "step": 215630 }, { "epoch": 61.20919670735169, "grad_norm": 0.010529802180826664, "learning_rate": 3.881549815498155e-05, "loss": 0.0044173218309879305, "step": 215640 }, { "epoch": 61.21203519727505, "grad_norm": 0.022404588758945465, "learning_rate": 3.8812659665058195e-05, "loss": 0.0030998582020401954, "step": 215650 }, { "epoch": 61.21487368719841, "grad_norm": 0.15525522828102112, "learning_rate": 3.880982117513483e-05, "loss": 0.00021427366882562638, "step": 215660 }, { "epoch": 61.21771217712177, "grad_norm": 0.09594405442476273, "learning_rate": 3.880698268521147e-05, "loss": 0.00021941643208265305, "step": 215670 }, { "epoch": 61.22055066704513, "grad_norm": 0.06092933565378189, "learning_rate": 3.880414419528811e-05, "loss": 0.00010748207569122314, "step": 215680 }, { "epoch": 61.223389156968494, "grad_norm": 0.00551740825176239, "learning_rate": 3.8801305705364746e-05, "loss": 9.347908198833465e-05, "step": 215690 }, { "epoch": 61.22622764689185, "grad_norm": 0.23630325496196747, "learning_rate": 3.879846721544139e-05, "loss": 0.00010285377502441406, "step": 215700 }, { "epoch": 61.22906613681521, "grad_norm": 0.0035669850185513496, "learning_rate": 3.879562872551802e-05, "loss": 0.00010693147778511048, "step": 215710 }, { "epoch": 61.231904626738576, "grad_norm": 0.0243784561753273, "learning_rate": 3.8792790235594664e-05, "loss": 0.0007197981700301171, "step": 215720 }, { "epoch": 61.23474311666194, "grad_norm": 2.1573259830474854, "learning_rate": 3.8789951745671305e-05, "loss": 0.00045462530106306077, "step": 215730 }, { "epoch": 61.237581606585294, "grad_norm": 0.04031414911150932, "learning_rate": 3.878711325574794e-05, "loss": 0.007991567254066467, "step": 215740 }, { "epoch": 61.24042009650866, "grad_norm": 0.001499089994467795, "learning_rate": 3.878427476582459e-05, "loss": 0.00019538570195436477, "step": 215750 }, { "epoch": 61.24325858643202, "grad_norm": 0.008085018955171108, "learning_rate": 3.878143627590122e-05, "loss": 0.0005102397873997689, "step": 215760 }, { "epoch": 61.246097076355376, "grad_norm": 0.01809391751885414, "learning_rate": 3.877859778597786e-05, "loss": 0.0008797172456979751, "step": 215770 }, { "epoch": 61.24893556627874, "grad_norm": 0.1217445358633995, "learning_rate": 3.8775759296054505e-05, "loss": 0.0006907096132636071, "step": 215780 }, { "epoch": 61.2517740562021, "grad_norm": 0.023858048021793365, "learning_rate": 3.877292080613114e-05, "loss": 0.0010727925226092338, "step": 215790 }, { "epoch": 61.254612546125465, "grad_norm": 0.008186137303709984, "learning_rate": 3.877008231620778e-05, "loss": 0.00016558021306991577, "step": 215800 }, { "epoch": 61.25745103604882, "grad_norm": 0.05523005127906799, "learning_rate": 3.8767243826284416e-05, "loss": 0.00011475235223770141, "step": 215810 }, { "epoch": 61.26028952597218, "grad_norm": 0.08849959075450897, "learning_rate": 3.876440533636106e-05, "loss": 0.00028030052781105044, "step": 215820 }, { "epoch": 61.263128015895546, "grad_norm": 0.015640340745449066, "learning_rate": 3.87615668464377e-05, "loss": 0.0001268373802304268, "step": 215830 }, { "epoch": 61.2659665058189, "grad_norm": 0.4845014214515686, "learning_rate": 3.875872835651433e-05, "loss": 0.0003853907808661461, "step": 215840 }, { "epoch": 61.268804995742265, "grad_norm": 0.005184205714613199, "learning_rate": 3.8755889866590975e-05, "loss": 0.00014924630522727966, "step": 215850 }, { "epoch": 61.27164348566563, "grad_norm": 1.1643842458724976, "learning_rate": 3.8753051376667616e-05, "loss": 0.0002471834421157837, "step": 215860 }, { "epoch": 61.274481975588984, "grad_norm": 0.009727486409246922, "learning_rate": 3.875021288674425e-05, "loss": 0.0009482525289058686, "step": 215870 }, { "epoch": 61.27732046551235, "grad_norm": 0.012639603577554226, "learning_rate": 3.87473743968209e-05, "loss": 4.702378064393997e-05, "step": 215880 }, { "epoch": 61.28015895543571, "grad_norm": 0.033220186829566956, "learning_rate": 3.874453590689753e-05, "loss": 0.00020016841590404512, "step": 215890 }, { "epoch": 61.28299744535907, "grad_norm": 0.017372841015458107, "learning_rate": 3.874169741697417e-05, "loss": 0.009349550306797027, "step": 215900 }, { "epoch": 61.28583593528243, "grad_norm": 0.005660126451402903, "learning_rate": 3.873885892705081e-05, "loss": 0.0021123502403497694, "step": 215910 }, { "epoch": 61.28867442520579, "grad_norm": 0.004340420011430979, "learning_rate": 3.873602043712745e-05, "loss": 0.0021865857765078545, "step": 215920 }, { "epoch": 61.291512915129154, "grad_norm": 0.02687898650765419, "learning_rate": 3.873318194720409e-05, "loss": 0.020407670736312868, "step": 215930 }, { "epoch": 61.29435140505251, "grad_norm": 0.085172139108181, "learning_rate": 3.8730343457280727e-05, "loss": 0.002971728891134262, "step": 215940 }, { "epoch": 61.29718989497587, "grad_norm": 0.0848085880279541, "learning_rate": 3.872750496735737e-05, "loss": 0.00038886256515979767, "step": 215950 }, { "epoch": 61.300028384899235, "grad_norm": 0.7170686721801758, "learning_rate": 3.872466647743401e-05, "loss": 0.0005220286548137665, "step": 215960 }, { "epoch": 61.30286687482259, "grad_norm": 0.0020097559317946434, "learning_rate": 3.8721827987510644e-05, "loss": 0.00011513251811265946, "step": 215970 }, { "epoch": 61.305705364745954, "grad_norm": 0.21134178340435028, "learning_rate": 3.8718989497587285e-05, "loss": 0.00033173970878124236, "step": 215980 }, { "epoch": 61.30854385466932, "grad_norm": 0.0489884652197361, "learning_rate": 3.871615100766393e-05, "loss": 0.00017851348966360092, "step": 215990 }, { "epoch": 61.31138234459268, "grad_norm": 0.0838145911693573, "learning_rate": 3.871331251774056e-05, "loss": 0.0006992099806666374, "step": 216000 }, { "epoch": 61.31138234459268, "eval_accuracy": 0.9842945253385896, "eval_loss": 0.06407226622104645, "eval_runtime": 42.6102, "eval_samples_per_second": 369.09, "eval_steps_per_second": 5.773, "step": 216000 }, { "epoch": 61.314220834516036, "grad_norm": 0.04753663018345833, "learning_rate": 3.87104740278172e-05, "loss": 0.00014911666512489318, "step": 216010 }, { "epoch": 61.3170593244394, "grad_norm": 0.0013063129736110568, "learning_rate": 3.8707635537893844e-05, "loss": 8.306372910737991e-05, "step": 216020 }, { "epoch": 61.31989781436276, "grad_norm": 0.042323991656303406, "learning_rate": 3.870479704797048e-05, "loss": 8.984152227640153e-05, "step": 216030 }, { "epoch": 61.32273630428612, "grad_norm": 0.009067123755812645, "learning_rate": 3.870195855804712e-05, "loss": 0.00013987552374601365, "step": 216040 }, { "epoch": 61.32557479420948, "grad_norm": 0.06079697236418724, "learning_rate": 3.869912006812376e-05, "loss": 8.310154080390931e-05, "step": 216050 }, { "epoch": 61.32841328413284, "grad_norm": 0.006627917755395174, "learning_rate": 3.86962815782004e-05, "loss": 0.00010029207915067673, "step": 216060 }, { "epoch": 61.3312517740562, "grad_norm": 0.08907198160886765, "learning_rate": 3.869344308827704e-05, "loss": 5.804244428873062e-05, "step": 216070 }, { "epoch": 61.33409026397956, "grad_norm": 0.0016715530073270202, "learning_rate": 3.869060459835368e-05, "loss": 0.00011517349630594254, "step": 216080 }, { "epoch": 61.336928753902924, "grad_norm": 0.03991604596376419, "learning_rate": 3.868776610843032e-05, "loss": 5.795825272798538e-05, "step": 216090 }, { "epoch": 61.33976724382629, "grad_norm": 0.008741533383727074, "learning_rate": 3.8684927618506955e-05, "loss": 0.0002216516062617302, "step": 216100 }, { "epoch": 61.34260573374964, "grad_norm": 0.11387266218662262, "learning_rate": 3.8682089128583596e-05, "loss": 0.0002015652135014534, "step": 216110 }, { "epoch": 61.345444223673006, "grad_norm": 0.007310954853892326, "learning_rate": 3.867925063866024e-05, "loss": 0.0003678940236568451, "step": 216120 }, { "epoch": 61.34828271359637, "grad_norm": 0.019706515595316887, "learning_rate": 3.867641214873687e-05, "loss": 0.00022271368652582169, "step": 216130 }, { "epoch": 61.351121203519725, "grad_norm": 0.21749521791934967, "learning_rate": 3.8673573658813513e-05, "loss": 0.00011368114501237869, "step": 216140 }, { "epoch": 61.35395969344309, "grad_norm": 0.05986994504928589, "learning_rate": 3.8670735168890155e-05, "loss": 5.350615829229355e-05, "step": 216150 }, { "epoch": 61.35679818336645, "grad_norm": 2.7143287658691406, "learning_rate": 3.866789667896679e-05, "loss": 0.0003752099350094795, "step": 216160 }, { "epoch": 61.35963667328981, "grad_norm": 9.357100486755371, "learning_rate": 3.866505818904343e-05, "loss": 0.0014053776860237122, "step": 216170 }, { "epoch": 61.36247516321317, "grad_norm": 0.005196935497224331, "learning_rate": 3.866221969912007e-05, "loss": 5.1569566130638124e-05, "step": 216180 }, { "epoch": 61.36531365313653, "grad_norm": 0.007114756386727095, "learning_rate": 3.865938120919671e-05, "loss": 0.00020010508596897126, "step": 216190 }, { "epoch": 61.368152143059895, "grad_norm": 0.002489439444616437, "learning_rate": 3.865654271927335e-05, "loss": 0.0002791076898574829, "step": 216200 }, { "epoch": 61.37099063298325, "grad_norm": 0.011609521694481373, "learning_rate": 3.865370422934998e-05, "loss": 9.742043912410736e-05, "step": 216210 }, { "epoch": 61.37382912290661, "grad_norm": 0.24933357536792755, "learning_rate": 3.865086573942663e-05, "loss": 7.432550191879272e-05, "step": 216220 }, { "epoch": 61.376667612829976, "grad_norm": 0.07314355671405792, "learning_rate": 3.8648027249503265e-05, "loss": 0.0007182091474533081, "step": 216230 }, { "epoch": 61.37950610275333, "grad_norm": 0.15792220830917358, "learning_rate": 3.86451887595799e-05, "loss": 0.0001515587791800499, "step": 216240 }, { "epoch": 61.382344592676695, "grad_norm": 0.00661443779245019, "learning_rate": 3.864235026965655e-05, "loss": 9.813662618398666e-05, "step": 216250 }, { "epoch": 61.38518308260006, "grad_norm": 0.016192244365811348, "learning_rate": 3.863951177973318e-05, "loss": 0.00041447971016168595, "step": 216260 }, { "epoch": 61.38802157252342, "grad_norm": 0.013984451070427895, "learning_rate": 3.8636673289809824e-05, "loss": 0.0010323692113161087, "step": 216270 }, { "epoch": 61.39086006244678, "grad_norm": 0.054661788046360016, "learning_rate": 3.8633834799886466e-05, "loss": 8.345581591129303e-05, "step": 216280 }, { "epoch": 61.39369855237014, "grad_norm": 1.6432253122329712, "learning_rate": 3.86309963099631e-05, "loss": 0.001546165719628334, "step": 216290 }, { "epoch": 61.3965370422935, "grad_norm": 0.13290049135684967, "learning_rate": 3.862815782003974e-05, "loss": 0.0005863979458808899, "step": 216300 }, { "epoch": 61.39937553221686, "grad_norm": 1.9907543659210205, "learning_rate": 3.8625319330116376e-05, "loss": 0.0005216158926486969, "step": 216310 }, { "epoch": 61.40221402214022, "grad_norm": 0.16947034001350403, "learning_rate": 3.862248084019302e-05, "loss": 9.459238499403e-05, "step": 216320 }, { "epoch": 61.405052512063584, "grad_norm": 0.36441710591316223, "learning_rate": 3.861964235026966e-05, "loss": 0.0002508888021111488, "step": 216330 }, { "epoch": 61.40789100198694, "grad_norm": 0.004483838099986315, "learning_rate": 3.8616803860346294e-05, "loss": 0.0054378356784582135, "step": 216340 }, { "epoch": 61.4107294919103, "grad_norm": 0.10662450641393661, "learning_rate": 3.861396537042294e-05, "loss": 0.0036784373223781586, "step": 216350 }, { "epoch": 61.413567981833665, "grad_norm": 0.014347336255013943, "learning_rate": 3.8611126880499576e-05, "loss": 0.0003506505861878395, "step": 216360 }, { "epoch": 61.41640647175703, "grad_norm": 1.6831570863723755, "learning_rate": 3.860828839057621e-05, "loss": 0.0017252594232559204, "step": 216370 }, { "epoch": 61.419244961680384, "grad_norm": 0.1382904350757599, "learning_rate": 3.860544990065286e-05, "loss": 0.00017691981047391893, "step": 216380 }, { "epoch": 61.42208345160375, "grad_norm": 0.077756866812706, "learning_rate": 3.8602611410729494e-05, "loss": 0.0009810533374547958, "step": 216390 }, { "epoch": 61.42492194152711, "grad_norm": 17.90628433227539, "learning_rate": 3.8599772920806135e-05, "loss": 0.007521984726190567, "step": 216400 }, { "epoch": 61.427760431450466, "grad_norm": 3.5760035514831543, "learning_rate": 3.859693443088277e-05, "loss": 0.0005726527422666549, "step": 216410 }, { "epoch": 61.43059892137383, "grad_norm": 0.006516309920698404, "learning_rate": 3.859409594095941e-05, "loss": 0.0008845455944538116, "step": 216420 }, { "epoch": 61.43343741129719, "grad_norm": 0.3119204044342041, "learning_rate": 3.859125745103605e-05, "loss": 0.00034962166100740435, "step": 216430 }, { "epoch": 61.43627590122055, "grad_norm": 0.3338439464569092, "learning_rate": 3.858841896111269e-05, "loss": 0.0037611216306686402, "step": 216440 }, { "epoch": 61.43911439114391, "grad_norm": 0.2738892734050751, "learning_rate": 3.858558047118933e-05, "loss": 0.00011652316898107529, "step": 216450 }, { "epoch": 61.44195288106727, "grad_norm": 0.0014335154555737972, "learning_rate": 3.858274198126597e-05, "loss": 0.0005095148459076882, "step": 216460 }, { "epoch": 61.444791370990636, "grad_norm": 0.9189973473548889, "learning_rate": 3.8579903491342604e-05, "loss": 0.00046705305576324465, "step": 216470 }, { "epoch": 61.44762986091399, "grad_norm": 0.04942374676465988, "learning_rate": 3.857706500141925e-05, "loss": 0.0008369613438844681, "step": 216480 }, { "epoch": 61.450468350837355, "grad_norm": 0.0035699792206287384, "learning_rate": 3.857422651149589e-05, "loss": 0.012227299809455871, "step": 216490 }, { "epoch": 61.45330684076072, "grad_norm": 0.01932774856686592, "learning_rate": 3.857138802157252e-05, "loss": 0.00252157486975193, "step": 216500 }, { "epoch": 61.45330684076072, "eval_accuracy": 0.9821962230558912, "eval_loss": 0.07196193188428879, "eval_runtime": 38.4375, "eval_samples_per_second": 409.158, "eval_steps_per_second": 6.4, "step": 216500 }, { "epoch": 61.45614533068407, "grad_norm": 4.0966877937316895, "learning_rate": 3.856854953164916e-05, "loss": 0.002113128826022148, "step": 216510 }, { "epoch": 61.458983820607436, "grad_norm": 0.35722580552101135, "learning_rate": 3.8565711041725804e-05, "loss": 0.00023383330553770066, "step": 216520 }, { "epoch": 61.4618223105308, "grad_norm": 0.021800125017762184, "learning_rate": 3.8562872551802446e-05, "loss": 0.009238272905349731, "step": 216530 }, { "epoch": 61.464660800454155, "grad_norm": 0.16241948306560516, "learning_rate": 3.856003406187908e-05, "loss": 0.000839514471590519, "step": 216540 }, { "epoch": 61.46749929037752, "grad_norm": 0.12076346576213837, "learning_rate": 3.855719557195572e-05, "loss": 0.00013176649808883668, "step": 216550 }, { "epoch": 61.47033778030088, "grad_norm": 0.009669299237430096, "learning_rate": 3.855435708203236e-05, "loss": 0.0006351893767714501, "step": 216560 }, { "epoch": 61.47317627022424, "grad_norm": 0.09515609592199326, "learning_rate": 3.8551518592109e-05, "loss": 0.001214197650551796, "step": 216570 }, { "epoch": 61.4760147601476, "grad_norm": 0.04109816625714302, "learning_rate": 3.854868010218564e-05, "loss": 0.00017898809164762497, "step": 216580 }, { "epoch": 61.47885325007096, "grad_norm": 0.311598539352417, "learning_rate": 3.854584161226228e-05, "loss": 9.920243173837662e-05, "step": 216590 }, { "epoch": 61.481691739994325, "grad_norm": 0.0993102565407753, "learning_rate": 3.8543003122338915e-05, "loss": 0.00010494682937860489, "step": 216600 }, { "epoch": 61.48453022991768, "grad_norm": 0.011044755578041077, "learning_rate": 3.8540164632415556e-05, "loss": 0.0001517072319984436, "step": 216610 }, { "epoch": 61.487368719841044, "grad_norm": 0.0027446921449154615, "learning_rate": 3.85373261424922e-05, "loss": 0.0001566169783473015, "step": 216620 }, { "epoch": 61.49020720976441, "grad_norm": 0.026247834786772728, "learning_rate": 3.853448765256883e-05, "loss": 0.00013827253133058548, "step": 216630 }, { "epoch": 61.49304569968777, "grad_norm": 0.0019457952585071325, "learning_rate": 3.8531649162645474e-05, "loss": 0.0012311967089772224, "step": 216640 }, { "epoch": 61.495884189611125, "grad_norm": 0.010931369848549366, "learning_rate": 3.8528810672722115e-05, "loss": 3.814566880464554e-05, "step": 216650 }, { "epoch": 61.49872267953449, "grad_norm": 0.00522520812228322, "learning_rate": 3.852597218279875e-05, "loss": 0.00027260109782218934, "step": 216660 }, { "epoch": 61.50156116945785, "grad_norm": 0.003976826090365648, "learning_rate": 3.852313369287539e-05, "loss": 0.00023256111890077591, "step": 216670 }, { "epoch": 61.50439965938121, "grad_norm": 0.0035125664435327053, "learning_rate": 3.852029520295203e-05, "loss": 0.00018306709825992583, "step": 216680 }, { "epoch": 61.50723814930457, "grad_norm": 0.009959547780454159, "learning_rate": 3.8517456713028674e-05, "loss": 9.889528155326843e-05, "step": 216690 }, { "epoch": 61.51007663922793, "grad_norm": 0.06447896361351013, "learning_rate": 3.851461822310531e-05, "loss": 0.00020834356546401979, "step": 216700 }, { "epoch": 61.51291512915129, "grad_norm": 0.0006422264850698411, "learning_rate": 3.851177973318194e-05, "loss": 0.0003067467361688614, "step": 216710 }, { "epoch": 61.51575361907465, "grad_norm": 0.014102987013757229, "learning_rate": 3.850894124325859e-05, "loss": 3.587640821933746e-05, "step": 216720 }, { "epoch": 61.518592108998014, "grad_norm": 0.05031342804431915, "learning_rate": 3.8506102753335226e-05, "loss": 0.00023507289588451386, "step": 216730 }, { "epoch": 61.52143059892138, "grad_norm": 0.00488974479958415, "learning_rate": 3.850326426341187e-05, "loss": 0.00011833049356937408, "step": 216740 }, { "epoch": 61.52426908884473, "grad_norm": 2.5416300296783447, "learning_rate": 3.850042577348851e-05, "loss": 0.0006750499829649925, "step": 216750 }, { "epoch": 61.527107578768096, "grad_norm": 0.0023865634575486183, "learning_rate": 3.849758728356514e-05, "loss": 4.795920103788376e-05, "step": 216760 }, { "epoch": 61.52994606869146, "grad_norm": 0.09156517684459686, "learning_rate": 3.8494748793641785e-05, "loss": 0.0004896875470876694, "step": 216770 }, { "epoch": 61.532784558614814, "grad_norm": 0.0038115738425403833, "learning_rate": 3.8491910303718426e-05, "loss": 0.0001560451462864876, "step": 216780 }, { "epoch": 61.53562304853818, "grad_norm": 0.014277840964496136, "learning_rate": 3.848907181379506e-05, "loss": 0.008716406673192978, "step": 216790 }, { "epoch": 61.53846153846154, "grad_norm": 0.33890971541404724, "learning_rate": 3.84862333238717e-05, "loss": 0.00021644067019224166, "step": 216800 }, { "epoch": 61.541300028384896, "grad_norm": 0.004867386072874069, "learning_rate": 3.8483394833948336e-05, "loss": 0.001603522151708603, "step": 216810 }, { "epoch": 61.54413851830826, "grad_norm": 5.124657154083252, "learning_rate": 3.8480556344024985e-05, "loss": 0.0010440368205308914, "step": 216820 }, { "epoch": 61.54697700823162, "grad_norm": 0.0037628444842994213, "learning_rate": 3.847771785410162e-05, "loss": 0.00012911092489957808, "step": 216830 }, { "epoch": 61.549815498154985, "grad_norm": 0.00656378036364913, "learning_rate": 3.8474879364178254e-05, "loss": 0.00023864172399044037, "step": 216840 }, { "epoch": 61.55265398807834, "grad_norm": 0.002943760249763727, "learning_rate": 3.84720408742549e-05, "loss": 6.984677165746688e-05, "step": 216850 }, { "epoch": 61.5554924780017, "grad_norm": 0.02780756540596485, "learning_rate": 3.8469202384331537e-05, "loss": 4.697293043136597e-05, "step": 216860 }, { "epoch": 61.558330967925066, "grad_norm": 0.009549479000270367, "learning_rate": 3.846636389440818e-05, "loss": 0.00014550182968378066, "step": 216870 }, { "epoch": 61.56116945784842, "grad_norm": 0.0038907816633582115, "learning_rate": 3.846352540448482e-05, "loss": 0.00010539032518863678, "step": 216880 }, { "epoch": 61.564007947771785, "grad_norm": 0.805609405040741, "learning_rate": 3.8460686914561454e-05, "loss": 0.00015323646366596221, "step": 216890 }, { "epoch": 61.56684643769515, "grad_norm": 0.0011812869925051928, "learning_rate": 3.8457848424638095e-05, "loss": 5.3542666137218475e-05, "step": 216900 }, { "epoch": 61.56968492761851, "grad_norm": 0.0018110476667061448, "learning_rate": 3.845500993471474e-05, "loss": 0.0003577960655093193, "step": 216910 }, { "epoch": 61.572523417541866, "grad_norm": 0.0034663493279367685, "learning_rate": 3.845217144479137e-05, "loss": 0.00015416499227285385, "step": 216920 }, { "epoch": 61.57536190746523, "grad_norm": 0.8691255450248718, "learning_rate": 3.844933295486801e-05, "loss": 0.002823929116129875, "step": 216930 }, { "epoch": 61.57820039738859, "grad_norm": 0.0026619904674589634, "learning_rate": 3.844649446494465e-05, "loss": 0.001968517526984215, "step": 216940 }, { "epoch": 61.58103888731195, "grad_norm": 0.01317682210355997, "learning_rate": 3.8443655975021295e-05, "loss": 4.1662342846393584e-05, "step": 216950 }, { "epoch": 61.58387737723531, "grad_norm": 0.049866437911987305, "learning_rate": 3.844081748509793e-05, "loss": 0.0018288526684045792, "step": 216960 }, { "epoch": 61.586715867158674, "grad_norm": 0.005650550592690706, "learning_rate": 3.8437978995174565e-05, "loss": 0.004747347161173821, "step": 216970 }, { "epoch": 61.58955435708203, "grad_norm": 0.19943250715732574, "learning_rate": 3.843514050525121e-05, "loss": 8.829608559608459e-05, "step": 216980 }, { "epoch": 61.59239284700539, "grad_norm": 0.0023092529736459255, "learning_rate": 3.843230201532785e-05, "loss": 0.0006953457370400429, "step": 216990 }, { "epoch": 61.595231336928755, "grad_norm": 0.015556196682155132, "learning_rate": 3.842946352540449e-05, "loss": 0.0002447430044412613, "step": 217000 }, { "epoch": 61.595231336928755, "eval_accuracy": 0.9847396197621924, "eval_loss": 0.06073416769504547, "eval_runtime": 37.4003, "eval_samples_per_second": 420.504, "eval_steps_per_second": 6.577, "step": 217000 }, { "epoch": 61.59806982685212, "grad_norm": 0.22712159156799316, "learning_rate": 3.842662503548113e-05, "loss": 0.00011667031794786454, "step": 217010 }, { "epoch": 61.600908316775474, "grad_norm": 0.013304251246154308, "learning_rate": 3.8423786545557765e-05, "loss": 0.00030243247747421267, "step": 217020 }, { "epoch": 61.60374680669884, "grad_norm": 0.6007874011993408, "learning_rate": 3.8420948055634406e-05, "loss": 0.00035675223916769027, "step": 217030 }, { "epoch": 61.6065852966222, "grad_norm": 0.055842556059360504, "learning_rate": 3.841810956571104e-05, "loss": 0.0002794753760099411, "step": 217040 }, { "epoch": 61.609423786545555, "grad_norm": 0.012767369858920574, "learning_rate": 3.841527107578768e-05, "loss": 0.0006224311888217926, "step": 217050 }, { "epoch": 61.61226227646892, "grad_norm": 0.007395104039460421, "learning_rate": 3.8412432585864323e-05, "loss": 0.0005726590752601624, "step": 217060 }, { "epoch": 61.61510076639228, "grad_norm": 3.3215513229370117, "learning_rate": 3.840959409594096e-05, "loss": 0.0010524816811084748, "step": 217070 }, { "epoch": 61.61793925631564, "grad_norm": 0.07460888475179672, "learning_rate": 3.84067556060176e-05, "loss": 0.0075073324143886564, "step": 217080 }, { "epoch": 61.620777746239, "grad_norm": 0.06144621595740318, "learning_rate": 3.840391711609424e-05, "loss": 0.0015381213277578353, "step": 217090 }, { "epoch": 61.62361623616236, "grad_norm": 0.05689697340130806, "learning_rate": 3.8401078626170875e-05, "loss": 0.0056976847350597385, "step": 217100 }, { "epoch": 61.626454726085726, "grad_norm": 0.0734400525689125, "learning_rate": 3.8398240136247524e-05, "loss": 0.0007412809878587723, "step": 217110 }, { "epoch": 61.62929321600908, "grad_norm": 0.05120044946670532, "learning_rate": 3.839540164632416e-05, "loss": 0.00011450108140707017, "step": 217120 }, { "epoch": 61.632131705932444, "grad_norm": 0.1672370284795761, "learning_rate": 3.839256315640079e-05, "loss": 0.0004768708720803261, "step": 217130 }, { "epoch": 61.63497019585581, "grad_norm": 0.006471666973084211, "learning_rate": 3.8389724666477434e-05, "loss": 0.00016456209123134614, "step": 217140 }, { "epoch": 61.63780868577916, "grad_norm": 0.07443400472402573, "learning_rate": 3.8386886176554075e-05, "loss": 0.0006970198825001717, "step": 217150 }, { "epoch": 61.640647175702526, "grad_norm": 2.0898752212524414, "learning_rate": 3.838404768663072e-05, "loss": 0.0009480662643909455, "step": 217160 }, { "epoch": 61.64348566562589, "grad_norm": 0.009258530102670193, "learning_rate": 3.838120919670735e-05, "loss": 0.0029718900099396706, "step": 217170 }, { "epoch": 61.646324155549244, "grad_norm": 7.922914505004883, "learning_rate": 3.837837070678399e-05, "loss": 0.0015622209757566452, "step": 217180 }, { "epoch": 61.64916264547261, "grad_norm": 0.023038843646645546, "learning_rate": 3.8375816065852966e-05, "loss": 0.009499430656433105, "step": 217190 }, { "epoch": 61.65200113539597, "grad_norm": 0.030422227457165718, "learning_rate": 3.837297757592961e-05, "loss": 0.0005658021196722985, "step": 217200 }, { "epoch": 61.65483962531933, "grad_norm": 0.007845188491046429, "learning_rate": 3.837013908600625e-05, "loss": 0.0012282911688089372, "step": 217210 }, { "epoch": 61.65767811524269, "grad_norm": 0.019343774765729904, "learning_rate": 3.836730059608288e-05, "loss": 0.00023734644055366515, "step": 217220 }, { "epoch": 61.66051660516605, "grad_norm": 0.007765268441289663, "learning_rate": 3.8364462106159525e-05, "loss": 0.0005443140864372254, "step": 217230 }, { "epoch": 61.663355095089415, "grad_norm": 0.10278048366308212, "learning_rate": 3.8361623616236166e-05, "loss": 0.0030455656349658968, "step": 217240 }, { "epoch": 61.66619358501277, "grad_norm": 0.25590917468070984, "learning_rate": 3.83587851263128e-05, "loss": 0.004613111913204193, "step": 217250 }, { "epoch": 61.66903207493613, "grad_norm": 0.10031487047672272, "learning_rate": 3.835594663638944e-05, "loss": 0.0021864302456378938, "step": 217260 }, { "epoch": 61.671870564859496, "grad_norm": 0.014592045918107033, "learning_rate": 3.8353108146466083e-05, "loss": 6.813444197177887e-05, "step": 217270 }, { "epoch": 61.67470905478285, "grad_norm": 0.025220714509487152, "learning_rate": 3.835026965654272e-05, "loss": 0.00019437074661254883, "step": 217280 }, { "epoch": 61.677547544706215, "grad_norm": 1.1378206014633179, "learning_rate": 3.8347431166619366e-05, "loss": 0.00029332824051380156, "step": 217290 }, { "epoch": 61.68038603462958, "grad_norm": 0.15194350481033325, "learning_rate": 3.8344592676696e-05, "loss": 0.00027339383959770205, "step": 217300 }, { "epoch": 61.68322452455294, "grad_norm": 0.07387512177228928, "learning_rate": 3.8341754186772635e-05, "loss": 0.008821706473827361, "step": 217310 }, { "epoch": 61.6860630144763, "grad_norm": 0.0015754427295178175, "learning_rate": 3.833891569684928e-05, "loss": 6.193052977323532e-05, "step": 217320 }, { "epoch": 61.68890150439966, "grad_norm": 0.0018308957805857062, "learning_rate": 3.833607720692592e-05, "loss": 0.00022224802523851395, "step": 217330 }, { "epoch": 61.69173999432302, "grad_norm": 0.04222436249256134, "learning_rate": 3.833323871700256e-05, "loss": 0.002479264885187149, "step": 217340 }, { "epoch": 61.69457848424638, "grad_norm": 0.010038880631327629, "learning_rate": 3.8330400227079194e-05, "loss": 0.00011707507073879241, "step": 217350 }, { "epoch": 61.69741697416974, "grad_norm": 0.06490863859653473, "learning_rate": 3.8327561737155835e-05, "loss": 0.0012571511790156365, "step": 217360 }, { "epoch": 61.700255464093104, "grad_norm": 0.12479955703020096, "learning_rate": 3.832472324723248e-05, "loss": 0.0003649415448307991, "step": 217370 }, { "epoch": 61.70309395401647, "grad_norm": 0.14667409658432007, "learning_rate": 3.832188475730911e-05, "loss": 0.009362080693244934, "step": 217380 }, { "epoch": 61.70593244393982, "grad_norm": 7.123400688171387, "learning_rate": 3.831904626738575e-05, "loss": 0.0022306239232420923, "step": 217390 }, { "epoch": 61.708770933863185, "grad_norm": 0.032383378595113754, "learning_rate": 3.8316207777462394e-05, "loss": 0.00023771356791257858, "step": 217400 }, { "epoch": 61.71160942378655, "grad_norm": 0.1365320086479187, "learning_rate": 3.831336928753903e-05, "loss": 7.75778666138649e-05, "step": 217410 }, { "epoch": 61.714447913709904, "grad_norm": 0.051904596388339996, "learning_rate": 3.831053079761567e-05, "loss": 0.0012402260676026345, "step": 217420 }, { "epoch": 61.71728640363327, "grad_norm": 0.0705917552113533, "learning_rate": 3.830769230769231e-05, "loss": 0.006250421702861786, "step": 217430 }, { "epoch": 61.72012489355663, "grad_norm": 16.163270950317383, "learning_rate": 3.8304853817768946e-05, "loss": 0.012521111965179443, "step": 217440 }, { "epoch": 61.722963383479986, "grad_norm": 0.060167692601680756, "learning_rate": 3.830201532784559e-05, "loss": 0.005848586559295654, "step": 217450 }, { "epoch": 61.72580187340335, "grad_norm": 0.009045969694852829, "learning_rate": 3.829917683792223e-05, "loss": 0.00025971736758947375, "step": 217460 }, { "epoch": 61.72864036332671, "grad_norm": 0.01690785214304924, "learning_rate": 3.829633834799887e-05, "loss": 0.0002386096864938736, "step": 217470 }, { "epoch": 61.731478853250074, "grad_norm": 0.001782320556230843, "learning_rate": 3.8293499858075505e-05, "loss": 0.004406949132680893, "step": 217480 }, { "epoch": 61.73431734317343, "grad_norm": 0.0265523511916399, "learning_rate": 3.8290661368152146e-05, "loss": 0.005687960237264633, "step": 217490 }, { "epoch": 61.73715583309679, "grad_norm": 0.27054065465927124, "learning_rate": 3.828782287822879e-05, "loss": 0.0001758582890033722, "step": 217500 }, { "epoch": 61.73715583309679, "eval_accuracy": 0.9799707509378776, "eval_loss": 0.08919933438301086, "eval_runtime": 48.466, "eval_samples_per_second": 324.495, "eval_steps_per_second": 5.076, "step": 217500 }, { "epoch": 61.739994323020156, "grad_norm": 0.9585981965065002, "learning_rate": 3.828498438830542e-05, "loss": 0.007846495509147644, "step": 217510 }, { "epoch": 61.74283281294351, "grad_norm": 18.88501739501953, "learning_rate": 3.8282145898382064e-05, "loss": 0.004376495629549027, "step": 217520 }, { "epoch": 61.745671302866874, "grad_norm": 1.3951138257980347, "learning_rate": 3.8279307408458705e-05, "loss": 0.000617181695997715, "step": 217530 }, { "epoch": 61.74850979279024, "grad_norm": 0.03698063641786575, "learning_rate": 3.827646891853534e-05, "loss": 0.0065174058079719545, "step": 217540 }, { "epoch": 61.75134828271359, "grad_norm": 0.027892695739865303, "learning_rate": 3.827363042861198e-05, "loss": 0.008535146713256836, "step": 217550 }, { "epoch": 61.754186772636956, "grad_norm": 0.02134503424167633, "learning_rate": 3.827079193868862e-05, "loss": 0.00013269856572151184, "step": 217560 }, { "epoch": 61.75702526256032, "grad_norm": 0.009159796871244907, "learning_rate": 3.826795344876526e-05, "loss": 0.00041084345430135726, "step": 217570 }, { "epoch": 61.75986375248368, "grad_norm": 0.05304114893078804, "learning_rate": 3.82651149588419e-05, "loss": 0.00043141841888427734, "step": 217580 }, { "epoch": 61.76270224240704, "grad_norm": 0.12601496279239655, "learning_rate": 3.826227646891854e-05, "loss": 0.0020149525254964827, "step": 217590 }, { "epoch": 61.7655407323304, "grad_norm": 0.016741350293159485, "learning_rate": 3.8259437978995174e-05, "loss": 0.0009188147261738778, "step": 217600 }, { "epoch": 61.76837922225376, "grad_norm": 1.5225194692611694, "learning_rate": 3.8256599489071816e-05, "loss": 0.0006115846335887909, "step": 217610 }, { "epoch": 61.77121771217712, "grad_norm": 2.0134544372558594, "learning_rate": 3.825376099914845e-05, "loss": 0.0006983054801821709, "step": 217620 }, { "epoch": 61.77405620210048, "grad_norm": 2.403027057647705, "learning_rate": 3.82509225092251e-05, "loss": 0.0008329750970005989, "step": 217630 }, { "epoch": 61.776894692023845, "grad_norm": 0.008553680032491684, "learning_rate": 3.824808401930173e-05, "loss": 0.0002314595505595207, "step": 217640 }, { "epoch": 61.7797331819472, "grad_norm": 0.1720002442598343, "learning_rate": 3.824524552937837e-05, "loss": 0.0031741507351398467, "step": 217650 }, { "epoch": 61.78257167187056, "grad_norm": 3.659210205078125, "learning_rate": 3.8242407039455016e-05, "loss": 0.000997081957757473, "step": 217660 }, { "epoch": 61.785410161793926, "grad_norm": 0.006156387738883495, "learning_rate": 3.823956854953165e-05, "loss": 0.0005609579384326935, "step": 217670 }, { "epoch": 61.78824865171729, "grad_norm": 0.3010123670101166, "learning_rate": 3.823673005960829e-05, "loss": 0.004343262314796448, "step": 217680 }, { "epoch": 61.791087141640645, "grad_norm": 2.500779628753662, "learning_rate": 3.823389156968493e-05, "loss": 0.0007551418617367745, "step": 217690 }, { "epoch": 61.79392563156401, "grad_norm": 1.1330931186676025, "learning_rate": 3.823105307976157e-05, "loss": 0.000714520737528801, "step": 217700 }, { "epoch": 61.79676412148737, "grad_norm": 0.07324929535388947, "learning_rate": 3.822821458983821e-05, "loss": 0.00019750911742448806, "step": 217710 }, { "epoch": 61.79960261141073, "grad_norm": 0.06444495916366577, "learning_rate": 3.8225376099914844e-05, "loss": 6.028320640325546e-05, "step": 217720 }, { "epoch": 61.80244110133409, "grad_norm": 0.06525526940822601, "learning_rate": 3.8222537609991485e-05, "loss": 0.016061460971832274, "step": 217730 }, { "epoch": 61.80527959125745, "grad_norm": 0.10675933957099915, "learning_rate": 3.8219699120068126e-05, "loss": 0.0026799535378813745, "step": 217740 }, { "epoch": 61.808118081180815, "grad_norm": 0.018597720190882683, "learning_rate": 3.821686063014476e-05, "loss": 7.21469521522522e-05, "step": 217750 }, { "epoch": 61.81095657110417, "grad_norm": 0.03645294904708862, "learning_rate": 3.821402214022141e-05, "loss": 0.00018490161746740342, "step": 217760 }, { "epoch": 61.813795061027534, "grad_norm": 0.0185192059725523, "learning_rate": 3.8211183650298044e-05, "loss": 0.0027557995170354845, "step": 217770 }, { "epoch": 61.8166335509509, "grad_norm": 0.09961811453104019, "learning_rate": 3.820834516037468e-05, "loss": 0.00037402864545583726, "step": 217780 }, { "epoch": 61.81947204087425, "grad_norm": 7.509219169616699, "learning_rate": 3.8205506670451326e-05, "loss": 0.003178158402442932, "step": 217790 }, { "epoch": 61.822310530797616, "grad_norm": 0.039689067751169205, "learning_rate": 3.820266818052796e-05, "loss": 0.000721186213195324, "step": 217800 }, { "epoch": 61.82514902072098, "grad_norm": 0.21839530766010284, "learning_rate": 3.81998296906046e-05, "loss": 0.00029327850788831713, "step": 217810 }, { "epoch": 61.827987510644334, "grad_norm": 0.0042008631862699986, "learning_rate": 3.819699120068124e-05, "loss": 0.00015079490840435027, "step": 217820 }, { "epoch": 61.8308260005677, "grad_norm": 0.02262157015502453, "learning_rate": 3.819415271075788e-05, "loss": 0.0003459306433796883, "step": 217830 }, { "epoch": 61.83366449049106, "grad_norm": 1.1369282007217407, "learning_rate": 3.819131422083452e-05, "loss": 0.0016152765601873398, "step": 217840 }, { "epoch": 61.83650298041442, "grad_norm": 0.028387637808918953, "learning_rate": 3.8188475730911154e-05, "loss": 0.00018646102398633957, "step": 217850 }, { "epoch": 61.83934147033778, "grad_norm": 0.023615295067429543, "learning_rate": 3.8185637240987796e-05, "loss": 0.0002864968031644821, "step": 217860 }, { "epoch": 61.84217996026114, "grad_norm": 0.00417092302814126, "learning_rate": 3.818279875106444e-05, "loss": 9.718965739011765e-05, "step": 217870 }, { "epoch": 61.845018450184504, "grad_norm": 0.01815824583172798, "learning_rate": 3.817996026114107e-05, "loss": 6.0336291790008544e-05, "step": 217880 }, { "epoch": 61.84785694010786, "grad_norm": 0.008838207460939884, "learning_rate": 3.817712177121771e-05, "loss": 5.667451769113541e-05, "step": 217890 }, { "epoch": 61.85069543003122, "grad_norm": 0.009145930409431458, "learning_rate": 3.8174283281294354e-05, "loss": 0.0003045434132218361, "step": 217900 }, { "epoch": 61.853533919954586, "grad_norm": 0.03988523781299591, "learning_rate": 3.817144479137099e-05, "loss": 0.00014876183122396468, "step": 217910 }, { "epoch": 61.85637240987794, "grad_norm": 0.011049241758883, "learning_rate": 3.816860630144763e-05, "loss": 0.0003600446507334709, "step": 217920 }, { "epoch": 61.859210899801305, "grad_norm": 0.06162289157509804, "learning_rate": 3.816576781152427e-05, "loss": 0.00010682605206966401, "step": 217930 }, { "epoch": 61.86204938972467, "grad_norm": 0.009206174872815609, "learning_rate": 3.816292932160091e-05, "loss": 0.00016649365425109864, "step": 217940 }, { "epoch": 61.86488787964803, "grad_norm": 2.055002212524414, "learning_rate": 3.816009083167755e-05, "loss": 0.0003541434183716774, "step": 217950 }, { "epoch": 61.867726369571386, "grad_norm": 0.5272959470748901, "learning_rate": 3.815725234175419e-05, "loss": 0.00023730583488941194, "step": 217960 }, { "epoch": 61.87056485949475, "grad_norm": 4.819873332977295, "learning_rate": 3.815441385183083e-05, "loss": 0.0038386017084121706, "step": 217970 }, { "epoch": 61.87340334941811, "grad_norm": 0.1760251224040985, "learning_rate": 3.8151575361907465e-05, "loss": 0.0017680739983916283, "step": 217980 }, { "epoch": 61.87624183934147, "grad_norm": 0.10288600623607635, "learning_rate": 3.8148736871984107e-05, "loss": 0.00774703249335289, "step": 217990 }, { "epoch": 61.87908032926483, "grad_norm": 0.01798424683511257, "learning_rate": 3.814589838206075e-05, "loss": 0.017142054438591004, "step": 218000 }, { "epoch": 61.87908032926483, "eval_accuracy": 0.9827684873148089, "eval_loss": 0.0675228163599968, "eval_runtime": 44.098, "eval_samples_per_second": 356.637, "eval_steps_per_second": 5.578, "step": 218000 }, { "epoch": 61.88191881918819, "grad_norm": 0.033173687756061554, "learning_rate": 3.814305989213738e-05, "loss": 0.00026382338255643846, "step": 218010 }, { "epoch": 61.88475730911155, "grad_norm": 1.4475584030151367, "learning_rate": 3.8140221402214024e-05, "loss": 0.0006295954808592796, "step": 218020 }, { "epoch": 61.88759579903491, "grad_norm": 0.015735015273094177, "learning_rate": 3.8137382912290665e-05, "loss": 0.0004129866138100624, "step": 218030 }, { "epoch": 61.890434288958275, "grad_norm": 0.011533143930137157, "learning_rate": 3.81345444223673e-05, "loss": 0.00037387125194072724, "step": 218040 }, { "epoch": 61.89327277888164, "grad_norm": 0.006589195691049099, "learning_rate": 3.813170593244394e-05, "loss": 0.0010079853236675262, "step": 218050 }, { "epoch": 61.896111268804994, "grad_norm": 0.022032732143998146, "learning_rate": 3.812886744252058e-05, "loss": 0.0004018433392047882, "step": 218060 }, { "epoch": 61.89894975872836, "grad_norm": 0.15415059030056, "learning_rate": 3.812602895259722e-05, "loss": 0.00025506243109703065, "step": 218070 }, { "epoch": 61.90178824865172, "grad_norm": 0.15831886231899261, "learning_rate": 3.812319046267386e-05, "loss": 0.002264722436666489, "step": 218080 }, { "epoch": 61.904626738575075, "grad_norm": 0.39260977506637573, "learning_rate": 3.81203519727505e-05, "loss": 0.0098299540579319, "step": 218090 }, { "epoch": 61.90746522849844, "grad_norm": 0.27855798602104187, "learning_rate": 3.811751348282714e-05, "loss": 0.0015761982649564743, "step": 218100 }, { "epoch": 61.9103037184218, "grad_norm": 0.13510414958000183, "learning_rate": 3.8114674992903776e-05, "loss": 0.0008968755602836609, "step": 218110 }, { "epoch": 61.913142208345164, "grad_norm": 0.01346402708441019, "learning_rate": 3.811183650298041e-05, "loss": 0.0006093624979257584, "step": 218120 }, { "epoch": 61.91598069826852, "grad_norm": 0.57992023229599, "learning_rate": 3.810899801305706e-05, "loss": 0.0007610524073243142, "step": 218130 }, { "epoch": 61.91881918819188, "grad_norm": 0.01405173260718584, "learning_rate": 3.810615952313369e-05, "loss": 0.004457943141460419, "step": 218140 }, { "epoch": 61.921657678115245, "grad_norm": 1.2546448707580566, "learning_rate": 3.8103321033210335e-05, "loss": 0.0037544868886470796, "step": 218150 }, { "epoch": 61.9244961680386, "grad_norm": 0.08996561169624329, "learning_rate": 3.8100482543286976e-05, "loss": 0.0003906184807419777, "step": 218160 }, { "epoch": 61.927334657961964, "grad_norm": 0.13120394945144653, "learning_rate": 3.809764405336361e-05, "loss": 0.00021133739501237868, "step": 218170 }, { "epoch": 61.93017314788533, "grad_norm": 0.3011367917060852, "learning_rate": 3.809480556344025e-05, "loss": 0.007305073738098145, "step": 218180 }, { "epoch": 61.93301163780868, "grad_norm": 0.016537899151444435, "learning_rate": 3.809196707351689e-05, "loss": 0.0033513307571411133, "step": 218190 }, { "epoch": 61.935850127732046, "grad_norm": 2.74672532081604, "learning_rate": 3.808912858359353e-05, "loss": 0.004639416560530662, "step": 218200 }, { "epoch": 61.93868861765541, "grad_norm": 0.21851499378681183, "learning_rate": 3.808629009367017e-05, "loss": 0.003064493089914322, "step": 218210 }, { "epoch": 61.94152710757877, "grad_norm": 0.011068925261497498, "learning_rate": 3.8083451603746804e-05, "loss": 0.0008168801665306092, "step": 218220 }, { "epoch": 61.94436559750213, "grad_norm": 0.004341332707554102, "learning_rate": 3.808061311382345e-05, "loss": 0.0025608714669942854, "step": 218230 }, { "epoch": 61.94720408742549, "grad_norm": 0.1148710697889328, "learning_rate": 3.807777462390009e-05, "loss": 0.0031282901763916016, "step": 218240 }, { "epoch": 61.95004257734885, "grad_norm": 0.030896129086613655, "learning_rate": 3.807493613397672e-05, "loss": 0.0005435623228549957, "step": 218250 }, { "epoch": 61.95288106727221, "grad_norm": 0.012812220491468906, "learning_rate": 3.807209764405337e-05, "loss": 0.004351932555437088, "step": 218260 }, { "epoch": 61.95571955719557, "grad_norm": 0.00761455437168479, "learning_rate": 3.8069259154130004e-05, "loss": 0.00012173280119895936, "step": 218270 }, { "epoch": 61.958558047118935, "grad_norm": 0.01971456967294216, "learning_rate": 3.8066420664206645e-05, "loss": 0.00023450125008821486, "step": 218280 }, { "epoch": 61.96139653704229, "grad_norm": 0.0364803820848465, "learning_rate": 3.806358217428329e-05, "loss": 0.003240833431482315, "step": 218290 }, { "epoch": 61.96423502696565, "grad_norm": 0.006698771379888058, "learning_rate": 3.806074368435992e-05, "loss": 0.00103821512311697, "step": 218300 }, { "epoch": 61.967073516889016, "grad_norm": 0.022623693570494652, "learning_rate": 3.805790519443656e-05, "loss": 0.0002483054995536804, "step": 218310 }, { "epoch": 61.96991200681238, "grad_norm": 0.018298352137207985, "learning_rate": 3.80550667045132e-05, "loss": 6.766617298126221e-05, "step": 218320 }, { "epoch": 61.972750496735735, "grad_norm": 0.0030988031066954136, "learning_rate": 3.805222821458984e-05, "loss": 0.00035325996577739716, "step": 218330 }, { "epoch": 61.9755889866591, "grad_norm": 1.3408488035202026, "learning_rate": 3.804938972466648e-05, "loss": 0.0005421100184321403, "step": 218340 }, { "epoch": 61.97842747658246, "grad_norm": 0.22806783020496368, "learning_rate": 3.8046551234743115e-05, "loss": 0.00184494499117136, "step": 218350 }, { "epoch": 61.981265966505816, "grad_norm": 0.057422369718551636, "learning_rate": 3.8043712744819756e-05, "loss": 0.001098409667611122, "step": 218360 }, { "epoch": 61.98410445642918, "grad_norm": 0.010396579280495644, "learning_rate": 3.80408742548964e-05, "loss": 0.0006090978160500527, "step": 218370 }, { "epoch": 61.98694294635254, "grad_norm": 0.007680213078856468, "learning_rate": 3.803803576497303e-05, "loss": 7.275417447090149e-05, "step": 218380 }, { "epoch": 61.9897814362759, "grad_norm": 0.048631586134433746, "learning_rate": 3.803519727504968e-05, "loss": 0.00012879595160484315, "step": 218390 }, { "epoch": 61.99261992619926, "grad_norm": 0.02325817383825779, "learning_rate": 3.8032358785126315e-05, "loss": 0.0010747939348220825, "step": 218400 }, { "epoch": 61.995458416122624, "grad_norm": 0.10332957655191422, "learning_rate": 3.802952029520295e-05, "loss": 0.012292644381523133, "step": 218410 }, { "epoch": 61.99829690604599, "grad_norm": 0.0033600302413105965, "learning_rate": 3.802668180527959e-05, "loss": 0.0009928546845912934, "step": 218420 }, { "epoch": 62.00113539596934, "grad_norm": 0.12465888261795044, "learning_rate": 3.802384331535623e-05, "loss": 0.0003943183459341526, "step": 218430 }, { "epoch": 62.003973885892705, "grad_norm": 0.008044046349823475, "learning_rate": 3.8021004825432874e-05, "loss": 0.00016580913215875627, "step": 218440 }, { "epoch": 62.00681237581607, "grad_norm": 0.020489802584052086, "learning_rate": 3.801816633550951e-05, "loss": 0.0001262987032532692, "step": 218450 }, { "epoch": 62.009650865739424, "grad_norm": 0.004988184664398432, "learning_rate": 3.801532784558615e-05, "loss": 0.0023138348013162615, "step": 218460 }, { "epoch": 62.01248935566279, "grad_norm": 0.012867710553109646, "learning_rate": 3.801248935566279e-05, "loss": 9.307153522968293e-05, "step": 218470 }, { "epoch": 62.01532784558615, "grad_norm": 0.009743023663759232, "learning_rate": 3.8009650865739425e-05, "loss": 0.0003830261528491974, "step": 218480 }, { "epoch": 62.018166335509505, "grad_norm": 0.00596951087936759, "learning_rate": 3.800681237581607e-05, "loss": 0.0019066313281655312, "step": 218490 }, { "epoch": 62.02100482543287, "grad_norm": 0.003472641110420227, "learning_rate": 3.800397388589271e-05, "loss": 0.00010144319385290146, "step": 218500 }, { "epoch": 62.02100482543287, "eval_accuracy": 0.9835950912443568, "eval_loss": 0.06743203848600388, "eval_runtime": 44.98, "eval_samples_per_second": 349.645, "eval_steps_per_second": 5.469, "step": 218500 }, { "epoch": 62.02384331535623, "grad_norm": 0.003336214693263173, "learning_rate": 3.800113539596934e-05, "loss": 0.0008506203070282936, "step": 218510 }, { "epoch": 62.026681805279594, "grad_norm": 0.2644653916358948, "learning_rate": 3.799829690604599e-05, "loss": 0.00040294099599123, "step": 218520 }, { "epoch": 62.02952029520295, "grad_norm": 0.055923935025930405, "learning_rate": 3.7995458416122626e-05, "loss": 0.0002960167825222015, "step": 218530 }, { "epoch": 62.03235878512631, "grad_norm": 0.011558032594621181, "learning_rate": 3.799261992619926e-05, "loss": 0.0002859119325876236, "step": 218540 }, { "epoch": 62.035197275049676, "grad_norm": 0.020116383209824562, "learning_rate": 3.79897814362759e-05, "loss": 0.00018100477755069732, "step": 218550 }, { "epoch": 62.03803576497303, "grad_norm": 0.07896673679351807, "learning_rate": 3.798694294635254e-05, "loss": 0.0003201007843017578, "step": 218560 }, { "epoch": 62.040874254896394, "grad_norm": 0.013229201547801495, "learning_rate": 3.7984104456429184e-05, "loss": 0.0003532333299517632, "step": 218570 }, { "epoch": 62.04371274481976, "grad_norm": 0.007732975762337446, "learning_rate": 3.798126596650582e-05, "loss": 0.0022768953815102575, "step": 218580 }, { "epoch": 62.04655123474312, "grad_norm": 1.0687175989151, "learning_rate": 3.797842747658246e-05, "loss": 0.0002965172752737999, "step": 218590 }, { "epoch": 62.049389724666476, "grad_norm": 0.004664428066462278, "learning_rate": 3.79755889866591e-05, "loss": 0.00014066006988286973, "step": 218600 }, { "epoch": 62.05222821458984, "grad_norm": 0.005672204773873091, "learning_rate": 3.7972750496735736e-05, "loss": 0.00018424727022647858, "step": 218610 }, { "epoch": 62.0550667045132, "grad_norm": 0.014868221245706081, "learning_rate": 3.796991200681238e-05, "loss": 0.0014407718554139137, "step": 218620 }, { "epoch": 62.05790519443656, "grad_norm": 0.02091379277408123, "learning_rate": 3.796707351688902e-05, "loss": 0.00018577706068754197, "step": 218630 }, { "epoch": 62.06074368435992, "grad_norm": 0.04318813234567642, "learning_rate": 3.7964235026965654e-05, "loss": 0.00043972376734018325, "step": 218640 }, { "epoch": 62.06358217428328, "grad_norm": 0.2703405022621155, "learning_rate": 3.7961396537042295e-05, "loss": 0.0005551550537347793, "step": 218650 }, { "epoch": 62.06642066420664, "grad_norm": 0.004599728621542454, "learning_rate": 3.7958558047118936e-05, "loss": 0.0023195480927824972, "step": 218660 }, { "epoch": 62.06925915413, "grad_norm": 0.010701978579163551, "learning_rate": 3.795571955719557e-05, "loss": 0.0020101148635149, "step": 218670 }, { "epoch": 62.072097644053365, "grad_norm": 0.22233523428440094, "learning_rate": 3.795288106727221e-05, "loss": 0.007944200932979584, "step": 218680 }, { "epoch": 62.07493613397673, "grad_norm": 0.3563964068889618, "learning_rate": 3.7950042577348854e-05, "loss": 0.00011294670403003693, "step": 218690 }, { "epoch": 62.07777462390008, "grad_norm": 0.013478759676218033, "learning_rate": 3.7947204087425495e-05, "loss": 0.00023825205862522126, "step": 218700 }, { "epoch": 62.080613113823446, "grad_norm": 0.0022191207390278578, "learning_rate": 3.794436559750213e-05, "loss": 0.002364581264555454, "step": 218710 }, { "epoch": 62.08345160374681, "grad_norm": 0.009042485617101192, "learning_rate": 3.794152710757877e-05, "loss": 0.002914653718471527, "step": 218720 }, { "epoch": 62.086290093670165, "grad_norm": 0.003607544582337141, "learning_rate": 3.793868861765541e-05, "loss": 4.058443009853363e-05, "step": 218730 }, { "epoch": 62.08912858359353, "grad_norm": 0.048471949994564056, "learning_rate": 3.793585012773205e-05, "loss": 0.0007373707368969918, "step": 218740 }, { "epoch": 62.09196707351689, "grad_norm": 0.3543038070201874, "learning_rate": 3.793301163780869e-05, "loss": 0.007870765775442124, "step": 218750 }, { "epoch": 62.09480556344025, "grad_norm": 0.10944781452417374, "learning_rate": 3.793017314788533e-05, "loss": 0.00015185028314590454, "step": 218760 }, { "epoch": 62.09764405336361, "grad_norm": 0.009868176653981209, "learning_rate": 3.7927334657961964e-05, "loss": 0.00019311774522066117, "step": 218770 }, { "epoch": 62.10048254328697, "grad_norm": 0.0015659148339182138, "learning_rate": 3.7924496168038606e-05, "loss": 0.00010973904281854629, "step": 218780 }, { "epoch": 62.103321033210335, "grad_norm": 1.1922376155853271, "learning_rate": 3.792165767811525e-05, "loss": 0.00040133055299520495, "step": 218790 }, { "epoch": 62.10615952313369, "grad_norm": 0.0017644359031692147, "learning_rate": 3.791881918819188e-05, "loss": 0.000526217557489872, "step": 218800 }, { "epoch": 62.108998013057054, "grad_norm": 0.0041069285944104195, "learning_rate": 3.791598069826852e-05, "loss": 0.0009324267506599426, "step": 218810 }, { "epoch": 62.11183650298042, "grad_norm": 0.0037225931882858276, "learning_rate": 3.7913142208345164e-05, "loss": 0.00015638768672943115, "step": 218820 }, { "epoch": 62.11467499290377, "grad_norm": 0.019079413264989853, "learning_rate": 3.79103037184218e-05, "loss": 0.00408598780632019, "step": 218830 }, { "epoch": 62.117513482827135, "grad_norm": 0.007028196472674608, "learning_rate": 3.790746522849844e-05, "loss": 0.025120866298675538, "step": 218840 }, { "epoch": 62.1203519727505, "grad_norm": 1.3851810693740845, "learning_rate": 3.7904626738575075e-05, "loss": 0.00027506854385137556, "step": 218850 }, { "epoch": 62.123190462673854, "grad_norm": 0.0015624506631866097, "learning_rate": 3.790178824865172e-05, "loss": 0.0001332230865955353, "step": 218860 }, { "epoch": 62.12602895259722, "grad_norm": 0.524570643901825, "learning_rate": 3.789894975872836e-05, "loss": 0.00014671832323074341, "step": 218870 }, { "epoch": 62.12886744252058, "grad_norm": 0.11064135283231735, "learning_rate": 3.789611126880499e-05, "loss": 0.0003504011780023575, "step": 218880 }, { "epoch": 62.13170593244394, "grad_norm": 0.6685333251953125, "learning_rate": 3.789327277888164e-05, "loss": 0.00022633131593465805, "step": 218890 }, { "epoch": 62.1345444223673, "grad_norm": 0.003583966288715601, "learning_rate": 3.7890434288958275e-05, "loss": 0.00010598711669445038, "step": 218900 }, { "epoch": 62.13738291229066, "grad_norm": 0.054266877472400665, "learning_rate": 3.7887595799034917e-05, "loss": 6.651151925325393e-05, "step": 218910 }, { "epoch": 62.140221402214024, "grad_norm": 7.973468780517578, "learning_rate": 3.788475730911156e-05, "loss": 0.0016633141785860062, "step": 218920 }, { "epoch": 62.14305989213738, "grad_norm": 0.6289869546890259, "learning_rate": 3.788191881918819e-05, "loss": 0.0004006430506706238, "step": 218930 }, { "epoch": 62.14589838206074, "grad_norm": 0.22587423026561737, "learning_rate": 3.7879080329264834e-05, "loss": 0.00014934539794921874, "step": 218940 }, { "epoch": 62.148736871984106, "grad_norm": 0.004797682631760836, "learning_rate": 3.787624183934147e-05, "loss": 5.412846803665161e-05, "step": 218950 }, { "epoch": 62.15157536190747, "grad_norm": 0.01390974223613739, "learning_rate": 3.787340334941811e-05, "loss": 8.715633302927017e-05, "step": 218960 }, { "epoch": 62.154413851830824, "grad_norm": 0.005702738184481859, "learning_rate": 3.787056485949475e-05, "loss": 0.0002280019223690033, "step": 218970 }, { "epoch": 62.15725234175419, "grad_norm": 0.015853511169552803, "learning_rate": 3.7867726369571386e-05, "loss": 0.00015925467014312744, "step": 218980 }, { "epoch": 62.16009083167755, "grad_norm": 0.3081565797328949, "learning_rate": 3.7864887879648034e-05, "loss": 9.443685412406922e-05, "step": 218990 }, { "epoch": 62.162929321600906, "grad_norm": 0.25581061840057373, "learning_rate": 3.786204938972467e-05, "loss": 5.051400512456894e-05, "step": 219000 }, { "epoch": 62.162929321600906, "eval_accuracy": 0.9855662236917403, "eval_loss": 0.056509871035814285, "eval_runtime": 44.7322, "eval_samples_per_second": 351.581, "eval_steps_per_second": 5.499, "step": 219000 }, { "epoch": 62.16576781152427, "grad_norm": 0.03213577717542648, "learning_rate": 3.78592108998013e-05, "loss": 0.0001005338504910469, "step": 219010 }, { "epoch": 62.16860630144763, "grad_norm": 0.042749177664518356, "learning_rate": 3.785637240987795e-05, "loss": 0.00010075699537992477, "step": 219020 }, { "epoch": 62.17144479137099, "grad_norm": 0.016787828877568245, "learning_rate": 3.7853533919954586e-05, "loss": 0.0013962838798761367, "step": 219030 }, { "epoch": 62.17428328129435, "grad_norm": 0.01584095135331154, "learning_rate": 3.785069543003123e-05, "loss": 8.979365229606628e-05, "step": 219040 }, { "epoch": 62.17712177121771, "grad_norm": 0.005950823426246643, "learning_rate": 3.784785694010786e-05, "loss": 0.0016089225187897681, "step": 219050 }, { "epoch": 62.179960261141076, "grad_norm": 0.0028541521169245243, "learning_rate": 3.78450184501845e-05, "loss": 0.0001763463020324707, "step": 219060 }, { "epoch": 62.18279875106443, "grad_norm": 0.06832931935787201, "learning_rate": 3.7842179960261145e-05, "loss": 0.00022789593786001205, "step": 219070 }, { "epoch": 62.185637240987795, "grad_norm": 0.012924284674227238, "learning_rate": 3.783934147033778e-05, "loss": 7.937848567962646e-05, "step": 219080 }, { "epoch": 62.18847573091116, "grad_norm": 0.019457781687378883, "learning_rate": 3.783650298041442e-05, "loss": 0.00018802937120199203, "step": 219090 }, { "epoch": 62.19131422083451, "grad_norm": 11.92054271697998, "learning_rate": 3.783366449049106e-05, "loss": 0.0016745859757065773, "step": 219100 }, { "epoch": 62.194152710757876, "grad_norm": 0.2716916501522064, "learning_rate": 3.7830826000567697e-05, "loss": 0.00011074412614107133, "step": 219110 }, { "epoch": 62.19699120068124, "grad_norm": 0.013427570462226868, "learning_rate": 3.7827987510644345e-05, "loss": 7.843915373086929e-05, "step": 219120 }, { "epoch": 62.199829690604595, "grad_norm": 0.008422521874308586, "learning_rate": 3.782514902072098e-05, "loss": 0.000111381895840168, "step": 219130 }, { "epoch": 62.20266818052796, "grad_norm": 0.0638069212436676, "learning_rate": 3.7822310530797614e-05, "loss": 3.08595597743988e-05, "step": 219140 }, { "epoch": 62.20550667045132, "grad_norm": 0.040869567543268204, "learning_rate": 3.7819472040874255e-05, "loss": 5.994196981191635e-05, "step": 219150 }, { "epoch": 62.208345160374684, "grad_norm": 0.11514759063720703, "learning_rate": 3.78166335509509e-05, "loss": 0.0003209758549928665, "step": 219160 }, { "epoch": 62.21118365029804, "grad_norm": 0.03702423349022865, "learning_rate": 3.781379506102754e-05, "loss": 9.274538606405259e-05, "step": 219170 }, { "epoch": 62.2140221402214, "grad_norm": 0.005674903281033039, "learning_rate": 3.781095657110417e-05, "loss": 0.00012195613235235214, "step": 219180 }, { "epoch": 62.216860630144765, "grad_norm": 0.005222006235271692, "learning_rate": 3.7808118081180814e-05, "loss": 2.7395784854888916e-05, "step": 219190 }, { "epoch": 62.21969912006812, "grad_norm": 0.0054105049930512905, "learning_rate": 3.7805279591257455e-05, "loss": 6.656907498836517e-05, "step": 219200 }, { "epoch": 62.222537609991484, "grad_norm": 0.011786158196628094, "learning_rate": 3.780244110133409e-05, "loss": 3.964155912399292e-05, "step": 219210 }, { "epoch": 62.22537609991485, "grad_norm": 0.008821964263916016, "learning_rate": 3.779960261141073e-05, "loss": 2.419818192720413e-05, "step": 219220 }, { "epoch": 62.2282145898382, "grad_norm": 0.11238236725330353, "learning_rate": 3.779676412148737e-05, "loss": 0.0001607084646821022, "step": 219230 }, { "epoch": 62.231053079761566, "grad_norm": 0.13854099810123444, "learning_rate": 3.779392563156401e-05, "loss": 0.0002968993037939072, "step": 219240 }, { "epoch": 62.23389156968493, "grad_norm": 0.0011857399949803948, "learning_rate": 3.779108714164065e-05, "loss": 0.00034923907369375227, "step": 219250 }, { "epoch": 62.23673005960829, "grad_norm": 9.282350540161133, "learning_rate": 3.778824865171729e-05, "loss": 0.001978674344718456, "step": 219260 }, { "epoch": 62.23956854953165, "grad_norm": 0.9352453351020813, "learning_rate": 3.7785410161793925e-05, "loss": 0.0006175670772790909, "step": 219270 }, { "epoch": 62.24240703945501, "grad_norm": 0.02134566567838192, "learning_rate": 3.7782571671870566e-05, "loss": 0.0001733435317873955, "step": 219280 }, { "epoch": 62.24524552937837, "grad_norm": 0.005290063098073006, "learning_rate": 3.777973318194721e-05, "loss": 0.0004041539505124092, "step": 219290 }, { "epoch": 62.24808401930173, "grad_norm": 0.9046952724456787, "learning_rate": 3.777689469202384e-05, "loss": 0.0003409754484891891, "step": 219300 }, { "epoch": 62.25092250922509, "grad_norm": 0.13198350369930267, "learning_rate": 3.7774056202100483e-05, "loss": 7.398780435323715e-05, "step": 219310 }, { "epoch": 62.253760999148454, "grad_norm": 0.04335000365972519, "learning_rate": 3.7771501561169457e-05, "loss": 0.006747663766145706, "step": 219320 }, { "epoch": 62.25659948907182, "grad_norm": 0.00608720351010561, "learning_rate": 3.77686630712461e-05, "loss": 0.004023129865527153, "step": 219330 }, { "epoch": 62.25943797899517, "grad_norm": 0.06518077105283737, "learning_rate": 3.776582458132274e-05, "loss": 0.007018899917602539, "step": 219340 }, { "epoch": 62.262276468918536, "grad_norm": 0.005606275983154774, "learning_rate": 3.7762986091399374e-05, "loss": 6.257873028516769e-05, "step": 219350 }, { "epoch": 62.2651149588419, "grad_norm": 0.09107230603694916, "learning_rate": 3.7760147601476015e-05, "loss": 0.00022097304463386536, "step": 219360 }, { "epoch": 62.267953448765255, "grad_norm": 0.07079549878835678, "learning_rate": 3.775730911155266e-05, "loss": 9.357891976833344e-05, "step": 219370 }, { "epoch": 62.27079193868862, "grad_norm": 0.06225697323679924, "learning_rate": 3.77544706216293e-05, "loss": 0.0003832058981060982, "step": 219380 }, { "epoch": 62.27363042861198, "grad_norm": 0.05180567502975464, "learning_rate": 3.775163213170593e-05, "loss": 0.00011831261217594146, "step": 219390 }, { "epoch": 62.276468918535336, "grad_norm": 9.393473625183105, "learning_rate": 3.7748793641782574e-05, "loss": 0.006140315532684326, "step": 219400 }, { "epoch": 62.2793074084587, "grad_norm": 0.014734761789441109, "learning_rate": 3.7745955151859215e-05, "loss": 0.000845726765692234, "step": 219410 }, { "epoch": 62.28214589838206, "grad_norm": 0.031951822340488434, "learning_rate": 3.774311666193585e-05, "loss": 0.00016339775174856186, "step": 219420 }, { "epoch": 62.284984388305425, "grad_norm": 0.014767736196517944, "learning_rate": 3.774027817201249e-05, "loss": 0.00039871223270893097, "step": 219430 }, { "epoch": 62.28782287822878, "grad_norm": 0.01087808609008789, "learning_rate": 3.773743968208913e-05, "loss": 0.0004609197378158569, "step": 219440 }, { "epoch": 62.29066136815214, "grad_norm": 3.024606943130493, "learning_rate": 3.773460119216577e-05, "loss": 0.0008076418191194535, "step": 219450 }, { "epoch": 62.293499858075506, "grad_norm": 0.20115789771080017, "learning_rate": 3.773176270224241e-05, "loss": 9.632762521505356e-05, "step": 219460 }, { "epoch": 62.29633834799886, "grad_norm": 0.006011367309838533, "learning_rate": 3.772892421231905e-05, "loss": 0.0008789902552962303, "step": 219470 }, { "epoch": 62.299176837922225, "grad_norm": 0.963716447353363, "learning_rate": 3.7726085722395685e-05, "loss": 0.0005822759121656418, "step": 219480 }, { "epoch": 62.30201532784559, "grad_norm": 0.06879719346761703, "learning_rate": 3.7723247232472326e-05, "loss": 0.00010098386555910111, "step": 219490 }, { "epoch": 62.304853817768944, "grad_norm": 0.1223682165145874, "learning_rate": 3.772040874254897e-05, "loss": 0.0009800322353839875, "step": 219500 }, { "epoch": 62.304853817768944, "eval_accuracy": 0.9840401856679596, "eval_loss": 0.06343146413564682, "eval_runtime": 41.9214, "eval_samples_per_second": 375.155, "eval_steps_per_second": 5.868, "step": 219500 }, { "epoch": 62.30769230769231, "grad_norm": 0.49148276448249817, "learning_rate": 3.771757025262561e-05, "loss": 0.0005273085087537765, "step": 219510 }, { "epoch": 62.31053079761567, "grad_norm": 0.006396598648279905, "learning_rate": 3.7714731762702243e-05, "loss": 0.00028452370315790176, "step": 219520 }, { "epoch": 62.31336928753903, "grad_norm": 0.012048082426190376, "learning_rate": 3.771189327277888e-05, "loss": 0.0002662979066371918, "step": 219530 }, { "epoch": 62.31620777746239, "grad_norm": 0.015133284032344818, "learning_rate": 3.7709054782855526e-05, "loss": 8.458159863948822e-05, "step": 219540 }, { "epoch": 62.31904626738575, "grad_norm": 0.03283589705824852, "learning_rate": 3.770621629293216e-05, "loss": 0.0023633047938346865, "step": 219550 }, { "epoch": 62.321884757309114, "grad_norm": 0.09253577142953873, "learning_rate": 3.77033778030088e-05, "loss": 0.0008084217086434364, "step": 219560 }, { "epoch": 62.32472324723247, "grad_norm": 0.13539016246795654, "learning_rate": 3.7700539313085444e-05, "loss": 0.0017463983967900276, "step": 219570 }, { "epoch": 62.32756173715583, "grad_norm": 0.019964808598160744, "learning_rate": 3.769770082316208e-05, "loss": 0.00044607985764741895, "step": 219580 }, { "epoch": 62.330400227079195, "grad_norm": 0.2148115634918213, "learning_rate": 3.769486233323872e-05, "loss": 0.0021100517362356184, "step": 219590 }, { "epoch": 62.33323871700255, "grad_norm": 0.00991390272974968, "learning_rate": 3.769202384331536e-05, "loss": 0.0009640753269195557, "step": 219600 }, { "epoch": 62.336077206925914, "grad_norm": 0.018168779090046883, "learning_rate": 3.7689185353391995e-05, "loss": 0.0002241889014840126, "step": 219610 }, { "epoch": 62.33891569684928, "grad_norm": 0.2226472795009613, "learning_rate": 3.768634686346864e-05, "loss": 0.012386713922023774, "step": 219620 }, { "epoch": 62.34175418677264, "grad_norm": 0.021618612110614777, "learning_rate": 3.768350837354527e-05, "loss": 0.00010406877845525742, "step": 219630 }, { "epoch": 62.344592676695996, "grad_norm": 0.006045452319085598, "learning_rate": 3.768066988362192e-05, "loss": 8.940603584051132e-05, "step": 219640 }, { "epoch": 62.34743116661936, "grad_norm": 0.0692061111330986, "learning_rate": 3.7677831393698554e-05, "loss": 0.0013514034450054168, "step": 219650 }, { "epoch": 62.35026965654272, "grad_norm": 0.007571218069642782, "learning_rate": 3.767499290377519e-05, "loss": 6.825849413871765e-05, "step": 219660 }, { "epoch": 62.35310814646608, "grad_norm": 0.04307659715414047, "learning_rate": 3.767215441385184e-05, "loss": 0.0003016062080860138, "step": 219670 }, { "epoch": 62.35594663638944, "grad_norm": 0.006599435582756996, "learning_rate": 3.766931592392847e-05, "loss": 0.0002703256905078888, "step": 219680 }, { "epoch": 62.3587851263128, "grad_norm": 0.0022625632118433714, "learning_rate": 3.766647743400511e-05, "loss": 0.0005349865183234215, "step": 219690 }, { "epoch": 62.36162361623616, "grad_norm": 0.0011344607919454575, "learning_rate": 3.7663638944081754e-05, "loss": 0.0001455940306186676, "step": 219700 }, { "epoch": 62.36446210615952, "grad_norm": 0.020451117306947708, "learning_rate": 3.766080045415839e-05, "loss": 0.00035084392875432967, "step": 219710 }, { "epoch": 62.367300596082885, "grad_norm": 0.02908472716808319, "learning_rate": 3.765796196423503e-05, "loss": 0.00015278421342372893, "step": 219720 }, { "epoch": 62.37013908600625, "grad_norm": 0.0011325790546834469, "learning_rate": 3.7655123474311665e-05, "loss": 0.0003136880695819855, "step": 219730 }, { "epoch": 62.3729775759296, "grad_norm": 0.009378024376928806, "learning_rate": 3.7652284984388306e-05, "loss": 0.00025594104081392286, "step": 219740 }, { "epoch": 62.375816065852966, "grad_norm": 0.05838504806160927, "learning_rate": 3.764944649446495e-05, "loss": 0.00027017854154109955, "step": 219750 }, { "epoch": 62.37865455577633, "grad_norm": 0.09707608819007874, "learning_rate": 3.764660800454158e-05, "loss": 9.116865694522858e-05, "step": 219760 }, { "epoch": 62.381493045699685, "grad_norm": 0.0030192353297024965, "learning_rate": 3.7643769514618224e-05, "loss": 0.0023154690861701965, "step": 219770 }, { "epoch": 62.38433153562305, "grad_norm": 0.0042165732011199, "learning_rate": 3.7640931024694865e-05, "loss": 0.00013125743716955185, "step": 219780 }, { "epoch": 62.38717002554641, "grad_norm": 2.200707197189331, "learning_rate": 3.76380925347715e-05, "loss": 0.0009330479428172111, "step": 219790 }, { "epoch": 62.39000851546977, "grad_norm": 0.0240511205047369, "learning_rate": 3.763525404484815e-05, "loss": 0.0008822070434689522, "step": 219800 }, { "epoch": 62.39284700539313, "grad_norm": 0.010363860055804253, "learning_rate": 3.763241555492478e-05, "loss": 0.0016946053132414819, "step": 219810 }, { "epoch": 62.39568549531649, "grad_norm": 0.04558831825852394, "learning_rate": 3.762957706500142e-05, "loss": 0.00020761452615261077, "step": 219820 }, { "epoch": 62.398523985239855, "grad_norm": 0.01454254426062107, "learning_rate": 3.762673857507806e-05, "loss": 0.00013148915022611617, "step": 219830 }, { "epoch": 62.40136247516321, "grad_norm": 0.12709259986877441, "learning_rate": 3.76239000851547e-05, "loss": 0.0007823789492249489, "step": 219840 }, { "epoch": 62.404200965086574, "grad_norm": 0.0018295423360541463, "learning_rate": 3.762106159523134e-05, "loss": 0.00047463998198509215, "step": 219850 }, { "epoch": 62.40703945500994, "grad_norm": 0.003920285031199455, "learning_rate": 3.7618223105307976e-05, "loss": 0.003023083880543709, "step": 219860 }, { "epoch": 62.40987794493329, "grad_norm": 0.030452126637101173, "learning_rate": 3.761538461538462e-05, "loss": 9.164344519376754e-05, "step": 219870 }, { "epoch": 62.412716434856655, "grad_norm": 0.004515824373811483, "learning_rate": 3.761254612546126e-05, "loss": 0.00012506134808063508, "step": 219880 }, { "epoch": 62.41555492478002, "grad_norm": 0.05989578738808632, "learning_rate": 3.760970763553789e-05, "loss": 0.0002780269831418991, "step": 219890 }, { "epoch": 62.41839341470338, "grad_norm": 0.010582320392131805, "learning_rate": 3.7606869145614534e-05, "loss": 7.773898541927338e-05, "step": 219900 }, { "epoch": 62.42123190462674, "grad_norm": 0.11220178753137589, "learning_rate": 3.7604030655691176e-05, "loss": 0.0001925090327858925, "step": 219910 }, { "epoch": 62.4240703945501, "grad_norm": 0.005525456741452217, "learning_rate": 3.760119216576781e-05, "loss": 0.004916159808635712, "step": 219920 }, { "epoch": 62.42690888447346, "grad_norm": 0.016651567071676254, "learning_rate": 3.759835367584445e-05, "loss": 0.00010059978812932968, "step": 219930 }, { "epoch": 62.42974737439682, "grad_norm": 0.012748811393976212, "learning_rate": 3.759551518592109e-05, "loss": 6.963685154914856e-05, "step": 219940 }, { "epoch": 62.43258586432018, "grad_norm": 0.1452188938856125, "learning_rate": 3.759267669599773e-05, "loss": 7.684826850891113e-05, "step": 219950 }, { "epoch": 62.435424354243544, "grad_norm": 0.06053076311945915, "learning_rate": 3.758983820607437e-05, "loss": 9.44940373301506e-05, "step": 219960 }, { "epoch": 62.4382628441669, "grad_norm": 0.01820943132042885, "learning_rate": 3.758699971615101e-05, "loss": 6.32094219326973e-05, "step": 219970 }, { "epoch": 62.44110133409026, "grad_norm": 0.006424373015761375, "learning_rate": 3.758416122622765e-05, "loss": 0.00037411581724882125, "step": 219980 }, { "epoch": 62.443939824013626, "grad_norm": 0.08564738929271698, "learning_rate": 3.7581322736304286e-05, "loss": 0.0003088315948843956, "step": 219990 }, { "epoch": 62.44677831393699, "grad_norm": 0.018765417858958244, "learning_rate": 3.757848424638093e-05, "loss": 8.912477642297745e-05, "step": 220000 }, { "epoch": 62.44677831393699, "eval_accuracy": 0.982069053220576, "eval_loss": 0.07416573911905289, "eval_runtime": 49.413, "eval_samples_per_second": 318.277, "eval_steps_per_second": 4.978, "step": 220000 }, { "epoch": 62.449616803860344, "grad_norm": 0.03267698734998703, "learning_rate": 3.757564575645757e-05, "loss": 0.00010838452726602554, "step": 220010 }, { "epoch": 62.45245529378371, "grad_norm": 0.01427268236875534, "learning_rate": 3.7572807266534204e-05, "loss": 0.0009541559964418411, "step": 220020 }, { "epoch": 62.45529378370707, "grad_norm": 0.02583497017621994, "learning_rate": 3.7569968776610845e-05, "loss": 0.0026745984330773355, "step": 220030 }, { "epoch": 62.458132273630426, "grad_norm": 0.0030508071649819613, "learning_rate": 3.7567130286687486e-05, "loss": 0.00035267826169729235, "step": 220040 }, { "epoch": 62.46097076355379, "grad_norm": 1.5434744358062744, "learning_rate": 3.756429179676412e-05, "loss": 0.0012251749634742738, "step": 220050 }, { "epoch": 62.46380925347715, "grad_norm": 0.08377199620008469, "learning_rate": 3.756145330684076e-05, "loss": 0.0001377943903207779, "step": 220060 }, { "epoch": 62.46664774340051, "grad_norm": 3.3033382892608643, "learning_rate": 3.7558614816917404e-05, "loss": 0.000549660436809063, "step": 220070 }, { "epoch": 62.46948623332387, "grad_norm": 0.011632164008915424, "learning_rate": 3.755577632699404e-05, "loss": 0.0023821348324418066, "step": 220080 }, { "epoch": 62.47232472324723, "grad_norm": 0.7374579310417175, "learning_rate": 3.755293783707068e-05, "loss": 0.0009950442239642144, "step": 220090 }, { "epoch": 62.475163213170596, "grad_norm": 0.43934962153434753, "learning_rate": 3.755009934714732e-05, "loss": 0.00374351404607296, "step": 220100 }, { "epoch": 62.47800170309395, "grad_norm": 0.040966492146253586, "learning_rate": 3.754726085722396e-05, "loss": 0.001371879130601883, "step": 220110 }, { "epoch": 62.480840193017315, "grad_norm": 11.116790771484375, "learning_rate": 3.75444223673006e-05, "loss": 0.005900357663631439, "step": 220120 }, { "epoch": 62.48367868294068, "grad_norm": 0.013767283409833908, "learning_rate": 3.754158387737723e-05, "loss": 0.0003489496186375618, "step": 220130 }, { "epoch": 62.48651717286403, "grad_norm": 13.88767147064209, "learning_rate": 3.753874538745388e-05, "loss": 0.0050749521702528, "step": 220140 }, { "epoch": 62.489355662787396, "grad_norm": 0.533152163028717, "learning_rate": 3.7535906897530514e-05, "loss": 0.0002938894554972649, "step": 220150 }, { "epoch": 62.49219415271076, "grad_norm": 0.009839028120040894, "learning_rate": 3.7533068407607156e-05, "loss": 0.0034200869500637054, "step": 220160 }, { "epoch": 62.49503264263412, "grad_norm": 9.79494571685791, "learning_rate": 3.75302299176838e-05, "loss": 0.0015102270990610123, "step": 220170 }, { "epoch": 62.49787113255748, "grad_norm": 2.133894205093384, "learning_rate": 3.752739142776043e-05, "loss": 0.005005907639861107, "step": 220180 }, { "epoch": 62.50070962248084, "grad_norm": 14.938088417053223, "learning_rate": 3.752455293783707e-05, "loss": 0.018301564455032348, "step": 220190 }, { "epoch": 62.503548112404204, "grad_norm": 0.025629322975873947, "learning_rate": 3.7521714447913715e-05, "loss": 0.001777181401848793, "step": 220200 }, { "epoch": 62.50638660232756, "grad_norm": 0.0038588440511375666, "learning_rate": 3.751887595799035e-05, "loss": 0.0006521051749587059, "step": 220210 }, { "epoch": 62.50922509225092, "grad_norm": 1.2761179208755493, "learning_rate": 3.751603746806699e-05, "loss": 0.0005859062075614929, "step": 220220 }, { "epoch": 62.512063582174285, "grad_norm": 0.11342551559209824, "learning_rate": 3.751319897814363e-05, "loss": 0.00010941401124000549, "step": 220230 }, { "epoch": 62.51490207209764, "grad_norm": 0.004630816634744406, "learning_rate": 3.7510360488220267e-05, "loss": 0.0003211602568626404, "step": 220240 }, { "epoch": 62.517740562021004, "grad_norm": 0.0025472892448306084, "learning_rate": 3.750752199829691e-05, "loss": 0.0001054719090461731, "step": 220250 }, { "epoch": 62.52057905194437, "grad_norm": 0.016757559031248093, "learning_rate": 3.750468350837354e-05, "loss": 0.0003121295943856239, "step": 220260 }, { "epoch": 62.52341754186773, "grad_norm": 0.1768456995487213, "learning_rate": 3.750184501845019e-05, "loss": 0.00010025203227996826, "step": 220270 }, { "epoch": 62.526256031791085, "grad_norm": 0.003928502090275288, "learning_rate": 3.7499006528526825e-05, "loss": 9.3957781791687e-05, "step": 220280 }, { "epoch": 62.52909452171445, "grad_norm": 0.00902804359793663, "learning_rate": 3.749616803860346e-05, "loss": 0.00013037119060754776, "step": 220290 }, { "epoch": 62.53193301163781, "grad_norm": 0.0029795991722494364, "learning_rate": 3.749332954868011e-05, "loss": 7.890146225690842e-05, "step": 220300 }, { "epoch": 62.53477150156117, "grad_norm": 0.020709151402115822, "learning_rate": 3.749049105875674e-05, "loss": 0.00017892196774482727, "step": 220310 }, { "epoch": 62.53760999148453, "grad_norm": 0.006356533616781235, "learning_rate": 3.7487652568833384e-05, "loss": 0.00014461297541856766, "step": 220320 }, { "epoch": 62.54044848140789, "grad_norm": 0.002796758897602558, "learning_rate": 3.7484814078910025e-05, "loss": 0.00032955575734376906, "step": 220330 }, { "epoch": 62.54328697133125, "grad_norm": 0.0010747959604486823, "learning_rate": 3.748197558898666e-05, "loss": 0.00014339573681354523, "step": 220340 }, { "epoch": 62.54612546125461, "grad_norm": 0.04276711493730545, "learning_rate": 3.74791370990633e-05, "loss": 0.0004278447479009628, "step": 220350 }, { "epoch": 62.548963951177974, "grad_norm": 0.23187053203582764, "learning_rate": 3.7476298609139936e-05, "loss": 0.000161094032227993, "step": 220360 }, { "epoch": 62.55180244110134, "grad_norm": 0.05988434702157974, "learning_rate": 3.747346011921658e-05, "loss": 0.0019667040556669234, "step": 220370 }, { "epoch": 62.55464093102469, "grad_norm": 0.5683221220970154, "learning_rate": 3.747062162929322e-05, "loss": 0.000618082843720913, "step": 220380 }, { "epoch": 62.557479420948056, "grad_norm": 0.08315339684486389, "learning_rate": 3.746778313936985e-05, "loss": 0.00068510752171278, "step": 220390 }, { "epoch": 62.56031791087142, "grad_norm": 0.01890740729868412, "learning_rate": 3.74649446494465e-05, "loss": 0.0003050236031413078, "step": 220400 }, { "epoch": 62.563156400794774, "grad_norm": 0.18179762363433838, "learning_rate": 3.7462106159523136e-05, "loss": 0.00036461614072322843, "step": 220410 }, { "epoch": 62.56599489071814, "grad_norm": 0.15224821865558624, "learning_rate": 3.745926766959977e-05, "loss": 0.002846017852425575, "step": 220420 }, { "epoch": 62.5688333806415, "grad_norm": 0.12604930996894836, "learning_rate": 3.745642917967642e-05, "loss": 0.0001254882663488388, "step": 220430 }, { "epoch": 62.571671870564856, "grad_norm": 0.04976412281394005, "learning_rate": 3.745359068975305e-05, "loss": 0.0003022246062755585, "step": 220440 }, { "epoch": 62.57451036048822, "grad_norm": 0.024695679545402527, "learning_rate": 3.7450752199829695e-05, "loss": 0.0005061164498329163, "step": 220450 }, { "epoch": 62.57734885041158, "grad_norm": 0.006316266488283873, "learning_rate": 3.744791370990633e-05, "loss": 0.00012270323932170868, "step": 220460 }, { "epoch": 62.580187340334945, "grad_norm": 0.006977717392146587, "learning_rate": 3.744507521998297e-05, "loss": 0.00010023787617683411, "step": 220470 }, { "epoch": 62.5830258302583, "grad_norm": 0.2006162852048874, "learning_rate": 3.744223673005961e-05, "loss": 0.0145297110080719, "step": 220480 }, { "epoch": 62.58586432018166, "grad_norm": 0.05265793576836586, "learning_rate": 3.743939824013625e-05, "loss": 0.0032962780445814134, "step": 220490 }, { "epoch": 62.588702810105026, "grad_norm": 1.9985859394073486, "learning_rate": 3.743655975021289e-05, "loss": 0.0009648358449339866, "step": 220500 }, { "epoch": 62.588702810105026, "eval_accuracy": 0.9837858459973294, "eval_loss": 0.07100653648376465, "eval_runtime": 47.803, "eval_samples_per_second": 328.996, "eval_steps_per_second": 5.146, "step": 220500 }, { "epoch": 62.59154130002838, "grad_norm": 1.1531281471252441, "learning_rate": 3.743372126028953e-05, "loss": 0.00032753385603427886, "step": 220510 }, { "epoch": 62.594379789951745, "grad_norm": 0.015403607860207558, "learning_rate": 3.7430882770366164e-05, "loss": 0.00018192455172538757, "step": 220520 }, { "epoch": 62.59721827987511, "grad_norm": 0.008638427592813969, "learning_rate": 3.742804428044281e-05, "loss": 0.002391372248530388, "step": 220530 }, { "epoch": 62.60005676979847, "grad_norm": 1.8694266080856323, "learning_rate": 3.742520579051945e-05, "loss": 0.001432894729077816, "step": 220540 }, { "epoch": 62.60289525972183, "grad_norm": 0.010673589073121548, "learning_rate": 3.742236730059608e-05, "loss": 0.00035642366856336594, "step": 220550 }, { "epoch": 62.60573374964519, "grad_norm": 0.023918533697724342, "learning_rate": 3.741952881067272e-05, "loss": 0.004375559091567993, "step": 220560 }, { "epoch": 62.60857223956855, "grad_norm": 0.02156318537890911, "learning_rate": 3.7416690320749364e-05, "loss": 0.00012646131217479705, "step": 220570 }, { "epoch": 62.61141072949191, "grad_norm": 0.008405453525483608, "learning_rate": 3.7413851830826006e-05, "loss": 0.0010029150173068047, "step": 220580 }, { "epoch": 62.61424921941527, "grad_norm": 0.17024676501750946, "learning_rate": 3.741101334090264e-05, "loss": 0.00014328304678201675, "step": 220590 }, { "epoch": 62.617087709338634, "grad_norm": 0.006379419472068548, "learning_rate": 3.740817485097928e-05, "loss": 0.00019136592745780945, "step": 220600 }, { "epoch": 62.61992619926199, "grad_norm": 0.018088893964886665, "learning_rate": 3.740533636105592e-05, "loss": 6.099231541156769e-05, "step": 220610 }, { "epoch": 62.62276468918535, "grad_norm": 0.3928585350513458, "learning_rate": 3.740249787113256e-05, "loss": 0.0007711097598075867, "step": 220620 }, { "epoch": 62.625603179108715, "grad_norm": 0.08444476127624512, "learning_rate": 3.73996593812092e-05, "loss": 0.00017585642635822296, "step": 220630 }, { "epoch": 62.62844166903208, "grad_norm": 0.004788947757333517, "learning_rate": 3.739682089128584e-05, "loss": 8.840896189212799e-05, "step": 220640 }, { "epoch": 62.631280158955434, "grad_norm": 0.05840986222028732, "learning_rate": 3.7393982401362475e-05, "loss": 0.00020171888172626495, "step": 220650 }, { "epoch": 62.6341186488788, "grad_norm": 0.0201556496322155, "learning_rate": 3.7391143911439116e-05, "loss": 0.0006630638614296913, "step": 220660 }, { "epoch": 62.63695713880216, "grad_norm": 0.17859841883182526, "learning_rate": 3.738830542151576e-05, "loss": 0.0009109875187277794, "step": 220670 }, { "epoch": 62.639795628725516, "grad_norm": 0.5398225784301758, "learning_rate": 3.738546693159239e-05, "loss": 0.00019946247339248658, "step": 220680 }, { "epoch": 62.64263411864888, "grad_norm": 3.533757448196411, "learning_rate": 3.7382628441669034e-05, "loss": 0.0005588768050074578, "step": 220690 }, { "epoch": 62.64547260857224, "grad_norm": 0.09615182131528854, "learning_rate": 3.7379789951745675e-05, "loss": 0.00018749870359897615, "step": 220700 }, { "epoch": 62.6483110984956, "grad_norm": 0.012153811752796173, "learning_rate": 3.737695146182231e-05, "loss": 0.0022905793040990828, "step": 220710 }, { "epoch": 62.65114958841896, "grad_norm": 0.33130913972854614, "learning_rate": 3.737411297189895e-05, "loss": 0.0005036193877458572, "step": 220720 }, { "epoch": 62.65398807834232, "grad_norm": 0.16271090507507324, "learning_rate": 3.737127448197559e-05, "loss": 0.0031650058925151827, "step": 220730 }, { "epoch": 62.656826568265686, "grad_norm": 1.0554990768432617, "learning_rate": 3.7368435992052234e-05, "loss": 0.0020589355379343035, "step": 220740 }, { "epoch": 62.65966505818904, "grad_norm": 0.11529508233070374, "learning_rate": 3.736559750212887e-05, "loss": 0.000388861820101738, "step": 220750 }, { "epoch": 62.662503548112404, "grad_norm": 0.007986431010067463, "learning_rate": 3.73627590122055e-05, "loss": 0.00026831794530153276, "step": 220760 }, { "epoch": 62.66534203803577, "grad_norm": 2.086160898208618, "learning_rate": 3.735992052228215e-05, "loss": 0.005193571001291275, "step": 220770 }, { "epoch": 62.66818052795912, "grad_norm": 0.019174590706825256, "learning_rate": 3.7357082032358786e-05, "loss": 7.806215435266495e-05, "step": 220780 }, { "epoch": 62.671019017882486, "grad_norm": 0.04345263913273811, "learning_rate": 3.735424354243543e-05, "loss": 0.00440257266163826, "step": 220790 }, { "epoch": 62.67385750780585, "grad_norm": 0.8009650707244873, "learning_rate": 3.735140505251207e-05, "loss": 0.0005345726385712624, "step": 220800 }, { "epoch": 62.676695997729205, "grad_norm": 0.19517332315444946, "learning_rate": 3.73485665625887e-05, "loss": 0.00011772569268941879, "step": 220810 }, { "epoch": 62.67953448765257, "grad_norm": 0.014633498154580593, "learning_rate": 3.7345728072665344e-05, "loss": 0.0013575833290815353, "step": 220820 }, { "epoch": 62.68237297757593, "grad_norm": 0.23462602496147156, "learning_rate": 3.7342889582741986e-05, "loss": 0.00031790174543857577, "step": 220830 }, { "epoch": 62.68521146749929, "grad_norm": 0.018797364085912704, "learning_rate": 3.734005109281862e-05, "loss": 9.723231196403503e-05, "step": 220840 }, { "epoch": 62.68804995742265, "grad_norm": 0.03973087668418884, "learning_rate": 3.733721260289526e-05, "loss": 0.002306771092116833, "step": 220850 }, { "epoch": 62.69088844734601, "grad_norm": 2.769559383392334, "learning_rate": 3.7334374112971896e-05, "loss": 0.0007613897323608398, "step": 220860 }, { "epoch": 62.693726937269375, "grad_norm": 0.1046842485666275, "learning_rate": 3.7331535623048544e-05, "loss": 0.00022577326744794847, "step": 220870 }, { "epoch": 62.69656542719273, "grad_norm": 0.004218053072690964, "learning_rate": 3.732869713312518e-05, "loss": 0.0009283591061830521, "step": 220880 }, { "epoch": 62.69940391711609, "grad_norm": 0.1200825572013855, "learning_rate": 3.7325858643201814e-05, "loss": 0.0001466287299990654, "step": 220890 }, { "epoch": 62.702242407039456, "grad_norm": 0.0008585339528508484, "learning_rate": 3.732302015327846e-05, "loss": 0.00019778348505496978, "step": 220900 }, { "epoch": 62.70508089696281, "grad_norm": 0.005040234886109829, "learning_rate": 3.7320181663355096e-05, "loss": 0.0005005840212106705, "step": 220910 }, { "epoch": 62.707919386886175, "grad_norm": 0.0019603287801146507, "learning_rate": 3.731734317343174e-05, "loss": 0.00024630650877952574, "step": 220920 }, { "epoch": 62.71075787680954, "grad_norm": 0.054673850536346436, "learning_rate": 3.731450468350838e-05, "loss": 0.002318875677883625, "step": 220930 }, { "epoch": 62.7135963667329, "grad_norm": 0.02325632981956005, "learning_rate": 3.7311666193585014e-05, "loss": 0.00012050643563270569, "step": 220940 }, { "epoch": 62.71643485665626, "grad_norm": 0.007727459073066711, "learning_rate": 3.7308827703661655e-05, "loss": 0.003136606514453888, "step": 220950 }, { "epoch": 62.71927334657962, "grad_norm": 0.0030369474552571774, "learning_rate": 3.730598921373829e-05, "loss": 0.00010334718972444534, "step": 220960 }, { "epoch": 62.72211183650298, "grad_norm": 0.014010444283485413, "learning_rate": 3.730315072381493e-05, "loss": 0.0003545619547367096, "step": 220970 }, { "epoch": 62.72495032642634, "grad_norm": 0.05258215591311455, "learning_rate": 3.730031223389157e-05, "loss": 0.00013130027800798417, "step": 220980 }, { "epoch": 62.7277888163497, "grad_norm": 0.0430951751768589, "learning_rate": 3.729747374396821e-05, "loss": 0.0008249295875430107, "step": 220990 }, { "epoch": 62.730627306273064, "grad_norm": 0.05449046194553375, "learning_rate": 3.7294635254044855e-05, "loss": 0.003862547129392624, "step": 221000 }, { "epoch": 62.730627306273064, "eval_accuracy": 0.9832771666560692, "eval_loss": 0.0669541135430336, "eval_runtime": 42.0162, "eval_samples_per_second": 374.308, "eval_steps_per_second": 5.855, "step": 221000 }, { "epoch": 62.73346579619643, "grad_norm": 0.049513768404722214, "learning_rate": 3.729179676412149e-05, "loss": 0.00013595186173915864, "step": 221010 }, { "epoch": 62.73630428611978, "grad_norm": 0.5336814522743225, "learning_rate": 3.7288958274198124e-05, "loss": 0.0029611390084028242, "step": 221020 }, { "epoch": 62.739142776043145, "grad_norm": 0.03189321607351303, "learning_rate": 3.728611978427477e-05, "loss": 0.00011280793696641923, "step": 221030 }, { "epoch": 62.74198126596651, "grad_norm": 0.0040009841322898865, "learning_rate": 3.728328129435141e-05, "loss": 0.002748399041593075, "step": 221040 }, { "epoch": 62.744819755889864, "grad_norm": 0.03636980801820755, "learning_rate": 3.728044280442805e-05, "loss": 0.0011794677004218102, "step": 221050 }, { "epoch": 62.74765824581323, "grad_norm": 0.020478487014770508, "learning_rate": 3.727760431450468e-05, "loss": 0.00018467195332050323, "step": 221060 }, { "epoch": 62.75049673573659, "grad_norm": 0.026960404589772224, "learning_rate": 3.7274765824581324e-05, "loss": 0.0002370484173297882, "step": 221070 }, { "epoch": 62.753335225659946, "grad_norm": 0.0024168870877474546, "learning_rate": 3.7271927334657966e-05, "loss": 0.00011454764753580094, "step": 221080 }, { "epoch": 62.75617371558331, "grad_norm": 0.10037743300199509, "learning_rate": 3.72690888447346e-05, "loss": 0.0005700096487998962, "step": 221090 }, { "epoch": 62.75901220550667, "grad_norm": 0.0037251911126077175, "learning_rate": 3.726625035481124e-05, "loss": 0.0033569246530532836, "step": 221100 }, { "epoch": 62.761850695430034, "grad_norm": 0.24288605153560638, "learning_rate": 3.726341186488788e-05, "loss": 0.0008655969053506851, "step": 221110 }, { "epoch": 62.76468918535339, "grad_norm": 0.010848259553313255, "learning_rate": 3.726057337496452e-05, "loss": 0.00010812897235155106, "step": 221120 }, { "epoch": 62.76752767527675, "grad_norm": 0.3471009135246277, "learning_rate": 3.725773488504116e-05, "loss": 0.0006379544734954834, "step": 221130 }, { "epoch": 62.770366165200116, "grad_norm": 0.038380153477191925, "learning_rate": 3.72548963951178e-05, "loss": 0.00024422388523817065, "step": 221140 }, { "epoch": 62.77320465512347, "grad_norm": 0.11300535500049591, "learning_rate": 3.7252057905194435e-05, "loss": 0.00010720081627368927, "step": 221150 }, { "epoch": 62.776043145046835, "grad_norm": 11.958379745483398, "learning_rate": 3.7249219415271077e-05, "loss": 0.002859099209308624, "step": 221160 }, { "epoch": 62.7788816349702, "grad_norm": 0.030062735080718994, "learning_rate": 3.724638092534772e-05, "loss": 5.669761449098587e-05, "step": 221170 }, { "epoch": 62.78172012489355, "grad_norm": 0.00929985474795103, "learning_rate": 3.724354243542435e-05, "loss": 0.0001689983531832695, "step": 221180 }, { "epoch": 62.784558614816916, "grad_norm": 0.018922368064522743, "learning_rate": 3.7240703945500994e-05, "loss": 0.0009504057466983795, "step": 221190 }, { "epoch": 62.78739710474028, "grad_norm": 0.009360983967781067, "learning_rate": 3.7237865455577635e-05, "loss": 0.0008936306461691856, "step": 221200 }, { "epoch": 62.79023559466364, "grad_norm": 0.007378400769084692, "learning_rate": 3.7235026965654277e-05, "loss": 0.00022882204502820968, "step": 221210 }, { "epoch": 62.793074084587, "grad_norm": 14.885631561279297, "learning_rate": 3.723218847573091e-05, "loss": 0.013424815237522125, "step": 221220 }, { "epoch": 62.79591257451036, "grad_norm": 16.624155044555664, "learning_rate": 3.722934998580755e-05, "loss": 0.03425512313842773, "step": 221230 }, { "epoch": 62.79875106443372, "grad_norm": 0.007655374240130186, "learning_rate": 3.7226511495884194e-05, "loss": 0.009043262898921966, "step": 221240 }, { "epoch": 62.80158955435708, "grad_norm": 0.42795878648757935, "learning_rate": 3.722367300596083e-05, "loss": 0.010432595014572143, "step": 221250 }, { "epoch": 62.80442804428044, "grad_norm": 0.0463310107588768, "learning_rate": 3.722083451603747e-05, "loss": 0.0042056553065776825, "step": 221260 }, { "epoch": 62.807266534203805, "grad_norm": 3.1504993438720703, "learning_rate": 3.721799602611411e-05, "loss": 0.0005013482645153999, "step": 221270 }, { "epoch": 62.81010502412717, "grad_norm": 0.03234424442052841, "learning_rate": 3.7215157536190746e-05, "loss": 0.00035253297537565233, "step": 221280 }, { "epoch": 62.812943514050524, "grad_norm": 0.03993978723883629, "learning_rate": 3.721231904626739e-05, "loss": 0.0012903301045298575, "step": 221290 }, { "epoch": 62.81578200397389, "grad_norm": 2.2704858779907227, "learning_rate": 3.720948055634403e-05, "loss": 0.0005027448758482933, "step": 221300 }, { "epoch": 62.81862049389725, "grad_norm": 0.35589778423309326, "learning_rate": 3.720664206642066e-05, "loss": 0.0003454430028796196, "step": 221310 }, { "epoch": 62.821458983820605, "grad_norm": 0.05644713714718819, "learning_rate": 3.7203803576497305e-05, "loss": 0.0002520609647035599, "step": 221320 }, { "epoch": 62.82429747374397, "grad_norm": 0.12434928119182587, "learning_rate": 3.7200965086573946e-05, "loss": 0.00041002221405506134, "step": 221330 }, { "epoch": 62.82713596366733, "grad_norm": 0.06179254502058029, "learning_rate": 3.719812659665059e-05, "loss": 0.0010089883580803871, "step": 221340 }, { "epoch": 62.82997445359069, "grad_norm": 0.15910576283931732, "learning_rate": 3.719528810672722e-05, "loss": 0.00025586578994989394, "step": 221350 }, { "epoch": 62.83281294351405, "grad_norm": 2.9165234565734863, "learning_rate": 3.7192449616803857e-05, "loss": 0.001076436974108219, "step": 221360 }, { "epoch": 62.83565143343741, "grad_norm": 0.0037422482855618, "learning_rate": 3.7189611126880505e-05, "loss": 0.001655951328575611, "step": 221370 }, { "epoch": 62.838489923360775, "grad_norm": 0.0586702898144722, "learning_rate": 3.718677263695714e-05, "loss": 0.0002283286303281784, "step": 221380 }, { "epoch": 62.84132841328413, "grad_norm": 1.5370500087738037, "learning_rate": 3.718393414703378e-05, "loss": 0.0028471196070313453, "step": 221390 }, { "epoch": 62.844166903207494, "grad_norm": 11.028443336486816, "learning_rate": 3.718109565711042e-05, "loss": 0.004463280364871025, "step": 221400 }, { "epoch": 62.84700539313086, "grad_norm": 2.7155654430389404, "learning_rate": 3.717825716718706e-05, "loss": 0.0006826560944318772, "step": 221410 }, { "epoch": 62.84984388305421, "grad_norm": 9.561491012573242, "learning_rate": 3.71754186772637e-05, "loss": 0.0028834840282797813, "step": 221420 }, { "epoch": 62.852682372977576, "grad_norm": 0.045621637254953384, "learning_rate": 3.717258018734034e-05, "loss": 0.00028381552547216413, "step": 221430 }, { "epoch": 62.85552086290094, "grad_norm": 0.026693863794207573, "learning_rate": 3.7169741697416974e-05, "loss": 0.00011944044381380081, "step": 221440 }, { "epoch": 62.858359352824294, "grad_norm": 0.0009211244760081172, "learning_rate": 3.7166903207493615e-05, "loss": 0.0010100426152348518, "step": 221450 }, { "epoch": 62.86119784274766, "grad_norm": 0.022990936413407326, "learning_rate": 3.716406471757025e-05, "loss": 0.00019009355455636978, "step": 221460 }, { "epoch": 62.86403633267102, "grad_norm": 0.11912184208631516, "learning_rate": 3.71612262276469e-05, "loss": 0.00018599461764097215, "step": 221470 }, { "epoch": 62.86687482259438, "grad_norm": 0.24308304488658905, "learning_rate": 3.715838773772353e-05, "loss": 0.00017456226050853728, "step": 221480 }, { "epoch": 62.86971331251774, "grad_norm": 0.009763886220753193, "learning_rate": 3.715554924780017e-05, "loss": 0.00043624378740787507, "step": 221490 }, { "epoch": 62.8725518024411, "grad_norm": 0.013175828382372856, "learning_rate": 3.7152710757876815e-05, "loss": 0.0021866450086236, "step": 221500 }, { "epoch": 62.8725518024411, "eval_accuracy": 0.9834679214090418, "eval_loss": 0.06943795830011368, "eval_runtime": 40.2361, "eval_samples_per_second": 390.868, "eval_steps_per_second": 6.114, "step": 221500 }, { "epoch": 62.875390292364465, "grad_norm": 0.028319906443357468, "learning_rate": 3.714987226795345e-05, "loss": 0.0005166182294487953, "step": 221510 }, { "epoch": 62.87822878228782, "grad_norm": 0.11144208163022995, "learning_rate": 3.714703377803009e-05, "loss": 0.0010597020387649535, "step": 221520 }, { "epoch": 62.88106727221118, "grad_norm": 0.01930260844528675, "learning_rate": 3.714419528810673e-05, "loss": 0.00829896554350853, "step": 221530 }, { "epoch": 62.883905762134546, "grad_norm": 0.0068933553993701935, "learning_rate": 3.714135679818337e-05, "loss": 0.008166977763175964, "step": 221540 }, { "epoch": 62.8867442520579, "grad_norm": 7.532669544219971, "learning_rate": 3.713851830826001e-05, "loss": 0.002301187254488468, "step": 221550 }, { "epoch": 62.889582741981265, "grad_norm": 11.580528259277344, "learning_rate": 3.713567981833665e-05, "loss": 0.003320709988474846, "step": 221560 }, { "epoch": 62.89242123190463, "grad_norm": 0.1601770520210266, "learning_rate": 3.7132841328413285e-05, "loss": 0.0008471004664897918, "step": 221570 }, { "epoch": 62.89525972182799, "grad_norm": 0.030058398842811584, "learning_rate": 3.7130002838489926e-05, "loss": 0.005464760214090347, "step": 221580 }, { "epoch": 62.898098211751346, "grad_norm": 0.20922935009002686, "learning_rate": 3.712716434856656e-05, "loss": 0.0002919094637036324, "step": 221590 }, { "epoch": 62.90093670167471, "grad_norm": 0.0016447881935164332, "learning_rate": 3.71243258586432e-05, "loss": 0.0006215723231434822, "step": 221600 }, { "epoch": 62.90377519159807, "grad_norm": 0.014574949629604816, "learning_rate": 3.7121487368719843e-05, "loss": 0.00015298649668693542, "step": 221610 }, { "epoch": 62.90661368152143, "grad_norm": 0.027760447934269905, "learning_rate": 3.711864887879648e-05, "loss": 9.916163980960846e-05, "step": 221620 }, { "epoch": 62.90945217144479, "grad_norm": 0.006334371864795685, "learning_rate": 3.7115810388873126e-05, "loss": 6.876308470964432e-05, "step": 221630 }, { "epoch": 62.912290661368154, "grad_norm": 0.014689981937408447, "learning_rate": 3.711297189894976e-05, "loss": 0.00013997238129377366, "step": 221640 }, { "epoch": 62.91512915129151, "grad_norm": 0.06121765822172165, "learning_rate": 3.7110133409026395e-05, "loss": 0.00013847779482603073, "step": 221650 }, { "epoch": 62.91796764121487, "grad_norm": 0.04029170796275139, "learning_rate": 3.7107294919103044e-05, "loss": 0.0002790858969092369, "step": 221660 }, { "epoch": 62.920806131138235, "grad_norm": 0.211636483669281, "learning_rate": 3.710445642917968e-05, "loss": 0.00016934983432292938, "step": 221670 }, { "epoch": 62.9236446210616, "grad_norm": 0.16120441257953644, "learning_rate": 3.710161793925632e-05, "loss": 0.00022381581366062164, "step": 221680 }, { "epoch": 62.926483110984954, "grad_norm": 0.06334678828716278, "learning_rate": 3.7098779449332954e-05, "loss": 9.673330932855607e-05, "step": 221690 }, { "epoch": 62.92932160090832, "grad_norm": 0.004703400656580925, "learning_rate": 3.7095940959409596e-05, "loss": 7.259026169776916e-05, "step": 221700 }, { "epoch": 62.93216009083168, "grad_norm": 0.05500265955924988, "learning_rate": 3.709310246948624e-05, "loss": 8.521154522895813e-05, "step": 221710 }, { "epoch": 62.934998580755035, "grad_norm": 0.035241298377513885, "learning_rate": 3.709026397956287e-05, "loss": 0.00016651861369609833, "step": 221720 }, { "epoch": 62.9378370706784, "grad_norm": 0.21673071384429932, "learning_rate": 3.708742548963951e-05, "loss": 0.000406511127948761, "step": 221730 }, { "epoch": 62.94067556060176, "grad_norm": 0.016848376020789146, "learning_rate": 3.7084586999716154e-05, "loss": 8.885450661182404e-05, "step": 221740 }, { "epoch": 62.943514050525124, "grad_norm": 0.596100389957428, "learning_rate": 3.708174850979279e-05, "loss": 0.00020015370100736617, "step": 221750 }, { "epoch": 62.94635254044848, "grad_norm": 0.011264772154390812, "learning_rate": 3.707891001986944e-05, "loss": 8.407328277826309e-05, "step": 221760 }, { "epoch": 62.94919103037184, "grad_norm": 0.015829645097255707, "learning_rate": 3.707607152994607e-05, "loss": 0.004983730614185333, "step": 221770 }, { "epoch": 62.952029520295206, "grad_norm": 0.30431753396987915, "learning_rate": 3.7073233040022706e-05, "loss": 9.787604212760926e-05, "step": 221780 }, { "epoch": 62.95486801021856, "grad_norm": 0.10126552730798721, "learning_rate": 3.707039455009935e-05, "loss": 0.00014373678714036943, "step": 221790 }, { "epoch": 62.957706500141924, "grad_norm": 0.022012868896126747, "learning_rate": 3.706755606017599e-05, "loss": 5.4490379989147185e-05, "step": 221800 }, { "epoch": 62.96054499006529, "grad_norm": 0.005186645779758692, "learning_rate": 3.706471757025263e-05, "loss": 5.73107972741127e-05, "step": 221810 }, { "epoch": 62.96338347998864, "grad_norm": 0.0307058934122324, "learning_rate": 3.7061879080329265e-05, "loss": 0.00011250358074903488, "step": 221820 }, { "epoch": 62.966221969912006, "grad_norm": 0.0194716714322567, "learning_rate": 3.7059040590405906e-05, "loss": 5.977712571620941e-05, "step": 221830 }, { "epoch": 62.96906045983537, "grad_norm": 0.02435961179435253, "learning_rate": 3.705620210048255e-05, "loss": 9.999051690101624e-05, "step": 221840 }, { "epoch": 62.97189894975873, "grad_norm": 0.03949914500117302, "learning_rate": 3.705336361055918e-05, "loss": 4.3901242315769194e-05, "step": 221850 }, { "epoch": 62.97473743968209, "grad_norm": 0.001164318178780377, "learning_rate": 3.7050525120635824e-05, "loss": 0.00015269666910171508, "step": 221860 }, { "epoch": 62.97757592960545, "grad_norm": 1.7385308742523193, "learning_rate": 3.7047686630712465e-05, "loss": 0.0005281716585159302, "step": 221870 }, { "epoch": 62.98041441952881, "grad_norm": 0.06160430237650871, "learning_rate": 3.70448481407891e-05, "loss": 8.616428822278976e-05, "step": 221880 }, { "epoch": 62.98325290945217, "grad_norm": 0.008060593158006668, "learning_rate": 3.704200965086574e-05, "loss": 6.063692271709442e-05, "step": 221890 }, { "epoch": 62.98609139937553, "grad_norm": 0.0029424885287880898, "learning_rate": 3.703917116094238e-05, "loss": 5.7788193225860594e-05, "step": 221900 }, { "epoch": 62.988929889298895, "grad_norm": 0.0012860563583672047, "learning_rate": 3.703633267101902e-05, "loss": 0.0010334676131606101, "step": 221910 }, { "epoch": 62.99176837922225, "grad_norm": 3.501070737838745, "learning_rate": 3.703349418109566e-05, "loss": 0.0012592403218150139, "step": 221920 }, { "epoch": 62.99460686914561, "grad_norm": 0.054015278816223145, "learning_rate": 3.70306556911723e-05, "loss": 0.002630739659070969, "step": 221930 }, { "epoch": 62.997445359068976, "grad_norm": 0.13791674375534058, "learning_rate": 3.702781720124894e-05, "loss": 0.000751229003071785, "step": 221940 }, { "epoch": 63.00028384899234, "grad_norm": 1.2361762523651123, "learning_rate": 3.7024978711325576e-05, "loss": 0.0019305149093270303, "step": 221950 }, { "epoch": 63.003122338915695, "grad_norm": 0.010933199897408485, "learning_rate": 3.702214022140222e-05, "loss": 0.0007864315062761307, "step": 221960 }, { "epoch": 63.00596082883906, "grad_norm": 0.09577751904726028, "learning_rate": 3.701930173147886e-05, "loss": 0.0002742866054177284, "step": 221970 }, { "epoch": 63.00879931876242, "grad_norm": 0.00822001788765192, "learning_rate": 3.701646324155549e-05, "loss": 0.005103591829538345, "step": 221980 }, { "epoch": 63.01163780868578, "grad_norm": 0.008178609423339367, "learning_rate": 3.7013624751632134e-05, "loss": 0.00012207329273223878, "step": 221990 }, { "epoch": 63.01447629860914, "grad_norm": 1.5428284406661987, "learning_rate": 3.7010786261708776e-05, "loss": 0.0008643751963973046, "step": 222000 }, { "epoch": 63.01447629860914, "eval_accuracy": 0.9811788643733707, "eval_loss": 0.07499489188194275, "eval_runtime": 44.9426, "eval_samples_per_second": 349.935, "eval_steps_per_second": 5.474, "step": 222000 }, { "epoch": 63.0173147885325, "grad_norm": 0.05471323803067207, "learning_rate": 3.700794777178541e-05, "loss": 0.001048329845070839, "step": 222010 }, { "epoch": 63.02015327845586, "grad_norm": 0.005957242101430893, "learning_rate": 3.700510928186205e-05, "loss": 8.964817970991135e-05, "step": 222020 }, { "epoch": 63.02299176837922, "grad_norm": 0.01912301778793335, "learning_rate": 3.700227079193869e-05, "loss": 0.0005655800923705101, "step": 222030 }, { "epoch": 63.025830258302584, "grad_norm": 0.007274748757481575, "learning_rate": 3.699943230201533e-05, "loss": 4.156045615673065e-05, "step": 222040 }, { "epoch": 63.02866874822595, "grad_norm": 0.04863428324460983, "learning_rate": 3.699659381209197e-05, "loss": 0.00020579788833856582, "step": 222050 }, { "epoch": 63.0315072381493, "grad_norm": 0.029827050864696503, "learning_rate": 3.699375532216861e-05, "loss": 5.706381052732468e-05, "step": 222060 }, { "epoch": 63.034345728072665, "grad_norm": 0.08542609959840775, "learning_rate": 3.6990916832245245e-05, "loss": 0.0004683317616581917, "step": 222070 }, { "epoch": 63.03718421799603, "grad_norm": 0.9165181517601013, "learning_rate": 3.6988078342321886e-05, "loss": 0.0002825699746608734, "step": 222080 }, { "epoch": 63.040022707919384, "grad_norm": 2.270846128463745, "learning_rate": 3.698523985239852e-05, "loss": 0.0007692243903875351, "step": 222090 }, { "epoch": 63.04286119784275, "grad_norm": 1.0100972652435303, "learning_rate": 3.698240136247517e-05, "loss": 0.0003712093457579613, "step": 222100 }, { "epoch": 63.04569968776611, "grad_norm": 1.5949851274490356, "learning_rate": 3.6979562872551804e-05, "loss": 0.0003544136881828308, "step": 222110 }, { "epoch": 63.04853817768947, "grad_norm": 0.03437433019280434, "learning_rate": 3.697672438262844e-05, "loss": 0.0011725787073373795, "step": 222120 }, { "epoch": 63.05137666761283, "grad_norm": 0.011671727523207664, "learning_rate": 3.6973885892705087e-05, "loss": 0.0009391261264681816, "step": 222130 }, { "epoch": 63.05421515753619, "grad_norm": 0.0017714385176077485, "learning_rate": 3.697104740278172e-05, "loss": 0.0007318455725908279, "step": 222140 }, { "epoch": 63.057053647459554, "grad_norm": 0.0035762214101850986, "learning_rate": 3.696820891285836e-05, "loss": 0.00034612715244293214, "step": 222150 }, { "epoch": 63.05989213738291, "grad_norm": 0.014483288861811161, "learning_rate": 3.6965370422935004e-05, "loss": 0.0002745069563388824, "step": 222160 }, { "epoch": 63.06273062730627, "grad_norm": 0.016350584104657173, "learning_rate": 3.696253193301164e-05, "loss": 6.463825702667237e-05, "step": 222170 }, { "epoch": 63.065569117229636, "grad_norm": 0.03382689878344536, "learning_rate": 3.695969344308828e-05, "loss": 0.001220761053264141, "step": 222180 }, { "epoch": 63.06840760715299, "grad_norm": 0.08523406833410263, "learning_rate": 3.6956854953164914e-05, "loss": 0.0004214845597743988, "step": 222190 }, { "epoch": 63.071246097076354, "grad_norm": 0.09246087074279785, "learning_rate": 3.6954016463241556e-05, "loss": 0.0001746315509080887, "step": 222200 }, { "epoch": 63.07408458699972, "grad_norm": 0.1694282442331314, "learning_rate": 3.69511779733182e-05, "loss": 0.00017221104353666307, "step": 222210 }, { "epoch": 63.07692307692308, "grad_norm": 0.023442460224032402, "learning_rate": 3.694833948339483e-05, "loss": 0.0001105181872844696, "step": 222220 }, { "epoch": 63.079761566846436, "grad_norm": 0.008171516470611095, "learning_rate": 3.694550099347148e-05, "loss": 0.00014520976692438125, "step": 222230 }, { "epoch": 63.0826000567698, "grad_norm": 0.2013297826051712, "learning_rate": 3.6942662503548115e-05, "loss": 0.0001277826726436615, "step": 222240 }, { "epoch": 63.08543854669316, "grad_norm": 0.0038934897165745497, "learning_rate": 3.693982401362475e-05, "loss": 0.00037372428923845293, "step": 222250 }, { "epoch": 63.08827703661652, "grad_norm": 0.019325263798236847, "learning_rate": 3.69369855237014e-05, "loss": 0.00022293757647275924, "step": 222260 }, { "epoch": 63.09111552653988, "grad_norm": 0.03833479806780815, "learning_rate": 3.693414703377803e-05, "loss": 0.0003970185294747353, "step": 222270 }, { "epoch": 63.09395401646324, "grad_norm": 0.648408055305481, "learning_rate": 3.693130854385467e-05, "loss": 0.00027608592063188553, "step": 222280 }, { "epoch": 63.0967925063866, "grad_norm": 0.5928374528884888, "learning_rate": 3.692847005393131e-05, "loss": 0.0023806331679224967, "step": 222290 }, { "epoch": 63.09963099630996, "grad_norm": 0.011561747640371323, "learning_rate": 3.692563156400795e-05, "loss": 0.0014475414529442788, "step": 222300 }, { "epoch": 63.102469486233325, "grad_norm": 0.011033381335437298, "learning_rate": 3.692279307408459e-05, "loss": 0.0003954136744141579, "step": 222310 }, { "epoch": 63.10530797615669, "grad_norm": 0.0016151006566360593, "learning_rate": 3.6919954584161225e-05, "loss": 0.0074343658983707425, "step": 222320 }, { "epoch": 63.10814646608004, "grad_norm": 0.01873786374926567, "learning_rate": 3.691711609423787e-05, "loss": 0.00015956740826368333, "step": 222330 }, { "epoch": 63.110984956003406, "grad_norm": 0.004690333269536495, "learning_rate": 3.691427760431451e-05, "loss": 0.0005423082038760185, "step": 222340 }, { "epoch": 63.11382344592677, "grad_norm": 0.014719052240252495, "learning_rate": 3.691143911439114e-05, "loss": 0.00011183377355337143, "step": 222350 }, { "epoch": 63.116661935850125, "grad_norm": 0.09008938074111938, "learning_rate": 3.6908600624467784e-05, "loss": 0.00010187234729528428, "step": 222360 }, { "epoch": 63.11950042577349, "grad_norm": 0.0051397765055298805, "learning_rate": 3.6905762134544425e-05, "loss": 0.0029509766027331352, "step": 222370 }, { "epoch": 63.12233891569685, "grad_norm": 0.0073655275627970695, "learning_rate": 3.690292364462106e-05, "loss": 0.00041807275265455247, "step": 222380 }, { "epoch": 63.12517740562021, "grad_norm": 0.11019744724035263, "learning_rate": 3.69000851546977e-05, "loss": 5.103889852762222e-05, "step": 222390 }, { "epoch": 63.12801589554357, "grad_norm": 0.0034014375414699316, "learning_rate": 3.689724666477434e-05, "loss": 9.136348962783814e-05, "step": 222400 }, { "epoch": 63.13085438546693, "grad_norm": 0.003381367539986968, "learning_rate": 3.689440817485098e-05, "loss": 3.48581001162529e-05, "step": 222410 }, { "epoch": 63.133692875390295, "grad_norm": 0.06927037239074707, "learning_rate": 3.689156968492762e-05, "loss": 0.0001561744138598442, "step": 222420 }, { "epoch": 63.13653136531365, "grad_norm": 0.013446188531816006, "learning_rate": 3.688873119500426e-05, "loss": 0.0012349467724561691, "step": 222430 }, { "epoch": 63.139369855237014, "grad_norm": 0.003462862689048052, "learning_rate": 3.68858927050809e-05, "loss": 3.3828243613243106e-05, "step": 222440 }, { "epoch": 63.14220834516038, "grad_norm": 0.017125044018030167, "learning_rate": 3.6883054215157536e-05, "loss": 0.00010066982358694076, "step": 222450 }, { "epoch": 63.14504683508373, "grad_norm": 0.03153488039970398, "learning_rate": 3.688021572523418e-05, "loss": 9.555872529745101e-05, "step": 222460 }, { "epoch": 63.147885325007096, "grad_norm": 0.07793865352869034, "learning_rate": 3.687737723531082e-05, "loss": 5.7474896311759946e-05, "step": 222470 }, { "epoch": 63.15072381493046, "grad_norm": 0.004155206959694624, "learning_rate": 3.687453874538745e-05, "loss": 2.813916653394699e-05, "step": 222480 }, { "epoch": 63.15356230485382, "grad_norm": 0.012094416655600071, "learning_rate": 3.6871700255464095e-05, "loss": 0.0003545217216014862, "step": 222490 }, { "epoch": 63.15640079477718, "grad_norm": 0.01902780681848526, "learning_rate": 3.6868861765540736e-05, "loss": 0.00015552975237369536, "step": 222500 }, { "epoch": 63.15640079477718, "eval_accuracy": 0.9870286767978635, "eval_loss": 0.05771815776824951, "eval_runtime": 40.2054, "eval_samples_per_second": 391.166, "eval_steps_per_second": 6.119, "step": 222500 }, { "epoch": 63.15923928470054, "grad_norm": 0.0013886750675737858, "learning_rate": 3.686602327561737e-05, "loss": 5.235765129327774e-05, "step": 222510 }, { "epoch": 63.1620777746239, "grad_norm": 0.03329775109887123, "learning_rate": 3.686318478569401e-05, "loss": 5.4367072880268094e-05, "step": 222520 }, { "epoch": 63.16491626454726, "grad_norm": 0.004871426150202751, "learning_rate": 3.6860346295770653e-05, "loss": 3.7865526974201205e-05, "step": 222530 }, { "epoch": 63.16775475447062, "grad_norm": 0.0005914723151363432, "learning_rate": 3.685750780584729e-05, "loss": 0.0001091349869966507, "step": 222540 }, { "epoch": 63.170593244393984, "grad_norm": 0.07848238199949265, "learning_rate": 3.685466931592393e-05, "loss": 5.29751181602478e-05, "step": 222550 }, { "epoch": 63.17343173431734, "grad_norm": 0.03296884521842003, "learning_rate": 3.685183082600057e-05, "loss": 4.8918649554252625e-05, "step": 222560 }, { "epoch": 63.1762702242407, "grad_norm": 0.005515208467841148, "learning_rate": 3.684899233607721e-05, "loss": 4.0137767791748046e-05, "step": 222570 }, { "epoch": 63.179108714164066, "grad_norm": 0.025039993226528168, "learning_rate": 3.684615384615385e-05, "loss": 5.114395171403885e-05, "step": 222580 }, { "epoch": 63.18194720408743, "grad_norm": 0.008586520329117775, "learning_rate": 3.684331535623048e-05, "loss": 4.98523935675621e-05, "step": 222590 }, { "epoch": 63.184785694010785, "grad_norm": 0.0009820250561460853, "learning_rate": 3.684047686630713e-05, "loss": 0.00015581529587507247, "step": 222600 }, { "epoch": 63.18762418393415, "grad_norm": 0.02706109918653965, "learning_rate": 3.6837638376383764e-05, "loss": 0.000411967933177948, "step": 222610 }, { "epoch": 63.19046267385751, "grad_norm": 0.0026576670352369547, "learning_rate": 3.6834799886460406e-05, "loss": 0.00016720779240131378, "step": 222620 }, { "epoch": 63.193301163780866, "grad_norm": 0.01822495460510254, "learning_rate": 3.683196139653705e-05, "loss": 0.00033684764057397845, "step": 222630 }, { "epoch": 63.19613965370423, "grad_norm": 0.07344871014356613, "learning_rate": 3.682912290661368e-05, "loss": 7.838848978281021e-05, "step": 222640 }, { "epoch": 63.19897814362759, "grad_norm": 0.554084062576294, "learning_rate": 3.682628441669032e-05, "loss": 0.0002224750816822052, "step": 222650 }, { "epoch": 63.20181663355095, "grad_norm": 0.0067381528206169605, "learning_rate": 3.6823445926766964e-05, "loss": 5.986317992210388e-05, "step": 222660 }, { "epoch": 63.20465512347431, "grad_norm": 0.005615897011011839, "learning_rate": 3.68206074368436e-05, "loss": 8.357781916856766e-05, "step": 222670 }, { "epoch": 63.20749361339767, "grad_norm": 0.036858513951301575, "learning_rate": 3.681776894692024e-05, "loss": 0.0005512474104762078, "step": 222680 }, { "epoch": 63.210332103321036, "grad_norm": 0.05964364856481552, "learning_rate": 3.6814930456996875e-05, "loss": 0.0017918445169925689, "step": 222690 }, { "epoch": 63.21317059324439, "grad_norm": 0.0848454013466835, "learning_rate": 3.681209196707352e-05, "loss": 8.225273340940475e-05, "step": 222700 }, { "epoch": 63.216009083167755, "grad_norm": 0.017147721722722054, "learning_rate": 3.680925347715016e-05, "loss": 0.0001469319686293602, "step": 222710 }, { "epoch": 63.21884757309112, "grad_norm": 0.047258660197257996, "learning_rate": 3.680641498722679e-05, "loss": 0.00010683741420507431, "step": 222720 }, { "epoch": 63.221686063014474, "grad_norm": 0.002293606288731098, "learning_rate": 3.680357649730344e-05, "loss": 0.0012271465733647347, "step": 222730 }, { "epoch": 63.22452455293784, "grad_norm": 0.08538854122161865, "learning_rate": 3.6800738007380075e-05, "loss": 0.00013334322720766069, "step": 222740 }, { "epoch": 63.2273630428612, "grad_norm": 0.0021164901554584503, "learning_rate": 3.6797899517456716e-05, "loss": 0.0003942074254155159, "step": 222750 }, { "epoch": 63.230201532784555, "grad_norm": 0.012076755985617638, "learning_rate": 3.679506102753336e-05, "loss": 5.2078813314437864e-05, "step": 222760 }, { "epoch": 63.23304002270792, "grad_norm": 0.007310348562896252, "learning_rate": 3.679222253760999e-05, "loss": 7.357671856880187e-05, "step": 222770 }, { "epoch": 63.23587851263128, "grad_norm": 0.008583899587392807, "learning_rate": 3.6789384047686634e-05, "loss": 0.002241012267768383, "step": 222780 }, { "epoch": 63.238717002554644, "grad_norm": 0.01004039216786623, "learning_rate": 3.678654555776327e-05, "loss": 0.00011798478662967682, "step": 222790 }, { "epoch": 63.241555492478, "grad_norm": 0.11297664046287537, "learning_rate": 3.678370706783991e-05, "loss": 8.765067905187607e-05, "step": 222800 }, { "epoch": 63.24439398240136, "grad_norm": 0.0017037480138242245, "learning_rate": 3.678086857791655e-05, "loss": 0.00012035258114337922, "step": 222810 }, { "epoch": 63.247232472324725, "grad_norm": 0.017520783469080925, "learning_rate": 3.6778030087993186e-05, "loss": 5.3448230028152464e-05, "step": 222820 }, { "epoch": 63.25007096224808, "grad_norm": 0.0030516451224684715, "learning_rate": 3.677519159806983e-05, "loss": 3.3911503851413724e-05, "step": 222830 }, { "epoch": 63.252909452171444, "grad_norm": 0.24703030288219452, "learning_rate": 3.677235310814647e-05, "loss": 7.028486579656601e-05, "step": 222840 }, { "epoch": 63.25574794209481, "grad_norm": 0.0030352885369211435, "learning_rate": 3.67695146182231e-05, "loss": 3.581959754228592e-05, "step": 222850 }, { "epoch": 63.25858643201816, "grad_norm": 0.028057031333446503, "learning_rate": 3.676667612829975e-05, "loss": 6.923656910657882e-05, "step": 222860 }, { "epoch": 63.261424921941526, "grad_norm": 0.014365396462380886, "learning_rate": 3.6763837638376386e-05, "loss": 4.957914352416992e-05, "step": 222870 }, { "epoch": 63.26426341186489, "grad_norm": 0.10062360763549805, "learning_rate": 3.676099914845302e-05, "loss": 7.090829312801361e-05, "step": 222880 }, { "epoch": 63.26710190178825, "grad_norm": 0.00439456244930625, "learning_rate": 3.675816065852967e-05, "loss": 4.7507509589195254e-05, "step": 222890 }, { "epoch": 63.26994039171161, "grad_norm": 0.027155322954058647, "learning_rate": 3.67553221686063e-05, "loss": 3.063250333070755e-05, "step": 222900 }, { "epoch": 63.27277888163497, "grad_norm": 0.1032199114561081, "learning_rate": 3.6752483678682944e-05, "loss": 0.00025353282690048217, "step": 222910 }, { "epoch": 63.27561737155833, "grad_norm": 0.02113066427409649, "learning_rate": 3.674964518875958e-05, "loss": 0.00011040512472391129, "step": 222920 }, { "epoch": 63.27845586148169, "grad_norm": 0.30650877952575684, "learning_rate": 3.674680669883622e-05, "loss": 0.000176316499710083, "step": 222930 }, { "epoch": 63.28129435140505, "grad_norm": 0.003628568723797798, "learning_rate": 3.674396820891286e-05, "loss": 0.0007285567000508309, "step": 222940 }, { "epoch": 63.284132841328415, "grad_norm": 0.27194324135780334, "learning_rate": 3.6741129718989496e-05, "loss": 0.0008978305384516716, "step": 222950 }, { "epoch": 63.28697133125178, "grad_norm": 0.019733799621462822, "learning_rate": 3.673829122906614e-05, "loss": 0.0005645463243126869, "step": 222960 }, { "epoch": 63.28980982117513, "grad_norm": 0.09017644822597504, "learning_rate": 3.673545273914278e-05, "loss": 0.00015720184892416001, "step": 222970 }, { "epoch": 63.292648311098496, "grad_norm": 0.12233760207891464, "learning_rate": 3.6732614249219414e-05, "loss": 0.00023257005959749223, "step": 222980 }, { "epoch": 63.29548680102186, "grad_norm": 0.006079395301640034, "learning_rate": 3.672977575929606e-05, "loss": 8.974559605121613e-05, "step": 222990 }, { "epoch": 63.298325290945215, "grad_norm": 0.002928489353507757, "learning_rate": 3.6726937269372696e-05, "loss": 0.0008697962388396263, "step": 223000 }, { "epoch": 63.298325290945215, "eval_accuracy": 0.986011318115343, "eval_loss": 0.05651155486702919, "eval_runtime": 42.6771, "eval_samples_per_second": 368.511, "eval_steps_per_second": 5.764, "step": 223000 }, { "epoch": 63.30116378086858, "grad_norm": 0.005334765650331974, "learning_rate": 3.672409877944933e-05, "loss": 0.0016165165230631827, "step": 223010 }, { "epoch": 63.30400227079194, "grad_norm": 0.008502583019435406, "learning_rate": 3.672126028952597e-05, "loss": 0.00011400748044252396, "step": 223020 }, { "epoch": 63.306840760715296, "grad_norm": 0.02521110326051712, "learning_rate": 3.6718421799602614e-05, "loss": 8.494481444358826e-05, "step": 223030 }, { "epoch": 63.30967925063866, "grad_norm": 0.05941677466034889, "learning_rate": 3.6715583309679255e-05, "loss": 0.00013607945293188096, "step": 223040 }, { "epoch": 63.31251774056202, "grad_norm": 0.46970686316490173, "learning_rate": 3.671274481975589e-05, "loss": 0.0001719566062092781, "step": 223050 }, { "epoch": 63.315356230485385, "grad_norm": 0.006061909254640341, "learning_rate": 3.670990632983253e-05, "loss": 0.00016485098749399185, "step": 223060 }, { "epoch": 63.31819472040874, "grad_norm": 0.008933966048061848, "learning_rate": 3.670706783990917e-05, "loss": 9.1598741710186e-05, "step": 223070 }, { "epoch": 63.321033210332104, "grad_norm": 0.012437180615961552, "learning_rate": 3.670422934998581e-05, "loss": 0.0003586886450648308, "step": 223080 }, { "epoch": 63.32387170025547, "grad_norm": 0.05316830053925514, "learning_rate": 3.670139086006245e-05, "loss": 0.00019155126065015793, "step": 223090 }, { "epoch": 63.32671019017882, "grad_norm": 0.026742568239569664, "learning_rate": 3.669855237013909e-05, "loss": 0.00019866600632667542, "step": 223100 }, { "epoch": 63.329548680102185, "grad_norm": 2.5647175312042236, "learning_rate": 3.6695713880215724e-05, "loss": 0.0005564259365200997, "step": 223110 }, { "epoch": 63.33238717002555, "grad_norm": 0.8916362524032593, "learning_rate": 3.6692875390292366e-05, "loss": 0.0010733656585216523, "step": 223120 }, { "epoch": 63.335225659948904, "grad_norm": 0.00352277304045856, "learning_rate": 3.669003690036901e-05, "loss": 0.005508442968130111, "step": 223130 }, { "epoch": 63.33806414987227, "grad_norm": 0.07961790263652802, "learning_rate": 3.668719841044564e-05, "loss": 0.0003068597987294197, "step": 223140 }, { "epoch": 63.34090263979563, "grad_norm": 0.018902616575360298, "learning_rate": 3.668435992052228e-05, "loss": 9.52892005443573e-05, "step": 223150 }, { "epoch": 63.34374112971899, "grad_norm": 0.11021900177001953, "learning_rate": 3.6681521430598925e-05, "loss": 8.561704307794571e-05, "step": 223160 }, { "epoch": 63.34657961964235, "grad_norm": 0.0049287099391222, "learning_rate": 3.6678682940675566e-05, "loss": 3.665313124656677e-05, "step": 223170 }, { "epoch": 63.34941810956571, "grad_norm": 0.022623224183917046, "learning_rate": 3.66758444507522e-05, "loss": 0.00014934726059436798, "step": 223180 }, { "epoch": 63.352256599489074, "grad_norm": 0.0055049085058271885, "learning_rate": 3.667300596082884e-05, "loss": 0.0003572680056095123, "step": 223190 }, { "epoch": 63.35509508941243, "grad_norm": 0.00286428933031857, "learning_rate": 3.667016747090548e-05, "loss": 0.00021038148552179336, "step": 223200 }, { "epoch": 63.35793357933579, "grad_norm": 0.22517499327659607, "learning_rate": 3.666732898098212e-05, "loss": 0.00013569425791502, "step": 223210 }, { "epoch": 63.360772069259156, "grad_norm": 0.004327221307903528, "learning_rate": 3.666449049105876e-05, "loss": 0.00025470145046710966, "step": 223220 }, { "epoch": 63.36361055918251, "grad_norm": 0.004679491743445396, "learning_rate": 3.66616520011354e-05, "loss": 0.003937532752752304, "step": 223230 }, { "epoch": 63.366449049105874, "grad_norm": 0.01498672366142273, "learning_rate": 3.6658813511212035e-05, "loss": 0.007736872136592865, "step": 223240 }, { "epoch": 63.36928753902924, "grad_norm": 0.013919683173298836, "learning_rate": 3.6655975021288677e-05, "loss": 0.00019514653831720352, "step": 223250 }, { "epoch": 63.3721260289526, "grad_norm": 0.0390358604490757, "learning_rate": 3.665313653136532e-05, "loss": 0.00019707586616277695, "step": 223260 }, { "epoch": 63.374964518875956, "grad_norm": 0.011482787318527699, "learning_rate": 3.665029804144195e-05, "loss": 5.6063570082187655e-05, "step": 223270 }, { "epoch": 63.37780300879932, "grad_norm": 0.108847975730896, "learning_rate": 3.6647459551518594e-05, "loss": 0.0009898062795400619, "step": 223280 }, { "epoch": 63.38064149872268, "grad_norm": 0.015709929168224335, "learning_rate": 3.6644621061595235e-05, "loss": 5.445387214422226e-05, "step": 223290 }, { "epoch": 63.38347998864604, "grad_norm": 0.040118057280778885, "learning_rate": 3.664178257167187e-05, "loss": 0.00026952382177114487, "step": 223300 }, { "epoch": 63.3863184785694, "grad_norm": 0.005549741443246603, "learning_rate": 3.663894408174851e-05, "loss": 0.007644771784543991, "step": 223310 }, { "epoch": 63.38915696849276, "grad_norm": 1.1583428382873535, "learning_rate": 3.6636105591825146e-05, "loss": 0.0002853242680430412, "step": 223320 }, { "epoch": 63.391995458416126, "grad_norm": 0.004423603415489197, "learning_rate": 3.6633267101901794e-05, "loss": 5.626212805509567e-05, "step": 223330 }, { "epoch": 63.39483394833948, "grad_norm": 0.0022391723468899727, "learning_rate": 3.663042861197843e-05, "loss": 0.003184562548995018, "step": 223340 }, { "epoch": 63.397672438262845, "grad_norm": 0.0861809030175209, "learning_rate": 3.662759012205506e-05, "loss": 0.00028512179851531984, "step": 223350 }, { "epoch": 63.40051092818621, "grad_norm": 0.033818475902080536, "learning_rate": 3.662475163213171e-05, "loss": 8.185897022485732e-05, "step": 223360 }, { "epoch": 63.40334941810956, "grad_norm": 0.017319679260253906, "learning_rate": 3.6621913142208346e-05, "loss": 0.00026616379618644715, "step": 223370 }, { "epoch": 63.406187908032926, "grad_norm": 0.008378325030207634, "learning_rate": 3.661907465228499e-05, "loss": 3.987569361925125e-05, "step": 223380 }, { "epoch": 63.40902639795629, "grad_norm": 0.004672252107411623, "learning_rate": 3.661623616236163e-05, "loss": 9.512249380350113e-05, "step": 223390 }, { "epoch": 63.411864887879645, "grad_norm": 0.0036255940794944763, "learning_rate": 3.661339767243826e-05, "loss": 4.9865990877151487e-05, "step": 223400 }, { "epoch": 63.41470337780301, "grad_norm": 0.011394037865102291, "learning_rate": 3.6610559182514905e-05, "loss": 4.562344402074814e-05, "step": 223410 }, { "epoch": 63.41754186772637, "grad_norm": 0.13804616034030914, "learning_rate": 3.660772069259154e-05, "loss": 9.3805231153965e-05, "step": 223420 }, { "epoch": 63.420380357649734, "grad_norm": 0.003312069922685623, "learning_rate": 3.660488220266818e-05, "loss": 8.087586611509323e-05, "step": 223430 }, { "epoch": 63.42321884757309, "grad_norm": 0.3693499267101288, "learning_rate": 3.660204371274482e-05, "loss": 0.00010591540485620499, "step": 223440 }, { "epoch": 63.42605733749645, "grad_norm": 0.0019168966682627797, "learning_rate": 3.659920522282146e-05, "loss": 7.344745099544526e-05, "step": 223450 }, { "epoch": 63.428895827419815, "grad_norm": 0.06473436206579208, "learning_rate": 3.6596366732898105e-05, "loss": 0.00021960772573947906, "step": 223460 }, { "epoch": 63.43173431734317, "grad_norm": 0.0027976231649518013, "learning_rate": 3.659352824297474e-05, "loss": 4.2328983545303346e-05, "step": 223470 }, { "epoch": 63.434572807266534, "grad_norm": 0.05245933309197426, "learning_rate": 3.6590689753051374e-05, "loss": 9.653270244598388e-05, "step": 223480 }, { "epoch": 63.4374112971899, "grad_norm": 0.006929019931703806, "learning_rate": 3.658785126312802e-05, "loss": 6.011519581079483e-05, "step": 223490 }, { "epoch": 63.44024978711325, "grad_norm": 0.001231226953677833, "learning_rate": 3.658501277320466e-05, "loss": 3.627687692642212e-05, "step": 223500 }, { "epoch": 63.44024978711325, "eval_accuracy": 0.9864564125389458, "eval_loss": 0.057771794497966766, "eval_runtime": 36.1167, "eval_samples_per_second": 435.449, "eval_steps_per_second": 6.811, "step": 223500 }, { "epoch": 63.443088277036615, "grad_norm": 0.0040885042399168015, "learning_rate": 3.65821742832813e-05, "loss": 0.0001328008249402046, "step": 223510 }, { "epoch": 63.44592676695998, "grad_norm": 0.004438720643520355, "learning_rate": 3.657933579335793e-05, "loss": 2.8696656227111816e-05, "step": 223520 }, { "epoch": 63.44876525688334, "grad_norm": 0.003084653289988637, "learning_rate": 3.6576497303434574e-05, "loss": 8.419677615165711e-05, "step": 223530 }, { "epoch": 63.4516037468067, "grad_norm": 0.0469939261674881, "learning_rate": 3.6573658813511215e-05, "loss": 3.0128844082355498e-05, "step": 223540 }, { "epoch": 63.45444223673006, "grad_norm": 0.03495344519615173, "learning_rate": 3.657082032358785e-05, "loss": 0.000333760492503643, "step": 223550 }, { "epoch": 63.45728072665342, "grad_norm": 0.018353594467043877, "learning_rate": 3.656798183366449e-05, "loss": 0.0001252938061952591, "step": 223560 }, { "epoch": 63.46011921657678, "grad_norm": 0.0019579348154366016, "learning_rate": 3.656514334374113e-05, "loss": 4.43803146481514e-05, "step": 223570 }, { "epoch": 63.46295770650014, "grad_norm": 0.0535019114613533, "learning_rate": 3.6562588702810106e-05, "loss": 0.0029955286532640456, "step": 223580 }, { "epoch": 63.465796196423504, "grad_norm": 0.018279654905200005, "learning_rate": 3.655975021288675e-05, "loss": 0.00020274501293897628, "step": 223590 }, { "epoch": 63.46863468634686, "grad_norm": 0.000578800099901855, "learning_rate": 3.655691172296338e-05, "loss": 0.0027022268623113632, "step": 223600 }, { "epoch": 63.47147317627022, "grad_norm": 0.0021990451496094465, "learning_rate": 3.655407323304002e-05, "loss": 0.00010297223925590516, "step": 223610 }, { "epoch": 63.474311666193586, "grad_norm": 1.0074715614318848, "learning_rate": 3.6551234743116665e-05, "loss": 0.0017989611253142357, "step": 223620 }, { "epoch": 63.47715015611695, "grad_norm": 17.44586181640625, "learning_rate": 3.65483962531933e-05, "loss": 0.008627587556838989, "step": 223630 }, { "epoch": 63.479988646040304, "grad_norm": 0.00579309556633234, "learning_rate": 3.654555776326995e-05, "loss": 0.00016186852008104324, "step": 223640 }, { "epoch": 63.48282713596367, "grad_norm": 0.0058319284580647945, "learning_rate": 3.654271927334658e-05, "loss": 0.0009532241150736809, "step": 223650 }, { "epoch": 63.48566562588703, "grad_norm": 0.013066763989627361, "learning_rate": 3.653988078342322e-05, "loss": 0.0007531631737947464, "step": 223660 }, { "epoch": 63.488504115810386, "grad_norm": 0.012141210027039051, "learning_rate": 3.6537042293499865e-05, "loss": 0.00304377730935812, "step": 223670 }, { "epoch": 63.49134260573375, "grad_norm": 0.18251363933086395, "learning_rate": 3.65342038035765e-05, "loss": 0.021452009677886963, "step": 223680 }, { "epoch": 63.49418109565711, "grad_norm": 0.0040139867924153805, "learning_rate": 3.653136531365314e-05, "loss": 0.02179720550775528, "step": 223690 }, { "epoch": 63.497019585580475, "grad_norm": 0.04740726947784424, "learning_rate": 3.6528526823729775e-05, "loss": 0.0016464658081531524, "step": 223700 }, { "epoch": 63.49985807550383, "grad_norm": 0.11068625748157501, "learning_rate": 3.652568833380642e-05, "loss": 0.00013583973050117492, "step": 223710 }, { "epoch": 63.50269656542719, "grad_norm": 0.11419981718063354, "learning_rate": 3.652284984388306e-05, "loss": 0.001049961894750595, "step": 223720 }, { "epoch": 63.505535055350556, "grad_norm": 0.5183039307594299, "learning_rate": 3.652001135395969e-05, "loss": 0.006524159759283066, "step": 223730 }, { "epoch": 63.50837354527391, "grad_norm": 0.10748652368783951, "learning_rate": 3.6517172864036334e-05, "loss": 0.0005049765110015869, "step": 223740 }, { "epoch": 63.511212035197275, "grad_norm": 0.05798349156975746, "learning_rate": 3.6514334374112975e-05, "loss": 0.013268399238586425, "step": 223750 }, { "epoch": 63.51405052512064, "grad_norm": 0.10279430449008942, "learning_rate": 3.651149588418961e-05, "loss": 0.0007379718124866485, "step": 223760 }, { "epoch": 63.51688901504399, "grad_norm": 0.0288290586322546, "learning_rate": 3.650865739426625e-05, "loss": 0.0012078601866960526, "step": 223770 }, { "epoch": 63.519727504967356, "grad_norm": 0.5043288469314575, "learning_rate": 3.650581890434289e-05, "loss": 0.013162016868591309, "step": 223780 }, { "epoch": 63.52256599489072, "grad_norm": 0.0340101420879364, "learning_rate": 3.650298041441953e-05, "loss": 0.0015129290521144867, "step": 223790 }, { "epoch": 63.52540448481408, "grad_norm": 0.002691948087885976, "learning_rate": 3.650014192449617e-05, "loss": 0.0004474237561225891, "step": 223800 }, { "epoch": 63.52824297473744, "grad_norm": 0.5753144025802612, "learning_rate": 3.649730343457281e-05, "loss": 0.0023762943223118783, "step": 223810 }, { "epoch": 63.5310814646608, "grad_norm": 0.2073039710521698, "learning_rate": 3.6494464944649445e-05, "loss": 0.000261160172522068, "step": 223820 }, { "epoch": 63.533919954584164, "grad_norm": 0.06444323807954788, "learning_rate": 3.6491626454726086e-05, "loss": 0.00013085026293992996, "step": 223830 }, { "epoch": 63.53675844450752, "grad_norm": 0.1926426887512207, "learning_rate": 3.648878796480273e-05, "loss": 0.0032511427998542786, "step": 223840 }, { "epoch": 63.53959693443088, "grad_norm": 1.7875895500183105, "learning_rate": 3.648594947487937e-05, "loss": 0.0006136149168014526, "step": 223850 }, { "epoch": 63.542435424354245, "grad_norm": 0.08439810574054718, "learning_rate": 3.6483110984956003e-05, "loss": 0.00037822499871253967, "step": 223860 }, { "epoch": 63.5452739142776, "grad_norm": 0.00915446411818266, "learning_rate": 3.6480272495032645e-05, "loss": 0.0002051426097750664, "step": 223870 }, { "epoch": 63.548112404200964, "grad_norm": 0.18985746800899506, "learning_rate": 3.6477434005109286e-05, "loss": 0.0021219585090875626, "step": 223880 }, { "epoch": 63.55095089412433, "grad_norm": 0.4586183726787567, "learning_rate": 3.647459551518592e-05, "loss": 0.002932504378259182, "step": 223890 }, { "epoch": 63.55378938404769, "grad_norm": 0.038059890270233154, "learning_rate": 3.647175702526256e-05, "loss": 0.0005607651546597481, "step": 223900 }, { "epoch": 63.556627873971046, "grad_norm": 0.008817736990749836, "learning_rate": 3.6468918535339204e-05, "loss": 0.00028292573988437654, "step": 223910 }, { "epoch": 63.55946636389441, "grad_norm": 0.1542968600988388, "learning_rate": 3.646608004541584e-05, "loss": 0.0019169818609952928, "step": 223920 }, { "epoch": 63.56230485381777, "grad_norm": 6.2685770988464355, "learning_rate": 3.646324155549248e-05, "loss": 0.0020761780440807344, "step": 223930 }, { "epoch": 63.56514334374113, "grad_norm": 0.1055687963962555, "learning_rate": 3.646040306556912e-05, "loss": 0.0006776006892323494, "step": 223940 }, { "epoch": 63.56798183366449, "grad_norm": 0.000512793892994523, "learning_rate": 3.6457564575645756e-05, "loss": 0.003973754495382309, "step": 223950 }, { "epoch": 63.57082032358785, "grad_norm": 0.33121412992477417, "learning_rate": 3.64547260857224e-05, "loss": 0.00041541531682014464, "step": 223960 }, { "epoch": 63.57365881351121, "grad_norm": 0.04472082853317261, "learning_rate": 3.645188759579904e-05, "loss": 0.0010541670024394989, "step": 223970 }, { "epoch": 63.57649730343457, "grad_norm": 0.10458756238222122, "learning_rate": 3.644904910587568e-05, "loss": 0.0008292792364954949, "step": 223980 }, { "epoch": 63.579335793357934, "grad_norm": 0.020971819758415222, "learning_rate": 3.6446210615952314e-05, "loss": 0.009974076598882674, "step": 223990 }, { "epoch": 63.5821742832813, "grad_norm": 0.037731196731328964, "learning_rate": 3.644337212602895e-05, "loss": 0.000586615689098835, "step": 224000 }, { "epoch": 63.5821742832813, "eval_accuracy": 0.9839130158326445, "eval_loss": 0.07289119064807892, "eval_runtime": 38.7378, "eval_samples_per_second": 405.986, "eval_steps_per_second": 6.35, "step": 224000 }, { "epoch": 63.58501277320465, "grad_norm": 0.03703458979725838, "learning_rate": 3.64405336361056e-05, "loss": 0.0002960916608572006, "step": 224010 }, { "epoch": 63.587851263128016, "grad_norm": 0.7719717025756836, "learning_rate": 3.643769514618223e-05, "loss": 0.00030827317386865615, "step": 224020 }, { "epoch": 63.59068975305138, "grad_norm": 0.0134236179292202, "learning_rate": 3.643485665625887e-05, "loss": 0.0001770026981830597, "step": 224030 }, { "epoch": 63.593528242974735, "grad_norm": 0.014674156904220581, "learning_rate": 3.6432018166335514e-05, "loss": 0.00020059719681739808, "step": 224040 }, { "epoch": 63.5963667328981, "grad_norm": 0.6266777515411377, "learning_rate": 3.642917967641215e-05, "loss": 0.00020484365522861482, "step": 224050 }, { "epoch": 63.59920522282146, "grad_norm": 0.03310764953494072, "learning_rate": 3.642634118648879e-05, "loss": 0.0006609288975596428, "step": 224060 }, { "epoch": 63.602043712744816, "grad_norm": 0.11702274531126022, "learning_rate": 3.642350269656543e-05, "loss": 0.00012708231806755066, "step": 224070 }, { "epoch": 63.60488220266818, "grad_norm": 0.010297799482941628, "learning_rate": 3.6420664206642066e-05, "loss": 5.81447035074234e-05, "step": 224080 }, { "epoch": 63.60772069259154, "grad_norm": 0.2216036170721054, "learning_rate": 3.641782571671871e-05, "loss": 9.372141212224961e-05, "step": 224090 }, { "epoch": 63.610559182514905, "grad_norm": 0.0027367149014025927, "learning_rate": 3.641498722679534e-05, "loss": 0.0001247301697731018, "step": 224100 }, { "epoch": 63.61339767243826, "grad_norm": 0.00645038578659296, "learning_rate": 3.641214873687199e-05, "loss": 0.00011419020593166352, "step": 224110 }, { "epoch": 63.61623616236162, "grad_norm": 0.00821620225906372, "learning_rate": 3.6409310246948625e-05, "loss": 0.0005610154941678048, "step": 224120 }, { "epoch": 63.619074652284986, "grad_norm": 0.0005801416700705886, "learning_rate": 3.640647175702526e-05, "loss": 0.0009100308641791343, "step": 224130 }, { "epoch": 63.62191314220834, "grad_norm": 0.0032927989959716797, "learning_rate": 3.640363326710191e-05, "loss": 0.000421636737883091, "step": 224140 }, { "epoch": 63.624751632131705, "grad_norm": 0.06791634857654572, "learning_rate": 3.640079477717854e-05, "loss": 0.0001595769077539444, "step": 224150 }, { "epoch": 63.62759012205507, "grad_norm": 0.04947077855467796, "learning_rate": 3.6397956287255184e-05, "loss": 0.0007299300283193588, "step": 224160 }, { "epoch": 63.63042861197843, "grad_norm": 0.10413723438978195, "learning_rate": 3.6395117797331825e-05, "loss": 0.00047134533524513246, "step": 224170 }, { "epoch": 63.63326710190179, "grad_norm": 0.004118946380913258, "learning_rate": 3.639227930740846e-05, "loss": 0.0024787355214357376, "step": 224180 }, { "epoch": 63.63610559182515, "grad_norm": 0.010660936124622822, "learning_rate": 3.63894408174851e-05, "loss": 4.693958908319473e-05, "step": 224190 }, { "epoch": 63.63894408174851, "grad_norm": 0.003442137036472559, "learning_rate": 3.6386602327561736e-05, "loss": 6.947647780179978e-05, "step": 224200 }, { "epoch": 63.64178257167187, "grad_norm": 0.027734585106372833, "learning_rate": 3.638376383763838e-05, "loss": 0.0005891107022762299, "step": 224210 }, { "epoch": 63.64462106159523, "grad_norm": 0.02225104160606861, "learning_rate": 3.638092534771502e-05, "loss": 0.00196639820933342, "step": 224220 }, { "epoch": 63.647459551518594, "grad_norm": 0.012638923712074757, "learning_rate": 3.637808685779165e-05, "loss": 0.0027511995285749435, "step": 224230 }, { "epoch": 63.65029804144195, "grad_norm": 0.6847367882728577, "learning_rate": 3.6375248367868294e-05, "loss": 0.00024482980370521545, "step": 224240 }, { "epoch": 63.65313653136531, "grad_norm": 0.002149673178792, "learning_rate": 3.6372409877944936e-05, "loss": 0.0001634318381547928, "step": 224250 }, { "epoch": 63.655975021288675, "grad_norm": 0.8330767750740051, "learning_rate": 3.636957138802157e-05, "loss": 0.00040943268686532974, "step": 224260 }, { "epoch": 63.65881351121204, "grad_norm": 0.012173672206699848, "learning_rate": 3.636673289809822e-05, "loss": 0.00042563993483781817, "step": 224270 }, { "epoch": 63.661652001135394, "grad_norm": 0.5771710872650146, "learning_rate": 3.636389440817485e-05, "loss": 0.00017850622534751893, "step": 224280 }, { "epoch": 63.66449049105876, "grad_norm": 0.06328960508108139, "learning_rate": 3.636105591825149e-05, "loss": 0.006619658321142197, "step": 224290 }, { "epoch": 63.66732898098212, "grad_norm": 0.01505543664097786, "learning_rate": 3.635821742832813e-05, "loss": 0.002584819495677948, "step": 224300 }, { "epoch": 63.670167470905476, "grad_norm": 0.32252299785614014, "learning_rate": 3.635537893840477e-05, "loss": 0.0010431738570332526, "step": 224310 }, { "epoch": 63.67300596082884, "grad_norm": 0.005776938516646624, "learning_rate": 3.635254044848141e-05, "loss": 0.0009467877447605133, "step": 224320 }, { "epoch": 63.6758444507522, "grad_norm": 1.35593581199646, "learning_rate": 3.6349701958558046e-05, "loss": 0.0005304159596562386, "step": 224330 }, { "epoch": 63.67868294067556, "grad_norm": 0.08804529160261154, "learning_rate": 3.634686346863469e-05, "loss": 0.001593368500471115, "step": 224340 }, { "epoch": 63.68152143059892, "grad_norm": 1.0405584573745728, "learning_rate": 3.634402497871133e-05, "loss": 0.001332131028175354, "step": 224350 }, { "epoch": 63.68435992052228, "grad_norm": 0.06972651928663254, "learning_rate": 3.6341186488787964e-05, "loss": 0.0001269834116101265, "step": 224360 }, { "epoch": 63.687198410445646, "grad_norm": 0.3924393057823181, "learning_rate": 3.6338347998864605e-05, "loss": 0.002108054608106613, "step": 224370 }, { "epoch": 63.690036900369, "grad_norm": 0.16413863003253937, "learning_rate": 3.6335509508941247e-05, "loss": 0.0002925468608736992, "step": 224380 }, { "epoch": 63.692875390292365, "grad_norm": 0.13881585001945496, "learning_rate": 3.633267101901788e-05, "loss": 0.00034968480467796327, "step": 224390 }, { "epoch": 63.69571388021573, "grad_norm": 0.028677700087428093, "learning_rate": 3.632983252909452e-05, "loss": 0.0036365557461977005, "step": 224400 }, { "epoch": 63.69855237013908, "grad_norm": 0.01002742163836956, "learning_rate": 3.6326994039171164e-05, "loss": 0.0035696201026439666, "step": 224410 }, { "epoch": 63.701390860062446, "grad_norm": 0.02411208115518093, "learning_rate": 3.63241555492478e-05, "loss": 0.0016014823690056802, "step": 224420 }, { "epoch": 63.70422934998581, "grad_norm": 0.7594919204711914, "learning_rate": 3.632131705932444e-05, "loss": 0.004693600535392761, "step": 224430 }, { "epoch": 63.70706783990917, "grad_norm": 3.613719940185547, "learning_rate": 3.631847856940108e-05, "loss": 0.00524275004863739, "step": 224440 }, { "epoch": 63.70990632983253, "grad_norm": 0.03823316469788551, "learning_rate": 3.631564007947772e-05, "loss": 0.0004100177437067032, "step": 224450 }, { "epoch": 63.71274481975589, "grad_norm": 0.025408955290913582, "learning_rate": 3.631280158955436e-05, "loss": 0.00013522598892450332, "step": 224460 }, { "epoch": 63.71558330967925, "grad_norm": 0.04364224523305893, "learning_rate": 3.6309963099631e-05, "loss": 0.0012892208993434905, "step": 224470 }, { "epoch": 63.71842179960261, "grad_norm": 0.06201209872961044, "learning_rate": 3.630712460970764e-05, "loss": 0.00021735373884439467, "step": 224480 }, { "epoch": 63.72126028952597, "grad_norm": 0.010149204172194004, "learning_rate": 3.6304286119784275e-05, "loss": 5.117245018482208e-05, "step": 224490 }, { "epoch": 63.724098779449335, "grad_norm": 0.1567564606666565, "learning_rate": 3.6301447629860916e-05, "loss": 0.0018002957105636597, "step": 224500 }, { "epoch": 63.724098779449335, "eval_accuracy": 0.9844852800915623, "eval_loss": 0.05885354056954384, "eval_runtime": 49.0589, "eval_samples_per_second": 320.574, "eval_steps_per_second": 5.014, "step": 224500 }, { "epoch": 63.72693726937269, "grad_norm": 0.003187149064615369, "learning_rate": 3.629860913993756e-05, "loss": 5.84874302148819e-05, "step": 224510 }, { "epoch": 63.729775759296054, "grad_norm": 0.298933207988739, "learning_rate": 3.629577065001419e-05, "loss": 0.0001566428691148758, "step": 224520 }, { "epoch": 63.73261424921942, "grad_norm": 0.5070264339447021, "learning_rate": 3.629293216009083e-05, "loss": 0.002816251292824745, "step": 224530 }, { "epoch": 63.73545273914278, "grad_norm": 0.03221150487661362, "learning_rate": 3.6290093670167475e-05, "loss": 0.00029514040797948835, "step": 224540 }, { "epoch": 63.738291229066135, "grad_norm": 0.003269675187766552, "learning_rate": 3.628725518024411e-05, "loss": 5.558747798204422e-05, "step": 224550 }, { "epoch": 63.7411297189895, "grad_norm": 0.10791443288326263, "learning_rate": 3.628441669032075e-05, "loss": 0.0004965282976627349, "step": 224560 }, { "epoch": 63.74396820891286, "grad_norm": 0.004258415661752224, "learning_rate": 3.628157820039739e-05, "loss": 0.0005702352151274681, "step": 224570 }, { "epoch": 63.74680669883622, "grad_norm": 1.5285065174102783, "learning_rate": 3.6278739710474033e-05, "loss": 0.0004103695973753929, "step": 224580 }, { "epoch": 63.74964518875958, "grad_norm": 2.266052484512329, "learning_rate": 3.627590122055067e-05, "loss": 0.0006242839619517327, "step": 224590 }, { "epoch": 63.75248367868294, "grad_norm": 0.009700002148747444, "learning_rate": 3.627306273062731e-05, "loss": 0.0005669606849551201, "step": 224600 }, { "epoch": 63.7553221686063, "grad_norm": 0.006432169117033482, "learning_rate": 3.627022424070395e-05, "loss": 0.0018174093216657638, "step": 224610 }, { "epoch": 63.75816065852966, "grad_norm": 0.006445504259318113, "learning_rate": 3.6267385750780585e-05, "loss": 0.000273311510682106, "step": 224620 }, { "epoch": 63.760999148453024, "grad_norm": 0.0020076981745660305, "learning_rate": 3.626454726085723e-05, "loss": 8.074864745140075e-05, "step": 224630 }, { "epoch": 63.76383763837639, "grad_norm": 0.03169398754835129, "learning_rate": 3.626170877093387e-05, "loss": 0.009529957920312882, "step": 224640 }, { "epoch": 63.76667612829974, "grad_norm": 0.04488128796219826, "learning_rate": 3.62588702810105e-05, "loss": 0.0005085794255137444, "step": 224650 }, { "epoch": 63.769514618223106, "grad_norm": 0.006746875122189522, "learning_rate": 3.6256031791087144e-05, "loss": 0.0005354370921850204, "step": 224660 }, { "epoch": 63.77235310814647, "grad_norm": 0.00416474649682641, "learning_rate": 3.6253193301163785e-05, "loss": 0.0006691031157970428, "step": 224670 }, { "epoch": 63.775191598069824, "grad_norm": 0.2644629180431366, "learning_rate": 3.625035481124042e-05, "loss": 0.0005096295848488807, "step": 224680 }, { "epoch": 63.77803008799319, "grad_norm": 0.05218740552663803, "learning_rate": 3.624751632131706e-05, "loss": 0.00010052938014268876, "step": 224690 }, { "epoch": 63.78086857791655, "grad_norm": 0.0024009987246245146, "learning_rate": 3.62446778313937e-05, "loss": 0.0002214275300502777, "step": 224700 }, { "epoch": 63.783707067839906, "grad_norm": 0.0036517989356070757, "learning_rate": 3.624183934147034e-05, "loss": 3.4290365874767303e-05, "step": 224710 }, { "epoch": 63.78654555776327, "grad_norm": 0.15373070538043976, "learning_rate": 3.623900085154698e-05, "loss": 7.564220577478409e-05, "step": 224720 }, { "epoch": 63.78938404768663, "grad_norm": 0.18330538272857666, "learning_rate": 3.623616236162361e-05, "loss": 9.984970092773438e-05, "step": 224730 }, { "epoch": 63.792222537609995, "grad_norm": 0.02046150341629982, "learning_rate": 3.623332387170026e-05, "loss": 9.0823695063591e-05, "step": 224740 }, { "epoch": 63.79506102753335, "grad_norm": 0.0014243456535041332, "learning_rate": 3.6230485381776896e-05, "loss": 8.19990411400795e-05, "step": 224750 }, { "epoch": 63.79789951745671, "grad_norm": 0.003475220175459981, "learning_rate": 3.622764689185353e-05, "loss": 4.816558212041855e-05, "step": 224760 }, { "epoch": 63.800738007380076, "grad_norm": 0.0015404976438730955, "learning_rate": 3.622480840193018e-05, "loss": 8.128080517053605e-05, "step": 224770 }, { "epoch": 63.80357649730343, "grad_norm": 0.020994974300265312, "learning_rate": 3.6221969912006813e-05, "loss": 7.607415318489075e-05, "step": 224780 }, { "epoch": 63.806414987226795, "grad_norm": 0.019876224920153618, "learning_rate": 3.6219131422083455e-05, "loss": 0.00021308381110429764, "step": 224790 }, { "epoch": 63.80925347715016, "grad_norm": 0.07548193633556366, "learning_rate": 3.6216292932160096e-05, "loss": 5.884524434804916e-05, "step": 224800 }, { "epoch": 63.81209196707351, "grad_norm": 1.3211103677749634, "learning_rate": 3.621345444223673e-05, "loss": 0.0002534586936235428, "step": 224810 }, { "epoch": 63.814930456996876, "grad_norm": 0.03856966271996498, "learning_rate": 3.621061595231337e-05, "loss": 0.0002898089587688446, "step": 224820 }, { "epoch": 63.81776894692024, "grad_norm": 0.13476493954658508, "learning_rate": 3.620777746239001e-05, "loss": 0.006408035755157471, "step": 224830 }, { "epoch": 63.8206074368436, "grad_norm": 0.054856132715940475, "learning_rate": 3.620493897246665e-05, "loss": 7.304586470127105e-05, "step": 224840 }, { "epoch": 63.82344592676696, "grad_norm": 0.020975610241293907, "learning_rate": 3.620210048254329e-05, "loss": 0.00020775459706783294, "step": 224850 }, { "epoch": 63.82628441669032, "grad_norm": 0.0413040965795517, "learning_rate": 3.6199261992619924e-05, "loss": 6.19666650891304e-05, "step": 224860 }, { "epoch": 63.829122906613684, "grad_norm": 0.0599481500685215, "learning_rate": 3.619642350269657e-05, "loss": 0.00010822359472513199, "step": 224870 }, { "epoch": 63.83196139653704, "grad_norm": 0.003497365163639188, "learning_rate": 3.619358501277321e-05, "loss": 0.0007479816675186157, "step": 224880 }, { "epoch": 63.8347998864604, "grad_norm": 0.08746398985385895, "learning_rate": 3.619074652284984e-05, "loss": 0.00018949881196022033, "step": 224890 }, { "epoch": 63.837638376383765, "grad_norm": 0.03837602585554123, "learning_rate": 3.618790803292649e-05, "loss": 0.0021628165617585183, "step": 224900 }, { "epoch": 63.84047686630713, "grad_norm": 0.013880294747650623, "learning_rate": 3.6185069543003124e-05, "loss": 0.00047457292675971984, "step": 224910 }, { "epoch": 63.843315356230484, "grad_norm": 0.027006829157471657, "learning_rate": 3.6182231053079766e-05, "loss": 6.981436163187028e-05, "step": 224920 }, { "epoch": 63.84615384615385, "grad_norm": 0.007918504066765308, "learning_rate": 3.61793925631564e-05, "loss": 8.851196616888046e-05, "step": 224930 }, { "epoch": 63.84899233607721, "grad_norm": 2.3935978412628174, "learning_rate": 3.617655407323304e-05, "loss": 0.0005845023319125175, "step": 224940 }, { "epoch": 63.851830826000565, "grad_norm": 0.019875647500157356, "learning_rate": 3.617371558330968e-05, "loss": 0.00011670067906379699, "step": 224950 }, { "epoch": 63.85466931592393, "grad_norm": 0.011189409531652927, "learning_rate": 3.617087709338632e-05, "loss": 8.617490530014039e-05, "step": 224960 }, { "epoch": 63.85750780584729, "grad_norm": 0.03456037491559982, "learning_rate": 3.616803860346296e-05, "loss": 0.00010871570557355881, "step": 224970 }, { "epoch": 63.86034629577065, "grad_norm": 0.03251371160149574, "learning_rate": 3.61652001135396e-05, "loss": 0.0003281429409980774, "step": 224980 }, { "epoch": 63.86318478569401, "grad_norm": 1.0005983114242554, "learning_rate": 3.6162361623616235e-05, "loss": 0.0002167804166674614, "step": 224990 }, { "epoch": 63.86602327561737, "grad_norm": 0.23928476870059967, "learning_rate": 3.615952313369288e-05, "loss": 0.0010190337896347045, "step": 225000 }, { "epoch": 63.86602327561737, "eval_accuracy": 0.9832771666560692, "eval_loss": 0.07014112919569016, "eval_runtime": 44.7928, "eval_samples_per_second": 351.106, "eval_steps_per_second": 5.492, "step": 225000 }, { "epoch": 63.868861765540736, "grad_norm": 0.010867624543607235, "learning_rate": 3.615668464376952e-05, "loss": 0.004285103082656861, "step": 225010 }, { "epoch": 63.87170025546409, "grad_norm": 0.04609503597021103, "learning_rate": 3.615384615384615e-05, "loss": 0.0007820278406143188, "step": 225020 }, { "epoch": 63.874538745387454, "grad_norm": 0.0021405420266091824, "learning_rate": 3.6151007663922794e-05, "loss": 0.001198708452284336, "step": 225030 }, { "epoch": 63.87737723531082, "grad_norm": 0.01071204710751772, "learning_rate": 3.6148169173999435e-05, "loss": 7.256064563989639e-05, "step": 225040 }, { "epoch": 63.88021572523417, "grad_norm": 0.009485059417784214, "learning_rate": 3.6145330684076076e-05, "loss": 0.0027874434366822244, "step": 225050 }, { "epoch": 63.883054215157536, "grad_norm": 0.10715191811323166, "learning_rate": 3.614249219415271e-05, "loss": 0.004679053276777268, "step": 225060 }, { "epoch": 63.8858927050809, "grad_norm": 0.048328984528779984, "learning_rate": 3.613965370422935e-05, "loss": 0.0005381094291806221, "step": 225070 }, { "epoch": 63.888731195004254, "grad_norm": 2.129163980484009, "learning_rate": 3.6136815214305994e-05, "loss": 0.006461343169212342, "step": 225080 }, { "epoch": 63.89156968492762, "grad_norm": 0.0027747699059545994, "learning_rate": 3.613397672438263e-05, "loss": 0.0019228605553507805, "step": 225090 }, { "epoch": 63.89440817485098, "grad_norm": 0.04530743882060051, "learning_rate": 3.613113823445927e-05, "loss": 0.0003502938896417618, "step": 225100 }, { "epoch": 63.89724666477434, "grad_norm": 0.03588385879993439, "learning_rate": 3.612829974453591e-05, "loss": 0.0011235028505325317, "step": 225110 }, { "epoch": 63.9000851546977, "grad_norm": 0.014386672526597977, "learning_rate": 3.6125461254612546e-05, "loss": 0.0013135559856891632, "step": 225120 }, { "epoch": 63.90292364462106, "grad_norm": 0.004599182866513729, "learning_rate": 3.612262276468919e-05, "loss": 0.004383261501789093, "step": 225130 }, { "epoch": 63.905762134544425, "grad_norm": 1.6082789897918701, "learning_rate": 3.611978427476583e-05, "loss": 0.0004390498623251915, "step": 225140 }, { "epoch": 63.90860062446778, "grad_norm": 0.15213264524936676, "learning_rate": 3.611694578484246e-05, "loss": 9.514503180980682e-05, "step": 225150 }, { "epoch": 63.91143911439114, "grad_norm": 0.01268579438328743, "learning_rate": 3.6114107294919104e-05, "loss": 0.0007636262103915215, "step": 225160 }, { "epoch": 63.914277604314506, "grad_norm": 0.16453541815280914, "learning_rate": 3.6111268804995746e-05, "loss": 0.0012014348059892655, "step": 225170 }, { "epoch": 63.91711609423786, "grad_norm": 0.020700842142105103, "learning_rate": 3.610843031507238e-05, "loss": 0.0005205659195780754, "step": 225180 }, { "epoch": 63.919954584161225, "grad_norm": 0.005668096709996462, "learning_rate": 3.610559182514902e-05, "loss": 0.00021029040217399598, "step": 225190 }, { "epoch": 63.92279307408459, "grad_norm": 0.025449303910136223, "learning_rate": 3.610275333522566e-05, "loss": 0.0004798643290996552, "step": 225200 }, { "epoch": 63.92563156400795, "grad_norm": 0.024966223165392876, "learning_rate": 3.6099914845302304e-05, "loss": 0.0010650152340531348, "step": 225210 }, { "epoch": 63.928470053931306, "grad_norm": 0.022844385355710983, "learning_rate": 3.609707635537894e-05, "loss": 6.659571081399918e-05, "step": 225220 }, { "epoch": 63.93130854385467, "grad_norm": 0.010741055943071842, "learning_rate": 3.6094237865455574e-05, "loss": 0.0002574903890490532, "step": 225230 }, { "epoch": 63.93414703377803, "grad_norm": 2.1419639587402344, "learning_rate": 3.609139937553222e-05, "loss": 0.0006779827177524567, "step": 225240 }, { "epoch": 63.93698552370139, "grad_norm": 0.07790601253509521, "learning_rate": 3.6088560885608856e-05, "loss": 0.003174135833978653, "step": 225250 }, { "epoch": 63.93982401362475, "grad_norm": 0.004709932953119278, "learning_rate": 3.60857223956855e-05, "loss": 0.00010562427341938019, "step": 225260 }, { "epoch": 63.942662503548114, "grad_norm": 0.2716752886772156, "learning_rate": 3.608288390576214e-05, "loss": 0.00017131827771663666, "step": 225270 }, { "epoch": 63.94550099347148, "grad_norm": 0.004092082846909761, "learning_rate": 3.6080045415838774e-05, "loss": 0.0020520150661468505, "step": 225280 }, { "epoch": 63.94833948339483, "grad_norm": 0.009406388737261295, "learning_rate": 3.6077206925915415e-05, "loss": 7.794853299856186e-05, "step": 225290 }, { "epoch": 63.951177973318195, "grad_norm": 0.04778384417295456, "learning_rate": 3.6074368435992057e-05, "loss": 0.00011675748974084854, "step": 225300 }, { "epoch": 63.95401646324156, "grad_norm": 0.1012733206152916, "learning_rate": 3.607152994606869e-05, "loss": 0.0010896002873778342, "step": 225310 }, { "epoch": 63.956854953164914, "grad_norm": 0.009880089201033115, "learning_rate": 3.606869145614533e-05, "loss": 0.0004702012985944748, "step": 225320 }, { "epoch": 63.95969344308828, "grad_norm": 12.160265922546387, "learning_rate": 3.606585296622197e-05, "loss": 0.003427032381296158, "step": 225330 }, { "epoch": 63.96253193301164, "grad_norm": 2.0546326637268066, "learning_rate": 3.6063014476298615e-05, "loss": 0.0006543120369315147, "step": 225340 }, { "epoch": 63.965370422934996, "grad_norm": 0.00652508158236742, "learning_rate": 3.606017598637525e-05, "loss": 8.605122566223145e-05, "step": 225350 }, { "epoch": 63.96820891285836, "grad_norm": 0.010560959577560425, "learning_rate": 3.6057337496451884e-05, "loss": 7.430631667375565e-05, "step": 225360 }, { "epoch": 63.97104740278172, "grad_norm": 0.1021939292550087, "learning_rate": 3.605449900652853e-05, "loss": 0.00018622148782014846, "step": 225370 }, { "epoch": 63.973885892705084, "grad_norm": 0.06191571056842804, "learning_rate": 3.605166051660517e-05, "loss": 0.00026240777224302293, "step": 225380 }, { "epoch": 63.97672438262844, "grad_norm": 0.004847110249102116, "learning_rate": 3.604882202668181e-05, "loss": 0.0025774961337447166, "step": 225390 }, { "epoch": 63.9795628725518, "grad_norm": 0.010638736188411713, "learning_rate": 3.604598353675845e-05, "loss": 0.0016166066750884056, "step": 225400 }, { "epoch": 63.982401362475166, "grad_norm": 0.038888558745384216, "learning_rate": 3.6043145046835085e-05, "loss": 0.0013660388067364693, "step": 225410 }, { "epoch": 63.98523985239852, "grad_norm": 0.023188795894384384, "learning_rate": 3.6040306556911726e-05, "loss": 9.237565100193024e-05, "step": 225420 }, { "epoch": 63.988078342321884, "grad_norm": 1.739241600036621, "learning_rate": 3.603746806698836e-05, "loss": 0.0004034653306007385, "step": 225430 }, { "epoch": 63.99091683224525, "grad_norm": 1.3505157232284546, "learning_rate": 3.6034629577065e-05, "loss": 0.012605640292167663, "step": 225440 }, { "epoch": 63.9937553221686, "grad_norm": 0.1039498969912529, "learning_rate": 3.603179108714164e-05, "loss": 0.0010197168216109277, "step": 225450 }, { "epoch": 63.996593812091966, "grad_norm": 0.04030343145132065, "learning_rate": 3.602895259721828e-05, "loss": 0.0020701931789517403, "step": 225460 }, { "epoch": 63.99943230201533, "grad_norm": 0.06600844860076904, "learning_rate": 3.6026114107294926e-05, "loss": 0.0017214320600032807, "step": 225470 }, { "epoch": 64.00227079193868, "grad_norm": 0.46610960364341736, "learning_rate": 3.602327561737156e-05, "loss": 0.0017236871644854546, "step": 225480 }, { "epoch": 64.00510928186205, "grad_norm": 0.03723447024822235, "learning_rate": 3.6020437127448195e-05, "loss": 0.0009357629343867302, "step": 225490 }, { "epoch": 64.00794777178541, "grad_norm": 0.13671335577964783, "learning_rate": 3.601759863752484e-05, "loss": 0.001969505473971367, "step": 225500 }, { "epoch": 64.00794777178541, "eval_accuracy": 0.9826413174794939, "eval_loss": 0.07408466935157776, "eval_runtime": 46.5749, "eval_samples_per_second": 337.671, "eval_steps_per_second": 5.282, "step": 225500 }, { "epoch": 64.01078626170877, "grad_norm": 0.5087496638298035, "learning_rate": 3.601476014760148e-05, "loss": 0.00020696185529232026, "step": 225510 }, { "epoch": 64.01362475163214, "grad_norm": 0.004378661513328552, "learning_rate": 3.601192165767812e-05, "loss": 0.0002329351380467415, "step": 225520 }, { "epoch": 64.0164632415555, "grad_norm": 0.4764362871646881, "learning_rate": 3.6009083167754754e-05, "loss": 0.0005558650940656662, "step": 225530 }, { "epoch": 64.01930173147885, "grad_norm": 0.04167768731713295, "learning_rate": 3.6006244677831395e-05, "loss": 0.0011650925502181054, "step": 225540 }, { "epoch": 64.02214022140221, "grad_norm": 0.05494198575615883, "learning_rate": 3.600340618790804e-05, "loss": 0.00039858035743236544, "step": 225550 }, { "epoch": 64.02497871132557, "grad_norm": 0.004862065427005291, "learning_rate": 3.600056769798467e-05, "loss": 0.00037061553448438647, "step": 225560 }, { "epoch": 64.02781720124894, "grad_norm": 0.014502539299428463, "learning_rate": 3.599772920806131e-05, "loss": 0.006218323111534118, "step": 225570 }, { "epoch": 64.0306556911723, "grad_norm": 0.014066819101572037, "learning_rate": 3.5994890718137954e-05, "loss": 0.00038680024445056913, "step": 225580 }, { "epoch": 64.03349418109566, "grad_norm": 0.10140771418809891, "learning_rate": 3.599205222821459e-05, "loss": 0.0002055346965789795, "step": 225590 }, { "epoch": 64.03633267101901, "grad_norm": 0.03113376349210739, "learning_rate": 3.598921373829123e-05, "loss": 0.0005363274365663528, "step": 225600 }, { "epoch": 64.03917116094237, "grad_norm": 0.4639342129230499, "learning_rate": 3.598637524836787e-05, "loss": 0.0005288006737828255, "step": 225610 }, { "epoch": 64.04200965086574, "grad_norm": 0.008506138809025288, "learning_rate": 3.5983536758444506e-05, "loss": 0.0007438719272613526, "step": 225620 }, { "epoch": 64.0448481407891, "grad_norm": 0.6661307215690613, "learning_rate": 3.598069826852115e-05, "loss": 0.0003421302884817123, "step": 225630 }, { "epoch": 64.04768663071246, "grad_norm": 0.047744568437337875, "learning_rate": 3.597785977859779e-05, "loss": 0.0002605196088552475, "step": 225640 }, { "epoch": 64.05052512063583, "grad_norm": 0.08439138531684875, "learning_rate": 3.597502128867442e-05, "loss": 0.0020632984116673468, "step": 225650 }, { "epoch": 64.05336361055919, "grad_norm": 0.02229916676878929, "learning_rate": 3.5972182798751065e-05, "loss": 0.0003127988427877426, "step": 225660 }, { "epoch": 64.05620210048254, "grad_norm": 0.04644990339875221, "learning_rate": 3.5969344308827706e-05, "loss": 8.617490530014039e-05, "step": 225670 }, { "epoch": 64.0590405904059, "grad_norm": 0.006993117276579142, "learning_rate": 3.596650581890435e-05, "loss": 0.0001124262809753418, "step": 225680 }, { "epoch": 64.06187908032926, "grad_norm": 0.17304487526416779, "learning_rate": 3.596366732898098e-05, "loss": 0.00036674104630947114, "step": 225690 }, { "epoch": 64.06471757025263, "grad_norm": 0.10672484338283539, "learning_rate": 3.5960828839057623e-05, "loss": 0.0001567533239722252, "step": 225700 }, { "epoch": 64.06755606017599, "grad_norm": 0.010858551599085331, "learning_rate": 3.5957990349134265e-05, "loss": 0.0002669155597686768, "step": 225710 }, { "epoch": 64.07039455009935, "grad_norm": 16.996131896972656, "learning_rate": 3.59551518592109e-05, "loss": 0.00431799404323101, "step": 225720 }, { "epoch": 64.07323304002271, "grad_norm": 0.2199944704771042, "learning_rate": 3.595231336928754e-05, "loss": 0.00023594629019498825, "step": 225730 }, { "epoch": 64.07607152994606, "grad_norm": 0.016196299344301224, "learning_rate": 3.594947487936418e-05, "loss": 0.0010593898594379425, "step": 225740 }, { "epoch": 64.07891001986943, "grad_norm": 0.2817826569080353, "learning_rate": 3.594663638944082e-05, "loss": 9.153168648481368e-05, "step": 225750 }, { "epoch": 64.08174850979279, "grad_norm": 0.005841007921844721, "learning_rate": 3.594379789951746e-05, "loss": 0.000455637089908123, "step": 225760 }, { "epoch": 64.08458699971615, "grad_norm": 0.01923115737736225, "learning_rate": 3.59409594095941e-05, "loss": 0.0002074098214507103, "step": 225770 }, { "epoch": 64.08742548963951, "grad_norm": 0.2590380311012268, "learning_rate": 3.5938120919670734e-05, "loss": 0.00011115279048681259, "step": 225780 }, { "epoch": 64.09026397956288, "grad_norm": 0.0018160424660891294, "learning_rate": 3.5935282429747375e-05, "loss": 0.0005535282194614411, "step": 225790 }, { "epoch": 64.09310246948624, "grad_norm": 16.935123443603516, "learning_rate": 3.593244393982402e-05, "loss": 0.010632209479808807, "step": 225800 }, { "epoch": 64.09594095940959, "grad_norm": 0.14901544153690338, "learning_rate": 3.592960544990066e-05, "loss": 0.0006383992731571198, "step": 225810 }, { "epoch": 64.09877944933295, "grad_norm": 0.003933839499950409, "learning_rate": 3.592676695997729e-05, "loss": 0.0003089135512709618, "step": 225820 }, { "epoch": 64.10161793925631, "grad_norm": 0.21759019792079926, "learning_rate": 3.5923928470053934e-05, "loss": 0.00013762153685092926, "step": 225830 }, { "epoch": 64.10445642917968, "grad_norm": 0.0026126953307539225, "learning_rate": 3.5921089980130576e-05, "loss": 0.004290907084941864, "step": 225840 }, { "epoch": 64.10729491910304, "grad_norm": 0.02140917256474495, "learning_rate": 3.591825149020721e-05, "loss": 0.0009180380031466485, "step": 225850 }, { "epoch": 64.1101334090264, "grad_norm": 0.0011922663543373346, "learning_rate": 3.591541300028385e-05, "loss": 0.01768902540206909, "step": 225860 }, { "epoch": 64.11297189894975, "grad_norm": 0.007855457253754139, "learning_rate": 3.591257451036049e-05, "loss": 0.0006170757114887238, "step": 225870 }, { "epoch": 64.11581038887311, "grad_norm": 0.002629626542329788, "learning_rate": 3.590973602043713e-05, "loss": 0.0005152890458703041, "step": 225880 }, { "epoch": 64.11864887879648, "grad_norm": 0.024713998660445213, "learning_rate": 3.590689753051377e-05, "loss": 0.0008780913427472115, "step": 225890 }, { "epoch": 64.12148736871984, "grad_norm": 3.6366183757781982, "learning_rate": 3.590405904059041e-05, "loss": 0.0007498130202293396, "step": 225900 }, { "epoch": 64.1243258586432, "grad_norm": 0.03947173058986664, "learning_rate": 3.5901220550667045e-05, "loss": 0.000869065523147583, "step": 225910 }, { "epoch": 64.12716434856657, "grad_norm": 0.0011671328684315085, "learning_rate": 3.5898382060743686e-05, "loss": 0.007079482078552246, "step": 225920 }, { "epoch": 64.13000283848993, "grad_norm": 0.012505077756941319, "learning_rate": 3.589554357082033e-05, "loss": 0.00030378829687833787, "step": 225930 }, { "epoch": 64.13284132841328, "grad_norm": 0.062497805804014206, "learning_rate": 3.589270508089697e-05, "loss": 0.0001003773882985115, "step": 225940 }, { "epoch": 64.13567981833664, "grad_norm": 0.030412262305617332, "learning_rate": 3.5889866590973604e-05, "loss": 0.002634565159678459, "step": 225950 }, { "epoch": 64.13851830826, "grad_norm": 0.06203471124172211, "learning_rate": 3.588702810105024e-05, "loss": 0.002508731745183468, "step": 225960 }, { "epoch": 64.14135679818337, "grad_norm": 0.11018843203783035, "learning_rate": 3.5884189611126886e-05, "loss": 0.0002351822331547737, "step": 225970 }, { "epoch": 64.14419528810673, "grad_norm": 0.03824147582054138, "learning_rate": 3.588135112120352e-05, "loss": 0.00013460982590913774, "step": 225980 }, { "epoch": 64.14703377803009, "grad_norm": 0.008380926214158535, "learning_rate": 3.587851263128016e-05, "loss": 8.710864931344986e-05, "step": 225990 }, { "epoch": 64.14987226795346, "grad_norm": 0.01275953184813261, "learning_rate": 3.5875674141356804e-05, "loss": 7.192548364400863e-05, "step": 226000 }, { "epoch": 64.14987226795346, "eval_accuracy": 0.9843581102562472, "eval_loss": 0.06285674124956131, "eval_runtime": 36.7878, "eval_samples_per_second": 427.506, "eval_steps_per_second": 6.687, "step": 226000 }, { "epoch": 64.1527107578768, "grad_norm": 0.003705005394294858, "learning_rate": 3.587283565143344e-05, "loss": 0.0002783382311463356, "step": 226010 }, { "epoch": 64.15554924780017, "grad_norm": 1.1339902877807617, "learning_rate": 3.586999716151008e-05, "loss": 0.0003190714865922928, "step": 226020 }, { "epoch": 64.15838773772353, "grad_norm": 0.12306306511163712, "learning_rate": 3.586715867158672e-05, "loss": 0.00018194280564785004, "step": 226030 }, { "epoch": 64.16122622764689, "grad_norm": 0.0037642696406692266, "learning_rate": 3.5864320181663356e-05, "loss": 0.00017664115875959396, "step": 226040 }, { "epoch": 64.16406471757026, "grad_norm": 0.029360685497522354, "learning_rate": 3.586148169174e-05, "loss": 0.0026428427547216414, "step": 226050 }, { "epoch": 64.16690320749362, "grad_norm": 0.011315743438899517, "learning_rate": 3.585864320181663e-05, "loss": 5.4798834025859835e-05, "step": 226060 }, { "epoch": 64.16974169741698, "grad_norm": 0.03758443892002106, "learning_rate": 3.585580471189327e-05, "loss": 0.0001937897875905037, "step": 226070 }, { "epoch": 64.17258018734033, "grad_norm": 0.0032832962460815907, "learning_rate": 3.5852966221969914e-05, "loss": 9.564105421304703e-05, "step": 226080 }, { "epoch": 64.17541867726369, "grad_norm": 0.048827510327100754, "learning_rate": 3.585012773204655e-05, "loss": 0.0001763245090842247, "step": 226090 }, { "epoch": 64.17825716718706, "grad_norm": 0.002665166277438402, "learning_rate": 3.58472892421232e-05, "loss": 0.00010263659060001373, "step": 226100 }, { "epoch": 64.18109565711042, "grad_norm": 0.0036302912048995495, "learning_rate": 3.584445075219983e-05, "loss": 8.35038721561432e-05, "step": 226110 }, { "epoch": 64.18393414703378, "grad_norm": 0.008067223243415356, "learning_rate": 3.5841612262276466e-05, "loss": 0.00016250498592853546, "step": 226120 }, { "epoch": 64.18677263695714, "grad_norm": 0.008607402443885803, "learning_rate": 3.5838773772353114e-05, "loss": 0.001415404863655567, "step": 226130 }, { "epoch": 64.1896111268805, "grad_norm": 0.006386205554008484, "learning_rate": 3.583593528242975e-05, "loss": 0.00018900521099567412, "step": 226140 }, { "epoch": 64.19244961680386, "grad_norm": 0.006216506008058786, "learning_rate": 3.583309679250639e-05, "loss": 7.067676633596421e-05, "step": 226150 }, { "epoch": 64.19528810672722, "grad_norm": 0.061448611319065094, "learning_rate": 3.5830258302583025e-05, "loss": 0.00015538223087787628, "step": 226160 }, { "epoch": 64.19812659665058, "grad_norm": 0.04128129035234451, "learning_rate": 3.5827419812659666e-05, "loss": 0.00010969638824462891, "step": 226170 }, { "epoch": 64.20096508657394, "grad_norm": 0.01882593147456646, "learning_rate": 3.582458132273631e-05, "loss": 0.00010655354708433151, "step": 226180 }, { "epoch": 64.20380357649731, "grad_norm": 0.04612499848008156, "learning_rate": 3.582174283281294e-05, "loss": 5.861129611730576e-05, "step": 226190 }, { "epoch": 64.20664206642067, "grad_norm": 0.0009653490851633251, "learning_rate": 3.5818904342889584e-05, "loss": 4.461389034986496e-05, "step": 226200 }, { "epoch": 64.20948055634402, "grad_norm": 0.0845797136425972, "learning_rate": 3.5816065852966225e-05, "loss": 3.266762942075729e-05, "step": 226210 }, { "epoch": 64.21231904626738, "grad_norm": 0.06779606640338898, "learning_rate": 3.581322736304286e-05, "loss": 0.0001915697008371353, "step": 226220 }, { "epoch": 64.21515753619074, "grad_norm": 0.05285625904798508, "learning_rate": 3.581038887311951e-05, "loss": 4.7458335757255554e-05, "step": 226230 }, { "epoch": 64.21799602611411, "grad_norm": 0.0022340198047459126, "learning_rate": 3.580755038319614e-05, "loss": 5.8879517018795015e-05, "step": 226240 }, { "epoch": 64.22083451603747, "grad_norm": 0.02364414557814598, "learning_rate": 3.580471189327278e-05, "loss": 3.9838626980781557e-05, "step": 226250 }, { "epoch": 64.22367300596083, "grad_norm": 0.0037302605342119932, "learning_rate": 3.580187340334942e-05, "loss": 0.0003961745649576187, "step": 226260 }, { "epoch": 64.2265114958842, "grad_norm": 0.15007472038269043, "learning_rate": 3.579903491342606e-05, "loss": 6.41901046037674e-05, "step": 226270 }, { "epoch": 64.22934998580754, "grad_norm": 0.003121336456388235, "learning_rate": 3.57961964235027e-05, "loss": 5.142576992511749e-05, "step": 226280 }, { "epoch": 64.23218847573091, "grad_norm": 0.01647241599857807, "learning_rate": 3.5793357933579336e-05, "loss": 8.020941168069839e-05, "step": 226290 }, { "epoch": 64.23502696565427, "grad_norm": 0.0028450125828385353, "learning_rate": 3.579051944365598e-05, "loss": 7.077232003211975e-05, "step": 226300 }, { "epoch": 64.23786545557763, "grad_norm": 0.009754850529134274, "learning_rate": 3.578768095373262e-05, "loss": 3.197845071554184e-05, "step": 226310 }, { "epoch": 64.240703945501, "grad_norm": 0.002091800095513463, "learning_rate": 3.578484246380925e-05, "loss": 4.798993468284607e-05, "step": 226320 }, { "epoch": 64.24354243542436, "grad_norm": 0.03630191087722778, "learning_rate": 3.5782003973885895e-05, "loss": 6.313025951385499e-05, "step": 226330 }, { "epoch": 64.24638092534771, "grad_norm": 0.002956799929961562, "learning_rate": 3.5779165483962536e-05, "loss": 3.303494304418564e-05, "step": 226340 }, { "epoch": 64.24921941527107, "grad_norm": 0.0015176464803516865, "learning_rate": 3.577632699403917e-05, "loss": 2.6148185133934022e-05, "step": 226350 }, { "epoch": 64.25205790519443, "grad_norm": 0.0040540387853980064, "learning_rate": 3.577348850411581e-05, "loss": 3.284793347120285e-05, "step": 226360 }, { "epoch": 64.2548963951178, "grad_norm": 0.001634802669286728, "learning_rate": 3.577065001419245e-05, "loss": 3.814715892076492e-05, "step": 226370 }, { "epoch": 64.25773488504116, "grad_norm": 0.0009367931634187698, "learning_rate": 3.576781152426909e-05, "loss": 2.283640205860138e-05, "step": 226380 }, { "epoch": 64.26057337496452, "grad_norm": 0.01573038287460804, "learning_rate": 3.576497303434573e-05, "loss": 3.502797335386276e-05, "step": 226390 }, { "epoch": 64.26341186488789, "grad_norm": 0.004221149254590273, "learning_rate": 3.576213454442237e-05, "loss": 3.5263970494270326e-05, "step": 226400 }, { "epoch": 64.26625035481123, "grad_norm": 0.03273700550198555, "learning_rate": 3.5759296054499005e-05, "loss": 6.596446037292481e-05, "step": 226410 }, { "epoch": 64.2690888447346, "grad_norm": 0.011357300914824009, "learning_rate": 3.5756457564575647e-05, "loss": 2.6952661573886872e-05, "step": 226420 }, { "epoch": 64.27192733465796, "grad_norm": 0.010726110078394413, "learning_rate": 3.575361907465229e-05, "loss": 6.874799728393555e-05, "step": 226430 }, { "epoch": 64.27476582458132, "grad_norm": 0.03872162848711014, "learning_rate": 3.575078058472893e-05, "loss": 6.034336984157562e-05, "step": 226440 }, { "epoch": 64.27760431450469, "grad_norm": 0.000707004452124238, "learning_rate": 3.5747942094805564e-05, "loss": 3.871787339448929e-05, "step": 226450 }, { "epoch": 64.28044280442805, "grad_norm": 0.004787338897585869, "learning_rate": 3.5745103604882205e-05, "loss": 2.4460814893245696e-05, "step": 226460 }, { "epoch": 64.28328129435141, "grad_norm": 0.004501992836594582, "learning_rate": 3.574226511495885e-05, "loss": 6.189253181219101e-05, "step": 226470 }, { "epoch": 64.28611978427476, "grad_norm": 0.0021113737020641565, "learning_rate": 3.573942662503548e-05, "loss": 4.7655776143074036e-05, "step": 226480 }, { "epoch": 64.28895827419812, "grad_norm": 0.015805982053279877, "learning_rate": 3.573658813511212e-05, "loss": 5.422048270702362e-05, "step": 226490 }, { "epoch": 64.29179676412149, "grad_norm": 0.005963001400232315, "learning_rate": 3.5733749645188764e-05, "loss": 7.36784189939499e-05, "step": 226500 }, { "epoch": 64.29179676412149, "eval_accuracy": 0.9879824505627265, "eval_loss": 0.05113416537642479, "eval_runtime": 46.4583, "eval_samples_per_second": 338.519, "eval_steps_per_second": 5.295, "step": 226500 }, { "epoch": 64.29463525404485, "grad_norm": 0.004738843534141779, "learning_rate": 3.57309111552654e-05, "loss": 6.0627982020378116e-05, "step": 226510 }, { "epoch": 64.29747374396821, "grad_norm": 0.002480061026290059, "learning_rate": 3.572807266534204e-05, "loss": 1.8629617989063264e-05, "step": 226520 }, { "epoch": 64.30031223389157, "grad_norm": 0.004556250758469105, "learning_rate": 3.572523417541868e-05, "loss": 4.794169217348099e-05, "step": 226530 }, { "epoch": 64.30315072381494, "grad_norm": 0.0009993930580094457, "learning_rate": 3.5722395685495316e-05, "loss": 2.1581724286079407e-05, "step": 226540 }, { "epoch": 64.30598921373829, "grad_norm": 0.0035363698843866587, "learning_rate": 3.571955719557196e-05, "loss": 0.00011793766170740128, "step": 226550 }, { "epoch": 64.30882770366165, "grad_norm": 0.004771962761878967, "learning_rate": 3.571671870564859e-05, "loss": 0.00019725430756807328, "step": 226560 }, { "epoch": 64.31166619358501, "grad_norm": 0.12345574796199799, "learning_rate": 3.571388021572524e-05, "loss": 4.2399391531944275e-05, "step": 226570 }, { "epoch": 64.31450468350837, "grad_norm": 17.906030654907227, "learning_rate": 3.571132557479421e-05, "loss": 0.015534818172454834, "step": 226580 }, { "epoch": 64.31734317343174, "grad_norm": 0.004674597177654505, "learning_rate": 3.570848708487085e-05, "loss": 0.0002201749011874199, "step": 226590 }, { "epoch": 64.3201816633551, "grad_norm": 0.0030662131030112505, "learning_rate": 3.570564859494749e-05, "loss": 0.0008298562839627266, "step": 226600 }, { "epoch": 64.32302015327845, "grad_norm": 0.013894844800233841, "learning_rate": 3.570281010502413e-05, "loss": 0.00014498159289360045, "step": 226610 }, { "epoch": 64.32585864320181, "grad_norm": 0.002875794656574726, "learning_rate": 3.569997161510077e-05, "loss": 0.00020494237542152406, "step": 226620 }, { "epoch": 64.32869713312517, "grad_norm": 0.1503196507692337, "learning_rate": 3.5697133125177407e-05, "loss": 0.0001971770077943802, "step": 226630 }, { "epoch": 64.33153562304854, "grad_norm": 0.17098578810691833, "learning_rate": 3.569429463525404e-05, "loss": 0.007326842844486236, "step": 226640 }, { "epoch": 64.3343741129719, "grad_norm": 0.00770386541262269, "learning_rate": 3.569145614533069e-05, "loss": 0.00010344311594963073, "step": 226650 }, { "epoch": 64.33721260289526, "grad_norm": 0.004303898196667433, "learning_rate": 3.5688617655407324e-05, "loss": 0.0006768515333533287, "step": 226660 }, { "epoch": 64.34005109281863, "grad_norm": 0.6247149109840393, "learning_rate": 3.5685779165483965e-05, "loss": 0.0008695447817444801, "step": 226670 }, { "epoch": 64.34288958274198, "grad_norm": 0.009323811158537865, "learning_rate": 3.568294067556061e-05, "loss": 0.00014857277274131774, "step": 226680 }, { "epoch": 64.34572807266534, "grad_norm": 0.09375907480716705, "learning_rate": 3.568010218563724e-05, "loss": 0.00013779625296592713, "step": 226690 }, { "epoch": 64.3485665625887, "grad_norm": 0.0042060501873493195, "learning_rate": 3.567726369571388e-05, "loss": 6.88387081027031e-05, "step": 226700 }, { "epoch": 64.35140505251206, "grad_norm": 0.003253744915127754, "learning_rate": 3.5674425205790524e-05, "loss": 0.0009230762720108032, "step": 226710 }, { "epoch": 64.35424354243543, "grad_norm": 0.06598623842000961, "learning_rate": 3.567158671586716e-05, "loss": 0.0002519242465496063, "step": 226720 }, { "epoch": 64.35708203235879, "grad_norm": 0.03663512319326401, "learning_rate": 3.56687482259438e-05, "loss": 0.0006474863737821579, "step": 226730 }, { "epoch": 64.35992052228215, "grad_norm": 0.16667713224887848, "learning_rate": 3.5665909736020435e-05, "loss": 0.00017526280134916306, "step": 226740 }, { "epoch": 64.3627590122055, "grad_norm": 0.14835238456726074, "learning_rate": 3.566307124609708e-05, "loss": 8.949879556894303e-05, "step": 226750 }, { "epoch": 64.36559750212886, "grad_norm": 0.046244870871305466, "learning_rate": 3.566023275617372e-05, "loss": 0.004275527596473694, "step": 226760 }, { "epoch": 64.36843599205223, "grad_norm": 0.0150996008887887, "learning_rate": 3.565739426625035e-05, "loss": 0.000367104634642601, "step": 226770 }, { "epoch": 64.37127448197559, "grad_norm": 0.004365265369415283, "learning_rate": 3.5654555776327e-05, "loss": 0.00043144989758729934, "step": 226780 }, { "epoch": 64.37411297189895, "grad_norm": 0.13210338354110718, "learning_rate": 3.5651717286403635e-05, "loss": 0.012922447919845582, "step": 226790 }, { "epoch": 64.37695146182232, "grad_norm": 12.901873588562012, "learning_rate": 3.5648878796480276e-05, "loss": 0.005135950446128845, "step": 226800 }, { "epoch": 64.37978995174566, "grad_norm": 0.06587667763233185, "learning_rate": 3.564604030655692e-05, "loss": 5.7058967649936675e-05, "step": 226810 }, { "epoch": 64.38262844166903, "grad_norm": 0.0127865606918931, "learning_rate": 3.564320181663355e-05, "loss": 0.0003233179450035095, "step": 226820 }, { "epoch": 64.38546693159239, "grad_norm": 0.2543513774871826, "learning_rate": 3.5640363326710193e-05, "loss": 0.00017877724021673203, "step": 226830 }, { "epoch": 64.38830542151575, "grad_norm": 0.08255007117986679, "learning_rate": 3.563752483678683e-05, "loss": 0.0003289893269538879, "step": 226840 }, { "epoch": 64.39114391143912, "grad_norm": 0.012037333101034164, "learning_rate": 3.563468634686347e-05, "loss": 7.800515741109847e-05, "step": 226850 }, { "epoch": 64.39398240136248, "grad_norm": 0.08622623980045319, "learning_rate": 3.563184785694011e-05, "loss": 8.860956877470017e-05, "step": 226860 }, { "epoch": 64.39682089128584, "grad_norm": 1.1056269407272339, "learning_rate": 3.5629009367016745e-05, "loss": 0.00041505005210638044, "step": 226870 }, { "epoch": 64.39965938120919, "grad_norm": 0.15025755763053894, "learning_rate": 3.562617087709339e-05, "loss": 0.0001433519646525383, "step": 226880 }, { "epoch": 64.40249787113255, "grad_norm": 0.024958165362477303, "learning_rate": 3.562333238717003e-05, "loss": 0.008951841294765473, "step": 226890 }, { "epoch": 64.40533636105592, "grad_norm": 0.08321648836135864, "learning_rate": 3.562049389724666e-05, "loss": 0.006598171591758728, "step": 226900 }, { "epoch": 64.40817485097928, "grad_norm": 8.980705261230469, "learning_rate": 3.561765540732331e-05, "loss": 0.004862837493419647, "step": 226910 }, { "epoch": 64.41101334090264, "grad_norm": 0.01655547507107258, "learning_rate": 3.5614816917399945e-05, "loss": 0.0003244034945964813, "step": 226920 }, { "epoch": 64.413851830826, "grad_norm": 0.057542189955711365, "learning_rate": 3.561197842747658e-05, "loss": 0.0031207768246531487, "step": 226930 }, { "epoch": 64.41669032074937, "grad_norm": 0.008160839788615704, "learning_rate": 3.560913993755322e-05, "loss": 0.00023453645408153534, "step": 226940 }, { "epoch": 64.41952881067272, "grad_norm": 0.0037137630861252546, "learning_rate": 3.560630144762986e-05, "loss": 0.000300256721675396, "step": 226950 }, { "epoch": 64.42236730059608, "grad_norm": 1.2314366102218628, "learning_rate": 3.5603462957706504e-05, "loss": 0.0006433252245187759, "step": 226960 }, { "epoch": 64.42520579051944, "grad_norm": 0.005933646112680435, "learning_rate": 3.560062446778314e-05, "loss": 0.0002492886036634445, "step": 226970 }, { "epoch": 64.4280442804428, "grad_norm": 0.004996987525373697, "learning_rate": 3.559778597785978e-05, "loss": 0.0024825382977724077, "step": 226980 }, { "epoch": 64.43088277036617, "grad_norm": 0.19591942429542542, "learning_rate": 3.559494748793642e-05, "loss": 0.0003467494621872902, "step": 226990 }, { "epoch": 64.43372126028953, "grad_norm": 0.011670767329633236, "learning_rate": 3.5592108998013056e-05, "loss": 0.0006520634517073632, "step": 227000 }, { "epoch": 64.43372126028953, "eval_accuracy": 0.9844216951739048, "eval_loss": 0.06608280539512634, "eval_runtime": 35.8338, "eval_samples_per_second": 438.887, "eval_steps_per_second": 6.865, "step": 227000 }, { "epoch": 64.4365597502129, "grad_norm": 0.00928043294698, "learning_rate": 3.55892705080897e-05, "loss": 0.0003508828580379486, "step": 227010 }, { "epoch": 64.43939824013624, "grad_norm": 0.2939853370189667, "learning_rate": 3.558643201816634e-05, "loss": 0.0005092643201351166, "step": 227020 }, { "epoch": 64.4422367300596, "grad_norm": 0.0009504219051450491, "learning_rate": 3.5583593528242973e-05, "loss": 0.0012920726090669631, "step": 227030 }, { "epoch": 64.44507521998297, "grad_norm": 8.281567573547363, "learning_rate": 3.5580755038319615e-05, "loss": 0.002268454059958458, "step": 227040 }, { "epoch": 64.44791370990633, "grad_norm": 0.05425143614411354, "learning_rate": 3.5577916548396256e-05, "loss": 0.00022689849138259887, "step": 227050 }, { "epoch": 64.4507521998297, "grad_norm": 0.006649784743785858, "learning_rate": 3.557507805847289e-05, "loss": 0.00012604277580976486, "step": 227060 }, { "epoch": 64.45359068975306, "grad_norm": 0.2617590129375458, "learning_rate": 3.557223956854953e-05, "loss": 0.0006793936714529991, "step": 227070 }, { "epoch": 64.4564291796764, "grad_norm": 0.06908684968948364, "learning_rate": 3.5569401078626174e-05, "loss": 0.0003853915259242058, "step": 227080 }, { "epoch": 64.45926766959977, "grad_norm": 1.9581512212753296, "learning_rate": 3.5566562588702815e-05, "loss": 0.0008331630378961563, "step": 227090 }, { "epoch": 64.46210615952313, "grad_norm": 0.010553916916251183, "learning_rate": 3.556372409877945e-05, "loss": 0.0010356307029724121, "step": 227100 }, { "epoch": 64.4649446494465, "grad_norm": 0.044034942984580994, "learning_rate": 3.556088560885609e-05, "loss": 0.006286969780921936, "step": 227110 }, { "epoch": 64.46778313936986, "grad_norm": 0.032543208450078964, "learning_rate": 3.555804711893273e-05, "loss": 0.0008339343592524528, "step": 227120 }, { "epoch": 64.47062162929322, "grad_norm": 0.04369264841079712, "learning_rate": 3.555520862900937e-05, "loss": 0.0026786642149090765, "step": 227130 }, { "epoch": 64.47346011921658, "grad_norm": 0.3602866530418396, "learning_rate": 3.555237013908601e-05, "loss": 0.0001520579680800438, "step": 227140 }, { "epoch": 64.47629860913993, "grad_norm": 0.09913728386163712, "learning_rate": 3.554953164916265e-05, "loss": 0.00021785683929920197, "step": 227150 }, { "epoch": 64.4791370990633, "grad_norm": 0.10164421796798706, "learning_rate": 3.5546693159239284e-05, "loss": 0.0004983192309737205, "step": 227160 }, { "epoch": 64.48197558898666, "grad_norm": 0.025667423382401466, "learning_rate": 3.5543854669315926e-05, "loss": 0.00015639830380678176, "step": 227170 }, { "epoch": 64.48481407891002, "grad_norm": 0.02019578032195568, "learning_rate": 3.554101617939257e-05, "loss": 0.0017991283908486365, "step": 227180 }, { "epoch": 64.48765256883338, "grad_norm": 0.8441632390022278, "learning_rate": 3.55381776894692e-05, "loss": 0.0009100951254367828, "step": 227190 }, { "epoch": 64.49049105875675, "grad_norm": 0.055672578513622284, "learning_rate": 3.553533919954584e-05, "loss": 0.0036432214081287386, "step": 227200 }, { "epoch": 64.49332954868011, "grad_norm": 2.5857670307159424, "learning_rate": 3.5532500709622484e-05, "loss": 0.0011474138125777244, "step": 227210 }, { "epoch": 64.49616803860346, "grad_norm": 0.16511724889278412, "learning_rate": 3.5529662219699126e-05, "loss": 0.00015515219420194625, "step": 227220 }, { "epoch": 64.49900652852682, "grad_norm": 0.009917899034917355, "learning_rate": 3.552682372977576e-05, "loss": 0.0010160431265830994, "step": 227230 }, { "epoch": 64.50184501845018, "grad_norm": 0.1665300726890564, "learning_rate": 3.5523985239852395e-05, "loss": 0.0003808032721281052, "step": 227240 }, { "epoch": 64.50468350837355, "grad_norm": 0.0637780949473381, "learning_rate": 3.552114674992904e-05, "loss": 0.0008501864969730378, "step": 227250 }, { "epoch": 64.50752199829691, "grad_norm": 1.5753177404403687, "learning_rate": 3.551830826000568e-05, "loss": 0.0003590971231460571, "step": 227260 }, { "epoch": 64.51036048822027, "grad_norm": 0.09003022313117981, "learning_rate": 3.551546977008232e-05, "loss": 0.0004638418555259705, "step": 227270 }, { "epoch": 64.51319897814363, "grad_norm": 0.2777996063232422, "learning_rate": 3.551263128015896e-05, "loss": 0.0006336294114589691, "step": 227280 }, { "epoch": 64.51603746806698, "grad_norm": 0.002043113810941577, "learning_rate": 3.5509792790235595e-05, "loss": 0.0034680794924497604, "step": 227290 }, { "epoch": 64.51887595799035, "grad_norm": 0.022639738395810127, "learning_rate": 3.5506954300312236e-05, "loss": 0.00023266747593879699, "step": 227300 }, { "epoch": 64.52171444791371, "grad_norm": 0.1048721894621849, "learning_rate": 3.550411581038888e-05, "loss": 0.0003335559740662575, "step": 227310 }, { "epoch": 64.52455293783707, "grad_norm": 0.004445597995072603, "learning_rate": 3.550127732046551e-05, "loss": 0.000324675440788269, "step": 227320 }, { "epoch": 64.52739142776043, "grad_norm": 0.2002563774585724, "learning_rate": 3.5498438830542154e-05, "loss": 0.00033178254961967467, "step": 227330 }, { "epoch": 64.5302299176838, "grad_norm": 0.016774578019976616, "learning_rate": 3.549560034061879e-05, "loss": 0.007364509254693985, "step": 227340 }, { "epoch": 64.53306840760715, "grad_norm": 0.21656620502471924, "learning_rate": 3.549276185069543e-05, "loss": 0.0240974098443985, "step": 227350 }, { "epoch": 64.53590689753051, "grad_norm": 7.6434197425842285, "learning_rate": 3.548992336077207e-05, "loss": 0.0055525898933410645, "step": 227360 }, { "epoch": 64.53874538745387, "grad_norm": 1.9770240783691406, "learning_rate": 3.5487084870848706e-05, "loss": 0.01891263872385025, "step": 227370 }, { "epoch": 64.54158387737724, "grad_norm": 0.0013376947026699781, "learning_rate": 3.5484246380925354e-05, "loss": 0.00012343749403953552, "step": 227380 }, { "epoch": 64.5444223673006, "grad_norm": 0.28958097100257874, "learning_rate": 3.548140789100199e-05, "loss": 0.00020664818584918975, "step": 227390 }, { "epoch": 64.54726085722396, "grad_norm": 0.0027951649390161037, "learning_rate": 3.547856940107862e-05, "loss": 8.662249892950058e-05, "step": 227400 }, { "epoch": 64.55009934714732, "grad_norm": 0.08176113665103912, "learning_rate": 3.547573091115527e-05, "loss": 0.00012594405561685562, "step": 227410 }, { "epoch": 64.55293783707067, "grad_norm": 0.0037133917212486267, "learning_rate": 3.5472892421231906e-05, "loss": 0.00012756790965795517, "step": 227420 }, { "epoch": 64.55577632699404, "grad_norm": 7.536355495452881, "learning_rate": 3.547005393130855e-05, "loss": 0.0025581372901797295, "step": 227430 }, { "epoch": 64.5586148169174, "grad_norm": 0.14660996198654175, "learning_rate": 3.546721544138518e-05, "loss": 0.000614849105477333, "step": 227440 }, { "epoch": 64.56145330684076, "grad_norm": 0.026788868010044098, "learning_rate": 3.546437695146182e-05, "loss": 0.00022968947887420655, "step": 227450 }, { "epoch": 64.56429179676412, "grad_norm": 0.03602477163076401, "learning_rate": 3.5461538461538464e-05, "loss": 0.0038960006088018416, "step": 227460 }, { "epoch": 64.56713028668749, "grad_norm": 0.7297311425209045, "learning_rate": 3.54586999716151e-05, "loss": 0.0002107229083776474, "step": 227470 }, { "epoch": 64.56996877661085, "grad_norm": 0.5052852630615234, "learning_rate": 3.545586148169174e-05, "loss": 0.00034597069025039674, "step": 227480 }, { "epoch": 64.5728072665342, "grad_norm": 0.2103443592786789, "learning_rate": 3.545302299176838e-05, "loss": 0.001987769640982151, "step": 227490 }, { "epoch": 64.57564575645756, "grad_norm": 0.07049080729484558, "learning_rate": 3.5450184501845016e-05, "loss": 0.00012617893517017365, "step": 227500 }, { "epoch": 64.57564575645756, "eval_accuracy": 0.9829592420677815, "eval_loss": 0.06994865089654922, "eval_runtime": 36.6059, "eval_samples_per_second": 429.63, "eval_steps_per_second": 6.72, "step": 227500 }, { "epoch": 64.57848424638092, "grad_norm": 0.008202899247407913, "learning_rate": 3.5447346011921665e-05, "loss": 0.00026763323694467545, "step": 227510 }, { "epoch": 64.58132273630429, "grad_norm": 0.19466188549995422, "learning_rate": 3.54445075219983e-05, "loss": 0.00023750029504299164, "step": 227520 }, { "epoch": 64.58416122622765, "grad_norm": 2.799889326095581, "learning_rate": 3.5441669032074934e-05, "loss": 0.0004140825942158699, "step": 227530 }, { "epoch": 64.58699971615101, "grad_norm": 1.01814866065979, "learning_rate": 3.543883054215158e-05, "loss": 0.0002939550206065178, "step": 227540 }, { "epoch": 64.58983820607436, "grad_norm": 0.056744497269392014, "learning_rate": 3.5435992052228217e-05, "loss": 0.0006023988127708435, "step": 227550 }, { "epoch": 64.59267669599772, "grad_norm": 0.5714821815490723, "learning_rate": 3.543315356230486e-05, "loss": 0.0001751065254211426, "step": 227560 }, { "epoch": 64.59551518592109, "grad_norm": 0.023376690223813057, "learning_rate": 3.543031507238149e-05, "loss": 0.0022639617323875426, "step": 227570 }, { "epoch": 64.59835367584445, "grad_norm": 0.0211799293756485, "learning_rate": 3.5427476582458134e-05, "loss": 0.0002794453874230385, "step": 227580 }, { "epoch": 64.60119216576781, "grad_norm": 0.016193268820643425, "learning_rate": 3.5424638092534775e-05, "loss": 0.009716961532831192, "step": 227590 }, { "epoch": 64.60403065569118, "grad_norm": 0.015952611342072487, "learning_rate": 3.542179960261141e-05, "loss": 0.0005601771175861358, "step": 227600 }, { "epoch": 64.60686914561454, "grad_norm": 0.017836978659033775, "learning_rate": 3.541896111268805e-05, "loss": 0.00016676820814609528, "step": 227610 }, { "epoch": 64.60970763553789, "grad_norm": 0.004366051405668259, "learning_rate": 3.541612262276469e-05, "loss": 0.003355065733194351, "step": 227620 }, { "epoch": 64.61254612546125, "grad_norm": 0.18472380936145782, "learning_rate": 3.541328413284133e-05, "loss": 0.001912970095872879, "step": 227630 }, { "epoch": 64.61538461538461, "grad_norm": 0.004922784399241209, "learning_rate": 3.5410445642917975e-05, "loss": 0.00013733543455600737, "step": 227640 }, { "epoch": 64.61822310530798, "grad_norm": 0.02943994663655758, "learning_rate": 3.540760715299461e-05, "loss": 0.0006705967709422112, "step": 227650 }, { "epoch": 64.62106159523134, "grad_norm": 0.0181667972356081, "learning_rate": 3.5404768663071245e-05, "loss": 0.00045491736382246016, "step": 227660 }, { "epoch": 64.6239000851547, "grad_norm": 0.05192208290100098, "learning_rate": 3.5401930173147886e-05, "loss": 0.00026190634816884993, "step": 227670 }, { "epoch": 64.62673857507806, "grad_norm": 0.009412662126123905, "learning_rate": 3.539909168322453e-05, "loss": 0.00011967699974775314, "step": 227680 }, { "epoch": 64.62957706500141, "grad_norm": 0.051978930830955505, "learning_rate": 3.539625319330117e-05, "loss": 8.18135216832161e-05, "step": 227690 }, { "epoch": 64.63241555492478, "grad_norm": 0.7898004055023193, "learning_rate": 3.53934147033778e-05, "loss": 0.0006255954504013062, "step": 227700 }, { "epoch": 64.63525404484814, "grad_norm": 0.050358597189188004, "learning_rate": 3.5390576213454445e-05, "loss": 0.0017698312178254128, "step": 227710 }, { "epoch": 64.6380925347715, "grad_norm": 0.003639731090515852, "learning_rate": 3.5387737723531086e-05, "loss": 0.00025804173201322557, "step": 227720 }, { "epoch": 64.64093102469486, "grad_norm": 0.0735655128955841, "learning_rate": 3.538489923360772e-05, "loss": 0.0002628808841109276, "step": 227730 }, { "epoch": 64.64376951461823, "grad_norm": 0.014633879996836185, "learning_rate": 3.538206074368436e-05, "loss": 0.0005049893632531166, "step": 227740 }, { "epoch": 64.64660800454159, "grad_norm": 0.007571252528578043, "learning_rate": 3.5379222253761e-05, "loss": 9.044632315635681e-05, "step": 227750 }, { "epoch": 64.64944649446494, "grad_norm": 0.034964077174663544, "learning_rate": 3.537638376383764e-05, "loss": 0.0002771053463220596, "step": 227760 }, { "epoch": 64.6522849843883, "grad_norm": 0.028603749349713326, "learning_rate": 3.537354527391428e-05, "loss": 0.00022814925760030746, "step": 227770 }, { "epoch": 64.65512347431167, "grad_norm": 0.055856093764305115, "learning_rate": 3.537070678399092e-05, "loss": 0.00031790323555469513, "step": 227780 }, { "epoch": 64.65796196423503, "grad_norm": 0.010174696333706379, "learning_rate": 3.5367868294067555e-05, "loss": 0.000726374052464962, "step": 227790 }, { "epoch": 64.66080045415839, "grad_norm": 0.003868181724101305, "learning_rate": 3.53650298041442e-05, "loss": 0.00011813826858997345, "step": 227800 }, { "epoch": 64.66363894408175, "grad_norm": 0.06709595024585724, "learning_rate": 3.536219131422084e-05, "loss": 7.578302174806594e-05, "step": 227810 }, { "epoch": 64.6664774340051, "grad_norm": 0.04999631270766258, "learning_rate": 3.535935282429747e-05, "loss": 0.000275634229183197, "step": 227820 }, { "epoch": 64.66931592392847, "grad_norm": 0.14868976175785065, "learning_rate": 3.5356514334374114e-05, "loss": 9.840521961450577e-05, "step": 227830 }, { "epoch": 64.67215441385183, "grad_norm": 0.0634658932685852, "learning_rate": 3.5353675844450755e-05, "loss": 0.00018536057323217391, "step": 227840 }, { "epoch": 64.67499290377519, "grad_norm": 0.00237785535864532, "learning_rate": 3.53508373545274e-05, "loss": 0.004169424623250961, "step": 227850 }, { "epoch": 64.67783139369855, "grad_norm": 0.03426022082567215, "learning_rate": 3.534799886460403e-05, "loss": 0.009470903128385545, "step": 227860 }, { "epoch": 64.68066988362192, "grad_norm": 0.20272880792617798, "learning_rate": 3.5345160374680666e-05, "loss": 0.00029000174254179, "step": 227870 }, { "epoch": 64.68350837354528, "grad_norm": 3.750725507736206, "learning_rate": 3.5342321884757314e-05, "loss": 0.0005640342831611633, "step": 227880 }, { "epoch": 64.68634686346863, "grad_norm": 0.07743090391159058, "learning_rate": 3.533948339483395e-05, "loss": 0.0014807762578129768, "step": 227890 }, { "epoch": 64.68918535339199, "grad_norm": 0.0020613407250493765, "learning_rate": 3.533664490491059e-05, "loss": 0.0005461160093545913, "step": 227900 }, { "epoch": 64.69202384331535, "grad_norm": 0.02858850732445717, "learning_rate": 3.533380641498723e-05, "loss": 0.00040268562734127047, "step": 227910 }, { "epoch": 64.69486233323872, "grad_norm": 0.004434154834598303, "learning_rate": 3.5330967925063866e-05, "loss": 0.0003940064460039139, "step": 227920 }, { "epoch": 64.69770082316208, "grad_norm": 0.001964749302715063, "learning_rate": 3.532812943514051e-05, "loss": 9.401030838489533e-05, "step": 227930 }, { "epoch": 64.70053931308544, "grad_norm": 0.009881130419671535, "learning_rate": 3.532529094521715e-05, "loss": 0.0004991453140974044, "step": 227940 }, { "epoch": 64.7033778030088, "grad_norm": 0.035744279623031616, "learning_rate": 3.5322452455293783e-05, "loss": 0.0003993809223175049, "step": 227950 }, { "epoch": 64.70621629293215, "grad_norm": 0.1507987082004547, "learning_rate": 3.5319613965370425e-05, "loss": 0.00012087021023035049, "step": 227960 }, { "epoch": 64.70905478285552, "grad_norm": 0.08009588718414307, "learning_rate": 3.531677547544706e-05, "loss": 0.00022233445197343825, "step": 227970 }, { "epoch": 64.71189327277888, "grad_norm": 0.012315396219491959, "learning_rate": 3.531393698552371e-05, "loss": 0.0009900499135255814, "step": 227980 }, { "epoch": 64.71473176270224, "grad_norm": 0.008216611109673977, "learning_rate": 3.531109849560034e-05, "loss": 0.0012350307777523994, "step": 227990 }, { "epoch": 64.7175702526256, "grad_norm": 1.0619935989379883, "learning_rate": 3.530826000567698e-05, "loss": 0.0002563539892435074, "step": 228000 }, { "epoch": 64.7175702526256, "eval_accuracy": 0.9829592420677815, "eval_loss": 0.07490795105695724, "eval_runtime": 37.992, "eval_samples_per_second": 413.955, "eval_steps_per_second": 6.475, "step": 228000 }, { "epoch": 64.72040874254897, "grad_norm": 0.0057776086032390594, "learning_rate": 3.5305421515753625e-05, "loss": 0.004396748542785644, "step": 228010 }, { "epoch": 64.72324723247232, "grad_norm": 0.1576072722673416, "learning_rate": 3.530258302583026e-05, "loss": 0.00021181125193834305, "step": 228020 }, { "epoch": 64.72608572239568, "grad_norm": 0.03237628936767578, "learning_rate": 3.52997445359069e-05, "loss": 0.00020931195467710495, "step": 228030 }, { "epoch": 64.72892421231904, "grad_norm": 0.024141794070601463, "learning_rate": 3.529690604598354e-05, "loss": 0.003144311159849167, "step": 228040 }, { "epoch": 64.7317627022424, "grad_norm": 0.004552090540528297, "learning_rate": 3.529406755606018e-05, "loss": 0.00026204567402601244, "step": 228050 }, { "epoch": 64.73460119216577, "grad_norm": 0.014214986935257912, "learning_rate": 3.529122906613682e-05, "loss": 0.00019946284592151642, "step": 228060 }, { "epoch": 64.73743968208913, "grad_norm": 0.18384577333927155, "learning_rate": 3.528839057621345e-05, "loss": 0.0009310042485594749, "step": 228070 }, { "epoch": 64.7402781720125, "grad_norm": 0.023134294897317886, "learning_rate": 3.5285552086290094e-05, "loss": 9.855907410383224e-05, "step": 228080 }, { "epoch": 64.74311666193584, "grad_norm": 0.021899018436670303, "learning_rate": 3.5282713596366736e-05, "loss": 0.007438589632511139, "step": 228090 }, { "epoch": 64.7459551518592, "grad_norm": 0.1797645092010498, "learning_rate": 3.527987510644337e-05, "loss": 0.008122052252292632, "step": 228100 }, { "epoch": 64.74879364178257, "grad_norm": 11.209013938903809, "learning_rate": 3.527703661652002e-05, "loss": 0.004631064087152481, "step": 228110 }, { "epoch": 64.75163213170593, "grad_norm": 0.055925656110048294, "learning_rate": 3.527419812659665e-05, "loss": 0.00014006104320287704, "step": 228120 }, { "epoch": 64.7544706216293, "grad_norm": 1.3270933628082275, "learning_rate": 3.527135963667329e-05, "loss": 0.0007096558809280395, "step": 228130 }, { "epoch": 64.75730911155266, "grad_norm": 0.03561325743794441, "learning_rate": 3.5268521146749936e-05, "loss": 0.0050932042300701145, "step": 228140 }, { "epoch": 64.76014760147602, "grad_norm": 0.00574467284604907, "learning_rate": 3.526568265682657e-05, "loss": 0.0016168203204870225, "step": 228150 }, { "epoch": 64.76298609139937, "grad_norm": 0.003756609046831727, "learning_rate": 3.526284416690321e-05, "loss": 0.00037116631865501403, "step": 228160 }, { "epoch": 64.76582458132273, "grad_norm": 0.377367228269577, "learning_rate": 3.5260005676979846e-05, "loss": 0.00028506740927696227, "step": 228170 }, { "epoch": 64.7686630712461, "grad_norm": 0.07851440459489822, "learning_rate": 3.525716718705649e-05, "loss": 0.0002123551443219185, "step": 228180 }, { "epoch": 64.77150156116946, "grad_norm": 0.14685721695423126, "learning_rate": 3.525432869713313e-05, "loss": 0.0009572369977831841, "step": 228190 }, { "epoch": 64.77434005109282, "grad_norm": 0.01040087454020977, "learning_rate": 3.5251490207209764e-05, "loss": 0.00011412352323532104, "step": 228200 }, { "epoch": 64.77717854101618, "grad_norm": 0.08050297200679779, "learning_rate": 3.5248651717286405e-05, "loss": 9.192135185003281e-05, "step": 228210 }, { "epoch": 64.78001703093955, "grad_norm": 0.005279331002384424, "learning_rate": 3.5245813227363046e-05, "loss": 3.80735844373703e-05, "step": 228220 }, { "epoch": 64.7828555208629, "grad_norm": 0.014340943656861782, "learning_rate": 3.524297473743968e-05, "loss": 9.552408009767532e-05, "step": 228230 }, { "epoch": 64.78569401078626, "grad_norm": 0.00580957904458046, "learning_rate": 3.524013624751632e-05, "loss": 0.00019503589719533921, "step": 228240 }, { "epoch": 64.78853250070962, "grad_norm": 0.11155891418457031, "learning_rate": 3.5237297757592964e-05, "loss": 0.00018645189702510835, "step": 228250 }, { "epoch": 64.79137099063298, "grad_norm": 0.005956881679594517, "learning_rate": 3.52344592676696e-05, "loss": 0.0003931790590286255, "step": 228260 }, { "epoch": 64.79420948055635, "grad_norm": 0.007008849177509546, "learning_rate": 3.523162077774624e-05, "loss": 0.002163444831967354, "step": 228270 }, { "epoch": 64.79704797047971, "grad_norm": 0.43862661719322205, "learning_rate": 3.522878228782288e-05, "loss": 0.00021794624626636505, "step": 228280 }, { "epoch": 64.79988646040306, "grad_norm": 0.346992164850235, "learning_rate": 3.5225943797899516e-05, "loss": 0.010278551280498505, "step": 228290 }, { "epoch": 64.80272495032642, "grad_norm": 0.013955553062260151, "learning_rate": 3.522310530797616e-05, "loss": 2.5919638574123383e-05, "step": 228300 }, { "epoch": 64.80556344024978, "grad_norm": 0.01202375628054142, "learning_rate": 3.52202668180528e-05, "loss": 8.662976324558258e-05, "step": 228310 }, { "epoch": 64.80840193017315, "grad_norm": 0.0009645005338825285, "learning_rate": 3.521742832812944e-05, "loss": 4.392042756080627e-05, "step": 228320 }, { "epoch": 64.81124042009651, "grad_norm": 0.006522584240883589, "learning_rate": 3.5214589838206074e-05, "loss": 0.0006132273003458977, "step": 228330 }, { "epoch": 64.81407891001987, "grad_norm": 0.4909084141254425, "learning_rate": 3.5211751348282716e-05, "loss": 0.0001451052725315094, "step": 228340 }, { "epoch": 64.81691739994324, "grad_norm": 0.038499318063259125, "learning_rate": 3.520891285835936e-05, "loss": 4.5428983867168427e-05, "step": 228350 }, { "epoch": 64.81975588986658, "grad_norm": 0.003318481845781207, "learning_rate": 3.520607436843599e-05, "loss": 5.623586475849152e-05, "step": 228360 }, { "epoch": 64.82259437978995, "grad_norm": 0.006379741709679365, "learning_rate": 3.520323587851263e-05, "loss": 8.360408246517181e-05, "step": 228370 }, { "epoch": 64.82543286971331, "grad_norm": 0.34469136595726013, "learning_rate": 3.5200397388589274e-05, "loss": 0.00036576595157384874, "step": 228380 }, { "epoch": 64.82827135963667, "grad_norm": 0.008473960682749748, "learning_rate": 3.519755889866591e-05, "loss": 0.0013836052268743515, "step": 228390 }, { "epoch": 64.83110984956004, "grad_norm": 0.10474471002817154, "learning_rate": 3.519472040874255e-05, "loss": 0.0026578601449728013, "step": 228400 }, { "epoch": 64.8339483394834, "grad_norm": 0.004848923534154892, "learning_rate": 3.519188191881919e-05, "loss": 7.431022822856903e-05, "step": 228410 }, { "epoch": 64.83678682940676, "grad_norm": 0.003001951612532139, "learning_rate": 3.5189043428895826e-05, "loss": 6.040707230567932e-05, "step": 228420 }, { "epoch": 64.83962531933011, "grad_norm": 0.12681080400943756, "learning_rate": 3.518620493897247e-05, "loss": 0.0002071872353553772, "step": 228430 }, { "epoch": 64.84246380925347, "grad_norm": 0.005063765216618776, "learning_rate": 3.518336644904911e-05, "loss": 3.971438854932785e-05, "step": 228440 }, { "epoch": 64.84530229917684, "grad_norm": 0.4144168496131897, "learning_rate": 3.518052795912575e-05, "loss": 0.0002212507650256157, "step": 228450 }, { "epoch": 64.8481407891002, "grad_norm": 0.46940526366233826, "learning_rate": 3.5177689469202385e-05, "loss": 0.00012216847389936447, "step": 228460 }, { "epoch": 64.85097927902356, "grad_norm": 0.033940836787223816, "learning_rate": 3.517485097927902e-05, "loss": 9.210482239723206e-05, "step": 228470 }, { "epoch": 64.85381776894693, "grad_norm": 0.019274191930890083, "learning_rate": 3.517201248935567e-05, "loss": 3.911294043064117e-05, "step": 228480 }, { "epoch": 64.85665625887029, "grad_norm": 0.140304297208786, "learning_rate": 3.51691739994323e-05, "loss": 0.0001417970284819603, "step": 228490 }, { "epoch": 64.85949474879364, "grad_norm": 0.13156937062740326, "learning_rate": 3.5166335509508944e-05, "loss": 0.0002686399966478348, "step": 228500 }, { "epoch": 64.85949474879364, "eval_accuracy": 0.9865199974566033, "eval_loss": 0.05812066048383713, "eval_runtime": 37.6495, "eval_samples_per_second": 417.721, "eval_steps_per_second": 6.534, "step": 228500 }, { "epoch": 64.862333238717, "grad_norm": 0.011446614749729633, "learning_rate": 3.5163497019585585e-05, "loss": 0.000831715576350689, "step": 228510 }, { "epoch": 64.86517172864036, "grad_norm": 0.00872405618429184, "learning_rate": 3.516065852966222e-05, "loss": 3.2776035368442535e-05, "step": 228520 }, { "epoch": 64.86801021856373, "grad_norm": 0.005077365320175886, "learning_rate": 3.515782003973886e-05, "loss": 0.00012477729469537736, "step": 228530 }, { "epoch": 64.87084870848709, "grad_norm": 0.005286029074341059, "learning_rate": 3.51549815498155e-05, "loss": 0.00015443582087755204, "step": 228540 }, { "epoch": 64.87368719841045, "grad_norm": 0.025016745552420616, "learning_rate": 3.515214305989214e-05, "loss": 0.00019814521074295043, "step": 228550 }, { "epoch": 64.8765256883338, "grad_norm": 0.006604786496609449, "learning_rate": 3.514930456996878e-05, "loss": 6.278455257415771e-05, "step": 228560 }, { "epoch": 64.87936417825716, "grad_norm": 0.012952739372849464, "learning_rate": 3.514646608004541e-05, "loss": 0.0030788138508796694, "step": 228570 }, { "epoch": 64.88220266818053, "grad_norm": 0.023563239723443985, "learning_rate": 3.514362759012206e-05, "loss": 0.0004870347678661346, "step": 228580 }, { "epoch": 64.88504115810389, "grad_norm": 0.012833046726882458, "learning_rate": 3.5140789100198696e-05, "loss": 0.00010491739958524704, "step": 228590 }, { "epoch": 64.88787964802725, "grad_norm": 0.20581094920635223, "learning_rate": 3.513795061027533e-05, "loss": 0.00010321103036403656, "step": 228600 }, { "epoch": 64.89071813795061, "grad_norm": 0.061915814876556396, "learning_rate": 3.513511212035198e-05, "loss": 8.82856547832489e-05, "step": 228610 }, { "epoch": 64.89355662787398, "grad_norm": 0.016004078090190887, "learning_rate": 3.513227363042861e-05, "loss": 9.119641035795211e-05, "step": 228620 }, { "epoch": 64.89639511779733, "grad_norm": 0.005586076527833939, "learning_rate": 3.5129435140505255e-05, "loss": 2.8466805815696716e-05, "step": 228630 }, { "epoch": 64.89923360772069, "grad_norm": 0.006337935570627451, "learning_rate": 3.5126596650581896e-05, "loss": 3.549624234437943e-05, "step": 228640 }, { "epoch": 64.90207209764405, "grad_norm": 0.00345336040481925, "learning_rate": 3.512375816065853e-05, "loss": 7.114838808774948e-05, "step": 228650 }, { "epoch": 64.90491058756741, "grad_norm": 0.06877045333385468, "learning_rate": 3.512091967073517e-05, "loss": 0.00011177510023117066, "step": 228660 }, { "epoch": 64.90774907749078, "grad_norm": 0.252074271440506, "learning_rate": 3.5118081180811807e-05, "loss": 7.258821278810501e-05, "step": 228670 }, { "epoch": 64.91058756741414, "grad_norm": 0.0780264139175415, "learning_rate": 3.511524269088845e-05, "loss": 4.439428448677063e-05, "step": 228680 }, { "epoch": 64.9134260573375, "grad_norm": 0.14722594618797302, "learning_rate": 3.511240420096509e-05, "loss": 7.89182260632515e-05, "step": 228690 }, { "epoch": 64.91626454726085, "grad_norm": 0.005522528663277626, "learning_rate": 3.5109565711041724e-05, "loss": 3.7496909499168396e-05, "step": 228700 }, { "epoch": 64.91910303718421, "grad_norm": 0.011691839434206486, "learning_rate": 3.5106727221118365e-05, "loss": 0.00013698413968086243, "step": 228710 }, { "epoch": 64.92194152710758, "grad_norm": 0.0021115851122885942, "learning_rate": 3.510388873119501e-05, "loss": 0.000127432681620121, "step": 228720 }, { "epoch": 64.92478001703094, "grad_norm": 0.030174557119607925, "learning_rate": 3.510105024127164e-05, "loss": 7.984209805727005e-05, "step": 228730 }, { "epoch": 64.9276185069543, "grad_norm": 0.002553570317104459, "learning_rate": 3.509821175134829e-05, "loss": 0.00019757114350795745, "step": 228740 }, { "epoch": 64.93045699687767, "grad_norm": 0.10786744952201843, "learning_rate": 3.5095373261424924e-05, "loss": 0.000131085142493248, "step": 228750 }, { "epoch": 64.93329548680101, "grad_norm": 0.010322542861104012, "learning_rate": 3.509253477150156e-05, "loss": 0.00037263091653585436, "step": 228760 }, { "epoch": 64.93613397672438, "grad_norm": 0.007748478092253208, "learning_rate": 3.50896962815782e-05, "loss": 0.00023789741098880767, "step": 228770 }, { "epoch": 64.93897246664774, "grad_norm": 0.0026713800616562366, "learning_rate": 3.508685779165484e-05, "loss": 9.14260745048523e-05, "step": 228780 }, { "epoch": 64.9418109565711, "grad_norm": 0.02071969024837017, "learning_rate": 3.508401930173148e-05, "loss": 0.0001119634136557579, "step": 228790 }, { "epoch": 64.94464944649447, "grad_norm": 0.009383153170347214, "learning_rate": 3.508118081180812e-05, "loss": 0.00042066462337970735, "step": 228800 }, { "epoch": 64.94748793641783, "grad_norm": 0.014988313429057598, "learning_rate": 3.507834232188476e-05, "loss": 0.0005969328805804253, "step": 228810 }, { "epoch": 64.95032642634119, "grad_norm": 0.004767813254147768, "learning_rate": 3.50755038319614e-05, "loss": 0.0013523202389478684, "step": 228820 }, { "epoch": 64.95316491626454, "grad_norm": 0.018931396305561066, "learning_rate": 3.5072665342038035e-05, "loss": 0.00016620047390460967, "step": 228830 }, { "epoch": 64.9560034061879, "grad_norm": 0.03568417951464653, "learning_rate": 3.5069826852114676e-05, "loss": 8.801445364952088e-05, "step": 228840 }, { "epoch": 64.95884189611127, "grad_norm": 0.004446669481694698, "learning_rate": 3.506698836219132e-05, "loss": 0.0001974513754248619, "step": 228850 }, { "epoch": 64.96168038603463, "grad_norm": 0.047316934913396835, "learning_rate": 3.506414987226795e-05, "loss": 0.00021016355603933334, "step": 228860 }, { "epoch": 64.96451887595799, "grad_norm": 0.30963456630706787, "learning_rate": 3.50613113823446e-05, "loss": 0.0005000861361622811, "step": 228870 }, { "epoch": 64.96735736588136, "grad_norm": 0.32935696840286255, "learning_rate": 3.5058472892421235e-05, "loss": 0.0004757432267069817, "step": 228880 }, { "epoch": 64.97019585580472, "grad_norm": 0.014440648257732391, "learning_rate": 3.505563440249787e-05, "loss": 0.00010825097560882569, "step": 228890 }, { "epoch": 64.97303434572807, "grad_norm": 0.0020530717447400093, "learning_rate": 3.505279591257451e-05, "loss": 0.0021806644275784493, "step": 228900 }, { "epoch": 64.97587283565143, "grad_norm": Infinity, "learning_rate": 3.504995742265115e-05, "loss": 0.010131648927927017, "step": 228910 }, { "epoch": 64.97871132557479, "grad_norm": 0.0504218265414238, "learning_rate": 3.504740278172013e-05, "loss": 0.0031764980405569075, "step": 228920 }, { "epoch": 64.98154981549816, "grad_norm": 0.04830300807952881, "learning_rate": 3.504456429179677e-05, "loss": 0.006857055425643921, "step": 228930 }, { "epoch": 64.98438830542152, "grad_norm": 0.03950970619916916, "learning_rate": 3.50417258018734e-05, "loss": 0.0010613474994897842, "step": 228940 }, { "epoch": 64.98722679534488, "grad_norm": 0.621818482875824, "learning_rate": 3.503888731195004e-05, "loss": 0.005214802548289299, "step": 228950 }, { "epoch": 64.99006528526824, "grad_norm": 0.0050929212011396885, "learning_rate": 3.5036048822026684e-05, "loss": 0.0001902453601360321, "step": 228960 }, { "epoch": 64.99290377519159, "grad_norm": 0.020117323845624924, "learning_rate": 3.5033210332103325e-05, "loss": 0.005609509721398354, "step": 228970 }, { "epoch": 64.99574226511496, "grad_norm": 0.17788217961788177, "learning_rate": 3.503037184217996e-05, "loss": 0.00027552973479032515, "step": 228980 }, { "epoch": 64.99858075503832, "grad_norm": 0.6194797158241272, "learning_rate": 3.50275333522566e-05, "loss": 0.00024327319115400315, "step": 228990 }, { "epoch": 65.00141924496168, "grad_norm": 0.007175359409302473, "learning_rate": 3.502469486233324e-05, "loss": 0.004799332469701767, "step": 229000 }, { "epoch": 65.00141924496168, "eval_accuracy": 0.9836586761620144, "eval_loss": 0.06973095238208771, "eval_runtime": 56.0884, "eval_samples_per_second": 280.396, "eval_steps_per_second": 4.386, "step": 229000 }, { "epoch": 65.00425773488504, "grad_norm": 2.846590042114258, "learning_rate": 3.502185637240988e-05, "loss": 0.0006374359130859375, "step": 229010 }, { "epoch": 65.00709622480841, "grad_norm": 0.0113025838509202, "learning_rate": 3.501901788248652e-05, "loss": 0.0008153369650244713, "step": 229020 }, { "epoch": 65.00993471473176, "grad_norm": 0.0012178018223494291, "learning_rate": 3.501617939256316e-05, "loss": 0.00017563290894031524, "step": 229030 }, { "epoch": 65.01277320465512, "grad_norm": 0.009300848469138145, "learning_rate": 3.5013340902639795e-05, "loss": 0.00019079018384218215, "step": 229040 }, { "epoch": 65.01561169457848, "grad_norm": 0.0007730338838882744, "learning_rate": 3.5010502412716436e-05, "loss": 0.001764312759041786, "step": 229050 }, { "epoch": 65.01845018450184, "grad_norm": 0.5323156714439392, "learning_rate": 3.500766392279308e-05, "loss": 0.00044369660317897794, "step": 229060 }, { "epoch": 65.02128867442521, "grad_norm": 0.047740016132593155, "learning_rate": 3.500482543286971e-05, "loss": 0.0003512507304549217, "step": 229070 }, { "epoch": 65.02412716434857, "grad_norm": 0.02048363722860813, "learning_rate": 3.5001986942946353e-05, "loss": 0.00030458178371191026, "step": 229080 }, { "epoch": 65.02696565427193, "grad_norm": 0.035247378051280975, "learning_rate": 3.4999148453022995e-05, "loss": 0.0001361340284347534, "step": 229090 }, { "epoch": 65.02980414419528, "grad_norm": 0.027213983237743378, "learning_rate": 3.499630996309963e-05, "loss": 0.00020498260855674743, "step": 229100 }, { "epoch": 65.03264263411864, "grad_norm": 2.019115924835205, "learning_rate": 3.499347147317627e-05, "loss": 0.00023473910987377168, "step": 229110 }, { "epoch": 65.03548112404201, "grad_norm": 0.03714855760335922, "learning_rate": 3.499063298325291e-05, "loss": 5.9192627668380736e-05, "step": 229120 }, { "epoch": 65.03831961396537, "grad_norm": 0.16937920451164246, "learning_rate": 3.4987794493329553e-05, "loss": 0.0005574675276875495, "step": 229130 }, { "epoch": 65.04115810388873, "grad_norm": 0.01186367031186819, "learning_rate": 3.498495600340619e-05, "loss": 0.0015527769923210145, "step": 229140 }, { "epoch": 65.0439965938121, "grad_norm": 0.002618744969367981, "learning_rate": 3.498211751348282e-05, "loss": 0.004516644030809402, "step": 229150 }, { "epoch": 65.04683508373546, "grad_norm": 3.776325225830078, "learning_rate": 3.497927902355947e-05, "loss": 0.0006106615066528321, "step": 229160 }, { "epoch": 65.04967357365881, "grad_norm": 0.016364796087145805, "learning_rate": 3.4976440533636105e-05, "loss": 0.0004093945026397705, "step": 229170 }, { "epoch": 65.05251206358217, "grad_norm": 0.00384092191234231, "learning_rate": 3.497360204371275e-05, "loss": 0.007620613276958466, "step": 229180 }, { "epoch": 65.05535055350553, "grad_norm": 0.020044682547450066, "learning_rate": 3.497076355378939e-05, "loss": 0.0013964051380753518, "step": 229190 }, { "epoch": 65.0581890434289, "grad_norm": 0.006665071472525597, "learning_rate": 3.496792506386602e-05, "loss": 0.0008273044601082802, "step": 229200 }, { "epoch": 65.06102753335226, "grad_norm": 0.13331840932369232, "learning_rate": 3.4965086573942664e-05, "loss": 0.00017506945878267288, "step": 229210 }, { "epoch": 65.06386602327562, "grad_norm": 0.03115629032254219, "learning_rate": 3.4962248084019306e-05, "loss": 0.0002587299793958664, "step": 229220 }, { "epoch": 65.06670451319899, "grad_norm": 16.751102447509766, "learning_rate": 3.495940959409594e-05, "loss": 0.014008475840091706, "step": 229230 }, { "epoch": 65.06954300312233, "grad_norm": 13.387914657592773, "learning_rate": 3.495657110417258e-05, "loss": 0.007624220848083496, "step": 229240 }, { "epoch": 65.0723814930457, "grad_norm": 0.0010294555686414242, "learning_rate": 3.495373261424922e-05, "loss": 0.005760814994573593, "step": 229250 }, { "epoch": 65.07521998296906, "grad_norm": 0.006244239397346973, "learning_rate": 3.4950894124325864e-05, "loss": 0.00010527856647968292, "step": 229260 }, { "epoch": 65.07805847289242, "grad_norm": 0.0016206455184146762, "learning_rate": 3.49480556344025e-05, "loss": 0.00014725979417562485, "step": 229270 }, { "epoch": 65.08089696281579, "grad_norm": 0.024879660457372665, "learning_rate": 3.4945217144479133e-05, "loss": 0.011736519634723663, "step": 229280 }, { "epoch": 65.08373545273915, "grad_norm": 0.07037505507469177, "learning_rate": 3.494237865455578e-05, "loss": 0.0007307210937142373, "step": 229290 }, { "epoch": 65.0865739426625, "grad_norm": 0.008946847170591354, "learning_rate": 3.4939540164632416e-05, "loss": 0.004960752278566361, "step": 229300 }, { "epoch": 65.08941243258586, "grad_norm": 0.008806504309177399, "learning_rate": 3.493670167470906e-05, "loss": 0.0013362618163228035, "step": 229310 }, { "epoch": 65.09225092250922, "grad_norm": 0.1442590057849884, "learning_rate": 3.49338631847857e-05, "loss": 0.007533515989780426, "step": 229320 }, { "epoch": 65.09508941243259, "grad_norm": 0.01922536827623844, "learning_rate": 3.4931024694862334e-05, "loss": 0.0016206827014684678, "step": 229330 }, { "epoch": 65.09792790235595, "grad_norm": 0.0014214920811355114, "learning_rate": 3.4928186204938975e-05, "loss": 0.0009204221889376641, "step": 229340 }, { "epoch": 65.10076639227931, "grad_norm": 8.78955364227295, "learning_rate": 3.4925347715015616e-05, "loss": 0.0023844124749302863, "step": 229350 }, { "epoch": 65.10360488220267, "grad_norm": 2.691113233566284, "learning_rate": 3.492250922509225e-05, "loss": 0.0004771171137690544, "step": 229360 }, { "epoch": 65.10644337212602, "grad_norm": 8.446453094482422, "learning_rate": 3.491967073516889e-05, "loss": 0.0013535825535655021, "step": 229370 }, { "epoch": 65.10928186204939, "grad_norm": 0.007594014517962933, "learning_rate": 3.491683224524553e-05, "loss": 0.0006516959518194198, "step": 229380 }, { "epoch": 65.11212035197275, "grad_norm": 0.01710556633770466, "learning_rate": 3.4913993755322175e-05, "loss": 5.2848458290100096e-05, "step": 229390 }, { "epoch": 65.11495884189611, "grad_norm": 0.01688798889517784, "learning_rate": 3.491115526539881e-05, "loss": 0.0009588507935404777, "step": 229400 }, { "epoch": 65.11779733181947, "grad_norm": 0.49030640721321106, "learning_rate": 3.4908316775475444e-05, "loss": 0.00042702928185462953, "step": 229410 }, { "epoch": 65.12063582174284, "grad_norm": 0.03833308815956116, "learning_rate": 3.490547828555209e-05, "loss": 0.00023163948208093643, "step": 229420 }, { "epoch": 65.1234743116662, "grad_norm": 0.48686647415161133, "learning_rate": 3.490263979562873e-05, "loss": 0.002527233026921749, "step": 229430 }, { "epoch": 65.12631280158955, "grad_norm": 0.010224337689578533, "learning_rate": 3.489980130570537e-05, "loss": 0.002734410390257835, "step": 229440 }, { "epoch": 65.12915129151291, "grad_norm": 0.006289627868682146, "learning_rate": 3.489696281578201e-05, "loss": 0.0005069682374596596, "step": 229450 }, { "epoch": 65.13198978143627, "grad_norm": 0.011437898501753807, "learning_rate": 3.4894124325858644e-05, "loss": 0.00010995268821716309, "step": 229460 }, { "epoch": 65.13482827135964, "grad_norm": 0.050527483224868774, "learning_rate": 3.4891285835935286e-05, "loss": 0.0003721185028553009, "step": 229470 }, { "epoch": 65.137666761283, "grad_norm": 0.019144810736179352, "learning_rate": 3.488844734601192e-05, "loss": 4.365779459476471e-05, "step": 229480 }, { "epoch": 65.14050525120636, "grad_norm": 0.10795702785253525, "learning_rate": 3.488560885608856e-05, "loss": 0.0022603563964366915, "step": 229490 }, { "epoch": 65.14334374112971, "grad_norm": 0.09241898357868195, "learning_rate": 3.48827703661652e-05, "loss": 9.406190365552902e-05, "step": 229500 }, { "epoch": 65.14334374112971, "eval_accuracy": 0.9842309404209322, "eval_loss": 0.06752821058034897, "eval_runtime": 40.2419, "eval_samples_per_second": 390.811, "eval_steps_per_second": 6.113, "step": 229500 }, { "epoch": 65.14618223105307, "grad_norm": 0.014227337203919888, "learning_rate": 3.487993187624184e-05, "loss": 0.00012100096791982651, "step": 229510 }, { "epoch": 65.14902072097644, "grad_norm": 0.08349138498306274, "learning_rate": 3.487709338631848e-05, "loss": 4.995111376047134e-05, "step": 229520 }, { "epoch": 65.1518592108998, "grad_norm": 0.015999695286154747, "learning_rate": 3.487425489639512e-05, "loss": 0.0004717500880360603, "step": 229530 }, { "epoch": 65.15469770082316, "grad_norm": 9.480664253234863, "learning_rate": 3.4871416406471755e-05, "loss": 0.010124418884515762, "step": 229540 }, { "epoch": 65.15753619074653, "grad_norm": 0.01855255663394928, "learning_rate": 3.48685779165484e-05, "loss": 0.00013072583824396132, "step": 229550 }, { "epoch": 65.16037468066989, "grad_norm": 0.02214938774704933, "learning_rate": 3.486573942662504e-05, "loss": 0.006330503523349762, "step": 229560 }, { "epoch": 65.16321317059324, "grad_norm": 0.04322821646928787, "learning_rate": 3.486290093670167e-05, "loss": 0.0004805693402886391, "step": 229570 }, { "epoch": 65.1660516605166, "grad_norm": 0.03318871930241585, "learning_rate": 3.4860062446778314e-05, "loss": 0.00028681252151727674, "step": 229580 }, { "epoch": 65.16889015043996, "grad_norm": 0.03763004392385483, "learning_rate": 3.4857223956854955e-05, "loss": 0.004714704304933548, "step": 229590 }, { "epoch": 65.17172864036333, "grad_norm": 0.02609502337872982, "learning_rate": 3.4854385466931596e-05, "loss": 0.00012238305062055587, "step": 229600 }, { "epoch": 65.17456713028669, "grad_norm": 0.0009394409134984016, "learning_rate": 3.485154697700823e-05, "loss": 0.00014515221118927002, "step": 229610 }, { "epoch": 65.17740562021005, "grad_norm": 0.0035280792508274317, "learning_rate": 3.484870848708487e-05, "loss": 0.0002991989254951477, "step": 229620 }, { "epoch": 65.18024411013342, "grad_norm": 0.45574891567230225, "learning_rate": 3.4845869997161514e-05, "loss": 0.00014299731701612474, "step": 229630 }, { "epoch": 65.18308260005676, "grad_norm": 0.014030947349965572, "learning_rate": 3.484303150723815e-05, "loss": 0.0001696491613984108, "step": 229640 }, { "epoch": 65.18592108998013, "grad_norm": 0.0710396096110344, "learning_rate": 3.484019301731479e-05, "loss": 8.093453943729401e-05, "step": 229650 }, { "epoch": 65.18875957990349, "grad_norm": 0.010647386312484741, "learning_rate": 3.483735452739143e-05, "loss": 5.911476910114288e-05, "step": 229660 }, { "epoch": 65.19159806982685, "grad_norm": 0.019612746313214302, "learning_rate": 3.4834516037468066e-05, "loss": 0.0006721261888742447, "step": 229670 }, { "epoch": 65.19443655975022, "grad_norm": 0.39474135637283325, "learning_rate": 3.483167754754471e-05, "loss": 0.00011547505855560302, "step": 229680 }, { "epoch": 65.19727504967358, "grad_norm": 0.0025478010065853596, "learning_rate": 3.482883905762135e-05, "loss": 5.801171064376831e-05, "step": 229690 }, { "epoch": 65.20011353959694, "grad_norm": 0.01673145964741707, "learning_rate": 3.482600056769798e-05, "loss": 0.0004964910447597503, "step": 229700 }, { "epoch": 65.20295202952029, "grad_norm": 0.016354594379663467, "learning_rate": 3.4823162077774624e-05, "loss": 0.0009576182812452317, "step": 229710 }, { "epoch": 65.20579051944365, "grad_norm": 0.0046132756397128105, "learning_rate": 3.4820323587851266e-05, "loss": 5.3958408534526826e-05, "step": 229720 }, { "epoch": 65.20862900936702, "grad_norm": 0.012124323286116123, "learning_rate": 3.481748509792791e-05, "loss": 5.093012005090714e-05, "step": 229730 }, { "epoch": 65.21146749929038, "grad_norm": 1.6018187999725342, "learning_rate": 3.481464660800454e-05, "loss": 0.000342569500207901, "step": 229740 }, { "epoch": 65.21430598921374, "grad_norm": 0.010346482507884502, "learning_rate": 3.481180811808118e-05, "loss": 0.00039118919521570207, "step": 229750 }, { "epoch": 65.2171444791371, "grad_norm": 1.8254241943359375, "learning_rate": 3.4808969628157825e-05, "loss": 0.00028560031205415726, "step": 229760 }, { "epoch": 65.21998296906045, "grad_norm": 0.007563559338450432, "learning_rate": 3.480613113823446e-05, "loss": 4.1655637323856355e-05, "step": 229770 }, { "epoch": 65.22282145898382, "grad_norm": 0.007981671951711178, "learning_rate": 3.48032926483111e-05, "loss": 6.696730852127076e-05, "step": 229780 }, { "epoch": 65.22565994890718, "grad_norm": 0.001979992724955082, "learning_rate": 3.480045415838774e-05, "loss": 0.00024572182446718215, "step": 229790 }, { "epoch": 65.22849843883054, "grad_norm": 0.015863727778196335, "learning_rate": 3.4797615668464377e-05, "loss": 2.985391765832901e-05, "step": 229800 }, { "epoch": 65.2313369287539, "grad_norm": 0.0035775108262896538, "learning_rate": 3.479477717854102e-05, "loss": 9.238608181476593e-05, "step": 229810 }, { "epoch": 65.23417541867727, "grad_norm": 0.003944906406104565, "learning_rate": 3.479193868861766e-05, "loss": 7.207244634628296e-05, "step": 229820 }, { "epoch": 65.23701390860063, "grad_norm": 0.005128596443682909, "learning_rate": 3.4789100198694294e-05, "loss": 2.6979297399520873e-05, "step": 229830 }, { "epoch": 65.23985239852398, "grad_norm": 0.0509212389588356, "learning_rate": 3.4786261708770935e-05, "loss": 5.798861384391785e-05, "step": 229840 }, { "epoch": 65.24269088844734, "grad_norm": 0.0032366232480853796, "learning_rate": 3.478342321884758e-05, "loss": 2.6786327362060546e-05, "step": 229850 }, { "epoch": 65.2455293783707, "grad_norm": 0.0026773507706820965, "learning_rate": 3.478058472892422e-05, "loss": 0.0003405800089240074, "step": 229860 }, { "epoch": 65.24836786829407, "grad_norm": 0.008549494668841362, "learning_rate": 3.477774623900085e-05, "loss": 0.0001960044726729393, "step": 229870 }, { "epoch": 65.25120635821743, "grad_norm": 0.012404138222336769, "learning_rate": 3.477490774907749e-05, "loss": 0.0004027349874377251, "step": 229880 }, { "epoch": 65.2540448481408, "grad_norm": 0.03592286631464958, "learning_rate": 3.4772069259154135e-05, "loss": 0.00025800950825214384, "step": 229890 }, { "epoch": 65.25688333806416, "grad_norm": 0.022550435736775398, "learning_rate": 3.476923076923077e-05, "loss": 0.0009288687258958816, "step": 229900 }, { "epoch": 65.2597218279875, "grad_norm": 0.021712807938456535, "learning_rate": 3.476639227930741e-05, "loss": 0.00037909504026174546, "step": 229910 }, { "epoch": 65.26256031791087, "grad_norm": 0.006486019119620323, "learning_rate": 3.476355378938405e-05, "loss": 0.0003831017762422562, "step": 229920 }, { "epoch": 65.26539880783423, "grad_norm": 0.014891204424202442, "learning_rate": 3.476071529946069e-05, "loss": 6.495323032140732e-05, "step": 229930 }, { "epoch": 65.2682372977576, "grad_norm": 0.0021081394515931606, "learning_rate": 3.475787680953733e-05, "loss": 8.708480745553971e-05, "step": 229940 }, { "epoch": 65.27107578768096, "grad_norm": 0.005093730520457029, "learning_rate": 3.475503831961397e-05, "loss": 0.0002530783414840698, "step": 229950 }, { "epoch": 65.27391427760432, "grad_norm": 0.047110240906476974, "learning_rate": 3.4752199829690605e-05, "loss": 0.00010642986744642257, "step": 229960 }, { "epoch": 65.27675276752768, "grad_norm": 9.994388580322266, "learning_rate": 3.4749361339767246e-05, "loss": 0.0017346305772662162, "step": 229970 }, { "epoch": 65.27959125745103, "grad_norm": 0.06089295819401741, "learning_rate": 3.474652284984388e-05, "loss": 0.0004894683137536049, "step": 229980 }, { "epoch": 65.2824297473744, "grad_norm": 0.010776916518807411, "learning_rate": 3.474368435992052e-05, "loss": 6.340853869915008e-05, "step": 229990 }, { "epoch": 65.28526823729776, "grad_norm": 0.03188782185316086, "learning_rate": 3.474084586999716e-05, "loss": 0.008003702759742737, "step": 230000 }, { "epoch": 65.28526823729776, "eval_accuracy": 0.9870286767978635, "eval_loss": 0.06156713515520096, "eval_runtime": 37.8511, "eval_samples_per_second": 415.497, "eval_steps_per_second": 6.499, "step": 230000 }, { "epoch": 65.28810672722112, "grad_norm": 0.044653162360191345, "learning_rate": 3.47380073800738e-05, "loss": 0.0002499869093298912, "step": 230010 }, { "epoch": 65.29094521714448, "grad_norm": 0.39062851667404175, "learning_rate": 3.4735168890150446e-05, "loss": 0.0004996728152036667, "step": 230020 }, { "epoch": 65.29378370706785, "grad_norm": 0.004869213327765465, "learning_rate": 3.473233040022708e-05, "loss": 9.809359908103943e-05, "step": 230030 }, { "epoch": 65.2966221969912, "grad_norm": 0.002177059417590499, "learning_rate": 3.4729491910303715e-05, "loss": 0.0003807734698057175, "step": 230040 }, { "epoch": 65.29946068691456, "grad_norm": 0.04523758217692375, "learning_rate": 3.4726653420380363e-05, "loss": 4.865117371082306e-05, "step": 230050 }, { "epoch": 65.30229917683792, "grad_norm": 0.013192479498684406, "learning_rate": 3.4723814930457e-05, "loss": 0.00015047676861286164, "step": 230060 }, { "epoch": 65.30513766676128, "grad_norm": 0.006088589783757925, "learning_rate": 3.472097644053364e-05, "loss": 0.00014968402683734894, "step": 230070 }, { "epoch": 65.30797615668465, "grad_norm": 0.01026007067412138, "learning_rate": 3.4718137950610274e-05, "loss": 0.00037469416856765746, "step": 230080 }, { "epoch": 65.31081464660801, "grad_norm": 0.3189006745815277, "learning_rate": 3.4715299460686915e-05, "loss": 0.00012335367500782013, "step": 230090 }, { "epoch": 65.31365313653137, "grad_norm": 0.11777099221944809, "learning_rate": 3.471246097076356e-05, "loss": 6.976071745157242e-05, "step": 230100 }, { "epoch": 65.31649162645472, "grad_norm": 0.014472998678684235, "learning_rate": 3.470962248084019e-05, "loss": 0.0011014699935913085, "step": 230110 }, { "epoch": 65.31933011637808, "grad_norm": 0.27435362339019775, "learning_rate": 3.470678399091683e-05, "loss": 0.0007008934393525123, "step": 230120 }, { "epoch": 65.32216860630145, "grad_norm": 0.002500878181308508, "learning_rate": 3.4703945500993474e-05, "loss": 0.00011038705706596374, "step": 230130 }, { "epoch": 65.32500709622481, "grad_norm": 0.00713641382753849, "learning_rate": 3.470110701107011e-05, "loss": 0.00022553689777851104, "step": 230140 }, { "epoch": 65.32784558614817, "grad_norm": 0.011159197427332401, "learning_rate": 3.469826852114676e-05, "loss": 5.200672894716263e-05, "step": 230150 }, { "epoch": 65.33068407607153, "grad_norm": 0.5290109515190125, "learning_rate": 3.469543003122339e-05, "loss": 0.00014095846563577652, "step": 230160 }, { "epoch": 65.3335225659949, "grad_norm": 0.015051244758069515, "learning_rate": 3.4692591541300026e-05, "loss": 0.0023963794112205504, "step": 230170 }, { "epoch": 65.33636105591825, "grad_norm": 2.6504554748535156, "learning_rate": 3.468975305137667e-05, "loss": 0.0010657243430614472, "step": 230180 }, { "epoch": 65.33919954584161, "grad_norm": 0.00581978028640151, "learning_rate": 3.468691456145331e-05, "loss": 0.00012323874980211258, "step": 230190 }, { "epoch": 65.34203803576497, "grad_norm": 0.23373380303382874, "learning_rate": 3.468407607152995e-05, "loss": 0.0020123636350035667, "step": 230200 }, { "epoch": 65.34487652568833, "grad_norm": 0.020936403423547745, "learning_rate": 3.4681237581606585e-05, "loss": 0.00031824447214603426, "step": 230210 }, { "epoch": 65.3477150156117, "grad_norm": 0.046851105988025665, "learning_rate": 3.4678399091683226e-05, "loss": 0.0019684717059135436, "step": 230220 }, { "epoch": 65.35055350553506, "grad_norm": 0.3889978528022766, "learning_rate": 3.467556060175987e-05, "loss": 0.00034755785018205645, "step": 230230 }, { "epoch": 65.35339199545841, "grad_norm": 0.31440818309783936, "learning_rate": 3.46727221118365e-05, "loss": 0.0008139543235301972, "step": 230240 }, { "epoch": 65.35623048538177, "grad_norm": 0.09692401438951492, "learning_rate": 3.4669883621913144e-05, "loss": 0.00014828871935606002, "step": 230250 }, { "epoch": 65.35906897530514, "grad_norm": 0.48206689953804016, "learning_rate": 3.4667045131989785e-05, "loss": 0.00042759496718645094, "step": 230260 }, { "epoch": 65.3619074652285, "grad_norm": 0.000984098413027823, "learning_rate": 3.466420664206642e-05, "loss": 0.000717153400182724, "step": 230270 }, { "epoch": 65.36474595515186, "grad_norm": 0.9734203815460205, "learning_rate": 3.466136815214306e-05, "loss": 0.00021919850260019303, "step": 230280 }, { "epoch": 65.36758444507522, "grad_norm": 0.5758187770843506, "learning_rate": 3.46585296622197e-05, "loss": 0.00023513156920671464, "step": 230290 }, { "epoch": 65.37042293499859, "grad_norm": 0.4690074324607849, "learning_rate": 3.465569117229634e-05, "loss": 0.0001295514404773712, "step": 230300 }, { "epoch": 65.37326142492194, "grad_norm": 0.0031786735635250807, "learning_rate": 3.465285268237298e-05, "loss": 0.00013785287737846374, "step": 230310 }, { "epoch": 65.3760999148453, "grad_norm": 0.010793385095894337, "learning_rate": 3.465001419244962e-05, "loss": 0.0001598261296749115, "step": 230320 }, { "epoch": 65.37893840476866, "grad_norm": 0.06821922957897186, "learning_rate": 3.464717570252626e-05, "loss": 0.006816406548023224, "step": 230330 }, { "epoch": 65.38177689469202, "grad_norm": 0.047747015953063965, "learning_rate": 3.4644337212602896e-05, "loss": 9.623859077692031e-05, "step": 230340 }, { "epoch": 65.38461538461539, "grad_norm": 1.0418263673782349, "learning_rate": 3.464149872267954e-05, "loss": 0.00022621173411607742, "step": 230350 }, { "epoch": 65.38745387453875, "grad_norm": 0.773369550704956, "learning_rate": 3.463866023275618e-05, "loss": 0.0006789933890104294, "step": 230360 }, { "epoch": 65.39029236446211, "grad_norm": 0.007325479760766029, "learning_rate": 3.463582174283281e-05, "loss": 0.00010769888758659363, "step": 230370 }, { "epoch": 65.39313085438546, "grad_norm": 0.07935050129890442, "learning_rate": 3.4632983252909454e-05, "loss": 0.004928858578205108, "step": 230380 }, { "epoch": 65.39596934430882, "grad_norm": 3.9760019779205322, "learning_rate": 3.4630144762986096e-05, "loss": 0.001719455048441887, "step": 230390 }, { "epoch": 65.39880783423219, "grad_norm": 0.018965020775794983, "learning_rate": 3.462730627306273e-05, "loss": 0.0013917502015829085, "step": 230400 }, { "epoch": 65.40164632415555, "grad_norm": 0.024776309728622437, "learning_rate": 3.462446778313937e-05, "loss": 0.0002328086644411087, "step": 230410 }, { "epoch": 65.40448481407891, "grad_norm": 0.025489652529358864, "learning_rate": 3.462162929321601e-05, "loss": 9.237546473741531e-05, "step": 230420 }, { "epoch": 65.40732330400228, "grad_norm": 0.019740872085094452, "learning_rate": 3.461879080329265e-05, "loss": 0.0003940422087907791, "step": 230430 }, { "epoch": 65.41016179392564, "grad_norm": 0.04974616318941116, "learning_rate": 3.461595231336929e-05, "loss": 0.00058012455701828, "step": 230440 }, { "epoch": 65.41300028384899, "grad_norm": 0.0703747346997261, "learning_rate": 3.461311382344593e-05, "loss": 0.00035785306245088576, "step": 230450 }, { "epoch": 65.41583877377235, "grad_norm": 0.0053597805090248585, "learning_rate": 3.4610275333522565e-05, "loss": 0.00016375835984945298, "step": 230460 }, { "epoch": 65.41867726369571, "grad_norm": 0.30211302638053894, "learning_rate": 3.4607436843599206e-05, "loss": 0.00014077909290790557, "step": 230470 }, { "epoch": 65.42151575361908, "grad_norm": 0.00561558036133647, "learning_rate": 3.460459835367584e-05, "loss": 0.0001291075721383095, "step": 230480 }, { "epoch": 65.42435424354244, "grad_norm": 0.06526461243629456, "learning_rate": 3.460175986375249e-05, "loss": 9.697992354631423e-05, "step": 230490 }, { "epoch": 65.4271927334658, "grad_norm": 0.0786593034863472, "learning_rate": 3.4598921373829124e-05, "loss": 0.00011079106479883194, "step": 230500 }, { "epoch": 65.4271927334658, "eval_accuracy": 0.984930374515165, "eval_loss": 0.06282936781644821, "eval_runtime": 43.9079, "eval_samples_per_second": 358.182, "eval_steps_per_second": 5.603, "step": 230500 }, { "epoch": 65.43003122338915, "grad_norm": 0.013547394424676895, "learning_rate": 3.459608288390576e-05, "loss": 9.400416165590286e-05, "step": 230510 }, { "epoch": 65.43286971331251, "grad_norm": 0.008027222007513046, "learning_rate": 3.4593244393982406e-05, "loss": 0.0005367225036025048, "step": 230520 }, { "epoch": 65.43570820323588, "grad_norm": 0.006928610149770975, "learning_rate": 3.459040590405904e-05, "loss": 0.0002901703119277954, "step": 230530 }, { "epoch": 65.43854669315924, "grad_norm": 0.22228747606277466, "learning_rate": 3.458756741413568e-05, "loss": 0.00263843759894371, "step": 230540 }, { "epoch": 65.4413851830826, "grad_norm": 0.09933988004922867, "learning_rate": 3.4584728924212324e-05, "loss": 0.001780715212225914, "step": 230550 }, { "epoch": 65.44422367300596, "grad_norm": 0.4113050103187561, "learning_rate": 3.458189043428896e-05, "loss": 0.0004573449492454529, "step": 230560 }, { "epoch": 65.44706216292933, "grad_norm": 0.002332474337890744, "learning_rate": 3.45790519443656e-05, "loss": 0.00014162026345729828, "step": 230570 }, { "epoch": 65.44990065285268, "grad_norm": 0.02116672694683075, "learning_rate": 3.457621345444224e-05, "loss": 0.00010560974478721619, "step": 230580 }, { "epoch": 65.45273914277604, "grad_norm": 0.07129030674695969, "learning_rate": 3.4573374964518876e-05, "loss": 8.363369852304459e-05, "step": 230590 }, { "epoch": 65.4555776326994, "grad_norm": 0.08548523485660553, "learning_rate": 3.457053647459552e-05, "loss": 0.00011617019772529602, "step": 230600 }, { "epoch": 65.45841612262276, "grad_norm": 0.0064292834140360355, "learning_rate": 3.456769798467215e-05, "loss": 0.0002992687746882439, "step": 230610 }, { "epoch": 65.46125461254613, "grad_norm": 0.82362300157547, "learning_rate": 3.45648594947488e-05, "loss": 0.00012556463479995728, "step": 230620 }, { "epoch": 65.46409310246949, "grad_norm": 0.015948649495840073, "learning_rate": 3.4562021004825434e-05, "loss": 0.000180898979306221, "step": 230630 }, { "epoch": 65.46693159239285, "grad_norm": 0.010941893793642521, "learning_rate": 3.455918251490207e-05, "loss": 0.0004713352769613266, "step": 230640 }, { "epoch": 65.4697700823162, "grad_norm": 0.0060231368988752365, "learning_rate": 3.455634402497872e-05, "loss": 5.295034497976303e-05, "step": 230650 }, { "epoch": 65.47260857223957, "grad_norm": 0.005316644906997681, "learning_rate": 3.455350553505535e-05, "loss": 0.00025203730911016464, "step": 230660 }, { "epoch": 65.47544706216293, "grad_norm": 0.0013148549478501081, "learning_rate": 3.455066704513199e-05, "loss": 0.00011457279324531556, "step": 230670 }, { "epoch": 65.47828555208629, "grad_norm": 0.004525907337665558, "learning_rate": 3.4547828555208635e-05, "loss": 0.0003191996365785599, "step": 230680 }, { "epoch": 65.48112404200965, "grad_norm": 0.12731514871120453, "learning_rate": 3.454499006528527e-05, "loss": 0.00012164916843175888, "step": 230690 }, { "epoch": 65.48396253193302, "grad_norm": 0.006515929475426674, "learning_rate": 3.454215157536191e-05, "loss": 0.0008199883624911309, "step": 230700 }, { "epoch": 65.48680102185637, "grad_norm": 0.009221786633133888, "learning_rate": 3.4539313085438545e-05, "loss": 2.5052763521671296e-05, "step": 230710 }, { "epoch": 65.48963951177973, "grad_norm": 0.01420717965811491, "learning_rate": 3.4536474595515186e-05, "loss": 0.00015066713094711304, "step": 230720 }, { "epoch": 65.49247800170309, "grad_norm": 0.003901892574504018, "learning_rate": 3.453363610559183e-05, "loss": 0.0006736796349287033, "step": 230730 }, { "epoch": 65.49531649162645, "grad_norm": 0.02199464477598667, "learning_rate": 3.453079761566846e-05, "loss": 0.00012125540524721146, "step": 230740 }, { "epoch": 65.49815498154982, "grad_norm": 0.004983772058039904, "learning_rate": 3.452795912574511e-05, "loss": 0.001009381003677845, "step": 230750 }, { "epoch": 65.50099347147318, "grad_norm": 0.01874159649014473, "learning_rate": 3.4525120635821745e-05, "loss": 0.0002959856763482094, "step": 230760 }, { "epoch": 65.50383196139654, "grad_norm": 0.009439926594495773, "learning_rate": 3.452228214589838e-05, "loss": 0.00021450668573379516, "step": 230770 }, { "epoch": 65.50667045131989, "grad_norm": 0.48245522379875183, "learning_rate": 3.451944365597503e-05, "loss": 0.0002918185666203499, "step": 230780 }, { "epoch": 65.50950894124325, "grad_norm": 0.0726826936006546, "learning_rate": 3.451660516605166e-05, "loss": 0.0002722049131989479, "step": 230790 }, { "epoch": 65.51234743116662, "grad_norm": 0.014508053660392761, "learning_rate": 3.4513766676128304e-05, "loss": 9.119287133216858e-05, "step": 230800 }, { "epoch": 65.51518592108998, "grad_norm": 0.005075044929981232, "learning_rate": 3.451092818620494e-05, "loss": 6.677135825157165e-05, "step": 230810 }, { "epoch": 65.51802441101334, "grad_norm": 4.511605739593506, "learning_rate": 3.450808969628158e-05, "loss": 0.0011112160980701446, "step": 230820 }, { "epoch": 65.5208629009367, "grad_norm": 0.17381234467029572, "learning_rate": 3.450525120635822e-05, "loss": 0.00011477842926979064, "step": 230830 }, { "epoch": 65.52370139086007, "grad_norm": 0.031050898134708405, "learning_rate": 3.4502412716434856e-05, "loss": 0.00015519019216299058, "step": 230840 }, { "epoch": 65.52653988078342, "grad_norm": 0.0037401914596557617, "learning_rate": 3.44995742265115e-05, "loss": 5.8321841061115264e-05, "step": 230850 }, { "epoch": 65.52937837070678, "grad_norm": 2.871772289276123, "learning_rate": 3.449673573658814e-05, "loss": 0.0006246397271752358, "step": 230860 }, { "epoch": 65.53221686063014, "grad_norm": 0.5024873614311218, "learning_rate": 3.449389724666477e-05, "loss": 0.0003998853266239166, "step": 230870 }, { "epoch": 65.5350553505535, "grad_norm": 0.12258068472146988, "learning_rate": 3.4491058756741415e-05, "loss": 0.0004271833226084709, "step": 230880 }, { "epoch": 65.53789384047687, "grad_norm": 0.0314621739089489, "learning_rate": 3.4488220266818056e-05, "loss": 0.00045341700315475463, "step": 230890 }, { "epoch": 65.54073233040023, "grad_norm": 0.0023105370346456766, "learning_rate": 3.448538177689469e-05, "loss": 0.00013671405613422394, "step": 230900 }, { "epoch": 65.5435708203236, "grad_norm": 0.004480971489101648, "learning_rate": 3.448254328697133e-05, "loss": 6.458628922700882e-05, "step": 230910 }, { "epoch": 65.54640931024694, "grad_norm": 0.013796827755868435, "learning_rate": 3.447970479704797e-05, "loss": 4.9616582691669465e-05, "step": 230920 }, { "epoch": 65.5492478001703, "grad_norm": 0.03949625790119171, "learning_rate": 3.447686630712461e-05, "loss": 7.441192865371704e-05, "step": 230930 }, { "epoch": 65.55208629009367, "grad_norm": 0.009198426268994808, "learning_rate": 3.447402781720125e-05, "loss": 3.745444118976593e-05, "step": 230940 }, { "epoch": 65.55492478001703, "grad_norm": 0.3299429714679718, "learning_rate": 3.447118932727789e-05, "loss": 0.00020638387650251389, "step": 230950 }, { "epoch": 65.5577632699404, "grad_norm": 1.2090529203414917, "learning_rate": 3.446835083735453e-05, "loss": 0.0005548352375626564, "step": 230960 }, { "epoch": 65.56060175986376, "grad_norm": 0.364031583070755, "learning_rate": 3.446551234743117e-05, "loss": 0.0001874484121799469, "step": 230970 }, { "epoch": 65.5634402497871, "grad_norm": 0.011207721196115017, "learning_rate": 3.446267385750781e-05, "loss": 0.00011548846960067749, "step": 230980 }, { "epoch": 65.56627873971047, "grad_norm": 0.030919041484594345, "learning_rate": 3.445983536758445e-05, "loss": 0.0025461789220571516, "step": 230990 }, { "epoch": 65.56911722963383, "grad_norm": 0.02099393680691719, "learning_rate": 3.4456996877661084e-05, "loss": 7.705874741077423e-05, "step": 231000 }, { "epoch": 65.56911722963383, "eval_accuracy": 0.9864564125389458, "eval_loss": 0.059417933225631714, "eval_runtime": 37.6357, "eval_samples_per_second": 417.875, "eval_steps_per_second": 6.536, "step": 231000 }, { "epoch": 65.5719557195572, "grad_norm": 0.038687750697135925, "learning_rate": 3.4454158387737725e-05, "loss": 6.751026958227158e-05, "step": 231010 }, { "epoch": 65.57479420948056, "grad_norm": 0.01243953313678503, "learning_rate": 3.445131989781437e-05, "loss": 5.63155859708786e-05, "step": 231020 }, { "epoch": 65.57763269940392, "grad_norm": 0.0023460916709154844, "learning_rate": 3.4448481407891e-05, "loss": 0.0006981050595641136, "step": 231030 }, { "epoch": 65.58047118932728, "grad_norm": 0.07797157764434814, "learning_rate": 3.444564291796764e-05, "loss": 0.00023263543844223022, "step": 231040 }, { "epoch": 65.58330967925063, "grad_norm": 0.08936918526887894, "learning_rate": 3.4442804428044284e-05, "loss": 0.0002469690516591072, "step": 231050 }, { "epoch": 65.586148169174, "grad_norm": 0.012039345689117908, "learning_rate": 3.443996593812092e-05, "loss": 0.00033298451453447344, "step": 231060 }, { "epoch": 65.58898665909736, "grad_norm": 0.0071246386505663395, "learning_rate": 3.443712744819756e-05, "loss": 0.00022660680115222932, "step": 231070 }, { "epoch": 65.59182514902072, "grad_norm": 0.009801395237445831, "learning_rate": 3.44342889582742e-05, "loss": 6.420873105525971e-05, "step": 231080 }, { "epoch": 65.59466363894408, "grad_norm": 0.009223053231835365, "learning_rate": 3.443145046835084e-05, "loss": 0.0008747847750782967, "step": 231090 }, { "epoch": 65.59750212886745, "grad_norm": 0.029488585889339447, "learning_rate": 3.442861197842748e-05, "loss": 0.00039435140788555143, "step": 231100 }, { "epoch": 65.60034061879081, "grad_norm": 0.08516252040863037, "learning_rate": 3.442577348850411e-05, "loss": 9.471811354160309e-05, "step": 231110 }, { "epoch": 65.60317910871416, "grad_norm": 0.010047642514109612, "learning_rate": 3.442293499858076e-05, "loss": 0.00018281489610671997, "step": 231120 }, { "epoch": 65.60601759863752, "grad_norm": 0.0032867996487766504, "learning_rate": 3.442038035764973e-05, "loss": 0.002550334483385086, "step": 231130 }, { "epoch": 65.60885608856088, "grad_norm": 0.0044611981138587, "learning_rate": 3.4417541867726375e-05, "loss": 0.0038059722632169724, "step": 231140 }, { "epoch": 65.61169457848425, "grad_norm": 3.744706153869629, "learning_rate": 3.441470337780301e-05, "loss": 0.001018400490283966, "step": 231150 }, { "epoch": 65.61453306840761, "grad_norm": 0.01478941272944212, "learning_rate": 3.441186488787965e-05, "loss": 0.00035997126251459123, "step": 231160 }, { "epoch": 65.61737155833097, "grad_norm": 0.5756787061691284, "learning_rate": 3.440902639795629e-05, "loss": 0.003617028146982193, "step": 231170 }, { "epoch": 65.62021004825434, "grad_norm": 0.024172969162464142, "learning_rate": 3.440618790803293e-05, "loss": 0.01254870593547821, "step": 231180 }, { "epoch": 65.62304853817768, "grad_norm": 1.1108304262161255, "learning_rate": 3.440334941810957e-05, "loss": 0.008457779139280319, "step": 231190 }, { "epoch": 65.62588702810105, "grad_norm": 0.05069897323846817, "learning_rate": 3.440051092818621e-05, "loss": 0.0008040929213166236, "step": 231200 }, { "epoch": 65.62872551802441, "grad_norm": 0.04258815199136734, "learning_rate": 3.4397672438262844e-05, "loss": 0.00031028259545564654, "step": 231210 }, { "epoch": 65.63156400794777, "grad_norm": 0.013318895362317562, "learning_rate": 3.4394833948339485e-05, "loss": 0.0031757064163684843, "step": 231220 }, { "epoch": 65.63440249787114, "grad_norm": 3.462832450866699, "learning_rate": 3.439199545841613e-05, "loss": 0.001998214237391949, "step": 231230 }, { "epoch": 65.6372409877945, "grad_norm": 0.012079249136149883, "learning_rate": 3.438915696849276e-05, "loss": 0.0012079497799277305, "step": 231240 }, { "epoch": 65.64007947771785, "grad_norm": 9.906033515930176, "learning_rate": 3.43863184785694e-05, "loss": 0.006940907239913941, "step": 231250 }, { "epoch": 65.64291796764121, "grad_norm": 0.3499726355075836, "learning_rate": 3.4383479988646044e-05, "loss": 0.007694694399833679, "step": 231260 }, { "epoch": 65.64575645756457, "grad_norm": 0.4347386360168457, "learning_rate": 3.4380641498722685e-05, "loss": 0.0061622846871614454, "step": 231270 }, { "epoch": 65.64859494748794, "grad_norm": 0.03273846209049225, "learning_rate": 3.437780300879932e-05, "loss": 0.001580776646733284, "step": 231280 }, { "epoch": 65.6514334374113, "grad_norm": 0.04431765899062157, "learning_rate": 3.4374964518875955e-05, "loss": 0.0028724506497383117, "step": 231290 }, { "epoch": 65.65427192733466, "grad_norm": 0.006807493045926094, "learning_rate": 3.43721260289526e-05, "loss": 0.007493072748184204, "step": 231300 }, { "epoch": 65.65711041725802, "grad_norm": 0.24263834953308105, "learning_rate": 3.436928753902924e-05, "loss": 0.002577562630176544, "step": 231310 }, { "epoch": 65.65994890718137, "grad_norm": 0.031389471143484116, "learning_rate": 3.436644904910588e-05, "loss": 0.0012030867859721185, "step": 231320 }, { "epoch": 65.66278739710474, "grad_norm": 0.017468268051743507, "learning_rate": 3.436361055918252e-05, "loss": 0.0006152160465717315, "step": 231330 }, { "epoch": 65.6656258870281, "grad_norm": 0.018709156662225723, "learning_rate": 3.4360772069259155e-05, "loss": 0.002352485805749893, "step": 231340 }, { "epoch": 65.66846437695146, "grad_norm": 2.624246120452881, "learning_rate": 3.4357933579335796e-05, "loss": 0.0040108591318130495, "step": 231350 }, { "epoch": 65.67130286687483, "grad_norm": 0.05368318781256676, "learning_rate": 3.435509508941244e-05, "loss": 0.00019634552299976348, "step": 231360 }, { "epoch": 65.67414135679819, "grad_norm": 1.0679477453231812, "learning_rate": 3.4352540448481404e-05, "loss": 0.004305846989154816, "step": 231370 }, { "epoch": 65.67697984672155, "grad_norm": 0.07900816947221756, "learning_rate": 3.434970195855805e-05, "loss": 0.0027798136696219446, "step": 231380 }, { "epoch": 65.6798183366449, "grad_norm": 2.1051087379455566, "learning_rate": 3.434686346863469e-05, "loss": 0.0012019915506243707, "step": 231390 }, { "epoch": 65.68265682656826, "grad_norm": 0.023917321115732193, "learning_rate": 3.434402497871133e-05, "loss": 0.00016113799065351487, "step": 231400 }, { "epoch": 65.68549531649163, "grad_norm": 0.004449686501175165, "learning_rate": 3.434118648878797e-05, "loss": 0.0002431098371744156, "step": 231410 }, { "epoch": 65.68833380641499, "grad_norm": 6.479013919830322, "learning_rate": 3.4338347998864604e-05, "loss": 0.0021111929789185526, "step": 231420 }, { "epoch": 65.69117229633835, "grad_norm": 0.013437706045806408, "learning_rate": 3.4335509508941245e-05, "loss": 0.00015547126531600952, "step": 231430 }, { "epoch": 65.69401078626171, "grad_norm": 0.5510806441307068, "learning_rate": 3.433267101901789e-05, "loss": 0.0003813350573182106, "step": 231440 }, { "epoch": 65.69684927618506, "grad_norm": 0.007818491198122501, "learning_rate": 3.432983252909452e-05, "loss": 9.923819452524185e-05, "step": 231450 }, { "epoch": 65.69968776610843, "grad_norm": 0.004059556871652603, "learning_rate": 3.432699403917116e-05, "loss": 6.269067525863648e-05, "step": 231460 }, { "epoch": 65.70252625603179, "grad_norm": 0.01481912937015295, "learning_rate": 3.43241555492478e-05, "loss": 0.00010526347905397415, "step": 231470 }, { "epoch": 65.70536474595515, "grad_norm": 0.027221692726016045, "learning_rate": 3.4321317059324445e-05, "loss": 0.00011312589049339294, "step": 231480 }, { "epoch": 65.70820323587851, "grad_norm": 0.00499211298301816, "learning_rate": 3.431847856940108e-05, "loss": 0.0005693785846233368, "step": 231490 }, { "epoch": 65.71104172580188, "grad_norm": 0.05744171515107155, "learning_rate": 3.4315640079477715e-05, "loss": 0.0011924216523766518, "step": 231500 }, { "epoch": 65.71104172580188, "eval_accuracy": 0.983976600750302, "eval_loss": 0.06580402702093124, "eval_runtime": 36.9545, "eval_samples_per_second": 425.578, "eval_steps_per_second": 6.657, "step": 231500 }, { "epoch": 65.71388021572524, "grad_norm": 0.01231334824115038, "learning_rate": 3.431280158955436e-05, "loss": 0.00014815032482147218, "step": 231510 }, { "epoch": 65.71671870564859, "grad_norm": 0.009914579801261425, "learning_rate": 3.4309963099631e-05, "loss": 8.564535528421402e-05, "step": 231520 }, { "epoch": 65.71955719557195, "grad_norm": 0.0062380703166127205, "learning_rate": 3.430712460970764e-05, "loss": 0.00022957175970077514, "step": 231530 }, { "epoch": 65.72239568549531, "grad_norm": 0.006510657723993063, "learning_rate": 3.430428611978428e-05, "loss": 0.00011706482619047164, "step": 231540 }, { "epoch": 65.72523417541868, "grad_norm": 0.10873502492904663, "learning_rate": 3.4301447629860915e-05, "loss": 0.00010715108364820481, "step": 231550 }, { "epoch": 65.72807266534204, "grad_norm": 0.011610387824475765, "learning_rate": 3.4298609139937556e-05, "loss": 0.0001846957951784134, "step": 231560 }, { "epoch": 65.7309111552654, "grad_norm": 0.01153116300702095, "learning_rate": 3.429577065001419e-05, "loss": 9.392984211444854e-05, "step": 231570 }, { "epoch": 65.73374964518877, "grad_norm": 0.0006269788136705756, "learning_rate": 3.429293216009083e-05, "loss": 5.9452466666698454e-05, "step": 231580 }, { "epoch": 65.73658813511211, "grad_norm": 0.16372902691364288, "learning_rate": 3.4290093670167473e-05, "loss": 8.892212063074111e-05, "step": 231590 }, { "epoch": 65.73942662503548, "grad_norm": 0.0741354301571846, "learning_rate": 3.428725518024411e-05, "loss": 8.12208279967308e-05, "step": 231600 }, { "epoch": 65.74226511495884, "grad_norm": 0.020530730485916138, "learning_rate": 3.4284416690320756e-05, "loss": 8.909441530704498e-05, "step": 231610 }, { "epoch": 65.7451036048822, "grad_norm": 0.2536911368370056, "learning_rate": 3.428157820039739e-05, "loss": 7.399935275316238e-05, "step": 231620 }, { "epoch": 65.74794209480557, "grad_norm": 0.016740387305617332, "learning_rate": 3.4278739710474025e-05, "loss": 0.00011295266449451447, "step": 231630 }, { "epoch": 65.75078058472893, "grad_norm": 0.009614512324333191, "learning_rate": 3.4275901220550674e-05, "loss": 0.00022170338779687883, "step": 231640 }, { "epoch": 65.75361907465229, "grad_norm": 0.001031016930937767, "learning_rate": 3.427306273062731e-05, "loss": 5.577672272920609e-05, "step": 231650 }, { "epoch": 65.75645756457564, "grad_norm": 0.007483439054340124, "learning_rate": 3.427022424070395e-05, "loss": 5.130264908075333e-05, "step": 231660 }, { "epoch": 65.759296054499, "grad_norm": 0.06130966916680336, "learning_rate": 3.4267385750780584e-05, "loss": 7.745213806629181e-05, "step": 231670 }, { "epoch": 65.76213454442237, "grad_norm": 0.0953872874379158, "learning_rate": 3.4264547260857226e-05, "loss": 4.4718943536281584e-05, "step": 231680 }, { "epoch": 65.76497303434573, "grad_norm": 0.008886524476110935, "learning_rate": 3.426170877093387e-05, "loss": 0.0003730708733201027, "step": 231690 }, { "epoch": 65.76781152426909, "grad_norm": 0.10616689175367355, "learning_rate": 3.42588702810105e-05, "loss": 0.00012534894049167633, "step": 231700 }, { "epoch": 65.77065001419246, "grad_norm": 0.05095289275050163, "learning_rate": 3.425603179108714e-05, "loss": 0.0026124900206923483, "step": 231710 }, { "epoch": 65.7734885041158, "grad_norm": 0.003194211982190609, "learning_rate": 3.4253193301163784e-05, "loss": 7.468704134225846e-05, "step": 231720 }, { "epoch": 65.77632699403917, "grad_norm": 0.04718780145049095, "learning_rate": 3.425035481124042e-05, "loss": 0.0030801685526967047, "step": 231730 }, { "epoch": 65.77916548396253, "grad_norm": 0.6691194176673889, "learning_rate": 3.424751632131706e-05, "loss": 0.00021902285516262054, "step": 231740 }, { "epoch": 65.78200397388589, "grad_norm": 0.0796615406870842, "learning_rate": 3.42446778313937e-05, "loss": 8.746478706598282e-05, "step": 231750 }, { "epoch": 65.78484246380926, "grad_norm": 0.005994886625558138, "learning_rate": 3.4241839341470336e-05, "loss": 0.00010288655757904053, "step": 231760 }, { "epoch": 65.78768095373262, "grad_norm": 0.005179221276193857, "learning_rate": 3.423900085154698e-05, "loss": 0.00024651885032653806, "step": 231770 }, { "epoch": 65.79051944365598, "grad_norm": 0.10285357385873795, "learning_rate": 3.423616236162362e-05, "loss": 0.00012129303067922592, "step": 231780 }, { "epoch": 65.79335793357933, "grad_norm": 0.009899024851620197, "learning_rate": 3.4233323871700254e-05, "loss": 0.00013456325978040696, "step": 231790 }, { "epoch": 65.79619642350269, "grad_norm": 0.013716734945774078, "learning_rate": 3.4230485381776895e-05, "loss": 0.001709038019180298, "step": 231800 }, { "epoch": 65.79903491342606, "grad_norm": 0.005779419094324112, "learning_rate": 3.4227646891853536e-05, "loss": 0.0008799515664577485, "step": 231810 }, { "epoch": 65.80187340334942, "grad_norm": 0.002868122886866331, "learning_rate": 3.422480840193018e-05, "loss": 0.00019031353294849396, "step": 231820 }, { "epoch": 65.80471189327278, "grad_norm": 0.005552553106099367, "learning_rate": 3.422196991200681e-05, "loss": 0.00016552843153476714, "step": 231830 }, { "epoch": 65.80755038319614, "grad_norm": 0.0032021424267441034, "learning_rate": 3.4219131422083454e-05, "loss": 7.45631754398346e-05, "step": 231840 }, { "epoch": 65.8103888731195, "grad_norm": 0.5081598162651062, "learning_rate": 3.4216292932160095e-05, "loss": 0.0002626638859510422, "step": 231850 }, { "epoch": 65.81322736304286, "grad_norm": 0.005850899498909712, "learning_rate": 3.421345444223673e-05, "loss": 0.0001938866451382637, "step": 231860 }, { "epoch": 65.81606585296622, "grad_norm": 0.0022778790444135666, "learning_rate": 3.421061595231337e-05, "loss": 0.0001382468268275261, "step": 231870 }, { "epoch": 65.81890434288958, "grad_norm": 0.1066102683544159, "learning_rate": 3.420777746239001e-05, "loss": 0.00011002831161022186, "step": 231880 }, { "epoch": 65.82174283281294, "grad_norm": 0.010333196260035038, "learning_rate": 3.420493897246665e-05, "loss": 7.553957402706146e-05, "step": 231890 }, { "epoch": 65.82458132273631, "grad_norm": 0.3792489171028137, "learning_rate": 3.420210048254329e-05, "loss": 0.00010870993137359619, "step": 231900 }, { "epoch": 65.82741981265967, "grad_norm": 0.014429807662963867, "learning_rate": 3.419926199261993e-05, "loss": 4.509091377258301e-05, "step": 231910 }, { "epoch": 65.83025830258302, "grad_norm": 0.02180224098265171, "learning_rate": 3.4196423502696564e-05, "loss": 8.67670401930809e-05, "step": 231920 }, { "epoch": 65.83309679250638, "grad_norm": 0.0034877662546932697, "learning_rate": 3.4193585012773206e-05, "loss": 5.371980369091034e-05, "step": 231930 }, { "epoch": 65.83593528242974, "grad_norm": 0.009875974617898464, "learning_rate": 3.419074652284985e-05, "loss": 9.61538404226303e-05, "step": 231940 }, { "epoch": 65.83877377235311, "grad_norm": 0.03500709682703018, "learning_rate": 3.418790803292649e-05, "loss": 3.058593720197678e-05, "step": 231950 }, { "epoch": 65.84161226227647, "grad_norm": 0.013801966793835163, "learning_rate": 3.418506954300312e-05, "loss": 6.7942775785923e-05, "step": 231960 }, { "epoch": 65.84445075219983, "grad_norm": 0.1761651635169983, "learning_rate": 3.418223105307976e-05, "loss": 0.0004913376644253731, "step": 231970 }, { "epoch": 65.8472892421232, "grad_norm": 0.0159663874655962, "learning_rate": 3.4179392563156406e-05, "loss": 0.0008664149791002274, "step": 231980 }, { "epoch": 65.85012773204654, "grad_norm": 0.023121556267142296, "learning_rate": 3.417655407323304e-05, "loss": 0.00039355140179395673, "step": 231990 }, { "epoch": 65.85296622196991, "grad_norm": 0.06248262897133827, "learning_rate": 3.417371558330968e-05, "loss": 4.392024129629135e-05, "step": 232000 }, { "epoch": 65.85296622196991, "eval_accuracy": 0.9856933935270554, "eval_loss": 0.061007190495729446, "eval_runtime": 35.4977, "eval_samples_per_second": 443.043, "eval_steps_per_second": 6.93, "step": 232000 }, { "epoch": 65.85580471189327, "grad_norm": 0.0005145067698322237, "learning_rate": 3.417087709338632e-05, "loss": 0.00010073576122522354, "step": 232010 }, { "epoch": 65.85864320181663, "grad_norm": 0.09042041003704071, "learning_rate": 3.416803860346296e-05, "loss": 0.00013708304613828658, "step": 232020 }, { "epoch": 65.86148169174, "grad_norm": 0.004986579995602369, "learning_rate": 3.41652001135396e-05, "loss": 0.0001871170476078987, "step": 232030 }, { "epoch": 65.86432018166336, "grad_norm": 0.010647201910614967, "learning_rate": 3.416236162361624e-05, "loss": 5.2413903176784515e-05, "step": 232040 }, { "epoch": 65.86715867158672, "grad_norm": 0.01006757840514183, "learning_rate": 3.4159523133692875e-05, "loss": 6.592348217964173e-05, "step": 232050 }, { "epoch": 65.86999716151007, "grad_norm": 0.0013823651243001223, "learning_rate": 3.4156684643769516e-05, "loss": 8.478090167045594e-05, "step": 232060 }, { "epoch": 65.87283565143343, "grad_norm": 0.0029333874117583036, "learning_rate": 3.415384615384615e-05, "loss": 2.8110109269618988e-05, "step": 232070 }, { "epoch": 65.8756741413568, "grad_norm": 0.00447238702327013, "learning_rate": 3.41510076639228e-05, "loss": 0.0007773585617542267, "step": 232080 }, { "epoch": 65.87851263128016, "grad_norm": 1.3902292251586914, "learning_rate": 3.4148169173999434e-05, "loss": 0.00040338393300771714, "step": 232090 }, { "epoch": 65.88135112120352, "grad_norm": 0.519213080406189, "learning_rate": 3.414533068407607e-05, "loss": 0.0001620907336473465, "step": 232100 }, { "epoch": 65.88418961112689, "grad_norm": 0.04473285749554634, "learning_rate": 3.4142492194152717e-05, "loss": 6.550364196300507e-05, "step": 232110 }, { "epoch": 65.88702810105025, "grad_norm": 0.019273122772574425, "learning_rate": 3.413965370422935e-05, "loss": 0.0013400334864854813, "step": 232120 }, { "epoch": 65.8898665909736, "grad_norm": 0.6875054836273193, "learning_rate": 3.413681521430599e-05, "loss": 0.00015898775309324263, "step": 232130 }, { "epoch": 65.89270508089696, "grad_norm": 0.05510139465332031, "learning_rate": 3.4133976724382634e-05, "loss": 0.00010127052664756774, "step": 232140 }, { "epoch": 65.89554357082032, "grad_norm": 0.014361207373440266, "learning_rate": 3.413113823445927e-05, "loss": 0.00017026048153638839, "step": 232150 }, { "epoch": 65.89838206074369, "grad_norm": 0.007188354153186083, "learning_rate": 3.412829974453591e-05, "loss": 0.00026291124522686004, "step": 232160 }, { "epoch": 65.90122055066705, "grad_norm": 0.04154258221387863, "learning_rate": 3.4125461254612544e-05, "loss": 0.00048067811876535417, "step": 232170 }, { "epoch": 65.90405904059041, "grad_norm": 0.0020395738538354635, "learning_rate": 3.4122622764689186e-05, "loss": 0.00014875363558530807, "step": 232180 }, { "epoch": 65.90689753051376, "grad_norm": 0.0011047787265852094, "learning_rate": 3.411978427476583e-05, "loss": 0.0010737445205450057, "step": 232190 }, { "epoch": 65.90973602043712, "grad_norm": 0.30270037055015564, "learning_rate": 3.411694578484246e-05, "loss": 0.01575983166694641, "step": 232200 }, { "epoch": 65.91257451036049, "grad_norm": 0.9591307640075684, "learning_rate": 3.41141072949191e-05, "loss": 0.00020611733198165892, "step": 232210 }, { "epoch": 65.91541300028385, "grad_norm": 0.03634125366806984, "learning_rate": 3.4111268804995745e-05, "loss": 0.001052003540098667, "step": 232220 }, { "epoch": 65.91825149020721, "grad_norm": 0.06254777312278748, "learning_rate": 3.410843031507238e-05, "loss": 4.7913193702697755e-05, "step": 232230 }, { "epoch": 65.92108998013057, "grad_norm": 0.0030403255950659513, "learning_rate": 3.410559182514903e-05, "loss": 6.820987910032273e-05, "step": 232240 }, { "epoch": 65.92392847005394, "grad_norm": 0.00843973271548748, "learning_rate": 3.410275333522566e-05, "loss": 0.00020836275070905684, "step": 232250 }, { "epoch": 65.92676695997729, "grad_norm": 0.02121376432478428, "learning_rate": 3.4099914845302297e-05, "loss": 6.952825933694839e-05, "step": 232260 }, { "epoch": 65.92960544990065, "grad_norm": 0.008697844110429287, "learning_rate": 3.409707635537894e-05, "loss": 0.00010136496275663376, "step": 232270 }, { "epoch": 65.93244393982401, "grad_norm": 0.03222785145044327, "learning_rate": 3.409423786545558e-05, "loss": 7.93682411313057e-05, "step": 232280 }, { "epoch": 65.93528242974737, "grad_norm": 0.21820737421512604, "learning_rate": 3.409139937553222e-05, "loss": 0.00010582804679870605, "step": 232290 }, { "epoch": 65.93812091967074, "grad_norm": 0.013075219467282295, "learning_rate": 3.4088560885608855e-05, "loss": 0.0001888491213321686, "step": 232300 }, { "epoch": 65.9409594095941, "grad_norm": 0.005750967189669609, "learning_rate": 3.40857223956855e-05, "loss": 3.475174307823181e-05, "step": 232310 }, { "epoch": 65.94379789951746, "grad_norm": 0.04554501920938492, "learning_rate": 3.408288390576214e-05, "loss": 0.004667354002594948, "step": 232320 }, { "epoch": 65.94663638944081, "grad_norm": 0.19964052736759186, "learning_rate": 3.408004541583877e-05, "loss": 0.00010396353900432587, "step": 232330 }, { "epoch": 65.94947487936417, "grad_norm": 0.017224522307515144, "learning_rate": 3.4077206925915414e-05, "loss": 0.00014228001236915588, "step": 232340 }, { "epoch": 65.95231336928754, "grad_norm": 0.001706941518932581, "learning_rate": 3.4074368435992055e-05, "loss": 0.0005166105926036835, "step": 232350 }, { "epoch": 65.9551518592109, "grad_norm": 0.5256364941596985, "learning_rate": 3.407152994606869e-05, "loss": 0.00015409514307975769, "step": 232360 }, { "epoch": 65.95799034913426, "grad_norm": 0.01616447977721691, "learning_rate": 3.406869145614533e-05, "loss": 7.545072585344315e-05, "step": 232370 }, { "epoch": 65.96082883905763, "grad_norm": 0.009271944873034954, "learning_rate": 3.406585296622197e-05, "loss": 0.00015750210732221604, "step": 232380 }, { "epoch": 65.96366732898099, "grad_norm": 0.02431403659284115, "learning_rate": 3.406301447629861e-05, "loss": 4.0820054709911344e-05, "step": 232390 }, { "epoch": 65.96650581890434, "grad_norm": 0.16784870624542236, "learning_rate": 3.406017598637525e-05, "loss": 0.0001020379364490509, "step": 232400 }, { "epoch": 65.9693443088277, "grad_norm": 0.015000401996076107, "learning_rate": 3.405733749645189e-05, "loss": 0.001607731357216835, "step": 232410 }, { "epoch": 65.97218279875106, "grad_norm": 0.019375570118427277, "learning_rate": 3.405449900652853e-05, "loss": 0.0005845343694090843, "step": 232420 }, { "epoch": 65.97502128867443, "grad_norm": 0.036119285970926285, "learning_rate": 3.4051660516605166e-05, "loss": 0.0005490392446517945, "step": 232430 }, { "epoch": 65.97785977859779, "grad_norm": 0.0012374571524560452, "learning_rate": 3.404882202668181e-05, "loss": 0.0036634080111980437, "step": 232440 }, { "epoch": 65.98069826852115, "grad_norm": 0.10806882381439209, "learning_rate": 3.404598353675845e-05, "loss": 0.0003269413486123085, "step": 232450 }, { "epoch": 65.9835367584445, "grad_norm": 0.0007161747198551893, "learning_rate": 3.404314504683508e-05, "loss": 0.0029777707532048225, "step": 232460 }, { "epoch": 65.98637524836786, "grad_norm": 0.0076056974940001965, "learning_rate": 3.4040306556911725e-05, "loss": 0.0005154186859726906, "step": 232470 }, { "epoch": 65.98921373829123, "grad_norm": 4.15744161605835, "learning_rate": 3.4037468066988366e-05, "loss": 0.0014112018048763274, "step": 232480 }, { "epoch": 65.99205222821459, "grad_norm": 0.6074082255363464, "learning_rate": 3.4034629577065e-05, "loss": 0.0013439569622278214, "step": 232490 }, { "epoch": 65.99489071813795, "grad_norm": 0.9638134837150574, "learning_rate": 3.403179108714164e-05, "loss": 0.0017575744539499282, "step": 232500 }, { "epoch": 65.99489071813795, "eval_accuracy": 0.981941883385261, "eval_loss": 0.07502394169569016, "eval_runtime": 37.8959, "eval_samples_per_second": 415.006, "eval_steps_per_second": 6.491, "step": 232500 }, { "epoch": 65.99772920806132, "grad_norm": 0.009382834658026695, "learning_rate": 3.4028952597218283e-05, "loss": 0.001044648513197899, "step": 232510 }, { "epoch": 66.00056769798468, "grad_norm": 0.5860065817832947, "learning_rate": 3.402611410729492e-05, "loss": 0.0020974582061171533, "step": 232520 }, { "epoch": 66.00340618790803, "grad_norm": 0.6350289583206177, "learning_rate": 3.402327561737156e-05, "loss": 0.0002544194459915161, "step": 232530 }, { "epoch": 66.00624467783139, "grad_norm": 0.011464019306004047, "learning_rate": 3.40204371274482e-05, "loss": 0.0005065415054559708, "step": 232540 }, { "epoch": 66.00908316775475, "grad_norm": 0.009367886930704117, "learning_rate": 3.401759863752484e-05, "loss": 8.839871734380722e-05, "step": 232550 }, { "epoch": 66.01192165767812, "grad_norm": 0.06736252456903458, "learning_rate": 3.401476014760148e-05, "loss": 9.587090462446213e-05, "step": 232560 }, { "epoch": 66.01476014760148, "grad_norm": 0.22818607091903687, "learning_rate": 3.401192165767811e-05, "loss": 0.00010256040841341018, "step": 232570 }, { "epoch": 66.01759863752484, "grad_norm": 0.0250431876629591, "learning_rate": 3.400908316775476e-05, "loss": 0.003181156516075134, "step": 232580 }, { "epoch": 66.0204371274482, "grad_norm": 0.09070951491594315, "learning_rate": 3.4006244677831394e-05, "loss": 0.00021972395479679107, "step": 232590 }, { "epoch": 66.02327561737155, "grad_norm": 0.12463131546974182, "learning_rate": 3.4003406187908036e-05, "loss": 0.0008233657106757164, "step": 232600 }, { "epoch": 66.02611410729492, "grad_norm": 0.005987556651234627, "learning_rate": 3.400056769798468e-05, "loss": 0.0024834591895341873, "step": 232610 }, { "epoch": 66.02895259721828, "grad_norm": 3.8092081546783447, "learning_rate": 3.399772920806131e-05, "loss": 0.001678311638534069, "step": 232620 }, { "epoch": 66.03179108714164, "grad_norm": 0.01763869822025299, "learning_rate": 3.399489071813795e-05, "loss": 0.0007244532927870751, "step": 232630 }, { "epoch": 66.034629577065, "grad_norm": 0.033500734716653824, "learning_rate": 3.3992052228214594e-05, "loss": 0.00034704115241765974, "step": 232640 }, { "epoch": 66.03746806698837, "grad_norm": 0.0023927001748234034, "learning_rate": 3.398921373829123e-05, "loss": 0.001039067655801773, "step": 232650 }, { "epoch": 66.04030655691172, "grad_norm": 0.0071973493322730064, "learning_rate": 3.398637524836787e-05, "loss": 0.003870062530040741, "step": 232660 }, { "epoch": 66.04314504683508, "grad_norm": 1.4277491569519043, "learning_rate": 3.398353675844451e-05, "loss": 0.0007562804967164993, "step": 232670 }, { "epoch": 66.04598353675844, "grad_norm": 0.006996369455009699, "learning_rate": 3.3980698268521146e-05, "loss": 0.00033707134425640104, "step": 232680 }, { "epoch": 66.0488220266818, "grad_norm": 0.01412059273570776, "learning_rate": 3.397785977859779e-05, "loss": 0.00018961280584335328, "step": 232690 }, { "epoch": 66.05166051660517, "grad_norm": 0.019571958109736443, "learning_rate": 3.397502128867442e-05, "loss": 0.001730233058333397, "step": 232700 }, { "epoch": 66.05449900652853, "grad_norm": 0.02574668452143669, "learning_rate": 3.397218279875107e-05, "loss": 0.00441369041800499, "step": 232710 }, { "epoch": 66.0573374964519, "grad_norm": 0.03386039659380913, "learning_rate": 3.3969344308827705e-05, "loss": 0.0037474483251571655, "step": 232720 }, { "epoch": 66.06017598637524, "grad_norm": 0.005862506106495857, "learning_rate": 3.396650581890434e-05, "loss": 0.0004601512104272842, "step": 232730 }, { "epoch": 66.0630144762986, "grad_norm": 0.014784760773181915, "learning_rate": 3.396366732898099e-05, "loss": 0.00021834857761859894, "step": 232740 }, { "epoch": 66.06585296622197, "grad_norm": 0.006413114722818136, "learning_rate": 3.396082883905762e-05, "loss": 0.0004488082602620125, "step": 232750 }, { "epoch": 66.06869145614533, "grad_norm": 0.0841003954410553, "learning_rate": 3.3957990349134264e-05, "loss": 0.0002568498253822327, "step": 232760 }, { "epoch": 66.0715299460687, "grad_norm": 0.02756727859377861, "learning_rate": 3.3955151859210905e-05, "loss": 0.0001449480652809143, "step": 232770 }, { "epoch": 66.07436843599206, "grad_norm": 0.035069599747657776, "learning_rate": 3.395231336928754e-05, "loss": 0.00010538548231124878, "step": 232780 }, { "epoch": 66.07720692591542, "grad_norm": 0.1564854383468628, "learning_rate": 3.394947487936418e-05, "loss": 0.0001298544928431511, "step": 232790 }, { "epoch": 66.08004541583877, "grad_norm": 0.7775769233703613, "learning_rate": 3.3946636389440816e-05, "loss": 0.00036995578557252884, "step": 232800 }, { "epoch": 66.08288390576213, "grad_norm": 0.5693956017494202, "learning_rate": 3.394379789951746e-05, "loss": 0.00032150018960237504, "step": 232810 }, { "epoch": 66.0857223956855, "grad_norm": 0.3235272169113159, "learning_rate": 3.39409594095941e-05, "loss": 0.00014773141592741012, "step": 232820 }, { "epoch": 66.08856088560886, "grad_norm": 0.0059838443994522095, "learning_rate": 3.393812091967073e-05, "loss": 0.00642048567533493, "step": 232830 }, { "epoch": 66.09139937553222, "grad_norm": 9.749025344848633, "learning_rate": 3.393528242974738e-05, "loss": 0.0028857439756393433, "step": 232840 }, { "epoch": 66.09423786545558, "grad_norm": 0.0015532122924923897, "learning_rate": 3.3932443939824016e-05, "loss": 0.0001224508509039879, "step": 232850 }, { "epoch": 66.09707635537895, "grad_norm": 1.3130384683609009, "learning_rate": 3.392960544990065e-05, "loss": 0.0011385152116417884, "step": 232860 }, { "epoch": 66.0999148453023, "grad_norm": 0.01675572618842125, "learning_rate": 3.39267669599773e-05, "loss": 5.258172750473022e-05, "step": 232870 }, { "epoch": 66.10275333522566, "grad_norm": 0.013794216327369213, "learning_rate": 3.392392847005393e-05, "loss": 0.004102931916713714, "step": 232880 }, { "epoch": 66.10559182514902, "grad_norm": 0.007952021434903145, "learning_rate": 3.3921089980130574e-05, "loss": 0.0047519180923700334, "step": 232890 }, { "epoch": 66.10843031507238, "grad_norm": 0.003864268073812127, "learning_rate": 3.391825149020721e-05, "loss": 0.00015269890427589418, "step": 232900 }, { "epoch": 66.11126880499575, "grad_norm": 10.844472885131836, "learning_rate": 3.391541300028385e-05, "loss": 0.0064303018152713776, "step": 232910 }, { "epoch": 66.11410729491911, "grad_norm": 0.09511983394622803, "learning_rate": 3.391257451036049e-05, "loss": 0.0015065167099237442, "step": 232920 }, { "epoch": 66.11694578484246, "grad_norm": 0.01292620599269867, "learning_rate": 3.3909736020437126e-05, "loss": 0.00026802662760019305, "step": 232930 }, { "epoch": 66.11978427476582, "grad_norm": 0.007583055645227432, "learning_rate": 3.390689753051377e-05, "loss": 0.005937404930591583, "step": 232940 }, { "epoch": 66.12262276468918, "grad_norm": 0.009005917236208916, "learning_rate": 3.390405904059041e-05, "loss": 0.0008363740518689156, "step": 232950 }, { "epoch": 66.12546125461255, "grad_norm": 0.06135711446404457, "learning_rate": 3.3901220550667044e-05, "loss": 0.002759149670600891, "step": 232960 }, { "epoch": 66.12829974453591, "grad_norm": 0.00498628057539463, "learning_rate": 3.389838206074369e-05, "loss": 0.0005110122263431549, "step": 232970 }, { "epoch": 66.13113823445927, "grad_norm": 0.005452621262520552, "learning_rate": 3.3895543570820326e-05, "loss": 8.555911481380462e-05, "step": 232980 }, { "epoch": 66.13397672438263, "grad_norm": 0.0008236374123953283, "learning_rate": 3.389270508089696e-05, "loss": 0.0004983598366379738, "step": 232990 }, { "epoch": 66.13681521430598, "grad_norm": 0.010283878073096275, "learning_rate": 3.38898665909736e-05, "loss": 0.0006338171660900116, "step": 233000 }, { "epoch": 66.13681521430598, "eval_accuracy": 0.9839130158326445, "eval_loss": 0.06903740018606186, "eval_runtime": 37.8889, "eval_samples_per_second": 415.082, "eval_steps_per_second": 6.493, "step": 233000 }, { "epoch": 66.13965370422935, "grad_norm": 19.23219871520996, "learning_rate": 3.3887028101050244e-05, "loss": 0.009160536527633666, "step": 233010 }, { "epoch": 66.14249219415271, "grad_norm": 0.09327242523431778, "learning_rate": 3.3884189611126885e-05, "loss": 0.004382132738828659, "step": 233020 }, { "epoch": 66.14533068407607, "grad_norm": 0.01392772700637579, "learning_rate": 3.388135112120352e-05, "loss": 0.0030765272676944733, "step": 233030 }, { "epoch": 66.14816917399943, "grad_norm": 0.009477437473833561, "learning_rate": 3.387851263128016e-05, "loss": 9.011365473270417e-05, "step": 233040 }, { "epoch": 66.1510076639228, "grad_norm": 0.009295189753174782, "learning_rate": 3.38756741413568e-05, "loss": 7.526203989982605e-05, "step": 233050 }, { "epoch": 66.15384615384616, "grad_norm": 0.009456885978579521, "learning_rate": 3.387283565143344e-05, "loss": 0.00034303758293390273, "step": 233060 }, { "epoch": 66.15668464376951, "grad_norm": 1.662019968032837, "learning_rate": 3.386999716151008e-05, "loss": 0.0003440916538238525, "step": 233070 }, { "epoch": 66.15952313369287, "grad_norm": 0.10563334822654724, "learning_rate": 3.386715867158672e-05, "loss": 0.0004360094666481018, "step": 233080 }, { "epoch": 66.16236162361623, "grad_norm": 0.06528464704751968, "learning_rate": 3.3864320181663354e-05, "loss": 0.000178501196205616, "step": 233090 }, { "epoch": 66.1652001135396, "grad_norm": 0.18194130063056946, "learning_rate": 3.3861481691739996e-05, "loss": 9.346175938844681e-05, "step": 233100 }, { "epoch": 66.16803860346296, "grad_norm": 0.0018872927175834775, "learning_rate": 3.385864320181664e-05, "loss": 0.00024050641804933547, "step": 233110 }, { "epoch": 66.17087709338632, "grad_norm": 0.004830183461308479, "learning_rate": 3.385580471189327e-05, "loss": 5.997307598590851e-05, "step": 233120 }, { "epoch": 66.17371558330967, "grad_norm": 0.031217416748404503, "learning_rate": 3.385296622196991e-05, "loss": 0.0001658055931329727, "step": 233130 }, { "epoch": 66.17655407323304, "grad_norm": 0.00539760896936059, "learning_rate": 3.3850127732046555e-05, "loss": 0.0004716005176305771, "step": 233140 }, { "epoch": 66.1793925631564, "grad_norm": 1.2450426816940308, "learning_rate": 3.384728924212319e-05, "loss": 0.0004599273204803467, "step": 233150 }, { "epoch": 66.18223105307976, "grad_norm": 0.0024758847430348396, "learning_rate": 3.384445075219983e-05, "loss": 9.71190631389618e-05, "step": 233160 }, { "epoch": 66.18506954300312, "grad_norm": 0.03813381493091583, "learning_rate": 3.384161226227647e-05, "loss": 0.0029788978397846224, "step": 233170 }, { "epoch": 66.18790803292649, "grad_norm": 0.11029775440692902, "learning_rate": 3.383877377235311e-05, "loss": 0.00015553329139947892, "step": 233180 }, { "epoch": 66.19074652284985, "grad_norm": 0.005459499079734087, "learning_rate": 3.383593528242975e-05, "loss": 0.00038220640271902084, "step": 233190 }, { "epoch": 66.1935850127732, "grad_norm": 0.05628835782408714, "learning_rate": 3.383309679250638e-05, "loss": 3.6465562880039214e-05, "step": 233200 }, { "epoch": 66.19642350269656, "grad_norm": 0.04755248874425888, "learning_rate": 3.383025830258303e-05, "loss": 0.00017465893179178238, "step": 233210 }, { "epoch": 66.19926199261992, "grad_norm": 0.14936354756355286, "learning_rate": 3.3827419812659665e-05, "loss": 0.00010710060596466064, "step": 233220 }, { "epoch": 66.20210048254329, "grad_norm": 0.029765358194708824, "learning_rate": 3.3824581322736307e-05, "loss": 0.00016031675040721892, "step": 233230 }, { "epoch": 66.20493897246665, "grad_norm": 0.015431367792189121, "learning_rate": 3.382174283281295e-05, "loss": 0.00017433427274227142, "step": 233240 }, { "epoch": 66.20777746239001, "grad_norm": 0.007614320144057274, "learning_rate": 3.381890434288958e-05, "loss": 0.0001685643568634987, "step": 233250 }, { "epoch": 66.21061595231338, "grad_norm": 0.0015870569040998816, "learning_rate": 3.3816065852966224e-05, "loss": 0.0016198579221963882, "step": 233260 }, { "epoch": 66.21345444223672, "grad_norm": 0.04240049421787262, "learning_rate": 3.3813227363042865e-05, "loss": 0.00023725740611553193, "step": 233270 }, { "epoch": 66.21629293216009, "grad_norm": 0.018279073759913445, "learning_rate": 3.38103888731195e-05, "loss": 0.0003872973844408989, "step": 233280 }, { "epoch": 66.21913142208345, "grad_norm": 0.022115616127848625, "learning_rate": 3.380755038319614e-05, "loss": 7.229764014482498e-05, "step": 233290 }, { "epoch": 66.22196991200681, "grad_norm": 0.0024407391902059317, "learning_rate": 3.3804711893272776e-05, "loss": 0.00010145753622055053, "step": 233300 }, { "epoch": 66.22480840193018, "grad_norm": 0.14587968587875366, "learning_rate": 3.3801873403349424e-05, "loss": 6.549395620822907e-05, "step": 233310 }, { "epoch": 66.22764689185354, "grad_norm": 0.0026402357034385204, "learning_rate": 3.379903491342606e-05, "loss": 3.571435809135437e-05, "step": 233320 }, { "epoch": 66.2304853817769, "grad_norm": 0.01572336070239544, "learning_rate": 3.379619642350269e-05, "loss": 8.314251899719239e-05, "step": 233330 }, { "epoch": 66.23332387170025, "grad_norm": 0.03545840084552765, "learning_rate": 3.379335793357934e-05, "loss": 8.597876876592636e-05, "step": 233340 }, { "epoch": 66.23616236162361, "grad_norm": 0.0037228341680020094, "learning_rate": 3.3790519443655976e-05, "loss": 0.00012136660516262054, "step": 233350 }, { "epoch": 66.23900085154698, "grad_norm": 0.0035265618935227394, "learning_rate": 3.378768095373262e-05, "loss": 2.530459314584732e-05, "step": 233360 }, { "epoch": 66.24183934147034, "grad_norm": 0.002057964215055108, "learning_rate": 3.378484246380926e-05, "loss": 3.0998513102531434e-05, "step": 233370 }, { "epoch": 66.2446778313937, "grad_norm": 0.007815173827111721, "learning_rate": 3.378200397388589e-05, "loss": 2.720355987548828e-05, "step": 233380 }, { "epoch": 66.24751632131706, "grad_norm": 0.006211303174495697, "learning_rate": 3.3779165483962535e-05, "loss": 2.4190358817577363e-05, "step": 233390 }, { "epoch": 66.25035481124041, "grad_norm": 0.005773978307843208, "learning_rate": 3.377632699403917e-05, "loss": 1.31877139210701e-05, "step": 233400 }, { "epoch": 66.25319330116378, "grad_norm": 0.049208007752895355, "learning_rate": 3.377348850411581e-05, "loss": 0.00014034919440746306, "step": 233410 }, { "epoch": 66.25603179108714, "grad_norm": 0.006766047794371843, "learning_rate": 3.377065001419245e-05, "loss": 4.864037036895752e-05, "step": 233420 }, { "epoch": 66.2588702810105, "grad_norm": 0.04591812938451767, "learning_rate": 3.376781152426909e-05, "loss": 2.563726156949997e-05, "step": 233430 }, { "epoch": 66.26170877093386, "grad_norm": 0.007391439285129309, "learning_rate": 3.3764973034345735e-05, "loss": 2.5396235287189484e-05, "step": 233440 }, { "epoch": 66.26454726085723, "grad_norm": 0.005771826487034559, "learning_rate": 3.376213454442237e-05, "loss": 2.450011670589447e-05, "step": 233450 }, { "epoch": 66.26738575078059, "grad_norm": 0.007712331600487232, "learning_rate": 3.3759296054499004e-05, "loss": 2.0098686218261718e-05, "step": 233460 }, { "epoch": 66.27022424070394, "grad_norm": 0.00204607960768044, "learning_rate": 3.375645756457565e-05, "loss": 0.00016860831528902053, "step": 233470 }, { "epoch": 66.2730627306273, "grad_norm": 0.02807501144707203, "learning_rate": 3.375361907465229e-05, "loss": 0.0010216623544692994, "step": 233480 }, { "epoch": 66.27590122055067, "grad_norm": 0.0015271612210199237, "learning_rate": 3.375078058472893e-05, "loss": 5.543045699596405e-05, "step": 233490 }, { "epoch": 66.27873971047403, "grad_norm": 0.010962605476379395, "learning_rate": 3.374794209480556e-05, "loss": 0.002901604026556015, "step": 233500 }, { "epoch": 66.27873971047403, "eval_accuracy": 0.9860749030330006, "eval_loss": 0.05808263272047043, "eval_runtime": 36.9033, "eval_samples_per_second": 426.167, "eval_steps_per_second": 6.666, "step": 233500 }, { "epoch": 66.28157820039739, "grad_norm": 0.24266836047172546, "learning_rate": 3.3745103604882204e-05, "loss": 0.0011114947497844695, "step": 233510 }, { "epoch": 66.28441669032075, "grad_norm": 0.056496575474739075, "learning_rate": 3.3742548963951184e-05, "loss": 0.014612308144569397, "step": 233520 }, { "epoch": 66.28725518024412, "grad_norm": 0.048028506338596344, "learning_rate": 3.373971047402782e-05, "loss": 0.003338975086808205, "step": 233530 }, { "epoch": 66.29009367016747, "grad_norm": 0.0070195733569562435, "learning_rate": 3.373687198410446e-05, "loss": 0.0029152978211641313, "step": 233540 }, { "epoch": 66.29293216009083, "grad_norm": 8.846634864807129, "learning_rate": 3.37340334941811e-05, "loss": 0.017015913128852846, "step": 233550 }, { "epoch": 66.29577065001419, "grad_norm": 0.19498756527900696, "learning_rate": 3.3731195004257736e-05, "loss": 0.0009173121303319931, "step": 233560 }, { "epoch": 66.29860913993755, "grad_norm": 0.06395142525434494, "learning_rate": 3.372835651433438e-05, "loss": 0.01968989223241806, "step": 233570 }, { "epoch": 66.30144762986092, "grad_norm": 0.05197374150156975, "learning_rate": 3.372551802441101e-05, "loss": 0.0206513836979866, "step": 233580 }, { "epoch": 66.30428611978428, "grad_norm": 0.09663568437099457, "learning_rate": 3.372267953448765e-05, "loss": 0.0004741862416267395, "step": 233590 }, { "epoch": 66.30712460970764, "grad_norm": 0.012292876839637756, "learning_rate": 3.3719841044564295e-05, "loss": 0.0006725842133164406, "step": 233600 }, { "epoch": 66.30996309963099, "grad_norm": 0.021315736696124077, "learning_rate": 3.371700255464093e-05, "loss": 0.00022163689136505126, "step": 233610 }, { "epoch": 66.31280158955435, "grad_norm": 0.9597537517547607, "learning_rate": 3.371416406471757e-05, "loss": 0.0013096081092953682, "step": 233620 }, { "epoch": 66.31564007947772, "grad_norm": 0.14409595727920532, "learning_rate": 3.371132557479421e-05, "loss": 0.007923388481140136, "step": 233630 }, { "epoch": 66.31847856940108, "grad_norm": 0.06450870633125305, "learning_rate": 3.370848708487085e-05, "loss": 0.00017535574734210967, "step": 233640 }, { "epoch": 66.32131705932444, "grad_norm": 0.0035018573980778456, "learning_rate": 3.3705648594947495e-05, "loss": 0.00012153163552284241, "step": 233650 }, { "epoch": 66.3241555492478, "grad_norm": 0.022364983335137367, "learning_rate": 3.370281010502413e-05, "loss": 0.00019888412207365037, "step": 233660 }, { "epoch": 66.32699403917115, "grad_norm": 0.006687599699944258, "learning_rate": 3.3699971615100764e-05, "loss": 7.616095244884491e-05, "step": 233670 }, { "epoch": 66.32983252909452, "grad_norm": 0.011247101239860058, "learning_rate": 3.3697133125177405e-05, "loss": 0.00034006927162408827, "step": 233680 }, { "epoch": 66.33267101901788, "grad_norm": 0.2057858407497406, "learning_rate": 3.369429463525405e-05, "loss": 0.00015099719166755676, "step": 233690 }, { "epoch": 66.33550950894124, "grad_norm": 0.0017032207688316703, "learning_rate": 3.369145614533069e-05, "loss": 0.00014985725283622742, "step": 233700 }, { "epoch": 66.3383479988646, "grad_norm": 0.004195858724415302, "learning_rate": 3.368861765540732e-05, "loss": 6.693247705698013e-05, "step": 233710 }, { "epoch": 66.34118648878797, "grad_norm": 0.9715620279312134, "learning_rate": 3.3685779165483964e-05, "loss": 0.0004069248214364052, "step": 233720 }, { "epoch": 66.34402497871133, "grad_norm": 0.008584940806031227, "learning_rate": 3.3682940675560605e-05, "loss": 0.0005189234390854836, "step": 233730 }, { "epoch": 66.34686346863468, "grad_norm": 0.5343528389930725, "learning_rate": 3.368010218563724e-05, "loss": 0.0021819017827510835, "step": 233740 }, { "epoch": 66.34970195855804, "grad_norm": 0.010119401849806309, "learning_rate": 3.367726369571388e-05, "loss": 0.00016213580965995789, "step": 233750 }, { "epoch": 66.3525404484814, "grad_norm": 0.028830066323280334, "learning_rate": 3.367442520579052e-05, "loss": 0.00012022145092487335, "step": 233760 }, { "epoch": 66.35537893840477, "grad_norm": 0.05471816286444664, "learning_rate": 3.367158671586716e-05, "loss": 0.0002888008952140808, "step": 233770 }, { "epoch": 66.35821742832813, "grad_norm": 0.05953245237469673, "learning_rate": 3.36687482259438e-05, "loss": 0.0002983754500746727, "step": 233780 }, { "epoch": 66.3610559182515, "grad_norm": 0.02785307914018631, "learning_rate": 3.366590973602044e-05, "loss": 5.4727308452129364e-05, "step": 233790 }, { "epoch": 66.36389440817486, "grad_norm": 0.011128628626465797, "learning_rate": 3.3663071246097075e-05, "loss": 0.00018673036247491838, "step": 233800 }, { "epoch": 66.3667328980982, "grad_norm": 0.004090575966984034, "learning_rate": 3.3660232756173716e-05, "loss": 0.0006023414433002471, "step": 233810 }, { "epoch": 66.36957138802157, "grad_norm": 0.12056121975183487, "learning_rate": 3.365739426625036e-05, "loss": 0.0008234055712819099, "step": 233820 }, { "epoch": 66.37240987794493, "grad_norm": 0.0265825055539608, "learning_rate": 3.3654555776327e-05, "loss": 0.00010566134005784988, "step": 233830 }, { "epoch": 66.3752483678683, "grad_norm": 0.017574168741703033, "learning_rate": 3.3651717286403633e-05, "loss": 0.00028619617223739623, "step": 233840 }, { "epoch": 66.37808685779166, "grad_norm": 0.381946325302124, "learning_rate": 3.3648878796480275e-05, "loss": 0.00011648591607809067, "step": 233850 }, { "epoch": 66.38092534771502, "grad_norm": 0.051205798983573914, "learning_rate": 3.3646040306556916e-05, "loss": 0.00039005279541015625, "step": 233860 }, { "epoch": 66.38376383763837, "grad_norm": 0.05737121030688286, "learning_rate": 3.364320181663355e-05, "loss": 7.18332827091217e-05, "step": 233870 }, { "epoch": 66.38660232756173, "grad_norm": 0.023711882531642914, "learning_rate": 3.364036332671019e-05, "loss": 0.00010376479476690293, "step": 233880 }, { "epoch": 66.3894408174851, "grad_norm": 0.013774292543530464, "learning_rate": 3.3637524836786834e-05, "loss": 0.00038598179817199706, "step": 233890 }, { "epoch": 66.39227930740846, "grad_norm": 0.21058237552642822, "learning_rate": 3.363468634686347e-05, "loss": 0.0001241326332092285, "step": 233900 }, { "epoch": 66.39511779733182, "grad_norm": 0.023701848462224007, "learning_rate": 3.363184785694011e-05, "loss": 0.00028410963714122774, "step": 233910 }, { "epoch": 66.39795628725518, "grad_norm": 0.006522956769913435, "learning_rate": 3.362900936701675e-05, "loss": 0.0017320733517408372, "step": 233920 }, { "epoch": 66.40079477717855, "grad_norm": 0.06457874923944473, "learning_rate": 3.3626170877093386e-05, "loss": 9.260736405849456e-05, "step": 233930 }, { "epoch": 66.4036332671019, "grad_norm": 0.01646876521408558, "learning_rate": 3.362333238717003e-05, "loss": 0.00013590808957815171, "step": 233940 }, { "epoch": 66.40647175702526, "grad_norm": 0.2255176305770874, "learning_rate": 3.362049389724667e-05, "loss": 0.00041296985000371933, "step": 233950 }, { "epoch": 66.40931024694862, "grad_norm": 0.029260771349072456, "learning_rate": 3.361765540732331e-05, "loss": 0.00021583959460258484, "step": 233960 }, { "epoch": 66.41214873687198, "grad_norm": 0.0435432493686676, "learning_rate": 3.3614816917399944e-05, "loss": 5.40582463145256e-05, "step": 233970 }, { "epoch": 66.41498722679535, "grad_norm": 0.060236312448978424, "learning_rate": 3.361197842747658e-05, "loss": 8.370727300643921e-05, "step": 233980 }, { "epoch": 66.41782571671871, "grad_norm": 0.012075552716851234, "learning_rate": 3.360913993755323e-05, "loss": 6.851423531770707e-05, "step": 233990 }, { "epoch": 66.42066420664207, "grad_norm": 0.006405899301171303, "learning_rate": 3.360630144762986e-05, "loss": 0.00026760660111904146, "step": 234000 }, { "epoch": 66.42066420664207, "eval_accuracy": 0.9870286767978635, "eval_loss": 0.05441458523273468, "eval_runtime": 35.6197, "eval_samples_per_second": 441.525, "eval_steps_per_second": 6.906, "step": 234000 }, { "epoch": 66.42350269656542, "grad_norm": 0.12315424531698227, "learning_rate": 3.36034629577065e-05, "loss": 0.0001757938414812088, "step": 234010 }, { "epoch": 66.42634118648878, "grad_norm": 0.01983623206615448, "learning_rate": 3.3600624467783144e-05, "loss": 7.059797644615173e-05, "step": 234020 }, { "epoch": 66.42917967641215, "grad_norm": 0.01378307119011879, "learning_rate": 3.359778597785978e-05, "loss": 8.99631530046463e-05, "step": 234030 }, { "epoch": 66.43201816633551, "grad_norm": 0.021754438057541847, "learning_rate": 3.359494748793642e-05, "loss": 7.761493325233459e-05, "step": 234040 }, { "epoch": 66.43485665625887, "grad_norm": 0.03895027935504913, "learning_rate": 3.359210899801306e-05, "loss": 7.260963320732117e-05, "step": 234050 }, { "epoch": 66.43769514618224, "grad_norm": 0.042270734906196594, "learning_rate": 3.3589270508089696e-05, "loss": 4.430618137121201e-05, "step": 234060 }, { "epoch": 66.4405336361056, "grad_norm": 0.07254339009523392, "learning_rate": 3.358643201816634e-05, "loss": 0.0001185612753033638, "step": 234070 }, { "epoch": 66.44337212602895, "grad_norm": 0.0009820839622989297, "learning_rate": 3.358359352824297e-05, "loss": 0.00010001529008150101, "step": 234080 }, { "epoch": 66.44621061595231, "grad_norm": 0.014177650213241577, "learning_rate": 3.3580755038319614e-05, "loss": 6.34666532278061e-05, "step": 234090 }, { "epoch": 66.44904910587567, "grad_norm": 0.002075756434351206, "learning_rate": 3.3577916548396255e-05, "loss": 2.338755875825882e-05, "step": 234100 }, { "epoch": 66.45188759579904, "grad_norm": 0.0028827807400375605, "learning_rate": 3.357507805847289e-05, "loss": 6.680432707071305e-05, "step": 234110 }, { "epoch": 66.4547260857224, "grad_norm": 0.005370078142732382, "learning_rate": 3.357223956854954e-05, "loss": 3.3181905746459964e-05, "step": 234120 }, { "epoch": 66.45756457564576, "grad_norm": 0.0014385543763637543, "learning_rate": 3.356940107862617e-05, "loss": 3.835652023553848e-05, "step": 234130 }, { "epoch": 66.46040306556911, "grad_norm": 0.005302313715219498, "learning_rate": 3.356656258870281e-05, "loss": 7.688924670219421e-05, "step": 234140 }, { "epoch": 66.46324155549247, "grad_norm": 0.001175659243017435, "learning_rate": 3.3563724098779455e-05, "loss": 7.776245474815369e-05, "step": 234150 }, { "epoch": 66.46608004541584, "grad_norm": 0.0037493379786610603, "learning_rate": 3.356088560885609e-05, "loss": 3.821644932031631e-05, "step": 234160 }, { "epoch": 66.4689185353392, "grad_norm": 0.0006913860561326146, "learning_rate": 3.355804711893273e-05, "loss": 0.00010223183780908585, "step": 234170 }, { "epoch": 66.47175702526256, "grad_norm": 0.19675420224666595, "learning_rate": 3.3555208629009366e-05, "loss": 0.0016816329210996629, "step": 234180 }, { "epoch": 66.47459551518592, "grad_norm": 0.011736979708075523, "learning_rate": 3.355237013908601e-05, "loss": 0.00017012935131788254, "step": 234190 }, { "epoch": 66.47743400510929, "grad_norm": 0.002697720192372799, "learning_rate": 3.354953164916265e-05, "loss": 2.8340891003608703e-05, "step": 234200 }, { "epoch": 66.48027249503264, "grad_norm": 0.00710765365511179, "learning_rate": 3.354669315923928e-05, "loss": 0.00018573179841041565, "step": 234210 }, { "epoch": 66.483110984956, "grad_norm": 0.011292618699371815, "learning_rate": 3.3543854669315924e-05, "loss": 0.0003403607755899429, "step": 234220 }, { "epoch": 66.48594947487936, "grad_norm": 0.00376089196652174, "learning_rate": 3.3541016179392566e-05, "loss": 7.846150547266006e-05, "step": 234230 }, { "epoch": 66.48878796480273, "grad_norm": 0.05134512484073639, "learning_rate": 3.35381776894692e-05, "loss": 7.512755692005158e-05, "step": 234240 }, { "epoch": 66.49162645472609, "grad_norm": 0.0036626008804887533, "learning_rate": 3.353533919954585e-05, "loss": 0.0005151664838194848, "step": 234250 }, { "epoch": 66.49446494464945, "grad_norm": 0.020601967349648476, "learning_rate": 3.353250070962248e-05, "loss": 0.00010534953325986862, "step": 234260 }, { "epoch": 66.49730343457281, "grad_norm": 0.009093226864933968, "learning_rate": 3.352966221969912e-05, "loss": 0.00021281223744153976, "step": 234270 }, { "epoch": 66.50014192449616, "grad_norm": 0.08889538794755936, "learning_rate": 3.352682372977576e-05, "loss": 0.0012833556160330773, "step": 234280 }, { "epoch": 66.50298041441953, "grad_norm": 0.26383811235427856, "learning_rate": 3.35239852398524e-05, "loss": 0.00032354481518268583, "step": 234290 }, { "epoch": 66.50581890434289, "grad_norm": 0.003145745024085045, "learning_rate": 3.352114674992904e-05, "loss": 0.000246465764939785, "step": 234300 }, { "epoch": 66.50865739426625, "grad_norm": 0.006217553745955229, "learning_rate": 3.3518308260005676e-05, "loss": 0.00020913016051054, "step": 234310 }, { "epoch": 66.51149588418961, "grad_norm": 0.008941789157688618, "learning_rate": 3.351546977008232e-05, "loss": 5.0095096230506894e-05, "step": 234320 }, { "epoch": 66.51433437411298, "grad_norm": 0.005598758347332478, "learning_rate": 3.351263128015896e-05, "loss": 0.00010984688997268676, "step": 234330 }, { "epoch": 66.51717286403633, "grad_norm": 0.02938179112970829, "learning_rate": 3.3509792790235594e-05, "loss": 5.047488957643509e-05, "step": 234340 }, { "epoch": 66.52001135395969, "grad_norm": 0.002199106616899371, "learning_rate": 3.3506954300312235e-05, "loss": 6.715860217809677e-05, "step": 234350 }, { "epoch": 66.52284984388305, "grad_norm": 0.017076261341571808, "learning_rate": 3.3504115810388877e-05, "loss": 4.139970988035202e-05, "step": 234360 }, { "epoch": 66.52568833380641, "grad_norm": 0.0024703494273126125, "learning_rate": 3.350127732046551e-05, "loss": 0.00011052545160055161, "step": 234370 }, { "epoch": 66.52852682372978, "grad_norm": 0.008146797306835651, "learning_rate": 3.349843883054216e-05, "loss": 5.4243020713329315e-05, "step": 234380 }, { "epoch": 66.53136531365314, "grad_norm": 0.010398493148386478, "learning_rate": 3.3495600340618794e-05, "loss": 8.530877530574799e-05, "step": 234390 }, { "epoch": 66.5342038035765, "grad_norm": 0.0017955112271010876, "learning_rate": 3.349276185069543e-05, "loss": 2.7378834784030914e-05, "step": 234400 }, { "epoch": 66.53704229349985, "grad_norm": 0.0021811029873788357, "learning_rate": 3.348992336077207e-05, "loss": 5.927868187427521e-05, "step": 234410 }, { "epoch": 66.53988078342321, "grad_norm": 0.021540647372603416, "learning_rate": 3.348708487084871e-05, "loss": 4.492569714784622e-05, "step": 234420 }, { "epoch": 66.54271927334658, "grad_norm": 0.004006885923445225, "learning_rate": 3.348424638092535e-05, "loss": 3.199223428964615e-05, "step": 234430 }, { "epoch": 66.54555776326994, "grad_norm": 0.0036392805632203817, "learning_rate": 3.348140789100199e-05, "loss": 4.6609528362751006e-05, "step": 234440 }, { "epoch": 66.5483962531933, "grad_norm": 0.007205985486507416, "learning_rate": 3.347856940107863e-05, "loss": 1.707039773464203e-05, "step": 234450 }, { "epoch": 66.55123474311667, "grad_norm": 0.0021951263770461082, "learning_rate": 3.347573091115527e-05, "loss": 0.00012027788907289504, "step": 234460 }, { "epoch": 66.55407323304003, "grad_norm": 0.0018487198976799846, "learning_rate": 3.3472892421231905e-05, "loss": 4.365015774965286e-05, "step": 234470 }, { "epoch": 66.55691172296338, "grad_norm": 0.0027215653099119663, "learning_rate": 3.3470053931308546e-05, "loss": 7.770266383886338e-05, "step": 234480 }, { "epoch": 66.55975021288674, "grad_norm": 0.017619172111153603, "learning_rate": 3.346721544138519e-05, "loss": 2.4188123643398284e-05, "step": 234490 }, { "epoch": 66.5625887028101, "grad_norm": 0.0016610639868304133, "learning_rate": 3.346437695146182e-05, "loss": 0.00020116120576858522, "step": 234500 }, { "epoch": 66.5625887028101, "eval_accuracy": 0.9863928276212882, "eval_loss": 0.05375242605805397, "eval_runtime": 39.5009, "eval_samples_per_second": 398.143, "eval_steps_per_second": 6.228, "step": 234500 }, { "epoch": 66.56542719273347, "grad_norm": 0.0024424802977591753, "learning_rate": 3.346153846153846e-05, "loss": 0.0003625215962529182, "step": 234510 }, { "epoch": 66.56826568265683, "grad_norm": 0.024835791438817978, "learning_rate": 3.3458699971615105e-05, "loss": 8.242577314376831e-05, "step": 234520 }, { "epoch": 66.57110417258019, "grad_norm": 0.10140953212976456, "learning_rate": 3.345586148169174e-05, "loss": 0.00025225505232810973, "step": 234530 }, { "epoch": 66.57394266250355, "grad_norm": 0.017244389280676842, "learning_rate": 3.345302299176838e-05, "loss": 0.0003044389188289642, "step": 234540 }, { "epoch": 66.5767811524269, "grad_norm": 0.010893898084759712, "learning_rate": 3.345018450184502e-05, "loss": 0.00024249851703643798, "step": 234550 }, { "epoch": 66.57961964235027, "grad_norm": 0.0560934916138649, "learning_rate": 3.344734601192166e-05, "loss": 0.00034658536314964296, "step": 234560 }, { "epoch": 66.58245813227363, "grad_norm": 0.13007405400276184, "learning_rate": 3.34445075219983e-05, "loss": 0.0005804989486932755, "step": 234570 }, { "epoch": 66.58529662219699, "grad_norm": 0.0013419976457953453, "learning_rate": 3.344166903207494e-05, "loss": 0.00013096611946821212, "step": 234580 }, { "epoch": 66.58813511212036, "grad_norm": 0.08965178579092026, "learning_rate": 3.343883054215158e-05, "loss": 0.00018507558852434158, "step": 234590 }, { "epoch": 66.59097360204372, "grad_norm": 0.03860488533973694, "learning_rate": 3.3435992052228215e-05, "loss": 8.067600429058075e-05, "step": 234600 }, { "epoch": 66.59381209196707, "grad_norm": 0.008337730541825294, "learning_rate": 3.343315356230485e-05, "loss": 3.5392306745052335e-05, "step": 234610 }, { "epoch": 66.59665058189043, "grad_norm": 0.12468888610601425, "learning_rate": 3.34303150723815e-05, "loss": 0.00021876022219657898, "step": 234620 }, { "epoch": 66.59948907181379, "grad_norm": 0.0009489954099990427, "learning_rate": 3.342747658245813e-05, "loss": 0.00020619090646505355, "step": 234630 }, { "epoch": 66.60232756173716, "grad_norm": 0.133834108710289, "learning_rate": 3.3424638092534774e-05, "loss": 0.00010867062956094742, "step": 234640 }, { "epoch": 66.60516605166052, "grad_norm": 0.01782328635454178, "learning_rate": 3.3421799602611415e-05, "loss": 0.00010035466402769089, "step": 234650 }, { "epoch": 66.60800454158388, "grad_norm": 0.00696513382717967, "learning_rate": 3.341896111268805e-05, "loss": 4.191398620605469e-05, "step": 234660 }, { "epoch": 66.61084303150724, "grad_norm": 0.0024817546363919973, "learning_rate": 3.341612262276469e-05, "loss": 7.652007043361664e-05, "step": 234670 }, { "epoch": 66.61368152143059, "grad_norm": 0.09144359827041626, "learning_rate": 3.341328413284133e-05, "loss": 6.170403212308883e-05, "step": 234680 }, { "epoch": 66.61652001135396, "grad_norm": 0.0061355531215667725, "learning_rate": 3.341044564291797e-05, "loss": 0.0001619398593902588, "step": 234690 }, { "epoch": 66.61935850127732, "grad_norm": 0.011121244169771671, "learning_rate": 3.340760715299461e-05, "loss": 0.0009671466425061226, "step": 234700 }, { "epoch": 66.62219699120068, "grad_norm": 0.0348772257566452, "learning_rate": 3.340476866307124e-05, "loss": 9.503550827503205e-05, "step": 234710 }, { "epoch": 66.62503548112404, "grad_norm": 0.008679009974002838, "learning_rate": 3.340193017314789e-05, "loss": 0.00015197377651929856, "step": 234720 }, { "epoch": 66.6278739710474, "grad_norm": 0.0022557899355888367, "learning_rate": 3.3399091683224526e-05, "loss": 5.641002207994461e-05, "step": 234730 }, { "epoch": 66.63071246097077, "grad_norm": 0.012238714843988419, "learning_rate": 3.339625319330116e-05, "loss": 0.0009200487285852433, "step": 234740 }, { "epoch": 66.63355095089412, "grad_norm": 0.002214085776358843, "learning_rate": 3.339341470337781e-05, "loss": 0.00011819843202829362, "step": 234750 }, { "epoch": 66.63638944081748, "grad_norm": 0.03451623395085335, "learning_rate": 3.3390576213454443e-05, "loss": 0.00020445995032787324, "step": 234760 }, { "epoch": 66.63922793074084, "grad_norm": 0.15443243086338043, "learning_rate": 3.3387737723531085e-05, "loss": 0.00014475174248218536, "step": 234770 }, { "epoch": 66.64206642066421, "grad_norm": 0.0005294090951792896, "learning_rate": 3.3384899233607726e-05, "loss": 1.781899482011795e-05, "step": 234780 }, { "epoch": 66.64490491058757, "grad_norm": 0.0006029267096891999, "learning_rate": 3.338206074368436e-05, "loss": 4.58545982837677e-05, "step": 234790 }, { "epoch": 66.64774340051093, "grad_norm": 0.00927266851067543, "learning_rate": 3.3379222253761e-05, "loss": 2.2366642951965332e-05, "step": 234800 }, { "epoch": 66.6505818904343, "grad_norm": 0.1216154545545578, "learning_rate": 3.337638376383764e-05, "loss": 4.4553354382514956e-05, "step": 234810 }, { "epoch": 66.65342038035764, "grad_norm": 0.004937862046062946, "learning_rate": 3.337354527391428e-05, "loss": 2.8943829238414765e-05, "step": 234820 }, { "epoch": 66.65625887028101, "grad_norm": 0.011889890767633915, "learning_rate": 3.337070678399092e-05, "loss": 3.979243338108063e-05, "step": 234830 }, { "epoch": 66.65909736020437, "grad_norm": 0.0013796011917293072, "learning_rate": 3.3367868294067554e-05, "loss": 5.417447537183762e-05, "step": 234840 }, { "epoch": 66.66193585012773, "grad_norm": 0.011169521138072014, "learning_rate": 3.3365029804144196e-05, "loss": 4.869047552347183e-05, "step": 234850 }, { "epoch": 66.6647743400511, "grad_norm": 0.0017548577161505818, "learning_rate": 3.336219131422084e-05, "loss": 6.532836705446244e-05, "step": 234860 }, { "epoch": 66.66761282997446, "grad_norm": 0.1221914291381836, "learning_rate": 3.335935282429747e-05, "loss": 0.00010351371020078659, "step": 234870 }, { "epoch": 66.67045131989781, "grad_norm": 0.004762358497828245, "learning_rate": 3.335651433437412e-05, "loss": 3.044232726097107e-05, "step": 234880 }, { "epoch": 66.67328980982117, "grad_norm": 0.0016065819654613733, "learning_rate": 3.3353675844450754e-05, "loss": 2.5412999093532562e-05, "step": 234890 }, { "epoch": 66.67612829974453, "grad_norm": 0.012008058838546276, "learning_rate": 3.3350837354527396e-05, "loss": 5.2382983267307284e-05, "step": 234900 }, { "epoch": 66.6789667896679, "grad_norm": 0.0014888823498040438, "learning_rate": 3.334799886460403e-05, "loss": 2.834070473909378e-05, "step": 234910 }, { "epoch": 66.68180527959126, "grad_norm": 0.021706106141209602, "learning_rate": 3.334516037468067e-05, "loss": 9.21059399843216e-05, "step": 234920 }, { "epoch": 66.68464376951462, "grad_norm": 0.0010935028549283743, "learning_rate": 3.334232188475731e-05, "loss": 4.296340048313141e-05, "step": 234930 }, { "epoch": 66.68748225943799, "grad_norm": 0.04080742225050926, "learning_rate": 3.333948339483395e-05, "loss": 3.405585885047912e-05, "step": 234940 }, { "epoch": 66.69032074936133, "grad_norm": 0.002685332903638482, "learning_rate": 3.333664490491059e-05, "loss": 7.272697985172272e-05, "step": 234950 }, { "epoch": 66.6931592392847, "grad_norm": 0.015683112666010857, "learning_rate": 3.333380641498723e-05, "loss": 4.6042539179325106e-05, "step": 234960 }, { "epoch": 66.69599772920806, "grad_norm": 0.003452928503975272, "learning_rate": 3.3330967925063865e-05, "loss": 3.52758914232254e-05, "step": 234970 }, { "epoch": 66.69883621913142, "grad_norm": 0.021866148337721825, "learning_rate": 3.3328129435140506e-05, "loss": 3.9458274841308594e-05, "step": 234980 }, { "epoch": 66.70167470905479, "grad_norm": 0.07526488602161407, "learning_rate": 3.332529094521715e-05, "loss": 4.515256732702255e-05, "step": 234990 }, { "epoch": 66.70451319897815, "grad_norm": 0.00047511979937553406, "learning_rate": 3.332245245529378e-05, "loss": 0.00011074505746364594, "step": 235000 }, { "epoch": 66.70451319897815, "eval_accuracy": 0.9862020728683156, "eval_loss": 0.05366000533103943, "eval_runtime": 40.6695, "eval_samples_per_second": 386.703, "eval_steps_per_second": 6.049, "step": 235000 }, { "epoch": 66.70735168890151, "grad_norm": 0.032849330455064774, "learning_rate": 3.3319613965370424e-05, "loss": 7.836706936359406e-05, "step": 235010 }, { "epoch": 66.71019017882486, "grad_norm": 0.012137582525610924, "learning_rate": 3.3316775475447065e-05, "loss": 3.8341432809829715e-05, "step": 235020 }, { "epoch": 66.71302866874822, "grad_norm": 0.23480316996574402, "learning_rate": 3.33139369855237e-05, "loss": 7.438752800226212e-05, "step": 235030 }, { "epoch": 66.71586715867159, "grad_norm": 0.0032549567986279726, "learning_rate": 3.331109849560034e-05, "loss": 1.9013695418834686e-05, "step": 235040 }, { "epoch": 66.71870564859495, "grad_norm": 0.005747917108237743, "learning_rate": 3.330826000567698e-05, "loss": 4.0259212255477905e-05, "step": 235050 }, { "epoch": 66.72154413851831, "grad_norm": 0.03675966337323189, "learning_rate": 3.3305421515753624e-05, "loss": 3.303270787000656e-05, "step": 235060 }, { "epoch": 66.72438262844167, "grad_norm": 0.003026923630386591, "learning_rate": 3.330258302583026e-05, "loss": 3.2401084899902345e-05, "step": 235070 }, { "epoch": 66.72722111836502, "grad_norm": 0.00546652264893055, "learning_rate": 3.32997445359069e-05, "loss": 1.7414242029190065e-05, "step": 235080 }, { "epoch": 66.73005960828839, "grad_norm": 0.005777087528258562, "learning_rate": 3.329690604598354e-05, "loss": 4.371199756860733e-05, "step": 235090 }, { "epoch": 66.73289809821175, "grad_norm": 0.005508020985871553, "learning_rate": 3.3294067556060176e-05, "loss": 3.390386700630188e-05, "step": 235100 }, { "epoch": 66.73573658813511, "grad_norm": 0.0019852023106068373, "learning_rate": 3.329122906613682e-05, "loss": 9.80626791715622e-05, "step": 235110 }, { "epoch": 66.73857507805847, "grad_norm": 0.009772718884050846, "learning_rate": 3.328839057621346e-05, "loss": 2.576597034931183e-05, "step": 235120 }, { "epoch": 66.74141356798184, "grad_norm": 0.016603481024503708, "learning_rate": 3.328555208629009e-05, "loss": 3.616549074649811e-05, "step": 235130 }, { "epoch": 66.7442520579052, "grad_norm": 0.003794442629441619, "learning_rate": 3.3282713596366734e-05, "loss": 1.2240931391716004e-05, "step": 235140 }, { "epoch": 66.74709054782855, "grad_norm": 0.0024879910051822662, "learning_rate": 3.3279875106443376e-05, "loss": 3.359653055667877e-05, "step": 235150 }, { "epoch": 66.74992903775191, "grad_norm": 0.00581262307241559, "learning_rate": 3.327703661652001e-05, "loss": 0.00015354100614786148, "step": 235160 }, { "epoch": 66.75276752767527, "grad_norm": 0.004579245578497648, "learning_rate": 3.327419812659665e-05, "loss": 3.0994415283203125e-05, "step": 235170 }, { "epoch": 66.75560601759864, "grad_norm": 0.026217227801680565, "learning_rate": 3.327135963667329e-05, "loss": 3.3233501017093656e-05, "step": 235180 }, { "epoch": 66.758444507522, "grad_norm": 0.02395874261856079, "learning_rate": 3.3268521146749934e-05, "loss": 2.9577873647212982e-05, "step": 235190 }, { "epoch": 66.76128299744536, "grad_norm": 0.17159689962863922, "learning_rate": 3.326568265682657e-05, "loss": 9.511932730674744e-05, "step": 235200 }, { "epoch": 66.76412148736873, "grad_norm": 0.0062864613719284534, "learning_rate": 3.3262844166903204e-05, "loss": 0.0002731984481215477, "step": 235210 }, { "epoch": 66.76695997729207, "grad_norm": 0.004374248441308737, "learning_rate": 3.326000567697985e-05, "loss": 0.0009537961333990097, "step": 235220 }, { "epoch": 66.76979846721544, "grad_norm": 0.18385571241378784, "learning_rate": 3.3257167187056486e-05, "loss": 0.00039400625973939896, "step": 235230 }, { "epoch": 66.7726369571388, "grad_norm": 0.2390095442533493, "learning_rate": 3.325432869713313e-05, "loss": 0.02291748970746994, "step": 235240 }, { "epoch": 66.77547544706216, "grad_norm": 0.3111995458602905, "learning_rate": 3.325149020720977e-05, "loss": 0.0001573227345943451, "step": 235250 }, { "epoch": 66.77831393698553, "grad_norm": 0.018368910998106003, "learning_rate": 3.3248651717286404e-05, "loss": 0.0003992345184087753, "step": 235260 }, { "epoch": 66.78115242690889, "grad_norm": 0.08644651621580124, "learning_rate": 3.3245813227363045e-05, "loss": 0.00015862919390201568, "step": 235270 }, { "epoch": 66.78399091683225, "grad_norm": 0.0550009123980999, "learning_rate": 3.3242974737439687e-05, "loss": 0.001237543486058712, "step": 235280 }, { "epoch": 66.7868294067556, "grad_norm": 0.034670639783144, "learning_rate": 3.324013624751632e-05, "loss": 0.00031780097633600236, "step": 235290 }, { "epoch": 66.78966789667896, "grad_norm": 0.012595516629517078, "learning_rate": 3.323729775759296e-05, "loss": 8.523538708686829e-05, "step": 235300 }, { "epoch": 66.79250638660233, "grad_norm": 0.03115265816450119, "learning_rate": 3.32344592676696e-05, "loss": 0.008711443841457367, "step": 235310 }, { "epoch": 66.79534487652569, "grad_norm": 0.009574895724654198, "learning_rate": 3.323162077774624e-05, "loss": 0.010397931933403015, "step": 235320 }, { "epoch": 66.79818336644905, "grad_norm": 5.306119441986084, "learning_rate": 3.322878228782288e-05, "loss": 0.001531030796468258, "step": 235330 }, { "epoch": 66.80102185637242, "grad_norm": 11.221332550048828, "learning_rate": 3.3225943797899514e-05, "loss": 0.010035093128681182, "step": 235340 }, { "epoch": 66.80386034629576, "grad_norm": 0.09350492060184479, "learning_rate": 3.322310530797616e-05, "loss": 0.003097962588071823, "step": 235350 }, { "epoch": 66.80669883621913, "grad_norm": 0.02065095119178295, "learning_rate": 3.32202668180528e-05, "loss": 0.0013243354856967925, "step": 235360 }, { "epoch": 66.80953732614249, "grad_norm": 0.07913009822368622, "learning_rate": 3.321742832812943e-05, "loss": 0.0009563073515892028, "step": 235370 }, { "epoch": 66.81237581606585, "grad_norm": 0.00511951744556427, "learning_rate": 3.321458983820608e-05, "loss": 0.00347461998462677, "step": 235380 }, { "epoch": 66.81521430598922, "grad_norm": 0.007052653469145298, "learning_rate": 3.3211751348282715e-05, "loss": 0.0008174631744623184, "step": 235390 }, { "epoch": 66.81805279591258, "grad_norm": 0.19396020472049713, "learning_rate": 3.3208912858359356e-05, "loss": 0.002036057785153389, "step": 235400 }, { "epoch": 66.82089128583594, "grad_norm": 0.02286534011363983, "learning_rate": 3.320607436843599e-05, "loss": 0.0069438830018043515, "step": 235410 }, { "epoch": 66.82372977575929, "grad_norm": 0.11903835833072662, "learning_rate": 3.320323587851263e-05, "loss": 0.0019691998139023783, "step": 235420 }, { "epoch": 66.82656826568265, "grad_norm": 0.1200731173157692, "learning_rate": 3.320039738858927e-05, "loss": 0.0008320145308971405, "step": 235430 }, { "epoch": 66.82940675560602, "grad_norm": 0.24124258756637573, "learning_rate": 3.319755889866591e-05, "loss": 0.0010638291016221047, "step": 235440 }, { "epoch": 66.83224524552938, "grad_norm": 0.006829149089753628, "learning_rate": 3.319472040874255e-05, "loss": 0.003214488551020622, "step": 235450 }, { "epoch": 66.83508373545274, "grad_norm": 9.488683700561523, "learning_rate": 3.319188191881919e-05, "loss": 0.002327289804816246, "step": 235460 }, { "epoch": 66.8379222253761, "grad_norm": 0.1763646900653839, "learning_rate": 3.3189043428895825e-05, "loss": 8.484870195388794e-05, "step": 235470 }, { "epoch": 66.84076071529947, "grad_norm": 0.3869445323944092, "learning_rate": 3.318620493897247e-05, "loss": 0.010157690942287445, "step": 235480 }, { "epoch": 66.84359920522282, "grad_norm": 0.10721765458583832, "learning_rate": 3.318336644904911e-05, "loss": 0.00046672262251377104, "step": 235490 }, { "epoch": 66.84643769514618, "grad_norm": 2.9592907428741455, "learning_rate": 3.318052795912574e-05, "loss": 0.0013257816433906555, "step": 235500 }, { "epoch": 66.84643769514618, "eval_accuracy": 0.9824505627265213, "eval_loss": 0.07485096901655197, "eval_runtime": 35.1447, "eval_samples_per_second": 447.493, "eval_steps_per_second": 7.0, "step": 235500 }, { "epoch": 66.84927618506954, "grad_norm": 0.004122703801840544, "learning_rate": 3.3177689469202384e-05, "loss": 0.002026869170367718, "step": 235510 }, { "epoch": 66.8521146749929, "grad_norm": 0.09068160504102707, "learning_rate": 3.3174850979279025e-05, "loss": 0.00013980101794004441, "step": 235520 }, { "epoch": 66.85495316491627, "grad_norm": 0.005209843162447214, "learning_rate": 3.317201248935567e-05, "loss": 0.0002538057044148445, "step": 235530 }, { "epoch": 66.85779165483963, "grad_norm": 0.6610313057899475, "learning_rate": 3.31691739994323e-05, "loss": 0.0005240162834525108, "step": 235540 }, { "epoch": 66.86063014476298, "grad_norm": 0.10956595838069916, "learning_rate": 3.316633550950894e-05, "loss": 0.00030403919517993925, "step": 235550 }, { "epoch": 66.86346863468634, "grad_norm": 0.024199767038226128, "learning_rate": 3.3163497019585584e-05, "loss": 0.0003744740039110184, "step": 235560 }, { "epoch": 66.8663071246097, "grad_norm": 0.013268919661641121, "learning_rate": 3.316065852966222e-05, "loss": 0.0016329826787114144, "step": 235570 }, { "epoch": 66.86914561453307, "grad_norm": 0.06994390487670898, "learning_rate": 3.315782003973886e-05, "loss": 0.00014487337321043015, "step": 235580 }, { "epoch": 66.87198410445643, "grad_norm": 0.00435638427734375, "learning_rate": 3.31549815498155e-05, "loss": 0.005183162540197373, "step": 235590 }, { "epoch": 66.8748225943798, "grad_norm": 0.02610718086361885, "learning_rate": 3.3152143059892136e-05, "loss": 0.0021721184253692626, "step": 235600 }, { "epoch": 66.87766108430316, "grad_norm": 0.12185775488615036, "learning_rate": 3.314930456996878e-05, "loss": 0.0016647417098283768, "step": 235610 }, { "epoch": 66.8804995742265, "grad_norm": 0.09120426326990128, "learning_rate": 3.314646608004542e-05, "loss": 0.007948152720928192, "step": 235620 }, { "epoch": 66.88333806414987, "grad_norm": 0.10971169173717499, "learning_rate": 3.314362759012205e-05, "loss": 0.0026552341878414154, "step": 235630 }, { "epoch": 66.88617655407323, "grad_norm": 0.05383387580513954, "learning_rate": 3.3140789100198695e-05, "loss": 0.0019154137000441552, "step": 235640 }, { "epoch": 66.8890150439966, "grad_norm": 0.21820537745952606, "learning_rate": 3.3137950610275336e-05, "loss": 0.005821800976991654, "step": 235650 }, { "epoch": 66.89185353391996, "grad_norm": 0.24948306381702423, "learning_rate": 3.313511212035198e-05, "loss": 0.0010073181241750718, "step": 235660 }, { "epoch": 66.89469202384332, "grad_norm": 0.11191786825656891, "learning_rate": 3.313227363042861e-05, "loss": 0.00017499178647994995, "step": 235670 }, { "epoch": 66.89753051376668, "grad_norm": 0.19871297478675842, "learning_rate": 3.3129435140505253e-05, "loss": 0.0022937742993235587, "step": 235680 }, { "epoch": 66.90036900369003, "grad_norm": 0.11251785606145859, "learning_rate": 3.3126596650581895e-05, "loss": 0.001903284899890423, "step": 235690 }, { "epoch": 66.9032074936134, "grad_norm": 0.09039126336574554, "learning_rate": 3.312375816065853e-05, "loss": 0.0002550659701228142, "step": 235700 }, { "epoch": 66.90604598353676, "grad_norm": 0.08607237040996552, "learning_rate": 3.312091967073517e-05, "loss": 0.00046192556619644164, "step": 235710 }, { "epoch": 66.90888447346012, "grad_norm": 0.03636409714818001, "learning_rate": 3.311808118081181e-05, "loss": 0.0011141417548060418, "step": 235720 }, { "epoch": 66.91172296338348, "grad_norm": 0.011544411070644855, "learning_rate": 3.311524269088845e-05, "loss": 0.00010568574070930481, "step": 235730 }, { "epoch": 66.91456145330685, "grad_norm": 0.03560793772339821, "learning_rate": 3.311240420096509e-05, "loss": 0.0004059983417391777, "step": 235740 }, { "epoch": 66.91739994323021, "grad_norm": 0.028611332178115845, "learning_rate": 3.310956571104173e-05, "loss": 0.00051728505641222, "step": 235750 }, { "epoch": 66.92023843315356, "grad_norm": 0.4601851999759674, "learning_rate": 3.3106727221118364e-05, "loss": 0.00016394704580307006, "step": 235760 }, { "epoch": 66.92307692307692, "grad_norm": 0.29048582911491394, "learning_rate": 3.3103888731195005e-05, "loss": 0.0001401541754603386, "step": 235770 }, { "epoch": 66.92591541300028, "grad_norm": 0.09526367485523224, "learning_rate": 3.310105024127165e-05, "loss": 0.0062000565230846405, "step": 235780 }, { "epoch": 66.92875390292365, "grad_norm": 0.02970721758902073, "learning_rate": 3.309821175134828e-05, "loss": 0.0008976412937045097, "step": 235790 }, { "epoch": 66.93159239284701, "grad_norm": 0.06132617965340614, "learning_rate": 3.309537326142492e-05, "loss": 0.00011873897165060043, "step": 235800 }, { "epoch": 66.93443088277037, "grad_norm": 0.023173363879323006, "learning_rate": 3.3092534771501564e-05, "loss": 0.001711305044591427, "step": 235810 }, { "epoch": 66.93726937269372, "grad_norm": 0.06650818884372711, "learning_rate": 3.3089696281578206e-05, "loss": 0.0001705782487988472, "step": 235820 }, { "epoch": 66.94010786261708, "grad_norm": 0.04740455001592636, "learning_rate": 3.308685779165484e-05, "loss": 0.0008689980953931808, "step": 235830 }, { "epoch": 66.94294635254045, "grad_norm": 0.030142193660140038, "learning_rate": 3.3084019301731475e-05, "loss": 0.001025652326643467, "step": 235840 }, { "epoch": 66.94578484246381, "grad_norm": 0.041436050087213516, "learning_rate": 3.308118081180812e-05, "loss": 0.00034448765218257903, "step": 235850 }, { "epoch": 66.94862333238717, "grad_norm": 0.07479967176914215, "learning_rate": 3.307834232188476e-05, "loss": 0.000338885560631752, "step": 235860 }, { "epoch": 66.95146182231053, "grad_norm": 0.13650241494178772, "learning_rate": 3.30755038319614e-05, "loss": 0.0004656044766306877, "step": 235870 }, { "epoch": 66.9543003122339, "grad_norm": 0.02559690922498703, "learning_rate": 3.307266534203804e-05, "loss": 0.00020824708044528962, "step": 235880 }, { "epoch": 66.95713880215725, "grad_norm": 0.5309410095214844, "learning_rate": 3.3069826852114675e-05, "loss": 0.0009301483631134033, "step": 235890 }, { "epoch": 66.95997729208061, "grad_norm": 0.0025828233920037746, "learning_rate": 3.3066988362191316e-05, "loss": 7.372908294200897e-05, "step": 235900 }, { "epoch": 66.96281578200397, "grad_norm": 0.043481796979904175, "learning_rate": 3.306414987226796e-05, "loss": 0.0005184162408113479, "step": 235910 }, { "epoch": 66.96565427192733, "grad_norm": 0.03495152294635773, "learning_rate": 3.306131138234459e-05, "loss": 8.65185633301735e-05, "step": 235920 }, { "epoch": 66.9684927618507, "grad_norm": 0.004917552229017019, "learning_rate": 3.3058472892421234e-05, "loss": 8.901171386241913e-05, "step": 235930 }, { "epoch": 66.97133125177406, "grad_norm": 0.017724696546792984, "learning_rate": 3.305563440249787e-05, "loss": 0.00014808457344770432, "step": 235940 }, { "epoch": 66.97416974169742, "grad_norm": 0.03397870808839798, "learning_rate": 3.3052795912574516e-05, "loss": 0.00027983114123344424, "step": 235950 }, { "epoch": 66.97700823162077, "grad_norm": 0.012461250647902489, "learning_rate": 3.304995742265115e-05, "loss": 0.005855099856853485, "step": 235960 }, { "epoch": 66.97984672154413, "grad_norm": 0.010510618798434734, "learning_rate": 3.3047118932727786e-05, "loss": 0.0004103502258658409, "step": 235970 }, { "epoch": 66.9826852114675, "grad_norm": 0.1379183679819107, "learning_rate": 3.3044280442804434e-05, "loss": 0.00010143611580133438, "step": 235980 }, { "epoch": 66.98552370139086, "grad_norm": 0.0029529521707445383, "learning_rate": 3.304144195288107e-05, "loss": 0.0003821305930614471, "step": 235990 }, { "epoch": 66.98836219131422, "grad_norm": 13.23208999633789, "learning_rate": 3.303860346295771e-05, "loss": 0.004158215224742889, "step": 236000 }, { "epoch": 66.98836219131422, "eval_accuracy": 0.9844216951739048, "eval_loss": 0.06385692209005356, "eval_runtime": 37.2693, "eval_samples_per_second": 421.982, "eval_steps_per_second": 6.601, "step": 236000 }, { "epoch": 66.99120068123759, "grad_norm": 0.0085616335272789, "learning_rate": 3.303576497303435e-05, "loss": 5.86826354265213e-05, "step": 236010 }, { "epoch": 66.99403917116095, "grad_norm": 1.962290644645691, "learning_rate": 3.3032926483110986e-05, "loss": 0.0005087435245513916, "step": 236020 }, { "epoch": 66.9968776610843, "grad_norm": 0.5152590870857239, "learning_rate": 3.303008799318763e-05, "loss": 0.00011567920446395875, "step": 236030 }, { "epoch": 66.99971615100766, "grad_norm": 0.04915137216448784, "learning_rate": 3.302724950326426e-05, "loss": 0.0001967931166291237, "step": 236040 }, { "epoch": 67.00255464093102, "grad_norm": 0.008339615538716316, "learning_rate": 3.30244110133409e-05, "loss": 0.001250405516475439, "step": 236050 }, { "epoch": 67.00539313085439, "grad_norm": 0.040827665477991104, "learning_rate": 3.3021572523417544e-05, "loss": 0.00044583957642316816, "step": 236060 }, { "epoch": 67.00823162077775, "grad_norm": 0.006998925935477018, "learning_rate": 3.301873403349418e-05, "loss": 0.006380677223205566, "step": 236070 }, { "epoch": 67.01107011070111, "grad_norm": 0.029639972373843193, "learning_rate": 3.301589554357083e-05, "loss": 6.373245269060135e-05, "step": 236080 }, { "epoch": 67.01390860062446, "grad_norm": 0.040676530450582504, "learning_rate": 3.301305705364746e-05, "loss": 0.00765426903963089, "step": 236090 }, { "epoch": 67.01674709054782, "grad_norm": 0.007100678980350494, "learning_rate": 3.3010218563724096e-05, "loss": 0.000388488732278347, "step": 236100 }, { "epoch": 67.01958558047119, "grad_norm": 0.08468178659677505, "learning_rate": 3.3007380073800744e-05, "loss": 0.00015398748219013213, "step": 236110 }, { "epoch": 67.02242407039455, "grad_norm": 0.10620256513357162, "learning_rate": 3.300454158387738e-05, "loss": 0.0008503269404172897, "step": 236120 }, { "epoch": 67.02526256031791, "grad_norm": 0.048062317073345184, "learning_rate": 3.300170309395402e-05, "loss": 0.0004968544468283653, "step": 236130 }, { "epoch": 67.02810105024128, "grad_norm": 0.010166301392018795, "learning_rate": 3.2998864604030655e-05, "loss": 5.679745227098465e-05, "step": 236140 }, { "epoch": 67.03093954016464, "grad_norm": 0.467644065618515, "learning_rate": 3.2996026114107296e-05, "loss": 0.0007524428889155388, "step": 236150 }, { "epoch": 67.03377803008799, "grad_norm": 0.025793321430683136, "learning_rate": 3.299318762418394e-05, "loss": 0.00011954214423894882, "step": 236160 }, { "epoch": 67.03661652001135, "grad_norm": 0.0007072489243000746, "learning_rate": 3.299034913426057e-05, "loss": 0.00010977406054735184, "step": 236170 }, { "epoch": 67.03945500993471, "grad_norm": 0.005557655822485685, "learning_rate": 3.2987510644337214e-05, "loss": 0.00011277422308921813, "step": 236180 }, { "epoch": 67.04229349985808, "grad_norm": 0.0026987537275999784, "learning_rate": 3.2984672154413855e-05, "loss": 2.8331391513347624e-05, "step": 236190 }, { "epoch": 67.04513198978144, "grad_norm": 0.043370723724365234, "learning_rate": 3.298183366449049e-05, "loss": 7.812902331352233e-05, "step": 236200 }, { "epoch": 67.0479704797048, "grad_norm": 0.003921020310372114, "learning_rate": 3.297899517456713e-05, "loss": 0.00013054944574832916, "step": 236210 }, { "epoch": 67.05080896962816, "grad_norm": 0.026304814964532852, "learning_rate": 3.297615668464377e-05, "loss": 5.346592515707016e-05, "step": 236220 }, { "epoch": 67.05364745955151, "grad_norm": 0.006330288480967283, "learning_rate": 3.297331819472041e-05, "loss": 0.0006626872345805168, "step": 236230 }, { "epoch": 67.05648594947488, "grad_norm": 0.01702824793756008, "learning_rate": 3.297047970479705e-05, "loss": 5.251988768577576e-05, "step": 236240 }, { "epoch": 67.05932443939824, "grad_norm": 0.00780765013769269, "learning_rate": 3.296764121487369e-05, "loss": 6.0372985899448395e-05, "step": 236250 }, { "epoch": 67.0621629293216, "grad_norm": 0.005323467776179314, "learning_rate": 3.2964802724950324e-05, "loss": 8.812788873910904e-05, "step": 236260 }, { "epoch": 67.06500141924496, "grad_norm": 0.004816265311092138, "learning_rate": 3.2961964235026966e-05, "loss": 5.644354969263077e-05, "step": 236270 }, { "epoch": 67.06783990916833, "grad_norm": 0.0032901966478675604, "learning_rate": 3.295912574510361e-05, "loss": 5.574114620685577e-05, "step": 236280 }, { "epoch": 67.07067839909168, "grad_norm": 0.01257900707423687, "learning_rate": 3.295628725518025e-05, "loss": 4.453267902135849e-05, "step": 236290 }, { "epoch": 67.07351688901504, "grad_norm": 0.027645964175462723, "learning_rate": 3.295344876525688e-05, "loss": 6.434880197048187e-05, "step": 236300 }, { "epoch": 67.0763553789384, "grad_norm": 0.00848790816962719, "learning_rate": 3.2950610275333525e-05, "loss": 5.166530609130859e-05, "step": 236310 }, { "epoch": 67.07919386886176, "grad_norm": 0.006510869599878788, "learning_rate": 3.2947771785410166e-05, "loss": 4.759225994348526e-05, "step": 236320 }, { "epoch": 67.08203235878513, "grad_norm": 0.04598970711231232, "learning_rate": 3.29449332954868e-05, "loss": 5.749668926000595e-05, "step": 236330 }, { "epoch": 67.08487084870849, "grad_norm": 0.01624325104057789, "learning_rate": 3.294209480556344e-05, "loss": 0.0001619897782802582, "step": 236340 }, { "epoch": 67.08770933863185, "grad_norm": 0.008510448969900608, "learning_rate": 3.293925631564008e-05, "loss": 3.35007905960083e-05, "step": 236350 }, { "epoch": 67.0905478285552, "grad_norm": 0.806091845035553, "learning_rate": 3.293641782571672e-05, "loss": 0.00015933960676193238, "step": 236360 }, { "epoch": 67.09338631847857, "grad_norm": 6.369908332824707, "learning_rate": 3.293357933579336e-05, "loss": 0.0020744575187563894, "step": 236370 }, { "epoch": 67.09622480840193, "grad_norm": 0.014632739126682281, "learning_rate": 3.293074084587e-05, "loss": 3.889035433530807e-05, "step": 236380 }, { "epoch": 67.09906329832529, "grad_norm": 0.039024718105793, "learning_rate": 3.2927902355946635e-05, "loss": 4.377402365207672e-05, "step": 236390 }, { "epoch": 67.10190178824865, "grad_norm": 0.0012953280238434672, "learning_rate": 3.2925063866023277e-05, "loss": 0.002801315113902092, "step": 236400 }, { "epoch": 67.10474027817202, "grad_norm": 0.023164138197898865, "learning_rate": 3.292222537609992e-05, "loss": 0.0003058355301618576, "step": 236410 }, { "epoch": 67.10757876809538, "grad_norm": 0.00321371853351593, "learning_rate": 3.291938688617656e-05, "loss": 0.00010738074779510498, "step": 236420 }, { "epoch": 67.11041725801873, "grad_norm": 0.5606908202171326, "learning_rate": 3.2916548396253194e-05, "loss": 0.00010309852659702301, "step": 236430 }, { "epoch": 67.11325574794209, "grad_norm": 0.00365451886318624, "learning_rate": 3.291370990632983e-05, "loss": 0.0005446938797831535, "step": 236440 }, { "epoch": 67.11609423786545, "grad_norm": 0.029295196756720543, "learning_rate": 3.291087141640648e-05, "loss": 0.00269508920609951, "step": 236450 }, { "epoch": 67.11893272778882, "grad_norm": 0.008856239728629589, "learning_rate": 3.290803292648311e-05, "loss": 5.47606498003006e-05, "step": 236460 }, { "epoch": 67.12177121771218, "grad_norm": 0.46190008521080017, "learning_rate": 3.290519443655975e-05, "loss": 0.0001828141510486603, "step": 236470 }, { "epoch": 67.12460970763554, "grad_norm": 0.01851881854236126, "learning_rate": 3.2902355946636394e-05, "loss": 5.613062530755997e-05, "step": 236480 }, { "epoch": 67.1274481975589, "grad_norm": 0.07772661745548248, "learning_rate": 3.289951745671303e-05, "loss": 6.460882723331451e-05, "step": 236490 }, { "epoch": 67.13028668748225, "grad_norm": 0.0022708650212734938, "learning_rate": 3.289667896678967e-05, "loss": 9.278859943151474e-05, "step": 236500 }, { "epoch": 67.13028668748225, "eval_accuracy": 0.9862656577859732, "eval_loss": 0.0581180602312088, "eval_runtime": 35.886, "eval_samples_per_second": 438.249, "eval_steps_per_second": 6.855, "step": 236500 }, { "epoch": 67.13312517740562, "grad_norm": 0.0011283191852271557, "learning_rate": 3.289384047686631e-05, "loss": 0.00012691859155893325, "step": 236510 }, { "epoch": 67.13596366732898, "grad_norm": 0.003917803056538105, "learning_rate": 3.2891001986942946e-05, "loss": 7.002204656600953e-05, "step": 236520 }, { "epoch": 67.13880215725234, "grad_norm": 0.008691053837537766, "learning_rate": 3.288816349701959e-05, "loss": 0.00023017823696136475, "step": 236530 }, { "epoch": 67.1416406471757, "grad_norm": 0.005169979762285948, "learning_rate": 3.288532500709622e-05, "loss": 0.00012344308197498322, "step": 236540 }, { "epoch": 67.14447913709907, "grad_norm": 0.00143519532866776, "learning_rate": 3.288248651717287e-05, "loss": 0.00024543963372707366, "step": 236550 }, { "epoch": 67.14731762702242, "grad_norm": 0.06840070337057114, "learning_rate": 3.2879648027249505e-05, "loss": 4.2729265987873075e-05, "step": 236560 }, { "epoch": 67.15015611694578, "grad_norm": 0.0029445830732584, "learning_rate": 3.287680953732614e-05, "loss": 3.976244479417801e-05, "step": 236570 }, { "epoch": 67.15299460686914, "grad_norm": 0.10308792442083359, "learning_rate": 3.287397104740279e-05, "loss": 0.00013977009803056717, "step": 236580 }, { "epoch": 67.1558330967925, "grad_norm": 8.124159812927246, "learning_rate": 3.287113255747942e-05, "loss": 0.0013223081827163697, "step": 236590 }, { "epoch": 67.15867158671587, "grad_norm": 0.014427386224269867, "learning_rate": 3.286829406755606e-05, "loss": 3.9790011942386624e-05, "step": 236600 }, { "epoch": 67.16151007663923, "grad_norm": 0.007404904812574387, "learning_rate": 3.2865455577632705e-05, "loss": 0.00047574210911989213, "step": 236610 }, { "epoch": 67.1643485665626, "grad_norm": 0.14312788844108582, "learning_rate": 3.286261708770934e-05, "loss": 6.349552422761917e-05, "step": 236620 }, { "epoch": 67.16718705648594, "grad_norm": 0.0018891040235757828, "learning_rate": 3.285977859778598e-05, "loss": 4.5518018305301665e-05, "step": 236630 }, { "epoch": 67.1700255464093, "grad_norm": 0.0033319341018795967, "learning_rate": 3.2856940107862615e-05, "loss": 4.263278096914291e-05, "step": 236640 }, { "epoch": 67.17286403633267, "grad_norm": 0.03142520412802696, "learning_rate": 3.285410161793926e-05, "loss": 0.00016688592731952667, "step": 236650 }, { "epoch": 67.17570252625603, "grad_norm": 0.03027350641787052, "learning_rate": 3.28512631280159e-05, "loss": 4.904773086309433e-05, "step": 236660 }, { "epoch": 67.1785410161794, "grad_norm": 0.060897376388311386, "learning_rate": 3.284842463809253e-05, "loss": 0.0001523854210972786, "step": 236670 }, { "epoch": 67.18137950610276, "grad_norm": 0.05814886465668678, "learning_rate": 3.2845586148169174e-05, "loss": 0.00011142659932374954, "step": 236680 }, { "epoch": 67.18421799602612, "grad_norm": 0.0035810316912829876, "learning_rate": 3.2842747658245815e-05, "loss": 5.061626434326172e-05, "step": 236690 }, { "epoch": 67.18705648594947, "grad_norm": 0.0029746179934591055, "learning_rate": 3.283990916832245e-05, "loss": 5.5631250143051146e-05, "step": 236700 }, { "epoch": 67.18989497587283, "grad_norm": 0.012139839120209217, "learning_rate": 3.28370706783991e-05, "loss": 3.162752836942673e-05, "step": 236710 }, { "epoch": 67.1927334657962, "grad_norm": 0.02759607881307602, "learning_rate": 3.283423218847573e-05, "loss": 3.185588866472244e-05, "step": 236720 }, { "epoch": 67.19557195571956, "grad_norm": 0.012636709958314896, "learning_rate": 3.283139369855237e-05, "loss": 5.981922149658203e-05, "step": 236730 }, { "epoch": 67.19841044564292, "grad_norm": 0.003526550717651844, "learning_rate": 3.282855520862901e-05, "loss": 2.4744123220443725e-05, "step": 236740 }, { "epoch": 67.20124893556628, "grad_norm": 0.013087834231555462, "learning_rate": 3.282571671870565e-05, "loss": 2.24115327000618e-05, "step": 236750 }, { "epoch": 67.20408742548965, "grad_norm": 0.0006455993861891329, "learning_rate": 3.282287822878229e-05, "loss": 5.339384078979492e-05, "step": 236760 }, { "epoch": 67.206925915413, "grad_norm": 0.04461754858493805, "learning_rate": 3.2820039738858926e-05, "loss": 3.705006092786789e-05, "step": 236770 }, { "epoch": 67.20976440533636, "grad_norm": 0.0035576787777245045, "learning_rate": 3.281720124893557e-05, "loss": 7.510129362344741e-05, "step": 236780 }, { "epoch": 67.21260289525972, "grad_norm": 0.046129874885082245, "learning_rate": 3.281436275901221e-05, "loss": 4.699751734733581e-05, "step": 236790 }, { "epoch": 67.21544138518308, "grad_norm": 0.05124136433005333, "learning_rate": 3.2811524269088843e-05, "loss": 2.9310956597328187e-05, "step": 236800 }, { "epoch": 67.21827987510645, "grad_norm": 0.035547755658626556, "learning_rate": 3.2808685779165485e-05, "loss": 0.001809811033308506, "step": 236810 }, { "epoch": 67.22111836502981, "grad_norm": 1.0533260107040405, "learning_rate": 3.2805847289242126e-05, "loss": 0.00046549513936042783, "step": 236820 }, { "epoch": 67.22395685495316, "grad_norm": 0.2116013467311859, "learning_rate": 3.280300879931876e-05, "loss": 0.0014216039329767228, "step": 236830 }, { "epoch": 67.22679534487652, "grad_norm": 0.28528159856796265, "learning_rate": 3.28001703093954e-05, "loss": 0.0008917678147554398, "step": 236840 }, { "epoch": 67.22963383479988, "grad_norm": 0.003867104882374406, "learning_rate": 3.2797331819472044e-05, "loss": 0.0014981124550104141, "step": 236850 }, { "epoch": 67.23247232472325, "grad_norm": 0.07592961937189102, "learning_rate": 3.279449332954868e-05, "loss": 0.0003550548106431961, "step": 236860 }, { "epoch": 67.23531081464661, "grad_norm": 0.03282218053936958, "learning_rate": 3.279165483962532e-05, "loss": 0.006305522471666336, "step": 236870 }, { "epoch": 67.23814930456997, "grad_norm": 0.019449295476078987, "learning_rate": 3.278881634970196e-05, "loss": 5.7944655418395995e-05, "step": 236880 }, { "epoch": 67.24098779449334, "grad_norm": 0.03476696088910103, "learning_rate": 3.27859778597786e-05, "loss": 0.000357946939766407, "step": 236890 }, { "epoch": 67.24382628441668, "grad_norm": 0.004001288209110498, "learning_rate": 3.278313936985524e-05, "loss": 0.0006385616958141327, "step": 236900 }, { "epoch": 67.24666477434005, "grad_norm": 0.0009274447802454233, "learning_rate": 3.278030087993188e-05, "loss": 0.00019930601119995118, "step": 236910 }, { "epoch": 67.24950326426341, "grad_norm": 0.006929745431989431, "learning_rate": 3.277746239000852e-05, "loss": 0.0002095576375722885, "step": 236920 }, { "epoch": 67.25234175418677, "grad_norm": 0.005310730542987585, "learning_rate": 3.2774623900085154e-05, "loss": 5.676858127117157e-05, "step": 236930 }, { "epoch": 67.25518024411014, "grad_norm": 0.004295660182833672, "learning_rate": 3.2771785410161796e-05, "loss": 0.0002109980210661888, "step": 236940 }, { "epoch": 67.2580187340335, "grad_norm": 0.01736057735979557, "learning_rate": 3.276894692023844e-05, "loss": 0.0013730626553297043, "step": 236950 }, { "epoch": 67.26085722395686, "grad_norm": 0.004924266133457422, "learning_rate": 3.276610843031507e-05, "loss": 0.0002207251265645027, "step": 236960 }, { "epoch": 67.26369571388021, "grad_norm": 0.28037258982658386, "learning_rate": 3.276326994039171e-05, "loss": 7.732510566711426e-05, "step": 236970 }, { "epoch": 67.26653420380357, "grad_norm": 0.008904306218028069, "learning_rate": 3.2760431450468354e-05, "loss": 6.347056478261948e-05, "step": 236980 }, { "epoch": 67.26937269372694, "grad_norm": 0.022099707275629044, "learning_rate": 3.275759296054499e-05, "loss": 0.00011818613857030868, "step": 236990 }, { "epoch": 67.2722111836503, "grad_norm": 0.00210969103500247, "learning_rate": 3.275475447062163e-05, "loss": 0.0003177842125296593, "step": 237000 }, { "epoch": 67.2722111836503, "eval_accuracy": 0.9861384879506581, "eval_loss": 0.05847139284014702, "eval_runtime": 46.7957, "eval_samples_per_second": 336.078, "eval_steps_per_second": 5.257, "step": 237000 }, { "epoch": 67.27504967357366, "grad_norm": 2.3141276836395264, "learning_rate": 3.275191598069827e-05, "loss": 0.0006264671683311462, "step": 237010 }, { "epoch": 67.27788816349702, "grad_norm": 0.008514185436069965, "learning_rate": 3.274907749077491e-05, "loss": 3.4718960523605344e-05, "step": 237020 }, { "epoch": 67.28072665342037, "grad_norm": 0.009431811049580574, "learning_rate": 3.274623900085155e-05, "loss": 0.00013957973569631577, "step": 237030 }, { "epoch": 67.28356514334374, "grad_norm": 0.013089030049741268, "learning_rate": 3.274340051092819e-05, "loss": 0.00010854937136173248, "step": 237040 }, { "epoch": 67.2864036332671, "grad_norm": 0.008859180845320225, "learning_rate": 3.274056202100483e-05, "loss": 0.0038688868284225465, "step": 237050 }, { "epoch": 67.28924212319046, "grad_norm": 0.033569663763046265, "learning_rate": 3.2737723531081465e-05, "loss": 0.00010012835264205933, "step": 237060 }, { "epoch": 67.29208061311382, "grad_norm": 0.029650870710611343, "learning_rate": 3.2734885041158106e-05, "loss": 0.00014769211411476136, "step": 237070 }, { "epoch": 67.29491910303719, "grad_norm": 0.02745410054922104, "learning_rate": 3.273204655123475e-05, "loss": 9.073801338672638e-05, "step": 237080 }, { "epoch": 67.29775759296055, "grad_norm": 8.241425514221191, "learning_rate": 3.272920806131138e-05, "loss": 0.0014665622264146804, "step": 237090 }, { "epoch": 67.3005960828839, "grad_norm": 0.029405735433101654, "learning_rate": 3.2726369571388024e-05, "loss": 0.0015028972178697587, "step": 237100 }, { "epoch": 67.30343457280726, "grad_norm": 0.011448947712779045, "learning_rate": 3.2723531081464665e-05, "loss": 0.0009129980579018592, "step": 237110 }, { "epoch": 67.30627306273063, "grad_norm": 0.0394723117351532, "learning_rate": 3.27206925915413e-05, "loss": 0.0014951998367905618, "step": 237120 }, { "epoch": 67.30911155265399, "grad_norm": 0.06797832995653152, "learning_rate": 3.271785410161794e-05, "loss": 0.00011396128684282303, "step": 237130 }, { "epoch": 67.31195004257735, "grad_norm": 0.02656954526901245, "learning_rate": 3.271501561169458e-05, "loss": 8.67864117026329e-05, "step": 237140 }, { "epoch": 67.31478853250071, "grad_norm": 0.013296670280396938, "learning_rate": 3.271217712177122e-05, "loss": 0.00021263081580400467, "step": 237150 }, { "epoch": 67.31762702242408, "grad_norm": 0.0011480350513011217, "learning_rate": 3.270933863184786e-05, "loss": 3.76380980014801e-05, "step": 237160 }, { "epoch": 67.32046551234743, "grad_norm": 0.22604526579380035, "learning_rate": 3.270650014192449e-05, "loss": 0.0006598303094506264, "step": 237170 }, { "epoch": 67.32330400227079, "grad_norm": 0.007384014315903187, "learning_rate": 3.270366165200114e-05, "loss": 0.00021804142743349076, "step": 237180 }, { "epoch": 67.32614249219415, "grad_norm": 0.008672612719237804, "learning_rate": 3.2700823162077776e-05, "loss": 0.0028907662257552148, "step": 237190 }, { "epoch": 67.32898098211751, "grad_norm": 0.0055276183411479, "learning_rate": 3.269798467215441e-05, "loss": 0.00014171451330184935, "step": 237200 }, { "epoch": 67.33181947204088, "grad_norm": 0.024827878922224045, "learning_rate": 3.269514618223106e-05, "loss": 0.0007763523608446121, "step": 237210 }, { "epoch": 67.33465796196424, "grad_norm": 0.004884397145360708, "learning_rate": 3.269230769230769e-05, "loss": 0.0009417681023478508, "step": 237220 }, { "epoch": 67.3374964518876, "grad_norm": 0.1292184591293335, "learning_rate": 3.2689469202384334e-05, "loss": 0.0013511579483747483, "step": 237230 }, { "epoch": 67.34033494181095, "grad_norm": 0.0411476232111454, "learning_rate": 3.2686630712460976e-05, "loss": 0.001558394730091095, "step": 237240 }, { "epoch": 67.34317343173431, "grad_norm": 0.006793994456529617, "learning_rate": 3.268379222253761e-05, "loss": 7.862616330385208e-05, "step": 237250 }, { "epoch": 67.34601192165768, "grad_norm": 0.0024728039279580116, "learning_rate": 3.268095373261425e-05, "loss": 0.00034657400101423264, "step": 237260 }, { "epoch": 67.34885041158104, "grad_norm": 0.002522151218727231, "learning_rate": 3.2678115242690886e-05, "loss": 9.927116334438325e-05, "step": 237270 }, { "epoch": 67.3516889015044, "grad_norm": 0.04076376184821129, "learning_rate": 3.267527675276753e-05, "loss": 0.0003084622323513031, "step": 237280 }, { "epoch": 67.35452739142777, "grad_norm": 0.16731448471546173, "learning_rate": 3.267243826284417e-05, "loss": 0.00011070743203163147, "step": 237290 }, { "epoch": 67.35736588135111, "grad_norm": 0.001304559176787734, "learning_rate": 3.2669599772920804e-05, "loss": 8.484851568937301e-05, "step": 237300 }, { "epoch": 67.36020437127448, "grad_norm": 0.0026891021989285946, "learning_rate": 3.266676128299745e-05, "loss": 0.00014858357608318328, "step": 237310 }, { "epoch": 67.36304286119784, "grad_norm": 4.177457332611084, "learning_rate": 3.2663922793074087e-05, "loss": 0.0010252095758914947, "step": 237320 }, { "epoch": 67.3658813511212, "grad_norm": 0.006857351865619421, "learning_rate": 3.266108430315072e-05, "loss": 0.0016654504463076592, "step": 237330 }, { "epoch": 67.36871984104457, "grad_norm": 0.1394859105348587, "learning_rate": 3.265824581322737e-05, "loss": 0.0003069518133997917, "step": 237340 }, { "epoch": 67.37155833096793, "grad_norm": 0.3758957087993622, "learning_rate": 3.2655407323304004e-05, "loss": 0.00016454346477985383, "step": 237350 }, { "epoch": 67.37439682089129, "grad_norm": 1.3410836458206177, "learning_rate": 3.2652568833380645e-05, "loss": 0.0003058349713683128, "step": 237360 }, { "epoch": 67.37723531081464, "grad_norm": 0.0020198526326566935, "learning_rate": 3.264973034345728e-05, "loss": 0.0001402510330080986, "step": 237370 }, { "epoch": 67.380073800738, "grad_norm": 7.816259860992432, "learning_rate": 3.264689185353392e-05, "loss": 0.0013747600838541984, "step": 237380 }, { "epoch": 67.38291229066137, "grad_norm": 0.02814161404967308, "learning_rate": 3.264405336361056e-05, "loss": 0.00013358592987060546, "step": 237390 }, { "epoch": 67.38575078058473, "grad_norm": 0.07448430359363556, "learning_rate": 3.26412148736872e-05, "loss": 0.0004014529287815094, "step": 237400 }, { "epoch": 67.38858927050809, "grad_norm": 0.001436317339539528, "learning_rate": 3.263837638376384e-05, "loss": 0.00031150002032518385, "step": 237410 }, { "epoch": 67.39142776043145, "grad_norm": 0.00484648160636425, "learning_rate": 3.263553789384048e-05, "loss": 0.00015680771321058273, "step": 237420 }, { "epoch": 67.39426625035482, "grad_norm": 0.003859196323901415, "learning_rate": 3.2632699403917115e-05, "loss": 0.00017625521868467331, "step": 237430 }, { "epoch": 67.39710474027817, "grad_norm": 0.0045034089125692844, "learning_rate": 3.262986091399376e-05, "loss": 6.285812705755233e-05, "step": 237440 }, { "epoch": 67.39994323020153, "grad_norm": 0.027676541358232498, "learning_rate": 3.26270224240704e-05, "loss": 0.00017214249819517136, "step": 237450 }, { "epoch": 67.40278172012489, "grad_norm": 0.03304196149110794, "learning_rate": 3.262418393414703e-05, "loss": 6.209984421730041e-05, "step": 237460 }, { "epoch": 67.40562021004826, "grad_norm": 0.006918713450431824, "learning_rate": 3.262134544422367e-05, "loss": 4.606228321790695e-05, "step": 237470 }, { "epoch": 67.40845869997162, "grad_norm": 0.021412234753370285, "learning_rate": 3.2618506954300315e-05, "loss": 0.00012067537754774093, "step": 237480 }, { "epoch": 67.41129718989498, "grad_norm": 0.014665492810308933, "learning_rate": 3.2615668464376956e-05, "loss": 4.657991230487824e-05, "step": 237490 }, { "epoch": 67.41413567981834, "grad_norm": 0.037844639271497726, "learning_rate": 3.261282997445359e-05, "loss": 3.345329314470291e-05, "step": 237500 }, { "epoch": 67.41413567981834, "eval_accuracy": 0.9862656577859732, "eval_loss": 0.05863509699702263, "eval_runtime": 37.0567, "eval_samples_per_second": 424.404, "eval_steps_per_second": 6.638, "step": 237500 }, { "epoch": 67.41697416974169, "grad_norm": 0.0056608011946082115, "learning_rate": 3.260999148453023e-05, "loss": 0.0003809509798884392, "step": 237510 }, { "epoch": 67.41981265966506, "grad_norm": 0.004121358040720224, "learning_rate": 3.260715299460687e-05, "loss": 0.0013338342308998109, "step": 237520 }, { "epoch": 67.42265114958842, "grad_norm": 0.016682999208569527, "learning_rate": 3.260431450468351e-05, "loss": 9.639784693717956e-05, "step": 237530 }, { "epoch": 67.42548963951178, "grad_norm": 0.02661164477467537, "learning_rate": 3.260147601476015e-05, "loss": 0.0010299894958734513, "step": 237540 }, { "epoch": 67.42832812943514, "grad_norm": 0.05345712974667549, "learning_rate": 3.259863752483679e-05, "loss": 9.099468588829041e-05, "step": 237550 }, { "epoch": 67.4311666193585, "grad_norm": 0.009544864296913147, "learning_rate": 3.2595799034913425e-05, "loss": 0.0005595671012997627, "step": 237560 }, { "epoch": 67.43400510928186, "grad_norm": 0.04203822836279869, "learning_rate": 3.259296054499007e-05, "loss": 0.0034768965095281603, "step": 237570 }, { "epoch": 67.43684359920522, "grad_norm": 0.017466885969042778, "learning_rate": 3.259012205506671e-05, "loss": 0.0012522865086793899, "step": 237580 }, { "epoch": 67.43968208912858, "grad_norm": 0.09089618176221848, "learning_rate": 3.258728356514334e-05, "loss": 0.0001642383635044098, "step": 237590 }, { "epoch": 67.44252057905194, "grad_norm": 0.009583334438502789, "learning_rate": 3.2584445075219984e-05, "loss": 0.0009305775165557861, "step": 237600 }, { "epoch": 67.4453590689753, "grad_norm": 0.004960339982062578, "learning_rate": 3.258189043428896e-05, "loss": 0.027358025312423706, "step": 237610 }, { "epoch": 67.44819755889867, "grad_norm": 0.0020216426346451044, "learning_rate": 3.25790519443656e-05, "loss": 9.6965953707695e-05, "step": 237620 }, { "epoch": 67.45103604882203, "grad_norm": 0.12238648533821106, "learning_rate": 3.257621345444224e-05, "loss": 0.00043759942054748534, "step": 237630 }, { "epoch": 67.45387453874538, "grad_norm": 10.759821891784668, "learning_rate": 3.2573374964518875e-05, "loss": 0.002761161141097546, "step": 237640 }, { "epoch": 67.45671302866874, "grad_norm": 0.010244451463222504, "learning_rate": 3.2570536474595516e-05, "loss": 0.0005047572776675224, "step": 237650 }, { "epoch": 67.45955151859211, "grad_norm": 7.207873821258545, "learning_rate": 3.256769798467216e-05, "loss": 0.0020109012722969053, "step": 237660 }, { "epoch": 67.46239000851547, "grad_norm": 0.2589181065559387, "learning_rate": 3.256485949474879e-05, "loss": 0.000630149245262146, "step": 237670 }, { "epoch": 67.46522849843883, "grad_norm": 0.007631595246493816, "learning_rate": 3.256202100482543e-05, "loss": 0.0011098865419626237, "step": 237680 }, { "epoch": 67.4680669883622, "grad_norm": 0.010479847900569439, "learning_rate": 3.2559182514902075e-05, "loss": 0.00024211443960666656, "step": 237690 }, { "epoch": 67.47090547828556, "grad_norm": 0.16366605460643768, "learning_rate": 3.2556344024978716e-05, "loss": 0.0003897378221154213, "step": 237700 }, { "epoch": 67.47374396820891, "grad_norm": 0.0021869877818971872, "learning_rate": 3.255350553505535e-05, "loss": 0.0014965005218982696, "step": 237710 }, { "epoch": 67.47658245813227, "grad_norm": 0.21718455851078033, "learning_rate": 3.255066704513199e-05, "loss": 0.001711788773536682, "step": 237720 }, { "epoch": 67.47942094805563, "grad_norm": 0.14787046611309052, "learning_rate": 3.254782855520863e-05, "loss": 0.00038145966827869416, "step": 237730 }, { "epoch": 67.482259437979, "grad_norm": 0.03592899814248085, "learning_rate": 3.254499006528527e-05, "loss": 0.00024983342736959455, "step": 237740 }, { "epoch": 67.48509792790236, "grad_norm": 0.01942640356719494, "learning_rate": 3.254215157536191e-05, "loss": 0.0001925293356180191, "step": 237750 }, { "epoch": 67.48793641782572, "grad_norm": 0.8807029724121094, "learning_rate": 3.253931308543855e-05, "loss": 0.0007865894585847855, "step": 237760 }, { "epoch": 67.49077490774907, "grad_norm": 0.0028738793917000294, "learning_rate": 3.2536474595515185e-05, "loss": 0.00014885067939758302, "step": 237770 }, { "epoch": 67.49361339767243, "grad_norm": 0.0161873959004879, "learning_rate": 3.253363610559183e-05, "loss": 0.00022095069289207458, "step": 237780 }, { "epoch": 67.4964518875958, "grad_norm": 0.04595314338803291, "learning_rate": 3.253079761566847e-05, "loss": 0.0017062541097402572, "step": 237790 }, { "epoch": 67.49929037751916, "grad_norm": 0.0030155859421938658, "learning_rate": 3.25279591257451e-05, "loss": 0.0030436081811785697, "step": 237800 }, { "epoch": 67.50212886744252, "grad_norm": 0.004286352079361677, "learning_rate": 3.2525120635821744e-05, "loss": 0.00029772724956274034, "step": 237810 }, { "epoch": 67.50496735736589, "grad_norm": 7.740063190460205, "learning_rate": 3.2522282145898385e-05, "loss": 0.0010137034580111504, "step": 237820 }, { "epoch": 67.50780584728925, "grad_norm": 0.010175932198762894, "learning_rate": 3.251944365597503e-05, "loss": 0.00021932162344455718, "step": 237830 }, { "epoch": 67.5106443372126, "grad_norm": 0.031381476670503616, "learning_rate": 3.251660516605166e-05, "loss": 0.0008295876905322075, "step": 237840 }, { "epoch": 67.51348282713596, "grad_norm": 0.05470533296465874, "learning_rate": 3.2513766676128296e-05, "loss": 0.001649525947868824, "step": 237850 }, { "epoch": 67.51632131705932, "grad_norm": 0.19461289048194885, "learning_rate": 3.2511212035197276e-05, "loss": 0.006820455193519592, "step": 237860 }, { "epoch": 67.51915980698269, "grad_norm": 0.2739819884300232, "learning_rate": 3.250837354527392e-05, "loss": 0.011142629384994506, "step": 237870 }, { "epoch": 67.52199829690605, "grad_norm": 0.022926006466150284, "learning_rate": 3.250553505535056e-05, "loss": 0.002461534179747105, "step": 237880 }, { "epoch": 67.52483678682941, "grad_norm": 0.20322102308273315, "learning_rate": 3.250269656542719e-05, "loss": 0.002988535538315773, "step": 237890 }, { "epoch": 67.52767527675277, "grad_norm": 0.004887685179710388, "learning_rate": 3.2499858075503835e-05, "loss": 0.0002055039629340172, "step": 237900 }, { "epoch": 67.53051376667612, "grad_norm": 0.03155887499451637, "learning_rate": 3.2497019585580476e-05, "loss": 0.00741225928068161, "step": 237910 }, { "epoch": 67.53335225659949, "grad_norm": 0.04082632064819336, "learning_rate": 3.249418109565711e-05, "loss": 0.0021372938528656958, "step": 237920 }, { "epoch": 67.53619074652285, "grad_norm": 0.021956007927656174, "learning_rate": 3.249134260573375e-05, "loss": 0.001067180745303631, "step": 237930 }, { "epoch": 67.53902923644621, "grad_norm": 0.1694965660572052, "learning_rate": 3.248850411581039e-05, "loss": 6.19545578956604e-05, "step": 237940 }, { "epoch": 67.54186772636957, "grad_norm": 0.011175322346389294, "learning_rate": 3.248566562588703e-05, "loss": 0.00012666359543800354, "step": 237950 }, { "epoch": 67.54470621629294, "grad_norm": 0.09086211770772934, "learning_rate": 3.248282713596367e-05, "loss": 9.857863187789917e-05, "step": 237960 }, { "epoch": 67.5475447062163, "grad_norm": 6.798864364624023, "learning_rate": 3.247998864604031e-05, "loss": 0.001075883023440838, "step": 237970 }, { "epoch": 67.55038319613965, "grad_norm": 0.018061256036162376, "learning_rate": 3.2477150156116945e-05, "loss": 5.7645514607429506e-05, "step": 237980 }, { "epoch": 67.55322168606301, "grad_norm": 0.5364612340927124, "learning_rate": 3.247431166619359e-05, "loss": 0.00015693623572587967, "step": 237990 }, { "epoch": 67.55606017598637, "grad_norm": 0.011522026732563972, "learning_rate": 3.247147317627023e-05, "loss": 0.000646786205470562, "step": 238000 }, { "epoch": 67.55606017598637, "eval_accuracy": 0.9851211292681376, "eval_loss": 0.06243377923965454, "eval_runtime": 38.0491, "eval_samples_per_second": 413.335, "eval_steps_per_second": 6.465, "step": 238000 }, { "epoch": 67.55889866590974, "grad_norm": 0.21423961222171783, "learning_rate": 3.246863468634686e-05, "loss": 0.00018634889274835587, "step": 238010 }, { "epoch": 67.5617371558331, "grad_norm": 0.022744761779904366, "learning_rate": 3.2465796196423504e-05, "loss": 0.00019243042916059495, "step": 238020 }, { "epoch": 67.56457564575646, "grad_norm": 0.001845447113737464, "learning_rate": 3.246295770650014e-05, "loss": 0.0011872826144099236, "step": 238030 }, { "epoch": 67.56741413567981, "grad_norm": 0.0010241729905828834, "learning_rate": 3.246011921657679e-05, "loss": 0.0013620033860206604, "step": 238040 }, { "epoch": 67.57025262560317, "grad_norm": 0.0025006437208503485, "learning_rate": 3.245728072665342e-05, "loss": 0.00020691398531198503, "step": 238050 }, { "epoch": 67.57309111552654, "grad_norm": 0.006539653521031141, "learning_rate": 3.2454442236730056e-05, "loss": 0.0003021597862243652, "step": 238060 }, { "epoch": 67.5759296054499, "grad_norm": 0.012606332078576088, "learning_rate": 3.2451603746806704e-05, "loss": 3.816280514001846e-05, "step": 238070 }, { "epoch": 67.57876809537326, "grad_norm": 0.05166075378656387, "learning_rate": 3.244876525688334e-05, "loss": 7.574371993541717e-05, "step": 238080 }, { "epoch": 67.58160658529663, "grad_norm": 0.0015479883877560496, "learning_rate": 3.244592676695998e-05, "loss": 3.928374499082565e-05, "step": 238090 }, { "epoch": 67.58444507521999, "grad_norm": 0.006505864206701517, "learning_rate": 3.244308827703662e-05, "loss": 6.972122937440873e-05, "step": 238100 }, { "epoch": 67.58728356514334, "grad_norm": 0.02927570231258869, "learning_rate": 3.2440249787113256e-05, "loss": 5.16403466463089e-05, "step": 238110 }, { "epoch": 67.5901220550667, "grad_norm": 0.0044853477738797665, "learning_rate": 3.24374112971899e-05, "loss": 3.558285534381867e-05, "step": 238120 }, { "epoch": 67.59296054499006, "grad_norm": 0.02667880244553089, "learning_rate": 3.243457280726653e-05, "loss": 6.633028388023377e-05, "step": 238130 }, { "epoch": 67.59579903491343, "grad_norm": 0.007576148025691509, "learning_rate": 3.2431734317343173e-05, "loss": 9.676031768321992e-05, "step": 238140 }, { "epoch": 67.59863752483679, "grad_norm": 0.010396371595561504, "learning_rate": 3.2428895827419815e-05, "loss": 6.030108779668808e-05, "step": 238150 }, { "epoch": 67.60147601476015, "grad_norm": 0.0044491225853562355, "learning_rate": 3.242605733749645e-05, "loss": 0.00012950822710990905, "step": 238160 }, { "epoch": 67.60431450468351, "grad_norm": 0.06278372555971146, "learning_rate": 3.24232188475731e-05, "loss": 0.00010292455554008483, "step": 238170 }, { "epoch": 67.60715299460686, "grad_norm": 0.005583439953625202, "learning_rate": 3.242038035764973e-05, "loss": 2.1562911570072174e-05, "step": 238180 }, { "epoch": 67.60999148453023, "grad_norm": 0.003226929809898138, "learning_rate": 3.241754186772637e-05, "loss": 6.719063967466354e-05, "step": 238190 }, { "epoch": 67.61282997445359, "grad_norm": 0.015644196420907974, "learning_rate": 3.2414703377803015e-05, "loss": 0.00010451469570398331, "step": 238200 }, { "epoch": 67.61566846437695, "grad_norm": 0.002226191107183695, "learning_rate": 3.241186488787965e-05, "loss": 3.7610344588756564e-05, "step": 238210 }, { "epoch": 67.61850695430032, "grad_norm": 0.022048067301511765, "learning_rate": 3.240902639795629e-05, "loss": 6.492696702480316e-05, "step": 238220 }, { "epoch": 67.62134544422368, "grad_norm": 0.006675843149423599, "learning_rate": 3.2406187908032925e-05, "loss": 0.00013164021074771882, "step": 238230 }, { "epoch": 67.62418393414703, "grad_norm": 0.02204548381268978, "learning_rate": 3.240334941810957e-05, "loss": 0.00021748151630163193, "step": 238240 }, { "epoch": 67.62702242407039, "grad_norm": 0.00717923091724515, "learning_rate": 3.240051092818621e-05, "loss": 4.230961203575134e-05, "step": 238250 }, { "epoch": 67.62986091399375, "grad_norm": 0.01578335091471672, "learning_rate": 3.239767243826284e-05, "loss": 0.00030231941491365433, "step": 238260 }, { "epoch": 67.63269940391712, "grad_norm": 7.333093643188477, "learning_rate": 3.2394833948339484e-05, "loss": 0.0013773240149021148, "step": 238270 }, { "epoch": 67.63553789384048, "grad_norm": 6.009037017822266, "learning_rate": 3.2391995458416126e-05, "loss": 0.0008707309141755104, "step": 238280 }, { "epoch": 67.63837638376384, "grad_norm": 0.1975628286600113, "learning_rate": 3.238915696849276e-05, "loss": 0.00010495502501726151, "step": 238290 }, { "epoch": 67.6412148736872, "grad_norm": 0.040877386927604675, "learning_rate": 3.238631847856941e-05, "loss": 0.00011034533381462097, "step": 238300 }, { "epoch": 67.64405336361055, "grad_norm": 0.03491203859448433, "learning_rate": 3.238347998864604e-05, "loss": 3.5527534782886506e-05, "step": 238310 }, { "epoch": 67.64689185353392, "grad_norm": 0.027955632656812668, "learning_rate": 3.238064149872268e-05, "loss": 4.097949713468552e-05, "step": 238320 }, { "epoch": 67.64973034345728, "grad_norm": 0.013233995996415615, "learning_rate": 3.237780300879932e-05, "loss": 0.00026427805423736574, "step": 238330 }, { "epoch": 67.65256883338064, "grad_norm": 0.0035221539437770844, "learning_rate": 3.237496451887596e-05, "loss": 0.00015618856996297837, "step": 238340 }, { "epoch": 67.655407323304, "grad_norm": 0.9349386096000671, "learning_rate": 3.23721260289526e-05, "loss": 0.0022950489073991776, "step": 238350 }, { "epoch": 67.65824581322737, "grad_norm": 0.001980542205274105, "learning_rate": 3.2369287539029236e-05, "loss": 0.00010429173707962036, "step": 238360 }, { "epoch": 67.66108430315073, "grad_norm": 0.08355057239532471, "learning_rate": 3.236644904910588e-05, "loss": 0.0007808029651641846, "step": 238370 }, { "epoch": 67.66392279307408, "grad_norm": 0.13547444343566895, "learning_rate": 3.236361055918252e-05, "loss": 0.0021080465987324713, "step": 238380 }, { "epoch": 67.66676128299744, "grad_norm": 0.03149287402629852, "learning_rate": 3.2360772069259154e-05, "loss": 0.000766034983098507, "step": 238390 }, { "epoch": 67.6695997729208, "grad_norm": 0.27067509293556213, "learning_rate": 3.2357933579335795e-05, "loss": 0.00027674026787281034, "step": 238400 }, { "epoch": 67.67243826284417, "grad_norm": 11.171908378601074, "learning_rate": 3.2355095089412436e-05, "loss": 0.0036370202898979185, "step": 238410 }, { "epoch": 67.67527675276753, "grad_norm": 0.007724618539214134, "learning_rate": 3.235225659948907e-05, "loss": 6.359070539474487e-05, "step": 238420 }, { "epoch": 67.6781152426909, "grad_norm": 0.21579624712467194, "learning_rate": 3.234941810956571e-05, "loss": 0.0001504393294453621, "step": 238430 }, { "epoch": 67.68095373261426, "grad_norm": 0.0011793510057032108, "learning_rate": 3.2346579619642354e-05, "loss": 4.312079399824142e-05, "step": 238440 }, { "epoch": 67.6837922225376, "grad_norm": 0.01120884157717228, "learning_rate": 3.234374112971899e-05, "loss": 2.4437718093395232e-05, "step": 238450 }, { "epoch": 67.68663071246097, "grad_norm": 0.0050471993163228035, "learning_rate": 3.234090263979563e-05, "loss": 5.836840718984604e-05, "step": 238460 }, { "epoch": 67.68946920238433, "grad_norm": 0.01701677218079567, "learning_rate": 3.233806414987227e-05, "loss": 8.915867656469345e-05, "step": 238470 }, { "epoch": 67.6923076923077, "grad_norm": 0.0026119740214198828, "learning_rate": 3.2335225659948906e-05, "loss": 5.3534097969532016e-05, "step": 238480 }, { "epoch": 67.69514618223106, "grad_norm": 0.0019393180264160037, "learning_rate": 3.233238717002555e-05, "loss": 3.0406937003135682e-05, "step": 238490 }, { "epoch": 67.69798467215442, "grad_norm": 0.028136178851127625, "learning_rate": 3.232954868010219e-05, "loss": 4.1194446384906766e-05, "step": 238500 }, { "epoch": 67.69798467215442, "eval_accuracy": 0.9871558466331786, "eval_loss": 0.052917689085006714, "eval_runtime": 36.6538, "eval_samples_per_second": 429.069, "eval_steps_per_second": 6.711, "step": 238500 }, { "epoch": 67.70082316207777, "grad_norm": 0.1303255259990692, "learning_rate": 3.232671019017883e-05, "loss": 9.354613721370696e-05, "step": 238510 }, { "epoch": 67.70366165200113, "grad_norm": 0.002655499614775181, "learning_rate": 3.2323871700255464e-05, "loss": 3.7151575088500974e-05, "step": 238520 }, { "epoch": 67.7065001419245, "grad_norm": 0.0018793317722156644, "learning_rate": 3.23210332103321e-05, "loss": 3.061611205339432e-05, "step": 238530 }, { "epoch": 67.70933863184786, "grad_norm": 0.004938165191560984, "learning_rate": 3.231819472040875e-05, "loss": 2.4590082466602327e-05, "step": 238540 }, { "epoch": 67.71217712177122, "grad_norm": 0.07364450395107269, "learning_rate": 3.231535623048538e-05, "loss": 5.91544434428215e-05, "step": 238550 }, { "epoch": 67.71501561169458, "grad_norm": 0.0021625410299748182, "learning_rate": 3.231251774056202e-05, "loss": 2.4195574223995208e-05, "step": 238560 }, { "epoch": 67.71785410161795, "grad_norm": 0.09983620792627335, "learning_rate": 3.2309679250638664e-05, "loss": 5.40735200047493e-05, "step": 238570 }, { "epoch": 67.7206925915413, "grad_norm": 0.00689541595056653, "learning_rate": 3.23068407607153e-05, "loss": 7.34018161892891e-05, "step": 238580 }, { "epoch": 67.72353108146466, "grad_norm": 0.0037993884179741144, "learning_rate": 3.230400227079194e-05, "loss": 0.00011705495417118073, "step": 238590 }, { "epoch": 67.72636957138802, "grad_norm": 0.007612867746502161, "learning_rate": 3.230116378086858e-05, "loss": 3.593042492866516e-05, "step": 238600 }, { "epoch": 67.72920806131138, "grad_norm": 0.048131927847862244, "learning_rate": 3.2298325290945216e-05, "loss": 6.250254809856415e-05, "step": 238610 }, { "epoch": 67.73204655123475, "grad_norm": 0.0012099950108677149, "learning_rate": 3.229548680102186e-05, "loss": 5.8349967002868655e-05, "step": 238620 }, { "epoch": 67.73488504115811, "grad_norm": 0.0003920208546333015, "learning_rate": 3.229264831109849e-05, "loss": 3.6525167524814604e-05, "step": 238630 }, { "epoch": 67.73772353108147, "grad_norm": 0.001043443102389574, "learning_rate": 3.228980982117514e-05, "loss": 5.117487162351608e-05, "step": 238640 }, { "epoch": 67.74056202100482, "grad_norm": 0.003177113365381956, "learning_rate": 3.2286971331251775e-05, "loss": 3.178007900714874e-05, "step": 238650 }, { "epoch": 67.74340051092818, "grad_norm": 0.539995014667511, "learning_rate": 3.228413284132841e-05, "loss": 0.00023093782365322114, "step": 238660 }, { "epoch": 67.74623900085155, "grad_norm": 0.18404939770698547, "learning_rate": 3.228129435140506e-05, "loss": 0.00011110324412584305, "step": 238670 }, { "epoch": 67.74907749077491, "grad_norm": 0.0003604268713388592, "learning_rate": 3.227845586148169e-05, "loss": 8.56056809425354e-05, "step": 238680 }, { "epoch": 67.75191598069827, "grad_norm": 0.06063460931181908, "learning_rate": 3.2275617371558334e-05, "loss": 7.85980373620987e-05, "step": 238690 }, { "epoch": 67.75475447062163, "grad_norm": 0.007202644366770983, "learning_rate": 3.2272778881634975e-05, "loss": 5.8859214186668395e-05, "step": 238700 }, { "epoch": 67.757592960545, "grad_norm": 0.004336195066571236, "learning_rate": 3.226994039171161e-05, "loss": 5.758125334978104e-05, "step": 238710 }, { "epoch": 67.76043145046835, "grad_norm": 0.0044119637459516525, "learning_rate": 3.226710190178825e-05, "loss": 0.00012400895357131959, "step": 238720 }, { "epoch": 67.76326994039171, "grad_norm": 0.000980703392997384, "learning_rate": 3.2264263411864886e-05, "loss": 2.9713474214076994e-05, "step": 238730 }, { "epoch": 67.76610843031507, "grad_norm": 0.014054283499717712, "learning_rate": 3.226142492194153e-05, "loss": 1.9935518503189086e-05, "step": 238740 }, { "epoch": 67.76894692023843, "grad_norm": 0.03624211996793747, "learning_rate": 3.225858643201817e-05, "loss": 9.13342460989952e-05, "step": 238750 }, { "epoch": 67.7717854101618, "grad_norm": 0.041961919516325, "learning_rate": 3.22557479420948e-05, "loss": 0.0003196651116013527, "step": 238760 }, { "epoch": 67.77462390008516, "grad_norm": 0.005506062880158424, "learning_rate": 3.225290945217145e-05, "loss": 0.0002452012151479721, "step": 238770 }, { "epoch": 67.77746239000851, "grad_norm": 0.0032262089662253857, "learning_rate": 3.2250070962248086e-05, "loss": 4.6545639634132385e-05, "step": 238780 }, { "epoch": 67.78030087993187, "grad_norm": 0.019377021118998528, "learning_rate": 3.224723247232472e-05, "loss": 0.00013522263616323472, "step": 238790 }, { "epoch": 67.78313936985523, "grad_norm": 0.00450360681861639, "learning_rate": 3.224439398240137e-05, "loss": 0.00034970548003911974, "step": 238800 }, { "epoch": 67.7859778597786, "grad_norm": 0.03222178295254707, "learning_rate": 3.2241555492478e-05, "loss": 0.00010829754173755646, "step": 238810 }, { "epoch": 67.78881634970196, "grad_norm": 0.015331443399190903, "learning_rate": 3.2238717002554645e-05, "loss": 9.848307818174362e-05, "step": 238820 }, { "epoch": 67.79165483962532, "grad_norm": 0.0018534875707700849, "learning_rate": 3.223587851263128e-05, "loss": 3.836080431938171e-05, "step": 238830 }, { "epoch": 67.79449332954869, "grad_norm": 0.02129046618938446, "learning_rate": 3.223304002270792e-05, "loss": 3.477036952972412e-05, "step": 238840 }, { "epoch": 67.79733181947203, "grad_norm": 0.022768251597881317, "learning_rate": 3.223020153278456e-05, "loss": 0.00036012157797813413, "step": 238850 }, { "epoch": 67.8001703093954, "grad_norm": 0.00843789242208004, "learning_rate": 3.2227363042861197e-05, "loss": 0.0029996078461408614, "step": 238860 }, { "epoch": 67.80300879931876, "grad_norm": 0.003310201456770301, "learning_rate": 3.222452455293784e-05, "loss": 5.7179853320121766e-05, "step": 238870 }, { "epoch": 67.80584728924212, "grad_norm": 0.04632195457816124, "learning_rate": 3.222168606301448e-05, "loss": 0.00042665842920541763, "step": 238880 }, { "epoch": 67.80868577916549, "grad_norm": 0.0033860458061099052, "learning_rate": 3.2218847573091114e-05, "loss": 0.001013515330851078, "step": 238890 }, { "epoch": 67.81152426908885, "grad_norm": 0.004675138276070356, "learning_rate": 3.2216009083167755e-05, "loss": 0.0004998305812478065, "step": 238900 }, { "epoch": 67.81436275901221, "grad_norm": 0.0163272675126791, "learning_rate": 3.22131705932444e-05, "loss": 5.9126317501068117e-05, "step": 238910 }, { "epoch": 67.81720124893556, "grad_norm": 0.011519741266965866, "learning_rate": 3.221033210332103e-05, "loss": 0.0004411978647112846, "step": 238920 }, { "epoch": 67.82003973885892, "grad_norm": 0.019912179559469223, "learning_rate": 3.220749361339767e-05, "loss": 0.0014117514714598657, "step": 238930 }, { "epoch": 67.82287822878229, "grad_norm": 0.0442030131816864, "learning_rate": 3.2204655123474314e-05, "loss": 0.0001196403056383133, "step": 238940 }, { "epoch": 67.82571671870565, "grad_norm": 0.022052733227610588, "learning_rate": 3.220181663355095e-05, "loss": 0.0001486167311668396, "step": 238950 }, { "epoch": 67.82855520862901, "grad_norm": 0.05916573107242584, "learning_rate": 3.219897814362759e-05, "loss": 0.004007121920585633, "step": 238960 }, { "epoch": 67.83139369855238, "grad_norm": 0.0027450043708086014, "learning_rate": 3.219613965370423e-05, "loss": 0.0005126636475324631, "step": 238970 }, { "epoch": 67.83423218847572, "grad_norm": 0.09273003786802292, "learning_rate": 3.219330116378087e-05, "loss": 0.00012658722698688507, "step": 238980 }, { "epoch": 67.83707067839909, "grad_norm": 0.010847470723092556, "learning_rate": 3.219046267385751e-05, "loss": 0.00018416214734315873, "step": 238990 }, { "epoch": 67.83990916832245, "grad_norm": 0.338384211063385, "learning_rate": 3.218762418393415e-05, "loss": 0.0002866325899958611, "step": 239000 }, { "epoch": 67.83990916832245, "eval_accuracy": 0.986965091880206, "eval_loss": 0.05442202091217041, "eval_runtime": 35.3055, "eval_samples_per_second": 445.455, "eval_steps_per_second": 6.968, "step": 239000 }, { "epoch": 67.84274765824581, "grad_norm": 0.02373574860394001, "learning_rate": 3.218478569401079e-05, "loss": 0.00021671410650014877, "step": 239010 }, { "epoch": 67.84558614816918, "grad_norm": 0.05370110273361206, "learning_rate": 3.2181947204087425e-05, "loss": 6.391629576683045e-05, "step": 239020 }, { "epoch": 67.84842463809254, "grad_norm": 0.005212871823459864, "learning_rate": 3.2179108714164066e-05, "loss": 7.550511509180069e-05, "step": 239030 }, { "epoch": 67.8512631280159, "grad_norm": 0.0083279674872756, "learning_rate": 3.217627022424071e-05, "loss": 0.0009210668504238129, "step": 239040 }, { "epoch": 67.85410161793925, "grad_norm": 0.002503466559574008, "learning_rate": 3.217343173431734e-05, "loss": 5.5388174951076506e-05, "step": 239050 }, { "epoch": 67.85694010786261, "grad_norm": 0.05556806921958923, "learning_rate": 3.2170593244393983e-05, "loss": 9.940974414348602e-05, "step": 239060 }, { "epoch": 67.85977859778598, "grad_norm": 0.050361573696136475, "learning_rate": 3.2167754754470625e-05, "loss": 0.0014938537031412124, "step": 239070 }, { "epoch": 67.86261708770934, "grad_norm": 0.018109336495399475, "learning_rate": 3.216491626454726e-05, "loss": 0.011593437194824219, "step": 239080 }, { "epoch": 67.8654555776327, "grad_norm": 0.016252486035227776, "learning_rate": 3.21620777746239e-05, "loss": 0.0049544177949428555, "step": 239090 }, { "epoch": 67.86829406755606, "grad_norm": 0.02720743604004383, "learning_rate": 3.215923928470054e-05, "loss": 0.00012382641434669495, "step": 239100 }, { "epoch": 67.87113255747943, "grad_norm": 0.02386544458568096, "learning_rate": 3.2156400794777183e-05, "loss": 0.0001396290957927704, "step": 239110 }, { "epoch": 67.87397104740278, "grad_norm": 0.006218914408236742, "learning_rate": 3.215356230485382e-05, "loss": 0.0022770332172513006, "step": 239120 }, { "epoch": 67.87680953732614, "grad_norm": 0.021239368245005608, "learning_rate": 3.215072381493046e-05, "loss": 9.955950081348419e-05, "step": 239130 }, { "epoch": 67.8796480272495, "grad_norm": 0.008045737631618977, "learning_rate": 3.21478853250071e-05, "loss": 0.00037905871868133546, "step": 239140 }, { "epoch": 67.88248651717286, "grad_norm": 0.0014644534094259143, "learning_rate": 3.2145046835083735e-05, "loss": 0.00032449495047330855, "step": 239150 }, { "epoch": 67.88532500709623, "grad_norm": 0.03685740381479263, "learning_rate": 3.214220834516038e-05, "loss": 0.0018205884844064713, "step": 239160 }, { "epoch": 67.88816349701959, "grad_norm": 0.06952270865440369, "learning_rate": 3.213936985523702e-05, "loss": 0.002014100179076195, "step": 239170 }, { "epoch": 67.89100198694295, "grad_norm": 0.08790069073438644, "learning_rate": 3.213653136531365e-05, "loss": 0.00028249211609363554, "step": 239180 }, { "epoch": 67.8938404768663, "grad_norm": 7.264313220977783, "learning_rate": 3.2133692875390294e-05, "loss": 0.0016326535493135451, "step": 239190 }, { "epoch": 67.89667896678966, "grad_norm": 0.0039961631409823895, "learning_rate": 3.2130854385466936e-05, "loss": 0.0014259347692131997, "step": 239200 }, { "epoch": 67.89951745671303, "grad_norm": 0.006020241416990757, "learning_rate": 3.212801589554357e-05, "loss": 0.013840402662754058, "step": 239210 }, { "epoch": 67.90235594663639, "grad_norm": 0.6356789469718933, "learning_rate": 3.212517740562021e-05, "loss": 0.00018243398517370224, "step": 239220 }, { "epoch": 67.90519443655975, "grad_norm": 0.024614475667476654, "learning_rate": 3.212233891569685e-05, "loss": 0.009866657108068467, "step": 239230 }, { "epoch": 67.90803292648312, "grad_norm": 0.015942508354783058, "learning_rate": 3.2119500425773494e-05, "loss": 0.0013974901288747788, "step": 239240 }, { "epoch": 67.91087141640647, "grad_norm": 0.03881886973977089, "learning_rate": 3.211666193585013e-05, "loss": 0.0003850158303976059, "step": 239250 }, { "epoch": 67.91370990632983, "grad_norm": 0.3819429576396942, "learning_rate": 3.2113823445926763e-05, "loss": 0.001364544965326786, "step": 239260 }, { "epoch": 67.91654839625319, "grad_norm": 0.7040698528289795, "learning_rate": 3.211098495600341e-05, "loss": 0.0008779175579547883, "step": 239270 }, { "epoch": 67.91938688617655, "grad_norm": 0.03650423139333725, "learning_rate": 3.2108146466080046e-05, "loss": 0.00036567952483892443, "step": 239280 }, { "epoch": 67.92222537609992, "grad_norm": 0.010927999392151833, "learning_rate": 3.210530797615669e-05, "loss": 0.00041014328598976135, "step": 239290 }, { "epoch": 67.92506386602328, "grad_norm": 0.7363908886909485, "learning_rate": 3.210246948623333e-05, "loss": 0.00027477312833070757, "step": 239300 }, { "epoch": 67.92790235594664, "grad_norm": 0.05090698227286339, "learning_rate": 3.2099630996309964e-05, "loss": 0.00017158258706331253, "step": 239310 }, { "epoch": 67.93074084586999, "grad_norm": 0.10197577625513077, "learning_rate": 3.2096792506386605e-05, "loss": 0.0001011790707707405, "step": 239320 }, { "epoch": 67.93357933579335, "grad_norm": 0.019579848274588585, "learning_rate": 3.2093954016463246e-05, "loss": 7.38067552447319e-05, "step": 239330 }, { "epoch": 67.93641782571672, "grad_norm": 0.008291642181575298, "learning_rate": 3.209111552653988e-05, "loss": 0.00034070778638124467, "step": 239340 }, { "epoch": 67.93925631564008, "grad_norm": 0.0038600575644522905, "learning_rate": 3.208827703661652e-05, "loss": 7.681064307689667e-05, "step": 239350 }, { "epoch": 67.94209480556344, "grad_norm": 0.08569547533988953, "learning_rate": 3.208543854669316e-05, "loss": 0.0003748646005988121, "step": 239360 }, { "epoch": 67.9449332954868, "grad_norm": 0.1491209715604782, "learning_rate": 3.20826000567698e-05, "loss": 0.0037934791296720506, "step": 239370 }, { "epoch": 67.94777178541017, "grad_norm": 1.8905971050262451, "learning_rate": 3.207976156684644e-05, "loss": 0.012579822540283203, "step": 239380 }, { "epoch": 67.95061027533352, "grad_norm": 0.057821132242679596, "learning_rate": 3.2076923076923074e-05, "loss": 0.00010123439133167267, "step": 239390 }, { "epoch": 67.95344876525688, "grad_norm": 0.004387528635561466, "learning_rate": 3.207408458699972e-05, "loss": 0.0028391767293214796, "step": 239400 }, { "epoch": 67.95628725518024, "grad_norm": 0.022746963426470757, "learning_rate": 3.207124609707636e-05, "loss": 0.001930384710431099, "step": 239410 }, { "epoch": 67.9591257451036, "grad_norm": 0.0025919179897755384, "learning_rate": 3.206840760715299e-05, "loss": 8.139368146657944e-05, "step": 239420 }, { "epoch": 67.96196423502697, "grad_norm": 0.08122854679822922, "learning_rate": 3.206556911722964e-05, "loss": 0.00014665462076663972, "step": 239430 }, { "epoch": 67.96480272495033, "grad_norm": 0.11125951260328293, "learning_rate": 3.2062730627306274e-05, "loss": 0.0005321212112903595, "step": 239440 }, { "epoch": 67.96764121487368, "grad_norm": 0.008080604486167431, "learning_rate": 3.2059892137382916e-05, "loss": 0.0005807345733046532, "step": 239450 }, { "epoch": 67.97047970479704, "grad_norm": 0.010754209011793137, "learning_rate": 3.205705364745955e-05, "loss": 0.0006776198744773865, "step": 239460 }, { "epoch": 67.9733181947204, "grad_norm": 0.020039062947034836, "learning_rate": 3.205421515753619e-05, "loss": 0.0001721736043691635, "step": 239470 }, { "epoch": 67.97615668464377, "grad_norm": 0.0017409337451681495, "learning_rate": 3.205137666761283e-05, "loss": 8.94889235496521e-05, "step": 239480 }, { "epoch": 67.97899517456713, "grad_norm": 0.011545284651219845, "learning_rate": 3.204853817768947e-05, "loss": 4.1227787733078e-05, "step": 239490 }, { "epoch": 67.9818336644905, "grad_norm": 0.03513283282518387, "learning_rate": 3.204569968776611e-05, "loss": 4.9019604921340944e-05, "step": 239500 }, { "epoch": 67.9818336644905, "eval_accuracy": 0.985884148280028, "eval_loss": 0.0582934133708477, "eval_runtime": 35.7285, "eval_samples_per_second": 440.181, "eval_steps_per_second": 6.885, "step": 239500 }, { "epoch": 67.98467215441386, "grad_norm": 0.02066783979535103, "learning_rate": 3.204286119784275e-05, "loss": 7.461775094270706e-05, "step": 239510 }, { "epoch": 67.9875106443372, "grad_norm": 0.001491849310696125, "learning_rate": 3.2040022707919385e-05, "loss": 2.815406769514084e-05, "step": 239520 }, { "epoch": 67.99034913426057, "grad_norm": 0.002819330431520939, "learning_rate": 3.203718421799603e-05, "loss": 4.2961724102497104e-05, "step": 239530 }, { "epoch": 67.99318762418393, "grad_norm": 0.0031227157451212406, "learning_rate": 3.203434572807267e-05, "loss": 4.0792860090732574e-05, "step": 239540 }, { "epoch": 67.9960261141073, "grad_norm": 0.01194041408598423, "learning_rate": 3.20315072381493e-05, "loss": 4.3851509690284726e-05, "step": 239550 }, { "epoch": 67.99886460403066, "grad_norm": 0.01353237684816122, "learning_rate": 3.2028668748225944e-05, "loss": 6.933081895112991e-05, "step": 239560 }, { "epoch": 68.00170309395402, "grad_norm": 0.11342604458332062, "learning_rate": 3.2025830258302585e-05, "loss": 5.1828846335411074e-05, "step": 239570 }, { "epoch": 68.00454158387738, "grad_norm": 0.005359881557524204, "learning_rate": 3.2022991768379226e-05, "loss": 9.501241147518158e-05, "step": 239580 }, { "epoch": 68.00738007380073, "grad_norm": 12.862977027893066, "learning_rate": 3.202015327845586e-05, "loss": 0.0032232508063316347, "step": 239590 }, { "epoch": 68.0102185637241, "grad_norm": 0.007349023595452309, "learning_rate": 3.20173147885325e-05, "loss": 3.7418864667415616e-05, "step": 239600 }, { "epoch": 68.01305705364746, "grad_norm": 0.02041563019156456, "learning_rate": 3.2014476298609144e-05, "loss": 3.066938370466232e-05, "step": 239610 }, { "epoch": 68.01589554357082, "grad_norm": 0.032474417239427567, "learning_rate": 3.201163780868578e-05, "loss": 6.514173001050949e-05, "step": 239620 }, { "epoch": 68.01873403349418, "grad_norm": 0.010089490562677383, "learning_rate": 3.200879931876242e-05, "loss": 0.004081518203020096, "step": 239630 }, { "epoch": 68.02157252341755, "grad_norm": 0.47677019238471985, "learning_rate": 3.200596082883906e-05, "loss": 0.00011593922972679138, "step": 239640 }, { "epoch": 68.02441101334091, "grad_norm": 0.0031671798788011074, "learning_rate": 3.2003122338915696e-05, "loss": 5.8235041797161104e-05, "step": 239650 }, { "epoch": 68.02724950326426, "grad_norm": 0.11267532408237457, "learning_rate": 3.200028384899234e-05, "loss": 5.1516294479370114e-05, "step": 239660 }, { "epoch": 68.03008799318762, "grad_norm": 0.0043432037346065044, "learning_rate": 3.199744535906898e-05, "loss": 8.150655776262283e-05, "step": 239670 }, { "epoch": 68.03292648311098, "grad_norm": 0.029984358698129654, "learning_rate": 3.199460686914561e-05, "loss": 7.441863417625427e-05, "step": 239680 }, { "epoch": 68.03576497303435, "grad_norm": 0.005267861299216747, "learning_rate": 3.1991768379222254e-05, "loss": 5.929283797740936e-05, "step": 239690 }, { "epoch": 68.03860346295771, "grad_norm": 0.006542325019836426, "learning_rate": 3.1988929889298896e-05, "loss": 5.586054176092148e-05, "step": 239700 }, { "epoch": 68.04144195288107, "grad_norm": 0.005342708434909582, "learning_rate": 3.198609139937554e-05, "loss": 3.321655094623566e-05, "step": 239710 }, { "epoch": 68.04428044280442, "grad_norm": 0.007835903204977512, "learning_rate": 3.198325290945217e-05, "loss": 4.3964944779872894e-05, "step": 239720 }, { "epoch": 68.04711893272778, "grad_norm": 0.02900884300470352, "learning_rate": 3.198041441952881e-05, "loss": 0.00015446729958057404, "step": 239730 }, { "epoch": 68.04995742265115, "grad_norm": 0.0030540612060576677, "learning_rate": 3.1977575929605455e-05, "loss": 0.00011224150657653808, "step": 239740 }, { "epoch": 68.05279591257451, "grad_norm": 0.008147398009896278, "learning_rate": 3.197473743968209e-05, "loss": 5.1830708980560305e-05, "step": 239750 }, { "epoch": 68.05563440249787, "grad_norm": 0.001465560169890523, "learning_rate": 3.197189894975873e-05, "loss": 8.777100592851638e-05, "step": 239760 }, { "epoch": 68.05847289242124, "grad_norm": 0.002396557480096817, "learning_rate": 3.196906045983537e-05, "loss": 4.868302494287491e-05, "step": 239770 }, { "epoch": 68.0613113823446, "grad_norm": 0.029607344418764114, "learning_rate": 3.1966221969912007e-05, "loss": 2.3817643523216248e-05, "step": 239780 }, { "epoch": 68.06414987226795, "grad_norm": 0.0073677534237504005, "learning_rate": 3.196338347998865e-05, "loss": 2.3271888494491577e-05, "step": 239790 }, { "epoch": 68.06698836219131, "grad_norm": 0.006063254550099373, "learning_rate": 3.196054499006529e-05, "loss": 2.379976212978363e-05, "step": 239800 }, { "epoch": 68.06982685211467, "grad_norm": 0.0011458552908152342, "learning_rate": 3.1957706500141924e-05, "loss": 4.836674779653549e-05, "step": 239810 }, { "epoch": 68.07266534203804, "grad_norm": 0.002535470761358738, "learning_rate": 3.1954868010218565e-05, "loss": 1.4632381498813629e-05, "step": 239820 }, { "epoch": 68.0755038319614, "grad_norm": 0.03920385614037514, "learning_rate": 3.195202952029521e-05, "loss": 5.7050399482250215e-05, "step": 239830 }, { "epoch": 68.07834232188476, "grad_norm": 0.07156386971473694, "learning_rate": 3.194919103037184e-05, "loss": 0.00012263283133506775, "step": 239840 }, { "epoch": 68.08118081180812, "grad_norm": 0.0053425440564751625, "learning_rate": 3.194635254044848e-05, "loss": 3.3964961767196654e-05, "step": 239850 }, { "epoch": 68.08401930173147, "grad_norm": 0.003601634409278631, "learning_rate": 3.194351405052512e-05, "loss": 4.859417676925659e-05, "step": 239860 }, { "epoch": 68.08685779165484, "grad_norm": 0.004399095196276903, "learning_rate": 3.1940675560601765e-05, "loss": 9.172875434160233e-05, "step": 239870 }, { "epoch": 68.0896962815782, "grad_norm": 0.003535828087478876, "learning_rate": 3.19378370706784e-05, "loss": 6.157290190458297e-05, "step": 239880 }, { "epoch": 68.09253477150156, "grad_norm": 0.00620547728613019, "learning_rate": 3.1934998580755035e-05, "loss": 3.575161099433899e-05, "step": 239890 }, { "epoch": 68.09537326142492, "grad_norm": 0.007515354081988335, "learning_rate": 3.193216009083168e-05, "loss": 4.5348890125751495e-05, "step": 239900 }, { "epoch": 68.09821175134829, "grad_norm": 0.01174678560346365, "learning_rate": 3.192932160090832e-05, "loss": 3.552436828613281e-05, "step": 239910 }, { "epoch": 68.10105024127165, "grad_norm": 0.03527228161692619, "learning_rate": 3.192648311098496e-05, "loss": 2.891179174184799e-05, "step": 239920 }, { "epoch": 68.103888731195, "grad_norm": 0.0064133708365261555, "learning_rate": 3.19236446210616e-05, "loss": 3.885533660650253e-05, "step": 239930 }, { "epoch": 68.10672722111836, "grad_norm": 0.02111930213868618, "learning_rate": 3.1920806131138235e-05, "loss": 4.891306161880493e-05, "step": 239940 }, { "epoch": 68.10956571104172, "grad_norm": 0.003209564136341214, "learning_rate": 3.1917967641214876e-05, "loss": 6.529241800308227e-05, "step": 239950 }, { "epoch": 68.11240420096509, "grad_norm": 0.02158086746931076, "learning_rate": 3.191512915129151e-05, "loss": 3.2988376915454865e-05, "step": 239960 }, { "epoch": 68.11524269088845, "grad_norm": 0.0013419782044366002, "learning_rate": 3.191229066136815e-05, "loss": 7.376819849014282e-06, "step": 239970 }, { "epoch": 68.11808118081181, "grad_norm": 0.0008342273649759591, "learning_rate": 3.190945217144479e-05, "loss": 1.2599118053913117e-05, "step": 239980 }, { "epoch": 68.12091967073516, "grad_norm": 0.002278153784573078, "learning_rate": 3.190661368152143e-05, "loss": 8.725747466087341e-05, "step": 239990 }, { "epoch": 68.12375816065853, "grad_norm": 0.017145099118351936, "learning_rate": 3.1903775191598076e-05, "loss": 3.429688513278961e-05, "step": 240000 }, { "epoch": 68.12375816065853, "eval_accuracy": 0.9870286767978635, "eval_loss": 0.05630262941122055, "eval_runtime": 39.8837, "eval_samples_per_second": 394.322, "eval_steps_per_second": 6.168, "step": 240000 }, { "epoch": 68.12659665058189, "grad_norm": 0.0054348488338291645, "learning_rate": 3.190093670167471e-05, "loss": 5.038455128669739e-05, "step": 240010 }, { "epoch": 68.12943514050525, "grad_norm": 0.0005800601211376488, "learning_rate": 3.1898098211751345e-05, "loss": 2.027973532676697e-05, "step": 240020 }, { "epoch": 68.13227363042861, "grad_norm": 0.000627723231445998, "learning_rate": 3.1895259721827993e-05, "loss": 3.6679394543170926e-05, "step": 240030 }, { "epoch": 68.13511212035198, "grad_norm": 0.0052975500002503395, "learning_rate": 3.189242123190463e-05, "loss": 1.8204748630523682e-05, "step": 240040 }, { "epoch": 68.13795061027534, "grad_norm": 0.008892346173524857, "learning_rate": 3.188958274198127e-05, "loss": 0.00015419460833072663, "step": 240050 }, { "epoch": 68.14078910019869, "grad_norm": 0.0014628556091338396, "learning_rate": 3.1886744252057904e-05, "loss": 0.0002644892781972885, "step": 240060 }, { "epoch": 68.14362759012205, "grad_norm": 0.0020491855684667826, "learning_rate": 3.1883905762134545e-05, "loss": 0.0003855204209685326, "step": 240070 }, { "epoch": 68.14646608004541, "grad_norm": 0.006103516090661287, "learning_rate": 3.188106727221119e-05, "loss": 9.890235960483552e-05, "step": 240080 }, { "epoch": 68.14930456996878, "grad_norm": 0.0019820542074739933, "learning_rate": 3.187822878228782e-05, "loss": 0.0035316869616508484, "step": 240090 }, { "epoch": 68.15214305989214, "grad_norm": 0.03432622179389, "learning_rate": 3.187539029236446e-05, "loss": 4.126355051994324e-05, "step": 240100 }, { "epoch": 68.1549815498155, "grad_norm": 0.047830693423748016, "learning_rate": 3.1872551802441104e-05, "loss": 8.776970207691193e-05, "step": 240110 }, { "epoch": 68.15782003973887, "grad_norm": 0.009248877875506878, "learning_rate": 3.186971331251774e-05, "loss": 0.0016348831355571746, "step": 240120 }, { "epoch": 68.16065852966221, "grad_norm": 0.005993354599922895, "learning_rate": 3.186687482259439e-05, "loss": 0.00018171295523643493, "step": 240130 }, { "epoch": 68.16349701958558, "grad_norm": 0.018750198185443878, "learning_rate": 3.186403633267102e-05, "loss": 0.0004926539957523346, "step": 240140 }, { "epoch": 68.16633550950894, "grad_norm": 0.008976766839623451, "learning_rate": 3.1861197842747656e-05, "loss": 3.92567366361618e-05, "step": 240150 }, { "epoch": 68.1691739994323, "grad_norm": 0.006811204366385937, "learning_rate": 3.18583593528243e-05, "loss": 0.003353714942932129, "step": 240160 }, { "epoch": 68.17201248935567, "grad_norm": 0.10937120020389557, "learning_rate": 3.185552086290094e-05, "loss": 0.004404406249523163, "step": 240170 }, { "epoch": 68.17485097927903, "grad_norm": 0.47033631801605225, "learning_rate": 3.185268237297758e-05, "loss": 0.0007438739761710167, "step": 240180 }, { "epoch": 68.17768946920238, "grad_norm": 0.029970813542604446, "learning_rate": 3.1849843883054215e-05, "loss": 0.0004762489348649979, "step": 240190 }, { "epoch": 68.18052795912574, "grad_norm": 0.00532500771805644, "learning_rate": 3.1847005393130856e-05, "loss": 0.0002679917961359024, "step": 240200 }, { "epoch": 68.1833664490491, "grad_norm": 0.06131188943982124, "learning_rate": 3.18441669032075e-05, "loss": 0.00041307155042886735, "step": 240210 }, { "epoch": 68.18620493897247, "grad_norm": 0.00300308084115386, "learning_rate": 3.184132841328413e-05, "loss": 0.0002616364508867264, "step": 240220 }, { "epoch": 68.18904342889583, "grad_norm": 0.003184613771736622, "learning_rate": 3.1838489923360774e-05, "loss": 0.00017302036285400392, "step": 240230 }, { "epoch": 68.19188191881919, "grad_norm": 0.3469906449317932, "learning_rate": 3.1835651433437415e-05, "loss": 0.0008353976532816887, "step": 240240 }, { "epoch": 68.19472040874255, "grad_norm": 1.6415189504623413, "learning_rate": 3.183281294351405e-05, "loss": 0.0011564088985323907, "step": 240250 }, { "epoch": 68.1975588986659, "grad_norm": 0.248515784740448, "learning_rate": 3.182997445359069e-05, "loss": 0.00016403328627347946, "step": 240260 }, { "epoch": 68.20039738858927, "grad_norm": 0.20538395643234253, "learning_rate": 3.182713596366733e-05, "loss": 0.0002790339291095734, "step": 240270 }, { "epoch": 68.20323587851263, "grad_norm": 2.2470474243164062, "learning_rate": 3.182429747374397e-05, "loss": 0.0006895389407873154, "step": 240280 }, { "epoch": 68.20607436843599, "grad_norm": 0.027284186333417892, "learning_rate": 3.182145898382061e-05, "loss": 0.00041740089654922484, "step": 240290 }, { "epoch": 68.20891285835935, "grad_norm": Infinity, "learning_rate": 3.181862049389725e-05, "loss": 0.0033567994832992554, "step": 240300 }, { "epoch": 68.21175134828272, "grad_norm": 0.1578315943479538, "learning_rate": 3.181606585296622e-05, "loss": 0.0002481261268258095, "step": 240310 }, { "epoch": 68.21458983820608, "grad_norm": 1.0672662258148193, "learning_rate": 3.1813227363042864e-05, "loss": 0.007758699357509613, "step": 240320 }, { "epoch": 68.21742832812943, "grad_norm": 0.8407207727432251, "learning_rate": 3.18103888731195e-05, "loss": 0.0004545731469988823, "step": 240330 }, { "epoch": 68.22026681805279, "grad_norm": 3.303269147872925, "learning_rate": 3.180755038319614e-05, "loss": 0.0015059106051921844, "step": 240340 }, { "epoch": 68.22310530797616, "grad_norm": 0.0033356724306941032, "learning_rate": 3.180471189327278e-05, "loss": 0.0015159782022237777, "step": 240350 }, { "epoch": 68.22594379789952, "grad_norm": 0.5342150330543518, "learning_rate": 3.1801873403349416e-05, "loss": 0.0004732118919491768, "step": 240360 }, { "epoch": 68.22878228782288, "grad_norm": 0.16680897772312164, "learning_rate": 3.179903491342606e-05, "loss": 0.00019998867064714433, "step": 240370 }, { "epoch": 68.23162077774624, "grad_norm": 15.207114219665527, "learning_rate": 3.17961964235027e-05, "loss": 0.012333407998085022, "step": 240380 }, { "epoch": 68.2344592676696, "grad_norm": 0.12410110980272293, "learning_rate": 3.179335793357934e-05, "loss": 0.0004551811143755913, "step": 240390 }, { "epoch": 68.23729775759296, "grad_norm": 0.00351278530433774, "learning_rate": 3.1790519443655975e-05, "loss": 0.00017222985625267028, "step": 240400 }, { "epoch": 68.24013624751632, "grad_norm": 16.65017318725586, "learning_rate": 3.1787680953732616e-05, "loss": 0.004487044364213944, "step": 240410 }, { "epoch": 68.24297473743968, "grad_norm": 0.1449987292289734, "learning_rate": 3.178484246380926e-05, "loss": 0.0009057696908712388, "step": 240420 }, { "epoch": 68.24581322736304, "grad_norm": 0.08382358402013779, "learning_rate": 3.178200397388589e-05, "loss": 0.0016214974224567414, "step": 240430 }, { "epoch": 68.2486517172864, "grad_norm": 0.02784920483827591, "learning_rate": 3.1779165483962534e-05, "loss": 9.690262377262115e-05, "step": 240440 }, { "epoch": 68.25149020720977, "grad_norm": 0.0011673542903736234, "learning_rate": 3.1776326994039175e-05, "loss": 0.0003803469240665436, "step": 240450 }, { "epoch": 68.25432869713312, "grad_norm": 7.329404354095459, "learning_rate": 3.177348850411581e-05, "loss": 0.010168570280075073, "step": 240460 }, { "epoch": 68.25716718705648, "grad_norm": 0.06731994450092316, "learning_rate": 3.177065001419245e-05, "loss": 0.001183420792222023, "step": 240470 }, { "epoch": 68.26000567697984, "grad_norm": 0.00678376667201519, "learning_rate": 3.176781152426909e-05, "loss": 0.0006335102021694184, "step": 240480 }, { "epoch": 68.26284416690321, "grad_norm": 0.08120357990264893, "learning_rate": 3.176497303434573e-05, "loss": 0.0008393926545977592, "step": 240490 }, { "epoch": 68.26568265682657, "grad_norm": 0.04206700995564461, "learning_rate": 3.176213454442237e-05, "loss": 0.0003138858824968338, "step": 240500 }, { "epoch": 68.26568265682657, "eval_accuracy": 0.981941883385261, "eval_loss": 0.0752621442079544, "eval_runtime": 37.5509, "eval_samples_per_second": 418.818, "eval_steps_per_second": 6.551, "step": 240500 }, { "epoch": 68.26852114674993, "grad_norm": 0.02560482732951641, "learning_rate": 3.175929605449901e-05, "loss": 0.0002942701801657677, "step": 240510 }, { "epoch": 68.2713596366733, "grad_norm": 0.005416013300418854, "learning_rate": 3.175645756457565e-05, "loss": 0.0008636878803372384, "step": 240520 }, { "epoch": 68.27419812659664, "grad_norm": 0.01209618803113699, "learning_rate": 3.1753619074652286e-05, "loss": 0.0047335315495729445, "step": 240530 }, { "epoch": 68.27703661652001, "grad_norm": 4.211609840393066, "learning_rate": 3.175078058472892e-05, "loss": 0.0009036097675561905, "step": 240540 }, { "epoch": 68.27987510644337, "grad_norm": 0.0060607497580349445, "learning_rate": 3.174794209480557e-05, "loss": 0.008609867095947266, "step": 240550 }, { "epoch": 68.28271359636673, "grad_norm": 0.007267740089446306, "learning_rate": 3.17451036048822e-05, "loss": 0.001828145608305931, "step": 240560 }, { "epoch": 68.2855520862901, "grad_norm": 0.007836856879293919, "learning_rate": 3.1742265114958844e-05, "loss": 0.0022885840386152267, "step": 240570 }, { "epoch": 68.28839057621346, "grad_norm": 0.01647830940783024, "learning_rate": 3.1739426625035486e-05, "loss": 0.0008567716926336288, "step": 240580 }, { "epoch": 68.29122906613682, "grad_norm": 0.007586176507174969, "learning_rate": 3.173658813511212e-05, "loss": 0.006322900950908661, "step": 240590 }, { "epoch": 68.29406755606017, "grad_norm": 0.04418540000915527, "learning_rate": 3.173374964518876e-05, "loss": 0.00021809563040733337, "step": 240600 }, { "epoch": 68.29690604598353, "grad_norm": 0.0036874043289572, "learning_rate": 3.17309111552654e-05, "loss": 0.0007886540144681931, "step": 240610 }, { "epoch": 68.2997445359069, "grad_norm": 0.061342768371105194, "learning_rate": 3.172807266534204e-05, "loss": 0.00025563668459653853, "step": 240620 }, { "epoch": 68.30258302583026, "grad_norm": 0.06728702783584595, "learning_rate": 3.172523417541868e-05, "loss": 0.00019738432019948958, "step": 240630 }, { "epoch": 68.30542151575362, "grad_norm": 0.0008005813579075038, "learning_rate": 3.1722395685495314e-05, "loss": 0.0024459052830934523, "step": 240640 }, { "epoch": 68.30826000567698, "grad_norm": 0.07747559249401093, "learning_rate": 3.171955719557196e-05, "loss": 0.00017672274261713028, "step": 240650 }, { "epoch": 68.31109849560033, "grad_norm": 0.012172559276223183, "learning_rate": 3.1716718705648596e-05, "loss": 0.00029557086527347564, "step": 240660 }, { "epoch": 68.3139369855237, "grad_norm": 0.042249809950590134, "learning_rate": 3.171388021572523e-05, "loss": 9.77497547864914e-05, "step": 240670 }, { "epoch": 68.31677547544706, "grad_norm": 0.007788345217704773, "learning_rate": 3.171104172580188e-05, "loss": 0.00063485037535429, "step": 240680 }, { "epoch": 68.31961396537042, "grad_norm": 0.03063557855784893, "learning_rate": 3.1708203235878514e-05, "loss": 0.0004929393529891968, "step": 240690 }, { "epoch": 68.32245245529379, "grad_norm": 7.852071285247803, "learning_rate": 3.1705364745955155e-05, "loss": 0.0016228629276156426, "step": 240700 }, { "epoch": 68.32529094521715, "grad_norm": 0.029004808515310287, "learning_rate": 3.1702526256031796e-05, "loss": 0.00042061973363161087, "step": 240710 }, { "epoch": 68.32812943514051, "grad_norm": 0.004501908086240292, "learning_rate": 3.169968776610843e-05, "loss": 8.557084947824478e-05, "step": 240720 }, { "epoch": 68.33096792506386, "grad_norm": 0.0115900207310915, "learning_rate": 3.169684927618507e-05, "loss": 0.0010948121547698975, "step": 240730 }, { "epoch": 68.33380641498722, "grad_norm": 0.062357787042856216, "learning_rate": 3.1694010786261714e-05, "loss": 0.0004254322499036789, "step": 240740 }, { "epoch": 68.33664490491059, "grad_norm": 1.1870605945587158, "learning_rate": 3.169117229633835e-05, "loss": 0.0004509182646870613, "step": 240750 }, { "epoch": 68.33948339483395, "grad_norm": 0.006983451545238495, "learning_rate": 3.168833380641499e-05, "loss": 0.0008638791739940643, "step": 240760 }, { "epoch": 68.34232188475731, "grad_norm": 0.18766102194786072, "learning_rate": 3.1685495316491624e-05, "loss": 0.002707315981388092, "step": 240770 }, { "epoch": 68.34516037468067, "grad_norm": 0.03165770322084427, "learning_rate": 3.1682656826568266e-05, "loss": 0.0005172763019800186, "step": 240780 }, { "epoch": 68.34799886460404, "grad_norm": 0.007680956739932299, "learning_rate": 3.167981833664491e-05, "loss": 0.00014915969222784042, "step": 240790 }, { "epoch": 68.35083735452739, "grad_norm": 0.17380094528198242, "learning_rate": 3.167697984672154e-05, "loss": 0.00016940440982580185, "step": 240800 }, { "epoch": 68.35367584445075, "grad_norm": 0.04331692308187485, "learning_rate": 3.167414135679819e-05, "loss": 0.0007382092997431755, "step": 240810 }, { "epoch": 68.35651433437411, "grad_norm": 0.008488313294947147, "learning_rate": 3.1671302866874824e-05, "loss": 0.0002826608717441559, "step": 240820 }, { "epoch": 68.35935282429747, "grad_norm": 1.5192828178405762, "learning_rate": 3.166846437695146e-05, "loss": 0.0004287509247660637, "step": 240830 }, { "epoch": 68.36219131422084, "grad_norm": 0.030176304280757904, "learning_rate": 3.166562588702811e-05, "loss": 0.00022318586707115173, "step": 240840 }, { "epoch": 68.3650298041442, "grad_norm": 0.012283694930374622, "learning_rate": 3.166278739710474e-05, "loss": 7.31348991394043e-05, "step": 240850 }, { "epoch": 68.36786829406756, "grad_norm": 0.01417518313974142, "learning_rate": 3.165994890718138e-05, "loss": 0.00013067666441202164, "step": 240860 }, { "epoch": 68.37070678399091, "grad_norm": 0.043701961636543274, "learning_rate": 3.165711041725802e-05, "loss": 0.00018098428845405578, "step": 240870 }, { "epoch": 68.37354527391427, "grad_norm": 4.481802463531494, "learning_rate": 3.165427192733466e-05, "loss": 0.0013311458751559258, "step": 240880 }, { "epoch": 68.37638376383764, "grad_norm": 0.09484796226024628, "learning_rate": 3.16514334374113e-05, "loss": 0.0002487940713763237, "step": 240890 }, { "epoch": 68.379222253761, "grad_norm": 8.341326713562012, "learning_rate": 3.1648594947487935e-05, "loss": 0.002648993581533432, "step": 240900 }, { "epoch": 68.38206074368436, "grad_norm": 0.007952545769512653, "learning_rate": 3.1645756457564576e-05, "loss": 0.000433318130671978, "step": 240910 }, { "epoch": 68.38489923360773, "grad_norm": 0.33743104338645935, "learning_rate": 3.164291796764122e-05, "loss": 0.00024590715765953063, "step": 240920 }, { "epoch": 68.38773772353107, "grad_norm": 0.012342059053480625, "learning_rate": 3.164007947771785e-05, "loss": 0.0002996046096086502, "step": 240930 }, { "epoch": 68.39057621345444, "grad_norm": 0.47367265820503235, "learning_rate": 3.16372409877945e-05, "loss": 0.00831446573138237, "step": 240940 }, { "epoch": 68.3934147033778, "grad_norm": 0.02642156556248665, "learning_rate": 3.1634402497871135e-05, "loss": 0.00011361446231603623, "step": 240950 }, { "epoch": 68.39625319330116, "grad_norm": 0.04902622476220131, "learning_rate": 3.163156400794777e-05, "loss": 0.0005002420395612717, "step": 240960 }, { "epoch": 68.39909168322453, "grad_norm": 0.009845029562711716, "learning_rate": 3.162872551802441e-05, "loss": 0.005281845480203629, "step": 240970 }, { "epoch": 68.40193017314789, "grad_norm": 0.024807410314679146, "learning_rate": 3.162588702810105e-05, "loss": 7.436610758304596e-05, "step": 240980 }, { "epoch": 68.40476866307125, "grad_norm": 0.008713621646165848, "learning_rate": 3.1623048538177694e-05, "loss": 0.0004256404936313629, "step": 240990 }, { "epoch": 68.4076071529946, "grad_norm": 0.04193517565727234, "learning_rate": 3.162021004825433e-05, "loss": 0.0005416575819253922, "step": 241000 }, { "epoch": 68.4076071529946, "eval_accuracy": 0.9786990525847269, "eval_loss": 0.09232212603092194, "eval_runtime": 35.8724, "eval_samples_per_second": 438.415, "eval_steps_per_second": 6.858, "step": 241000 }, { "epoch": 68.41044564291796, "grad_norm": 0.1477912813425064, "learning_rate": 3.161737155833097e-05, "loss": 0.002946638874709606, "step": 241010 }, { "epoch": 68.41328413284133, "grad_norm": 0.01570584811270237, "learning_rate": 3.161453306840761e-05, "loss": 0.00012006498873233795, "step": 241020 }, { "epoch": 68.41612262276469, "grad_norm": 0.030424876138567924, "learning_rate": 3.1611694578484246e-05, "loss": 0.0003454115241765976, "step": 241030 }, { "epoch": 68.41896111268805, "grad_norm": 0.004156667273491621, "learning_rate": 3.160885608856089e-05, "loss": 8.80952924489975e-05, "step": 241040 }, { "epoch": 68.42179960261142, "grad_norm": 0.0019355762051418424, "learning_rate": 3.160601759863753e-05, "loss": 9.132008999586105e-05, "step": 241050 }, { "epoch": 68.42463809253478, "grad_norm": 0.0027862440329045057, "learning_rate": 3.160317910871416e-05, "loss": 6.120763719081878e-05, "step": 241060 }, { "epoch": 68.42747658245813, "grad_norm": 0.04747236147522926, "learning_rate": 3.1600340618790805e-05, "loss": 0.00010259486734867095, "step": 241070 }, { "epoch": 68.43031507238149, "grad_norm": 0.0016386983916163445, "learning_rate": 3.1597502128867446e-05, "loss": 5.286820232868195e-05, "step": 241080 }, { "epoch": 68.43315356230485, "grad_norm": 0.05621575936675072, "learning_rate": 3.159466363894408e-05, "loss": 7.964372634887696e-05, "step": 241090 }, { "epoch": 68.43599205222822, "grad_norm": 0.03766496852040291, "learning_rate": 3.159182514902072e-05, "loss": 9.218882769346237e-05, "step": 241100 }, { "epoch": 68.43883054215158, "grad_norm": 6.517855644226074, "learning_rate": 3.158898665909736e-05, "loss": 0.0010999128222465514, "step": 241110 }, { "epoch": 68.44166903207494, "grad_norm": 0.01148031372576952, "learning_rate": 3.1586148169174005e-05, "loss": 0.0003017807379364967, "step": 241120 }, { "epoch": 68.4445075219983, "grad_norm": 0.010725278407335281, "learning_rate": 3.158330967925064e-05, "loss": 0.00042031072080135347, "step": 241130 }, { "epoch": 68.44734601192165, "grad_norm": 0.01812233403325081, "learning_rate": 3.158047118932728e-05, "loss": 3.0246190726757048e-05, "step": 241140 }, { "epoch": 68.45018450184502, "grad_norm": 0.04245585948228836, "learning_rate": 3.157763269940392e-05, "loss": 0.0001392999663949013, "step": 241150 }, { "epoch": 68.45302299176838, "grad_norm": 0.044889770448207855, "learning_rate": 3.157479420948056e-05, "loss": 0.0008998753502964974, "step": 241160 }, { "epoch": 68.45586148169174, "grad_norm": 0.0021302972454577684, "learning_rate": 3.15719557195572e-05, "loss": 0.0009484643116593361, "step": 241170 }, { "epoch": 68.4586999716151, "grad_norm": 0.024901919066905975, "learning_rate": 3.156911722963384e-05, "loss": 0.00010455939918756485, "step": 241180 }, { "epoch": 68.46153846153847, "grad_norm": 0.010865379124879837, "learning_rate": 3.1566278739710474e-05, "loss": 0.00020656604319810868, "step": 241190 }, { "epoch": 68.46437695146182, "grad_norm": 0.023380761966109276, "learning_rate": 3.1563440249787115e-05, "loss": 0.0009027399122714996, "step": 241200 }, { "epoch": 68.46721544138518, "grad_norm": 0.1445561945438385, "learning_rate": 3.156060175986376e-05, "loss": 7.92142003774643e-05, "step": 241210 }, { "epoch": 68.47005393130854, "grad_norm": 0.0687345638871193, "learning_rate": 3.155776326994039e-05, "loss": 0.006757941842079162, "step": 241220 }, { "epoch": 68.4728924212319, "grad_norm": 0.003844842780381441, "learning_rate": 3.155492478001703e-05, "loss": 8.267723023891449e-05, "step": 241230 }, { "epoch": 68.47573091115527, "grad_norm": 0.00886473711580038, "learning_rate": 3.1552086290093674e-05, "loss": 0.0007406877353787422, "step": 241240 }, { "epoch": 68.47856940107863, "grad_norm": 0.018043356016278267, "learning_rate": 3.154924780017031e-05, "loss": 0.0003394030034542084, "step": 241250 }, { "epoch": 68.481407891002, "grad_norm": 0.010575533844530582, "learning_rate": 3.154640931024695e-05, "loss": 0.0008562471717596054, "step": 241260 }, { "epoch": 68.48424638092534, "grad_norm": 0.953268826007843, "learning_rate": 3.1543570820323585e-05, "loss": 0.00023769661784172057, "step": 241270 }, { "epoch": 68.4870848708487, "grad_norm": 0.025605805218219757, "learning_rate": 3.154073233040023e-05, "loss": 0.0003008704632520676, "step": 241280 }, { "epoch": 68.48992336077207, "grad_norm": 0.026508789509534836, "learning_rate": 3.153789384047687e-05, "loss": 6.545297801494599e-05, "step": 241290 }, { "epoch": 68.49276185069543, "grad_norm": 0.017146045342087746, "learning_rate": 3.15350553505535e-05, "loss": 8.593648672103882e-05, "step": 241300 }, { "epoch": 68.4956003406188, "grad_norm": 0.28549936413764954, "learning_rate": 3.153221686063015e-05, "loss": 0.00014492515474557878, "step": 241310 }, { "epoch": 68.49843883054216, "grad_norm": 0.001344967749901116, "learning_rate": 3.1529378370706785e-05, "loss": 0.0007947605103254318, "step": 241320 }, { "epoch": 68.50127732046552, "grad_norm": 0.12409453094005585, "learning_rate": 3.1526539880783426e-05, "loss": 0.0001620844006538391, "step": 241330 }, { "epoch": 68.50411581038887, "grad_norm": 0.2025132030248642, "learning_rate": 3.152370139086007e-05, "loss": 0.0003179488703608513, "step": 241340 }, { "epoch": 68.50695430031223, "grad_norm": 0.010020204819738865, "learning_rate": 3.15208629009367e-05, "loss": 7.12493434548378e-05, "step": 241350 }, { "epoch": 68.5097927902356, "grad_norm": 0.054296720772981644, "learning_rate": 3.1518024411013343e-05, "loss": 0.0011132648214697838, "step": 241360 }, { "epoch": 68.51263128015896, "grad_norm": 0.003960839007049799, "learning_rate": 3.151518592108998e-05, "loss": 0.00020819045603275298, "step": 241370 }, { "epoch": 68.51546977008232, "grad_norm": 0.010922245681285858, "learning_rate": 3.151234743116662e-05, "loss": 0.0013970142230391503, "step": 241380 }, { "epoch": 68.51830826000568, "grad_norm": 0.008313432335853577, "learning_rate": 3.150950894124326e-05, "loss": 0.00010442081838846207, "step": 241390 }, { "epoch": 68.52114674992903, "grad_norm": 0.006178335752338171, "learning_rate": 3.1506670451319895e-05, "loss": 0.0011907670646905898, "step": 241400 }, { "epoch": 68.5239852398524, "grad_norm": 0.0024459068663418293, "learning_rate": 3.1503831961396544e-05, "loss": 4.460029304027557e-05, "step": 241410 }, { "epoch": 68.52682372977576, "grad_norm": 0.001311842817813158, "learning_rate": 3.150099347147318e-05, "loss": 0.0002850320190191269, "step": 241420 }, { "epoch": 68.52966221969912, "grad_norm": 0.01189696416258812, "learning_rate": 3.149815498154981e-05, "loss": 0.0014952506870031356, "step": 241430 }, { "epoch": 68.53250070962248, "grad_norm": 0.005245222710072994, "learning_rate": 3.149531649162646e-05, "loss": 6.965063512325287e-05, "step": 241440 }, { "epoch": 68.53533919954585, "grad_norm": 0.0997343435883522, "learning_rate": 3.1492478001703096e-05, "loss": 8.694007992744446e-05, "step": 241450 }, { "epoch": 68.53817768946921, "grad_norm": 0.38168108463287354, "learning_rate": 3.148963951177974e-05, "loss": 0.0011399934068322183, "step": 241460 }, { "epoch": 68.54101617939256, "grad_norm": 0.003925653174519539, "learning_rate": 3.148680102185637e-05, "loss": 0.001093505509197712, "step": 241470 }, { "epoch": 68.54385466931592, "grad_norm": 0.009689796715974808, "learning_rate": 3.148396253193301e-05, "loss": 0.000280008465051651, "step": 241480 }, { "epoch": 68.54669315923928, "grad_norm": 0.11548539251089096, "learning_rate": 3.1481124042009654e-05, "loss": 0.0053018458187580105, "step": 241490 }, { "epoch": 68.54953164916265, "grad_norm": 0.011441230773925781, "learning_rate": 3.147828555208629e-05, "loss": 0.0001580921933054924, "step": 241500 }, { "epoch": 68.54953164916265, "eval_accuracy": 0.9837222610796719, "eval_loss": 0.07325015217065811, "eval_runtime": 37.6121, "eval_samples_per_second": 418.137, "eval_steps_per_second": 6.54, "step": 241500 }, { "epoch": 68.55237013908601, "grad_norm": 0.036182813346385956, "learning_rate": 3.147544706216293e-05, "loss": 0.0002533653751015663, "step": 241510 }, { "epoch": 68.55520862900937, "grad_norm": 0.758578360080719, "learning_rate": 3.147260857223957e-05, "loss": 0.00027342364192008974, "step": 241520 }, { "epoch": 68.55804711893273, "grad_norm": 9.623462677001953, "learning_rate": 3.1469770082316206e-05, "loss": 0.004910767078399658, "step": 241530 }, { "epoch": 68.56088560885608, "grad_norm": 0.11047637462615967, "learning_rate": 3.146693159239285e-05, "loss": 5.818251520395279e-05, "step": 241540 }, { "epoch": 68.56372409877945, "grad_norm": 0.017195964232087135, "learning_rate": 3.146409310246949e-05, "loss": 0.00033190697431564333, "step": 241550 }, { "epoch": 68.56656258870281, "grad_norm": 0.006990050431340933, "learning_rate": 3.1461254612546124e-05, "loss": 0.00021178796887397767, "step": 241560 }, { "epoch": 68.56940107862617, "grad_norm": 0.0018226711545139551, "learning_rate": 3.1458416122622765e-05, "loss": 0.00011740457266569137, "step": 241570 }, { "epoch": 68.57223956854953, "grad_norm": 0.4214693605899811, "learning_rate": 3.1455577632699406e-05, "loss": 0.00011174008250236511, "step": 241580 }, { "epoch": 68.5750780584729, "grad_norm": 0.009866012260317802, "learning_rate": 3.145273914277604e-05, "loss": 6.399601697921753e-05, "step": 241590 }, { "epoch": 68.57791654839626, "grad_norm": 0.07650236040353775, "learning_rate": 3.144990065285268e-05, "loss": 0.0001306101679801941, "step": 241600 }, { "epoch": 68.58075503831961, "grad_norm": 0.008367164060473442, "learning_rate": 3.1447062162929324e-05, "loss": 0.00018238015472888948, "step": 241610 }, { "epoch": 68.58359352824297, "grad_norm": 0.1669401228427887, "learning_rate": 3.1444223673005965e-05, "loss": 0.00027456339448690416, "step": 241620 }, { "epoch": 68.58643201816633, "grad_norm": 0.017111793160438538, "learning_rate": 3.14413851830826e-05, "loss": 8.81977379322052e-05, "step": 241630 }, { "epoch": 68.5892705080897, "grad_norm": 0.4017028212547302, "learning_rate": 3.143854669315924e-05, "loss": 0.0004727710038423538, "step": 241640 }, { "epoch": 68.59210899801306, "grad_norm": 0.005586893297731876, "learning_rate": 3.143570820323588e-05, "loss": 5.073249340057373e-05, "step": 241650 }, { "epoch": 68.59494748793642, "grad_norm": 0.0033792087342590094, "learning_rate": 3.143286971331252e-05, "loss": 0.00048000458627939224, "step": 241660 }, { "epoch": 68.59778597785977, "grad_norm": 0.011842811480164528, "learning_rate": 3.143003122338916e-05, "loss": 9.249597787857056e-05, "step": 241670 }, { "epoch": 68.60062446778313, "grad_norm": 0.028488028794527054, "learning_rate": 3.14271927334658e-05, "loss": 0.0003157496452331543, "step": 241680 }, { "epoch": 68.6034629577065, "grad_norm": 0.028649143874645233, "learning_rate": 3.1424354243542434e-05, "loss": 0.0005141250789165497, "step": 241690 }, { "epoch": 68.60630144762986, "grad_norm": 0.003998704720288515, "learning_rate": 3.1421515753619076e-05, "loss": 4.0683895349502563e-05, "step": 241700 }, { "epoch": 68.60913993755322, "grad_norm": 0.037539042532444, "learning_rate": 3.141867726369572e-05, "loss": 0.00018998868763446807, "step": 241710 }, { "epoch": 68.61197842747659, "grad_norm": 0.2937532663345337, "learning_rate": 3.141583877377235e-05, "loss": 0.0001223759725689888, "step": 241720 }, { "epoch": 68.61481691739995, "grad_norm": 0.004599826876074076, "learning_rate": 3.141300028384899e-05, "loss": 6.089955568313599e-05, "step": 241730 }, { "epoch": 68.6176554073233, "grad_norm": 0.015153001993894577, "learning_rate": 3.1410161793925634e-05, "loss": 9.131059050559998e-05, "step": 241740 }, { "epoch": 68.62049389724666, "grad_norm": 0.004790079314261675, "learning_rate": 3.1407323304002276e-05, "loss": 3.844592720270157e-05, "step": 241750 }, { "epoch": 68.62333238717002, "grad_norm": 0.01183332409709692, "learning_rate": 3.140448481407891e-05, "loss": 8.352547883987427e-05, "step": 241760 }, { "epoch": 68.62617087709339, "grad_norm": 0.006960852537304163, "learning_rate": 3.1401646324155545e-05, "loss": 3.083255141973495e-05, "step": 241770 }, { "epoch": 68.62900936701675, "grad_norm": 0.003843190846964717, "learning_rate": 3.139880783423219e-05, "loss": 7.023494690656662e-05, "step": 241780 }, { "epoch": 68.63184785694011, "grad_norm": 0.0015867928741499782, "learning_rate": 3.139596934430883e-05, "loss": 3.135986626148224e-05, "step": 241790 }, { "epoch": 68.63468634686348, "grad_norm": 0.0005363915115594864, "learning_rate": 3.139313085438547e-05, "loss": 9.955577552318573e-05, "step": 241800 }, { "epoch": 68.63752483678682, "grad_norm": 0.004332142882049084, "learning_rate": 3.139029236446211e-05, "loss": 6.279759109020233e-05, "step": 241810 }, { "epoch": 68.64036332671019, "grad_norm": 0.006347616668790579, "learning_rate": 3.1387453874538745e-05, "loss": 4.3258070945739746e-05, "step": 241820 }, { "epoch": 68.64320181663355, "grad_norm": 0.013800031505525112, "learning_rate": 3.1384615384615386e-05, "loss": 4.338230937719345e-05, "step": 241830 }, { "epoch": 68.64604030655691, "grad_norm": 0.01360583771020174, "learning_rate": 3.138177689469203e-05, "loss": 2.241712063550949e-05, "step": 241840 }, { "epoch": 68.64887879648028, "grad_norm": 0.004659988917410374, "learning_rate": 3.137893840476866e-05, "loss": 7.339213043451309e-05, "step": 241850 }, { "epoch": 68.65171728640364, "grad_norm": 0.006082908250391483, "learning_rate": 3.1376099914845304e-05, "loss": 2.1361373364925383e-05, "step": 241860 }, { "epoch": 68.65455577632699, "grad_norm": 0.06965763866901398, "learning_rate": 3.137326142492194e-05, "loss": 0.00044288039207458497, "step": 241870 }, { "epoch": 68.65739426625035, "grad_norm": 0.00933873001486063, "learning_rate": 3.1370422934998587e-05, "loss": 0.0002037210389971733, "step": 241880 }, { "epoch": 68.66023275617371, "grad_norm": 0.20902463793754578, "learning_rate": 3.136758444507522e-05, "loss": 0.00021392740309238433, "step": 241890 }, { "epoch": 68.66307124609708, "grad_norm": 0.012260379269719124, "learning_rate": 3.1364745955151856e-05, "loss": 0.00010172128677368164, "step": 241900 }, { "epoch": 68.66590973602044, "grad_norm": 0.025078753009438515, "learning_rate": 3.1361907465228504e-05, "loss": 0.000449318066239357, "step": 241910 }, { "epoch": 68.6687482259438, "grad_norm": 0.08994372934103012, "learning_rate": 3.135906897530514e-05, "loss": 0.0011855017393827439, "step": 241920 }, { "epoch": 68.67158671586716, "grad_norm": 0.008447092957794666, "learning_rate": 3.135623048538178e-05, "loss": 0.001216537319123745, "step": 241930 }, { "epoch": 68.67442520579051, "grad_norm": 0.17817886173725128, "learning_rate": 3.135339199545842e-05, "loss": 0.004593072831630707, "step": 241940 }, { "epoch": 68.67726369571388, "grad_norm": 0.013547426089644432, "learning_rate": 3.1350553505535056e-05, "loss": 0.006446942687034607, "step": 241950 }, { "epoch": 68.68010218563724, "grad_norm": 5.155890464782715, "learning_rate": 3.13477150156117e-05, "loss": 0.0010515820235013963, "step": 241960 }, { "epoch": 68.6829406755606, "grad_norm": 0.003459335071966052, "learning_rate": 3.134487652568833e-05, "loss": 8.767787367105483e-05, "step": 241970 }, { "epoch": 68.68577916548396, "grad_norm": 0.017875181511044502, "learning_rate": 3.134203803576497e-05, "loss": 0.003611842542886734, "step": 241980 }, { "epoch": 68.68861765540733, "grad_norm": 0.009660424664616585, "learning_rate": 3.1339199545841615e-05, "loss": 0.0004721464589238167, "step": 241990 }, { "epoch": 68.69145614533069, "grad_norm": 0.003825064515694976, "learning_rate": 3.133636105591825e-05, "loss": 9.090173989534378e-05, "step": 242000 }, { "epoch": 68.69145614533069, "eval_accuracy": 0.9857569784447129, "eval_loss": 0.06411635130643845, "eval_runtime": 47.2753, "eval_samples_per_second": 332.669, "eval_steps_per_second": 5.204, "step": 242000 }, { "epoch": 68.69429463525404, "grad_norm": 0.4811842441558838, "learning_rate": 3.133352256599489e-05, "loss": 0.0002745768055319786, "step": 242010 }, { "epoch": 68.6971331251774, "grad_norm": 1.6574711799621582, "learning_rate": 3.133068407607153e-05, "loss": 0.0004695907235145569, "step": 242020 }, { "epoch": 68.69997161510076, "grad_norm": 0.002969956723973155, "learning_rate": 3.1327845586148167e-05, "loss": 0.0003173667937517166, "step": 242030 }, { "epoch": 68.70281010502413, "grad_norm": 0.0031075216829776764, "learning_rate": 3.1325007096224815e-05, "loss": 0.0006364176049828529, "step": 242040 }, { "epoch": 68.70564859494749, "grad_norm": 0.03242626041173935, "learning_rate": 3.132216860630145e-05, "loss": 0.0003315985202789307, "step": 242050 }, { "epoch": 68.70848708487085, "grad_norm": 0.053545787930488586, "learning_rate": 3.1319330116378084e-05, "loss": 0.0003656165674328804, "step": 242060 }, { "epoch": 68.71132557479422, "grad_norm": 0.018688799813389778, "learning_rate": 3.131649162645473e-05, "loss": 0.0010098306462168693, "step": 242070 }, { "epoch": 68.71416406471756, "grad_norm": 0.22721067070960999, "learning_rate": 3.1313653136531367e-05, "loss": 8.029863238334656e-05, "step": 242080 }, { "epoch": 68.71700255464093, "grad_norm": 0.011697131209075451, "learning_rate": 3.131081464660801e-05, "loss": 3.4786947071552274e-05, "step": 242090 }, { "epoch": 68.71984104456429, "grad_norm": 0.02415064349770546, "learning_rate": 3.130797615668464e-05, "loss": 0.0002777004614472389, "step": 242100 }, { "epoch": 68.72267953448765, "grad_norm": 0.04498007893562317, "learning_rate": 3.1305137666761284e-05, "loss": 0.0002390049397945404, "step": 242110 }, { "epoch": 68.72551802441102, "grad_norm": 0.06359776109457016, "learning_rate": 3.1302299176837925e-05, "loss": 0.00024441834539175035, "step": 242120 }, { "epoch": 68.72835651433438, "grad_norm": 7.5053911209106445, "learning_rate": 3.129946068691456e-05, "loss": 0.000756523571908474, "step": 242130 }, { "epoch": 68.73119500425773, "grad_norm": 0.14491653442382812, "learning_rate": 3.12966221969912e-05, "loss": 0.0005408871918916702, "step": 242140 }, { "epoch": 68.73403349418109, "grad_norm": 0.1201905608177185, "learning_rate": 3.129378370706784e-05, "loss": 0.0001009371131658554, "step": 242150 }, { "epoch": 68.73687198410445, "grad_norm": 0.019530996680259705, "learning_rate": 3.129094521714448e-05, "loss": 0.001305459812283516, "step": 242160 }, { "epoch": 68.73971047402782, "grad_norm": 0.019178442656993866, "learning_rate": 3.1288106727221125e-05, "loss": 0.0001250132918357849, "step": 242170 }, { "epoch": 68.74254896395118, "grad_norm": 0.0023331083357334137, "learning_rate": 3.128526823729776e-05, "loss": 0.000135025754570961, "step": 242180 }, { "epoch": 68.74538745387454, "grad_norm": 0.005371957551687956, "learning_rate": 3.1282429747374395e-05, "loss": 0.0003245782107114792, "step": 242190 }, { "epoch": 68.7482259437979, "grad_norm": 0.1474253535270691, "learning_rate": 3.1279591257451036e-05, "loss": 8.322820067405701e-05, "step": 242200 }, { "epoch": 68.75106443372125, "grad_norm": 0.07895016670227051, "learning_rate": 3.127675276752768e-05, "loss": 0.0005824379622936249, "step": 242210 }, { "epoch": 68.75390292364462, "grad_norm": 0.37826231122016907, "learning_rate": 3.127391427760432e-05, "loss": 0.00023696348071098328, "step": 242220 }, { "epoch": 68.75674141356798, "grad_norm": 0.014521555043756962, "learning_rate": 3.127107578768095e-05, "loss": 0.00012265481054782868, "step": 242230 }, { "epoch": 68.75957990349134, "grad_norm": 0.009542717598378658, "learning_rate": 3.1268237297757595e-05, "loss": 7.680729031562805e-05, "step": 242240 }, { "epoch": 68.7624183934147, "grad_norm": 0.010511993430554867, "learning_rate": 3.1265398807834236e-05, "loss": 0.0001152973622083664, "step": 242250 }, { "epoch": 68.76525688333807, "grad_norm": 0.019641801714897156, "learning_rate": 3.126256031791087e-05, "loss": 0.00013644397258758545, "step": 242260 }, { "epoch": 68.76809537326143, "grad_norm": 0.015953466296195984, "learning_rate": 3.125972182798751e-05, "loss": 9.64626669883728e-05, "step": 242270 }, { "epoch": 68.77093386318478, "grad_norm": 0.01006372831761837, "learning_rate": 3.1256883338064153e-05, "loss": 8.606482297182083e-05, "step": 242280 }, { "epoch": 68.77377235310814, "grad_norm": 0.0015702582895755768, "learning_rate": 3.125404484814079e-05, "loss": 8.716844022274017e-05, "step": 242290 }, { "epoch": 68.7766108430315, "grad_norm": 0.022503184154629707, "learning_rate": 3.125120635821743e-05, "loss": 4.788227379322052e-05, "step": 242300 }, { "epoch": 68.77944933295487, "grad_norm": 0.002759424736723304, "learning_rate": 3.124836786829407e-05, "loss": 4.118718206882477e-05, "step": 242310 }, { "epoch": 68.78228782287823, "grad_norm": 0.05244138464331627, "learning_rate": 3.1245529378370705e-05, "loss": 0.00038196183741092684, "step": 242320 }, { "epoch": 68.7851263128016, "grad_norm": 0.04183056578040123, "learning_rate": 3.124269088844735e-05, "loss": 4.6337582170963286e-05, "step": 242330 }, { "epoch": 68.78796480272496, "grad_norm": 0.010177926160395145, "learning_rate": 3.123985239852399e-05, "loss": 0.0002345377579331398, "step": 242340 }, { "epoch": 68.7908032926483, "grad_norm": 0.0051698400638997555, "learning_rate": 3.123701390860063e-05, "loss": 6.66748732328415e-05, "step": 242350 }, { "epoch": 68.79364178257167, "grad_norm": 0.07212597131729126, "learning_rate": 3.1234175418677264e-05, "loss": 0.00019523017108440398, "step": 242360 }, { "epoch": 68.79648027249503, "grad_norm": 0.02083931304514408, "learning_rate": 3.1231336928753905e-05, "loss": 4.9522146582603455e-05, "step": 242370 }, { "epoch": 68.7993187624184, "grad_norm": 0.015513231977820396, "learning_rate": 3.122849843883055e-05, "loss": 0.00011592451483011246, "step": 242380 }, { "epoch": 68.80215725234176, "grad_norm": 0.0045471107587218285, "learning_rate": 3.122565994890718e-05, "loss": 0.0003768453374505043, "step": 242390 }, { "epoch": 68.80499574226512, "grad_norm": 0.003386152908205986, "learning_rate": 3.122282145898382e-05, "loss": 3.756769001483917e-05, "step": 242400 }, { "epoch": 68.80783423218847, "grad_norm": 1.6973230838775635, "learning_rate": 3.1219982969060464e-05, "loss": 0.00040118172764778135, "step": 242410 }, { "epoch": 68.81067272211183, "grad_norm": 0.005311233457177877, "learning_rate": 3.12171444791371e-05, "loss": 7.124133408069611e-05, "step": 242420 }, { "epoch": 68.8135112120352, "grad_norm": 0.002993655623868108, "learning_rate": 3.121430598921374e-05, "loss": 6.964057683944702e-05, "step": 242430 }, { "epoch": 68.81634970195856, "grad_norm": 0.030118808150291443, "learning_rate": 3.121146749929038e-05, "loss": 0.00030196160078048705, "step": 242440 }, { "epoch": 68.81918819188192, "grad_norm": 0.009569007903337479, "learning_rate": 3.1208629009367016e-05, "loss": 0.0004286518320441246, "step": 242450 }, { "epoch": 68.82202668180528, "grad_norm": 0.06224195286631584, "learning_rate": 3.120579051944366e-05, "loss": 0.00015289504081010817, "step": 242460 }, { "epoch": 68.82486517172865, "grad_norm": 0.0034106906969100237, "learning_rate": 3.12029520295203e-05, "loss": 0.0003752095624804497, "step": 242470 }, { "epoch": 68.827703661652, "grad_norm": 0.2083096206188202, "learning_rate": 3.1200113539596934e-05, "loss": 0.00016641095280647277, "step": 242480 }, { "epoch": 68.83054215157536, "grad_norm": 0.0006769439205527306, "learning_rate": 3.1197275049673575e-05, "loss": 3.285203129053116e-05, "step": 242490 }, { "epoch": 68.83338064149872, "grad_norm": 0.008462360128760338, "learning_rate": 3.119443655975021e-05, "loss": 0.00012510232627391816, "step": 242500 }, { "epoch": 68.83338064149872, "eval_accuracy": 0.9861384879506581, "eval_loss": 0.05869239196181297, "eval_runtime": 37.7362, "eval_samples_per_second": 416.762, "eval_steps_per_second": 6.519, "step": 242500 }, { "epoch": 68.83621913142208, "grad_norm": 0.23255464434623718, "learning_rate": 3.119159806982686e-05, "loss": 8.191168308258057e-05, "step": 242510 }, { "epoch": 68.83905762134545, "grad_norm": 0.0037564157973974943, "learning_rate": 3.118875957990349e-05, "loss": 8.834060281515121e-05, "step": 242520 }, { "epoch": 68.84189611126881, "grad_norm": 0.0035780291073024273, "learning_rate": 3.118592108998013e-05, "loss": 0.00013796407729387282, "step": 242530 }, { "epoch": 68.84473460119217, "grad_norm": 0.017490744590759277, "learning_rate": 3.1183082600056775e-05, "loss": 6.108898669481278e-05, "step": 242540 }, { "epoch": 68.84757309111552, "grad_norm": 0.002207046141847968, "learning_rate": 3.118024411013341e-05, "loss": 0.00016517248004674912, "step": 242550 }, { "epoch": 68.85041158103888, "grad_norm": 0.0008004004484973848, "learning_rate": 3.117740562021005e-05, "loss": 6.232969462871552e-05, "step": 242560 }, { "epoch": 68.85325007096225, "grad_norm": 0.023339292034506798, "learning_rate": 3.117456713028669e-05, "loss": 0.00011105816811323166, "step": 242570 }, { "epoch": 68.85608856088561, "grad_norm": 0.0075482530519366264, "learning_rate": 3.117172864036333e-05, "loss": 4.355832934379578e-05, "step": 242580 }, { "epoch": 68.85892705080897, "grad_norm": 0.008150560781359673, "learning_rate": 3.116889015043997e-05, "loss": 2.7609802782535552e-05, "step": 242590 }, { "epoch": 68.86176554073234, "grad_norm": 0.01786244660615921, "learning_rate": 3.11660516605166e-05, "loss": 3.223065286874771e-05, "step": 242600 }, { "epoch": 68.86460403065568, "grad_norm": 0.0028414311818778515, "learning_rate": 3.1163213170593244e-05, "loss": 1.8697604537010194e-05, "step": 242610 }, { "epoch": 68.86744252057905, "grad_norm": 0.00919427815824747, "learning_rate": 3.1160374680669886e-05, "loss": 8.41693952679634e-05, "step": 242620 }, { "epoch": 68.87028101050241, "grad_norm": 0.006469688843935728, "learning_rate": 3.115753619074652e-05, "loss": 0.000266626849770546, "step": 242630 }, { "epoch": 68.87311950042577, "grad_norm": 0.00553313409909606, "learning_rate": 3.115469770082317e-05, "loss": 4.973374307155609e-05, "step": 242640 }, { "epoch": 68.87595799034914, "grad_norm": 0.06373409181833267, "learning_rate": 3.11518592108998e-05, "loss": 0.0017977427691221238, "step": 242650 }, { "epoch": 68.8787964802725, "grad_norm": 0.008618527092039585, "learning_rate": 3.114902072097644e-05, "loss": 2.199951559305191e-05, "step": 242660 }, { "epoch": 68.88163497019586, "grad_norm": 0.005274506285786629, "learning_rate": 3.1146182231053086e-05, "loss": 3.536958247423172e-05, "step": 242670 }, { "epoch": 68.88447346011921, "grad_norm": 0.005058481357991695, "learning_rate": 3.114334374112972e-05, "loss": 3.0346028506755828e-05, "step": 242680 }, { "epoch": 68.88731195004257, "grad_norm": 0.0026690037921071053, "learning_rate": 3.114050525120636e-05, "loss": 4.139244556427002e-05, "step": 242690 }, { "epoch": 68.89015043996594, "grad_norm": 0.0020080816466361284, "learning_rate": 3.1137666761282996e-05, "loss": 0.0002670546993613243, "step": 242700 }, { "epoch": 68.8929889298893, "grad_norm": 0.04790991172194481, "learning_rate": 3.113482827135964e-05, "loss": 6.0774572193622586e-05, "step": 242710 }, { "epoch": 68.89582741981266, "grad_norm": 0.05255613476037979, "learning_rate": 3.113198978143628e-05, "loss": 5.7221390306949615e-05, "step": 242720 }, { "epoch": 68.89866590973602, "grad_norm": 0.014859034679830074, "learning_rate": 3.1129151291512914e-05, "loss": 2.6554055511951447e-05, "step": 242730 }, { "epoch": 68.90150439965939, "grad_norm": 0.0023273080587387085, "learning_rate": 3.1126312801589555e-05, "loss": 8.674897253513336e-05, "step": 242740 }, { "epoch": 68.90434288958274, "grad_norm": 0.017219185829162598, "learning_rate": 3.1123474311666196e-05, "loss": 0.0015318196266889573, "step": 242750 }, { "epoch": 68.9071813795061, "grad_norm": 0.005031108856201172, "learning_rate": 3.112063582174283e-05, "loss": 0.00018310118466615676, "step": 242760 }, { "epoch": 68.91001986942946, "grad_norm": 0.07246223092079163, "learning_rate": 3.111779733181948e-05, "loss": 0.00017639510333538054, "step": 242770 }, { "epoch": 68.91285835935282, "grad_norm": 0.017278175801038742, "learning_rate": 3.1114958841896114e-05, "loss": 7.840972393751145e-05, "step": 242780 }, { "epoch": 68.91569684927619, "grad_norm": 0.0021131730172783136, "learning_rate": 3.111212035197275e-05, "loss": 0.0005562050268054008, "step": 242790 }, { "epoch": 68.91853533919955, "grad_norm": 0.006762103643268347, "learning_rate": 3.110928186204939e-05, "loss": 0.00025100596249103544, "step": 242800 }, { "epoch": 68.92137382912291, "grad_norm": 0.0015959697775542736, "learning_rate": 3.110644337212603e-05, "loss": 0.0008496290072798729, "step": 242810 }, { "epoch": 68.92421231904626, "grad_norm": 3.631293296813965, "learning_rate": 3.110360488220267e-05, "loss": 0.0005738625302910804, "step": 242820 }, { "epoch": 68.92705080896962, "grad_norm": 0.046960316598415375, "learning_rate": 3.110076639227931e-05, "loss": 0.00012945104390382767, "step": 242830 }, { "epoch": 68.92988929889299, "grad_norm": 0.0007541571976616979, "learning_rate": 3.109792790235595e-05, "loss": 0.0001373903825879097, "step": 242840 }, { "epoch": 68.93272778881635, "grad_norm": 0.020764950662851334, "learning_rate": 3.109508941243259e-05, "loss": 6.834473460912705e-05, "step": 242850 }, { "epoch": 68.93556627873971, "grad_norm": 0.0028102328069508076, "learning_rate": 3.1092250922509224e-05, "loss": 5.982201546430588e-05, "step": 242860 }, { "epoch": 68.93840476866308, "grad_norm": 0.012091828510165215, "learning_rate": 3.1089412432585866e-05, "loss": 6.682109087705613e-05, "step": 242870 }, { "epoch": 68.94124325858643, "grad_norm": 0.002497222274541855, "learning_rate": 3.108657394266251e-05, "loss": 7.009748369455338e-05, "step": 242880 }, { "epoch": 68.94408174850979, "grad_norm": 0.0008020972018130124, "learning_rate": 3.108373545273914e-05, "loss": 0.000209689699113369, "step": 242890 }, { "epoch": 68.94692023843315, "grad_norm": 0.04874587059020996, "learning_rate": 3.108089696281578e-05, "loss": 9.271316230297089e-05, "step": 242900 }, { "epoch": 68.94975872835651, "grad_norm": 0.004763227887451649, "learning_rate": 3.1078058472892425e-05, "loss": 0.00011248812079429626, "step": 242910 }, { "epoch": 68.95259721827988, "grad_norm": 0.019736962392926216, "learning_rate": 3.107521998296906e-05, "loss": 4.089735448360443e-05, "step": 242920 }, { "epoch": 68.95543570820324, "grad_norm": 0.0034951618872582912, "learning_rate": 3.10723814930457e-05, "loss": 5.541238933801651e-05, "step": 242930 }, { "epoch": 68.9582741981266, "grad_norm": 0.0010560983791947365, "learning_rate": 3.106954300312234e-05, "loss": 8.884519338607788e-05, "step": 242940 }, { "epoch": 68.96111268804995, "grad_norm": 0.0029235309921205044, "learning_rate": 3.1066704513198976e-05, "loss": 0.0002868633717298508, "step": 242950 }, { "epoch": 68.96395117797331, "grad_norm": 12.493463516235352, "learning_rate": 3.106386602327562e-05, "loss": 0.002570381946861744, "step": 242960 }, { "epoch": 68.96678966789668, "grad_norm": 0.06918419897556305, "learning_rate": 3.106102753335226e-05, "loss": 0.004894288629293442, "step": 242970 }, { "epoch": 68.96962815782004, "grad_norm": 0.013690871186554432, "learning_rate": 3.10581890434289e-05, "loss": 0.00014212355017662048, "step": 242980 }, { "epoch": 68.9724666477434, "grad_norm": 0.0013680345145985484, "learning_rate": 3.1055350553505535e-05, "loss": 0.005081922933459282, "step": 242990 }, { "epoch": 68.97530513766677, "grad_norm": 0.07172010093927383, "learning_rate": 3.105251206358217e-05, "loss": 0.0002447132021188736, "step": 243000 }, { "epoch": 68.97530513766677, "eval_accuracy": 0.9829592420677815, "eval_loss": 0.06883736699819565, "eval_runtime": 36.9721, "eval_samples_per_second": 425.375, "eval_steps_per_second": 6.654, "step": 243000 }, { "epoch": 68.97814362759013, "grad_norm": 1.3536232709884644, "learning_rate": 3.104967357365882e-05, "loss": 0.007960078120231629, "step": 243010 }, { "epoch": 68.98098211751348, "grad_norm": 0.1402720957994461, "learning_rate": 3.104683508373545e-05, "loss": 0.0001432480290532112, "step": 243020 }, { "epoch": 68.98382060743684, "grad_norm": 0.00119569874368608, "learning_rate": 3.1043996593812094e-05, "loss": 0.00015398375689983367, "step": 243030 }, { "epoch": 68.9866590973602, "grad_norm": 0.0029363196808844805, "learning_rate": 3.1041158103888735e-05, "loss": 0.0002834178507328033, "step": 243040 }, { "epoch": 68.98949758728357, "grad_norm": 2.8856186866760254, "learning_rate": 3.103831961396537e-05, "loss": 0.0030129900202155112, "step": 243050 }, { "epoch": 68.99233607720693, "grad_norm": 0.1231979951262474, "learning_rate": 3.103548112404201e-05, "loss": 0.0006267545744776726, "step": 243060 }, { "epoch": 68.99517456713029, "grad_norm": 1.0723717212677002, "learning_rate": 3.103264263411865e-05, "loss": 0.00020436998456716538, "step": 243070 }, { "epoch": 68.99801305705365, "grad_norm": 0.017530443146824837, "learning_rate": 3.102980414419529e-05, "loss": 0.0018367134034633636, "step": 243080 }, { "epoch": 69.000851546977, "grad_norm": 0.046982619911432266, "learning_rate": 3.102696565427193e-05, "loss": 0.001828153058886528, "step": 243090 }, { "epoch": 69.00369003690037, "grad_norm": 10.495277404785156, "learning_rate": 3.102412716434856e-05, "loss": 0.002762712351977825, "step": 243100 }, { "epoch": 69.00652852682373, "grad_norm": 7.424793720245361, "learning_rate": 3.102128867442521e-05, "loss": 0.00220104455947876, "step": 243110 }, { "epoch": 69.00936701674709, "grad_norm": 0.004669375251978636, "learning_rate": 3.1018450184501846e-05, "loss": 0.0025709476321935654, "step": 243120 }, { "epoch": 69.01220550667045, "grad_norm": 0.032344188541173935, "learning_rate": 3.101561169457848e-05, "loss": 0.001253405399620533, "step": 243130 }, { "epoch": 69.01504399659382, "grad_norm": 0.00513497507199645, "learning_rate": 3.101277320465513e-05, "loss": 0.002724456787109375, "step": 243140 }, { "epoch": 69.01788248651717, "grad_norm": 0.0030470259953290224, "learning_rate": 3.100993471473176e-05, "loss": 0.01224232167005539, "step": 243150 }, { "epoch": 69.02072097644053, "grad_norm": 0.006712974514812231, "learning_rate": 3.1007096224808405e-05, "loss": 0.00011645667254924774, "step": 243160 }, { "epoch": 69.02355946636389, "grad_norm": 0.026279091835021973, "learning_rate": 3.1004257734885046e-05, "loss": 5.43719157576561e-05, "step": 243170 }, { "epoch": 69.02639795628725, "grad_norm": 0.009277606382966042, "learning_rate": 3.100141924496168e-05, "loss": 0.00011321548372507095, "step": 243180 }, { "epoch": 69.02923644621062, "grad_norm": 0.005183538421988487, "learning_rate": 3.099858075503832e-05, "loss": 8.183605968952179e-05, "step": 243190 }, { "epoch": 69.03207493613398, "grad_norm": 0.005188687238842249, "learning_rate": 3.099574226511496e-05, "loss": 3.830809146165848e-05, "step": 243200 }, { "epoch": 69.03491342605734, "grad_norm": 0.008444322273135185, "learning_rate": 3.09929037751916e-05, "loss": 4.724953323602677e-05, "step": 243210 }, { "epoch": 69.03775191598069, "grad_norm": 0.00261700083501637, "learning_rate": 3.099006528526824e-05, "loss": 5.186274647712707e-05, "step": 243220 }, { "epoch": 69.04059040590406, "grad_norm": 0.006677641533315182, "learning_rate": 3.0987226795344874e-05, "loss": 7.227174937725067e-05, "step": 243230 }, { "epoch": 69.04342889582742, "grad_norm": 0.0038948289584368467, "learning_rate": 3.098438830542152e-05, "loss": 3.4393183887004854e-05, "step": 243240 }, { "epoch": 69.04626738575078, "grad_norm": 0.0313723050057888, "learning_rate": 3.098154981549816e-05, "loss": 8.291006088256836e-05, "step": 243250 }, { "epoch": 69.04910587567414, "grad_norm": 0.4342646598815918, "learning_rate": 3.097871132557479e-05, "loss": 0.0014084657654166222, "step": 243260 }, { "epoch": 69.0519443655975, "grad_norm": 0.01803451217710972, "learning_rate": 3.097587283565144e-05, "loss": 4.6916492283344266e-05, "step": 243270 }, { "epoch": 69.05478285552087, "grad_norm": 0.07027722150087357, "learning_rate": 3.0973034345728074e-05, "loss": 7.187444716691971e-05, "step": 243280 }, { "epoch": 69.05762134544422, "grad_norm": 0.0011800209758803248, "learning_rate": 3.0970195855804715e-05, "loss": 8.402541279792786e-05, "step": 243290 }, { "epoch": 69.06045983536758, "grad_norm": 0.014574877917766571, "learning_rate": 3.096735736588135e-05, "loss": 9.168479591608048e-05, "step": 243300 }, { "epoch": 69.06329832529094, "grad_norm": 0.01110127754509449, "learning_rate": 3.096451887595799e-05, "loss": 3.749784082174301e-05, "step": 243310 }, { "epoch": 69.0661368152143, "grad_norm": 0.001062567811459303, "learning_rate": 3.096168038603463e-05, "loss": 6.51344656944275e-05, "step": 243320 }, { "epoch": 69.06897530513767, "grad_norm": 0.02150549367070198, "learning_rate": 3.095884189611127e-05, "loss": 3.0419230461120607e-05, "step": 243330 }, { "epoch": 69.07181379506103, "grad_norm": 0.0064528887160122395, "learning_rate": 3.095600340618791e-05, "loss": 2.744346857070923e-05, "step": 243340 }, { "epoch": 69.07465228498438, "grad_norm": 0.10127377510070801, "learning_rate": 3.095316491626455e-05, "loss": 0.00019623097032308578, "step": 243350 }, { "epoch": 69.07749077490774, "grad_norm": 0.06449621170759201, "learning_rate": 3.0950326426341185e-05, "loss": 0.011251552402973175, "step": 243360 }, { "epoch": 69.08032926483111, "grad_norm": 0.0030606836080551147, "learning_rate": 3.0947487936417826e-05, "loss": 2.6095658540725708e-05, "step": 243370 }, { "epoch": 69.08316775475447, "grad_norm": 0.012146689929068089, "learning_rate": 3.094464944649447e-05, "loss": 4.69014048576355e-05, "step": 243380 }, { "epoch": 69.08600624467783, "grad_norm": 0.004544905386865139, "learning_rate": 3.09418109565711e-05, "loss": 2.8413347899913786e-05, "step": 243390 }, { "epoch": 69.0888447346012, "grad_norm": 0.014995813369750977, "learning_rate": 3.093897246664775e-05, "loss": 2.859402447938919e-05, "step": 243400 }, { "epoch": 69.09168322452456, "grad_norm": 0.002788179786875844, "learning_rate": 3.0936133976724385e-05, "loss": 2.607889473438263e-05, "step": 243410 }, { "epoch": 69.09452171444791, "grad_norm": 0.003601699834689498, "learning_rate": 3.093329548680102e-05, "loss": 5.360729992389679e-05, "step": 243420 }, { "epoch": 69.09736020437127, "grad_norm": 0.04444083571434021, "learning_rate": 3.093045699687766e-05, "loss": 3.002900630235672e-05, "step": 243430 }, { "epoch": 69.10019869429463, "grad_norm": 0.004265183582901955, "learning_rate": 3.09276185069543e-05, "loss": 4.31060791015625e-05, "step": 243440 }, { "epoch": 69.103037184218, "grad_norm": 0.01616581343114376, "learning_rate": 3.0924780017030944e-05, "loss": 2.5797635316848755e-05, "step": 243450 }, { "epoch": 69.10587567414136, "grad_norm": 0.0020889868028461933, "learning_rate": 3.092194152710758e-05, "loss": 4.3017230927944185e-05, "step": 243460 }, { "epoch": 69.10871416406472, "grad_norm": 0.005206017289310694, "learning_rate": 3.091910303718422e-05, "loss": 1.5937723219394684e-05, "step": 243470 }, { "epoch": 69.11155265398808, "grad_norm": 0.023873552680015564, "learning_rate": 3.091626454726086e-05, "loss": 5.0406157970428464e-05, "step": 243480 }, { "epoch": 69.11439114391143, "grad_norm": 0.003808765672147274, "learning_rate": 3.0913426057337496e-05, "loss": 5.761217325925827e-05, "step": 243490 }, { "epoch": 69.1172296338348, "grad_norm": 0.015097202733159065, "learning_rate": 3.091058756741414e-05, "loss": 2.978108823299408e-05, "step": 243500 }, { "epoch": 69.1172296338348, "eval_accuracy": 0.9865199974566033, "eval_loss": 0.056997716426849365, "eval_runtime": 38.0569, "eval_samples_per_second": 413.25, "eval_steps_per_second": 6.464, "step": 243500 }, { "epoch": 69.12006812375816, "grad_norm": 0.011355108581483364, "learning_rate": 3.090774907749078e-05, "loss": 8.350536227226257e-05, "step": 243510 }, { "epoch": 69.12290661368152, "grad_norm": 0.007608204148709774, "learning_rate": 3.090491058756741e-05, "loss": 4.566442221403122e-05, "step": 243520 }, { "epoch": 69.12574510360488, "grad_norm": 0.004255729261785746, "learning_rate": 3.0902072097644054e-05, "loss": 5.8710016310215e-05, "step": 243530 }, { "epoch": 69.12858359352825, "grad_norm": 0.0018056733533740044, "learning_rate": 3.0899233607720696e-05, "loss": 7.945559918880463e-05, "step": 243540 }, { "epoch": 69.13142208345161, "grad_norm": 0.0008184313774108887, "learning_rate": 3.089639511779733e-05, "loss": 1.7606467008590698e-05, "step": 243550 }, { "epoch": 69.13426057337496, "grad_norm": 0.003409018972888589, "learning_rate": 3.089355662787397e-05, "loss": 3.674812614917755e-05, "step": 243560 }, { "epoch": 69.13709906329832, "grad_norm": 0.0017113859066739678, "learning_rate": 3.089071813795061e-05, "loss": 2.5679171085357665e-05, "step": 243570 }, { "epoch": 69.13993755322169, "grad_norm": 0.007647681515663862, "learning_rate": 3.0887879648027254e-05, "loss": 3.987252712249756e-05, "step": 243580 }, { "epoch": 69.14277604314505, "grad_norm": 0.0010262933792546391, "learning_rate": 3.088504115810389e-05, "loss": 2.4228356778621674e-05, "step": 243590 }, { "epoch": 69.14561453306841, "grad_norm": 0.004606996662914753, "learning_rate": 3.088220266818053e-05, "loss": 4.091579467058182e-05, "step": 243600 }, { "epoch": 69.14845302299177, "grad_norm": 0.0019297334365546703, "learning_rate": 3.087936417825717e-05, "loss": 1.3574957847595215e-05, "step": 243610 }, { "epoch": 69.15129151291512, "grad_norm": 0.020292961969971657, "learning_rate": 3.0876525688333806e-05, "loss": 5.631837993860245e-05, "step": 243620 }, { "epoch": 69.15413000283849, "grad_norm": 0.025928771123290062, "learning_rate": 3.087368719841045e-05, "loss": 3.312397748231888e-05, "step": 243630 }, { "epoch": 69.15696849276185, "grad_norm": 0.005530406720936298, "learning_rate": 3.087084870848709e-05, "loss": 4.41528856754303e-05, "step": 243640 }, { "epoch": 69.15980698268521, "grad_norm": 0.04653564840555191, "learning_rate": 3.0868010218563724e-05, "loss": 3.1330250203609465e-05, "step": 243650 }, { "epoch": 69.16264547260857, "grad_norm": 0.01603107526898384, "learning_rate": 3.0865171728640365e-05, "loss": 5.671605467796326e-05, "step": 243660 }, { "epoch": 69.16548396253194, "grad_norm": 0.001649594632908702, "learning_rate": 3.0862333238717006e-05, "loss": 1.4891289174556732e-05, "step": 243670 }, { "epoch": 69.1683224524553, "grad_norm": 0.001571886008605361, "learning_rate": 3.085949474879364e-05, "loss": 1.5079043805599213e-05, "step": 243680 }, { "epoch": 69.17116094237865, "grad_norm": 0.022040750831365585, "learning_rate": 3.085665625887028e-05, "loss": 7.726605981588364e-05, "step": 243690 }, { "epoch": 69.17399943230201, "grad_norm": 0.003291365457698703, "learning_rate": 3.0853817768946924e-05, "loss": 5.266573280096054e-05, "step": 243700 }, { "epoch": 69.17683792222537, "grad_norm": 0.7599106431007385, "learning_rate": 3.0850979279023565e-05, "loss": 0.0001499321311712265, "step": 243710 }, { "epoch": 69.17967641214874, "grad_norm": 0.023566357791423798, "learning_rate": 3.08481407891002e-05, "loss": 8.88412818312645e-05, "step": 243720 }, { "epoch": 69.1825149020721, "grad_norm": 0.04715610295534134, "learning_rate": 3.0845302299176834e-05, "loss": 5.1248446106910704e-05, "step": 243730 }, { "epoch": 69.18535339199546, "grad_norm": 0.012561875395476818, "learning_rate": 3.084246380925348e-05, "loss": 0.00021852627396583556, "step": 243740 }, { "epoch": 69.18819188191883, "grad_norm": 0.014254236593842506, "learning_rate": 3.083962531933012e-05, "loss": 8.588545024394989e-05, "step": 243750 }, { "epoch": 69.19103037184217, "grad_norm": 0.0639142394065857, "learning_rate": 3.083678682940676e-05, "loss": 0.00014401692897081375, "step": 243760 }, { "epoch": 69.19386886176554, "grad_norm": 0.0406937412917614, "learning_rate": 3.08339483394834e-05, "loss": 5.1264651119709015e-05, "step": 243770 }, { "epoch": 69.1967073516889, "grad_norm": 0.10923203080892563, "learning_rate": 3.0831109849560034e-05, "loss": 8.981283754110337e-05, "step": 243780 }, { "epoch": 69.19954584161226, "grad_norm": 0.012497109360992908, "learning_rate": 3.0828271359636676e-05, "loss": 0.00037806928157806395, "step": 243790 }, { "epoch": 69.20238433153563, "grad_norm": 0.01406451128423214, "learning_rate": 3.082543286971332e-05, "loss": 9.055975824594498e-05, "step": 243800 }, { "epoch": 69.20522282145899, "grad_norm": 0.14122474193572998, "learning_rate": 3.082259437978995e-05, "loss": 0.00015476662665605545, "step": 243810 }, { "epoch": 69.20806131138234, "grad_norm": 0.011546478606760502, "learning_rate": 3.081975588986659e-05, "loss": 3.2271444797515866e-05, "step": 243820 }, { "epoch": 69.2108998013057, "grad_norm": 0.0018174489960074425, "learning_rate": 3.081691739994323e-05, "loss": 0.0008863596245646477, "step": 243830 }, { "epoch": 69.21373829122906, "grad_norm": 0.0015949612716212869, "learning_rate": 3.081407891001987e-05, "loss": 0.0001360910013318062, "step": 243840 }, { "epoch": 69.21657678115243, "grad_norm": 0.0011469029122963548, "learning_rate": 3.081124042009651e-05, "loss": 9.142998605966568e-05, "step": 243850 }, { "epoch": 69.21941527107579, "grad_norm": 0.008108435198664665, "learning_rate": 3.0808401930173145e-05, "loss": 4.557706415653229e-05, "step": 243860 }, { "epoch": 69.22225376099915, "grad_norm": 0.015037999488413334, "learning_rate": 3.080556344024979e-05, "loss": 3.2432377338409424e-05, "step": 243870 }, { "epoch": 69.22509225092251, "grad_norm": 0.0202256441116333, "learning_rate": 3.080272495032643e-05, "loss": 8.280482143163681e-05, "step": 243880 }, { "epoch": 69.22793074084586, "grad_norm": 0.0024150244425982237, "learning_rate": 3.079988646040306e-05, "loss": 0.00012115407735109329, "step": 243890 }, { "epoch": 69.23076923076923, "grad_norm": 0.0006517941947095096, "learning_rate": 3.079704797047971e-05, "loss": 6.65806233882904e-05, "step": 243900 }, { "epoch": 69.23360772069259, "grad_norm": 4.153613567352295, "learning_rate": 3.0794209480556345e-05, "loss": 0.0012334270402789115, "step": 243910 }, { "epoch": 69.23644621061595, "grad_norm": 0.008239037357270718, "learning_rate": 3.0791370990632987e-05, "loss": 6.956011056900025e-05, "step": 243920 }, { "epoch": 69.23928470053932, "grad_norm": 0.01807212084531784, "learning_rate": 3.078853250070962e-05, "loss": 0.000173354335129261, "step": 243930 }, { "epoch": 69.24212319046268, "grad_norm": 0.006042747758328915, "learning_rate": 3.078569401078626e-05, "loss": 0.0025398511439561843, "step": 243940 }, { "epoch": 69.24496168038604, "grad_norm": 0.0374942421913147, "learning_rate": 3.0782855520862904e-05, "loss": 4.8214010894298555e-05, "step": 243950 }, { "epoch": 69.24780017030939, "grad_norm": 0.005208685528486967, "learning_rate": 3.078001703093954e-05, "loss": 0.0008780118077993393, "step": 243960 }, { "epoch": 69.25063866023275, "grad_norm": 1.7219430208206177, "learning_rate": 3.077717854101618e-05, "loss": 0.0007113805040717125, "step": 243970 }, { "epoch": 69.25347715015612, "grad_norm": 0.00774106802418828, "learning_rate": 3.077434005109282e-05, "loss": 0.00018790010362863542, "step": 243980 }, { "epoch": 69.25631564007948, "grad_norm": 0.3831535875797272, "learning_rate": 3.0771501561169456e-05, "loss": 0.0048131212592124935, "step": 243990 }, { "epoch": 69.25915413000284, "grad_norm": 0.012105902656912804, "learning_rate": 3.0768663071246104e-05, "loss": 0.001032901369035244, "step": 244000 }, { "epoch": 69.25915413000284, "eval_accuracy": 0.9836586761620144, "eval_loss": 0.07071933895349503, "eval_runtime": 38.6834, "eval_samples_per_second": 406.557, "eval_steps_per_second": 6.359, "step": 244000 }, { "epoch": 69.2619926199262, "grad_norm": 0.05743422731757164, "learning_rate": 3.076582458132274e-05, "loss": 0.00035367831587791444, "step": 244010 }, { "epoch": 69.26483110984957, "grad_norm": 0.9464497566223145, "learning_rate": 3.076298609139937e-05, "loss": 0.00018948223441839218, "step": 244020 }, { "epoch": 69.26766959977292, "grad_norm": 0.006106072571128607, "learning_rate": 3.0760147601476015e-05, "loss": 4.55513596534729e-05, "step": 244030 }, { "epoch": 69.27050808969628, "grad_norm": 0.40484729409217834, "learning_rate": 3.0757309111552656e-05, "loss": 7.27316364645958e-05, "step": 244040 }, { "epoch": 69.27334657961964, "grad_norm": 0.00880687590688467, "learning_rate": 3.07544706216293e-05, "loss": 0.0002455919981002808, "step": 244050 }, { "epoch": 69.276185069543, "grad_norm": 0.004653036128729582, "learning_rate": 3.075163213170593e-05, "loss": 0.00036607924848794935, "step": 244060 }, { "epoch": 69.27902355946637, "grad_norm": 0.15131202340126038, "learning_rate": 3.074879364178257e-05, "loss": 0.0002718428149819374, "step": 244070 }, { "epoch": 69.28186204938973, "grad_norm": 0.12352675199508667, "learning_rate": 3.0745955151859215e-05, "loss": 0.00025233682245016096, "step": 244080 }, { "epoch": 69.28470053931308, "grad_norm": 1.655372142791748, "learning_rate": 3.074311666193585e-05, "loss": 0.001298394426703453, "step": 244090 }, { "epoch": 69.28753902923644, "grad_norm": 0.06432979553937912, "learning_rate": 3.074027817201249e-05, "loss": 0.016891737282276154, "step": 244100 }, { "epoch": 69.2903775191598, "grad_norm": 0.41776108741760254, "learning_rate": 3.073743968208913e-05, "loss": 0.004207803681492805, "step": 244110 }, { "epoch": 69.29321600908317, "grad_norm": 0.18259036540985107, "learning_rate": 3.0734601192165767e-05, "loss": 0.002985391952097416, "step": 244120 }, { "epoch": 69.29605449900653, "grad_norm": 0.04009053856134415, "learning_rate": 3.073176270224241e-05, "loss": 0.0003870762884616852, "step": 244130 }, { "epoch": 69.2988929889299, "grad_norm": 0.012296165339648724, "learning_rate": 3.072892421231905e-05, "loss": 0.00020198728889226915, "step": 244140 }, { "epoch": 69.30173147885326, "grad_norm": 0.0012451255461201072, "learning_rate": 3.0726085722395684e-05, "loss": 0.0003355326130986214, "step": 244150 }, { "epoch": 69.3045699687766, "grad_norm": 0.005489756353199482, "learning_rate": 3.0723247232472325e-05, "loss": 3.404188901185989e-05, "step": 244160 }, { "epoch": 69.30740845869997, "grad_norm": 0.008605108596384525, "learning_rate": 3.072040874254897e-05, "loss": 0.00012925788760185242, "step": 244170 }, { "epoch": 69.31024694862333, "grad_norm": 0.013676597736775875, "learning_rate": 3.071757025262561e-05, "loss": 0.00024410076439380647, "step": 244180 }, { "epoch": 69.3130854385467, "grad_norm": 0.011286970227956772, "learning_rate": 3.071473176270224e-05, "loss": 7.722452282905578e-05, "step": 244190 }, { "epoch": 69.31592392847006, "grad_norm": 0.007281470578163862, "learning_rate": 3.0711893272778884e-05, "loss": 0.00012090150266885757, "step": 244200 }, { "epoch": 69.31876241839342, "grad_norm": 0.014895396307110786, "learning_rate": 3.0709054782855525e-05, "loss": 0.00012845154851675032, "step": 244210 }, { "epoch": 69.32160090831678, "grad_norm": 0.07094444334506989, "learning_rate": 3.070621629293216e-05, "loss": 0.00015880856662988662, "step": 244220 }, { "epoch": 69.32443939824013, "grad_norm": 0.007743909955024719, "learning_rate": 3.07033778030088e-05, "loss": 7.471293210983276e-05, "step": 244230 }, { "epoch": 69.3272778881635, "grad_norm": 0.041655704379081726, "learning_rate": 3.070053931308544e-05, "loss": 5.0813518464565276e-05, "step": 244240 }, { "epoch": 69.33011637808686, "grad_norm": 12.01974105834961, "learning_rate": 3.069770082316208e-05, "loss": 0.0043497256934642795, "step": 244250 }, { "epoch": 69.33295486801022, "grad_norm": 0.26811692118644714, "learning_rate": 3.069486233323872e-05, "loss": 0.0005086960271000862, "step": 244260 }, { "epoch": 69.33579335793358, "grad_norm": 0.11552761495113373, "learning_rate": 3.069202384331536e-05, "loss": 0.0015898533165454865, "step": 244270 }, { "epoch": 69.33863184785695, "grad_norm": 0.0023990371264517307, "learning_rate": 3.0689185353391995e-05, "loss": 0.00031293705105781553, "step": 244280 }, { "epoch": 69.34147033778031, "grad_norm": 0.014411958865821362, "learning_rate": 3.0686346863468636e-05, "loss": 0.002335033379495144, "step": 244290 }, { "epoch": 69.34430882770366, "grad_norm": 0.009439369663596153, "learning_rate": 3.068350837354528e-05, "loss": 0.0037272654473781584, "step": 244300 }, { "epoch": 69.34714731762702, "grad_norm": 0.1997731328010559, "learning_rate": 3.068066988362191e-05, "loss": 0.001926329918205738, "step": 244310 }, { "epoch": 69.34998580755038, "grad_norm": 0.001867144019342959, "learning_rate": 3.0677831393698553e-05, "loss": 0.00012035835534334183, "step": 244320 }, { "epoch": 69.35282429747375, "grad_norm": 0.26810964941978455, "learning_rate": 3.0675276752767527e-05, "loss": 0.0047422852367162704, "step": 244330 }, { "epoch": 69.35566278739711, "grad_norm": 0.13369227945804596, "learning_rate": 3.067243826284417e-05, "loss": 0.00040435511618852615, "step": 244340 }, { "epoch": 69.35850127732047, "grad_norm": 0.0030817771330475807, "learning_rate": 3.066959977292081e-05, "loss": 0.006781357526779175, "step": 244350 }, { "epoch": 69.36133976724382, "grad_norm": 0.23143252730369568, "learning_rate": 3.0666761282997444e-05, "loss": 0.0013342486694455148, "step": 244360 }, { "epoch": 69.36417825716718, "grad_norm": 0.008452896028757095, "learning_rate": 3.0663922793074085e-05, "loss": 0.00016696471720933914, "step": 244370 }, { "epoch": 69.36701674709055, "grad_norm": 0.007803377229720354, "learning_rate": 3.066108430315073e-05, "loss": 0.00018735043704509735, "step": 244380 }, { "epoch": 69.36985523701391, "grad_norm": 0.0016160828527063131, "learning_rate": 3.065824581322737e-05, "loss": 0.00039152577519416807, "step": 244390 }, { "epoch": 69.37269372693727, "grad_norm": 0.004438434727489948, "learning_rate": 3.065569117229634e-05, "loss": 0.008143705129623414, "step": 244400 }, { "epoch": 69.37553221686063, "grad_norm": 0.009181031957268715, "learning_rate": 3.0652852682372976e-05, "loss": 0.005828468501567841, "step": 244410 }, { "epoch": 69.378370706784, "grad_norm": 0.009452344849705696, "learning_rate": 3.065001419244962e-05, "loss": 0.0007900042459368706, "step": 244420 }, { "epoch": 69.38120919670735, "grad_norm": 0.07185620814561844, "learning_rate": 3.064717570252626e-05, "loss": 0.0006252117455005646, "step": 244430 }, { "epoch": 69.38404768663071, "grad_norm": 0.09735696017742157, "learning_rate": 3.06443372126029e-05, "loss": 0.00010110456496477127, "step": 244440 }, { "epoch": 69.38688617655407, "grad_norm": 0.00464964285492897, "learning_rate": 3.0641498722679535e-05, "loss": 0.00031626317650079727, "step": 244450 }, { "epoch": 69.38972466647743, "grad_norm": 0.019565002992749214, "learning_rate": 3.0638660232756176e-05, "loss": 0.0022161688655614855, "step": 244460 }, { "epoch": 69.3925631564008, "grad_norm": 0.03497663512825966, "learning_rate": 3.063582174283282e-05, "loss": 0.0004052998498082161, "step": 244470 }, { "epoch": 69.39540164632416, "grad_norm": 0.01285458728671074, "learning_rate": 3.063298325290945e-05, "loss": 8.947364985942841e-05, "step": 244480 }, { "epoch": 69.39824013624752, "grad_norm": 1.907827615737915, "learning_rate": 3.063014476298609e-05, "loss": 0.003966686874628067, "step": 244490 }, { "epoch": 69.40107862617087, "grad_norm": 0.040587350726127625, "learning_rate": 3.0627306273062735e-05, "loss": 0.00014011971652507781, "step": 244500 }, { "epoch": 69.40107862617087, "eval_accuracy": 0.9848667895975075, "eval_loss": 0.06234279274940491, "eval_runtime": 37.6921, "eval_samples_per_second": 417.249, "eval_steps_per_second": 6.527, "step": 244500 }, { "epoch": 69.40391711609423, "grad_norm": 0.18811126053333282, "learning_rate": 3.062446778313937e-05, "loss": 0.0005702180787920952, "step": 244510 }, { "epoch": 69.4067556060176, "grad_norm": 0.008166591636836529, "learning_rate": 3.062162929321601e-05, "loss": 0.0005157725885510444, "step": 244520 }, { "epoch": 69.40959409594096, "grad_norm": 0.008310784585773945, "learning_rate": 3.061879080329265e-05, "loss": 0.00029856376349925997, "step": 244530 }, { "epoch": 69.41243258586432, "grad_norm": 0.02341771498322487, "learning_rate": 3.061595231336929e-05, "loss": 0.00016513094305992126, "step": 244540 }, { "epoch": 69.41527107578769, "grad_norm": 0.7186065912246704, "learning_rate": 3.061311382344593e-05, "loss": 0.0007448185235261918, "step": 244550 }, { "epoch": 69.41810956571103, "grad_norm": 0.01277498621493578, "learning_rate": 3.061027533352257e-05, "loss": 0.00017932187765836715, "step": 244560 }, { "epoch": 69.4209480556344, "grad_norm": 0.027234096080064774, "learning_rate": 3.060743684359921e-05, "loss": 0.0002560708671808243, "step": 244570 }, { "epoch": 69.42378654555776, "grad_norm": 0.0020753671415150166, "learning_rate": 3.0604598353675845e-05, "loss": 0.007891638576984406, "step": 244580 }, { "epoch": 69.42662503548112, "grad_norm": 0.49205294251441956, "learning_rate": 3.060175986375248e-05, "loss": 0.00045906566083431244, "step": 244590 }, { "epoch": 69.42946352540449, "grad_norm": 0.10492647439241409, "learning_rate": 3.059892137382913e-05, "loss": 0.00019093900918960572, "step": 244600 }, { "epoch": 69.43230201532785, "grad_norm": 0.01755586639046669, "learning_rate": 3.059608288390576e-05, "loss": 0.00020887032151222228, "step": 244610 }, { "epoch": 69.43514050525121, "grad_norm": 0.006069367751479149, "learning_rate": 3.0593244393982404e-05, "loss": 5.749259144067764e-05, "step": 244620 }, { "epoch": 69.43797899517456, "grad_norm": 17.257463455200195, "learning_rate": 3.0590405904059045e-05, "loss": 0.0038564659655094145, "step": 244630 }, { "epoch": 69.44081748509792, "grad_norm": 0.010290609672665596, "learning_rate": 3.058756741413568e-05, "loss": 0.0008019601926207542, "step": 244640 }, { "epoch": 69.44365597502129, "grad_norm": 0.1699010729789734, "learning_rate": 3.058472892421232e-05, "loss": 9.96103510260582e-05, "step": 244650 }, { "epoch": 69.44649446494465, "grad_norm": 0.04504885897040367, "learning_rate": 3.058189043428896e-05, "loss": 7.928106933832168e-05, "step": 244660 }, { "epoch": 69.44933295486801, "grad_norm": 0.06866581737995148, "learning_rate": 3.05790519443656e-05, "loss": 0.0001509740948677063, "step": 244670 }, { "epoch": 69.45217144479138, "grad_norm": 0.021269889548420906, "learning_rate": 3.057621345444224e-05, "loss": 9.553376585245133e-05, "step": 244680 }, { "epoch": 69.45500993471474, "grad_norm": 0.015123716555535793, "learning_rate": 3.057337496451887e-05, "loss": 0.0033439867198467254, "step": 244690 }, { "epoch": 69.45784842463809, "grad_norm": 0.3310700058937073, "learning_rate": 3.0570536474595515e-05, "loss": 9.135138243436814e-05, "step": 244700 }, { "epoch": 69.46068691456145, "grad_norm": 0.0021623647771775723, "learning_rate": 3.0567697984672156e-05, "loss": 0.00015775077044963837, "step": 244710 }, { "epoch": 69.46352540448481, "grad_norm": 0.0129544110968709, "learning_rate": 3.056485949474879e-05, "loss": 0.0001811247318983078, "step": 244720 }, { "epoch": 69.46636389440818, "grad_norm": 2.4067442417144775, "learning_rate": 3.056202100482544e-05, "loss": 0.005659329891204834, "step": 244730 }, { "epoch": 69.46920238433154, "grad_norm": 0.756753146648407, "learning_rate": 3.0559182514902073e-05, "loss": 0.000356297567486763, "step": 244740 }, { "epoch": 69.4720408742549, "grad_norm": 0.05481601506471634, "learning_rate": 3.055634402497871e-05, "loss": 0.0005230274051427841, "step": 244750 }, { "epoch": 69.47487936417826, "grad_norm": 0.0012688550632447004, "learning_rate": 3.0553505535055356e-05, "loss": 0.005990613996982574, "step": 244760 }, { "epoch": 69.47771785410161, "grad_norm": 0.0038886675611138344, "learning_rate": 3.055066704513199e-05, "loss": 5.3746812045574186e-05, "step": 244770 }, { "epoch": 69.48055634402498, "grad_norm": 10.5511474609375, "learning_rate": 3.054782855520863e-05, "loss": 0.009920582920312882, "step": 244780 }, { "epoch": 69.48339483394834, "grad_norm": 0.1115889772772789, "learning_rate": 3.054499006528527e-05, "loss": 0.00022788327187299727, "step": 244790 }, { "epoch": 69.4862333238717, "grad_norm": 0.001237515709362924, "learning_rate": 3.054215157536191e-05, "loss": 6.93395733833313e-05, "step": 244800 }, { "epoch": 69.48907181379506, "grad_norm": 0.0013668345054611564, "learning_rate": 3.053931308543855e-05, "loss": 0.00016221590340137482, "step": 244810 }, { "epoch": 69.49191030371843, "grad_norm": 0.004601551219820976, "learning_rate": 3.0536474595515184e-05, "loss": 5.229003727436066e-05, "step": 244820 }, { "epoch": 69.49474879364178, "grad_norm": 0.0013641807017847896, "learning_rate": 3.0533636105591825e-05, "loss": 3.515295684337616e-05, "step": 244830 }, { "epoch": 69.49758728356514, "grad_norm": 0.002692670561373234, "learning_rate": 3.053079761566847e-05, "loss": 0.00029602479189634324, "step": 244840 }, { "epoch": 69.5004257734885, "grad_norm": 0.16503724455833435, "learning_rate": 3.05279591257451e-05, "loss": 0.00019739046692848205, "step": 244850 }, { "epoch": 69.50326426341186, "grad_norm": 0.003367594676092267, "learning_rate": 3.052512063582175e-05, "loss": 0.00010484661906957626, "step": 244860 }, { "epoch": 69.50610275333523, "grad_norm": 0.006882688961923122, "learning_rate": 3.0522282145898384e-05, "loss": 3.7086941301822665e-05, "step": 244870 }, { "epoch": 69.50894124325859, "grad_norm": 0.046379491686820984, "learning_rate": 3.051944365597502e-05, "loss": 0.00016853325068950654, "step": 244880 }, { "epoch": 69.51177973318195, "grad_norm": 0.04271809384226799, "learning_rate": 3.051660516605166e-05, "loss": 8.704941719770432e-05, "step": 244890 }, { "epoch": 69.5146182231053, "grad_norm": 0.04647555947303772, "learning_rate": 3.05137666761283e-05, "loss": 0.0009976314380764962, "step": 244900 }, { "epoch": 69.51745671302866, "grad_norm": 0.00983703788369894, "learning_rate": 3.051092818620494e-05, "loss": 7.000882178544998e-05, "step": 244910 }, { "epoch": 69.52029520295203, "grad_norm": 0.017108725383877754, "learning_rate": 3.0508089696281578e-05, "loss": 0.0041903495788574215, "step": 244920 }, { "epoch": 69.52313369287539, "grad_norm": 0.007617031689733267, "learning_rate": 3.0505251206358222e-05, "loss": 0.0004157492890954018, "step": 244930 }, { "epoch": 69.52597218279875, "grad_norm": 0.02273334003984928, "learning_rate": 3.0502412716434857e-05, "loss": 0.003264006972312927, "step": 244940 }, { "epoch": 69.52881067272212, "grad_norm": 0.007213076110929251, "learning_rate": 3.0499574226511495e-05, "loss": 9.601283818483353e-05, "step": 244950 }, { "epoch": 69.53164916264548, "grad_norm": 0.007823998108506203, "learning_rate": 3.049673573658814e-05, "loss": 0.00012847837060689927, "step": 244960 }, { "epoch": 69.53448765256883, "grad_norm": 0.0016822811448946595, "learning_rate": 3.0493897246664778e-05, "loss": 8.481442928314209e-05, "step": 244970 }, { "epoch": 69.53732614249219, "grad_norm": 0.0027770008891820908, "learning_rate": 3.0491058756741416e-05, "loss": 0.0009623058140277863, "step": 244980 }, { "epoch": 69.54016463241555, "grad_norm": 0.012619000859558582, "learning_rate": 3.048822026681805e-05, "loss": 3.233030438423157e-05, "step": 244990 }, { "epoch": 69.54300312233892, "grad_norm": 0.0004479954077396542, "learning_rate": 3.0485381776894695e-05, "loss": 0.00020429063588380813, "step": 245000 }, { "epoch": 69.54300312233892, "eval_accuracy": 0.9853118840211101, "eval_loss": 0.06084338575601578, "eval_runtime": 35.817, "eval_samples_per_second": 439.094, "eval_steps_per_second": 6.868, "step": 245000 }, { "epoch": 69.54584161226228, "grad_norm": 0.003661086317151785, "learning_rate": 3.0482543286971333e-05, "loss": 4.6551413834095e-05, "step": 245010 }, { "epoch": 69.54868010218564, "grad_norm": 0.0006582600180990994, "learning_rate": 3.047970479704797e-05, "loss": 0.00012242440134286882, "step": 245020 }, { "epoch": 69.551518592109, "grad_norm": 0.0237070694565773, "learning_rate": 3.0476866307124612e-05, "loss": 5.7344883680343627e-05, "step": 245030 }, { "epoch": 69.55435708203235, "grad_norm": 0.02829384058713913, "learning_rate": 3.047402781720125e-05, "loss": 7.520914077758789e-05, "step": 245040 }, { "epoch": 69.55719557195572, "grad_norm": 0.008593144826591015, "learning_rate": 3.047118932727789e-05, "loss": 0.00020761266350746154, "step": 245050 }, { "epoch": 69.56003406187908, "grad_norm": 0.36692240834236145, "learning_rate": 3.046835083735453e-05, "loss": 0.0007911046966910362, "step": 245060 }, { "epoch": 69.56287255180244, "grad_norm": 0.04711756855249405, "learning_rate": 3.0465512347431168e-05, "loss": 0.001756049506366253, "step": 245070 }, { "epoch": 69.5657110417258, "grad_norm": 0.031137239187955856, "learning_rate": 3.0462673857507806e-05, "loss": 0.00011048167943954468, "step": 245080 }, { "epoch": 69.56854953164917, "grad_norm": 0.02221696451306343, "learning_rate": 3.0459835367584444e-05, "loss": 0.00016880612820386888, "step": 245090 }, { "epoch": 69.57138802157252, "grad_norm": 0.021373514086008072, "learning_rate": 3.045699687766109e-05, "loss": 0.00010317321866750717, "step": 245100 }, { "epoch": 69.57422651149588, "grad_norm": 0.013907144777476788, "learning_rate": 3.0454158387737723e-05, "loss": 4.566535353660584e-05, "step": 245110 }, { "epoch": 69.57706500141924, "grad_norm": 0.00165241037029773, "learning_rate": 3.045131989781436e-05, "loss": 4.462487995624542e-05, "step": 245120 }, { "epoch": 69.5799034913426, "grad_norm": 0.0030896649695932865, "learning_rate": 3.0448481407891006e-05, "loss": 0.00022011902183294296, "step": 245130 }, { "epoch": 69.58274198126597, "grad_norm": 0.17634060978889465, "learning_rate": 3.0445642917967644e-05, "loss": 0.0002074871212244034, "step": 245140 }, { "epoch": 69.58558047118933, "grad_norm": 0.01142571959644556, "learning_rate": 3.0442804428044282e-05, "loss": 3.786683082580566e-05, "step": 245150 }, { "epoch": 69.5884189611127, "grad_norm": 0.0009803640423342586, "learning_rate": 3.0439965938120923e-05, "loss": 0.00017685014754533767, "step": 245160 }, { "epoch": 69.59125745103604, "grad_norm": 0.0014601204311475158, "learning_rate": 3.043712744819756e-05, "loss": 0.00013767071068286895, "step": 245170 }, { "epoch": 69.5940959409594, "grad_norm": 0.01639924757182598, "learning_rate": 3.04342889582742e-05, "loss": 0.00021735839545726775, "step": 245180 }, { "epoch": 69.59693443088277, "grad_norm": 0.03463487699627876, "learning_rate": 3.0431450468350837e-05, "loss": 0.00028885435312986374, "step": 245190 }, { "epoch": 69.59977292080613, "grad_norm": 0.0020581837743520737, "learning_rate": 3.042861197842748e-05, "loss": 9.390413761138916e-05, "step": 245200 }, { "epoch": 69.6026114107295, "grad_norm": 0.001336180604994297, "learning_rate": 3.0425773488504116e-05, "loss": 0.0001050194725394249, "step": 245210 }, { "epoch": 69.60544990065286, "grad_norm": 0.011179179884493351, "learning_rate": 3.0422934998580754e-05, "loss": 2.6869028806686403e-05, "step": 245220 }, { "epoch": 69.60828839057622, "grad_norm": 0.002098072087392211, "learning_rate": 3.0420096508657396e-05, "loss": 2.8664246201515198e-05, "step": 245230 }, { "epoch": 69.61112688049957, "grad_norm": 0.0009215103345923126, "learning_rate": 3.0417258018734034e-05, "loss": 0.000142788328230381, "step": 245240 }, { "epoch": 69.61396537042293, "grad_norm": 0.011692147701978683, "learning_rate": 3.0414419528810672e-05, "loss": 2.7032755315303803e-05, "step": 245250 }, { "epoch": 69.6168038603463, "grad_norm": 0.0010381838073953986, "learning_rate": 3.0411581038887317e-05, "loss": 9.821746498346328e-05, "step": 245260 }, { "epoch": 69.61964235026966, "grad_norm": 0.0105049479752779, "learning_rate": 3.0408742548963955e-05, "loss": 6.228405982255936e-05, "step": 245270 }, { "epoch": 69.62248084019302, "grad_norm": 0.00398617796599865, "learning_rate": 3.0405904059040592e-05, "loss": 4.0707923471927646e-05, "step": 245280 }, { "epoch": 69.62531933011638, "grad_norm": 0.08662501722574234, "learning_rate": 3.0403065569117227e-05, "loss": 0.00010502785444259644, "step": 245290 }, { "epoch": 69.62815782003973, "grad_norm": 0.0030421828851103783, "learning_rate": 3.0400227079193872e-05, "loss": 3.51274386048317e-05, "step": 245300 }, { "epoch": 69.6309963099631, "grad_norm": 0.006380012258887291, "learning_rate": 3.039738858927051e-05, "loss": 0.00012113116681575776, "step": 245310 }, { "epoch": 69.63383479988646, "grad_norm": 0.06026007607579231, "learning_rate": 3.0394550099347148e-05, "loss": 0.00048004873096942904, "step": 245320 }, { "epoch": 69.63667328980982, "grad_norm": 0.01159178838133812, "learning_rate": 3.039171160942379e-05, "loss": 6.431248039007187e-05, "step": 245330 }, { "epoch": 69.63951177973318, "grad_norm": 0.020285913720726967, "learning_rate": 3.0388873119500427e-05, "loss": 0.00043525602668523786, "step": 245340 }, { "epoch": 69.64235026965655, "grad_norm": 0.002105681225657463, "learning_rate": 3.0386034629577065e-05, "loss": 0.0007714886218309402, "step": 245350 }, { "epoch": 69.64518875957991, "grad_norm": 18.002790451049805, "learning_rate": 3.0383196139653707e-05, "loss": 0.010592823475599289, "step": 245360 }, { "epoch": 69.64802724950326, "grad_norm": 0.0029991939663887024, "learning_rate": 3.0380357649730345e-05, "loss": 0.00016188006848096847, "step": 245370 }, { "epoch": 69.65086573942662, "grad_norm": 0.020188773050904274, "learning_rate": 3.0377519159806983e-05, "loss": 0.000103803351521492, "step": 245380 }, { "epoch": 69.65370422934998, "grad_norm": 0.04627068340778351, "learning_rate": 3.037468066988362e-05, "loss": 0.00029852651059627535, "step": 245390 }, { "epoch": 69.65654271927335, "grad_norm": 7.649611949920654, "learning_rate": 3.0371842179960265e-05, "loss": 0.0019908731803297995, "step": 245400 }, { "epoch": 69.65938120919671, "grad_norm": 0.024711614474654198, "learning_rate": 3.03690036900369e-05, "loss": 0.00014500115066766738, "step": 245410 }, { "epoch": 69.66221969912007, "grad_norm": 0.0097183957695961, "learning_rate": 3.0366165200113538e-05, "loss": 0.0005409792065620422, "step": 245420 }, { "epoch": 69.66505818904344, "grad_norm": 0.2601284980773926, "learning_rate": 3.0363326710190183e-05, "loss": 9.895935654640198e-05, "step": 245430 }, { "epoch": 69.66789667896678, "grad_norm": 0.026216113939881325, "learning_rate": 3.036048822026682e-05, "loss": 0.00012108776718378067, "step": 245440 }, { "epoch": 69.67073516889015, "grad_norm": 0.00746951624751091, "learning_rate": 3.035764973034346e-05, "loss": 0.0009867310523986816, "step": 245450 }, { "epoch": 69.67357365881351, "grad_norm": 0.08241626620292664, "learning_rate": 3.03548112404201e-05, "loss": 8.087158203125e-05, "step": 245460 }, { "epoch": 69.67641214873687, "grad_norm": 0.001100580208003521, "learning_rate": 3.0351972750496738e-05, "loss": 3.850944340229034e-05, "step": 245470 }, { "epoch": 69.67925063866024, "grad_norm": 0.01244024932384491, "learning_rate": 3.0349134260573376e-05, "loss": 9.775198996067047e-05, "step": 245480 }, { "epoch": 69.6820891285836, "grad_norm": 0.0030461724381893873, "learning_rate": 3.0346295770650017e-05, "loss": 0.00030300971120595933, "step": 245490 }, { "epoch": 69.68492761850696, "grad_norm": 0.0034100073389708996, "learning_rate": 3.0343457280726655e-05, "loss": 5.843229591846466e-05, "step": 245500 }, { "epoch": 69.68492761850696, "eval_accuracy": 0.9837222610796719, "eval_loss": 0.06972697377204895, "eval_runtime": 36.3397, "eval_samples_per_second": 432.778, "eval_steps_per_second": 6.769, "step": 245500 }, { "epoch": 69.68776610843031, "grad_norm": 0.03202533721923828, "learning_rate": 3.0340618790803293e-05, "loss": 0.00016707517206668854, "step": 245510 }, { "epoch": 69.69060459835367, "grad_norm": 0.01232651062309742, "learning_rate": 3.033778030087993e-05, "loss": 0.00011953450739383697, "step": 245520 }, { "epoch": 69.69344308827704, "grad_norm": 0.03963908925652504, "learning_rate": 3.0334941810956573e-05, "loss": 0.0011825874447822572, "step": 245530 }, { "epoch": 69.6962815782004, "grad_norm": 0.03456110507249832, "learning_rate": 3.033210332103321e-05, "loss": 0.00015712380409240722, "step": 245540 }, { "epoch": 69.69912006812376, "grad_norm": 0.016588857397437096, "learning_rate": 3.032926483110985e-05, "loss": 0.0038444459438323974, "step": 245550 }, { "epoch": 69.70195855804712, "grad_norm": 0.02227840945124626, "learning_rate": 3.0326426341186493e-05, "loss": 5.453154444694519e-05, "step": 245560 }, { "epoch": 69.70479704797047, "grad_norm": 0.003357888665050268, "learning_rate": 3.032358785126313e-05, "loss": 0.0017452869564294816, "step": 245570 }, { "epoch": 69.70763553789384, "grad_norm": 0.021699612960219383, "learning_rate": 3.0320749361339766e-05, "loss": 0.00020900964736938476, "step": 245580 }, { "epoch": 69.7104740278172, "grad_norm": 0.004028871189802885, "learning_rate": 3.031791087141641e-05, "loss": 0.00011046119034290314, "step": 245590 }, { "epoch": 69.71331251774056, "grad_norm": 0.00464838370680809, "learning_rate": 3.031507238149305e-05, "loss": 5.2335485816001895e-05, "step": 245600 }, { "epoch": 69.71615100766392, "grad_norm": 0.5910958647727966, "learning_rate": 3.0312233891569687e-05, "loss": 0.00022909864783287047, "step": 245610 }, { "epoch": 69.71898949758729, "grad_norm": 0.011724350042641163, "learning_rate": 3.0309395401646325e-05, "loss": 0.00031529106199741365, "step": 245620 }, { "epoch": 69.72182798751065, "grad_norm": 0.17238694429397583, "learning_rate": 3.0306556911722966e-05, "loss": 0.0010063890367746353, "step": 245630 }, { "epoch": 69.724666477434, "grad_norm": 0.0065800221636891365, "learning_rate": 3.0303718421799604e-05, "loss": 0.004016277566552162, "step": 245640 }, { "epoch": 69.72750496735736, "grad_norm": 0.020930489525198936, "learning_rate": 3.0300879931876242e-05, "loss": 0.00024658627808094025, "step": 245650 }, { "epoch": 69.73034345728072, "grad_norm": 0.003217312740162015, "learning_rate": 3.0298041441952883e-05, "loss": 0.005740850046277046, "step": 245660 }, { "epoch": 69.73318194720409, "grad_norm": 0.0014686710201203823, "learning_rate": 3.029520295202952e-05, "loss": 4.072543233633041e-05, "step": 245670 }, { "epoch": 69.73602043712745, "grad_norm": 0.012525106780230999, "learning_rate": 3.029236446210616e-05, "loss": 0.00011055357754230499, "step": 245680 }, { "epoch": 69.73885892705081, "grad_norm": 0.010544884949922562, "learning_rate": 3.0289525972182804e-05, "loss": 0.00021189004182815553, "step": 245690 }, { "epoch": 69.74169741697418, "grad_norm": 0.0021262888330966234, "learning_rate": 3.028668748225944e-05, "loss": 0.0008818922564387321, "step": 245700 }, { "epoch": 69.74453590689753, "grad_norm": 0.008420766331255436, "learning_rate": 3.0283848992336077e-05, "loss": 0.000526469573378563, "step": 245710 }, { "epoch": 69.74737439682089, "grad_norm": 0.013105307705700397, "learning_rate": 3.0281010502412715e-05, "loss": 0.00013328194618225097, "step": 245720 }, { "epoch": 69.75021288674425, "grad_norm": 0.09618387371301651, "learning_rate": 3.027817201248936e-05, "loss": 0.00015805307775735856, "step": 245730 }, { "epoch": 69.75305137666761, "grad_norm": 0.17364875972270966, "learning_rate": 3.0275333522565997e-05, "loss": 0.00013071615248918533, "step": 245740 }, { "epoch": 69.75588986659098, "grad_norm": 0.14803189039230347, "learning_rate": 3.0272495032642632e-05, "loss": 7.44180753827095e-05, "step": 245750 }, { "epoch": 69.75872835651434, "grad_norm": 0.01448790542781353, "learning_rate": 3.0269656542719277e-05, "loss": 4.650000482797623e-05, "step": 245760 }, { "epoch": 69.76156684643769, "grad_norm": 0.0033834415953606367, "learning_rate": 3.0266818052795915e-05, "loss": 0.0013614580035209655, "step": 245770 }, { "epoch": 69.76440533636105, "grad_norm": 0.015006650239229202, "learning_rate": 3.0263979562872553e-05, "loss": 0.000620521605014801, "step": 245780 }, { "epoch": 69.76724382628441, "grad_norm": 0.0021368004381656647, "learning_rate": 3.0261141072949194e-05, "loss": 3.5977177321910855e-05, "step": 245790 }, { "epoch": 69.77008231620778, "grad_norm": 0.009209898300468922, "learning_rate": 3.0258302583025832e-05, "loss": 9.734779596328735e-05, "step": 245800 }, { "epoch": 69.77292080613114, "grad_norm": 0.0018307581776753068, "learning_rate": 3.025546409310247e-05, "loss": 4.166960716247559e-05, "step": 245810 }, { "epoch": 69.7757592960545, "grad_norm": 0.005259967874735594, "learning_rate": 3.0252625603179108e-05, "loss": 5.2510574460029605e-05, "step": 245820 }, { "epoch": 69.77859778597787, "grad_norm": 0.13631506264209747, "learning_rate": 3.024978711325575e-05, "loss": 4.974212497472763e-05, "step": 245830 }, { "epoch": 69.78143627590121, "grad_norm": 0.02407943271100521, "learning_rate": 3.0246948623332388e-05, "loss": 8.825883269309997e-05, "step": 245840 }, { "epoch": 69.78427476582458, "grad_norm": 0.01808636449277401, "learning_rate": 3.0244110133409025e-05, "loss": 0.0001471264287829399, "step": 245850 }, { "epoch": 69.78711325574794, "grad_norm": 0.0022983504459261894, "learning_rate": 3.024127164348567e-05, "loss": 4.8201531171798706e-05, "step": 245860 }, { "epoch": 69.7899517456713, "grad_norm": 0.0070526436902582645, "learning_rate": 3.0238433153562308e-05, "loss": 5.3617358207702634e-05, "step": 245870 }, { "epoch": 69.79279023559467, "grad_norm": 0.003993961960077286, "learning_rate": 3.0235594663638943e-05, "loss": 0.003846864774823189, "step": 245880 }, { "epoch": 69.79562872551803, "grad_norm": 0.006071744952350855, "learning_rate": 3.0232756173715588e-05, "loss": 0.00026807710528373716, "step": 245890 }, { "epoch": 69.79846721544139, "grad_norm": 0.014124765060842037, "learning_rate": 3.0229917683792226e-05, "loss": 2.8714537620544434e-05, "step": 245900 }, { "epoch": 69.80130570536474, "grad_norm": 0.43790751695632935, "learning_rate": 3.0227079193868864e-05, "loss": 0.0003313258290290833, "step": 245910 }, { "epoch": 69.8041441952881, "grad_norm": 0.028446903452277184, "learning_rate": 3.02242407039455e-05, "loss": 0.0035846441984176635, "step": 245920 }, { "epoch": 69.80698268521147, "grad_norm": 0.007553613279014826, "learning_rate": 3.0221402214022143e-05, "loss": 9.858589619398117e-05, "step": 245930 }, { "epoch": 69.80982117513483, "grad_norm": 0.02362305484712124, "learning_rate": 3.021856372409878e-05, "loss": 0.0008933678269386292, "step": 245940 }, { "epoch": 69.81265966505819, "grad_norm": 0.011666486971080303, "learning_rate": 3.021572523417542e-05, "loss": 0.00010432880371809005, "step": 245950 }, { "epoch": 69.81549815498155, "grad_norm": 0.007840855978429317, "learning_rate": 3.021288674425206e-05, "loss": 0.002245473675429821, "step": 245960 }, { "epoch": 69.81833664490492, "grad_norm": 0.008967423811554909, "learning_rate": 3.0210048254328698e-05, "loss": 0.002696585655212402, "step": 245970 }, { "epoch": 69.82117513482827, "grad_norm": 0.003863284131512046, "learning_rate": 3.0207209764405336e-05, "loss": 5.7549029588699344e-05, "step": 245980 }, { "epoch": 69.82401362475163, "grad_norm": 0.01200897991657257, "learning_rate": 3.020437127448198e-05, "loss": 5.836505442857742e-05, "step": 245990 }, { "epoch": 69.82685211467499, "grad_norm": 0.016502616927027702, "learning_rate": 3.0201532784558616e-05, "loss": 2.6592053472995758e-05, "step": 246000 }, { "epoch": 69.82685211467499, "eval_accuracy": 0.9855662236917403, "eval_loss": 0.06286168098449707, "eval_runtime": 43.6807, "eval_samples_per_second": 360.045, "eval_steps_per_second": 5.632, "step": 246000 }, { "epoch": 69.82969060459835, "grad_norm": 0.09628559648990631, "learning_rate": 3.0198694294635254e-05, "loss": 0.004496322199702263, "step": 246010 }, { "epoch": 69.83252909452172, "grad_norm": 0.02169615402817726, "learning_rate": 3.019585580471189e-05, "loss": 0.0004992252215743065, "step": 246020 }, { "epoch": 69.83536758444508, "grad_norm": 0.11419285833835602, "learning_rate": 3.0193017314788536e-05, "loss": 0.0007637780159711838, "step": 246030 }, { "epoch": 69.83820607436843, "grad_norm": 0.10087647289037704, "learning_rate": 3.0190178824865174e-05, "loss": 0.0037808403372764587, "step": 246040 }, { "epoch": 69.84104456429179, "grad_norm": 0.0029194389935582876, "learning_rate": 3.018734033494181e-05, "loss": 7.303319871425629e-05, "step": 246050 }, { "epoch": 69.84388305421515, "grad_norm": 0.04657236859202385, "learning_rate": 3.0184501845018454e-05, "loss": 0.006914541870355606, "step": 246060 }, { "epoch": 69.84672154413852, "grad_norm": 9.077048301696777, "learning_rate": 3.018166335509509e-05, "loss": 0.0014212755486369133, "step": 246070 }, { "epoch": 69.84956003406188, "grad_norm": 0.239957794547081, "learning_rate": 3.017882486517173e-05, "loss": 0.012755671143531799, "step": 246080 }, { "epoch": 69.85239852398524, "grad_norm": 0.01794985681772232, "learning_rate": 3.017598637524837e-05, "loss": 0.0002930374816060066, "step": 246090 }, { "epoch": 69.8552370139086, "grad_norm": 0.0762149766087532, "learning_rate": 3.017314788532501e-05, "loss": 0.0008455879986286163, "step": 246100 }, { "epoch": 69.85807550383196, "grad_norm": 0.0017163701122626662, "learning_rate": 3.0170309395401647e-05, "loss": 0.01119922399520874, "step": 246110 }, { "epoch": 69.86091399375532, "grad_norm": 0.026638589799404144, "learning_rate": 3.0167470905478285e-05, "loss": 3.9866380393505094e-05, "step": 246120 }, { "epoch": 69.86375248367868, "grad_norm": 0.08128692209720612, "learning_rate": 3.0164632415554926e-05, "loss": 0.0003007899969816208, "step": 246130 }, { "epoch": 69.86659097360204, "grad_norm": 0.02365352027118206, "learning_rate": 3.0161793925631564e-05, "loss": 6.025843322277069e-05, "step": 246140 }, { "epoch": 69.8694294635254, "grad_norm": 0.0008699687896296382, "learning_rate": 3.0158955435708202e-05, "loss": 0.002302723377943039, "step": 246150 }, { "epoch": 69.87226795344877, "grad_norm": 0.007895397953689098, "learning_rate": 3.0156116945784847e-05, "loss": 0.006224969029426574, "step": 246160 }, { "epoch": 69.87510644337213, "grad_norm": 0.20454220473766327, "learning_rate": 3.0153278455861482e-05, "loss": 0.00011311713606119156, "step": 246170 }, { "epoch": 69.87794493329548, "grad_norm": 0.19562017917633057, "learning_rate": 3.015043996593812e-05, "loss": 0.00035451240837574004, "step": 246180 }, { "epoch": 69.88078342321884, "grad_norm": 0.009076155722141266, "learning_rate": 3.0147601476014764e-05, "loss": 6.0189329087734225e-05, "step": 246190 }, { "epoch": 69.8836219131422, "grad_norm": 3.7220866680145264, "learning_rate": 3.0144762986091402e-05, "loss": 0.0007601328194141388, "step": 246200 }, { "epoch": 69.88646040306557, "grad_norm": 0.24582794308662415, "learning_rate": 3.014192449616804e-05, "loss": 0.0004445655271410942, "step": 246210 }, { "epoch": 69.88929889298893, "grad_norm": 0.017511608079075813, "learning_rate": 3.0139086006244675e-05, "loss": 0.002174285054206848, "step": 246220 }, { "epoch": 69.8921373829123, "grad_norm": 0.031030431389808655, "learning_rate": 3.013624751632132e-05, "loss": 7.395297288894653e-05, "step": 246230 }, { "epoch": 69.89497587283566, "grad_norm": 0.018120644614100456, "learning_rate": 3.0133409026397958e-05, "loss": 0.004674392938613892, "step": 246240 }, { "epoch": 69.89781436275901, "grad_norm": 0.0020997216925024986, "learning_rate": 3.0130570536474596e-05, "loss": 0.00012481939047574996, "step": 246250 }, { "epoch": 69.90065285268237, "grad_norm": 0.07358092069625854, "learning_rate": 3.0127732046551237e-05, "loss": 0.0012653864920139312, "step": 246260 }, { "epoch": 69.90349134260573, "grad_norm": 0.049008022993803024, "learning_rate": 3.0124893556627875e-05, "loss": 0.0002191644161939621, "step": 246270 }, { "epoch": 69.9063298325291, "grad_norm": 0.002192131243646145, "learning_rate": 3.0122055066704513e-05, "loss": 8.67944210767746e-05, "step": 246280 }, { "epoch": 69.90916832245246, "grad_norm": 0.04231521859765053, "learning_rate": 3.0119216576781158e-05, "loss": 7.883142679929734e-05, "step": 246290 }, { "epoch": 69.91200681237582, "grad_norm": 0.034330159425735474, "learning_rate": 3.0116378086857792e-05, "loss": 0.00010768231004476547, "step": 246300 }, { "epoch": 69.91484530229917, "grad_norm": 0.014320364221930504, "learning_rate": 3.011353959693443e-05, "loss": 2.680756151676178e-05, "step": 246310 }, { "epoch": 69.91768379222253, "grad_norm": 0.03209047392010689, "learning_rate": 3.011070110701107e-05, "loss": 5.6317076086997986e-05, "step": 246320 }, { "epoch": 69.9205222821459, "grad_norm": 0.010542593896389008, "learning_rate": 3.0107862617087713e-05, "loss": 5.217641592025757e-05, "step": 246330 }, { "epoch": 69.92336077206926, "grad_norm": 0.01906193606555462, "learning_rate": 3.010502412716435e-05, "loss": 5.998704582452774e-05, "step": 246340 }, { "epoch": 69.92619926199262, "grad_norm": 0.008960627019405365, "learning_rate": 3.0102185637240986e-05, "loss": 0.0009055057540535927, "step": 246350 }, { "epoch": 69.92903775191598, "grad_norm": 0.0059837717562913895, "learning_rate": 3.009934714731763e-05, "loss": 5.27016818523407e-05, "step": 246360 }, { "epoch": 69.93187624183935, "grad_norm": 0.003245104104280472, "learning_rate": 3.009650865739427e-05, "loss": 0.00012167301028966903, "step": 246370 }, { "epoch": 69.9347147317627, "grad_norm": 0.002412960631772876, "learning_rate": 3.0093670167470907e-05, "loss": 0.00016964860260486602, "step": 246380 }, { "epoch": 69.93755322168606, "grad_norm": 0.03990185260772705, "learning_rate": 3.0090831677547548e-05, "loss": 0.0006623532623052597, "step": 246390 }, { "epoch": 69.94039171160942, "grad_norm": 0.014737250283360481, "learning_rate": 3.0087993187624186e-05, "loss": 2.462156116962433e-05, "step": 246400 }, { "epoch": 69.94323020153278, "grad_norm": 0.008889763616025448, "learning_rate": 3.0085154697700824e-05, "loss": 5.028080195188522e-05, "step": 246410 }, { "epoch": 69.94606869145615, "grad_norm": 0.002270025433972478, "learning_rate": 3.0082316207777462e-05, "loss": 0.0005939625203609467, "step": 246420 }, { "epoch": 69.94890718137951, "grad_norm": 0.033328816294670105, "learning_rate": 3.0079477717854103e-05, "loss": 0.0005053324624896049, "step": 246430 }, { "epoch": 69.95174567130287, "grad_norm": 0.0005982607253827155, "learning_rate": 3.007663922793074e-05, "loss": 8.515343070030213e-05, "step": 246440 }, { "epoch": 69.95458416122622, "grad_norm": 0.004396476317197084, "learning_rate": 3.007380073800738e-05, "loss": 6.0055404901504517e-05, "step": 246450 }, { "epoch": 69.95742265114959, "grad_norm": 0.3960837125778198, "learning_rate": 3.0070962248084024e-05, "loss": 0.004242462664842605, "step": 246460 }, { "epoch": 69.96026114107295, "grad_norm": 0.008777691051363945, "learning_rate": 3.006812375816066e-05, "loss": 0.0001387115567922592, "step": 246470 }, { "epoch": 69.96309963099631, "grad_norm": 0.008607564494013786, "learning_rate": 3.0065285268237297e-05, "loss": 5.004629492759705e-05, "step": 246480 }, { "epoch": 69.96593812091967, "grad_norm": 0.003628386417403817, "learning_rate": 3.006244677831394e-05, "loss": 0.0001423647627234459, "step": 246490 }, { "epoch": 69.96877661084304, "grad_norm": 0.002099869307130575, "learning_rate": 3.005960828839058e-05, "loss": 0.00010629966855049133, "step": 246500 }, { "epoch": 69.96877661084304, "eval_accuracy": 0.9853754689387677, "eval_loss": 0.06146205589175224, "eval_runtime": 36.0613, "eval_samples_per_second": 436.118, "eval_steps_per_second": 6.822, "step": 246500 }, { "epoch": 69.97161510076639, "grad_norm": 0.008149494417011738, "learning_rate": 3.0056769798467217e-05, "loss": 0.00013265274465084075, "step": 246510 }, { "epoch": 69.97445359068975, "grad_norm": 0.01703990437090397, "learning_rate": 3.0053931308543852e-05, "loss": 2.7867406606674196e-05, "step": 246520 }, { "epoch": 69.97729208061311, "grad_norm": 0.08997166156768799, "learning_rate": 3.0051092818620497e-05, "loss": 0.0006586132571101188, "step": 246530 }, { "epoch": 69.98013057053647, "grad_norm": 0.013757298700511456, "learning_rate": 3.0048254328697135e-05, "loss": 3.563910722732544e-05, "step": 246540 }, { "epoch": 69.98296906045984, "grad_norm": 0.01950816437602043, "learning_rate": 3.0045415838773773e-05, "loss": 0.004549929499626159, "step": 246550 }, { "epoch": 69.9858075503832, "grad_norm": 0.005333784967660904, "learning_rate": 3.0042577348850414e-05, "loss": 0.014268286526203156, "step": 246560 }, { "epoch": 69.98864604030656, "grad_norm": 0.002898362698033452, "learning_rate": 3.0039738858927052e-05, "loss": 0.0005672084167599678, "step": 246570 }, { "epoch": 69.99148453022991, "grad_norm": 0.012670494616031647, "learning_rate": 3.003690036900369e-05, "loss": 5.109496414661408e-05, "step": 246580 }, { "epoch": 69.99432302015327, "grad_norm": 0.09264934808015823, "learning_rate": 3.003406187908033e-05, "loss": 0.0023407917469739914, "step": 246590 }, { "epoch": 69.99716151007664, "grad_norm": 0.00778657291084528, "learning_rate": 3.003122338915697e-05, "loss": 0.00010614823549985886, "step": 246600 }, { "epoch": 70.0, "grad_norm": 0.001688918680883944, "learning_rate": 3.0028384899233607e-05, "loss": 2.335531753487885e-05, "step": 246610 }, { "epoch": 70.00283848992336, "grad_norm": 0.006635427009314299, "learning_rate": 3.0025546409310245e-05, "loss": 6.859898567199708e-05, "step": 246620 }, { "epoch": 70.00567697984673, "grad_norm": 0.010351341217756271, "learning_rate": 3.002270791938689e-05, "loss": 2.857726067304611e-05, "step": 246630 }, { "epoch": 70.00851546977009, "grad_norm": 0.004702415782958269, "learning_rate": 3.0019869429463525e-05, "loss": 1.7103180289268495e-05, "step": 246640 }, { "epoch": 70.01135395969344, "grad_norm": 0.0024487918708473444, "learning_rate": 3.0017030939540163e-05, "loss": 8.534621447324753e-05, "step": 246650 }, { "epoch": 70.0141924496168, "grad_norm": 0.004113934002816677, "learning_rate": 3.0014192449616807e-05, "loss": 0.00011829324066638946, "step": 246660 }, { "epoch": 70.01703093954016, "grad_norm": 0.0078360540792346, "learning_rate": 3.0011353959693445e-05, "loss": 0.000204349122941494, "step": 246670 }, { "epoch": 70.01986942946353, "grad_norm": 0.03446660190820694, "learning_rate": 3.0008515469770083e-05, "loss": 0.00010189171880483627, "step": 246680 }, { "epoch": 70.02270791938689, "grad_norm": 0.003670650301501155, "learning_rate": 3.0005676979846725e-05, "loss": 0.00010085292160511017, "step": 246690 }, { "epoch": 70.02554640931025, "grad_norm": 0.003805731888860464, "learning_rate": 3.0002838489923363e-05, "loss": 0.009118357300758361, "step": 246700 }, { "epoch": 70.02838489923361, "grad_norm": 0.002199307782575488, "learning_rate": 3e-05, "loss": 0.0001445768401026726, "step": 246710 }, { "epoch": 70.03122338915696, "grad_norm": 0.10655289888381958, "learning_rate": 2.9997161510076642e-05, "loss": 0.00012105163186788559, "step": 246720 }, { "epoch": 70.03406187908033, "grad_norm": 0.004269082564860582, "learning_rate": 2.999432302015328e-05, "loss": 0.0016711976379156113, "step": 246730 }, { "epoch": 70.03690036900369, "grad_norm": 0.04786685109138489, "learning_rate": 2.9991484530229918e-05, "loss": 6.988253444433213e-05, "step": 246740 }, { "epoch": 70.03973885892705, "grad_norm": 0.020907048135995865, "learning_rate": 2.9988646040306556e-05, "loss": 6.540194153785706e-05, "step": 246750 }, { "epoch": 70.04257734885041, "grad_norm": 0.03607979789376259, "learning_rate": 2.99858075503832e-05, "loss": 0.00016815420240163804, "step": 246760 }, { "epoch": 70.04541583877378, "grad_norm": 0.03883116692304611, "learning_rate": 2.9982969060459835e-05, "loss": 7.289350032806397e-05, "step": 246770 }, { "epoch": 70.04825432869713, "grad_norm": 0.034668855369091034, "learning_rate": 2.9980130570536473e-05, "loss": 0.0001299828290939331, "step": 246780 }, { "epoch": 70.05109281862049, "grad_norm": 0.015547098591923714, "learning_rate": 2.9977292080613118e-05, "loss": 2.996046096086502e-05, "step": 246790 }, { "epoch": 70.05393130854385, "grad_norm": 0.001975477207452059, "learning_rate": 2.9974453590689756e-05, "loss": 4.076790064573288e-05, "step": 246800 }, { "epoch": 70.05676979846722, "grad_norm": 0.013795538805425167, "learning_rate": 2.9971615100766394e-05, "loss": 3.7332624197006224e-05, "step": 246810 }, { "epoch": 70.05960828839058, "grad_norm": 0.004227357916533947, "learning_rate": 2.9968776610843036e-05, "loss": 3.428235650062561e-05, "step": 246820 }, { "epoch": 70.06244677831394, "grad_norm": 0.010119469836354256, "learning_rate": 2.9965938120919674e-05, "loss": 0.00010430570691823959, "step": 246830 }, { "epoch": 70.0652852682373, "grad_norm": 0.006805171724408865, "learning_rate": 2.996309963099631e-05, "loss": 1.9341707229614258e-05, "step": 246840 }, { "epoch": 70.06812375816065, "grad_norm": 0.004429773893207312, "learning_rate": 2.996026114107295e-05, "loss": 4.820935428142548e-05, "step": 246850 }, { "epoch": 70.07096224808402, "grad_norm": 0.01198340393602848, "learning_rate": 2.995742265114959e-05, "loss": 5.067102611064911e-05, "step": 246860 }, { "epoch": 70.07380073800738, "grad_norm": 0.0025492634158581495, "learning_rate": 2.995458416122623e-05, "loss": 6.927121430635452e-05, "step": 246870 }, { "epoch": 70.07663922793074, "grad_norm": 0.22175510227680206, "learning_rate": 2.9951745671302867e-05, "loss": 0.00010714661329984665, "step": 246880 }, { "epoch": 70.0794777178541, "grad_norm": 0.006176903378218412, "learning_rate": 2.9948907181379508e-05, "loss": 3.827735781669617e-05, "step": 246890 }, { "epoch": 70.08231620777747, "grad_norm": 0.001375060179270804, "learning_rate": 2.9946068691456146e-05, "loss": 3.742184489965439e-05, "step": 246900 }, { "epoch": 70.08515469770083, "grad_norm": 0.007768474984914064, "learning_rate": 2.9943230201532784e-05, "loss": 2.6615336537361145e-05, "step": 246910 }, { "epoch": 70.08799318762418, "grad_norm": 0.03405660763382912, "learning_rate": 2.994039171160943e-05, "loss": 0.00010481551289558411, "step": 246920 }, { "epoch": 70.09083167754754, "grad_norm": 0.37975266575813293, "learning_rate": 2.9937553221686067e-05, "loss": 0.0003813231363892555, "step": 246930 }, { "epoch": 70.0936701674709, "grad_norm": 0.08028353005647659, "learning_rate": 2.99347147317627e-05, "loss": 7.58027657866478e-05, "step": 246940 }, { "epoch": 70.09650865739427, "grad_norm": 0.0033818103838711977, "learning_rate": 2.993187624183934e-05, "loss": 0.00010097101330757141, "step": 246950 }, { "epoch": 70.09934714731763, "grad_norm": 0.0010771648958325386, "learning_rate": 2.9929037751915984e-05, "loss": 9.955298155546188e-05, "step": 246960 }, { "epoch": 70.10218563724099, "grad_norm": 0.03742297738790512, "learning_rate": 2.9926199261992622e-05, "loss": 0.00018338281661272048, "step": 246970 }, { "epoch": 70.10502412716434, "grad_norm": 0.0010657605016604066, "learning_rate": 2.992336077206926e-05, "loss": 0.0001077672466635704, "step": 246980 }, { "epoch": 70.1078626170877, "grad_norm": 0.005199847277253866, "learning_rate": 2.99205222821459e-05, "loss": 3.5540014505386355e-05, "step": 246990 }, { "epoch": 70.11070110701107, "grad_norm": 0.00399512005969882, "learning_rate": 2.991768379222254e-05, "loss": 2.0872429013252257e-05, "step": 247000 }, { "epoch": 70.11070110701107, "eval_accuracy": 0.9860749030330006, "eval_loss": 0.058256614953279495, "eval_runtime": 40.3819, "eval_samples_per_second": 389.456, "eval_steps_per_second": 6.092, "step": 247000 }, { "epoch": 70.11353959693443, "grad_norm": 0.006738802883774042, "learning_rate": 2.9914845302299178e-05, "loss": 3.8174912333488464e-05, "step": 247010 }, { "epoch": 70.1163780868578, "grad_norm": 0.01375164370983839, "learning_rate": 2.991200681237582e-05, "loss": 0.0015780035406351089, "step": 247020 }, { "epoch": 70.11921657678116, "grad_norm": 0.006343838758766651, "learning_rate": 2.9909168322452457e-05, "loss": 7.537417113780975e-05, "step": 247030 }, { "epoch": 70.12205506670452, "grad_norm": 0.012884442694485188, "learning_rate": 2.9906329832529095e-05, "loss": 0.008345483243465424, "step": 247040 }, { "epoch": 70.12489355662787, "grad_norm": 0.0022541414946317673, "learning_rate": 2.9903491342605733e-05, "loss": 3.7657842040061953e-05, "step": 247050 }, { "epoch": 70.12773204655123, "grad_norm": 0.00299710500985384, "learning_rate": 2.9900652852682374e-05, "loss": 5.230456590652466e-05, "step": 247060 }, { "epoch": 70.1305705364746, "grad_norm": 0.0006370776682160795, "learning_rate": 2.9897814362759012e-05, "loss": 5.2711181342601773e-05, "step": 247070 }, { "epoch": 70.13340902639796, "grad_norm": 0.002958677476271987, "learning_rate": 2.989497587283565e-05, "loss": 7.090717554092407e-05, "step": 247080 }, { "epoch": 70.13624751632132, "grad_norm": 0.017088815569877625, "learning_rate": 2.9892137382912295e-05, "loss": 0.0001336481422185898, "step": 247090 }, { "epoch": 70.13908600624468, "grad_norm": 0.009194343350827694, "learning_rate": 2.9889298892988933e-05, "loss": 0.00011180005967617034, "step": 247100 }, { "epoch": 70.14192449616804, "grad_norm": 1.113499402999878, "learning_rate": 2.9886460403065568e-05, "loss": 0.0006044024601578712, "step": 247110 }, { "epoch": 70.1447629860914, "grad_norm": 0.0694320946931839, "learning_rate": 2.9883621913142212e-05, "loss": 0.000270589254796505, "step": 247120 }, { "epoch": 70.14760147601476, "grad_norm": 0.0012140856124460697, "learning_rate": 2.988078342321885e-05, "loss": 0.0024502815678715704, "step": 247130 }, { "epoch": 70.15043996593812, "grad_norm": 0.005550610367208719, "learning_rate": 2.987794493329549e-05, "loss": 4.185512661933899e-05, "step": 247140 }, { "epoch": 70.15327845586148, "grad_norm": 0.019190380349755287, "learning_rate": 2.9875106443372126e-05, "loss": 0.000266551598906517, "step": 247150 }, { "epoch": 70.15611694578485, "grad_norm": 0.01170163694769144, "learning_rate": 2.9872267953448768e-05, "loss": 5.5544450879096986e-05, "step": 247160 }, { "epoch": 70.15895543570821, "grad_norm": 0.014507890678942204, "learning_rate": 2.9869429463525406e-05, "loss": 0.00038741063326597214, "step": 247170 }, { "epoch": 70.16179392563157, "grad_norm": 0.0032360004261136055, "learning_rate": 2.9866590973602044e-05, "loss": 5.8698281645774844e-05, "step": 247180 }, { "epoch": 70.16463241555492, "grad_norm": 0.06700485944747925, "learning_rate": 2.9863752483678685e-05, "loss": 0.00043263211846351625, "step": 247190 }, { "epoch": 70.16747090547828, "grad_norm": 0.034297194331884384, "learning_rate": 2.9860913993755323e-05, "loss": 0.00045789647847414015, "step": 247200 }, { "epoch": 70.17030939540165, "grad_norm": 0.014429166913032532, "learning_rate": 2.985807550383196e-05, "loss": 3.6028027534484866e-05, "step": 247210 }, { "epoch": 70.17314788532501, "grad_norm": 0.48818379640579224, "learning_rate": 2.9855237013908606e-05, "loss": 0.00010953843593597412, "step": 247220 }, { "epoch": 70.17598637524837, "grad_norm": 0.008915135636925697, "learning_rate": 2.9852398523985244e-05, "loss": 4.244912415742874e-05, "step": 247230 }, { "epoch": 70.17882486517173, "grad_norm": 0.017229625955224037, "learning_rate": 2.984956003406188e-05, "loss": 2.791397273540497e-05, "step": 247240 }, { "epoch": 70.18166335509508, "grad_norm": 0.020345522090792656, "learning_rate": 2.9846721544138516e-05, "loss": 8.751116693019866e-05, "step": 247250 }, { "epoch": 70.18450184501845, "grad_norm": 0.0053484635427594185, "learning_rate": 2.984388305421516e-05, "loss": 4.2790547013282776e-05, "step": 247260 }, { "epoch": 70.18734033494181, "grad_norm": 0.008441852405667305, "learning_rate": 2.98410445642918e-05, "loss": 3.120545297861099e-05, "step": 247270 }, { "epoch": 70.19017882486517, "grad_norm": 0.006981653161346912, "learning_rate": 2.9838206074368437e-05, "loss": 1.8428079783916473e-05, "step": 247280 }, { "epoch": 70.19301731478853, "grad_norm": 0.0004928285488858819, "learning_rate": 2.983536758444508e-05, "loss": 3.634914755821228e-05, "step": 247290 }, { "epoch": 70.1958558047119, "grad_norm": 0.042519453912973404, "learning_rate": 2.9832529094521717e-05, "loss": 4.9505755305290224e-05, "step": 247300 }, { "epoch": 70.19869429463526, "grad_norm": 0.024624258279800415, "learning_rate": 2.9829690604598354e-05, "loss": 9.464845061302185e-05, "step": 247310 }, { "epoch": 70.20153278455861, "grad_norm": 0.06832616031169891, "learning_rate": 2.9826852114674996e-05, "loss": 4.842337220907211e-05, "step": 247320 }, { "epoch": 70.20437127448197, "grad_norm": 0.12175247073173523, "learning_rate": 2.9824013624751634e-05, "loss": 5.913153290748596e-05, "step": 247330 }, { "epoch": 70.20720976440533, "grad_norm": 0.005256861448287964, "learning_rate": 2.9821175134828272e-05, "loss": 1.6314908862113953e-05, "step": 247340 }, { "epoch": 70.2100482543287, "grad_norm": 0.04507335275411606, "learning_rate": 2.981833664490491e-05, "loss": 3.987979143857956e-05, "step": 247350 }, { "epoch": 70.21288674425206, "grad_norm": 0.004655594006180763, "learning_rate": 2.981549815498155e-05, "loss": 2.8640404343605043e-05, "step": 247360 }, { "epoch": 70.21572523417542, "grad_norm": 0.008105880580842495, "learning_rate": 2.981265966505819e-05, "loss": 2.8507597744464874e-05, "step": 247370 }, { "epoch": 70.21856372409879, "grad_norm": 0.007536240387707949, "learning_rate": 2.9809821175134827e-05, "loss": 3.158058971166611e-05, "step": 247380 }, { "epoch": 70.22140221402213, "grad_norm": 0.00392538495361805, "learning_rate": 2.9806982685211472e-05, "loss": 1.908969134092331e-05, "step": 247390 }, { "epoch": 70.2242407039455, "grad_norm": 0.023477062582969666, "learning_rate": 2.980414419528811e-05, "loss": 5.7477131485939025e-05, "step": 247400 }, { "epoch": 70.22707919386886, "grad_norm": 0.7441603541374207, "learning_rate": 2.9801305705364745e-05, "loss": 0.0002597710117697716, "step": 247410 }, { "epoch": 70.22991768379222, "grad_norm": 0.030620090663433075, "learning_rate": 2.979846721544139e-05, "loss": 0.00014723464846611023, "step": 247420 }, { "epoch": 70.23275617371559, "grad_norm": 0.0009279429214075208, "learning_rate": 2.9795628725518027e-05, "loss": 0.00011243429034948348, "step": 247430 }, { "epoch": 70.23559466363895, "grad_norm": 0.7279539704322815, "learning_rate": 2.9792790235594665e-05, "loss": 0.00012444183230400085, "step": 247440 }, { "epoch": 70.23843315356231, "grad_norm": 0.0075887455604970455, "learning_rate": 2.9789951745671303e-05, "loss": 8.611958473920822e-05, "step": 247450 }, { "epoch": 70.24127164348566, "grad_norm": 0.0010519068455323577, "learning_rate": 2.9787113255747945e-05, "loss": 8.399225771427155e-05, "step": 247460 }, { "epoch": 70.24411013340902, "grad_norm": 0.011787819676101208, "learning_rate": 2.9784274765824583e-05, "loss": 9.202305227518082e-05, "step": 247470 }, { "epoch": 70.24694862333239, "grad_norm": 0.0014737374149262905, "learning_rate": 2.978143627590122e-05, "loss": 4.1865743696689606e-05, "step": 247480 }, { "epoch": 70.24978711325575, "grad_norm": 0.0176981370896101, "learning_rate": 2.9778597785977862e-05, "loss": 4.392378032207489e-05, "step": 247490 }, { "epoch": 70.25262560317911, "grad_norm": 0.24039718508720398, "learning_rate": 2.97757592960545e-05, "loss": 0.005603751540184021, "step": 247500 }, { "epoch": 70.25262560317911, "eval_accuracy": 0.9858205633623705, "eval_loss": 0.05913535878062248, "eval_runtime": 39.0862, "eval_samples_per_second": 402.367, "eval_steps_per_second": 6.294, "step": 247500 }, { "epoch": 70.25546409310247, "grad_norm": 0.17610441148281097, "learning_rate": 2.9772920806131138e-05, "loss": 0.00027833227068185806, "step": 247510 }, { "epoch": 70.25830258302582, "grad_norm": 0.3148278594017029, "learning_rate": 2.9770082316207783e-05, "loss": 0.0001418694853782654, "step": 247520 }, { "epoch": 70.26114107294919, "grad_norm": 7.435847282409668, "learning_rate": 2.9767243826284417e-05, "loss": 0.0015501849353313445, "step": 247530 }, { "epoch": 70.26397956287255, "grad_norm": 0.11449771374464035, "learning_rate": 2.9764405336361055e-05, "loss": 0.00710134357213974, "step": 247540 }, { "epoch": 70.26681805279591, "grad_norm": 0.014773010276257992, "learning_rate": 2.9761566846437693e-05, "loss": 0.0007049959152936935, "step": 247550 }, { "epoch": 70.26965654271928, "grad_norm": 0.18417717516422272, "learning_rate": 2.9758728356514338e-05, "loss": 0.006320548057556152, "step": 247560 }, { "epoch": 70.27249503264264, "grad_norm": 0.12078018486499786, "learning_rate": 2.9755889866590976e-05, "loss": 0.00024136565625667571, "step": 247570 }, { "epoch": 70.275333522566, "grad_norm": 0.0015680157812312245, "learning_rate": 2.975305137666761e-05, "loss": 0.0001581486314535141, "step": 247580 }, { "epoch": 70.27817201248935, "grad_norm": 0.11126186698675156, "learning_rate": 2.9750212886744255e-05, "loss": 0.00016263481229543685, "step": 247590 }, { "epoch": 70.28101050241271, "grad_norm": 0.0038209271151572466, "learning_rate": 2.9747374396820893e-05, "loss": 0.00013597272336483002, "step": 247600 }, { "epoch": 70.28384899233608, "grad_norm": 0.056136228144168854, "learning_rate": 2.974453590689753e-05, "loss": 0.015044780075550079, "step": 247610 }, { "epoch": 70.28668748225944, "grad_norm": 0.007731772027909756, "learning_rate": 2.9741697416974173e-05, "loss": 0.0006281113252043724, "step": 247620 }, { "epoch": 70.2895259721828, "grad_norm": 0.0028770396020263433, "learning_rate": 2.973885892705081e-05, "loss": 0.0071313433349132534, "step": 247630 }, { "epoch": 70.29236446210616, "grad_norm": 11.01982593536377, "learning_rate": 2.973602043712745e-05, "loss": 0.0032580096274614336, "step": 247640 }, { "epoch": 70.29520295202953, "grad_norm": 0.009551904164254665, "learning_rate": 2.9733181947204087e-05, "loss": 0.018115514516830446, "step": 247650 }, { "epoch": 70.29804144195288, "grad_norm": 0.3347812592983246, "learning_rate": 2.9730343457280728e-05, "loss": 0.00020674318075180053, "step": 247660 }, { "epoch": 70.30087993187624, "grad_norm": 0.0004282447916921228, "learning_rate": 2.9727504967357366e-05, "loss": 0.0002095278352499008, "step": 247670 }, { "epoch": 70.3037184217996, "grad_norm": 0.008860394358634949, "learning_rate": 2.9724666477434004e-05, "loss": 9.360052645206452e-05, "step": 247680 }, { "epoch": 70.30655691172296, "grad_norm": 0.0014247104991227388, "learning_rate": 2.972182798751065e-05, "loss": 0.005504970997571945, "step": 247690 }, { "epoch": 70.30939540164633, "grad_norm": 0.00788260530680418, "learning_rate": 2.9718989497587287e-05, "loss": 0.0010011330246925353, "step": 247700 }, { "epoch": 70.31223389156969, "grad_norm": 0.052840229123830795, "learning_rate": 2.971615100766392e-05, "loss": 0.003195866197347641, "step": 247710 }, { "epoch": 70.31507238149304, "grad_norm": 0.06409326195716858, "learning_rate": 2.9713312517740566e-05, "loss": 0.0011240966618061067, "step": 247720 }, { "epoch": 70.3179108714164, "grad_norm": 7.29354190826416, "learning_rate": 2.9710474027817204e-05, "loss": 0.0023133404552936553, "step": 247730 }, { "epoch": 70.32074936133976, "grad_norm": 0.02966366335749626, "learning_rate": 2.9707635537893842e-05, "loss": 0.00011050589382648468, "step": 247740 }, { "epoch": 70.32358785126313, "grad_norm": 1.53435218334198, "learning_rate": 2.970479704797048e-05, "loss": 0.00498831570148468, "step": 247750 }, { "epoch": 70.32642634118649, "grad_norm": Infinity, "learning_rate": 2.970195855804712e-05, "loss": 0.00483967661857605, "step": 247760 }, { "epoch": 70.32926483110985, "grad_norm": 0.09955822676420212, "learning_rate": 2.9699403917116098e-05, "loss": 0.00028530731797218325, "step": 247770 }, { "epoch": 70.33210332103322, "grad_norm": 0.0014010388404130936, "learning_rate": 2.9696565427192736e-05, "loss": 0.0018632398918271066, "step": 247780 }, { "epoch": 70.33494181095656, "grad_norm": 1.7129210233688354, "learning_rate": 2.9693726937269374e-05, "loss": 0.00032455939799547195, "step": 247790 }, { "epoch": 70.33778030087993, "grad_norm": 2.915712833404541, "learning_rate": 2.9690888447346015e-05, "loss": 0.0016013681888580322, "step": 247800 }, { "epoch": 70.34061879080329, "grad_norm": 6.388339519500732, "learning_rate": 2.9688049957422653e-05, "loss": 0.0017456362023949624, "step": 247810 }, { "epoch": 70.34345728072665, "grad_norm": 0.01270593237131834, "learning_rate": 2.968521146749929e-05, "loss": 0.0009627291932702064, "step": 247820 }, { "epoch": 70.34629577065002, "grad_norm": 0.0017828099662438035, "learning_rate": 2.968237297757593e-05, "loss": 0.00012117233127355576, "step": 247830 }, { "epoch": 70.34913426057338, "grad_norm": 0.005140464752912521, "learning_rate": 2.967953448765257e-05, "loss": 0.0017176533117890358, "step": 247840 }, { "epoch": 70.35197275049674, "grad_norm": 0.1132071316242218, "learning_rate": 2.967669599772921e-05, "loss": 0.0038474150002002715, "step": 247850 }, { "epoch": 70.35481124042009, "grad_norm": 0.024389030411839485, "learning_rate": 2.9673857507805847e-05, "loss": 0.00029383916407823565, "step": 247860 }, { "epoch": 70.35764973034345, "grad_norm": 0.006606861483305693, "learning_rate": 2.967101901788249e-05, "loss": 0.00010400880128145218, "step": 247870 }, { "epoch": 70.36048822026682, "grad_norm": 0.02242317423224449, "learning_rate": 2.9668180527959126e-05, "loss": 0.0007123537361621857, "step": 247880 }, { "epoch": 70.36332671019018, "grad_norm": 0.053093764930963516, "learning_rate": 2.9665342038035764e-05, "loss": 6.694700568914414e-05, "step": 247890 }, { "epoch": 70.36616520011354, "grad_norm": 0.7335031032562256, "learning_rate": 2.966250354811241e-05, "loss": 0.0005222255364060402, "step": 247900 }, { "epoch": 70.3690036900369, "grad_norm": 0.02653224766254425, "learning_rate": 2.9659665058189047e-05, "loss": 0.00016768332570791245, "step": 247910 }, { "epoch": 70.37184217996027, "grad_norm": 0.1721029281616211, "learning_rate": 2.9656826568265685e-05, "loss": 0.00016489308327436448, "step": 247920 }, { "epoch": 70.37468066988362, "grad_norm": 0.01700435020029545, "learning_rate": 2.965398807834232e-05, "loss": 0.0001050194725394249, "step": 247930 }, { "epoch": 70.37751915980698, "grad_norm": 0.010430275462567806, "learning_rate": 2.9651149588418964e-05, "loss": 6.488654762506484e-05, "step": 247940 }, { "epoch": 70.38035764973034, "grad_norm": 0.12417735904455185, "learning_rate": 2.9648311098495602e-05, "loss": 0.00011369530111551284, "step": 247950 }, { "epoch": 70.3831961396537, "grad_norm": 0.02008724771440029, "learning_rate": 2.964547260857224e-05, "loss": 0.00012165755033493042, "step": 247960 }, { "epoch": 70.38603462957707, "grad_norm": 0.08417502790689468, "learning_rate": 2.964263411864888e-05, "loss": 0.00013938266783952713, "step": 247970 }, { "epoch": 70.38887311950043, "grad_norm": 0.013404149562120438, "learning_rate": 2.963979562872552e-05, "loss": 0.00028750337660312655, "step": 247980 }, { "epoch": 70.39171160942378, "grad_norm": 0.01914070174098015, "learning_rate": 2.9636957138802157e-05, "loss": 7.038097828626633e-05, "step": 247990 }, { "epoch": 70.39455009934714, "grad_norm": 0.009700100868940353, "learning_rate": 2.96341186488788e-05, "loss": 5.9332884848117826e-05, "step": 248000 }, { "epoch": 70.39455009934714, "eval_accuracy": 0.9842309404209322, "eval_loss": 0.0674503892660141, "eval_runtime": 37.1302, "eval_samples_per_second": 423.564, "eval_steps_per_second": 6.625, "step": 248000 }, { "epoch": 70.3973885892705, "grad_norm": 0.05403287708759308, "learning_rate": 2.9631280158955437e-05, "loss": 8.414871990680695e-05, "step": 248010 }, { "epoch": 70.40022707919387, "grad_norm": 0.01173448283225298, "learning_rate": 2.9628441669032075e-05, "loss": 0.00015360824763774872, "step": 248020 }, { "epoch": 70.40306556911723, "grad_norm": 0.002713131718337536, "learning_rate": 2.9625603179108713e-05, "loss": 0.00013796593993902205, "step": 248030 }, { "epoch": 70.4059040590406, "grad_norm": 0.05308198556303978, "learning_rate": 2.9622764689185358e-05, "loss": 0.00010911822319030762, "step": 248040 }, { "epoch": 70.40874254896396, "grad_norm": 0.0149715356528759, "learning_rate": 2.9619926199261992e-05, "loss": 1.910421997308731e-05, "step": 248050 }, { "epoch": 70.4115810388873, "grad_norm": 0.01977718248963356, "learning_rate": 2.961708770933863e-05, "loss": 6.508957594633102e-05, "step": 248060 }, { "epoch": 70.41441952881067, "grad_norm": 0.008311969228088856, "learning_rate": 2.9614249219415275e-05, "loss": 7.038004696369171e-05, "step": 248070 }, { "epoch": 70.41725801873403, "grad_norm": 0.00420642364770174, "learning_rate": 2.9611410729491913e-05, "loss": 0.00010792091488838196, "step": 248080 }, { "epoch": 70.4200965086574, "grad_norm": 0.06895717978477478, "learning_rate": 2.960857223956855e-05, "loss": 0.0058065265417099, "step": 248090 }, { "epoch": 70.42293499858076, "grad_norm": 0.0018213180592283607, "learning_rate": 2.9605733749645192e-05, "loss": 5.4712779819965364e-05, "step": 248100 }, { "epoch": 70.42577348850412, "grad_norm": 0.006375469267368317, "learning_rate": 2.960289525972183e-05, "loss": 3.978200256824493e-05, "step": 248110 }, { "epoch": 70.42861197842748, "grad_norm": 14.853215217590332, "learning_rate": 2.9600056769798468e-05, "loss": 0.0048948023468256, "step": 248120 }, { "epoch": 70.43145046835083, "grad_norm": 2.9951012134552, "learning_rate": 2.9597218279875106e-05, "loss": 0.0006230246275663376, "step": 248130 }, { "epoch": 70.4342889582742, "grad_norm": 0.0031985328532755375, "learning_rate": 2.9594379789951748e-05, "loss": 0.00010928511619567871, "step": 248140 }, { "epoch": 70.43712744819756, "grad_norm": 0.012846257537603378, "learning_rate": 2.9591541300028386e-05, "loss": 0.0008499963209033012, "step": 248150 }, { "epoch": 70.43996593812092, "grad_norm": 0.0022724061273038387, "learning_rate": 2.9588702810105024e-05, "loss": 4.6351924538612366e-05, "step": 248160 }, { "epoch": 70.44280442804428, "grad_norm": 0.015153252519667149, "learning_rate": 2.9585864320181665e-05, "loss": 0.00026236642152071, "step": 248170 }, { "epoch": 70.44564291796765, "grad_norm": 0.005568309687077999, "learning_rate": 2.9583025830258303e-05, "loss": 4.3087638914585114e-05, "step": 248180 }, { "epoch": 70.448481407891, "grad_norm": 0.05310269445180893, "learning_rate": 2.958018734033494e-05, "loss": 6.709489971399308e-05, "step": 248190 }, { "epoch": 70.45131989781436, "grad_norm": 0.011771791614592075, "learning_rate": 2.9577348850411586e-05, "loss": 0.00014161858707666396, "step": 248200 }, { "epoch": 70.45415838773772, "grad_norm": 0.017022807151079178, "learning_rate": 2.9574510360488224e-05, "loss": 0.0001315910369157791, "step": 248210 }, { "epoch": 70.45699687766108, "grad_norm": 0.011292306706309319, "learning_rate": 2.9571671870564858e-05, "loss": 6.989799439907073e-05, "step": 248220 }, { "epoch": 70.45983536758445, "grad_norm": 0.011968112550675869, "learning_rate": 2.9568833380641496e-05, "loss": 8.218567818403244e-05, "step": 248230 }, { "epoch": 70.46267385750781, "grad_norm": 0.0071738846600055695, "learning_rate": 2.956599489071814e-05, "loss": 0.00010550059378147125, "step": 248240 }, { "epoch": 70.46551234743117, "grad_norm": 0.02277645468711853, "learning_rate": 2.956315640079478e-05, "loss": 0.0002102261409163475, "step": 248250 }, { "epoch": 70.46835083735452, "grad_norm": 0.0016883619828149676, "learning_rate": 2.9560317910871417e-05, "loss": 6.398055702447892e-05, "step": 248260 }, { "epoch": 70.47118932727788, "grad_norm": 0.003471383359283209, "learning_rate": 2.955747942094806e-05, "loss": 8.677169680595397e-05, "step": 248270 }, { "epoch": 70.47402781720125, "grad_norm": 1.8116209506988525, "learning_rate": 2.9554640931024696e-05, "loss": 0.0003227733075618744, "step": 248280 }, { "epoch": 70.47686630712461, "grad_norm": 0.49076324701309204, "learning_rate": 2.9551802441101334e-05, "loss": 0.0002725858241319656, "step": 248290 }, { "epoch": 70.47970479704797, "grad_norm": 2.3788421154022217, "learning_rate": 2.9548963951177976e-05, "loss": 0.0005228227004408836, "step": 248300 }, { "epoch": 70.48254328697134, "grad_norm": 0.4386679232120514, "learning_rate": 2.9546125461254614e-05, "loss": 0.00016097985208034514, "step": 248310 }, { "epoch": 70.4853817768947, "grad_norm": 0.05320712924003601, "learning_rate": 2.954328697133125e-05, "loss": 0.0009699681773781777, "step": 248320 }, { "epoch": 70.48822026681805, "grad_norm": 0.007203076966106892, "learning_rate": 2.954044848140789e-05, "loss": 0.00011854134500026703, "step": 248330 }, { "epoch": 70.49105875674141, "grad_norm": 0.0395171120762825, "learning_rate": 2.9537609991484534e-05, "loss": 0.0001520257443189621, "step": 248340 }, { "epoch": 70.49389724666477, "grad_norm": 0.0012188699329271913, "learning_rate": 2.953477150156117e-05, "loss": 0.00024972185492515563, "step": 248350 }, { "epoch": 70.49673573658814, "grad_norm": 0.10959286242723465, "learning_rate": 2.9531933011637807e-05, "loss": 0.0002721814438700676, "step": 248360 }, { "epoch": 70.4995742265115, "grad_norm": 0.006143668666481972, "learning_rate": 2.9529094521714452e-05, "loss": 0.0006985535845160484, "step": 248370 }, { "epoch": 70.50241271643486, "grad_norm": 0.3664526045322418, "learning_rate": 2.952625603179109e-05, "loss": 0.0013691164553165437, "step": 248380 }, { "epoch": 70.50525120635822, "grad_norm": 0.004833901301026344, "learning_rate": 2.9523417541867728e-05, "loss": 0.00020143669098615646, "step": 248390 }, { "epoch": 70.50808969628157, "grad_norm": 3.658005714416504, "learning_rate": 2.952057905194437e-05, "loss": 0.002720526047050953, "step": 248400 }, { "epoch": 70.51092818620494, "grad_norm": 0.0207795649766922, "learning_rate": 2.9517740562021007e-05, "loss": 0.00021497737616300584, "step": 248410 }, { "epoch": 70.5137666761283, "grad_norm": 1.840864658355713, "learning_rate": 2.9514902072097645e-05, "loss": 0.0027487598359584807, "step": 248420 }, { "epoch": 70.51660516605166, "grad_norm": 0.017770886421203613, "learning_rate": 2.9512063582174286e-05, "loss": 0.0001908091828227043, "step": 248430 }, { "epoch": 70.51944365597502, "grad_norm": 0.0070456149987876415, "learning_rate": 2.9509225092250924e-05, "loss": 0.00021708086133003234, "step": 248440 }, { "epoch": 70.52228214589839, "grad_norm": 0.004152680281549692, "learning_rate": 2.9506386602327562e-05, "loss": 9.521804749965668e-05, "step": 248450 }, { "epoch": 70.52512063582174, "grad_norm": 0.006864550057798624, "learning_rate": 2.95035481124042e-05, "loss": 4.722233861684799e-05, "step": 248460 }, { "epoch": 70.5279591257451, "grad_norm": 0.024161966517567635, "learning_rate": 2.9500709622480842e-05, "loss": 0.003810770809650421, "step": 248470 }, { "epoch": 70.53079761566846, "grad_norm": 0.015367534011602402, "learning_rate": 2.949787113255748e-05, "loss": 7.966533303260803e-05, "step": 248480 }, { "epoch": 70.53363610559182, "grad_norm": 0.00493839243426919, "learning_rate": 2.9495032642634118e-05, "loss": 0.00015094727277755737, "step": 248490 }, { "epoch": 70.53647459551519, "grad_norm": 0.007263580337166786, "learning_rate": 2.9492194152710763e-05, "loss": 0.0005944306030869484, "step": 248500 }, { "epoch": 70.53647459551519, "eval_accuracy": 0.9850575443504801, "eval_loss": 0.06136137992143631, "eval_runtime": 53.5213, "eval_samples_per_second": 293.846, "eval_steps_per_second": 4.596, "step": 248500 }, { "epoch": 70.53931308543855, "grad_norm": 0.002969159511849284, "learning_rate": 2.94893556627874e-05, "loss": 0.00013584494590759277, "step": 248510 }, { "epoch": 70.54215157536191, "grad_norm": 0.04288516938686371, "learning_rate": 2.9486517172864035e-05, "loss": 6.497353315353394e-05, "step": 248520 }, { "epoch": 70.54499006528526, "grad_norm": 0.04218044877052307, "learning_rate": 2.948367868294068e-05, "loss": 6.85686245560646e-05, "step": 248530 }, { "epoch": 70.54782855520862, "grad_norm": 0.014036130160093307, "learning_rate": 2.9480840193017318e-05, "loss": 0.0011008020490407943, "step": 248540 }, { "epoch": 70.55066704513199, "grad_norm": 0.02440466172993183, "learning_rate": 2.9478001703093956e-05, "loss": 0.00037846043705940245, "step": 248550 }, { "epoch": 70.55350553505535, "grad_norm": 0.013930566608905792, "learning_rate": 2.9475163213170594e-05, "loss": 0.0002534789964556694, "step": 248560 }, { "epoch": 70.55634402497871, "grad_norm": 0.2289898693561554, "learning_rate": 2.9472324723247235e-05, "loss": 0.001074550114572048, "step": 248570 }, { "epoch": 70.55918251490208, "grad_norm": 0.047655731439590454, "learning_rate": 2.9469486233323873e-05, "loss": 0.00012052766978740693, "step": 248580 }, { "epoch": 70.56202100482544, "grad_norm": 0.04551009461283684, "learning_rate": 2.946664774340051e-05, "loss": 0.00029621105641126634, "step": 248590 }, { "epoch": 70.56485949474879, "grad_norm": 0.006886932998895645, "learning_rate": 2.9463809253477153e-05, "loss": 0.00023989230394363403, "step": 248600 }, { "epoch": 70.56769798467215, "grad_norm": 0.03877277299761772, "learning_rate": 2.946097076355379e-05, "loss": 6.0862675309181216e-05, "step": 248610 }, { "epoch": 70.57053647459551, "grad_norm": 0.01961331255733967, "learning_rate": 2.945813227363043e-05, "loss": 0.00013696588575839996, "step": 248620 }, { "epoch": 70.57337496451888, "grad_norm": 0.022866331040859222, "learning_rate": 2.9455293783707073e-05, "loss": 0.0002532023936510086, "step": 248630 }, { "epoch": 70.57621345444224, "grad_norm": 0.0013553155586123466, "learning_rate": 2.9452455293783708e-05, "loss": 0.00014808401465415955, "step": 248640 }, { "epoch": 70.5790519443656, "grad_norm": 0.17299632728099823, "learning_rate": 2.9449616803860346e-05, "loss": 9.495634585618973e-05, "step": 248650 }, { "epoch": 70.58189043428897, "grad_norm": 1.2133585214614868, "learning_rate": 2.9446778313936984e-05, "loss": 0.000551382452249527, "step": 248660 }, { "epoch": 70.58472892421231, "grad_norm": 0.4170458912849426, "learning_rate": 2.944393982401363e-05, "loss": 0.00013039857149124145, "step": 248670 }, { "epoch": 70.58756741413568, "grad_norm": 0.013287095353007317, "learning_rate": 2.9441101334090267e-05, "loss": 0.0013622796162962914, "step": 248680 }, { "epoch": 70.59040590405904, "grad_norm": 7.8899312019348145, "learning_rate": 2.94382628441669e-05, "loss": 0.0012204613536596297, "step": 248690 }, { "epoch": 70.5932443939824, "grad_norm": 0.10453259199857712, "learning_rate": 2.9435424354243546e-05, "loss": 0.00013272780925035476, "step": 248700 }, { "epoch": 70.59608288390577, "grad_norm": 0.0026942130643874407, "learning_rate": 2.9432585864320184e-05, "loss": 0.00028483103960752487, "step": 248710 }, { "epoch": 70.59892137382913, "grad_norm": 0.045762501657009125, "learning_rate": 2.9429747374396822e-05, "loss": 0.00023305881768465042, "step": 248720 }, { "epoch": 70.60175986375248, "grad_norm": 0.0865032896399498, "learning_rate": 2.9426908884473463e-05, "loss": 5.094371736049652e-05, "step": 248730 }, { "epoch": 70.60459835367584, "grad_norm": 0.0018523390172049403, "learning_rate": 2.94240703945501e-05, "loss": 0.0005166798830032349, "step": 248740 }, { "epoch": 70.6074368435992, "grad_norm": 0.01996910199522972, "learning_rate": 2.942123190462674e-05, "loss": 0.0001348629593849182, "step": 248750 }, { "epoch": 70.61027533352257, "grad_norm": 0.00879773311316967, "learning_rate": 2.9418393414703377e-05, "loss": 0.00010746121406555176, "step": 248760 }, { "epoch": 70.61311382344593, "grad_norm": 0.025566570460796356, "learning_rate": 2.941555492478002e-05, "loss": 9.403303265571594e-05, "step": 248770 }, { "epoch": 70.61595231336929, "grad_norm": 0.012971930205821991, "learning_rate": 2.9412716434856657e-05, "loss": 6.353631615638733e-05, "step": 248780 }, { "epoch": 70.61879080329265, "grad_norm": 0.007432314567267895, "learning_rate": 2.9409877944933295e-05, "loss": 0.0001051759347319603, "step": 248790 }, { "epoch": 70.621629293216, "grad_norm": 0.001992718316614628, "learning_rate": 2.940703945500994e-05, "loss": 9.671375155448914e-05, "step": 248800 }, { "epoch": 70.62446778313937, "grad_norm": 0.014198211953043938, "learning_rate": 2.9404200965086577e-05, "loss": 0.00022434759885072708, "step": 248810 }, { "epoch": 70.62730627306273, "grad_norm": 0.007328366860747337, "learning_rate": 2.9401362475163212e-05, "loss": 5.925595760345459e-05, "step": 248820 }, { "epoch": 70.63014476298609, "grad_norm": 0.008527814410626888, "learning_rate": 2.9398523985239857e-05, "loss": 5.133580416440964e-05, "step": 248830 }, { "epoch": 70.63298325290945, "grad_norm": 0.07831089198589325, "learning_rate": 2.9395685495316495e-05, "loss": 0.00045852363109588623, "step": 248840 }, { "epoch": 70.63582174283282, "grad_norm": 0.28076836466789246, "learning_rate": 2.9392847005393133e-05, "loss": 0.002731834724545479, "step": 248850 }, { "epoch": 70.63866023275618, "grad_norm": 0.029049932956695557, "learning_rate": 2.939000851546977e-05, "loss": 0.000259871780872345, "step": 248860 }, { "epoch": 70.64149872267953, "grad_norm": 0.01922100968658924, "learning_rate": 2.9387170025546412e-05, "loss": 0.0002800034359097481, "step": 248870 }, { "epoch": 70.64433721260289, "grad_norm": 0.04689742997288704, "learning_rate": 2.938433153562305e-05, "loss": 6.975587457418442e-05, "step": 248880 }, { "epoch": 70.64717570252625, "grad_norm": 0.004275089129805565, "learning_rate": 2.9381493045699688e-05, "loss": 5.095638334751129e-05, "step": 248890 }, { "epoch": 70.65001419244962, "grad_norm": 0.02949371747672558, "learning_rate": 2.937865455577633e-05, "loss": 5.843713879585266e-05, "step": 248900 }, { "epoch": 70.65285268237298, "grad_norm": 0.010013998486101627, "learning_rate": 2.9375816065852967e-05, "loss": 5.395747721195221e-05, "step": 248910 }, { "epoch": 70.65569117229634, "grad_norm": 0.004542596638202667, "learning_rate": 2.9372977575929605e-05, "loss": 0.0001734931021928787, "step": 248920 }, { "epoch": 70.65852966221969, "grad_norm": 0.012113838456571102, "learning_rate": 2.937013908600625e-05, "loss": 0.0001715186983346939, "step": 248930 }, { "epoch": 70.66136815214305, "grad_norm": 0.00098871486261487, "learning_rate": 2.9367300596082885e-05, "loss": 2.644956111907959e-05, "step": 248940 }, { "epoch": 70.66420664206642, "grad_norm": 0.0038321816828101873, "learning_rate": 2.9364462106159523e-05, "loss": 0.0001948310062289238, "step": 248950 }, { "epoch": 70.66704513198978, "grad_norm": 0.12593723833560944, "learning_rate": 2.936162361623616e-05, "loss": 9.07478854060173e-05, "step": 248960 }, { "epoch": 70.66988362191314, "grad_norm": 0.016682827845215797, "learning_rate": 2.9358785126312806e-05, "loss": 0.003266008198261261, "step": 248970 }, { "epoch": 70.6727221118365, "grad_norm": 0.007254281081259251, "learning_rate": 2.9355946636389444e-05, "loss": 0.00011160038411617279, "step": 248980 }, { "epoch": 70.67556060175987, "grad_norm": 0.9457295536994934, "learning_rate": 2.9353108146466078e-05, "loss": 0.00019010305404663086, "step": 248990 }, { "epoch": 70.67839909168322, "grad_norm": 0.17319971323013306, "learning_rate": 2.9350269656542723e-05, "loss": 0.00010744519531726837, "step": 249000 }, { "epoch": 70.67839909168322, "eval_accuracy": 0.9779996184904941, "eval_loss": 0.09350395202636719, "eval_runtime": 42.6377, "eval_samples_per_second": 368.852, "eval_steps_per_second": 5.77, "step": 249000 }, { "epoch": 70.68123758160658, "grad_norm": 0.00779690220952034, "learning_rate": 2.934743116661936e-05, "loss": 0.0005245797336101532, "step": 249010 }, { "epoch": 70.68407607152994, "grad_norm": 0.003982786554843187, "learning_rate": 2.9344592676696e-05, "loss": 0.0003173528239130974, "step": 249020 }, { "epoch": 70.6869145614533, "grad_norm": 0.11205805838108063, "learning_rate": 2.934175418677264e-05, "loss": 8.328091353178024e-05, "step": 249030 }, { "epoch": 70.68975305137667, "grad_norm": 0.009028894826769829, "learning_rate": 2.9338915696849278e-05, "loss": 0.00027976054698228834, "step": 249040 }, { "epoch": 70.69259154130003, "grad_norm": 0.07736699283123016, "learning_rate": 2.9336077206925916e-05, "loss": 0.000348428450524807, "step": 249050 }, { "epoch": 70.6954300312234, "grad_norm": 0.16609637439250946, "learning_rate": 2.9333238717002554e-05, "loss": 0.00033530276268720625, "step": 249060 }, { "epoch": 70.69826852114674, "grad_norm": 0.005842849612236023, "learning_rate": 2.9330400227079196e-05, "loss": 8.766278624534607e-05, "step": 249070 }, { "epoch": 70.7011070110701, "grad_norm": 8.428167343139648, "learning_rate": 2.9327561737155834e-05, "loss": 0.005165395140647888, "step": 249080 }, { "epoch": 70.70394550099347, "grad_norm": 0.014373153448104858, "learning_rate": 2.932472324723247e-05, "loss": 5.350932478904724e-05, "step": 249090 }, { "epoch": 70.70678399091683, "grad_norm": 0.002873152494430542, "learning_rate": 2.9321884757309116e-05, "loss": 0.0008810846135020256, "step": 249100 }, { "epoch": 70.7096224808402, "grad_norm": 0.0019046904053539038, "learning_rate": 2.931904626738575e-05, "loss": 0.0002635207027196884, "step": 249110 }, { "epoch": 70.71246097076356, "grad_norm": 1.5257927179336548, "learning_rate": 2.931620777746239e-05, "loss": 0.0002728061750531197, "step": 249120 }, { "epoch": 70.71529946068692, "grad_norm": 0.0032994700595736504, "learning_rate": 2.9313369287539034e-05, "loss": 0.0002134270966053009, "step": 249130 }, { "epoch": 70.71813795061027, "grad_norm": 0.009994682855904102, "learning_rate": 2.931053079761567e-05, "loss": 0.00018127858638763427, "step": 249140 }, { "epoch": 70.72097644053363, "grad_norm": 0.005927131045609713, "learning_rate": 2.930769230769231e-05, "loss": 0.00012995153665542602, "step": 249150 }, { "epoch": 70.723814930457, "grad_norm": 0.7952917218208313, "learning_rate": 2.9304853817768944e-05, "loss": 0.00018496662378311157, "step": 249160 }, { "epoch": 70.72665342038036, "grad_norm": 0.0021245318930596113, "learning_rate": 2.930201532784559e-05, "loss": 3.302209079265594e-05, "step": 249170 }, { "epoch": 70.72949191030372, "grad_norm": 0.005547685548663139, "learning_rate": 2.9299176837922227e-05, "loss": 4.126764833927155e-05, "step": 249180 }, { "epoch": 70.73233040022708, "grad_norm": 0.002886164467781782, "learning_rate": 2.9296338347998865e-05, "loss": 8.745696395635605e-05, "step": 249190 }, { "epoch": 70.73516889015043, "grad_norm": 0.009388790465891361, "learning_rate": 2.9293499858075506e-05, "loss": 0.00012970399111509324, "step": 249200 }, { "epoch": 70.7380073800738, "grad_norm": 0.00538341049104929, "learning_rate": 2.9290661368152144e-05, "loss": 0.00010363850742578506, "step": 249210 }, { "epoch": 70.74084586999716, "grad_norm": 0.0006684940890409052, "learning_rate": 2.9287822878228782e-05, "loss": 0.0002778751775622368, "step": 249220 }, { "epoch": 70.74368435992052, "grad_norm": 0.014782200567424297, "learning_rate": 2.9284984388305424e-05, "loss": 0.0001246882602572441, "step": 249230 }, { "epoch": 70.74652284984388, "grad_norm": 0.009344164282083511, "learning_rate": 2.928214589838206e-05, "loss": 8.310619741678238e-05, "step": 249240 }, { "epoch": 70.74936133976725, "grad_norm": 0.005564493592828512, "learning_rate": 2.92793074084587e-05, "loss": 0.00038788877427577975, "step": 249250 }, { "epoch": 70.75219982969061, "grad_norm": 0.0034015618730336428, "learning_rate": 2.9276468918535338e-05, "loss": 0.0025524716824293137, "step": 249260 }, { "epoch": 70.75503831961396, "grad_norm": 0.024031639099121094, "learning_rate": 2.9273630428611982e-05, "loss": 0.0008377058431506157, "step": 249270 }, { "epoch": 70.75787680953732, "grad_norm": 0.028547152876853943, "learning_rate": 2.927079193868862e-05, "loss": 0.0037551030516624452, "step": 249280 }, { "epoch": 70.76071529946068, "grad_norm": 0.001017376547679305, "learning_rate": 2.9267953448765255e-05, "loss": 5.036778748035431e-05, "step": 249290 }, { "epoch": 70.76355378938405, "grad_norm": 0.013321594335138798, "learning_rate": 2.92651149588419e-05, "loss": 0.00015711709856987, "step": 249300 }, { "epoch": 70.76639227930741, "grad_norm": 0.005958344787359238, "learning_rate": 2.9262276468918538e-05, "loss": 3.8401409983634946e-05, "step": 249310 }, { "epoch": 70.76923076923077, "grad_norm": 0.002276309998705983, "learning_rate": 2.9259437978995176e-05, "loss": 6.508789956569672e-05, "step": 249320 }, { "epoch": 70.77206925915414, "grad_norm": 0.03202289342880249, "learning_rate": 2.9256599489071817e-05, "loss": 4.345923662185669e-05, "step": 249330 }, { "epoch": 70.77490774907749, "grad_norm": 0.0018468413036316633, "learning_rate": 2.9253760999148455e-05, "loss": 5.851574242115021e-05, "step": 249340 }, { "epoch": 70.77774623900085, "grad_norm": 0.01988677866756916, "learning_rate": 2.9250922509225093e-05, "loss": 4.7231465578079225e-05, "step": 249350 }, { "epoch": 70.78058472892421, "grad_norm": 0.0029962260741740465, "learning_rate": 2.924808401930173e-05, "loss": 4.227496683597565e-05, "step": 249360 }, { "epoch": 70.78342321884757, "grad_norm": 0.008802843280136585, "learning_rate": 2.9245245529378372e-05, "loss": 2.591777592897415e-05, "step": 249370 }, { "epoch": 70.78626170877094, "grad_norm": 0.010266626253724098, "learning_rate": 2.924240703945501e-05, "loss": 2.4041347205638886e-05, "step": 249380 }, { "epoch": 70.7891001986943, "grad_norm": 0.001931802136823535, "learning_rate": 2.923956854953165e-05, "loss": 0.00015222150832414628, "step": 249390 }, { "epoch": 70.79193868861765, "grad_norm": 0.03602933511137962, "learning_rate": 2.9236730059608293e-05, "loss": 4.078075289726257e-05, "step": 249400 }, { "epoch": 70.79477717854101, "grad_norm": 0.0049340077675879, "learning_rate": 2.9233891569684928e-05, "loss": 0.00012414902448654175, "step": 249410 }, { "epoch": 70.79761566846437, "grad_norm": 0.10574299097061157, "learning_rate": 2.9231053079761566e-05, "loss": 9.03623178601265e-05, "step": 249420 }, { "epoch": 70.80045415838774, "grad_norm": 0.019764967262744904, "learning_rate": 2.922821458983821e-05, "loss": 0.00036213193088769914, "step": 249430 }, { "epoch": 70.8032926483111, "grad_norm": 0.32796624302864075, "learning_rate": 2.922537609991485e-05, "loss": 0.005483055114746093, "step": 249440 }, { "epoch": 70.80613113823446, "grad_norm": 0.00441837077960372, "learning_rate": 2.9222537609991486e-05, "loss": 4.699751734733581e-05, "step": 249450 }, { "epoch": 70.80896962815783, "grad_norm": 0.02097305655479431, "learning_rate": 2.921969912006812e-05, "loss": 0.0018309606239199638, "step": 249460 }, { "epoch": 70.81180811808117, "grad_norm": 0.022212419658899307, "learning_rate": 2.9216860630144766e-05, "loss": 0.0002735583111643791, "step": 249470 }, { "epoch": 70.81464660800454, "grad_norm": 0.023913629353046417, "learning_rate": 2.9214022140221404e-05, "loss": 5.982853472232819e-05, "step": 249480 }, { "epoch": 70.8174850979279, "grad_norm": 0.01113983802497387, "learning_rate": 2.9211183650298042e-05, "loss": 0.007601199299097061, "step": 249490 }, { "epoch": 70.82032358785126, "grad_norm": 0.003193930722773075, "learning_rate": 2.9208345160374683e-05, "loss": 0.0014154966920614243, "step": 249500 }, { "epoch": 70.82032358785126, "eval_accuracy": 0.9847396197621924, "eval_loss": 0.060005951672792435, "eval_runtime": 42.7781, "eval_samples_per_second": 367.642, "eval_steps_per_second": 5.751, "step": 249500 }, { "epoch": 70.82316207777463, "grad_norm": 0.017029698938131332, "learning_rate": 2.920550667045132e-05, "loss": 0.00012044087052345275, "step": 249510 }, { "epoch": 70.82600056769799, "grad_norm": 0.0036561700981110334, "learning_rate": 2.920266818052796e-05, "loss": 0.00011339746415615082, "step": 249520 }, { "epoch": 70.82883905762135, "grad_norm": 0.004248818848282099, "learning_rate": 2.91998296906046e-05, "loss": 0.004117703437805176, "step": 249530 }, { "epoch": 70.8316775475447, "grad_norm": 0.005679034627974033, "learning_rate": 2.919699120068124e-05, "loss": 5.499646067619324e-05, "step": 249540 }, { "epoch": 70.83451603746806, "grad_norm": 0.2689702808856964, "learning_rate": 2.9194152710757877e-05, "loss": 0.00010921880602836609, "step": 249550 }, { "epoch": 70.83735452739143, "grad_norm": 0.04847085848450661, "learning_rate": 2.9191314220834514e-05, "loss": 3.819167613983154e-05, "step": 249560 }, { "epoch": 70.84019301731479, "grad_norm": 0.009779621846973896, "learning_rate": 2.918847573091116e-05, "loss": 6.05262815952301e-05, "step": 249570 }, { "epoch": 70.84303150723815, "grad_norm": 0.043444130569696426, "learning_rate": 2.9185637240987794e-05, "loss": 2.5402568280696868e-05, "step": 249580 }, { "epoch": 70.84586999716151, "grad_norm": 0.012666445225477219, "learning_rate": 2.9182798751064432e-05, "loss": 5.8770738542079924e-05, "step": 249590 }, { "epoch": 70.84870848708488, "grad_norm": 0.007961893454194069, "learning_rate": 2.9179960261141077e-05, "loss": 5.0409696996212006e-05, "step": 249600 }, { "epoch": 70.85154697700823, "grad_norm": 0.010418735444545746, "learning_rate": 2.9177121771217715e-05, "loss": 7.750168442726136e-05, "step": 249610 }, { "epoch": 70.85438546693159, "grad_norm": 0.1407838761806488, "learning_rate": 2.9174283281294353e-05, "loss": 0.00015841443091630937, "step": 249620 }, { "epoch": 70.85722395685495, "grad_norm": 1.1668297052383423, "learning_rate": 2.9171444791370994e-05, "loss": 0.00025217346847057344, "step": 249630 }, { "epoch": 70.86006244677831, "grad_norm": 0.017904963344335556, "learning_rate": 2.9168606301447632e-05, "loss": 6.179902702569961e-05, "step": 249640 }, { "epoch": 70.86290093670168, "grad_norm": 1.9005985260009766, "learning_rate": 2.916576781152427e-05, "loss": 0.0010441916063427925, "step": 249650 }, { "epoch": 70.86573942662504, "grad_norm": 0.006065373308956623, "learning_rate": 2.9162929321600908e-05, "loss": 2.1168217062950135e-05, "step": 249660 }, { "epoch": 70.86857791654839, "grad_norm": 0.0037500157486647367, "learning_rate": 2.916009083167755e-05, "loss": 0.0001807762309908867, "step": 249670 }, { "epoch": 70.87141640647175, "grad_norm": 0.06463050097227097, "learning_rate": 2.9157252341754187e-05, "loss": 9.730327874422074e-05, "step": 249680 }, { "epoch": 70.87425489639512, "grad_norm": 0.04922890290617943, "learning_rate": 2.9154413851830825e-05, "loss": 8.384846150875092e-05, "step": 249690 }, { "epoch": 70.87709338631848, "grad_norm": 0.013529431074857712, "learning_rate": 2.9151575361907467e-05, "loss": 0.0013541525229811668, "step": 249700 }, { "epoch": 70.87993187624184, "grad_norm": 0.08820676803588867, "learning_rate": 2.9148736871984105e-05, "loss": 0.0005833595991134643, "step": 249710 }, { "epoch": 70.8827703661652, "grad_norm": 0.17434541881084442, "learning_rate": 2.9145898382060743e-05, "loss": 0.00017890986055135727, "step": 249720 }, { "epoch": 70.88560885608857, "grad_norm": 0.28074729442596436, "learning_rate": 2.9143059892137387e-05, "loss": 0.0012887699529528617, "step": 249730 }, { "epoch": 70.88844734601192, "grad_norm": 0.007532993331551552, "learning_rate": 2.9140221402214025e-05, "loss": 7.10049644112587e-05, "step": 249740 }, { "epoch": 70.89128583593528, "grad_norm": 0.055172398686409, "learning_rate": 2.913738291229066e-05, "loss": 0.008674873411655426, "step": 249750 }, { "epoch": 70.89412432585864, "grad_norm": 0.002402934478595853, "learning_rate": 2.9134544422367305e-05, "loss": 8.252877742052079e-05, "step": 249760 }, { "epoch": 70.896962815782, "grad_norm": 0.036893606185913086, "learning_rate": 2.9131705932443943e-05, "loss": 0.0003785686567425728, "step": 249770 }, { "epoch": 70.89980130570537, "grad_norm": 0.03172261640429497, "learning_rate": 2.912886744252058e-05, "loss": 8.709151297807694e-05, "step": 249780 }, { "epoch": 70.90263979562873, "grad_norm": 0.03953960910439491, "learning_rate": 2.912602895259722e-05, "loss": 4.6459212899208066e-05, "step": 249790 }, { "epoch": 70.90547828555209, "grad_norm": 0.0023808523546904325, "learning_rate": 2.912319046267386e-05, "loss": 0.001982230320572853, "step": 249800 }, { "epoch": 70.90831677547544, "grad_norm": 0.004087985958904028, "learning_rate": 2.9120351972750498e-05, "loss": 0.0010667890310287476, "step": 249810 }, { "epoch": 70.9111552653988, "grad_norm": 0.0022527684923261404, "learning_rate": 2.9117513482827136e-05, "loss": 4.018247127532959e-05, "step": 249820 }, { "epoch": 70.91399375532217, "grad_norm": 0.009099393151700497, "learning_rate": 2.9114674992903777e-05, "loss": 5.7816319167613986e-05, "step": 249830 }, { "epoch": 70.91683224524553, "grad_norm": 0.009310293942689896, "learning_rate": 2.9111836502980415e-05, "loss": 0.0018581243231892586, "step": 249840 }, { "epoch": 70.91967073516889, "grad_norm": 0.007679580710828304, "learning_rate": 2.9109281862049392e-05, "loss": 0.008808840066194534, "step": 249850 }, { "epoch": 70.92250922509226, "grad_norm": 0.022108228877186775, "learning_rate": 2.910644337212603e-05, "loss": 0.0012321101501584053, "step": 249860 }, { "epoch": 70.92534771501562, "grad_norm": 0.019995497539639473, "learning_rate": 2.9103604882202668e-05, "loss": 0.0011793909594416617, "step": 249870 }, { "epoch": 70.92818620493897, "grad_norm": 0.0029033476021140814, "learning_rate": 2.910076639227931e-05, "loss": 0.0013557370752096177, "step": 249880 }, { "epoch": 70.93102469486233, "grad_norm": 0.08384434133768082, "learning_rate": 2.9097927902355947e-05, "loss": 0.0012028006836771964, "step": 249890 }, { "epoch": 70.9338631847857, "grad_norm": 0.00299613899551332, "learning_rate": 2.9095089412432585e-05, "loss": 0.004388758540153503, "step": 249900 }, { "epoch": 70.93670167470906, "grad_norm": 0.002354811877012253, "learning_rate": 2.909225092250923e-05, "loss": 0.00011127665638923644, "step": 249910 }, { "epoch": 70.93954016463242, "grad_norm": 0.022733226418495178, "learning_rate": 2.9089412432585868e-05, "loss": 0.00015730392187833785, "step": 249920 }, { "epoch": 70.94237865455578, "grad_norm": 0.004246091935783625, "learning_rate": 2.9086573942662503e-05, "loss": 0.00016429033130407333, "step": 249930 }, { "epoch": 70.94521714447913, "grad_norm": 0.005311186425387859, "learning_rate": 2.908373545273914e-05, "loss": 4.46520745754242e-05, "step": 249940 }, { "epoch": 70.9480556344025, "grad_norm": 0.12177932262420654, "learning_rate": 2.9080896962815785e-05, "loss": 0.000141189806163311, "step": 249950 }, { "epoch": 70.95089412432586, "grad_norm": 0.0064924065954983234, "learning_rate": 2.9078058472892423e-05, "loss": 4.6497955918312075e-05, "step": 249960 }, { "epoch": 70.95373261424922, "grad_norm": 0.00501705426722765, "learning_rate": 2.907521998296906e-05, "loss": 3.196615725755692e-05, "step": 249970 }, { "epoch": 70.95657110417258, "grad_norm": 0.05973195284605026, "learning_rate": 2.9072381493045703e-05, "loss": 0.007776347547769546, "step": 249980 }, { "epoch": 70.95940959409594, "grad_norm": 0.050943631678819656, "learning_rate": 2.906954300312234e-05, "loss": 0.00011157412081956864, "step": 249990 }, { "epoch": 70.96224808401931, "grad_norm": 0.003492925548925996, "learning_rate": 2.906670451319898e-05, "loss": 5.083493888378143e-05, "step": 250000 }, { "epoch": 70.96224808401931, "eval_accuracy": 0.9846124499268774, "eval_loss": 0.06644155085086823, "eval_runtime": 45.5354, "eval_samples_per_second": 345.38, "eval_steps_per_second": 5.402, "step": 250000 }, { "epoch": 70.96508657394266, "grad_norm": 0.006484625861048698, "learning_rate": 2.906386602327562e-05, "loss": 7.580183446407319e-05, "step": 250010 }, { "epoch": 70.96792506386602, "grad_norm": 0.009534400887787342, "learning_rate": 2.9061027533352258e-05, "loss": 7.701925933361054e-05, "step": 250020 }, { "epoch": 70.97076355378938, "grad_norm": 0.010171466507017612, "learning_rate": 2.9058189043428896e-05, "loss": 5.021467804908752e-05, "step": 250030 }, { "epoch": 70.97360204371275, "grad_norm": 0.0018805862637236714, "learning_rate": 2.9055350553505534e-05, "loss": 2.5855936110019682e-05, "step": 250040 }, { "epoch": 70.97644053363611, "grad_norm": 0.0010484032100066543, "learning_rate": 2.9052512063582175e-05, "loss": 0.00022617373615503312, "step": 250050 }, { "epoch": 70.97927902355947, "grad_norm": 0.0011949442559853196, "learning_rate": 2.9049673573658813e-05, "loss": 0.0005056731402873993, "step": 250060 }, { "epoch": 70.98211751348283, "grad_norm": 0.09579981863498688, "learning_rate": 2.904683508373545e-05, "loss": 9.599346667528152e-05, "step": 250070 }, { "epoch": 70.98495600340618, "grad_norm": 0.0064655994065105915, "learning_rate": 2.9043996593812096e-05, "loss": 0.0002158578485250473, "step": 250080 }, { "epoch": 70.98779449332955, "grad_norm": 0.004277331754565239, "learning_rate": 2.9041158103888734e-05, "loss": 0.0028336357325315477, "step": 250090 }, { "epoch": 70.99063298325291, "grad_norm": 0.1203240156173706, "learning_rate": 2.903831961396537e-05, "loss": 6.011631339788437e-05, "step": 250100 }, { "epoch": 70.99347147317627, "grad_norm": 0.010019155219197273, "learning_rate": 2.9035481124042013e-05, "loss": 7.561296224594116e-05, "step": 250110 }, { "epoch": 70.99630996309963, "grad_norm": 0.006395756267011166, "learning_rate": 2.903264263411865e-05, "loss": 9.774994105100631e-05, "step": 250120 }, { "epoch": 70.999148453023, "grad_norm": 0.022935746237635612, "learning_rate": 2.902980414419529e-05, "loss": 0.00032211914658546447, "step": 250130 }, { "epoch": 71.00198694294635, "grad_norm": 0.04527295008301735, "learning_rate": 2.902696565427193e-05, "loss": 4.4686772162094714e-05, "step": 250140 }, { "epoch": 71.00482543286971, "grad_norm": 0.013589567504823208, "learning_rate": 2.902412716434857e-05, "loss": 0.0025041690096259117, "step": 250150 }, { "epoch": 71.00766392279307, "grad_norm": 0.008171748369932175, "learning_rate": 2.9021288674425207e-05, "loss": 6.792433559894561e-05, "step": 250160 }, { "epoch": 71.01050241271643, "grad_norm": 0.015320559963583946, "learning_rate": 2.9018450184501845e-05, "loss": 2.282802015542984e-05, "step": 250170 }, { "epoch": 71.0133409026398, "grad_norm": 0.01986028254032135, "learning_rate": 2.9015611694578486e-05, "loss": 0.0003235477954149246, "step": 250180 }, { "epoch": 71.01617939256316, "grad_norm": 0.03320768475532532, "learning_rate": 2.9012773204655124e-05, "loss": 0.0002908544614911079, "step": 250190 }, { "epoch": 71.01901788248652, "grad_norm": 0.028234757483005524, "learning_rate": 2.9009934714731762e-05, "loss": 1.922585070133209e-05, "step": 250200 }, { "epoch": 71.02185637240987, "grad_norm": 0.029240427538752556, "learning_rate": 2.9007096224808407e-05, "loss": 0.00010243821889162063, "step": 250210 }, { "epoch": 71.02469486233323, "grad_norm": 0.02687322348356247, "learning_rate": 2.900425773488504e-05, "loss": 0.0063629157841205595, "step": 250220 }, { "epoch": 71.0275333522566, "grad_norm": 11.042926788330078, "learning_rate": 2.900141924496168e-05, "loss": 0.008380267769098282, "step": 250230 }, { "epoch": 71.03037184217996, "grad_norm": 0.050857141613960266, "learning_rate": 2.8998580755038324e-05, "loss": 0.0038609810173511503, "step": 250240 }, { "epoch": 71.03321033210332, "grad_norm": 0.1722131371498108, "learning_rate": 2.8995742265114962e-05, "loss": 0.0002124592661857605, "step": 250250 }, { "epoch": 71.03604882202669, "grad_norm": 0.8010825514793396, "learning_rate": 2.89929037751916e-05, "loss": 0.0001989796757698059, "step": 250260 }, { "epoch": 71.03888731195005, "grad_norm": 0.003300502896308899, "learning_rate": 2.8990065285268235e-05, "loss": 0.00018040165305137633, "step": 250270 }, { "epoch": 71.0417258018734, "grad_norm": 0.006200821604579687, "learning_rate": 2.898722679534488e-05, "loss": 5.849786102771759e-05, "step": 250280 }, { "epoch": 71.04456429179676, "grad_norm": 0.015942445024847984, "learning_rate": 2.8984388305421518e-05, "loss": 0.0006967628374695778, "step": 250290 }, { "epoch": 71.04740278172012, "grad_norm": 0.023299960419535637, "learning_rate": 2.8981549815498156e-05, "loss": 0.0015887634828686715, "step": 250300 }, { "epoch": 71.05024127164349, "grad_norm": 0.02004060335457325, "learning_rate": 2.8978711325574797e-05, "loss": 6.124954670667648e-05, "step": 250310 }, { "epoch": 71.05307976156685, "grad_norm": 0.007181152235716581, "learning_rate": 2.8975872835651435e-05, "loss": 0.0010174768045544623, "step": 250320 }, { "epoch": 71.05591825149021, "grad_norm": 0.2669789493083954, "learning_rate": 2.8973034345728073e-05, "loss": 9.610746055841446e-05, "step": 250330 }, { "epoch": 71.05875674141357, "grad_norm": 0.002317033475264907, "learning_rate": 2.8970195855804714e-05, "loss": 0.003397687524557114, "step": 250340 }, { "epoch": 71.06159523133692, "grad_norm": 0.0026721935719251633, "learning_rate": 2.8967357365881352e-05, "loss": 0.0030646676197648047, "step": 250350 }, { "epoch": 71.06443372126029, "grad_norm": 0.044830016791820526, "learning_rate": 2.896451887595799e-05, "loss": 0.0001461755484342575, "step": 250360 }, { "epoch": 71.06727221118365, "grad_norm": 0.00462550250813365, "learning_rate": 2.8961680386034628e-05, "loss": 0.0015160497277975082, "step": 250370 }, { "epoch": 71.07011070110701, "grad_norm": 0.013125233352184296, "learning_rate": 2.8958841896111273e-05, "loss": 0.00031311847269535067, "step": 250380 }, { "epoch": 71.07294919103038, "grad_norm": 0.23752771317958832, "learning_rate": 2.895600340618791e-05, "loss": 0.00018746480345726012, "step": 250390 }, { "epoch": 71.07578768095374, "grad_norm": 0.07141891121864319, "learning_rate": 2.8953164916264546e-05, "loss": 0.00021403487771749498, "step": 250400 }, { "epoch": 71.07862617087709, "grad_norm": 0.015567664057016373, "learning_rate": 2.895032642634119e-05, "loss": 0.003631604090332985, "step": 250410 }, { "epoch": 71.08146466080045, "grad_norm": 0.06843525171279907, "learning_rate": 2.894748793641783e-05, "loss": 5.950033664703369e-05, "step": 250420 }, { "epoch": 71.08430315072381, "grad_norm": 0.0030827228911221027, "learning_rate": 2.8944649446494466e-05, "loss": 0.00011701285839080811, "step": 250430 }, { "epoch": 71.08714164064718, "grad_norm": 0.004277306608855724, "learning_rate": 2.8941810956571108e-05, "loss": 0.0002619970589876175, "step": 250440 }, { "epoch": 71.08998013057054, "grad_norm": 0.012443536892533302, "learning_rate": 2.8938972466647746e-05, "loss": 5.79124316573143e-05, "step": 250450 }, { "epoch": 71.0928186204939, "grad_norm": 0.007875747978687286, "learning_rate": 2.8936133976724384e-05, "loss": 4.819519817829132e-05, "step": 250460 }, { "epoch": 71.09565711041726, "grad_norm": 0.020804299041628838, "learning_rate": 2.893329548680102e-05, "loss": 0.0009720394387841225, "step": 250470 }, { "epoch": 71.09849560034061, "grad_norm": 0.24663014709949493, "learning_rate": 2.8930456996877663e-05, "loss": 0.00040580444037914277, "step": 250480 }, { "epoch": 71.10133409026398, "grad_norm": 0.024177875369787216, "learning_rate": 2.89276185069543e-05, "loss": 2.7627497911453246e-05, "step": 250490 }, { "epoch": 71.10417258018734, "grad_norm": 0.05516304075717926, "learning_rate": 2.892478001703094e-05, "loss": 4.4498220086097716e-05, "step": 250500 }, { "epoch": 71.10417258018734, "eval_accuracy": 0.9850575443504801, "eval_loss": 0.06187301501631737, "eval_runtime": 36.4392, "eval_samples_per_second": 431.595, "eval_steps_per_second": 6.751, "step": 250500 }, { "epoch": 71.1070110701107, "grad_norm": 0.004956018179655075, "learning_rate": 2.8921941527107584e-05, "loss": 0.00013107247650623322, "step": 250510 }, { "epoch": 71.10984956003406, "grad_norm": 0.0024792435579001904, "learning_rate": 2.891910303718422e-05, "loss": 0.00010326951742172241, "step": 250520 }, { "epoch": 71.11268804995743, "grad_norm": 0.008143090642988682, "learning_rate": 2.8916264547260856e-05, "loss": 4.551094025373459e-05, "step": 250530 }, { "epoch": 71.11552653988079, "grad_norm": 0.8361321091651917, "learning_rate": 2.89134260573375e-05, "loss": 0.00045908354222774507, "step": 250540 }, { "epoch": 71.11836502980414, "grad_norm": 0.03542593866586685, "learning_rate": 2.891058756741414e-05, "loss": 4.737619310617447e-05, "step": 250550 }, { "epoch": 71.1212035197275, "grad_norm": 0.0024343254044651985, "learning_rate": 2.8907749077490777e-05, "loss": 0.000344119593501091, "step": 250560 }, { "epoch": 71.12404200965086, "grad_norm": 0.009768582880496979, "learning_rate": 2.890491058756741e-05, "loss": 0.0001517055556178093, "step": 250570 }, { "epoch": 71.12688049957423, "grad_norm": 1.5276061296463013, "learning_rate": 2.8902072097644056e-05, "loss": 0.0004273515194654465, "step": 250580 }, { "epoch": 71.12971898949759, "grad_norm": 0.0029802368953824043, "learning_rate": 2.8899233607720694e-05, "loss": 0.00017161481082439423, "step": 250590 }, { "epoch": 71.13255747942095, "grad_norm": 0.1638975292444229, "learning_rate": 2.8896395117797332e-05, "loss": 0.0024620940908789634, "step": 250600 }, { "epoch": 71.13539596934432, "grad_norm": 0.22942471504211426, "learning_rate": 2.8893556627873974e-05, "loss": 0.00023485906422138214, "step": 250610 }, { "epoch": 71.13823445926766, "grad_norm": 0.15664945542812347, "learning_rate": 2.8890718137950612e-05, "loss": 0.00031718909740448, "step": 250620 }, { "epoch": 71.14107294919103, "grad_norm": 0.05417347699403763, "learning_rate": 2.888787964802725e-05, "loss": 0.00824921652674675, "step": 250630 }, { "epoch": 71.14391143911439, "grad_norm": 0.02097911573946476, "learning_rate": 2.888504115810389e-05, "loss": 0.00023959837853908538, "step": 250640 }, { "epoch": 71.14674992903775, "grad_norm": 0.016307948157191277, "learning_rate": 2.888220266818053e-05, "loss": 0.00023645143955945968, "step": 250650 }, { "epoch": 71.14958841896112, "grad_norm": 0.010937315411865711, "learning_rate": 2.8879364178257167e-05, "loss": 0.0007065601646900177, "step": 250660 }, { "epoch": 71.15242690888448, "grad_norm": 0.02363019436597824, "learning_rate": 2.8876525688333805e-05, "loss": 0.00043542757630348204, "step": 250670 }, { "epoch": 71.15526539880783, "grad_norm": 0.09893131256103516, "learning_rate": 2.887368719841045e-05, "loss": 0.0003948384895920753, "step": 250680 }, { "epoch": 71.15810388873119, "grad_norm": 0.0948166698217392, "learning_rate": 2.8870848708487084e-05, "loss": 0.00013828445225954055, "step": 250690 }, { "epoch": 71.16094237865455, "grad_norm": 12.671445846557617, "learning_rate": 2.8868010218563722e-05, "loss": 0.004101072251796722, "step": 250700 }, { "epoch": 71.16378086857792, "grad_norm": 0.012608714401721954, "learning_rate": 2.8865171728640367e-05, "loss": 0.00022367127239704133, "step": 250710 }, { "epoch": 71.16661935850128, "grad_norm": 0.010905729606747627, "learning_rate": 2.8862333238717005e-05, "loss": 0.0016220588237047195, "step": 250720 }, { "epoch": 71.16945784842464, "grad_norm": 0.01328352652490139, "learning_rate": 2.8859494748793643e-05, "loss": 0.00016861073672771453, "step": 250730 }, { "epoch": 71.172296338348, "grad_norm": 0.018819862976670265, "learning_rate": 2.8856656258870285e-05, "loss": 7.181614637374878e-05, "step": 250740 }, { "epoch": 71.17513482827135, "grad_norm": 0.008631385862827301, "learning_rate": 2.8853817768946923e-05, "loss": 0.00021141674369573593, "step": 250750 }, { "epoch": 71.17797331819472, "grad_norm": 0.004832505248486996, "learning_rate": 2.885097927902356e-05, "loss": 0.001412680372595787, "step": 250760 }, { "epoch": 71.18081180811808, "grad_norm": 0.007143383380025625, "learning_rate": 2.88481407891002e-05, "loss": 7.620565593242645e-05, "step": 250770 }, { "epoch": 71.18365029804144, "grad_norm": 0.004764864686876535, "learning_rate": 2.884530229917684e-05, "loss": 8.53411853313446e-05, "step": 250780 }, { "epoch": 71.1864887879648, "grad_norm": 0.0022277210373431444, "learning_rate": 2.8842463809253478e-05, "loss": 0.0004856200888752937, "step": 250790 }, { "epoch": 71.18932727788817, "grad_norm": 0.010429168120026588, "learning_rate": 2.8839625319330116e-05, "loss": 0.002822296507656574, "step": 250800 }, { "epoch": 71.19216576781153, "grad_norm": 0.011783945374190807, "learning_rate": 2.8836786829406757e-05, "loss": 0.0003999633714556694, "step": 250810 }, { "epoch": 71.19500425773488, "grad_norm": 0.4695257246494293, "learning_rate": 2.8833948339483395e-05, "loss": 0.001403205282986164, "step": 250820 }, { "epoch": 71.19784274765824, "grad_norm": 0.1377018392086029, "learning_rate": 2.8831109849560033e-05, "loss": 0.0005734022706747055, "step": 250830 }, { "epoch": 71.2006812375816, "grad_norm": 0.12475556880235672, "learning_rate": 2.8828271359636678e-05, "loss": 8.443221449851989e-05, "step": 250840 }, { "epoch": 71.20351972750497, "grad_norm": 0.05431528016924858, "learning_rate": 2.8825432869713316e-05, "loss": 0.014053702354431152, "step": 250850 }, { "epoch": 71.20635821742833, "grad_norm": 0.0035768866073340178, "learning_rate": 2.882259437978995e-05, "loss": 0.000413505919277668, "step": 250860 }, { "epoch": 71.2091967073517, "grad_norm": 0.0026885231491178274, "learning_rate": 2.881975588986659e-05, "loss": 0.00041285976767539977, "step": 250870 }, { "epoch": 71.21203519727504, "grad_norm": 0.0038081479724496603, "learning_rate": 2.8816917399943233e-05, "loss": 0.000354657880961895, "step": 250880 }, { "epoch": 71.2148736871984, "grad_norm": 0.09241923689842224, "learning_rate": 2.881407891001987e-05, "loss": 0.00011264383792877197, "step": 250890 }, { "epoch": 71.21771217712177, "grad_norm": 0.003073601284995675, "learning_rate": 2.881124042009651e-05, "loss": 7.960367947816849e-05, "step": 250900 }, { "epoch": 71.22055066704513, "grad_norm": 0.01213739812374115, "learning_rate": 2.880840193017315e-05, "loss": 0.0002581508830189705, "step": 250910 }, { "epoch": 71.2233891569685, "grad_norm": 0.0023586249444633722, "learning_rate": 2.880556344024979e-05, "loss": 0.000311276875436306, "step": 250920 }, { "epoch": 71.22622764689186, "grad_norm": 0.003170587122440338, "learning_rate": 2.8802724950326427e-05, "loss": 0.00012789927423000335, "step": 250930 }, { "epoch": 71.22906613681522, "grad_norm": 0.0510261096060276, "learning_rate": 2.8799886460403068e-05, "loss": 8.876807987689972e-05, "step": 250940 }, { "epoch": 71.23190462673857, "grad_norm": 0.005895329639315605, "learning_rate": 2.8797047970479706e-05, "loss": 8.00546258687973e-05, "step": 250950 }, { "epoch": 71.23474311666193, "grad_norm": 0.005686534568667412, "learning_rate": 2.8794209480556344e-05, "loss": 0.00013664048165082932, "step": 250960 }, { "epoch": 71.2375816065853, "grad_norm": 0.022959012538194656, "learning_rate": 2.8791370990632982e-05, "loss": 4.816204309463501e-05, "step": 250970 }, { "epoch": 71.24042009650866, "grad_norm": 0.009422270581126213, "learning_rate": 2.8788532500709627e-05, "loss": 0.0005822645500302315, "step": 250980 }, { "epoch": 71.24325858643202, "grad_norm": 0.0013171164318919182, "learning_rate": 2.878569401078626e-05, "loss": 3.925077617168426e-05, "step": 250990 }, { "epoch": 71.24609707635538, "grad_norm": 0.002780684968456626, "learning_rate": 2.87828555208629e-05, "loss": 4.966072738170624e-05, "step": 251000 }, { "epoch": 71.24609707635538, "eval_accuracy": 0.9858205633623705, "eval_loss": 0.05940520390868187, "eval_runtime": 35.2289, "eval_samples_per_second": 446.423, "eval_steps_per_second": 6.983, "step": 251000 }, { "epoch": 71.24893556627875, "grad_norm": 0.0041898260824382305, "learning_rate": 2.8780017030939544e-05, "loss": 3.363043069839478e-05, "step": 251010 }, { "epoch": 71.2517740562021, "grad_norm": 0.03615047037601471, "learning_rate": 2.8777178541016182e-05, "loss": 5.951113998889923e-05, "step": 251020 }, { "epoch": 71.25461254612546, "grad_norm": 0.25860732793807983, "learning_rate": 2.877434005109282e-05, "loss": 0.0024379029870033263, "step": 251030 }, { "epoch": 71.25745103604882, "grad_norm": 0.009865312837064266, "learning_rate": 2.877150156116946e-05, "loss": 3.57065349817276e-05, "step": 251040 }, { "epoch": 71.26028952597218, "grad_norm": 0.005342736374586821, "learning_rate": 2.87686630712461e-05, "loss": 3.310199826955795e-05, "step": 251050 }, { "epoch": 71.26312801589555, "grad_norm": 0.004425726365298033, "learning_rate": 2.8765824581322737e-05, "loss": 2.8317049145698548e-05, "step": 251060 }, { "epoch": 71.26596650581891, "grad_norm": 0.005530763883143663, "learning_rate": 2.8762986091399375e-05, "loss": 4.001352936029434e-05, "step": 251070 }, { "epoch": 71.26880499574227, "grad_norm": 0.005355593748390675, "learning_rate": 2.8760147601476017e-05, "loss": 5.5928342044353484e-05, "step": 251080 }, { "epoch": 71.27164348566562, "grad_norm": 0.007039492949843407, "learning_rate": 2.8757309111552655e-05, "loss": 7.46624544262886e-05, "step": 251090 }, { "epoch": 71.27448197558898, "grad_norm": 0.020311616361141205, "learning_rate": 2.8754470621629293e-05, "loss": 5.3510256111621854e-05, "step": 251100 }, { "epoch": 71.27732046551235, "grad_norm": 0.0023551653139293194, "learning_rate": 2.8751632131705934e-05, "loss": 4.005562514066696e-05, "step": 251110 }, { "epoch": 71.28015895543571, "grad_norm": 0.04224555566906929, "learning_rate": 2.8748793641782572e-05, "loss": 3.1332485377788544e-05, "step": 251120 }, { "epoch": 71.28299744535907, "grad_norm": 0.023703129962086678, "learning_rate": 2.874595515185921e-05, "loss": 5.6681782007217406e-05, "step": 251130 }, { "epoch": 71.28583593528244, "grad_norm": 0.0007529925205744803, "learning_rate": 2.8743116661935855e-05, "loss": 0.00014334283769130708, "step": 251140 }, { "epoch": 71.28867442520578, "grad_norm": 0.002387979533523321, "learning_rate": 2.8740278172012493e-05, "loss": 5.596354603767395e-05, "step": 251150 }, { "epoch": 71.29151291512915, "grad_norm": 0.005787923000752926, "learning_rate": 2.8737439682089127e-05, "loss": 2.1166726946830748e-05, "step": 251160 }, { "epoch": 71.29435140505251, "grad_norm": 0.30686312913894653, "learning_rate": 2.8734601192165765e-05, "loss": 0.0005075536668300628, "step": 251170 }, { "epoch": 71.29718989497587, "grad_norm": 0.024375641718506813, "learning_rate": 2.873176270224241e-05, "loss": 6.900932639837265e-05, "step": 251180 }, { "epoch": 71.30002838489924, "grad_norm": 0.002143240300938487, "learning_rate": 2.8728924212319048e-05, "loss": 0.00039241015911102296, "step": 251190 }, { "epoch": 71.3028668748226, "grad_norm": 0.0015909227076917887, "learning_rate": 2.8726085722395686e-05, "loss": 2.7860701084136964e-05, "step": 251200 }, { "epoch": 71.30570536474596, "grad_norm": 0.0213791336864233, "learning_rate": 2.8723247232472328e-05, "loss": 0.002119671739637852, "step": 251210 }, { "epoch": 71.30854385466931, "grad_norm": 0.017306627705693245, "learning_rate": 2.8720408742548966e-05, "loss": 0.00034355204552412034, "step": 251220 }, { "epoch": 71.31138234459267, "grad_norm": 0.005757402628660202, "learning_rate": 2.8717570252625604e-05, "loss": 9.245369583368301e-05, "step": 251230 }, { "epoch": 71.31422083451604, "grad_norm": 0.007683765608817339, "learning_rate": 2.8714731762702245e-05, "loss": 0.0001054178923368454, "step": 251240 }, { "epoch": 71.3170593244394, "grad_norm": 0.011607573367655277, "learning_rate": 2.8711893272778883e-05, "loss": 0.00020021628588438035, "step": 251250 }, { "epoch": 71.31989781436276, "grad_norm": 0.0104740671813488, "learning_rate": 2.870905478285552e-05, "loss": 9.991023689508438e-05, "step": 251260 }, { "epoch": 71.32273630428612, "grad_norm": 0.00641315383836627, "learning_rate": 2.870621629293216e-05, "loss": 3.962982445955276e-05, "step": 251270 }, { "epoch": 71.32557479420949, "grad_norm": 0.0008422803948633373, "learning_rate": 2.87033778030088e-05, "loss": 4.0578469634056094e-05, "step": 251280 }, { "epoch": 71.32841328413284, "grad_norm": 0.013826580718159676, "learning_rate": 2.8700539313085438e-05, "loss": 0.00010291766375303268, "step": 251290 }, { "epoch": 71.3312517740562, "grad_norm": 0.003499868791550398, "learning_rate": 2.8697700823162076e-05, "loss": 6.180498749017716e-05, "step": 251300 }, { "epoch": 71.33409026397956, "grad_norm": 0.00990188680589199, "learning_rate": 2.869486233323872e-05, "loss": 5.3171999752521515e-05, "step": 251310 }, { "epoch": 71.33692875390292, "grad_norm": 0.01893266662955284, "learning_rate": 2.869202384331536e-05, "loss": 3.517866134643555e-05, "step": 251320 }, { "epoch": 71.33976724382629, "grad_norm": 0.009606171399354935, "learning_rate": 2.8689185353391994e-05, "loss": 4.8123486340045926e-05, "step": 251330 }, { "epoch": 71.34260573374965, "grad_norm": 0.0013098191702738404, "learning_rate": 2.8686346863468638e-05, "loss": 0.00015505049377679826, "step": 251340 }, { "epoch": 71.34544422367301, "grad_norm": 0.005140725523233414, "learning_rate": 2.8683508373545276e-05, "loss": 0.00015239380300045014, "step": 251350 }, { "epoch": 71.34828271359636, "grad_norm": 0.0021195625886321068, "learning_rate": 2.8680669883621914e-05, "loss": 9.439941495656968e-05, "step": 251360 }, { "epoch": 71.35112120351972, "grad_norm": 0.008967256173491478, "learning_rate": 2.8677831393698552e-05, "loss": 5.788225680589676e-05, "step": 251370 }, { "epoch": 71.35395969344309, "grad_norm": 0.005200359039008617, "learning_rate": 2.8674992903775194e-05, "loss": 7.354244589805604e-05, "step": 251380 }, { "epoch": 71.35679818336645, "grad_norm": 0.020679740235209465, "learning_rate": 2.867215441385183e-05, "loss": 3.9676018059253695e-05, "step": 251390 }, { "epoch": 71.35963667328981, "grad_norm": 0.011552002280950546, "learning_rate": 2.866931592392847e-05, "loss": 4.1701458394527434e-05, "step": 251400 }, { "epoch": 71.36247516321318, "grad_norm": 0.0033072500955313444, "learning_rate": 2.866647743400511e-05, "loss": 5.5182166397571565e-05, "step": 251410 }, { "epoch": 71.36531365313652, "grad_norm": 0.007097106426954269, "learning_rate": 2.866363894408175e-05, "loss": 0.00019978266209363937, "step": 251420 }, { "epoch": 71.36815214305989, "grad_norm": 0.002186056924983859, "learning_rate": 2.8660800454158387e-05, "loss": 8.967462927103042e-05, "step": 251430 }, { "epoch": 71.37099063298325, "grad_norm": 0.009963003918528557, "learning_rate": 2.8657961964235032e-05, "loss": 9.336266666650772e-05, "step": 251440 }, { "epoch": 71.37382912290661, "grad_norm": 0.005855197552591562, "learning_rate": 2.865512347431167e-05, "loss": 8.581690490245819e-05, "step": 251450 }, { "epoch": 71.37666761282998, "grad_norm": 0.10785222798585892, "learning_rate": 2.8652284984388304e-05, "loss": 0.00020293761044740677, "step": 251460 }, { "epoch": 71.37950610275334, "grad_norm": 0.029580913484096527, "learning_rate": 2.864944649446495e-05, "loss": 7.553491741418839e-05, "step": 251470 }, { "epoch": 71.3823445926767, "grad_norm": 0.05825117975473404, "learning_rate": 2.8646608004541587e-05, "loss": 9.185392409563064e-05, "step": 251480 }, { "epoch": 71.38518308260005, "grad_norm": 0.003113938495516777, "learning_rate": 2.8643769514618225e-05, "loss": 6.010178476572037e-05, "step": 251490 }, { "epoch": 71.38802157252341, "grad_norm": 0.018052246421575546, "learning_rate": 2.8640931024694863e-05, "loss": 0.0004470190033316612, "step": 251500 }, { "epoch": 71.38802157252341, "eval_accuracy": 0.9858205633623705, "eval_loss": 0.06038263440132141, "eval_runtime": 35.6989, "eval_samples_per_second": 440.546, "eval_steps_per_second": 6.891, "step": 251500 }, { "epoch": 71.39086006244678, "grad_norm": 0.22936925292015076, "learning_rate": 2.8638092534771504e-05, "loss": 0.0001336023211479187, "step": 251510 }, { "epoch": 71.39369855237014, "grad_norm": 0.021393897011876106, "learning_rate": 2.8635254044848142e-05, "loss": 0.0007040077820420265, "step": 251520 }, { "epoch": 71.3965370422935, "grad_norm": 0.006471514236181974, "learning_rate": 2.863241555492478e-05, "loss": 2.1549686789512634e-05, "step": 251530 }, { "epoch": 71.39937553221687, "grad_norm": 0.009332943707704544, "learning_rate": 2.8629577065001422e-05, "loss": 2.1748803555965422e-05, "step": 251540 }, { "epoch": 71.40221402214023, "grad_norm": 0.0041479370556771755, "learning_rate": 2.862673857507806e-05, "loss": 4.061684012413025e-05, "step": 251550 }, { "epoch": 71.40505251206358, "grad_norm": 0.009481648914515972, "learning_rate": 2.8623900085154698e-05, "loss": 2.0924769341945648e-05, "step": 251560 }, { "epoch": 71.40789100198694, "grad_norm": 0.01695333607494831, "learning_rate": 2.8621061595231342e-05, "loss": 4.449300467967987e-05, "step": 251570 }, { "epoch": 71.4107294919103, "grad_norm": 0.004644263535737991, "learning_rate": 2.8618223105307977e-05, "loss": 3.355983644723892e-05, "step": 251580 }, { "epoch": 71.41356798183367, "grad_norm": 0.0013058483600616455, "learning_rate": 2.8615384615384615e-05, "loss": 8.265841752290726e-05, "step": 251590 }, { "epoch": 71.41640647175703, "grad_norm": 0.005430298391729593, "learning_rate": 2.8612546125461253e-05, "loss": 3.493409603834152e-05, "step": 251600 }, { "epoch": 71.41924496168039, "grad_norm": 0.013065618462860584, "learning_rate": 2.8609707635537898e-05, "loss": 2.224072813987732e-05, "step": 251610 }, { "epoch": 71.42208345160374, "grad_norm": 2.102433204650879, "learning_rate": 2.8606869145614536e-05, "loss": 0.00029016193002462387, "step": 251620 }, { "epoch": 71.4249219415271, "grad_norm": 0.004249281715601683, "learning_rate": 2.860403065569117e-05, "loss": 4.1386298835277555e-05, "step": 251630 }, { "epoch": 71.42776043145047, "grad_norm": 0.11664419621229172, "learning_rate": 2.8601192165767815e-05, "loss": 9.205099195241928e-05, "step": 251640 }, { "epoch": 71.43059892137383, "grad_norm": 0.0011262353509664536, "learning_rate": 2.8598353675844453e-05, "loss": 3.333818167448044e-05, "step": 251650 }, { "epoch": 71.43343741129719, "grad_norm": 0.0019629683811217546, "learning_rate": 2.859551518592109e-05, "loss": 9.895432740449905e-05, "step": 251660 }, { "epoch": 71.43627590122055, "grad_norm": 0.0033394666388630867, "learning_rate": 2.8592676695997733e-05, "loss": 7.066484540700913e-05, "step": 251670 }, { "epoch": 71.43911439114392, "grad_norm": 0.006793641019612551, "learning_rate": 2.858983820607437e-05, "loss": 3.577042371034622e-05, "step": 251680 }, { "epoch": 71.44195288106727, "grad_norm": 0.002282860456034541, "learning_rate": 2.858699971615101e-05, "loss": 7.79327005147934e-05, "step": 251690 }, { "epoch": 71.44479137099063, "grad_norm": 0.0071822162717580795, "learning_rate": 2.8584161226227646e-05, "loss": 0.0011529093608260155, "step": 251700 }, { "epoch": 71.44762986091399, "grad_norm": 16.941314697265625, "learning_rate": 2.8581322736304288e-05, "loss": 0.004601505398750305, "step": 251710 }, { "epoch": 71.45046835083735, "grad_norm": 0.13338395953178406, "learning_rate": 2.8578484246380926e-05, "loss": 9.078308939933777e-05, "step": 251720 }, { "epoch": 71.45330684076072, "grad_norm": 0.004762478172779083, "learning_rate": 2.8575645756457564e-05, "loss": 1.714024692773819e-05, "step": 251730 }, { "epoch": 71.45614533068408, "grad_norm": 0.013165222480893135, "learning_rate": 2.857280726653421e-05, "loss": 4.11350280046463e-05, "step": 251740 }, { "epoch": 71.45898382060744, "grad_norm": 0.00270446646027267, "learning_rate": 2.8569968776610843e-05, "loss": 4.814900457859039e-05, "step": 251750 }, { "epoch": 71.46182231053079, "grad_norm": 0.0014444446424022317, "learning_rate": 2.856713028668748e-05, "loss": 3.593694418668747e-05, "step": 251760 }, { "epoch": 71.46466080045415, "grad_norm": 0.06727464497089386, "learning_rate": 2.8564291796764126e-05, "loss": 5.409158766269684e-05, "step": 251770 }, { "epoch": 71.46749929037752, "grad_norm": 0.08423925191164017, "learning_rate": 2.8561453306840764e-05, "loss": 2.569258213043213e-05, "step": 251780 }, { "epoch": 71.47033778030088, "grad_norm": 0.005169228184968233, "learning_rate": 2.8558614816917402e-05, "loss": 4.2146071791648865e-05, "step": 251790 }, { "epoch": 71.47317627022424, "grad_norm": 0.009952844120562077, "learning_rate": 2.8555776326994037e-05, "loss": 3.017168492078781e-05, "step": 251800 }, { "epoch": 71.4760147601476, "grad_norm": 0.027407847344875336, "learning_rate": 2.855293783707068e-05, "loss": 9.712167084217072e-05, "step": 251810 }, { "epoch": 71.47885325007097, "grad_norm": 0.0133901946246624, "learning_rate": 2.855009934714732e-05, "loss": 3.384556621313095e-05, "step": 251820 }, { "epoch": 71.48169173999432, "grad_norm": 0.019706174731254578, "learning_rate": 2.8547260857223957e-05, "loss": 2.542361617088318e-05, "step": 251830 }, { "epoch": 71.48453022991768, "grad_norm": 0.002524902345612645, "learning_rate": 2.85444223673006e-05, "loss": 5.978420376777649e-05, "step": 251840 }, { "epoch": 71.48736871984104, "grad_norm": 0.009180456399917603, "learning_rate": 2.8541583877377237e-05, "loss": 3.31951305270195e-05, "step": 251850 }, { "epoch": 71.4902072097644, "grad_norm": 0.004285106901079416, "learning_rate": 2.8538745387453875e-05, "loss": 5.322694778442383e-05, "step": 251860 }, { "epoch": 71.49304569968777, "grad_norm": 0.009615900926291943, "learning_rate": 2.853590689753052e-05, "loss": 0.0011089008301496505, "step": 251870 }, { "epoch": 71.49588418961113, "grad_norm": 0.0016851943219080567, "learning_rate": 2.8533068407607154e-05, "loss": 0.00014257635921239854, "step": 251880 }, { "epoch": 71.49872267953448, "grad_norm": 0.023659635335206985, "learning_rate": 2.8530229917683792e-05, "loss": 0.0003182167187333107, "step": 251890 }, { "epoch": 71.50156116945784, "grad_norm": 0.30459973216056824, "learning_rate": 2.852739142776043e-05, "loss": 0.0001684160903096199, "step": 251900 }, { "epoch": 71.5043996593812, "grad_norm": 0.002489769598469138, "learning_rate": 2.8524552937837075e-05, "loss": 4.892610013484955e-05, "step": 251910 }, { "epoch": 71.50723814930457, "grad_norm": 0.020092114806175232, "learning_rate": 2.8521714447913713e-05, "loss": 0.00022199507802724838, "step": 251920 }, { "epoch": 71.51007663922793, "grad_norm": 0.011337739415466785, "learning_rate": 2.8518875957990347e-05, "loss": 0.005472828447818756, "step": 251930 }, { "epoch": 71.5129151291513, "grad_norm": 0.007945068180561066, "learning_rate": 2.8516037468066992e-05, "loss": 0.0008275443688035011, "step": 251940 }, { "epoch": 71.51575361907466, "grad_norm": 0.1021331176161766, "learning_rate": 2.851319897814363e-05, "loss": 0.00023516155779361724, "step": 251950 }, { "epoch": 71.518592108998, "grad_norm": 0.004212376661598682, "learning_rate": 2.8510360488220268e-05, "loss": 0.00012573041021823883, "step": 251960 }, { "epoch": 71.52143059892137, "grad_norm": 0.005138658452779055, "learning_rate": 2.850752199829691e-05, "loss": 0.0031740739941596985, "step": 251970 }, { "epoch": 71.52426908884473, "grad_norm": 0.018382739275693893, "learning_rate": 2.8504683508373547e-05, "loss": 5.687512457370758e-05, "step": 251980 }, { "epoch": 71.5271075787681, "grad_norm": 0.04015055671334267, "learning_rate": 2.8501845018450185e-05, "loss": 6.701014935970307e-05, "step": 251990 }, { "epoch": 71.52994606869146, "grad_norm": 0.032644763588905334, "learning_rate": 2.8499006528526823e-05, "loss": 0.00018178131431341172, "step": 252000 }, { "epoch": 71.52994606869146, "eval_accuracy": 0.9857569784447129, "eval_loss": 0.057648397982120514, "eval_runtime": 35.6331, "eval_samples_per_second": 441.359, "eval_steps_per_second": 6.904, "step": 252000 }, { "epoch": 71.53278455861482, "grad_norm": 0.014443904161453247, "learning_rate": 2.8496168038603465e-05, "loss": 4.749447107315064e-05, "step": 252010 }, { "epoch": 71.53562304853818, "grad_norm": 0.0029431581497192383, "learning_rate": 2.8493329548680103e-05, "loss": 3.08675691485405e-05, "step": 252020 }, { "epoch": 71.53846153846153, "grad_norm": 0.0025479181203991175, "learning_rate": 2.849049105875674e-05, "loss": 3.677923232316971e-05, "step": 252030 }, { "epoch": 71.5413000283849, "grad_norm": 0.0006233072490431368, "learning_rate": 2.8487652568833385e-05, "loss": 2.9708817601203917e-05, "step": 252040 }, { "epoch": 71.54413851830826, "grad_norm": 0.0018402665155008435, "learning_rate": 2.848481407891002e-05, "loss": 4.406701773405075e-05, "step": 252050 }, { "epoch": 71.54697700823162, "grad_norm": 0.07094386219978333, "learning_rate": 2.8481975588986658e-05, "loss": 0.00020607933402061462, "step": 252060 }, { "epoch": 71.54981549815498, "grad_norm": 0.0164048969745636, "learning_rate": 2.8479137099063303e-05, "loss": 0.0001699170097708702, "step": 252070 }, { "epoch": 71.55265398807835, "grad_norm": 0.024405673146247864, "learning_rate": 2.847629860913994e-05, "loss": 0.0001419438049197197, "step": 252080 }, { "epoch": 71.5554924780017, "grad_norm": 0.011368690989911556, "learning_rate": 2.847346011921658e-05, "loss": 0.0002493094652891159, "step": 252090 }, { "epoch": 71.55833096792506, "grad_norm": 0.007451561279594898, "learning_rate": 2.8470621629293213e-05, "loss": 0.0001455981284379959, "step": 252100 }, { "epoch": 71.56116945784842, "grad_norm": 0.06437266618013382, "learning_rate": 2.8467783139369858e-05, "loss": 0.0001776626333594322, "step": 252110 }, { "epoch": 71.56400794777178, "grad_norm": 0.03220352157950401, "learning_rate": 2.8464944649446496e-05, "loss": 0.0053870327770709995, "step": 252120 }, { "epoch": 71.56684643769515, "grad_norm": 0.4297727644443512, "learning_rate": 2.8462106159523134e-05, "loss": 0.002638646401464939, "step": 252130 }, { "epoch": 71.56968492761851, "grad_norm": 0.004363361746072769, "learning_rate": 2.8459267669599775e-05, "loss": 6.532128900289535e-05, "step": 252140 }, { "epoch": 71.57252341754187, "grad_norm": 0.03645380958914757, "learning_rate": 2.8456429179676413e-05, "loss": 9.74796712398529e-05, "step": 252150 }, { "epoch": 71.57536190746522, "grad_norm": 11.347609519958496, "learning_rate": 2.845359068975305e-05, "loss": 0.0018705675378441811, "step": 252160 }, { "epoch": 71.57820039738858, "grad_norm": 0.0024660213384777308, "learning_rate": 2.8450752199829693e-05, "loss": 0.0019566383212804793, "step": 252170 }, { "epoch": 71.58103888731195, "grad_norm": 0.0062794978730380535, "learning_rate": 2.844791370990633e-05, "loss": 2.7459673583507536e-05, "step": 252180 }, { "epoch": 71.58387737723531, "grad_norm": 0.0008262211340479553, "learning_rate": 2.844507521998297e-05, "loss": 0.00011037904769182206, "step": 252190 }, { "epoch": 71.58671586715867, "grad_norm": 0.010668627917766571, "learning_rate": 2.8442236730059607e-05, "loss": 4.092734307050705e-05, "step": 252200 }, { "epoch": 71.58955435708204, "grad_norm": 0.007604963146150112, "learning_rate": 2.843939824013625e-05, "loss": 5.900692194700241e-05, "step": 252210 }, { "epoch": 71.5923928470054, "grad_norm": 0.06549397855997086, "learning_rate": 2.8436559750212886e-05, "loss": 0.00013822242617607118, "step": 252220 }, { "epoch": 71.59523133692875, "grad_norm": 0.004277936648577452, "learning_rate": 2.8433721260289524e-05, "loss": 0.00012658722698688507, "step": 252230 }, { "epoch": 71.59806982685211, "grad_norm": 0.012047835625708103, "learning_rate": 2.843088277036617e-05, "loss": 5.2551738917827606e-05, "step": 252240 }, { "epoch": 71.60090831677547, "grad_norm": 0.018787438049912453, "learning_rate": 2.8428044280442807e-05, "loss": 7.381793111562728e-05, "step": 252250 }, { "epoch": 71.60374680669884, "grad_norm": 0.016403084620833397, "learning_rate": 2.8425205790519445e-05, "loss": 0.00010624360293149948, "step": 252260 }, { "epoch": 71.6065852966222, "grad_norm": 0.051318153738975525, "learning_rate": 2.8422367300596086e-05, "loss": 9.583737701177598e-05, "step": 252270 }, { "epoch": 71.60942378654556, "grad_norm": 0.013701790943741798, "learning_rate": 2.8419528810672724e-05, "loss": 1.807287335395813e-05, "step": 252280 }, { "epoch": 71.61226227646893, "grad_norm": 0.007588766515254974, "learning_rate": 2.8416690320749362e-05, "loss": 0.001166527159512043, "step": 252290 }, { "epoch": 71.61510076639227, "grad_norm": 0.0043242620304226875, "learning_rate": 2.8413851830826e-05, "loss": 3.996100276708603e-05, "step": 252300 }, { "epoch": 71.61793925631564, "grad_norm": 0.0014342163922265172, "learning_rate": 2.841101334090264e-05, "loss": 0.001651453785598278, "step": 252310 }, { "epoch": 71.620777746239, "grad_norm": 0.014094546437263489, "learning_rate": 2.840817485097928e-05, "loss": 0.00012003760784864425, "step": 252320 }, { "epoch": 71.62361623616236, "grad_norm": 0.08431573212146759, "learning_rate": 2.8405336361055918e-05, "loss": 6.956588476896286e-05, "step": 252330 }, { "epoch": 71.62645472608573, "grad_norm": 0.002859191270545125, "learning_rate": 2.8402497871132562e-05, "loss": 0.0007697848603129386, "step": 252340 }, { "epoch": 71.62929321600909, "grad_norm": 0.02393963187932968, "learning_rate": 2.8399659381209197e-05, "loss": 0.0001287795603275299, "step": 252350 }, { "epoch": 71.63213170593244, "grad_norm": 0.005817389115691185, "learning_rate": 2.8396820891285835e-05, "loss": 6.705410778522492e-05, "step": 252360 }, { "epoch": 71.6349701958558, "grad_norm": 0.0051080090925097466, "learning_rate": 2.839398240136248e-05, "loss": 8.611064404249192e-05, "step": 252370 }, { "epoch": 71.63780868577916, "grad_norm": 1.9556993246078491, "learning_rate": 2.8391143911439118e-05, "loss": 0.00031028371304273603, "step": 252380 }, { "epoch": 71.64064717570253, "grad_norm": 0.7369804978370667, "learning_rate": 2.8388305421515756e-05, "loss": 0.000235220231115818, "step": 252390 }, { "epoch": 71.64348566562589, "grad_norm": 0.002878261497244239, "learning_rate": 2.838546693159239e-05, "loss": 0.00024049542844295502, "step": 252400 }, { "epoch": 71.64632415554925, "grad_norm": 0.0014340408379212022, "learning_rate": 2.8382628441669035e-05, "loss": 0.00021839737892150878, "step": 252410 }, { "epoch": 71.64916264547261, "grad_norm": 0.0014039803063496947, "learning_rate": 2.8379789951745673e-05, "loss": 0.00017374586313962937, "step": 252420 }, { "epoch": 71.65200113539596, "grad_norm": 0.013517833314836025, "learning_rate": 2.837695146182231e-05, "loss": 0.0022539433091878893, "step": 252430 }, { "epoch": 71.65483962531933, "grad_norm": 0.0029194869566708803, "learning_rate": 2.8374112971898952e-05, "loss": 0.00041085816919803617, "step": 252440 }, { "epoch": 71.65767811524269, "grad_norm": 0.00631979713216424, "learning_rate": 2.837127448197559e-05, "loss": 2.0788796246051788e-05, "step": 252450 }, { "epoch": 71.66051660516605, "grad_norm": 0.008245821110904217, "learning_rate": 2.836843599205223e-05, "loss": 0.001100059598684311, "step": 252460 }, { "epoch": 71.66335509508941, "grad_norm": 0.02469627372920513, "learning_rate": 2.836559750212887e-05, "loss": 0.0011891059577465058, "step": 252470 }, { "epoch": 71.66619358501278, "grad_norm": 0.008589358069002628, "learning_rate": 2.8362759012205508e-05, "loss": 0.00010189283639192582, "step": 252480 }, { "epoch": 71.66903207493614, "grad_norm": 0.005400794092565775, "learning_rate": 2.8359920522282146e-05, "loss": 0.00036266706883907317, "step": 252490 }, { "epoch": 71.67187056485949, "grad_norm": 0.03383065387606621, "learning_rate": 2.8357082032358784e-05, "loss": 8.564107120037078e-05, "step": 252500 }, { "epoch": 71.67187056485949, "eval_accuracy": 0.9809245247027405, "eval_loss": 0.08145944774150848, "eval_runtime": 35.5079, "eval_samples_per_second": 442.916, "eval_steps_per_second": 6.928, "step": 252500 }, { "epoch": 71.67470905478285, "grad_norm": 0.010048430413007736, "learning_rate": 2.835424354243543e-05, "loss": 0.00010199695825576782, "step": 252510 }, { "epoch": 71.67754754470621, "grad_norm": 0.018030792474746704, "learning_rate": 2.8351405052512063e-05, "loss": 0.0002489158883690834, "step": 252520 }, { "epoch": 71.68038603462958, "grad_norm": 2.529346227645874, "learning_rate": 2.83485665625887e-05, "loss": 0.001349775493144989, "step": 252530 }, { "epoch": 71.68322452455294, "grad_norm": 0.002031040843576193, "learning_rate": 2.8345728072665346e-05, "loss": 0.00016116388142108917, "step": 252540 }, { "epoch": 71.6860630144763, "grad_norm": 0.035457294434309006, "learning_rate": 2.8342889582741984e-05, "loss": 0.00012468118220567703, "step": 252550 }, { "epoch": 71.68890150439967, "grad_norm": 0.0033313508611172438, "learning_rate": 2.8340051092818622e-05, "loss": 6.55587762594223e-05, "step": 252560 }, { "epoch": 71.69173999432302, "grad_norm": 0.12607288360595703, "learning_rate": 2.8337212602895263e-05, "loss": 5.6061707437038424e-05, "step": 252570 }, { "epoch": 71.69457848424638, "grad_norm": 0.020799268037080765, "learning_rate": 2.83343741129719e-05, "loss": 3.9499253034591675e-05, "step": 252580 }, { "epoch": 71.69741697416974, "grad_norm": 0.007758415769785643, "learning_rate": 2.833153562304854e-05, "loss": 7.912330329418182e-05, "step": 252590 }, { "epoch": 71.7002554640931, "grad_norm": 0.16789023578166962, "learning_rate": 2.8328697133125177e-05, "loss": 5.216635763645172e-05, "step": 252600 }, { "epoch": 71.70309395401647, "grad_norm": 0.04795987531542778, "learning_rate": 2.832585864320182e-05, "loss": 7.019564509391785e-05, "step": 252610 }, { "epoch": 71.70593244393983, "grad_norm": 0.09016842395067215, "learning_rate": 2.8323020153278456e-05, "loss": 9.824801236391068e-05, "step": 252620 }, { "epoch": 71.70877093386318, "grad_norm": 0.00960109755396843, "learning_rate": 2.8320181663355094e-05, "loss": 3.3838488161563876e-05, "step": 252630 }, { "epoch": 71.71160942378654, "grad_norm": 0.00851142406463623, "learning_rate": 2.8317343173431736e-05, "loss": 0.0001026911661028862, "step": 252640 }, { "epoch": 71.7144479137099, "grad_norm": 0.009194541722536087, "learning_rate": 2.8314504683508374e-05, "loss": 3.0294433236122133e-05, "step": 252650 }, { "epoch": 71.71728640363327, "grad_norm": 0.006023691035807133, "learning_rate": 2.8311666193585012e-05, "loss": 2.7641281485557555e-05, "step": 252660 }, { "epoch": 71.72012489355663, "grad_norm": 0.0007597928633913398, "learning_rate": 2.8308827703661657e-05, "loss": 3.679804503917694e-05, "step": 252670 }, { "epoch": 71.72296338347999, "grad_norm": 0.025428175926208496, "learning_rate": 2.8305989213738295e-05, "loss": 6.309431046247482e-05, "step": 252680 }, { "epoch": 71.72580187340336, "grad_norm": 0.13816021382808685, "learning_rate": 2.830315072381493e-05, "loss": 7.443819195032119e-05, "step": 252690 }, { "epoch": 71.7286403633267, "grad_norm": 0.017438190057873726, "learning_rate": 2.8300312233891567e-05, "loss": 7.819905877113343e-05, "step": 252700 }, { "epoch": 71.73147885325007, "grad_norm": 0.0005359947681427002, "learning_rate": 2.8297473743968212e-05, "loss": 0.00016265474259853364, "step": 252710 }, { "epoch": 71.73431734317343, "grad_norm": 0.0058623612858355045, "learning_rate": 2.829463525404485e-05, "loss": 5.606077611446381e-05, "step": 252720 }, { "epoch": 71.73715583309679, "grad_norm": 0.009218614548444748, "learning_rate": 2.8291796764121488e-05, "loss": 0.00012464616447687148, "step": 252730 }, { "epoch": 71.73999432302016, "grad_norm": 0.03223513066768646, "learning_rate": 2.828895827419813e-05, "loss": 8.079241961240768e-05, "step": 252740 }, { "epoch": 71.74283281294352, "grad_norm": 0.004069442395120859, "learning_rate": 2.8286119784274767e-05, "loss": 4.3715164065361026e-05, "step": 252750 }, { "epoch": 71.74567130286688, "grad_norm": 0.005570585839450359, "learning_rate": 2.8283281294351405e-05, "loss": 0.00028276946395635607, "step": 252760 }, { "epoch": 71.74850979279023, "grad_norm": 16.548206329345703, "learning_rate": 2.8280442804428047e-05, "loss": 0.004644510895013809, "step": 252770 }, { "epoch": 71.7513482827136, "grad_norm": 0.005198600701987743, "learning_rate": 2.8277604314504685e-05, "loss": 3.2665207982063293e-05, "step": 252780 }, { "epoch": 71.75418677263696, "grad_norm": 0.026202047243714333, "learning_rate": 2.8274765824581323e-05, "loss": 0.0002913402393460274, "step": 252790 }, { "epoch": 71.75702526256032, "grad_norm": 0.02971411496400833, "learning_rate": 2.8271927334657967e-05, "loss": 0.0004115892574191093, "step": 252800 }, { "epoch": 71.75986375248368, "grad_norm": 0.005240066442638636, "learning_rate": 2.8269088844734605e-05, "loss": 0.00014806855469942092, "step": 252810 }, { "epoch": 71.76270224240704, "grad_norm": 0.001752797863446176, "learning_rate": 2.826625035481124e-05, "loss": 0.0008430983871221542, "step": 252820 }, { "epoch": 71.7655407323304, "grad_norm": 0.4862818121910095, "learning_rate": 2.8263411864887878e-05, "loss": 0.0008218584582209588, "step": 252830 }, { "epoch": 71.76837922225376, "grad_norm": 0.006279604509472847, "learning_rate": 2.8260573374964523e-05, "loss": 0.0010737141594290734, "step": 252840 }, { "epoch": 71.77121771217712, "grad_norm": 0.007706840056926012, "learning_rate": 2.825773488504116e-05, "loss": 0.0024972328916192054, "step": 252850 }, { "epoch": 71.77405620210048, "grad_norm": 0.16267871856689453, "learning_rate": 2.8255180244110134e-05, "loss": 0.013717962801456452, "step": 252860 }, { "epoch": 71.77689469202384, "grad_norm": 7.416736125946045, "learning_rate": 2.8252341754186772e-05, "loss": 0.001338944397866726, "step": 252870 }, { "epoch": 71.77973318194721, "grad_norm": 0.02932014688849449, "learning_rate": 2.824950326426341e-05, "loss": 0.00012806784361600875, "step": 252880 }, { "epoch": 71.78257167187057, "grad_norm": 0.12898825109004974, "learning_rate": 2.8246664774340055e-05, "loss": 0.002250181324779987, "step": 252890 }, { "epoch": 71.78541016179392, "grad_norm": 6.451074600219727, "learning_rate": 2.8243826284416693e-05, "loss": 0.00555497407913208, "step": 252900 }, { "epoch": 71.78824865171728, "grad_norm": 0.006916411221027374, "learning_rate": 2.8240987794493327e-05, "loss": 0.00020469781011343003, "step": 252910 }, { "epoch": 71.79108714164065, "grad_norm": 0.06639926135540009, "learning_rate": 2.8238149304569972e-05, "loss": 0.00012033171951770782, "step": 252920 }, { "epoch": 71.79392563156401, "grad_norm": 0.000430158746894449, "learning_rate": 2.823531081464661e-05, "loss": 0.0011665485799312592, "step": 252930 }, { "epoch": 71.79676412148737, "grad_norm": 0.0012772160116583109, "learning_rate": 2.8232472324723248e-05, "loss": 0.0029058247804641725, "step": 252940 }, { "epoch": 71.79960261141073, "grad_norm": 0.06377095729112625, "learning_rate": 2.822963383479989e-05, "loss": 0.00022611841559410094, "step": 252950 }, { "epoch": 71.8024411013341, "grad_norm": 0.0036912027280777693, "learning_rate": 2.8226795344876527e-05, "loss": 8.344613015651703e-05, "step": 252960 }, { "epoch": 71.80527959125745, "grad_norm": 0.07778379321098328, "learning_rate": 2.8223956854953165e-05, "loss": 7.047802209854126e-05, "step": 252970 }, { "epoch": 71.80811808118081, "grad_norm": 0.03685938939452171, "learning_rate": 2.8221118365029803e-05, "loss": 9.405910968780517e-05, "step": 252980 }, { "epoch": 71.81095657110417, "grad_norm": 0.24334082007408142, "learning_rate": 2.8218279875106445e-05, "loss": 0.00019250139594078063, "step": 252990 }, { "epoch": 71.81379506102753, "grad_norm": 0.024271691218018532, "learning_rate": 2.8215441385183083e-05, "loss": 4.121400415897369e-05, "step": 253000 }, { "epoch": 71.81379506102753, "eval_accuracy": 0.9848667895975075, "eval_loss": 0.06049672141671181, "eval_runtime": 35.9156, "eval_samples_per_second": 437.888, "eval_steps_per_second": 6.849, "step": 253000 }, { "epoch": 71.8166335509509, "grad_norm": 0.010047392919659615, "learning_rate": 2.821260289525972e-05, "loss": 4.1054002940654756e-05, "step": 253010 }, { "epoch": 71.81947204087426, "grad_norm": 0.0247400663793087, "learning_rate": 2.8209764405336365e-05, "loss": 4.812031984329224e-05, "step": 253020 }, { "epoch": 71.82231053079762, "grad_norm": 0.03757251799106598, "learning_rate": 2.8206925915413003e-05, "loss": 4.609730094671249e-05, "step": 253030 }, { "epoch": 71.82514902072097, "grad_norm": 0.0149589404463768, "learning_rate": 2.8204087425489638e-05, "loss": 6.817914545536042e-05, "step": 253040 }, { "epoch": 71.82798751064433, "grad_norm": 0.019746892154216766, "learning_rate": 2.8201248935566283e-05, "loss": 3.1392090022563934e-05, "step": 253050 }, { "epoch": 71.8308260005677, "grad_norm": 0.009457926265895367, "learning_rate": 2.819841044564292e-05, "loss": 6.0531497001647947e-05, "step": 253060 }, { "epoch": 71.83366449049106, "grad_norm": 0.04820580035448074, "learning_rate": 2.819557195571956e-05, "loss": 9.123813360929489e-05, "step": 253070 }, { "epoch": 71.83650298041442, "grad_norm": 0.00681962538510561, "learning_rate": 2.8192733465796197e-05, "loss": 5.192812532186508e-05, "step": 253080 }, { "epoch": 71.83934147033779, "grad_norm": 0.5444871187210083, "learning_rate": 2.8189894975872838e-05, "loss": 0.00010060984641313553, "step": 253090 }, { "epoch": 71.84217996026113, "grad_norm": 0.0022204143460839987, "learning_rate": 2.8187056485949476e-05, "loss": 2.3705512285232543e-05, "step": 253100 }, { "epoch": 71.8450184501845, "grad_norm": 0.1479586660861969, "learning_rate": 2.8184217996026114e-05, "loss": 0.0007574370130896569, "step": 253110 }, { "epoch": 71.84785694010786, "grad_norm": 0.013140572234988213, "learning_rate": 2.8181379506102755e-05, "loss": 0.0061051521450281145, "step": 253120 }, { "epoch": 71.85069543003122, "grad_norm": 0.08220358192920685, "learning_rate": 2.8178541016179393e-05, "loss": 0.0002887677401304245, "step": 253130 }, { "epoch": 71.85353391995459, "grad_norm": 0.007069185841828585, "learning_rate": 2.817570252625603e-05, "loss": 0.00016652420163154602, "step": 253140 }, { "epoch": 71.85637240987795, "grad_norm": 0.008227482438087463, "learning_rate": 2.8172864036332676e-05, "loss": 0.0005457540974020958, "step": 253150 }, { "epoch": 71.85921089980131, "grad_norm": 0.08925153315067291, "learning_rate": 2.817002554640931e-05, "loss": 0.00013701952993869782, "step": 253160 }, { "epoch": 71.86204938972466, "grad_norm": 0.0034761340357363224, "learning_rate": 2.816718705648595e-05, "loss": 0.0002222435548901558, "step": 253170 }, { "epoch": 71.86488787964802, "grad_norm": 0.0019416777649894357, "learning_rate": 2.8164348566562593e-05, "loss": 0.0001359177753329277, "step": 253180 }, { "epoch": 71.86772636957139, "grad_norm": 0.027363451197743416, "learning_rate": 2.816151007663923e-05, "loss": 0.000409938208758831, "step": 253190 }, { "epoch": 71.87056485949475, "grad_norm": 0.016008242964744568, "learning_rate": 2.815867158671587e-05, "loss": 8.654873818159103e-05, "step": 253200 }, { "epoch": 71.87340334941811, "grad_norm": 0.0020679321605712175, "learning_rate": 2.8155833096792504e-05, "loss": 0.0017194882035255431, "step": 253210 }, { "epoch": 71.87624183934147, "grad_norm": 0.006369549781084061, "learning_rate": 2.815299460686915e-05, "loss": 0.00011996570974588394, "step": 253220 }, { "epoch": 71.87908032926484, "grad_norm": 0.005975413601845503, "learning_rate": 2.8150156116945787e-05, "loss": 0.000275450199842453, "step": 253230 }, { "epoch": 71.88191881918819, "grad_norm": 0.043733932077884674, "learning_rate": 2.8147317627022425e-05, "loss": 5.4253451526165006e-05, "step": 253240 }, { "epoch": 71.88475730911155, "grad_norm": 0.0008961206185631454, "learning_rate": 2.8144479137099066e-05, "loss": 0.0001010490581393242, "step": 253250 }, { "epoch": 71.88759579903491, "grad_norm": 0.002374551957473159, "learning_rate": 2.8141640647175704e-05, "loss": 3.5066716372966765e-05, "step": 253260 }, { "epoch": 71.89043428895828, "grad_norm": 0.3084738254547119, "learning_rate": 2.8138802157252342e-05, "loss": 7.87585973739624e-05, "step": 253270 }, { "epoch": 71.89327277888164, "grad_norm": 0.0265625212341547, "learning_rate": 2.8135963667328983e-05, "loss": 0.00041688475757837293, "step": 253280 }, { "epoch": 71.896111268805, "grad_norm": 0.7495980858802795, "learning_rate": 2.813312517740562e-05, "loss": 0.004630697146058083, "step": 253290 }, { "epoch": 71.89894975872835, "grad_norm": 0.008681122213602066, "learning_rate": 2.813028668748226e-05, "loss": 5.4403766989707945e-05, "step": 253300 }, { "epoch": 71.90178824865171, "grad_norm": 0.0029265787452459335, "learning_rate": 2.8127448197558897e-05, "loss": 0.00012018885463476181, "step": 253310 }, { "epoch": 71.90462673857508, "grad_norm": 0.011218059808015823, "learning_rate": 2.8124609707635542e-05, "loss": 3.250874578952789e-05, "step": 253320 }, { "epoch": 71.90746522849844, "grad_norm": 0.004450822249054909, "learning_rate": 2.8121771217712177e-05, "loss": 0.00011676531285047531, "step": 253330 }, { "epoch": 71.9103037184218, "grad_norm": 0.014565150253474712, "learning_rate": 2.8118932727788815e-05, "loss": 3.0046701431274415e-05, "step": 253340 }, { "epoch": 71.91314220834516, "grad_norm": 0.0830831527709961, "learning_rate": 2.811609423786546e-05, "loss": 0.0001162111759185791, "step": 253350 }, { "epoch": 71.91598069826853, "grad_norm": 0.0019651399925351143, "learning_rate": 2.8113255747942097e-05, "loss": 7.958542555570602e-05, "step": 253360 }, { "epoch": 71.91881918819188, "grad_norm": 0.005436412524431944, "learning_rate": 2.8110417258018735e-05, "loss": 7.910411804914474e-05, "step": 253370 }, { "epoch": 71.92165767811524, "grad_norm": 0.007972928695380688, "learning_rate": 2.8107578768095377e-05, "loss": 3.713518381118774e-05, "step": 253380 }, { "epoch": 71.9244961680386, "grad_norm": 0.01425117440521717, "learning_rate": 2.8104740278172015e-05, "loss": 5.371086299419403e-05, "step": 253390 }, { "epoch": 71.92733465796196, "grad_norm": 0.02826554886996746, "learning_rate": 2.8101901788248653e-05, "loss": 2.4244002997875213e-05, "step": 253400 }, { "epoch": 71.93017314788533, "grad_norm": 0.010400151833891869, "learning_rate": 2.809906329832529e-05, "loss": 2.416912466287613e-05, "step": 253410 }, { "epoch": 71.93301163780869, "grad_norm": 0.0019616014324128628, "learning_rate": 2.8096224808401932e-05, "loss": 1.7624720931053163e-05, "step": 253420 }, { "epoch": 71.93585012773205, "grad_norm": 0.007451469544321299, "learning_rate": 2.809338631847857e-05, "loss": 0.0016425665467977523, "step": 253430 }, { "epoch": 71.9386886176554, "grad_norm": 0.0014756087912246585, "learning_rate": 2.8090547828555208e-05, "loss": 7.173418998718262e-05, "step": 253440 }, { "epoch": 71.94152710757876, "grad_norm": 0.07243658602237701, "learning_rate": 2.8087709338631853e-05, "loss": 6.445609033107757e-05, "step": 253450 }, { "epoch": 71.94436559750213, "grad_norm": 0.01319468580186367, "learning_rate": 2.8084870848708488e-05, "loss": 3.7443451583385466e-05, "step": 253460 }, { "epoch": 71.94720408742549, "grad_norm": 0.005360425915569067, "learning_rate": 2.8082032358785126e-05, "loss": 2.2069737315177918e-05, "step": 253470 }, { "epoch": 71.95004257734885, "grad_norm": 0.02373434230685234, "learning_rate": 2.807919386886177e-05, "loss": 3.0549243092536926e-05, "step": 253480 }, { "epoch": 71.95288106727222, "grad_norm": 0.00926928874105215, "learning_rate": 2.8076355378938408e-05, "loss": 3.110244870185852e-05, "step": 253490 }, { "epoch": 71.95571955719558, "grad_norm": 0.0029107846785336733, "learning_rate": 2.8073516889015046e-05, "loss": 4.54707071185112e-05, "step": 253500 }, { "epoch": 71.95571955719558, "eval_accuracy": 0.9851211292681376, "eval_loss": 0.05994433909654617, "eval_runtime": 35.0671, "eval_samples_per_second": 448.482, "eval_steps_per_second": 7.015, "step": 253500 }, { "epoch": 71.95855804711893, "grad_norm": 0.005209644325077534, "learning_rate": 2.807067839909168e-05, "loss": 0.00048117414116859435, "step": 253510 }, { "epoch": 71.96139653704229, "grad_norm": 15.780491828918457, "learning_rate": 2.8067839909168326e-05, "loss": 0.02076266407966614, "step": 253520 }, { "epoch": 71.96423502696565, "grad_norm": 0.006832460407167673, "learning_rate": 2.8065001419244964e-05, "loss": 6.26707449555397e-05, "step": 253530 }, { "epoch": 71.96707351688902, "grad_norm": 0.001022034091874957, "learning_rate": 2.80621629293216e-05, "loss": 4.176627844572067e-05, "step": 253540 }, { "epoch": 71.96991200681238, "grad_norm": 0.14986662566661835, "learning_rate": 2.8059324439398243e-05, "loss": 7.436405867338181e-05, "step": 253550 }, { "epoch": 71.97275049673574, "grad_norm": 0.0021270622964948416, "learning_rate": 2.805648594947488e-05, "loss": 1.7132796347141267e-05, "step": 253560 }, { "epoch": 71.97558898665909, "grad_norm": 0.005049740429967642, "learning_rate": 2.805364745955152e-05, "loss": 8.391309529542923e-05, "step": 253570 }, { "epoch": 71.97842747658245, "grad_norm": 0.0016560701187700033, "learning_rate": 2.805080896962816e-05, "loss": 2.1868199110031127e-05, "step": 253580 }, { "epoch": 71.98126596650582, "grad_norm": 0.0017216249834746122, "learning_rate": 2.8047970479704798e-05, "loss": 2.3896247148513795e-05, "step": 253590 }, { "epoch": 71.98410445642918, "grad_norm": 0.00882322434335947, "learning_rate": 2.8045131989781436e-05, "loss": 1.550484448671341e-05, "step": 253600 }, { "epoch": 71.98694294635254, "grad_norm": 0.0024699908681213856, "learning_rate": 2.8042293499858074e-05, "loss": 6.699729710817337e-05, "step": 253610 }, { "epoch": 71.9897814362759, "grad_norm": 0.0026846753899008036, "learning_rate": 2.803945500993472e-05, "loss": 2.654287964105606e-05, "step": 253620 }, { "epoch": 71.99261992619927, "grad_norm": 0.4758002460002899, "learning_rate": 2.8036616520011354e-05, "loss": 9.057894349098205e-05, "step": 253630 }, { "epoch": 71.99545841612262, "grad_norm": 0.022357741370797157, "learning_rate": 2.803377803008799e-05, "loss": 0.0003511020913720131, "step": 253640 }, { "epoch": 71.99829690604598, "grad_norm": 0.0024024425074458122, "learning_rate": 2.8030939540164636e-05, "loss": 0.00012869592756032945, "step": 253650 }, { "epoch": 72.00113539596934, "grad_norm": 0.0036857270170003176, "learning_rate": 2.8028101050241274e-05, "loss": 2.3067064466886224e-05, "step": 253660 }, { "epoch": 72.0039738858927, "grad_norm": 0.005239980295300484, "learning_rate": 2.8025262560317912e-05, "loss": 0.00024502817541360855, "step": 253670 }, { "epoch": 72.00681237581607, "grad_norm": 0.009164057672023773, "learning_rate": 2.8022424070394554e-05, "loss": 2.903975546360016e-05, "step": 253680 }, { "epoch": 72.00965086573943, "grad_norm": 0.3120521008968353, "learning_rate": 2.8019585580471192e-05, "loss": 0.000833672471344471, "step": 253690 }, { "epoch": 72.0124893556628, "grad_norm": 0.007288329303264618, "learning_rate": 2.801674709054783e-05, "loss": 0.0003490600734949112, "step": 253700 }, { "epoch": 72.01532784558614, "grad_norm": 0.0989440381526947, "learning_rate": 2.8013908600624468e-05, "loss": 0.0006992088630795479, "step": 253710 }, { "epoch": 72.0181663355095, "grad_norm": 0.03643535077571869, "learning_rate": 2.801107011070111e-05, "loss": 0.0003638666123151779, "step": 253720 }, { "epoch": 72.02100482543287, "grad_norm": 0.12290478497743607, "learning_rate": 2.8008231620777747e-05, "loss": 0.00012507569044828415, "step": 253730 }, { "epoch": 72.02384331535623, "grad_norm": 0.03296850249171257, "learning_rate": 2.8005393130854385e-05, "loss": 0.0038918308913707734, "step": 253740 }, { "epoch": 72.0266818052796, "grad_norm": 0.035053033381700516, "learning_rate": 2.8002554640931026e-05, "loss": 0.00013037081807851792, "step": 253750 }, { "epoch": 72.02952029520296, "grad_norm": 0.005588815081864595, "learning_rate": 2.7999716151007664e-05, "loss": 0.00021960325539112092, "step": 253760 }, { "epoch": 72.03235878512632, "grad_norm": 0.026111993938684464, "learning_rate": 2.7996877661084302e-05, "loss": 0.0005660831928253173, "step": 253770 }, { "epoch": 72.03519727504967, "grad_norm": 0.011097048409283161, "learning_rate": 2.7994039171160947e-05, "loss": 0.0027176180854439737, "step": 253780 }, { "epoch": 72.03803576497303, "grad_norm": 0.0027739955112338066, "learning_rate": 2.7991200681237585e-05, "loss": 0.00011375583708286285, "step": 253790 }, { "epoch": 72.0408742548964, "grad_norm": 0.005824352148920298, "learning_rate": 2.798836219131422e-05, "loss": 0.0004603283479809761, "step": 253800 }, { "epoch": 72.04371274481976, "grad_norm": 0.0073830909095704556, "learning_rate": 2.7985523701390858e-05, "loss": 5.8594904839992526e-05, "step": 253810 }, { "epoch": 72.04655123474312, "grad_norm": 0.4404008388519287, "learning_rate": 2.7982685211467502e-05, "loss": 0.00011510699987411499, "step": 253820 }, { "epoch": 72.04938972466648, "grad_norm": 2.795078992843628, "learning_rate": 2.797984672154414e-05, "loss": 0.00043474733829498293, "step": 253830 }, { "epoch": 72.05222821458983, "grad_norm": 0.028393179178237915, "learning_rate": 2.797700823162078e-05, "loss": 0.0007514705881476402, "step": 253840 }, { "epoch": 72.0550667045132, "grad_norm": 0.009816530160605907, "learning_rate": 2.797416974169742e-05, "loss": 6.421059370040894e-05, "step": 253850 }, { "epoch": 72.05790519443656, "grad_norm": 0.06659701466560364, "learning_rate": 2.7971331251774058e-05, "loss": 0.00010046828538179398, "step": 253860 }, { "epoch": 72.06074368435992, "grad_norm": 0.004750450141727924, "learning_rate": 2.7968492761850696e-05, "loss": 0.0025953676551580427, "step": 253870 }, { "epoch": 72.06358217428328, "grad_norm": 0.006543118506669998, "learning_rate": 2.7965654271927337e-05, "loss": 0.0002206534147262573, "step": 253880 }, { "epoch": 72.06642066420665, "grad_norm": 0.06760742515325546, "learning_rate": 2.7962815782003975e-05, "loss": 9.533930569887162e-05, "step": 253890 }, { "epoch": 72.06925915413001, "grad_norm": 0.014389694668352604, "learning_rate": 2.7959977292080613e-05, "loss": 8.536297827959061e-05, "step": 253900 }, { "epoch": 72.07209764405336, "grad_norm": 0.01287812553346157, "learning_rate": 2.795713880215725e-05, "loss": 0.00016420409083366393, "step": 253910 }, { "epoch": 72.07493613397672, "grad_norm": 11.681288719177246, "learning_rate": 2.7954300312233896e-05, "loss": 0.0019999226555228234, "step": 253920 }, { "epoch": 72.07777462390008, "grad_norm": 0.033182162791490555, "learning_rate": 2.795146182231053e-05, "loss": 0.0007386116310954093, "step": 253930 }, { "epoch": 72.08061311382345, "grad_norm": 0.0016172395553439856, "learning_rate": 2.794862333238717e-05, "loss": 0.004175597429275512, "step": 253940 }, { "epoch": 72.08345160374681, "grad_norm": 0.04223071038722992, "learning_rate": 2.7945784842463813e-05, "loss": 0.0005898594856262207, "step": 253950 }, { "epoch": 72.08629009367017, "grad_norm": 1.6243197917938232, "learning_rate": 2.794294635254045e-05, "loss": 0.0005351701751351356, "step": 253960 }, { "epoch": 72.08912858359353, "grad_norm": 0.0047603570856153965, "learning_rate": 2.794010786261709e-05, "loss": 0.00025568921118974687, "step": 253970 }, { "epoch": 72.09196707351688, "grad_norm": 8.916985511779785, "learning_rate": 2.793726937269373e-05, "loss": 0.0027436664327979087, "step": 253980 }, { "epoch": 72.09480556344025, "grad_norm": 0.006834893953055143, "learning_rate": 2.793443088277037e-05, "loss": 0.00023483168333768845, "step": 253990 }, { "epoch": 72.09764405336361, "grad_norm": 0.006873519159853458, "learning_rate": 2.7931592392847007e-05, "loss": 0.0001746462658047676, "step": 254000 }, { "epoch": 72.09764405336361, "eval_accuracy": 0.9816239587969734, "eval_loss": 0.07873234897851944, "eval_runtime": 35.5554, "eval_samples_per_second": 442.324, "eval_steps_per_second": 6.919, "step": 254000 }, { "epoch": 72.10048254328697, "grad_norm": 0.010314899496734142, "learning_rate": 2.7928753902923645e-05, "loss": 0.0001776382327079773, "step": 254010 }, { "epoch": 72.10332103321034, "grad_norm": 0.026028012856841087, "learning_rate": 2.7925915413000286e-05, "loss": 0.00011256970465183258, "step": 254020 }, { "epoch": 72.1061595231337, "grad_norm": 0.008854944258928299, "learning_rate": 2.7923076923076924e-05, "loss": 0.00013298392295837402, "step": 254030 }, { "epoch": 72.10899801305705, "grad_norm": 0.09957601875066757, "learning_rate": 2.7920238433153562e-05, "loss": 0.00033329855650663374, "step": 254040 }, { "epoch": 72.11183650298041, "grad_norm": 0.0055015902034938335, "learning_rate": 2.7917399943230203e-05, "loss": 0.0001629013568162918, "step": 254050 }, { "epoch": 72.11467499290377, "grad_norm": 0.26188912987709045, "learning_rate": 2.791456145330684e-05, "loss": 0.0015728948637843132, "step": 254060 }, { "epoch": 72.11751348282714, "grad_norm": 0.008020677603781223, "learning_rate": 2.791172296338348e-05, "loss": 0.00016710013151168823, "step": 254070 }, { "epoch": 72.1203519727505, "grad_norm": 0.021104611456394196, "learning_rate": 2.7908884473460124e-05, "loss": 0.0011738527566194535, "step": 254080 }, { "epoch": 72.12319046267386, "grad_norm": 0.0278786551207304, "learning_rate": 2.7906045983536762e-05, "loss": 0.0001880064606666565, "step": 254090 }, { "epoch": 72.12602895259722, "grad_norm": 0.11746914684772491, "learning_rate": 2.7903207493613397e-05, "loss": 0.0001607213169336319, "step": 254100 }, { "epoch": 72.12886744252057, "grad_norm": 0.0636332705616951, "learning_rate": 2.7900369003690035e-05, "loss": 0.0001457246020436287, "step": 254110 }, { "epoch": 72.13170593244394, "grad_norm": 0.0591895654797554, "learning_rate": 2.789753051376668e-05, "loss": 0.0002773033455014229, "step": 254120 }, { "epoch": 72.1345444223673, "grad_norm": 0.03836999461054802, "learning_rate": 2.7894692023843317e-05, "loss": 5.291365087032318e-05, "step": 254130 }, { "epoch": 72.13738291229066, "grad_norm": 0.01656150445342064, "learning_rate": 2.7891853533919955e-05, "loss": 0.00020328294485807418, "step": 254140 }, { "epoch": 72.14022140221402, "grad_norm": 0.04399740323424339, "learning_rate": 2.7889015043996597e-05, "loss": 0.00021917782723903655, "step": 254150 }, { "epoch": 72.14305989213739, "grad_norm": 0.12596815824508667, "learning_rate": 2.7886176554073235e-05, "loss": 0.0003368215635418892, "step": 254160 }, { "epoch": 72.14589838206075, "grad_norm": 1.4195207357406616, "learning_rate": 2.7883338064149873e-05, "loss": 0.0002691999077796936, "step": 254170 }, { "epoch": 72.1487368719841, "grad_norm": 0.020804770290851593, "learning_rate": 2.7880499574226514e-05, "loss": 0.001789277233183384, "step": 254180 }, { "epoch": 72.15157536190746, "grad_norm": 0.006750207860022783, "learning_rate": 2.7877661084303152e-05, "loss": 0.0016723629087209702, "step": 254190 }, { "epoch": 72.15441385183082, "grad_norm": 0.006422781851142645, "learning_rate": 2.787482259437979e-05, "loss": 5.019847303628921e-05, "step": 254200 }, { "epoch": 72.15725234175419, "grad_norm": 0.00425516813993454, "learning_rate": 2.7871984104456428e-05, "loss": 0.0008865330368280411, "step": 254210 }, { "epoch": 72.16009083167755, "grad_norm": 0.029117146506905556, "learning_rate": 2.786914561453307e-05, "loss": 0.00010851342231035232, "step": 254220 }, { "epoch": 72.16292932160091, "grad_norm": 0.015899356454610825, "learning_rate": 2.7866307124609707e-05, "loss": 0.0026269055902957915, "step": 254230 }, { "epoch": 72.16576781152428, "grad_norm": 0.012239216826856136, "learning_rate": 2.7863468634686345e-05, "loss": 0.00010672397911548615, "step": 254240 }, { "epoch": 72.16860630144762, "grad_norm": 0.06598121672868729, "learning_rate": 2.786063014476299e-05, "loss": 0.004353866726160049, "step": 254250 }, { "epoch": 72.17144479137099, "grad_norm": 0.010059495456516743, "learning_rate": 2.7857791654839628e-05, "loss": 6.868615746498108e-05, "step": 254260 }, { "epoch": 72.17428328129435, "grad_norm": 0.08110823482275009, "learning_rate": 2.7854953164916263e-05, "loss": 4.146918654441833e-05, "step": 254270 }, { "epoch": 72.17712177121771, "grad_norm": 0.03235217556357384, "learning_rate": 2.7852114674992907e-05, "loss": 0.0001307450234889984, "step": 254280 }, { "epoch": 72.17996026114108, "grad_norm": 0.006985998246818781, "learning_rate": 2.7849276185069545e-05, "loss": 5.45550137758255e-05, "step": 254290 }, { "epoch": 72.18279875106444, "grad_norm": 0.004386264365166426, "learning_rate": 2.7846437695146183e-05, "loss": 0.0032737616449594496, "step": 254300 }, { "epoch": 72.18563724098779, "grad_norm": 0.03286818414926529, "learning_rate": 2.784359920522282e-05, "loss": 9.234678000211716e-05, "step": 254310 }, { "epoch": 72.18847573091115, "grad_norm": 0.016209913417696953, "learning_rate": 2.7840760715299463e-05, "loss": 5.497187376022339e-05, "step": 254320 }, { "epoch": 72.19131422083451, "grad_norm": 0.3034851849079132, "learning_rate": 2.78379222253761e-05, "loss": 0.0005692791193723679, "step": 254330 }, { "epoch": 72.19415271075788, "grad_norm": 0.06127690151333809, "learning_rate": 2.783508373545274e-05, "loss": 0.002910305559635162, "step": 254340 }, { "epoch": 72.19699120068124, "grad_norm": 0.012546568177640438, "learning_rate": 2.783224524552938e-05, "loss": 9.960513561964035e-05, "step": 254350 }, { "epoch": 72.1998296906046, "grad_norm": 0.0028625247068703175, "learning_rate": 2.7829406755606018e-05, "loss": 0.00010312255471944809, "step": 254360 }, { "epoch": 72.20266818052797, "grad_norm": 0.001655610860325396, "learning_rate": 2.7826568265682656e-05, "loss": 0.002753418684005737, "step": 254370 }, { "epoch": 72.20550667045131, "grad_norm": 0.04917953908443451, "learning_rate": 2.78237297757593e-05, "loss": 0.00011915471404790878, "step": 254380 }, { "epoch": 72.20834516037468, "grad_norm": 0.07415502518415451, "learning_rate": 2.782089128583594e-05, "loss": 0.00011055190116167068, "step": 254390 }, { "epoch": 72.21118365029804, "grad_norm": 0.7348788380622864, "learning_rate": 2.7818052795912573e-05, "loss": 0.0003084402531385422, "step": 254400 }, { "epoch": 72.2140221402214, "grad_norm": 0.0025305363815277815, "learning_rate": 2.781521430598921e-05, "loss": 9.279195219278336e-05, "step": 254410 }, { "epoch": 72.21686063014477, "grad_norm": 0.02043635956943035, "learning_rate": 2.7812375816065856e-05, "loss": 5.1936320960521695e-05, "step": 254420 }, { "epoch": 72.21969912006813, "grad_norm": 0.003451172960922122, "learning_rate": 2.7809537326142494e-05, "loss": 4.319753497838974e-05, "step": 254430 }, { "epoch": 72.22253760999149, "grad_norm": 0.060062162578105927, "learning_rate": 2.7806698836219132e-05, "loss": 0.0002562940120697021, "step": 254440 }, { "epoch": 72.22537609991484, "grad_norm": 0.0476144440472126, "learning_rate": 2.7803860346295774e-05, "loss": 0.00015728026628494262, "step": 254450 }, { "epoch": 72.2282145898382, "grad_norm": 0.002404953585937619, "learning_rate": 2.780102185637241e-05, "loss": 5.944687873125076e-05, "step": 254460 }, { "epoch": 72.23105307976157, "grad_norm": 0.02493206225335598, "learning_rate": 2.779818336644905e-05, "loss": 4.7817081212997434e-05, "step": 254470 }, { "epoch": 72.23389156968493, "grad_norm": 0.003054070984944701, "learning_rate": 2.779534487652569e-05, "loss": 6.947573274374008e-05, "step": 254480 }, { "epoch": 72.23673005960829, "grad_norm": 0.22736115753650665, "learning_rate": 2.779250638660233e-05, "loss": 9.775795042514801e-05, "step": 254490 }, { "epoch": 72.23956854953165, "grad_norm": 0.022502226755023003, "learning_rate": 2.7789667896678967e-05, "loss": 0.0001355031505227089, "step": 254500 }, { "epoch": 72.23956854953165, "eval_accuracy": 0.9860749030330006, "eval_loss": 0.059314191341400146, "eval_runtime": 35.7647, "eval_samples_per_second": 439.735, "eval_steps_per_second": 6.878, "step": 254500 }, { "epoch": 72.242407039455, "grad_norm": 0.011341754347085953, "learning_rate": 2.778682940675561e-05, "loss": 4.9202889204025266e-05, "step": 254510 }, { "epoch": 72.24524552937837, "grad_norm": 0.04075779393315315, "learning_rate": 2.7783990916832246e-05, "loss": 4.635956138372421e-05, "step": 254520 }, { "epoch": 72.24808401930173, "grad_norm": 0.01172060240060091, "learning_rate": 2.7781152426908884e-05, "loss": 5.9986114501953124e-05, "step": 254530 }, { "epoch": 72.25092250922509, "grad_norm": 0.015658918768167496, "learning_rate": 2.7778313936985522e-05, "loss": 5.7396478950977325e-05, "step": 254540 }, { "epoch": 72.25376099914845, "grad_norm": 0.012643113732337952, "learning_rate": 2.7775475447062167e-05, "loss": 6.158184260129929e-05, "step": 254550 }, { "epoch": 72.25659948907182, "grad_norm": 0.0038343018386512995, "learning_rate": 2.7772636957138805e-05, "loss": 4.265420138835907e-05, "step": 254560 }, { "epoch": 72.25943797899518, "grad_norm": 0.0013745762407779694, "learning_rate": 2.776979846721544e-05, "loss": 2.9491446912288666e-05, "step": 254570 }, { "epoch": 72.26227646891853, "grad_norm": 0.0034166937693953514, "learning_rate": 2.7766959977292084e-05, "loss": 6.549227982759476e-05, "step": 254580 }, { "epoch": 72.26511495884189, "grad_norm": 0.054560497403144836, "learning_rate": 2.7764121487368722e-05, "loss": 2.175215631723404e-05, "step": 254590 }, { "epoch": 72.26795344876525, "grad_norm": 0.002477958332747221, "learning_rate": 2.776128299744536e-05, "loss": 3.526583313941956e-05, "step": 254600 }, { "epoch": 72.27079193868862, "grad_norm": 0.005960308015346527, "learning_rate": 2.7758444507522e-05, "loss": 0.0016681674867868423, "step": 254610 }, { "epoch": 72.27363042861198, "grad_norm": 0.037223782390356064, "learning_rate": 2.775560601759864e-05, "loss": 0.00013860277831554413, "step": 254620 }, { "epoch": 72.27646891853534, "grad_norm": 0.004747726488858461, "learning_rate": 2.7752767527675278e-05, "loss": 2.81481072306633e-05, "step": 254630 }, { "epoch": 72.2793074084587, "grad_norm": 0.4521959722042084, "learning_rate": 2.7749929037751916e-05, "loss": 0.00017257723957300186, "step": 254640 }, { "epoch": 72.28214589838205, "grad_norm": 0.06600749492645264, "learning_rate": 2.7747090547828557e-05, "loss": 4.8638135194778445e-05, "step": 254650 }, { "epoch": 72.28498438830542, "grad_norm": 0.017145821824669838, "learning_rate": 2.7744252057905195e-05, "loss": 0.00013869386166334152, "step": 254660 }, { "epoch": 72.28782287822878, "grad_norm": 0.001718302839435637, "learning_rate": 2.7741413567981833e-05, "loss": 0.0016024390235543251, "step": 254670 }, { "epoch": 72.29066136815214, "grad_norm": 0.010465537197887897, "learning_rate": 2.7738575078058478e-05, "loss": 0.0011788180097937583, "step": 254680 }, { "epoch": 72.2934998580755, "grad_norm": 0.005003902595490217, "learning_rate": 2.7735736588135112e-05, "loss": 6.361212581396103e-05, "step": 254690 }, { "epoch": 72.29633834799887, "grad_norm": 0.009778800420463085, "learning_rate": 2.773289809821175e-05, "loss": 9.683109819889069e-05, "step": 254700 }, { "epoch": 72.29917683792223, "grad_norm": 0.01747296378016472, "learning_rate": 2.7730059608288395e-05, "loss": 4.01712954044342e-05, "step": 254710 }, { "epoch": 72.30201532784558, "grad_norm": 0.38698238134384155, "learning_rate": 2.7727221118365033e-05, "loss": 6.902869790792465e-05, "step": 254720 }, { "epoch": 72.30485381776894, "grad_norm": 0.1866064965724945, "learning_rate": 2.772438262844167e-05, "loss": 6.190035492181778e-05, "step": 254730 }, { "epoch": 72.3076923076923, "grad_norm": 0.0009806028101593256, "learning_rate": 2.7721544138518306e-05, "loss": 6.700269877910615e-05, "step": 254740 }, { "epoch": 72.31053079761567, "grad_norm": 0.0943499505519867, "learning_rate": 2.771870564859495e-05, "loss": 5.9087760746479034e-05, "step": 254750 }, { "epoch": 72.31336928753903, "grad_norm": 0.017663367092609406, "learning_rate": 2.771586715867159e-05, "loss": 0.00013439338654279708, "step": 254760 }, { "epoch": 72.3162077774624, "grad_norm": 0.006615367718040943, "learning_rate": 2.7713028668748226e-05, "loss": 6.961878389120102e-05, "step": 254770 }, { "epoch": 72.31904626738574, "grad_norm": 0.0011845818953588605, "learning_rate": 2.7710190178824868e-05, "loss": 6.252732127904892e-05, "step": 254780 }, { "epoch": 72.3218847573091, "grad_norm": 0.6535075902938843, "learning_rate": 2.7707351688901506e-05, "loss": 0.00014488231390714646, "step": 254790 }, { "epoch": 72.32472324723247, "grad_norm": 0.03991003707051277, "learning_rate": 2.7704513198978144e-05, "loss": 0.0034230001270771026, "step": 254800 }, { "epoch": 72.32756173715583, "grad_norm": 0.0009428277262486517, "learning_rate": 2.7701674709054785e-05, "loss": 8.138380944728851e-05, "step": 254810 }, { "epoch": 72.3304002270792, "grad_norm": 0.08590991795063019, "learning_rate": 2.7698836219131423e-05, "loss": 0.00018508359789848328, "step": 254820 }, { "epoch": 72.33323871700256, "grad_norm": 0.0016721355495974422, "learning_rate": 2.769599772920806e-05, "loss": 3.4782290458679197e-05, "step": 254830 }, { "epoch": 72.33607720692592, "grad_norm": 0.0033317450433969498, "learning_rate": 2.76931592392847e-05, "loss": 3.29216942191124e-05, "step": 254840 }, { "epoch": 72.33891569684927, "grad_norm": 0.0557619072496891, "learning_rate": 2.7690320749361344e-05, "loss": 0.00027238577604293823, "step": 254850 }, { "epoch": 72.34175418677263, "grad_norm": 0.0014766461681574583, "learning_rate": 2.768748225943798e-05, "loss": 0.0005839956924319267, "step": 254860 }, { "epoch": 72.344592676696, "grad_norm": 0.030703935772180557, "learning_rate": 2.7684643769514616e-05, "loss": 0.00029853805899620055, "step": 254870 }, { "epoch": 72.34743116661936, "grad_norm": 1.4794466495513916, "learning_rate": 2.768180527959126e-05, "loss": 0.0004599468782544136, "step": 254880 }, { "epoch": 72.35026965654272, "grad_norm": 0.16201163828372955, "learning_rate": 2.76789667896679e-05, "loss": 7.587578147649765e-05, "step": 254890 }, { "epoch": 72.35310814646608, "grad_norm": 0.0034313593059778214, "learning_rate": 2.7676128299744537e-05, "loss": 0.0024790825322270394, "step": 254900 }, { "epoch": 72.35594663638945, "grad_norm": 0.4205845296382904, "learning_rate": 2.767328980982118e-05, "loss": 0.0004794873297214508, "step": 254910 }, { "epoch": 72.3587851263128, "grad_norm": 12.011764526367188, "learning_rate": 2.7670451319897817e-05, "loss": 0.005783899128437043, "step": 254920 }, { "epoch": 72.36162361623616, "grad_norm": 0.36900776624679565, "learning_rate": 2.7667612829974455e-05, "loss": 0.008743740618228912, "step": 254930 }, { "epoch": 72.36446210615952, "grad_norm": 0.0036870085168629885, "learning_rate": 2.7664774340051093e-05, "loss": 0.00014191325753927232, "step": 254940 }, { "epoch": 72.36730059608288, "grad_norm": 0.004703435115516186, "learning_rate": 2.7661935850127734e-05, "loss": 7.759816944599151e-05, "step": 254950 }, { "epoch": 72.37013908600625, "grad_norm": 0.00411245645955205, "learning_rate": 2.7659097360204372e-05, "loss": 5.159955471754074e-05, "step": 254960 }, { "epoch": 72.37297757592961, "grad_norm": 0.0015171169070526958, "learning_rate": 2.765625887028101e-05, "loss": 3.277324140071869e-05, "step": 254970 }, { "epoch": 72.37581606585297, "grad_norm": 0.020520281046628952, "learning_rate": 2.7653420380357655e-05, "loss": 4.585590213537216e-05, "step": 254980 }, { "epoch": 72.37865455577632, "grad_norm": 0.0032900499645620584, "learning_rate": 2.765058189043429e-05, "loss": 2.0195171236991884e-05, "step": 254990 }, { "epoch": 72.38149304569968, "grad_norm": 0.004189232364296913, "learning_rate": 2.7647743400510927e-05, "loss": 1.7113611102104186e-05, "step": 255000 }, { "epoch": 72.38149304569968, "eval_accuracy": 0.9858205633623705, "eval_loss": 0.05515899509191513, "eval_runtime": 35.5604, "eval_samples_per_second": 442.262, "eval_steps_per_second": 6.918, "step": 255000 }, { "epoch": 72.38433153562305, "grad_norm": 0.0010371063835918903, "learning_rate": 2.7644904910587572e-05, "loss": 6.749816238880158e-05, "step": 255010 }, { "epoch": 72.38717002554641, "grad_norm": 0.0034493179991841316, "learning_rate": 2.764206642066421e-05, "loss": 0.00011946465820074081, "step": 255020 }, { "epoch": 72.39000851546977, "grad_norm": 0.3766140639781952, "learning_rate": 2.7639227930740848e-05, "loss": 0.0001021241769194603, "step": 255030 }, { "epoch": 72.39284700539314, "grad_norm": 0.001134359510615468, "learning_rate": 2.7636389440817483e-05, "loss": 9.215120226144791e-05, "step": 255040 }, { "epoch": 72.39568549531649, "grad_norm": 0.04213622957468033, "learning_rate": 2.7633550950894127e-05, "loss": 5.694124847650528e-05, "step": 255050 }, { "epoch": 72.39852398523985, "grad_norm": 0.007620925549417734, "learning_rate": 2.7630712460970765e-05, "loss": 1.9469857215881346e-05, "step": 255060 }, { "epoch": 72.40136247516321, "grad_norm": 0.0064919330179691315, "learning_rate": 2.7627873971047403e-05, "loss": 2.736486494541168e-05, "step": 255070 }, { "epoch": 72.40420096508657, "grad_norm": 0.001941085560247302, "learning_rate": 2.7625035481124045e-05, "loss": 5.775038152933121e-05, "step": 255080 }, { "epoch": 72.40703945500994, "grad_norm": 0.007450076285749674, "learning_rate": 2.7622196991200683e-05, "loss": 0.0030916059389710427, "step": 255090 }, { "epoch": 72.4098779449333, "grad_norm": 0.0102864233776927, "learning_rate": 2.761935850127732e-05, "loss": 2.947300672531128e-05, "step": 255100 }, { "epoch": 72.41271643485666, "grad_norm": 0.016386087983846664, "learning_rate": 2.7616520011353962e-05, "loss": 0.00016776155680418015, "step": 255110 }, { "epoch": 72.41555492478001, "grad_norm": 0.011127742938697338, "learning_rate": 2.76136815214306e-05, "loss": 0.00022680014371871947, "step": 255120 }, { "epoch": 72.41839341470337, "grad_norm": 0.31773385405540466, "learning_rate": 2.7610843031507238e-05, "loss": 0.00021598059684038162, "step": 255130 }, { "epoch": 72.42123190462674, "grad_norm": 0.03729398548603058, "learning_rate": 2.7608004541583876e-05, "loss": 5.540959537029266e-05, "step": 255140 }, { "epoch": 72.4240703945501, "grad_norm": 0.012586956843733788, "learning_rate": 2.760516605166052e-05, "loss": 0.00010396391153335571, "step": 255150 }, { "epoch": 72.42690888447346, "grad_norm": 0.0036584846675395966, "learning_rate": 2.7602327561737155e-05, "loss": 4.188697785139084e-05, "step": 255160 }, { "epoch": 72.42974737439683, "grad_norm": 0.007692879997193813, "learning_rate": 2.7599489071813793e-05, "loss": 4.508644342422485e-05, "step": 255170 }, { "epoch": 72.43258586432019, "grad_norm": 0.006376021075993776, "learning_rate": 2.7596650581890438e-05, "loss": 0.0005487473681569099, "step": 255180 }, { "epoch": 72.43542435424354, "grad_norm": 0.40356704592704773, "learning_rate": 2.7593812091967076e-05, "loss": 0.00011061243712902069, "step": 255190 }, { "epoch": 72.4382628441669, "grad_norm": 0.01633961871266365, "learning_rate": 2.7590973602043714e-05, "loss": 9.802840650081634e-05, "step": 255200 }, { "epoch": 72.44110133409026, "grad_norm": 0.0022012607660144567, "learning_rate": 2.7588135112120355e-05, "loss": 0.0002192651852965355, "step": 255210 }, { "epoch": 72.44393982401363, "grad_norm": 0.0064281560480594635, "learning_rate": 2.7585296622196993e-05, "loss": 9.537898004055023e-05, "step": 255220 }, { "epoch": 72.44677831393699, "grad_norm": 0.16369713842868805, "learning_rate": 2.758245813227363e-05, "loss": 4.178620874881744e-05, "step": 255230 }, { "epoch": 72.44961680386035, "grad_norm": 0.0079810731112957, "learning_rate": 2.757961964235027e-05, "loss": 3.876108676195145e-05, "step": 255240 }, { "epoch": 72.4524552937837, "grad_norm": 0.033301156014204025, "learning_rate": 2.757678115242691e-05, "loss": 4.050750285387039e-05, "step": 255250 }, { "epoch": 72.45529378370706, "grad_norm": 0.013289385475218296, "learning_rate": 2.757394266250355e-05, "loss": 4.682503640651703e-05, "step": 255260 }, { "epoch": 72.45813227363043, "grad_norm": 0.11500507593154907, "learning_rate": 2.7571104172580187e-05, "loss": 0.0001841692253947258, "step": 255270 }, { "epoch": 72.46097076355379, "grad_norm": 0.03485269844532013, "learning_rate": 2.7568265682656828e-05, "loss": 0.00013340525329113006, "step": 255280 }, { "epoch": 72.46380925347715, "grad_norm": 0.005470838397741318, "learning_rate": 2.7565427192733466e-05, "loss": 4.431959241628647e-05, "step": 255290 }, { "epoch": 72.46664774340051, "grad_norm": 0.006903760135173798, "learning_rate": 2.7562588702810104e-05, "loss": 0.0003716975450515747, "step": 255300 }, { "epoch": 72.46948623332388, "grad_norm": 0.014487490057945251, "learning_rate": 2.755975021288675e-05, "loss": 0.0001460200175642967, "step": 255310 }, { "epoch": 72.47232472324723, "grad_norm": 0.004055151250213385, "learning_rate": 2.7556911722963387e-05, "loss": 0.0010596379637718202, "step": 255320 }, { "epoch": 72.47516321317059, "grad_norm": 0.16512663662433624, "learning_rate": 2.755407323304002e-05, "loss": 7.398668676614762e-05, "step": 255330 }, { "epoch": 72.47800170309395, "grad_norm": 0.030262261629104614, "learning_rate": 2.755123474311666e-05, "loss": 0.00015929155051708221, "step": 255340 }, { "epoch": 72.48084019301731, "grad_norm": 0.0037061399780213833, "learning_rate": 2.7548396253193304e-05, "loss": 5.8205798268318176e-05, "step": 255350 }, { "epoch": 72.48367868294068, "grad_norm": 0.01800905354321003, "learning_rate": 2.7545557763269942e-05, "loss": 8.96751880645752e-05, "step": 255360 }, { "epoch": 72.48651717286404, "grad_norm": 0.02371426858007908, "learning_rate": 2.754271927334658e-05, "loss": 0.00014707949012517929, "step": 255370 }, { "epoch": 72.4893556627874, "grad_norm": 0.0011370974825695157, "learning_rate": 2.753988078342322e-05, "loss": 0.00026401858776807784, "step": 255380 }, { "epoch": 72.49219415271075, "grad_norm": 0.5062155723571777, "learning_rate": 2.753704229349986e-05, "loss": 0.00024558361619710924, "step": 255390 }, { "epoch": 72.49503264263411, "grad_norm": 0.0019965744577348232, "learning_rate": 2.7534203803576497e-05, "loss": 8.358731865882873e-05, "step": 255400 }, { "epoch": 72.49787113255748, "grad_norm": 0.0034066643565893173, "learning_rate": 2.753136531365314e-05, "loss": 0.001426139287650585, "step": 255410 }, { "epoch": 72.50070962248084, "grad_norm": 0.006718284450471401, "learning_rate": 2.7528526823729777e-05, "loss": 0.0005513507872819901, "step": 255420 }, { "epoch": 72.5035481124042, "grad_norm": 0.004282697569578886, "learning_rate": 2.7525688333806415e-05, "loss": 0.00015330482274293898, "step": 255430 }, { "epoch": 72.50638660232757, "grad_norm": 0.04834984987974167, "learning_rate": 2.7522849843883053e-05, "loss": 0.001006540097296238, "step": 255440 }, { "epoch": 72.50922509225093, "grad_norm": 0.008403277024626732, "learning_rate": 2.7520011353959698e-05, "loss": 0.00016974247992038727, "step": 255450 }, { "epoch": 72.51206358217428, "grad_norm": 0.0925353392958641, "learning_rate": 2.7517172864036332e-05, "loss": 6.361585110425949e-05, "step": 255460 }, { "epoch": 72.51490207209764, "grad_norm": 0.00688747875392437, "learning_rate": 2.751433437411297e-05, "loss": 5.6990794837474824e-05, "step": 255470 }, { "epoch": 72.517740562021, "grad_norm": 0.03270360827445984, "learning_rate": 2.7511495884189615e-05, "loss": 0.001118871383368969, "step": 255480 }, { "epoch": 72.52057905194437, "grad_norm": 0.0071588256396353245, "learning_rate": 2.7508657394266253e-05, "loss": 3.3941492438316347e-05, "step": 255490 }, { "epoch": 72.52341754186773, "grad_norm": 0.0026304605416953564, "learning_rate": 2.750581890434289e-05, "loss": 7.360186427831649e-05, "step": 255500 }, { "epoch": 72.52341754186773, "eval_accuracy": 0.9842309404209322, "eval_loss": 0.06232190504670143, "eval_runtime": 35.4993, "eval_samples_per_second": 443.022, "eval_steps_per_second": 6.93, "step": 255500 }, { "epoch": 72.52625603179109, "grad_norm": 0.0046160463243722916, "learning_rate": 2.7502980414419532e-05, "loss": 0.003437398374080658, "step": 255510 }, { "epoch": 72.52909452171444, "grad_norm": 0.005471543874591589, "learning_rate": 2.750014192449617e-05, "loss": 0.00031408127397298814, "step": 255520 }, { "epoch": 72.5319330116378, "grad_norm": 0.028663596138358116, "learning_rate": 2.7497303434572808e-05, "loss": 5.666390061378479e-05, "step": 255530 }, { "epoch": 72.53477150156117, "grad_norm": 0.10623345524072647, "learning_rate": 2.7494464944649446e-05, "loss": 0.010169468820095062, "step": 255540 }, { "epoch": 72.53760999148453, "grad_norm": 0.008959038183093071, "learning_rate": 2.7491626454726088e-05, "loss": 8.813552558422089e-05, "step": 255550 }, { "epoch": 72.54044848140789, "grad_norm": 14.497328758239746, "learning_rate": 2.7488787964802726e-05, "loss": 0.00586983859539032, "step": 255560 }, { "epoch": 72.54328697133126, "grad_norm": 0.25083139538764954, "learning_rate": 2.7485949474879364e-05, "loss": 0.00023650452494621276, "step": 255570 }, { "epoch": 72.54612546125462, "grad_norm": 14.82262897491455, "learning_rate": 2.7483110984956005e-05, "loss": 0.004290474206209182, "step": 255580 }, { "epoch": 72.54896395117797, "grad_norm": 0.0028224505949765444, "learning_rate": 2.7480272495032643e-05, "loss": 8.099302649497986e-05, "step": 255590 }, { "epoch": 72.55180244110133, "grad_norm": 0.5891464352607727, "learning_rate": 2.747743400510928e-05, "loss": 0.00012783762067556382, "step": 255600 }, { "epoch": 72.5546409310247, "grad_norm": 0.030054526403546333, "learning_rate": 2.7474595515185926e-05, "loss": 0.005269253253936767, "step": 255610 }, { "epoch": 72.55747942094806, "grad_norm": 0.012001628056168556, "learning_rate": 2.7471757025262564e-05, "loss": 0.00030978545546531675, "step": 255620 }, { "epoch": 72.56031791087142, "grad_norm": 0.0009274699259549379, "learning_rate": 2.7468918535339198e-05, "loss": 3.6133266985416415e-05, "step": 255630 }, { "epoch": 72.56315640079478, "grad_norm": 0.027821039780974388, "learning_rate": 2.7466080045415836e-05, "loss": 3.152601420879364e-05, "step": 255640 }, { "epoch": 72.56599489071814, "grad_norm": 0.19842694699764252, "learning_rate": 2.746324155549248e-05, "loss": 0.0007920570671558381, "step": 255650 }, { "epoch": 72.5688333806415, "grad_norm": 0.00453875632956624, "learning_rate": 2.746040306556912e-05, "loss": 0.000300595723092556, "step": 255660 }, { "epoch": 72.57167187056486, "grad_norm": 0.010995208285748959, "learning_rate": 2.7457564575645757e-05, "loss": 0.0008030971512198448, "step": 255670 }, { "epoch": 72.57451036048822, "grad_norm": 0.02107749506831169, "learning_rate": 2.74547260857224e-05, "loss": 0.0009128415957093238, "step": 255680 }, { "epoch": 72.57734885041158, "grad_norm": 0.023033402860164642, "learning_rate": 2.7451887595799036e-05, "loss": 0.0017595034092664718, "step": 255690 }, { "epoch": 72.58018734033494, "grad_norm": 0.060600198805332184, "learning_rate": 2.7449049105875674e-05, "loss": 0.00014048144221305846, "step": 255700 }, { "epoch": 72.58302583025831, "grad_norm": 0.02973606437444687, "learning_rate": 2.7446210615952316e-05, "loss": 7.43059441447258e-05, "step": 255710 }, { "epoch": 72.58586432018166, "grad_norm": 0.013244489207863808, "learning_rate": 2.7443372126028954e-05, "loss": 0.001796083338558674, "step": 255720 }, { "epoch": 72.58870281010502, "grad_norm": 0.013031989336013794, "learning_rate": 2.7440533636105592e-05, "loss": 6.497446447610855e-05, "step": 255730 }, { "epoch": 72.59154130002838, "grad_norm": 0.0016121434746310115, "learning_rate": 2.7437695146182236e-05, "loss": 6.627142429351806e-05, "step": 255740 }, { "epoch": 72.59437978995174, "grad_norm": 0.017171913757920265, "learning_rate": 2.743485665625887e-05, "loss": 9.579658508300782e-05, "step": 255750 }, { "epoch": 72.59721827987511, "grad_norm": 0.001280573895201087, "learning_rate": 2.743201816633551e-05, "loss": 6.513558328151703e-05, "step": 255760 }, { "epoch": 72.60005676979847, "grad_norm": 0.009589280933141708, "learning_rate": 2.7429179676412147e-05, "loss": 3.3332034945487976e-05, "step": 255770 }, { "epoch": 72.60289525972183, "grad_norm": 0.025994922965765, "learning_rate": 2.7426341186488792e-05, "loss": 5.811173468828201e-05, "step": 255780 }, { "epoch": 72.60573374964518, "grad_norm": 0.06323391944169998, "learning_rate": 2.742350269656543e-05, "loss": 0.0002646975219249725, "step": 255790 }, { "epoch": 72.60857223956855, "grad_norm": 0.8469845056533813, "learning_rate": 2.7420664206642064e-05, "loss": 0.00019891448318958281, "step": 255800 }, { "epoch": 72.61141072949191, "grad_norm": 0.015659941360354424, "learning_rate": 2.741782571671871e-05, "loss": 4.6693533658981326e-05, "step": 255810 }, { "epoch": 72.61424921941527, "grad_norm": 0.00869215838611126, "learning_rate": 2.7414987226795347e-05, "loss": 3.9534084498882295e-05, "step": 255820 }, { "epoch": 72.61708770933863, "grad_norm": 0.02603863552212715, "learning_rate": 2.7412148736871985e-05, "loss": 0.0002013135701417923, "step": 255830 }, { "epoch": 72.619926199262, "grad_norm": 0.005719569977372885, "learning_rate": 2.7409310246948626e-05, "loss": 4.099905490875244e-05, "step": 255840 }, { "epoch": 72.62276468918536, "grad_norm": 0.005762000102549791, "learning_rate": 2.7406471757025264e-05, "loss": 0.00014225617051124572, "step": 255850 }, { "epoch": 72.62560317910871, "grad_norm": 0.0006182634388096631, "learning_rate": 2.7403633267101902e-05, "loss": 7.122475653886795e-05, "step": 255860 }, { "epoch": 72.62844166903207, "grad_norm": 0.008028978481888771, "learning_rate": 2.740079477717854e-05, "loss": 2.617649734020233e-05, "step": 255870 }, { "epoch": 72.63128015895543, "grad_norm": 0.005858041811734438, "learning_rate": 2.7397956287255182e-05, "loss": 5.39928674697876e-05, "step": 255880 }, { "epoch": 72.6341186488788, "grad_norm": 0.07864093780517578, "learning_rate": 2.739511779733182e-05, "loss": 6.900858134031295e-05, "step": 255890 }, { "epoch": 72.63695713880216, "grad_norm": 0.007738275453448296, "learning_rate": 2.7392279307408458e-05, "loss": 3.210119903087616e-05, "step": 255900 }, { "epoch": 72.63979562872552, "grad_norm": 0.0015257705235853791, "learning_rate": 2.7389440817485103e-05, "loss": 4.162993282079697e-05, "step": 255910 }, { "epoch": 72.64263411864889, "grad_norm": 0.024029020220041275, "learning_rate": 2.738660232756174e-05, "loss": 2.7784332633018494e-05, "step": 255920 }, { "epoch": 72.64547260857223, "grad_norm": 0.006053834222257137, "learning_rate": 2.7383763837638375e-05, "loss": 2.6985257863998414e-05, "step": 255930 }, { "epoch": 72.6483110984956, "grad_norm": 0.0011154675157740712, "learning_rate": 2.738092534771502e-05, "loss": 2.140700817108154e-05, "step": 255940 }, { "epoch": 72.65114958841896, "grad_norm": 0.058203548192977905, "learning_rate": 2.7378086857791658e-05, "loss": 0.00016670208424329758, "step": 255950 }, { "epoch": 72.65398807834232, "grad_norm": 0.0006432099617086351, "learning_rate": 2.7375248367868296e-05, "loss": 8.587520569562912e-05, "step": 255960 }, { "epoch": 72.65682656826569, "grad_norm": 0.012517590075731277, "learning_rate": 2.7372409877944934e-05, "loss": 4.2850524187088014e-05, "step": 255970 }, { "epoch": 72.65966505818905, "grad_norm": 0.007645823527127504, "learning_rate": 2.7369571388021575e-05, "loss": 0.00018059685826301574, "step": 255980 }, { "epoch": 72.6625035481124, "grad_norm": 0.006592554040253162, "learning_rate": 2.7366732898098213e-05, "loss": 3.70485708117485e-05, "step": 255990 }, { "epoch": 72.66534203803576, "grad_norm": 0.00625831913203001, "learning_rate": 2.736389440817485e-05, "loss": 0.011151710897684098, "step": 256000 }, { "epoch": 72.66534203803576, "eval_accuracy": 0.9859477331976855, "eval_loss": 0.05640696361660957, "eval_runtime": 35.386, "eval_samples_per_second": 444.441, "eval_steps_per_second": 6.952, "step": 256000 }, { "epoch": 72.66818052795912, "grad_norm": 0.012472826987504959, "learning_rate": 2.7361055918251493e-05, "loss": 2.5691837072372438e-05, "step": 256010 }, { "epoch": 72.67101901788249, "grad_norm": 0.0022075565066188574, "learning_rate": 2.735821742832813e-05, "loss": 2.3709051311016084e-05, "step": 256020 }, { "epoch": 72.67385750780585, "grad_norm": 0.004370393231511116, "learning_rate": 2.735537893840477e-05, "loss": 0.0033002469688653948, "step": 256030 }, { "epoch": 72.67669599772921, "grad_norm": 0.025728201493620872, "learning_rate": 2.7352540448481413e-05, "loss": 9.451135993003845e-05, "step": 256040 }, { "epoch": 72.67953448765257, "grad_norm": 1.1480592489242554, "learning_rate": 2.7349701958558048e-05, "loss": 0.0001732662320137024, "step": 256050 }, { "epoch": 72.68237297757592, "grad_norm": 0.0019975353498011827, "learning_rate": 2.7346863468634686e-05, "loss": 0.0015030099079012872, "step": 256060 }, { "epoch": 72.68521146749929, "grad_norm": 0.013140937313437462, "learning_rate": 2.7344024978711324e-05, "loss": 9.643081575632096e-05, "step": 256070 }, { "epoch": 72.68804995742265, "grad_norm": 3.9220800399780273, "learning_rate": 2.734118648878797e-05, "loss": 0.0007483253255486488, "step": 256080 }, { "epoch": 72.69088844734601, "grad_norm": 0.0014126184396445751, "learning_rate": 2.7338347998864607e-05, "loss": 0.00013951323926448823, "step": 256090 }, { "epoch": 72.69372693726937, "grad_norm": 4.679144382476807, "learning_rate": 2.733550950894124e-05, "loss": 0.0008863156661391258, "step": 256100 }, { "epoch": 72.69656542719274, "grad_norm": 0.0012889550998806953, "learning_rate": 2.7332671019017886e-05, "loss": 3.5200640559196474e-05, "step": 256110 }, { "epoch": 72.6994039171161, "grad_norm": 0.018234746530652046, "learning_rate": 2.7329832529094524e-05, "loss": 0.00010869819670915604, "step": 256120 }, { "epoch": 72.70224240703945, "grad_norm": 0.003755244193598628, "learning_rate": 2.7326994039171162e-05, "loss": 6.116721779108048e-05, "step": 256130 }, { "epoch": 72.70508089696281, "grad_norm": 0.018673891201615334, "learning_rate": 2.7324155549247803e-05, "loss": 0.0008909845724701882, "step": 256140 }, { "epoch": 72.70791938688618, "grad_norm": 0.009338269010186195, "learning_rate": 2.732131705932444e-05, "loss": 0.0006859106943011284, "step": 256150 }, { "epoch": 72.71075787680954, "grad_norm": 0.0039001035038381815, "learning_rate": 2.7318762418393418e-05, "loss": 0.00260247141122818, "step": 256160 }, { "epoch": 72.7135963667329, "grad_norm": 0.002759870607405901, "learning_rate": 2.7315923928470056e-05, "loss": 0.002221350371837616, "step": 256170 }, { "epoch": 72.71643485665626, "grad_norm": 0.000744418881367892, "learning_rate": 2.7313085438546694e-05, "loss": 0.0010812327265739441, "step": 256180 }, { "epoch": 72.71927334657963, "grad_norm": 0.004612651653587818, "learning_rate": 2.7310246948623335e-05, "loss": 0.00011620596051216125, "step": 256190 }, { "epoch": 72.72211183650298, "grad_norm": 1.9160586595535278, "learning_rate": 2.7307408458699973e-05, "loss": 0.0003154931589961052, "step": 256200 }, { "epoch": 72.72495032642634, "grad_norm": 0.008405471220612526, "learning_rate": 2.730456996877661e-05, "loss": 2.5060400366783143e-05, "step": 256210 }, { "epoch": 72.7277888163497, "grad_norm": 0.023562684655189514, "learning_rate": 2.7301731478853253e-05, "loss": 0.004291234910488129, "step": 256220 }, { "epoch": 72.73062730627306, "grad_norm": 0.009877387434244156, "learning_rate": 2.729889298892989e-05, "loss": 3.9810501039028165e-05, "step": 256230 }, { "epoch": 72.73346579619643, "grad_norm": 0.009299155324697495, "learning_rate": 2.729605449900653e-05, "loss": 5.157012492418289e-05, "step": 256240 }, { "epoch": 72.73630428611979, "grad_norm": 0.02118619717657566, "learning_rate": 2.7293216009083167e-05, "loss": 0.00010485332459211349, "step": 256250 }, { "epoch": 72.73914277604314, "grad_norm": 0.0020188423804938793, "learning_rate": 2.729037751915981e-05, "loss": 3.547742962837219e-05, "step": 256260 }, { "epoch": 72.7419812659665, "grad_norm": 0.014814677648246288, "learning_rate": 2.7287539029236446e-05, "loss": 0.00014226753264665604, "step": 256270 }, { "epoch": 72.74481975588986, "grad_norm": 0.007897797971963882, "learning_rate": 2.7284700539313084e-05, "loss": 0.0007465755566954613, "step": 256280 }, { "epoch": 72.74765824581323, "grad_norm": 0.0031328978948295116, "learning_rate": 2.728186204938973e-05, "loss": 6.618332117795944e-05, "step": 256290 }, { "epoch": 72.75049673573659, "grad_norm": 0.0014312253333628178, "learning_rate": 2.7279023559466367e-05, "loss": 7.582623511552811e-05, "step": 256300 }, { "epoch": 72.75333522565995, "grad_norm": 0.004754200112074614, "learning_rate": 2.7276185069543005e-05, "loss": 0.0013499468564987182, "step": 256310 }, { "epoch": 72.75617371558332, "grad_norm": 0.0030248381663113832, "learning_rate": 2.7273346579619646e-05, "loss": 0.004264193400740623, "step": 256320 }, { "epoch": 72.75901220550666, "grad_norm": 0.08371465653181076, "learning_rate": 2.7270508089696284e-05, "loss": 5.89422881603241e-05, "step": 256330 }, { "epoch": 72.76185069543003, "grad_norm": 0.094712994992733, "learning_rate": 2.7267669599772922e-05, "loss": 0.001042301580309868, "step": 256340 }, { "epoch": 72.76468918535339, "grad_norm": 0.01503104716539383, "learning_rate": 2.726483110984956e-05, "loss": 0.00030189286917448044, "step": 256350 }, { "epoch": 72.76752767527675, "grad_norm": 0.1521647423505783, "learning_rate": 2.72619926199262e-05, "loss": 7.630642503499984e-05, "step": 256360 }, { "epoch": 72.77036616520012, "grad_norm": 0.0009377789683640003, "learning_rate": 2.725915413000284e-05, "loss": 3.412701189517975e-05, "step": 256370 }, { "epoch": 72.77320465512348, "grad_norm": 0.05221385508775711, "learning_rate": 2.7256315640079477e-05, "loss": 0.0005121968686580658, "step": 256380 }, { "epoch": 72.77604314504684, "grad_norm": 0.006161659024655819, "learning_rate": 2.725347715015612e-05, "loss": 2.1910667419433595e-05, "step": 256390 }, { "epoch": 72.77888163497019, "grad_norm": 0.0062532988376915455, "learning_rate": 2.7250638660232757e-05, "loss": 3.1571090221405027e-05, "step": 256400 }, { "epoch": 72.78172012489355, "grad_norm": 0.01721530593931675, "learning_rate": 2.7247800170309395e-05, "loss": 3.897026181221008e-05, "step": 256410 }, { "epoch": 72.78455861481692, "grad_norm": 0.009746907278895378, "learning_rate": 2.724496168038604e-05, "loss": 2.9337406158447265e-05, "step": 256420 }, { "epoch": 72.78739710474028, "grad_norm": 0.027206288650631905, "learning_rate": 2.7242123190462677e-05, "loss": 0.0016297094523906707, "step": 256430 }, { "epoch": 72.79023559466364, "grad_norm": 0.10291840136051178, "learning_rate": 2.7239284700539312e-05, "loss": 0.00011469870805740357, "step": 256440 }, { "epoch": 72.793074084587, "grad_norm": 0.005531615111976862, "learning_rate": 2.723644621061595e-05, "loss": 0.00015575755387544631, "step": 256450 }, { "epoch": 72.79591257451035, "grad_norm": 0.25717222690582275, "learning_rate": 2.7233607720692595e-05, "loss": 0.00013605859130620955, "step": 256460 }, { "epoch": 72.79875106443372, "grad_norm": 0.004335436969995499, "learning_rate": 2.7230769230769233e-05, "loss": 0.00014423076063394547, "step": 256470 }, { "epoch": 72.80158955435708, "grad_norm": 0.06141893193125725, "learning_rate": 2.722793074084587e-05, "loss": 3.03635373711586e-05, "step": 256480 }, { "epoch": 72.80442804428044, "grad_norm": 0.007792684715241194, "learning_rate": 2.7225092250922512e-05, "loss": 2.7094781398773193e-05, "step": 256490 }, { "epoch": 72.8072665342038, "grad_norm": 0.0033272635191679, "learning_rate": 2.722225376099915e-05, "loss": 3.1496770679950715e-05, "step": 256500 }, { "epoch": 72.8072665342038, "eval_accuracy": 0.9861384879506581, "eval_loss": 0.05706954374909401, "eval_runtime": 35.813, "eval_samples_per_second": 439.142, "eval_steps_per_second": 6.869, "step": 256500 }, { "epoch": 72.81010502412717, "grad_norm": 0.0814119353890419, "learning_rate": 2.7219415271075788e-05, "loss": 3.998689353466034e-05, "step": 256510 }, { "epoch": 72.81294351405053, "grad_norm": 0.00371216656640172, "learning_rate": 2.721657678115243e-05, "loss": 1.708194613456726e-05, "step": 256520 }, { "epoch": 72.81578200397388, "grad_norm": 0.009465017355978489, "learning_rate": 2.7213738291229067e-05, "loss": 3.122575581073761e-05, "step": 256530 }, { "epoch": 72.81862049389724, "grad_norm": 0.020701495930552483, "learning_rate": 2.7210899801305705e-05, "loss": 7.016919553279877e-05, "step": 256540 }, { "epoch": 72.8214589838206, "grad_norm": 0.001958246575668454, "learning_rate": 2.7208061311382343e-05, "loss": 2.3840367794036867e-05, "step": 256550 }, { "epoch": 72.82429747374397, "grad_norm": 0.0025637599173933268, "learning_rate": 2.7205222821458988e-05, "loss": 6.474163383245468e-05, "step": 256560 }, { "epoch": 72.82713596366733, "grad_norm": 0.009427894838154316, "learning_rate": 2.7202384331535623e-05, "loss": 1.7231516540050507e-05, "step": 256570 }, { "epoch": 72.8299744535907, "grad_norm": 0.19059523940086365, "learning_rate": 2.719954584161226e-05, "loss": 4.372484982013702e-05, "step": 256580 }, { "epoch": 72.83281294351406, "grad_norm": 0.006489157676696777, "learning_rate": 2.7196707351688906e-05, "loss": 4.2699649930000307e-05, "step": 256590 }, { "epoch": 72.8356514334374, "grad_norm": 0.006101259961724281, "learning_rate": 2.7193868861765544e-05, "loss": 3.25668603181839e-05, "step": 256600 }, { "epoch": 72.83848992336077, "grad_norm": 0.005342882592231035, "learning_rate": 2.719103037184218e-05, "loss": 3.9472058415412906e-05, "step": 256610 }, { "epoch": 72.84132841328413, "grad_norm": 0.003378152148798108, "learning_rate": 2.7188191881918823e-05, "loss": 1.9479915499687196e-05, "step": 256620 }, { "epoch": 72.8441669032075, "grad_norm": 0.005904456600546837, "learning_rate": 2.718535339199546e-05, "loss": 2.3256801068782806e-05, "step": 256630 }, { "epoch": 72.84700539313086, "grad_norm": 0.5566319823265076, "learning_rate": 2.71825149020721e-05, "loss": 0.0001361517235636711, "step": 256640 }, { "epoch": 72.84984388305422, "grad_norm": 0.003173575736582279, "learning_rate": 2.7179676412148737e-05, "loss": 6.170198321342468e-05, "step": 256650 }, { "epoch": 72.85268237297758, "grad_norm": 0.010756313800811768, "learning_rate": 2.7176837922225378e-05, "loss": 4.186294972896576e-05, "step": 256660 }, { "epoch": 72.85552086290093, "grad_norm": 0.004129583016037941, "learning_rate": 2.7173999432302016e-05, "loss": 2.0656920969486237e-05, "step": 256670 }, { "epoch": 72.8583593528243, "grad_norm": 0.0019538416527211666, "learning_rate": 2.7171160942378654e-05, "loss": 2.5331415235996248e-05, "step": 256680 }, { "epoch": 72.86119784274766, "grad_norm": 0.5113203525543213, "learning_rate": 2.7168322452455296e-05, "loss": 0.00012202970683574676, "step": 256690 }, { "epoch": 72.86403633267102, "grad_norm": 0.032473284751176834, "learning_rate": 2.7165483962531934e-05, "loss": 2.7486123144626617e-05, "step": 256700 }, { "epoch": 72.86687482259438, "grad_norm": 0.004424642771482468, "learning_rate": 2.716264547260857e-05, "loss": 1.977495849132538e-05, "step": 256710 }, { "epoch": 72.86971331251775, "grad_norm": 0.011523467488586903, "learning_rate": 2.7159806982685216e-05, "loss": 4.114564508199692e-05, "step": 256720 }, { "epoch": 72.8725518024411, "grad_norm": 0.00617584865540266, "learning_rate": 2.7156968492761854e-05, "loss": 0.00010689683258533478, "step": 256730 }, { "epoch": 72.87539029236446, "grad_norm": 0.001210705260746181, "learning_rate": 2.715413000283849e-05, "loss": 3.9689801633358e-05, "step": 256740 }, { "epoch": 72.87822878228782, "grad_norm": 0.008157782256603241, "learning_rate": 2.7151291512915127e-05, "loss": 3.4207664430141446e-05, "step": 256750 }, { "epoch": 72.88106727221118, "grad_norm": 0.006912074517458677, "learning_rate": 2.714845302299177e-05, "loss": 5.738120526075363e-05, "step": 256760 }, { "epoch": 72.88390576213455, "grad_norm": 0.0024088898207992315, "learning_rate": 2.714561453306841e-05, "loss": 4.257746040821075e-05, "step": 256770 }, { "epoch": 72.88674425205791, "grad_norm": 0.00146151939406991, "learning_rate": 2.7142776043145048e-05, "loss": 5.233846604824066e-05, "step": 256780 }, { "epoch": 72.88958274198127, "grad_norm": 0.0007153049809858203, "learning_rate": 2.713993755322169e-05, "loss": 3.951322287321091e-05, "step": 256790 }, { "epoch": 72.89242123190462, "grad_norm": 0.012281271629035473, "learning_rate": 2.7137099063298327e-05, "loss": 2.138260751962662e-05, "step": 256800 }, { "epoch": 72.89525972182798, "grad_norm": 0.0034205829724669456, "learning_rate": 2.7134260573374965e-05, "loss": 3.4994259476661685e-05, "step": 256810 }, { "epoch": 72.89809821175135, "grad_norm": 0.0032882527448236942, "learning_rate": 2.7131422083451606e-05, "loss": 0.00033808369189500807, "step": 256820 }, { "epoch": 72.90093670167471, "grad_norm": 0.013480938039720058, "learning_rate": 2.7128583593528244e-05, "loss": 7.338374853134155e-05, "step": 256830 }, { "epoch": 72.90377519159807, "grad_norm": 0.002626511501148343, "learning_rate": 2.7125745103604882e-05, "loss": 0.00015745535492897033, "step": 256840 }, { "epoch": 72.90661368152143, "grad_norm": 0.14679192006587982, "learning_rate": 2.712290661368152e-05, "loss": 0.0002065064385533333, "step": 256850 }, { "epoch": 72.9094521714448, "grad_norm": 0.06795403361320496, "learning_rate": 2.712006812375816e-05, "loss": 4.513952881097793e-05, "step": 256860 }, { "epoch": 72.91229066136815, "grad_norm": 0.23834918439388275, "learning_rate": 2.71172296338348e-05, "loss": 0.0004485674202442169, "step": 256870 }, { "epoch": 72.91512915129151, "grad_norm": 0.12344937026500702, "learning_rate": 2.7114391143911438e-05, "loss": 4.969257861375809e-05, "step": 256880 }, { "epoch": 72.91796764121487, "grad_norm": 0.11640414595603943, "learning_rate": 2.7111552653988082e-05, "loss": 0.0005999818444252014, "step": 256890 }, { "epoch": 72.92080613113824, "grad_norm": 0.006680709775537252, "learning_rate": 2.710871416406472e-05, "loss": 0.0002626808360219002, "step": 256900 }, { "epoch": 72.9236446210616, "grad_norm": 0.009837164543569088, "learning_rate": 2.7105875674141355e-05, "loss": 9.517371654510498e-05, "step": 256910 }, { "epoch": 72.92648311098496, "grad_norm": 0.0075636692345142365, "learning_rate": 2.7103037184218e-05, "loss": 5.415380001068115e-05, "step": 256920 }, { "epoch": 72.92932160090831, "grad_norm": 0.0029185758903622627, "learning_rate": 2.7100198694294638e-05, "loss": 0.0001367991790175438, "step": 256930 }, { "epoch": 72.93216009083167, "grad_norm": 0.0032518801745027304, "learning_rate": 2.7097360204371276e-05, "loss": 0.0001112762838602066, "step": 256940 }, { "epoch": 72.93499858075504, "grad_norm": 0.030642835423350334, "learning_rate": 2.7094521714447914e-05, "loss": 0.00023232381790876387, "step": 256950 }, { "epoch": 72.9378370706784, "grad_norm": 0.5938490033149719, "learning_rate": 2.7091683224524555e-05, "loss": 0.0003092348575592041, "step": 256960 }, { "epoch": 72.94067556060176, "grad_norm": 0.0017493371851742268, "learning_rate": 2.7088844734601193e-05, "loss": 0.00011243280023336411, "step": 256970 }, { "epoch": 72.94351405052512, "grad_norm": 0.013848627917468548, "learning_rate": 2.708600624467783e-05, "loss": 0.002385599352419376, "step": 256980 }, { "epoch": 72.94635254044849, "grad_norm": 0.011059543117880821, "learning_rate": 2.7083167754754472e-05, "loss": 0.004818202555179596, "step": 256990 }, { "epoch": 72.94919103037184, "grad_norm": 9.213132858276367, "learning_rate": 2.708032926483111e-05, "loss": 0.003984832763671875, "step": 257000 }, { "epoch": 72.94919103037184, "eval_accuracy": 0.9856298086093979, "eval_loss": 0.061906617134809494, "eval_runtime": 35.6607, "eval_samples_per_second": 441.017, "eval_steps_per_second": 6.898, "step": 257000 }, { "epoch": 72.9520295202952, "grad_norm": 0.003099410329014063, "learning_rate": 2.707749077490775e-05, "loss": 0.0005860276520252228, "step": 257010 }, { "epoch": 72.95486801021856, "grad_norm": 0.44858479499816895, "learning_rate": 2.7074652284984393e-05, "loss": 0.009577041864395142, "step": 257020 }, { "epoch": 72.95770650014192, "grad_norm": 0.008748890832066536, "learning_rate": 2.707181379506103e-05, "loss": 0.00031049270182847977, "step": 257030 }, { "epoch": 72.96054499006529, "grad_norm": 0.006292826496064663, "learning_rate": 2.7068975305137666e-05, "loss": 0.0010626673698425292, "step": 257040 }, { "epoch": 72.96338347998865, "grad_norm": 0.07701960951089859, "learning_rate": 2.7066136815214304e-05, "loss": 0.00012403242290019988, "step": 257050 }, { "epoch": 72.96622196991201, "grad_norm": 0.006724129430949688, "learning_rate": 2.706329832529095e-05, "loss": 0.00021589808166027068, "step": 257060 }, { "epoch": 72.96906045983536, "grad_norm": 0.004868372809141874, "learning_rate": 2.7060459835367586e-05, "loss": 4.062727093696594e-05, "step": 257070 }, { "epoch": 72.97189894975872, "grad_norm": 0.744376003742218, "learning_rate": 2.7057621345444224e-05, "loss": 0.00015497058629989623, "step": 257080 }, { "epoch": 72.97473743968209, "grad_norm": 0.01226984802633524, "learning_rate": 2.7054782855520866e-05, "loss": 0.00020292475819587706, "step": 257090 }, { "epoch": 72.97757592960545, "grad_norm": 0.016138672828674316, "learning_rate": 2.7051944365597504e-05, "loss": 0.0006278045475482941, "step": 257100 }, { "epoch": 72.98041441952881, "grad_norm": 0.2596878409385681, "learning_rate": 2.7049105875674142e-05, "loss": 0.0018873171880841254, "step": 257110 }, { "epoch": 72.98325290945218, "grad_norm": 0.11461453884840012, "learning_rate": 2.7046267385750783e-05, "loss": 0.000369952991604805, "step": 257120 }, { "epoch": 72.98609139937554, "grad_norm": 0.0008436727803200483, "learning_rate": 2.704342889582742e-05, "loss": 0.00023712832480669022, "step": 257130 }, { "epoch": 72.98892988929889, "grad_norm": 0.14474160969257355, "learning_rate": 2.704059040590406e-05, "loss": 9.917747229337692e-05, "step": 257140 }, { "epoch": 72.99176837922225, "grad_norm": 0.15226534008979797, "learning_rate": 2.7037751915980697e-05, "loss": 0.0002776434645056725, "step": 257150 }, { "epoch": 72.99460686914561, "grad_norm": 0.0018758811056613922, "learning_rate": 2.703491342605734e-05, "loss": 0.00010702591389417648, "step": 257160 }, { "epoch": 72.99744535906898, "grad_norm": 0.009810221381485462, "learning_rate": 2.7032074936133977e-05, "loss": 0.00022428464144468306, "step": 257170 }, { "epoch": 73.00028384899234, "grad_norm": 0.010142710991203785, "learning_rate": 2.7029236446210615e-05, "loss": 0.0001823167083784938, "step": 257180 }, { "epoch": 73.0031223389157, "grad_norm": 0.1273878514766693, "learning_rate": 2.702639795628726e-05, "loss": 5.112718790769577e-05, "step": 257190 }, { "epoch": 73.00596082883905, "grad_norm": 0.008643076755106449, "learning_rate": 2.7023559466363897e-05, "loss": 0.00036035217344760896, "step": 257200 }, { "epoch": 73.00879931876241, "grad_norm": 0.011357411742210388, "learning_rate": 2.7020720976440532e-05, "loss": 0.00010585449635982513, "step": 257210 }, { "epoch": 73.01163780868578, "grad_norm": 0.006701549515128136, "learning_rate": 2.7017882486517177e-05, "loss": 0.0003463352099061012, "step": 257220 }, { "epoch": 73.01447629860914, "grad_norm": 1.8782576322555542, "learning_rate": 2.7015043996593815e-05, "loss": 0.00037705712020397186, "step": 257230 }, { "epoch": 73.0173147885325, "grad_norm": 0.24585679173469543, "learning_rate": 2.7012205506670453e-05, "loss": 0.00011990517377853393, "step": 257240 }, { "epoch": 73.02015327845587, "grad_norm": 0.6895551085472107, "learning_rate": 2.700936701674709e-05, "loss": 0.00019715409725904465, "step": 257250 }, { "epoch": 73.02299176837923, "grad_norm": 0.012791278772056103, "learning_rate": 2.7006528526823732e-05, "loss": 3.208033740520477e-05, "step": 257260 }, { "epoch": 73.02583025830258, "grad_norm": 0.0028787057381123304, "learning_rate": 2.700369003690037e-05, "loss": 0.002344436198472977, "step": 257270 }, { "epoch": 73.02866874822594, "grad_norm": 6.219855785369873, "learning_rate": 2.7000851546977008e-05, "loss": 0.0011109542101621628, "step": 257280 }, { "epoch": 73.0315072381493, "grad_norm": 0.0015396855305880308, "learning_rate": 2.699801305705365e-05, "loss": 0.00031723715364933013, "step": 257290 }, { "epoch": 73.03434572807267, "grad_norm": 3.661508798599243, "learning_rate": 2.6995174567130287e-05, "loss": 0.0013105940073728562, "step": 257300 }, { "epoch": 73.03718421799603, "grad_norm": 0.16294755041599274, "learning_rate": 2.6992336077206925e-05, "loss": 7.377993315458298e-05, "step": 257310 }, { "epoch": 73.04002270791939, "grad_norm": 0.0005457229563035071, "learning_rate": 2.698949758728357e-05, "loss": 0.004144312813878059, "step": 257320 }, { "epoch": 73.04286119784275, "grad_norm": 0.0033258236944675446, "learning_rate": 2.6986659097360205e-05, "loss": 4.1620805859565735e-05, "step": 257330 }, { "epoch": 73.0456996877661, "grad_norm": 0.015932735055685043, "learning_rate": 2.6983820607436843e-05, "loss": 9.2301145195961e-05, "step": 257340 }, { "epoch": 73.04853817768947, "grad_norm": 0.007428810931742191, "learning_rate": 2.698098211751348e-05, "loss": 0.0001518111675977707, "step": 257350 }, { "epoch": 73.05137666761283, "grad_norm": 0.008041045628488064, "learning_rate": 2.6978143627590125e-05, "loss": 0.0002515658736228943, "step": 257360 }, { "epoch": 73.05421515753619, "grad_norm": 0.007881779223680496, "learning_rate": 2.6975305137666763e-05, "loss": 0.00018817000091075897, "step": 257370 }, { "epoch": 73.05705364745955, "grad_norm": 0.13730326294898987, "learning_rate": 2.6972466647743398e-05, "loss": 0.00015487782657146453, "step": 257380 }, { "epoch": 73.05989213738292, "grad_norm": 0.005127195734530687, "learning_rate": 2.6969628157820043e-05, "loss": 2.234242856502533e-05, "step": 257390 }, { "epoch": 73.06273062730628, "grad_norm": 0.031234167516231537, "learning_rate": 2.696678966789668e-05, "loss": 0.00010768398642539978, "step": 257400 }, { "epoch": 73.06556911722963, "grad_norm": 0.059751980006694794, "learning_rate": 2.696395117797332e-05, "loss": 4.894416779279709e-05, "step": 257410 }, { "epoch": 73.06840760715299, "grad_norm": 0.004208489321172237, "learning_rate": 2.696111268804996e-05, "loss": 0.0002831032499670982, "step": 257420 }, { "epoch": 73.07124609707635, "grad_norm": 0.006405980326235294, "learning_rate": 2.6958274198126598e-05, "loss": 8.552968502044678e-05, "step": 257430 }, { "epoch": 73.07408458699972, "grad_norm": 0.021315600723028183, "learning_rate": 2.6955435708203236e-05, "loss": 0.00023087728768587113, "step": 257440 }, { "epoch": 73.07692307692308, "grad_norm": 0.11200554668903351, "learning_rate": 2.695259721827988e-05, "loss": 9.17576253414154e-05, "step": 257450 }, { "epoch": 73.07976156684644, "grad_norm": 0.006775586400181055, "learning_rate": 2.6949758728356515e-05, "loss": 0.0017551641911268234, "step": 257460 }, { "epoch": 73.08260005676979, "grad_norm": 0.0009828992187976837, "learning_rate": 2.6946920238433153e-05, "loss": 4.624668508768082e-05, "step": 257470 }, { "epoch": 73.08543854669315, "grad_norm": 0.07377085089683533, "learning_rate": 2.694408174850979e-05, "loss": 0.006061084568500519, "step": 257480 }, { "epoch": 73.08827703661652, "grad_norm": 0.0230438020080328, "learning_rate": 2.6941243258586436e-05, "loss": 0.0001135069876909256, "step": 257490 }, { "epoch": 73.09111552653988, "grad_norm": 0.002237487118691206, "learning_rate": 2.6938404768663074e-05, "loss": 0.0003342390060424805, "step": 257500 }, { "epoch": 73.09111552653988, "eval_accuracy": 0.9857569784447129, "eval_loss": 0.058461688458919525, "eval_runtime": 35.5281, "eval_samples_per_second": 442.664, "eval_steps_per_second": 6.924, "step": 257500 }, { "epoch": 73.09395401646324, "grad_norm": 0.005517668556421995, "learning_rate": 2.693556627873971e-05, "loss": 0.001569734327495098, "step": 257510 }, { "epoch": 73.0967925063866, "grad_norm": 0.008010470308363438, "learning_rate": 2.6932727788816353e-05, "loss": 0.00035322047770023344, "step": 257520 }, { "epoch": 73.09963099630997, "grad_norm": 0.3404850363731384, "learning_rate": 2.692988929889299e-05, "loss": 0.00019291546195745468, "step": 257530 }, { "epoch": 73.10246948623332, "grad_norm": 0.00840164814144373, "learning_rate": 2.692705080896963e-05, "loss": 0.013705359399318695, "step": 257540 }, { "epoch": 73.10530797615668, "grad_norm": 0.036299411207437515, "learning_rate": 2.692421231904627e-05, "loss": 6.209909915924072e-05, "step": 257550 }, { "epoch": 73.10814646608004, "grad_norm": 0.012250900268554688, "learning_rate": 2.692137382912291e-05, "loss": 0.0007161272689700127, "step": 257560 }, { "epoch": 73.1109849560034, "grad_norm": 0.00533970957621932, "learning_rate": 2.6918535339199547e-05, "loss": 0.0001756131649017334, "step": 257570 }, { "epoch": 73.11382344592677, "grad_norm": 0.004543037619441748, "learning_rate": 2.6915696849276185e-05, "loss": 0.0008457981050014496, "step": 257580 }, { "epoch": 73.11666193585013, "grad_norm": 0.013416876085102558, "learning_rate": 2.6912858359352826e-05, "loss": 9.84683632850647e-05, "step": 257590 }, { "epoch": 73.1195004257735, "grad_norm": 0.015910794958472252, "learning_rate": 2.6910019869429464e-05, "loss": 0.00015559140592813492, "step": 257600 }, { "epoch": 73.12233891569684, "grad_norm": 0.1414870023727417, "learning_rate": 2.6907181379506102e-05, "loss": 4.612058401107788e-05, "step": 257610 }, { "epoch": 73.1251774056202, "grad_norm": 0.12114179879426956, "learning_rate": 2.6904342889582747e-05, "loss": 0.0027540847659111023, "step": 257620 }, { "epoch": 73.12801589554357, "grad_norm": 6.402209758758545, "learning_rate": 2.690150439965938e-05, "loss": 0.0008344706147909164, "step": 257630 }, { "epoch": 73.13085438546693, "grad_norm": 0.0062536029145121574, "learning_rate": 2.689866590973602e-05, "loss": 3.7126243114471436e-05, "step": 257640 }, { "epoch": 73.1336928753903, "grad_norm": 0.053107019513845444, "learning_rate": 2.6895827419812664e-05, "loss": 4.877932369709015e-05, "step": 257650 }, { "epoch": 73.13653136531366, "grad_norm": 0.013808908872306347, "learning_rate": 2.6892988929889302e-05, "loss": 6.405822932720184e-05, "step": 257660 }, { "epoch": 73.139369855237, "grad_norm": 0.008432706817984581, "learning_rate": 2.689015043996594e-05, "loss": 0.00017886310815811156, "step": 257670 }, { "epoch": 73.14220834516037, "grad_norm": 0.00701554398983717, "learning_rate": 2.6887311950042575e-05, "loss": 0.0016572317108511925, "step": 257680 }, { "epoch": 73.14504683508373, "grad_norm": 0.018140152096748352, "learning_rate": 2.688447346011922e-05, "loss": 0.00011562462896108627, "step": 257690 }, { "epoch": 73.1478853250071, "grad_norm": 0.14098584651947021, "learning_rate": 2.6881634970195858e-05, "loss": 0.000624758005142212, "step": 257700 }, { "epoch": 73.15072381493046, "grad_norm": 0.003459357423707843, "learning_rate": 2.6878796480272496e-05, "loss": 7.001012563705445e-05, "step": 257710 }, { "epoch": 73.15356230485382, "grad_norm": 0.011078473180532455, "learning_rate": 2.6875957990349137e-05, "loss": 0.00018342174589633942, "step": 257720 }, { "epoch": 73.15640079477718, "grad_norm": 0.004295630846172571, "learning_rate": 2.6873119500425775e-05, "loss": 2.0440854132175446e-05, "step": 257730 }, { "epoch": 73.15923928470053, "grad_norm": 0.0033693183213472366, "learning_rate": 2.6870281010502413e-05, "loss": 7.524825632572174e-05, "step": 257740 }, { "epoch": 73.1620777746239, "grad_norm": 0.0038139449898153543, "learning_rate": 2.6867442520579054e-05, "loss": 5.1593780517578126e-05, "step": 257750 }, { "epoch": 73.16491626454726, "grad_norm": 0.002236797008663416, "learning_rate": 2.6864604030655692e-05, "loss": 8.686762303113937e-05, "step": 257760 }, { "epoch": 73.16775475447062, "grad_norm": 0.004380403086543083, "learning_rate": 2.686176554073233e-05, "loss": 0.00018824432045221328, "step": 257770 }, { "epoch": 73.17059324439398, "grad_norm": 0.004183321725577116, "learning_rate": 2.6858927050808968e-05, "loss": 2.513434737920761e-05, "step": 257780 }, { "epoch": 73.17343173431735, "grad_norm": 0.1141422837972641, "learning_rate": 2.6856088560885613e-05, "loss": 0.00013480614870786667, "step": 257790 }, { "epoch": 73.17627022424071, "grad_norm": 0.008492939174175262, "learning_rate": 2.6853250070962248e-05, "loss": 2.9560178518295288e-05, "step": 257800 }, { "epoch": 73.17910871416406, "grad_norm": 0.03772042319178581, "learning_rate": 2.6850411581038886e-05, "loss": 0.00016590431332588195, "step": 257810 }, { "epoch": 73.18194720408742, "grad_norm": 0.0013497601030394435, "learning_rate": 2.684757309111553e-05, "loss": 0.00019915271550416947, "step": 257820 }, { "epoch": 73.18478569401078, "grad_norm": 0.017827419564127922, "learning_rate": 2.684473460119217e-05, "loss": 2.8106383979320526e-05, "step": 257830 }, { "epoch": 73.18762418393415, "grad_norm": 0.013579322025179863, "learning_rate": 2.6841896111268806e-05, "loss": 0.0001475956290960312, "step": 257840 }, { "epoch": 73.19046267385751, "grad_norm": 0.002524910494685173, "learning_rate": 2.6839057621345448e-05, "loss": 4.95346263051033e-05, "step": 257850 }, { "epoch": 73.19330116378087, "grad_norm": 0.0037738613318651915, "learning_rate": 2.6836219131422086e-05, "loss": 0.00021498966962099075, "step": 257860 }, { "epoch": 73.19613965370424, "grad_norm": 0.003762316657230258, "learning_rate": 2.6833380641498724e-05, "loss": 0.001354755088686943, "step": 257870 }, { "epoch": 73.19897814362758, "grad_norm": 0.016016924753785133, "learning_rate": 2.683054215157536e-05, "loss": 7.419828325510025e-05, "step": 257880 }, { "epoch": 73.20181663355095, "grad_norm": 0.06866331398487091, "learning_rate": 2.6827703661652003e-05, "loss": 0.00011670254170894622, "step": 257890 }, { "epoch": 73.20465512347431, "grad_norm": 0.0032038185745477676, "learning_rate": 2.682486517172864e-05, "loss": 8.698813617229462e-05, "step": 257900 }, { "epoch": 73.20749361339767, "grad_norm": 0.027535714209079742, "learning_rate": 2.682202668180528e-05, "loss": 3.164820373058319e-05, "step": 257910 }, { "epoch": 73.21033210332104, "grad_norm": 0.01794910430908203, "learning_rate": 2.6819188191881924e-05, "loss": 2.272985875606537e-05, "step": 257920 }, { "epoch": 73.2131705932444, "grad_norm": 0.012878312729299068, "learning_rate": 2.681634970195856e-05, "loss": 1.985505223274231e-05, "step": 257930 }, { "epoch": 73.21600908316775, "grad_norm": 0.2763423025608063, "learning_rate": 2.6813511212035196e-05, "loss": 5.9075839817523954e-05, "step": 257940 }, { "epoch": 73.21884757309111, "grad_norm": 0.005708898883312941, "learning_rate": 2.681067272211184e-05, "loss": 2.0885281264781953e-05, "step": 257950 }, { "epoch": 73.22168606301447, "grad_norm": 0.003048142185434699, "learning_rate": 2.680783423218848e-05, "loss": 3.0434876680374146e-05, "step": 257960 }, { "epoch": 73.22452455293784, "grad_norm": 0.0052383532747626305, "learning_rate": 2.6804995742265117e-05, "loss": 1.699831336736679e-05, "step": 257970 }, { "epoch": 73.2273630428612, "grad_norm": 0.07804188132286072, "learning_rate": 2.6802157252341752e-05, "loss": 6.747767329216004e-05, "step": 257980 }, { "epoch": 73.23020153278456, "grad_norm": 0.0021408013999462128, "learning_rate": 2.6799318762418396e-05, "loss": 6.047319620847702e-05, "step": 257990 }, { "epoch": 73.23304002270793, "grad_norm": 0.004662924911826849, "learning_rate": 2.6796480272495034e-05, "loss": 2.0370632410049438e-05, "step": 258000 }, { "epoch": 73.23304002270793, "eval_accuracy": 0.9861384879506581, "eval_loss": 0.05591008812189102, "eval_runtime": 35.5985, "eval_samples_per_second": 441.788, "eval_steps_per_second": 6.91, "step": 258000 }, { "epoch": 73.23587851263127, "grad_norm": 0.005302297882735729, "learning_rate": 2.6793641782571672e-05, "loss": 0.0039037130773067474, "step": 258010 }, { "epoch": 73.23871700255464, "grad_norm": 0.0496487021446228, "learning_rate": 2.6790803292648314e-05, "loss": 0.004157218337059021, "step": 258020 }, { "epoch": 73.241555492478, "grad_norm": 0.000804876908659935, "learning_rate": 2.6787964802724952e-05, "loss": 0.00048474520444869996, "step": 258030 }, { "epoch": 73.24439398240136, "grad_norm": 0.019634686410427094, "learning_rate": 2.678512631280159e-05, "loss": 0.0004130665212869644, "step": 258040 }, { "epoch": 73.24723247232473, "grad_norm": 0.09560948610305786, "learning_rate": 2.678228782287823e-05, "loss": 4.155375063419342e-05, "step": 258050 }, { "epoch": 73.25007096224809, "grad_norm": 0.010512061417102814, "learning_rate": 2.677944933295487e-05, "loss": 4.1215680539608e-05, "step": 258060 }, { "epoch": 73.25290945217145, "grad_norm": 0.028093157336115837, "learning_rate": 2.6776610843031507e-05, "loss": 3.156159073114395e-05, "step": 258070 }, { "epoch": 73.2557479420948, "grad_norm": 0.0035564135760068893, "learning_rate": 2.6773772353108145e-05, "loss": 1.4692358672618865e-05, "step": 258080 }, { "epoch": 73.25858643201816, "grad_norm": 0.008534290827810764, "learning_rate": 2.677093386318479e-05, "loss": 2.0047463476657867e-05, "step": 258090 }, { "epoch": 73.26142492194153, "grad_norm": 0.0011839271755889058, "learning_rate": 2.6768095373261424e-05, "loss": 1.657474786043167e-05, "step": 258100 }, { "epoch": 73.26426341186489, "grad_norm": 0.003787911031395197, "learning_rate": 2.6765256883338062e-05, "loss": 0.00011370033025741577, "step": 258110 }, { "epoch": 73.26710190178825, "grad_norm": 0.00513050751760602, "learning_rate": 2.6762418393414707e-05, "loss": 0.00021255798637866973, "step": 258120 }, { "epoch": 73.26994039171161, "grad_norm": 0.07010290026664734, "learning_rate": 2.6759579903491345e-05, "loss": 0.008544936776161194, "step": 258130 }, { "epoch": 73.27277888163498, "grad_norm": 22.416248321533203, "learning_rate": 2.6756741413567983e-05, "loss": 0.007990232855081558, "step": 258140 }, { "epoch": 73.27561737155833, "grad_norm": 0.07054047286510468, "learning_rate": 2.6753902923644625e-05, "loss": 0.00019985698163509368, "step": 258150 }, { "epoch": 73.27845586148169, "grad_norm": 0.0014392448356375098, "learning_rate": 2.6751064433721263e-05, "loss": 0.005092867463827133, "step": 258160 }, { "epoch": 73.28129435140505, "grad_norm": 0.36796388030052185, "learning_rate": 2.67482259437979e-05, "loss": 0.002281739003956318, "step": 258170 }, { "epoch": 73.28413284132841, "grad_norm": 0.02178516983985901, "learning_rate": 2.674538745387454e-05, "loss": 0.01190994754433632, "step": 258180 }, { "epoch": 73.28697133125178, "grad_norm": 0.03955979645252228, "learning_rate": 2.674254896395118e-05, "loss": 4.5081041753292085e-05, "step": 258190 }, { "epoch": 73.28980982117514, "grad_norm": 0.01675006002187729, "learning_rate": 2.6739710474027818e-05, "loss": 6.07730820775032e-05, "step": 258200 }, { "epoch": 73.29264831109849, "grad_norm": 0.014141170307993889, "learning_rate": 2.6736871984104456e-05, "loss": 4.676077514886856e-05, "step": 258210 }, { "epoch": 73.29548680102185, "grad_norm": 0.018772048875689507, "learning_rate": 2.6734033494181097e-05, "loss": 0.00017978567630052566, "step": 258220 }, { "epoch": 73.29832529094521, "grad_norm": 0.019572267308831215, "learning_rate": 2.6731195004257735e-05, "loss": 0.00017363764345645905, "step": 258230 }, { "epoch": 73.30116378086858, "grad_norm": 0.003904329612851143, "learning_rate": 2.6728356514334373e-05, "loss": 8.480679243803025e-05, "step": 258240 }, { "epoch": 73.30400227079194, "grad_norm": 0.014120213687419891, "learning_rate": 2.6725518024411018e-05, "loss": 3.680810332298279e-05, "step": 258250 }, { "epoch": 73.3068407607153, "grad_norm": 0.014589190483093262, "learning_rate": 2.6722679534487656e-05, "loss": 6.555188447237015e-05, "step": 258260 }, { "epoch": 73.30967925063867, "grad_norm": 0.0018096879357472062, "learning_rate": 2.671984104456429e-05, "loss": 6.572064012289047e-05, "step": 258270 }, { "epoch": 73.31251774056201, "grad_norm": 0.010063709691166878, "learning_rate": 2.671700255464093e-05, "loss": 7.255356758832932e-05, "step": 258280 }, { "epoch": 73.31535623048538, "grad_norm": 0.0015655914321541786, "learning_rate": 2.6714164064717573e-05, "loss": 4.3651647865772246e-05, "step": 258290 }, { "epoch": 73.31819472040874, "grad_norm": 0.001555957249365747, "learning_rate": 2.671132557479421e-05, "loss": 9.187813848257064e-05, "step": 258300 }, { "epoch": 73.3210332103321, "grad_norm": 0.01080723199993372, "learning_rate": 2.670848708487085e-05, "loss": 7.199961692094803e-05, "step": 258310 }, { "epoch": 73.32387170025547, "grad_norm": 0.008520961739122868, "learning_rate": 2.670564859494749e-05, "loss": 0.004454075545072556, "step": 258320 }, { "epoch": 73.32671019017883, "grad_norm": 0.0028805453330278397, "learning_rate": 2.670281010502413e-05, "loss": 0.00018488839268684387, "step": 258330 }, { "epoch": 73.32954868010219, "grad_norm": 0.025818252936005592, "learning_rate": 2.6699971615100767e-05, "loss": 6.931833922863007e-05, "step": 258340 }, { "epoch": 73.33238717002554, "grad_norm": 0.007038097828626633, "learning_rate": 2.6697133125177408e-05, "loss": 0.0003569703549146652, "step": 258350 }, { "epoch": 73.3352256599489, "grad_norm": 0.03012983500957489, "learning_rate": 2.6694294635254046e-05, "loss": 8.919797837734223e-05, "step": 258360 }, { "epoch": 73.33806414987227, "grad_norm": 0.00576787069439888, "learning_rate": 2.6691456145330684e-05, "loss": 3.044717013835907e-05, "step": 258370 }, { "epoch": 73.34090263979563, "grad_norm": 0.0333857424557209, "learning_rate": 2.6688617655407322e-05, "loss": 6.541814655065536e-05, "step": 258380 }, { "epoch": 73.34374112971899, "grad_norm": 0.002840410452336073, "learning_rate": 2.6685779165483967e-05, "loss": 0.0001267187297344208, "step": 258390 }, { "epoch": 73.34657961964236, "grad_norm": 0.008173579350113869, "learning_rate": 2.66829406755606e-05, "loss": 0.00014462750405073166, "step": 258400 }, { "epoch": 73.3494181095657, "grad_norm": 0.013961265794932842, "learning_rate": 2.668010218563724e-05, "loss": 4.674158990383148e-05, "step": 258410 }, { "epoch": 73.35225659948907, "grad_norm": 0.06175784766674042, "learning_rate": 2.6677263695713884e-05, "loss": 0.0007600855082273483, "step": 258420 }, { "epoch": 73.35509508941243, "grad_norm": 0.005707649048417807, "learning_rate": 2.6674425205790522e-05, "loss": 6.087832152843475e-05, "step": 258430 }, { "epoch": 73.35793357933579, "grad_norm": 0.008568519726395607, "learning_rate": 2.667158671586716e-05, "loss": 5.181953310966492e-05, "step": 258440 }, { "epoch": 73.36077206925916, "grad_norm": 0.025389479473233223, "learning_rate": 2.66687482259438e-05, "loss": 6.323140114545822e-05, "step": 258450 }, { "epoch": 73.36361055918252, "grad_norm": 0.004037504084408283, "learning_rate": 2.666590973602044e-05, "loss": 5.325488746166229e-05, "step": 258460 }, { "epoch": 73.36644904910588, "grad_norm": 0.0012766419677063823, "learning_rate": 2.6663071246097077e-05, "loss": 3.2475404441356656e-05, "step": 258470 }, { "epoch": 73.36928753902923, "grad_norm": 0.05844927951693535, "learning_rate": 2.6660232756173715e-05, "loss": 6.812382489442825e-05, "step": 258480 }, { "epoch": 73.3721260289526, "grad_norm": 0.002657161094248295, "learning_rate": 2.6657394266250357e-05, "loss": 4.1884370148181915e-05, "step": 258490 }, { "epoch": 73.37496451887596, "grad_norm": 0.0037729961331933737, "learning_rate": 2.6654555776326995e-05, "loss": 3.2603368163108824e-05, "step": 258500 }, { "epoch": 73.37496451887596, "eval_accuracy": 0.9859477331976855, "eval_loss": 0.05577891319990158, "eval_runtime": 35.9734, "eval_samples_per_second": 437.184, "eval_steps_per_second": 6.838, "step": 258500 }, { "epoch": 73.37780300879932, "grad_norm": 0.004370474256575108, "learning_rate": 2.6651717286403633e-05, "loss": 5.6313164532184604e-05, "step": 258510 }, { "epoch": 73.38064149872268, "grad_norm": 0.09496862441301346, "learning_rate": 2.6648878796480274e-05, "loss": 5.699582397937775e-05, "step": 258520 }, { "epoch": 73.38347998864604, "grad_norm": 0.2781621515750885, "learning_rate": 2.6646040306556912e-05, "loss": 0.00025993306189775467, "step": 258530 }, { "epoch": 73.38631847856941, "grad_norm": 0.019046157598495483, "learning_rate": 2.664320181663355e-05, "loss": 5.4628029465675355e-05, "step": 258540 }, { "epoch": 73.38915696849276, "grad_norm": 0.05367831140756607, "learning_rate": 2.6640363326710195e-05, "loss": 0.0005511607974767685, "step": 258550 }, { "epoch": 73.39199545841612, "grad_norm": 0.04984002560377121, "learning_rate": 2.6637524836786833e-05, "loss": 0.00020583607256412505, "step": 258560 }, { "epoch": 73.39483394833948, "grad_norm": 0.005016500595957041, "learning_rate": 2.6634686346863467e-05, "loss": 1.8873251974582672e-05, "step": 258570 }, { "epoch": 73.39767243826284, "grad_norm": 0.014868542551994324, "learning_rate": 2.6631847856940105e-05, "loss": 4.150886088609696e-05, "step": 258580 }, { "epoch": 73.40051092818621, "grad_norm": 0.006992412731051445, "learning_rate": 2.662900936701675e-05, "loss": 3.2650865614414214e-05, "step": 258590 }, { "epoch": 73.40334941810957, "grad_norm": 0.49739912152290344, "learning_rate": 2.6626170877093388e-05, "loss": 7.184110581874848e-05, "step": 258600 }, { "epoch": 73.40618790803293, "grad_norm": 0.07377862930297852, "learning_rate": 2.6623332387170026e-05, "loss": 4.7480687499046326e-05, "step": 258610 }, { "epoch": 73.40902639795628, "grad_norm": 0.0012790628243237734, "learning_rate": 2.6620493897246668e-05, "loss": 3.659166395664215e-05, "step": 258620 }, { "epoch": 73.41186488787964, "grad_norm": 0.02373482845723629, "learning_rate": 2.6617655407323306e-05, "loss": 4.8627890646457675e-05, "step": 258630 }, { "epoch": 73.41470337780301, "grad_norm": 0.005504698026925325, "learning_rate": 2.6614816917399944e-05, "loss": 0.0002283899113535881, "step": 258640 }, { "epoch": 73.41754186772637, "grad_norm": 0.0037753034848719835, "learning_rate": 2.6611978427476585e-05, "loss": 0.00010619107633829117, "step": 258650 }, { "epoch": 73.42038035764973, "grad_norm": 0.004350544419139624, "learning_rate": 2.6609139937553223e-05, "loss": 0.00013118423521518707, "step": 258660 }, { "epoch": 73.4232188475731, "grad_norm": 0.0017912308685481548, "learning_rate": 2.660630144762986e-05, "loss": 3.2091885805130006e-05, "step": 258670 }, { "epoch": 73.42605733749645, "grad_norm": 0.0025580560322850943, "learning_rate": 2.66034629577065e-05, "loss": 5.979631096124649e-05, "step": 258680 }, { "epoch": 73.42889582741981, "grad_norm": 0.005950386170297861, "learning_rate": 2.660062446778314e-05, "loss": 0.00010722167789936065, "step": 258690 }, { "epoch": 73.43173431734317, "grad_norm": 0.004673316143453121, "learning_rate": 2.6597785977859778e-05, "loss": 4.582162946462631e-05, "step": 258700 }, { "epoch": 73.43457280726653, "grad_norm": 0.0019293648656457663, "learning_rate": 2.6594947487936416e-05, "loss": 5.981568247079849e-05, "step": 258710 }, { "epoch": 73.4374112971899, "grad_norm": 0.011556986719369888, "learning_rate": 2.659210899801306e-05, "loss": 3.942660987377167e-05, "step": 258720 }, { "epoch": 73.44024978711326, "grad_norm": 0.0038505003321915865, "learning_rate": 2.65892705080897e-05, "loss": 4.67151403427124e-05, "step": 258730 }, { "epoch": 73.44308827703662, "grad_norm": 0.0037635096814483404, "learning_rate": 2.6586432018166334e-05, "loss": 1.2869387865066528e-05, "step": 258740 }, { "epoch": 73.44592676695997, "grad_norm": 0.01564878225326538, "learning_rate": 2.658359352824298e-05, "loss": 2.717338502407074e-05, "step": 258750 }, { "epoch": 73.44876525688333, "grad_norm": 0.021792380139231682, "learning_rate": 2.6580755038319616e-05, "loss": 1.5319138765335082e-05, "step": 258760 }, { "epoch": 73.4516037468067, "grad_norm": 0.0016065511154010892, "learning_rate": 2.6577916548396254e-05, "loss": 2.29688361287117e-05, "step": 258770 }, { "epoch": 73.45444223673006, "grad_norm": 0.001227493048645556, "learning_rate": 2.6575078058472896e-05, "loss": 4.3892674148082735e-05, "step": 258780 }, { "epoch": 73.45728072665342, "grad_norm": 0.006285759154707193, "learning_rate": 2.6572239568549534e-05, "loss": 2.6755034923553467e-05, "step": 258790 }, { "epoch": 73.46011921657679, "grad_norm": 0.009930042549967766, "learning_rate": 2.656940107862617e-05, "loss": 0.00011254884302616119, "step": 258800 }, { "epoch": 73.46295770650015, "grad_norm": 0.0017346658278256655, "learning_rate": 2.656656258870281e-05, "loss": 8.831676095724105e-05, "step": 258810 }, { "epoch": 73.4657961964235, "grad_norm": 0.11827389895915985, "learning_rate": 2.656372409877945e-05, "loss": 9.337756782770156e-05, "step": 258820 }, { "epoch": 73.46863468634686, "grad_norm": 0.054757166653871536, "learning_rate": 2.656088560885609e-05, "loss": 4.7798268496990204e-05, "step": 258830 }, { "epoch": 73.47147317627022, "grad_norm": 0.08436980843544006, "learning_rate": 2.6558047118932727e-05, "loss": 2.606268972158432e-05, "step": 258840 }, { "epoch": 73.47431166619359, "grad_norm": 0.0018262001685798168, "learning_rate": 2.6555208629009372e-05, "loss": 4.032328724861145e-05, "step": 258850 }, { "epoch": 73.47715015611695, "grad_norm": 0.008656725287437439, "learning_rate": 2.6552370139086006e-05, "loss": 4.262439906597137e-05, "step": 258860 }, { "epoch": 73.47998864604031, "grad_norm": 0.0066327378153800964, "learning_rate": 2.6549531649162644e-05, "loss": 4.683565348386765e-05, "step": 258870 }, { "epoch": 73.48282713596367, "grad_norm": 0.02399730309844017, "learning_rate": 2.654669315923929e-05, "loss": 3.0128844082355498e-05, "step": 258880 }, { "epoch": 73.48566562588702, "grad_norm": 0.0013895072042942047, "learning_rate": 2.6543854669315927e-05, "loss": 2.9407814145088197e-05, "step": 258890 }, { "epoch": 73.48850411581039, "grad_norm": 0.038259539753198624, "learning_rate": 2.6541016179392565e-05, "loss": 6.69790431857109e-05, "step": 258900 }, { "epoch": 73.49134260573375, "grad_norm": 0.2485053390264511, "learning_rate": 2.6538177689469203e-05, "loss": 0.000124889612197876, "step": 258910 }, { "epoch": 73.49418109565711, "grad_norm": 0.008467319421470165, "learning_rate": 2.6535339199545844e-05, "loss": 1.9803643226623535e-05, "step": 258920 }, { "epoch": 73.49701958558047, "grad_norm": 0.001058800844475627, "learning_rate": 2.6532500709622482e-05, "loss": 3.6854855716228484e-05, "step": 258930 }, { "epoch": 73.49985807550384, "grad_norm": 0.010621561668813229, "learning_rate": 2.652966221969912e-05, "loss": 3.0757300555706026e-05, "step": 258940 }, { "epoch": 73.50269656542719, "grad_norm": 0.0024201683700084686, "learning_rate": 2.6526823729775762e-05, "loss": 5.576238036155701e-05, "step": 258950 }, { "epoch": 73.50553505535055, "grad_norm": 0.009360664524137974, "learning_rate": 2.65239852398524e-05, "loss": 2.545975148677826e-05, "step": 258960 }, { "epoch": 73.50837354527391, "grad_norm": 0.01124532613903284, "learning_rate": 2.6521146749929038e-05, "loss": 0.00010893363505601883, "step": 258970 }, { "epoch": 73.51121203519727, "grad_norm": 0.01283424161374569, "learning_rate": 2.6518308260005682e-05, "loss": 0.0012300562113523484, "step": 258980 }, { "epoch": 73.51405052512064, "grad_norm": 0.009458343498408794, "learning_rate": 2.6515469770082317e-05, "loss": 0.00013156644999980926, "step": 258990 }, { "epoch": 73.516889015044, "grad_norm": 0.0010481415083631873, "learning_rate": 2.6512631280158955e-05, "loss": 1.8610619008541107e-05, "step": 259000 }, { "epoch": 73.516889015044, "eval_accuracy": 0.9863292427036306, "eval_loss": 0.05607524886727333, "eval_runtime": 36.7411, "eval_samples_per_second": 428.049, "eval_steps_per_second": 6.696, "step": 259000 }, { "epoch": 73.51972750496736, "grad_norm": 0.001987781375646591, "learning_rate": 2.6509792790235593e-05, "loss": 2.6621483266353606e-05, "step": 259010 }, { "epoch": 73.52256599489071, "grad_norm": 0.001574780559167266, "learning_rate": 2.6506954300312238e-05, "loss": 3.3892691135406494e-05, "step": 259020 }, { "epoch": 73.52540448481408, "grad_norm": 0.012948084622621536, "learning_rate": 2.6504115810388876e-05, "loss": 2.047307789325714e-05, "step": 259030 }, { "epoch": 73.52824297473744, "grad_norm": 0.005019554402679205, "learning_rate": 2.650127732046551e-05, "loss": 2.845320850610733e-05, "step": 259040 }, { "epoch": 73.5310814646608, "grad_norm": 0.12740063667297363, "learning_rate": 2.6498438830542155e-05, "loss": 3.55355441570282e-05, "step": 259050 }, { "epoch": 73.53391995458416, "grad_norm": 0.5328484773635864, "learning_rate": 2.6495600340618793e-05, "loss": 7.5506791472435e-05, "step": 259060 }, { "epoch": 73.53675844450753, "grad_norm": 0.0066962288692593575, "learning_rate": 2.649276185069543e-05, "loss": 0.00046847742050886153, "step": 259070 }, { "epoch": 73.53959693443089, "grad_norm": 0.0007494850433431566, "learning_rate": 2.6489923360772073e-05, "loss": 3.85478138923645e-05, "step": 259080 }, { "epoch": 73.54243542435424, "grad_norm": 0.0012001704890280962, "learning_rate": 2.648708487084871e-05, "loss": 0.003734622150659561, "step": 259090 }, { "epoch": 73.5452739142776, "grad_norm": 0.0004812061961274594, "learning_rate": 2.648424638092535e-05, "loss": 0.00024658627808094025, "step": 259100 }, { "epoch": 73.54811240420096, "grad_norm": 0.014908880926668644, "learning_rate": 2.6481407891001986e-05, "loss": 1.5437416732311248e-05, "step": 259110 }, { "epoch": 73.55095089412433, "grad_norm": 0.005797053687274456, "learning_rate": 2.6478569401078628e-05, "loss": 6.43240287899971e-05, "step": 259120 }, { "epoch": 73.55378938404769, "grad_norm": 0.02434302680194378, "learning_rate": 2.6475730911155266e-05, "loss": 2.0396150648593903e-05, "step": 259130 }, { "epoch": 73.55662787397105, "grad_norm": 11.307035446166992, "learning_rate": 2.6472892421231904e-05, "loss": 0.0023980394005775453, "step": 259140 }, { "epoch": 73.5594663638944, "grad_norm": 12.297657012939453, "learning_rate": 2.647005393130855e-05, "loss": 0.0021544670686125754, "step": 259150 }, { "epoch": 73.56230485381776, "grad_norm": 0.0058191195130348206, "learning_rate": 2.6467215441385183e-05, "loss": 0.0021730752661824225, "step": 259160 }, { "epoch": 73.56514334374113, "grad_norm": 0.001964065246284008, "learning_rate": 2.646437695146182e-05, "loss": 0.00010294504463672637, "step": 259170 }, { "epoch": 73.56798183366449, "grad_norm": 0.006532153580337763, "learning_rate": 2.6461538461538466e-05, "loss": 2.5931000709533692e-05, "step": 259180 }, { "epoch": 73.57082032358785, "grad_norm": 0.0068992795422673225, "learning_rate": 2.6458699971615104e-05, "loss": 0.00019699689000844954, "step": 259190 }, { "epoch": 73.57365881351122, "grad_norm": 0.04893995448946953, "learning_rate": 2.6455861481691742e-05, "loss": 3.0443817377090453e-05, "step": 259200 }, { "epoch": 73.57649730343458, "grad_norm": 0.020059233531355858, "learning_rate": 2.6453022991768377e-05, "loss": 0.007291321456432342, "step": 259210 }, { "epoch": 73.57933579335793, "grad_norm": 0.03136281669139862, "learning_rate": 2.645018450184502e-05, "loss": 0.0013699747622013092, "step": 259220 }, { "epoch": 73.58217428328129, "grad_norm": 0.045586396008729935, "learning_rate": 2.644734601192166e-05, "loss": 9.618923068046569e-05, "step": 259230 }, { "epoch": 73.58501277320465, "grad_norm": 0.0016263488214462996, "learning_rate": 2.6444507521998297e-05, "loss": 0.00012246277183294296, "step": 259240 }, { "epoch": 73.58785126312802, "grad_norm": 0.007116951514035463, "learning_rate": 2.644166903207494e-05, "loss": 0.0003283055499196053, "step": 259250 }, { "epoch": 73.59068975305138, "grad_norm": 0.8069697022438049, "learning_rate": 2.6438830542151577e-05, "loss": 0.0005582276731729508, "step": 259260 }, { "epoch": 73.59352824297474, "grad_norm": 0.12203100323677063, "learning_rate": 2.6435992052228215e-05, "loss": 0.00015307962894439697, "step": 259270 }, { "epoch": 73.5963667328981, "grad_norm": 0.0011742491042241454, "learning_rate": 2.6433153562304856e-05, "loss": 0.0002434283494949341, "step": 259280 }, { "epoch": 73.59920522282145, "grad_norm": 0.009926194325089455, "learning_rate": 2.6430315072381494e-05, "loss": 0.0003721162676811218, "step": 259290 }, { "epoch": 73.60204371274482, "grad_norm": 2.1735081672668457, "learning_rate": 2.6427476582458132e-05, "loss": 0.000800137221813202, "step": 259300 }, { "epoch": 73.60488220266818, "grad_norm": 0.3237132728099823, "learning_rate": 2.642463809253477e-05, "loss": 0.0016057262197136878, "step": 259310 }, { "epoch": 73.60772069259154, "grad_norm": 0.001043435069732368, "learning_rate": 2.6421799602611415e-05, "loss": 0.007047252357006073, "step": 259320 }, { "epoch": 73.6105591825149, "grad_norm": 0.00928714219480753, "learning_rate": 2.641896111268805e-05, "loss": 0.008658060431480407, "step": 259330 }, { "epoch": 73.61339767243827, "grad_norm": 0.07162926346063614, "learning_rate": 2.6416122622764687e-05, "loss": 0.000910993292927742, "step": 259340 }, { "epoch": 73.61623616236163, "grad_norm": 0.11987248808145523, "learning_rate": 2.6413284132841332e-05, "loss": 0.000667279027402401, "step": 259350 }, { "epoch": 73.61907465228498, "grad_norm": 0.010209755972027779, "learning_rate": 2.641044564291797e-05, "loss": 0.00023089777678251267, "step": 259360 }, { "epoch": 73.62191314220834, "grad_norm": 0.006823546718806028, "learning_rate": 2.6407607152994608e-05, "loss": 0.002448941022157669, "step": 259370 }, { "epoch": 73.6247516321317, "grad_norm": 0.2434442788362503, "learning_rate": 2.640476866307125e-05, "loss": 0.00043831225484609605, "step": 259380 }, { "epoch": 73.62759012205507, "grad_norm": 0.012877924367785454, "learning_rate": 2.6401930173147887e-05, "loss": 0.002431216649711132, "step": 259390 }, { "epoch": 73.63042861197843, "grad_norm": 0.05293847620487213, "learning_rate": 2.6399091683224525e-05, "loss": 0.00016058050096035004, "step": 259400 }, { "epoch": 73.6332671019018, "grad_norm": 0.06330346316099167, "learning_rate": 2.6396253193301163e-05, "loss": 0.0035139665007591246, "step": 259410 }, { "epoch": 73.63610559182514, "grad_norm": 0.17542192339897156, "learning_rate": 2.6393414703377805e-05, "loss": 8.997172117233276e-05, "step": 259420 }, { "epoch": 73.6389440817485, "grad_norm": 1.8622612953186035, "learning_rate": 2.6390576213454443e-05, "loss": 0.0003818497061729431, "step": 259430 }, { "epoch": 73.64178257167187, "grad_norm": 0.39807745814323425, "learning_rate": 2.638773772353108e-05, "loss": 0.0007605994120240212, "step": 259440 }, { "epoch": 73.64462106159523, "grad_norm": 0.018552714958786964, "learning_rate": 2.6384899233607725e-05, "loss": 0.0028352223336696623, "step": 259450 }, { "epoch": 73.6474595515186, "grad_norm": 0.04351534694433212, "learning_rate": 2.638206074368436e-05, "loss": 9.482335299253464e-05, "step": 259460 }, { "epoch": 73.65029804144196, "grad_norm": 0.006604464258998632, "learning_rate": 2.6379222253760998e-05, "loss": 0.0002993391826748848, "step": 259470 }, { "epoch": 73.65313653136532, "grad_norm": 0.058527424931526184, "learning_rate": 2.6376383763837643e-05, "loss": 0.00032824743539094925, "step": 259480 }, { "epoch": 73.65597502128867, "grad_norm": 0.010097227059304714, "learning_rate": 2.637354527391428e-05, "loss": 0.00044305063784122467, "step": 259490 }, { "epoch": 73.65881351121203, "grad_norm": 0.11724122613668442, "learning_rate": 2.637070678399092e-05, "loss": 0.00038061719387769697, "step": 259500 }, { "epoch": 73.65881351121203, "eval_accuracy": 0.9823233928912062, "eval_loss": 0.07640894502401352, "eval_runtime": 36.1024, "eval_samples_per_second": 435.622, "eval_steps_per_second": 6.814, "step": 259500 }, { "epoch": 73.6616520011354, "grad_norm": 2.1146128177642822, "learning_rate": 2.6367868294067553e-05, "loss": 0.0006009899079799652, "step": 259510 }, { "epoch": 73.66449049105876, "grad_norm": 0.0027711144648492336, "learning_rate": 2.6365029804144198e-05, "loss": 0.00012636594474315643, "step": 259520 }, { "epoch": 73.66732898098212, "grad_norm": 0.032306428998708725, "learning_rate": 2.6362191314220836e-05, "loss": 0.0013932859525084495, "step": 259530 }, { "epoch": 73.67016747090548, "grad_norm": 0.15961872041225433, "learning_rate": 2.6359352824297474e-05, "loss": 0.00035019610077142717, "step": 259540 }, { "epoch": 73.67300596082885, "grad_norm": 0.07770983874797821, "learning_rate": 2.6356514334374115e-05, "loss": 0.00021184049546718597, "step": 259550 }, { "epoch": 73.6758444507522, "grad_norm": 0.004192044958472252, "learning_rate": 2.6353675844450753e-05, "loss": 4.4393166899681094e-05, "step": 259560 }, { "epoch": 73.67868294067556, "grad_norm": 0.005719102919101715, "learning_rate": 2.635083735452739e-05, "loss": 6.910022348165512e-05, "step": 259570 }, { "epoch": 73.68152143059892, "grad_norm": 0.0008702921913936734, "learning_rate": 2.6347998864604033e-05, "loss": 0.00010752808302640914, "step": 259580 }, { "epoch": 73.68435992052228, "grad_norm": 0.09615164995193481, "learning_rate": 2.634516037468067e-05, "loss": 0.00025605298578739165, "step": 259590 }, { "epoch": 73.68719841044565, "grad_norm": 0.00706465682014823, "learning_rate": 2.634232188475731e-05, "loss": 0.0006745388731360435, "step": 259600 }, { "epoch": 73.69003690036901, "grad_norm": 0.03224867582321167, "learning_rate": 2.6339483394833947e-05, "loss": 9.698104113340378e-05, "step": 259610 }, { "epoch": 73.69287539029236, "grad_norm": 0.010980110615491867, "learning_rate": 2.633664490491059e-05, "loss": 0.0003693293780088425, "step": 259620 }, { "epoch": 73.69571388021572, "grad_norm": 0.008195760659873486, "learning_rate": 2.6333806414987226e-05, "loss": 0.0001273125410079956, "step": 259630 }, { "epoch": 73.69855237013908, "grad_norm": 0.00782888662070036, "learning_rate": 2.6330967925063864e-05, "loss": 7.521789520978928e-05, "step": 259640 }, { "epoch": 73.70139086006245, "grad_norm": 0.005443908274173737, "learning_rate": 2.632812943514051e-05, "loss": 0.0001375839114189148, "step": 259650 }, { "epoch": 73.70422934998581, "grad_norm": 0.017199723049998283, "learning_rate": 2.6325290945217147e-05, "loss": 0.0002771245315670967, "step": 259660 }, { "epoch": 73.70706783990917, "grad_norm": 0.049761902540922165, "learning_rate": 2.6322452455293785e-05, "loss": 6.814096122980118e-05, "step": 259670 }, { "epoch": 73.70990632983253, "grad_norm": 0.03295212239027023, "learning_rate": 2.6319613965370426e-05, "loss": 0.0002484070137143135, "step": 259680 }, { "epoch": 73.71274481975588, "grad_norm": 0.02805439569056034, "learning_rate": 2.6316775475447064e-05, "loss": 0.0005671156570315361, "step": 259690 }, { "epoch": 73.71558330967925, "grad_norm": 0.007165057584643364, "learning_rate": 2.6313936985523702e-05, "loss": 4.7845207154750825e-05, "step": 259700 }, { "epoch": 73.71842179960261, "grad_norm": 0.012704781256616116, "learning_rate": 2.631109849560034e-05, "loss": 9.639542549848557e-05, "step": 259710 }, { "epoch": 73.72126028952597, "grad_norm": 0.01048919279128313, "learning_rate": 2.630826000567698e-05, "loss": 0.0002130497246980667, "step": 259720 }, { "epoch": 73.72409877944933, "grad_norm": 0.02716432698071003, "learning_rate": 2.630542151575362e-05, "loss": 0.0002695849165320396, "step": 259730 }, { "epoch": 73.7269372693727, "grad_norm": 0.005416030064225197, "learning_rate": 2.6302583025830258e-05, "loss": 6.510093808174134e-05, "step": 259740 }, { "epoch": 73.72977575929606, "grad_norm": 0.001576450071297586, "learning_rate": 2.62997445359069e-05, "loss": 9.075626730918885e-05, "step": 259750 }, { "epoch": 73.73261424921941, "grad_norm": 0.14890053868293762, "learning_rate": 2.6296906045983537e-05, "loss": 0.000571347028017044, "step": 259760 }, { "epoch": 73.73545273914277, "grad_norm": 0.04990789666771889, "learning_rate": 2.6294067556060175e-05, "loss": 3.3644773066043856e-05, "step": 259770 }, { "epoch": 73.73829122906614, "grad_norm": 0.22854480147361755, "learning_rate": 2.629122906613682e-05, "loss": 0.000177047960460186, "step": 259780 }, { "epoch": 73.7411297189895, "grad_norm": 0.009228643961250782, "learning_rate": 2.6288390576213458e-05, "loss": 0.0019753696396946907, "step": 259790 }, { "epoch": 73.74396820891286, "grad_norm": 0.0024821599945425987, "learning_rate": 2.6285552086290092e-05, "loss": 0.00010867621749639511, "step": 259800 }, { "epoch": 73.74680669883622, "grad_norm": 0.0007203294662758708, "learning_rate": 2.628271359636673e-05, "loss": 0.007353118807077408, "step": 259810 }, { "epoch": 73.74964518875959, "grad_norm": 0.013916406780481339, "learning_rate": 2.6279875106443375e-05, "loss": 7.583461701869965e-05, "step": 259820 }, { "epoch": 73.75248367868294, "grad_norm": 0.019816963002085686, "learning_rate": 2.6277036616520013e-05, "loss": 3.532376140356064e-05, "step": 259830 }, { "epoch": 73.7553221686063, "grad_norm": 0.01844329573214054, "learning_rate": 2.627419812659665e-05, "loss": 0.0002324659377336502, "step": 259840 }, { "epoch": 73.75816065852966, "grad_norm": 0.032265741378068924, "learning_rate": 2.6271359636673292e-05, "loss": 7.113553583621978e-05, "step": 259850 }, { "epoch": 73.76099914845302, "grad_norm": 0.04216541722416878, "learning_rate": 2.626852114674993e-05, "loss": 0.0007626550272107124, "step": 259860 }, { "epoch": 73.76383763837639, "grad_norm": 0.007204069755971432, "learning_rate": 2.626568265682657e-05, "loss": 7.695350795984268e-05, "step": 259870 }, { "epoch": 73.76667612829975, "grad_norm": 0.011232544668018818, "learning_rate": 2.626284416690321e-05, "loss": 0.0005564035847783089, "step": 259880 }, { "epoch": 73.7695146182231, "grad_norm": 0.0028210128657519817, "learning_rate": 2.6260005676979848e-05, "loss": 3.807675093412399e-05, "step": 259890 }, { "epoch": 73.77235310814646, "grad_norm": 0.058016955852508545, "learning_rate": 2.6257167187056486e-05, "loss": 0.0002443430945277214, "step": 259900 }, { "epoch": 73.77519159806982, "grad_norm": 0.01025954820215702, "learning_rate": 2.6254328697133124e-05, "loss": 0.002199249342083931, "step": 259910 }, { "epoch": 73.77803008799319, "grad_norm": 0.000808387587312609, "learning_rate": 2.625149020720977e-05, "loss": 0.008884657919406892, "step": 259920 }, { "epoch": 73.78086857791655, "grad_norm": 0.00105948094278574, "learning_rate": 2.6248651717286403e-05, "loss": 7.513798773288727e-05, "step": 259930 }, { "epoch": 73.78370706783991, "grad_norm": 0.0009134896681644022, "learning_rate": 2.624581322736304e-05, "loss": 2.037733793258667e-05, "step": 259940 }, { "epoch": 73.78654555776328, "grad_norm": 0.036577124148607254, "learning_rate": 2.6242974737439686e-05, "loss": 3.630593419075012e-05, "step": 259950 }, { "epoch": 73.78938404768662, "grad_norm": 0.0011268119560554624, "learning_rate": 2.6240136247516324e-05, "loss": 1.2384727597236633e-05, "step": 259960 }, { "epoch": 73.79222253760999, "grad_norm": 0.08472298830747604, "learning_rate": 2.6237297757592962e-05, "loss": 5.472004413604736e-05, "step": 259970 }, { "epoch": 73.79506102753335, "grad_norm": 0.006169929634779692, "learning_rate": 2.6234459267669603e-05, "loss": 1.4653988182544709e-05, "step": 259980 }, { "epoch": 73.79789951745671, "grad_norm": 0.008066941983997822, "learning_rate": 2.623162077774624e-05, "loss": 2.893079072237015e-05, "step": 259990 }, { "epoch": 73.80073800738008, "grad_norm": 0.03257708624005318, "learning_rate": 2.622878228782288e-05, "loss": 3.9402209222316745e-05, "step": 260000 }, { "epoch": 73.80073800738008, "eval_accuracy": 0.9871558466331786, "eval_loss": 0.053415585309267044, "eval_runtime": 35.8602, "eval_samples_per_second": 438.564, "eval_steps_per_second": 6.86, "step": 260000 }, { "epoch": 73.80357649730344, "grad_norm": 0.005706007592380047, "learning_rate": 2.6225943797899517e-05, "loss": 2.5981478393077852e-05, "step": 260010 }, { "epoch": 73.8064149872268, "grad_norm": 0.010364408604800701, "learning_rate": 2.622310530797616e-05, "loss": 1.963060349225998e-05, "step": 260020 }, { "epoch": 73.80925347715015, "grad_norm": 0.002259668195620179, "learning_rate": 2.6220266818052796e-05, "loss": 2.8392113745212554e-05, "step": 260030 }, { "epoch": 73.81209196707351, "grad_norm": 0.003669829573482275, "learning_rate": 2.6217428328129434e-05, "loss": 0.0001366451382637024, "step": 260040 }, { "epoch": 73.81493045699688, "grad_norm": 0.22889527678489685, "learning_rate": 2.6214589838206076e-05, "loss": 5.399957299232483e-05, "step": 260050 }, { "epoch": 73.81776894692024, "grad_norm": 0.12766757607460022, "learning_rate": 2.6211751348282714e-05, "loss": 4.771836102008819e-05, "step": 260060 }, { "epoch": 73.8206074368436, "grad_norm": 0.0029448089189827442, "learning_rate": 2.6208912858359352e-05, "loss": 1.8130242824554444e-05, "step": 260070 }, { "epoch": 73.82344592676696, "grad_norm": 0.001276350230909884, "learning_rate": 2.6206074368435997e-05, "loss": 3.623552620410919e-05, "step": 260080 }, { "epoch": 73.82628441669033, "grad_norm": 0.001224966486915946, "learning_rate": 2.6203235878512635e-05, "loss": 2.335086464881897e-05, "step": 260090 }, { "epoch": 73.82912290661368, "grad_norm": 0.02822056971490383, "learning_rate": 2.620039738858927e-05, "loss": 3.075096756219864e-05, "step": 260100 }, { "epoch": 73.83196139653704, "grad_norm": 0.0034932736307382584, "learning_rate": 2.6197558898665914e-05, "loss": 5.904939025640488e-05, "step": 260110 }, { "epoch": 73.8347998864604, "grad_norm": 0.0013670396292582154, "learning_rate": 2.6194720408742552e-05, "loss": 4.0344148874282834e-05, "step": 260120 }, { "epoch": 73.83763837638377, "grad_norm": 0.0005990634090267122, "learning_rate": 2.619188191881919e-05, "loss": 1.9937008619308472e-05, "step": 260130 }, { "epoch": 73.84047686630713, "grad_norm": 0.05085377022624016, "learning_rate": 2.6189043428895828e-05, "loss": 4.2020156979560855e-05, "step": 260140 }, { "epoch": 73.84331535623049, "grad_norm": 0.0016439693281427026, "learning_rate": 2.618620493897247e-05, "loss": 1.864396035671234e-05, "step": 260150 }, { "epoch": 73.84615384615384, "grad_norm": 0.04077368602156639, "learning_rate": 2.6183366449049107e-05, "loss": 2.428349107503891e-05, "step": 260160 }, { "epoch": 73.8489923360772, "grad_norm": 0.0012840541312471032, "learning_rate": 2.6180527959125745e-05, "loss": 4.098881036043167e-05, "step": 260170 }, { "epoch": 73.85183082600057, "grad_norm": 0.003041808260604739, "learning_rate": 2.6177689469202387e-05, "loss": 3.6609172821044925e-05, "step": 260180 }, { "epoch": 73.85466931592393, "grad_norm": 0.0033896020613610744, "learning_rate": 2.6174850979279025e-05, "loss": 2.3190677165985107e-05, "step": 260190 }, { "epoch": 73.85750780584729, "grad_norm": 0.002021112246438861, "learning_rate": 2.6172012489355663e-05, "loss": 2.41084024310112e-05, "step": 260200 }, { "epoch": 73.86034629577065, "grad_norm": 0.0033860679250210524, "learning_rate": 2.6169173999432307e-05, "loss": 1.776367425918579e-05, "step": 260210 }, { "epoch": 73.86318478569402, "grad_norm": 0.034887634217739105, "learning_rate": 2.6166335509508942e-05, "loss": 3.22265550494194e-05, "step": 260220 }, { "epoch": 73.86602327561737, "grad_norm": 0.010958082973957062, "learning_rate": 2.616349701958558e-05, "loss": 2.847649157047272e-05, "step": 260230 }, { "epoch": 73.86886176554073, "grad_norm": 0.0058270469307899475, "learning_rate": 2.6160658529662218e-05, "loss": 6.0475990176200865e-05, "step": 260240 }, { "epoch": 73.87170025546409, "grad_norm": 0.0005396371125243604, "learning_rate": 2.6157820039738863e-05, "loss": 3.5647302865982056e-05, "step": 260250 }, { "epoch": 73.87453874538745, "grad_norm": 0.010955225676298141, "learning_rate": 2.61549815498155e-05, "loss": 2.7840211987495423e-05, "step": 260260 }, { "epoch": 73.87737723531082, "grad_norm": 0.000488700345158577, "learning_rate": 2.6152143059892135e-05, "loss": 7.138587534427643e-06, "step": 260270 }, { "epoch": 73.88021572523418, "grad_norm": 0.012195179238915443, "learning_rate": 2.614930456996878e-05, "loss": 6.224010139703751e-05, "step": 260280 }, { "epoch": 73.88305421515754, "grad_norm": 0.00628998875617981, "learning_rate": 2.6146466080045418e-05, "loss": 2.339407801628113e-05, "step": 260290 }, { "epoch": 73.88589270508089, "grad_norm": 0.009897356852889061, "learning_rate": 2.6143627590122056e-05, "loss": 1.4466606080532074e-05, "step": 260300 }, { "epoch": 73.88873119500425, "grad_norm": 0.01113208755850792, "learning_rate": 2.6140789100198697e-05, "loss": 2.153906971216202e-05, "step": 260310 }, { "epoch": 73.89156968492762, "grad_norm": 0.005725368391722441, "learning_rate": 2.6137950610275335e-05, "loss": 2.1933577954769135e-05, "step": 260320 }, { "epoch": 73.89440817485098, "grad_norm": 0.001962997717782855, "learning_rate": 2.6135112120351973e-05, "loss": 1.705605536699295e-05, "step": 260330 }, { "epoch": 73.89724666477434, "grad_norm": 0.007624124176800251, "learning_rate": 2.613227363042861e-05, "loss": 9.985081851482391e-06, "step": 260340 }, { "epoch": 73.9000851546977, "grad_norm": 0.010815685614943504, "learning_rate": 2.6129435140505253e-05, "loss": 1.5086308121681213e-05, "step": 260350 }, { "epoch": 73.90292364462105, "grad_norm": 0.10189825296401978, "learning_rate": 2.612659665058189e-05, "loss": 6.800703704357147e-05, "step": 260360 }, { "epoch": 73.90576213454442, "grad_norm": 0.04631911963224411, "learning_rate": 2.612375816065853e-05, "loss": 7.626619189977646e-05, "step": 260370 }, { "epoch": 73.90860062446778, "grad_norm": 0.0028339135460555553, "learning_rate": 2.6120919670735173e-05, "loss": 0.0001401064917445183, "step": 260380 }, { "epoch": 73.91143911439114, "grad_norm": 0.0010840153554454446, "learning_rate": 2.611808118081181e-05, "loss": 1.700539141893387e-05, "step": 260390 }, { "epoch": 73.9142776043145, "grad_norm": 0.023290438577532768, "learning_rate": 2.6115242690888446e-05, "loss": 5.1133893430233e-05, "step": 260400 }, { "epoch": 73.91711609423787, "grad_norm": 0.004420882556587458, "learning_rate": 2.611240420096509e-05, "loss": 2.8834864497184754e-05, "step": 260410 }, { "epoch": 73.91995458416123, "grad_norm": 0.046202123165130615, "learning_rate": 2.610956571104173e-05, "loss": 0.001034032553434372, "step": 260420 }, { "epoch": 73.92279307408458, "grad_norm": 0.05785827711224556, "learning_rate": 2.6106727221118367e-05, "loss": 3.112088888883591e-05, "step": 260430 }, { "epoch": 73.92563156400794, "grad_norm": 0.0017335289157927036, "learning_rate": 2.6103888731195005e-05, "loss": 0.00012861471623182297, "step": 260440 }, { "epoch": 73.9284700539313, "grad_norm": 0.0022087725810706615, "learning_rate": 2.6101050241271646e-05, "loss": 0.0003206893801689148, "step": 260450 }, { "epoch": 73.93130854385467, "grad_norm": 3.290027618408203, "learning_rate": 2.6098211751348284e-05, "loss": 0.0003658795729279518, "step": 260460 }, { "epoch": 73.93414703377803, "grad_norm": 0.026523921638727188, "learning_rate": 2.6095373261424922e-05, "loss": 0.0003769531846046448, "step": 260470 }, { "epoch": 73.9369855237014, "grad_norm": 0.001602593925781548, "learning_rate": 2.6092534771501563e-05, "loss": 0.003401947394013405, "step": 260480 }, { "epoch": 73.93982401362476, "grad_norm": 0.006415638606995344, "learning_rate": 2.60896962815782e-05, "loss": 0.0009081462398171425, "step": 260490 }, { "epoch": 73.9426625035481, "grad_norm": 0.002066172193735838, "learning_rate": 2.608685779165484e-05, "loss": 9.13720577955246e-05, "step": 260500 }, { "epoch": 73.9426625035481, "eval_accuracy": 0.9835315063266993, "eval_loss": 0.07016986608505249, "eval_runtime": 36.0454, "eval_samples_per_second": 436.311, "eval_steps_per_second": 6.825, "step": 260500 }, { "epoch": 73.94550099347147, "grad_norm": 0.006661728024482727, "learning_rate": 2.6084019301731484e-05, "loss": 0.00013323836028575898, "step": 260510 }, { "epoch": 73.94833948339483, "grad_norm": 0.0064380778931081295, "learning_rate": 2.608118081180812e-05, "loss": 8.957721292972565e-05, "step": 260520 }, { "epoch": 73.9511779733182, "grad_norm": 0.0006567254895344377, "learning_rate": 2.6078342321884757e-05, "loss": 2.501755952835083e-05, "step": 260530 }, { "epoch": 73.95401646324156, "grad_norm": 0.0024745967239141464, "learning_rate": 2.6075503831961395e-05, "loss": 5.4197199642658236e-05, "step": 260540 }, { "epoch": 73.95685495316492, "grad_norm": 0.007767071016132832, "learning_rate": 2.607266534203804e-05, "loss": 0.00030883848667144774, "step": 260550 }, { "epoch": 73.95969344308828, "grad_norm": 0.0009467728668823838, "learning_rate": 2.6069826852114678e-05, "loss": 4.0788762271404265e-05, "step": 260560 }, { "epoch": 73.96253193301163, "grad_norm": 0.11157253384590149, "learning_rate": 2.606727221118365e-05, "loss": 0.004532432556152344, "step": 260570 }, { "epoch": 73.965370422935, "grad_norm": 0.035712823271751404, "learning_rate": 2.606443372126029e-05, "loss": 6.349273025989533e-05, "step": 260580 }, { "epoch": 73.96820891285836, "grad_norm": 0.010876581072807312, "learning_rate": 2.6061595231336933e-05, "loss": 0.0004312310367822647, "step": 260590 }, { "epoch": 73.97104740278172, "grad_norm": 0.015259618870913982, "learning_rate": 2.605875674141357e-05, "loss": 0.0003903361037373543, "step": 260600 }, { "epoch": 73.97388589270508, "grad_norm": 0.0028842210303992033, "learning_rate": 2.605591825149021e-05, "loss": 0.0004051372408866882, "step": 260610 }, { "epoch": 73.97672438262845, "grad_norm": 0.00735901016741991, "learning_rate": 2.6053079761566844e-05, "loss": 0.0005210435017943382, "step": 260620 }, { "epoch": 73.9795628725518, "grad_norm": 0.007403666153550148, "learning_rate": 2.605024127164349e-05, "loss": 0.0024976808577775954, "step": 260630 }, { "epoch": 73.98240136247516, "grad_norm": 0.15715797245502472, "learning_rate": 2.6047402781720127e-05, "loss": 0.0002311842516064644, "step": 260640 }, { "epoch": 73.98523985239852, "grad_norm": 0.18139225244522095, "learning_rate": 2.6044564291796765e-05, "loss": 7.787290960550308e-05, "step": 260650 }, { "epoch": 73.98807834232188, "grad_norm": 0.22481881082057953, "learning_rate": 2.6041725801873406e-05, "loss": 0.00010275151580572129, "step": 260660 }, { "epoch": 73.99091683224525, "grad_norm": 0.010828286409378052, "learning_rate": 2.6038887311950044e-05, "loss": 0.0008185232058167457, "step": 260670 }, { "epoch": 73.99375532216861, "grad_norm": 4.772006511688232, "learning_rate": 2.6036048822026682e-05, "loss": 0.0006670987233519555, "step": 260680 }, { "epoch": 73.99659381209197, "grad_norm": 0.03694641590118408, "learning_rate": 2.6033210332103323e-05, "loss": 0.00010868366807699203, "step": 260690 }, { "epoch": 73.99943230201532, "grad_norm": 0.04621463641524315, "learning_rate": 2.603037184217996e-05, "loss": 0.0011935351416468621, "step": 260700 }, { "epoch": 74.00227079193868, "grad_norm": 0.04059403017163277, "learning_rate": 2.60275333522566e-05, "loss": 0.000347222201526165, "step": 260710 }, { "epoch": 74.00510928186205, "grad_norm": 0.00652899919077754, "learning_rate": 2.6024694862333237e-05, "loss": 8.590519428253174e-05, "step": 260720 }, { "epoch": 74.00794777178541, "grad_norm": 0.0032043862156569958, "learning_rate": 2.6021856372409882e-05, "loss": 0.0013878244906663894, "step": 260730 }, { "epoch": 74.01078626170877, "grad_norm": 0.01441018283367157, "learning_rate": 2.6019017882486517e-05, "loss": 8.194353431463241e-05, "step": 260740 }, { "epoch": 74.01362475163214, "grad_norm": 0.01194449607282877, "learning_rate": 2.6016179392563155e-05, "loss": 0.0076765954494476315, "step": 260750 }, { "epoch": 74.0164632415555, "grad_norm": 0.11023816466331482, "learning_rate": 2.60133409026398e-05, "loss": 0.0006852064281702042, "step": 260760 }, { "epoch": 74.01930173147885, "grad_norm": 0.007335142232477665, "learning_rate": 2.6010502412716438e-05, "loss": 0.011714328825473786, "step": 260770 }, { "epoch": 74.02214022140221, "grad_norm": 0.012298683635890484, "learning_rate": 2.6007663922793075e-05, "loss": 0.008590578287839889, "step": 260780 }, { "epoch": 74.02497871132557, "grad_norm": 0.042478471994400024, "learning_rate": 2.6004825432869717e-05, "loss": 0.0006212787702679634, "step": 260790 }, { "epoch": 74.02781720124894, "grad_norm": 0.2825026512145996, "learning_rate": 2.6001986942946355e-05, "loss": 0.0005472414195537567, "step": 260800 }, { "epoch": 74.0306556911723, "grad_norm": 0.0053865741938352585, "learning_rate": 2.5999148453022993e-05, "loss": 4.714839160442352e-05, "step": 260810 }, { "epoch": 74.03349418109566, "grad_norm": 0.005230875685811043, "learning_rate": 2.599630996309963e-05, "loss": 0.00012597218155860901, "step": 260820 }, { "epoch": 74.03633267101901, "grad_norm": 0.011423484422266483, "learning_rate": 2.5993471473176272e-05, "loss": 0.0004287226125597954, "step": 260830 }, { "epoch": 74.03917116094237, "grad_norm": 0.02357744239270687, "learning_rate": 2.599063298325291e-05, "loss": 0.00017328020185232164, "step": 260840 }, { "epoch": 74.04200965086574, "grad_norm": 0.02393648773431778, "learning_rate": 2.5987794493329548e-05, "loss": 0.0009807601571083068, "step": 260850 }, { "epoch": 74.0448481407891, "grad_norm": 0.0068245986476540565, "learning_rate": 2.598495600340619e-05, "loss": 4.411246627569199e-05, "step": 260860 }, { "epoch": 74.04768663071246, "grad_norm": 0.006786560174077749, "learning_rate": 2.5982117513482828e-05, "loss": 8.828118443489074e-05, "step": 260870 }, { "epoch": 74.05052512063583, "grad_norm": 8.797764778137207, "learning_rate": 2.5979279023559466e-05, "loss": 0.0021920572966337205, "step": 260880 }, { "epoch": 74.05336361055919, "grad_norm": 9.709416389465332, "learning_rate": 2.597644053363611e-05, "loss": 0.0009199649095535279, "step": 260890 }, { "epoch": 74.05620210048254, "grad_norm": 0.1977071911096573, "learning_rate": 2.5973602043712748e-05, "loss": 0.0006240615621209145, "step": 260900 }, { "epoch": 74.0590405904059, "grad_norm": 0.7660979628562927, "learning_rate": 2.5970763553789383e-05, "loss": 0.00012121759355068206, "step": 260910 }, { "epoch": 74.06187908032926, "grad_norm": 0.004168940708041191, "learning_rate": 2.596792506386602e-05, "loss": 0.0008068643510341644, "step": 260920 }, { "epoch": 74.06471757025263, "grad_norm": 0.00939044076949358, "learning_rate": 2.5965086573942666e-05, "loss": 0.0001310199499130249, "step": 260930 }, { "epoch": 74.06755606017599, "grad_norm": 4.105745792388916, "learning_rate": 2.5962248084019304e-05, "loss": 0.006465020775794983, "step": 260940 }, { "epoch": 74.07039455009935, "grad_norm": 0.020084630697965622, "learning_rate": 2.595940959409594e-05, "loss": 0.0003698805347084999, "step": 260950 }, { "epoch": 74.07323304002271, "grad_norm": 0.006883575581014156, "learning_rate": 2.5956571104172583e-05, "loss": 8.333083242177964e-05, "step": 260960 }, { "epoch": 74.07607152994606, "grad_norm": 0.1185644194483757, "learning_rate": 2.595373261424922e-05, "loss": 8.833184838294983e-05, "step": 260970 }, { "epoch": 74.07891001986943, "grad_norm": 0.013667882420122623, "learning_rate": 2.595089412432586e-05, "loss": 0.0024782178923487665, "step": 260980 }, { "epoch": 74.08174850979279, "grad_norm": 0.3110823333263397, "learning_rate": 2.59480556344025e-05, "loss": 0.0001313215121626854, "step": 260990 }, { "epoch": 74.08458699971615, "grad_norm": 0.011601424776017666, "learning_rate": 2.594521714447914e-05, "loss": 4.469994455575943e-05, "step": 261000 }, { "epoch": 74.08458699971615, "eval_accuracy": 0.9848667895975075, "eval_loss": 0.06314557790756226, "eval_runtime": 35.5099, "eval_samples_per_second": 442.891, "eval_steps_per_second": 6.928, "step": 261000 }, { "epoch": 74.08742548963951, "grad_norm": 0.011412754654884338, "learning_rate": 2.5942378654555776e-05, "loss": 0.0002503182739019394, "step": 261010 }, { "epoch": 74.09026397956288, "grad_norm": 0.0010436642915010452, "learning_rate": 2.5939540164632414e-05, "loss": 2.2662244737148286e-05, "step": 261020 }, { "epoch": 74.09310246948624, "grad_norm": 0.028348254039883614, "learning_rate": 2.593670167470906e-05, "loss": 7.091984152793885e-05, "step": 261030 }, { "epoch": 74.09594095940959, "grad_norm": 0.00722242658957839, "learning_rate": 2.5933863184785694e-05, "loss": 2.253558486700058e-05, "step": 261040 }, { "epoch": 74.09877944933295, "grad_norm": 0.024298589676618576, "learning_rate": 2.593102469486233e-05, "loss": 3.386437892913818e-05, "step": 261050 }, { "epoch": 74.10161793925631, "grad_norm": 0.003298489609733224, "learning_rate": 2.5928186204938976e-05, "loss": 0.0003338469192385674, "step": 261060 }, { "epoch": 74.10445642917968, "grad_norm": 0.011059864424169064, "learning_rate": 2.5925347715015614e-05, "loss": 2.7041137218475342e-05, "step": 261070 }, { "epoch": 74.10729491910304, "grad_norm": 0.043393250554800034, "learning_rate": 2.5922509225092252e-05, "loss": 4.846286028623581e-05, "step": 261080 }, { "epoch": 74.1101334090264, "grad_norm": 0.002185623161494732, "learning_rate": 2.5919670735168894e-05, "loss": 6.959345191717147e-05, "step": 261090 }, { "epoch": 74.11297189894975, "grad_norm": 0.0031904380302876234, "learning_rate": 2.5916832245245532e-05, "loss": 6.155818700790406e-05, "step": 261100 }, { "epoch": 74.11581038887311, "grad_norm": 0.002371663460507989, "learning_rate": 2.591399375532217e-05, "loss": 3.224760293960571e-05, "step": 261110 }, { "epoch": 74.11864887879648, "grad_norm": 0.007501783315092325, "learning_rate": 2.5911155265398808e-05, "loss": 6.432291120290756e-05, "step": 261120 }, { "epoch": 74.12148736871984, "grad_norm": 0.009353564120829105, "learning_rate": 2.590831677547545e-05, "loss": 8.560679852962494e-05, "step": 261130 }, { "epoch": 74.1243258586432, "grad_norm": 0.0013679577969014645, "learning_rate": 2.5905478285552087e-05, "loss": 2.1825358271598815e-05, "step": 261140 }, { "epoch": 74.12716434856657, "grad_norm": 0.006177142728120089, "learning_rate": 2.5902639795628725e-05, "loss": 2.407282590866089e-05, "step": 261150 }, { "epoch": 74.13000283848993, "grad_norm": 0.006331897806376219, "learning_rate": 2.5899801305705366e-05, "loss": 2.218298614025116e-05, "step": 261160 }, { "epoch": 74.13284132841328, "grad_norm": 0.005733083002269268, "learning_rate": 2.5896962815782004e-05, "loss": 1.717228442430496e-05, "step": 261170 }, { "epoch": 74.13567981833664, "grad_norm": 0.0011058019008487463, "learning_rate": 2.5894124325858642e-05, "loss": 1.5778280794620513e-05, "step": 261180 }, { "epoch": 74.13851830826, "grad_norm": 0.009441010653972626, "learning_rate": 2.5891285835935287e-05, "loss": 5.1005370914936064e-05, "step": 261190 }, { "epoch": 74.14135679818337, "grad_norm": 0.005435471888631582, "learning_rate": 2.5888447346011925e-05, "loss": 2.6162900030612946e-05, "step": 261200 }, { "epoch": 74.14419528810673, "grad_norm": 0.00568330567330122, "learning_rate": 2.588560885608856e-05, "loss": 2.2211670875549315e-05, "step": 261210 }, { "epoch": 74.14703377803009, "grad_norm": 0.023936638608574867, "learning_rate": 2.5882770366165198e-05, "loss": 4.1532143950462344e-05, "step": 261220 }, { "epoch": 74.14987226795346, "grad_norm": 0.00428796699270606, "learning_rate": 2.5879931876241842e-05, "loss": 2.6439130306243896e-05, "step": 261230 }, { "epoch": 74.1527107578768, "grad_norm": 0.0038922561798244715, "learning_rate": 2.587709338631848e-05, "loss": 2.4279952049255372e-05, "step": 261240 }, { "epoch": 74.15554924780017, "grad_norm": 0.0007905198726803064, "learning_rate": 2.587425489639512e-05, "loss": 2.469569444656372e-05, "step": 261250 }, { "epoch": 74.15838773772353, "grad_norm": 0.0015169999096542597, "learning_rate": 2.587141640647176e-05, "loss": 4.076268523931503e-05, "step": 261260 }, { "epoch": 74.16122622764689, "grad_norm": 0.0006348946481011808, "learning_rate": 2.5868577916548398e-05, "loss": 1.1864304542541505e-05, "step": 261270 }, { "epoch": 74.16406471757026, "grad_norm": 0.024723796173930168, "learning_rate": 2.5865739426625036e-05, "loss": 2.2919848561286926e-05, "step": 261280 }, { "epoch": 74.16690320749362, "grad_norm": 0.004174520261585712, "learning_rate": 2.5862900936701677e-05, "loss": 2.549812197685242e-05, "step": 261290 }, { "epoch": 74.16974169741698, "grad_norm": 0.002649871166795492, "learning_rate": 2.5860062446778315e-05, "loss": 2.7687102556228637e-05, "step": 261300 }, { "epoch": 74.17258018734033, "grad_norm": 0.0057017127983272076, "learning_rate": 2.5857223956854953e-05, "loss": 1.6145966947078703e-05, "step": 261310 }, { "epoch": 74.17541867726369, "grad_norm": 0.002051500603556633, "learning_rate": 2.585438546693159e-05, "loss": 8.969008922576904e-06, "step": 261320 }, { "epoch": 74.17825716718706, "grad_norm": 0.01509537361562252, "learning_rate": 2.5851546977008233e-05, "loss": 1.967940479516983e-05, "step": 261330 }, { "epoch": 74.18109565711042, "grad_norm": 0.002577335573732853, "learning_rate": 2.584870848708487e-05, "loss": 1.8480792641639708e-05, "step": 261340 }, { "epoch": 74.18393414703378, "grad_norm": 0.0014613406965509057, "learning_rate": 2.584586999716151e-05, "loss": 0.0005977129563689232, "step": 261350 }, { "epoch": 74.18677263695714, "grad_norm": 0.005185562651604414, "learning_rate": 2.5843031507238153e-05, "loss": 2.8086267411708833e-05, "step": 261360 }, { "epoch": 74.1896111268805, "grad_norm": 0.0030839545652270317, "learning_rate": 2.584019301731479e-05, "loss": 0.00010771285742521286, "step": 261370 }, { "epoch": 74.19244961680386, "grad_norm": 0.01855332776904106, "learning_rate": 2.5837354527391426e-05, "loss": 0.0006135968491435051, "step": 261380 }, { "epoch": 74.19528810672722, "grad_norm": 0.0018352542538195848, "learning_rate": 2.583451603746807e-05, "loss": 0.00014822762459516526, "step": 261390 }, { "epoch": 74.19812659665058, "grad_norm": 0.0066349986009299755, "learning_rate": 2.583167754754471e-05, "loss": 4.85861673951149e-05, "step": 261400 }, { "epoch": 74.20096508657394, "grad_norm": 0.0072020273655653, "learning_rate": 2.5828839057621347e-05, "loss": 3.7818774580955504e-05, "step": 261410 }, { "epoch": 74.20380357649731, "grad_norm": 0.0036788417492061853, "learning_rate": 2.5826000567697985e-05, "loss": 2.8564222157001494e-05, "step": 261420 }, { "epoch": 74.20664206642067, "grad_norm": 0.0014961663400754333, "learning_rate": 2.5823162077774626e-05, "loss": 2.0167045295238496e-05, "step": 261430 }, { "epoch": 74.20948055634402, "grad_norm": 0.00604010047391057, "learning_rate": 2.5820323587851264e-05, "loss": 5.460120737552643e-05, "step": 261440 }, { "epoch": 74.21231904626738, "grad_norm": 0.008457285352051258, "learning_rate": 2.5817485097927902e-05, "loss": 3.536976873874664e-05, "step": 261450 }, { "epoch": 74.21515753619074, "grad_norm": 0.0016508289845660329, "learning_rate": 2.5814646608004543e-05, "loss": 3.150664269924164e-05, "step": 261460 }, { "epoch": 74.21799602611411, "grad_norm": 0.0055099609307944775, "learning_rate": 2.581180811808118e-05, "loss": 4.665348678827286e-05, "step": 261470 }, { "epoch": 74.22083451603747, "grad_norm": 0.00042724431841634214, "learning_rate": 2.580896962815782e-05, "loss": 9.62408259510994e-05, "step": 261480 }, { "epoch": 74.22367300596083, "grad_norm": 0.012383392080664635, "learning_rate": 2.5806131138234464e-05, "loss": 0.0006797203794121742, "step": 261490 }, { "epoch": 74.2265114958842, "grad_norm": 0.00744224525988102, "learning_rate": 2.5803292648311102e-05, "loss": 2.0024552941322327e-05, "step": 261500 }, { "epoch": 74.2265114958842, "eval_accuracy": 0.9861384879506581, "eval_loss": 0.05743718519806862, "eval_runtime": 35.5677, "eval_samples_per_second": 442.171, "eval_steps_per_second": 6.916, "step": 261500 }, { "epoch": 74.22934998580754, "grad_norm": 0.02553010731935501, "learning_rate": 2.5800454158387737e-05, "loss": 3.3328123390674594e-05, "step": 261510 }, { "epoch": 74.23218847573091, "grad_norm": 0.0017028511501848698, "learning_rate": 2.5797615668464375e-05, "loss": 3.628898411989212e-05, "step": 261520 }, { "epoch": 74.23502696565427, "grad_norm": 0.00410250760614872, "learning_rate": 2.579477717854102e-05, "loss": 1.733098179101944e-05, "step": 261530 }, { "epoch": 74.23786545557763, "grad_norm": 0.0013912743888795376, "learning_rate": 2.5791938688617657e-05, "loss": 6.059259176254273e-05, "step": 261540 }, { "epoch": 74.240703945501, "grad_norm": 0.005022455472499132, "learning_rate": 2.5789100198694295e-05, "loss": 1.8230266869068144e-05, "step": 261550 }, { "epoch": 74.24354243542436, "grad_norm": 0.03666917234659195, "learning_rate": 2.5786261708770937e-05, "loss": 3.4378841519355774e-05, "step": 261560 }, { "epoch": 74.24638092534771, "grad_norm": 0.006530631799250841, "learning_rate": 2.5783423218847575e-05, "loss": 1.3644061982631684e-05, "step": 261570 }, { "epoch": 74.24921941527107, "grad_norm": 0.0018119154265150428, "learning_rate": 2.5780584728924213e-05, "loss": 7.362999022006989e-05, "step": 261580 }, { "epoch": 74.25205790519443, "grad_norm": 0.002856035018339753, "learning_rate": 2.5777746239000854e-05, "loss": 2.5152601301670076e-05, "step": 261590 }, { "epoch": 74.2548963951178, "grad_norm": 0.0026447416748851538, "learning_rate": 2.5774907749077492e-05, "loss": 2.0512379705905914e-05, "step": 261600 }, { "epoch": 74.25773488504116, "grad_norm": 0.013994067907333374, "learning_rate": 2.577206925915413e-05, "loss": 2.85835936665535e-05, "step": 261610 }, { "epoch": 74.26057337496452, "grad_norm": 0.000995215610601008, "learning_rate": 2.5769230769230768e-05, "loss": 1.6484968364238738e-05, "step": 261620 }, { "epoch": 74.26341186488789, "grad_norm": 0.005310283042490482, "learning_rate": 2.576639227930741e-05, "loss": 1.8340349197387695e-05, "step": 261630 }, { "epoch": 74.26625035481123, "grad_norm": 0.0006229287828318775, "learning_rate": 2.5763553789384047e-05, "loss": 1.9791722297668458e-05, "step": 261640 }, { "epoch": 74.2690888447346, "grad_norm": 0.0011882008984684944, "learning_rate": 2.5760715299460685e-05, "loss": 0.0018214421346783639, "step": 261650 }, { "epoch": 74.27192733465796, "grad_norm": 1.319117784500122, "learning_rate": 2.575787680953733e-05, "loss": 0.0003925943747162819, "step": 261660 }, { "epoch": 74.27476582458132, "grad_norm": 0.01992751471698284, "learning_rate": 2.5755038319613968e-05, "loss": 2.2563338279724122e-05, "step": 261670 }, { "epoch": 74.27760431450469, "grad_norm": 0.01908019185066223, "learning_rate": 2.5752199829690603e-05, "loss": 5.190782248973846e-05, "step": 261680 }, { "epoch": 74.28044280442805, "grad_norm": 0.03127250820398331, "learning_rate": 2.5749361339767247e-05, "loss": 0.00010116510093212127, "step": 261690 }, { "epoch": 74.28328129435141, "grad_norm": 0.0024691636208444834, "learning_rate": 2.5746522849843885e-05, "loss": 3.7691183388233183e-05, "step": 261700 }, { "epoch": 74.28611978427476, "grad_norm": 0.026378996670246124, "learning_rate": 2.5743684359920523e-05, "loss": 0.00011726114898920059, "step": 261710 }, { "epoch": 74.28895827419812, "grad_norm": 0.004758570343255997, "learning_rate": 2.574084586999716e-05, "loss": 7.64545053243637e-05, "step": 261720 }, { "epoch": 74.29179676412149, "grad_norm": 0.003287712810561061, "learning_rate": 2.5738007380073803e-05, "loss": 7.956083863973618e-05, "step": 261730 }, { "epoch": 74.29463525404485, "grad_norm": 0.010416495613753796, "learning_rate": 2.573516889015044e-05, "loss": 0.0001368315890431404, "step": 261740 }, { "epoch": 74.29747374396821, "grad_norm": 0.004785054828971624, "learning_rate": 2.573233040022708e-05, "loss": 0.0001251567155122757, "step": 261750 }, { "epoch": 74.30031223389157, "grad_norm": 0.010695849545300007, "learning_rate": 2.572949191030372e-05, "loss": 0.00010024774819612503, "step": 261760 }, { "epoch": 74.30315072381494, "grad_norm": 0.03202615678310394, "learning_rate": 2.5726653420380358e-05, "loss": 9.841416031122207e-05, "step": 261770 }, { "epoch": 74.30598921373829, "grad_norm": 0.0013578516663983464, "learning_rate": 2.5723814930456996e-05, "loss": 6.375238299369813e-05, "step": 261780 }, { "epoch": 74.30882770366165, "grad_norm": 0.00447424128651619, "learning_rate": 2.572097644053364e-05, "loss": 3.776159137487411e-05, "step": 261790 }, { "epoch": 74.31166619358501, "grad_norm": 0.008899950422346592, "learning_rate": 2.5718137950610275e-05, "loss": 1.5783309936523438e-05, "step": 261800 }, { "epoch": 74.31450468350837, "grad_norm": 0.0010380120947957039, "learning_rate": 2.5715299460686913e-05, "loss": 1.9559450447559356e-05, "step": 261810 }, { "epoch": 74.31734317343174, "grad_norm": 0.03366800397634506, "learning_rate": 2.5712460970763558e-05, "loss": 4.4248066842556e-05, "step": 261820 }, { "epoch": 74.3201816633551, "grad_norm": 0.009653927758336067, "learning_rate": 2.5709622480840196e-05, "loss": 2.2246502339839935e-05, "step": 261830 }, { "epoch": 74.32302015327845, "grad_norm": 0.0011738977627828717, "learning_rate": 2.5706783990916834e-05, "loss": 6.328001618385314e-05, "step": 261840 }, { "epoch": 74.32585864320181, "grad_norm": 0.008142572827637196, "learning_rate": 2.570394550099347e-05, "loss": 4.915371537208557e-05, "step": 261850 }, { "epoch": 74.32869713312517, "grad_norm": 0.015070724301040173, "learning_rate": 2.5701107011070114e-05, "loss": 0.00021757669746875764, "step": 261860 }, { "epoch": 74.33153562304854, "grad_norm": 0.007572877686470747, "learning_rate": 2.569826852114675e-05, "loss": 4.4487789273262025e-05, "step": 261870 }, { "epoch": 74.3343741129719, "grad_norm": 0.009075693786144257, "learning_rate": 2.569543003122339e-05, "loss": 0.00021951068192720412, "step": 261880 }, { "epoch": 74.33721260289526, "grad_norm": 0.15750721096992493, "learning_rate": 2.569259154130003e-05, "loss": 5.9301964938640594e-05, "step": 261890 }, { "epoch": 74.34005109281863, "grad_norm": 0.0014927868032827973, "learning_rate": 2.568975305137667e-05, "loss": 0.0001464972272515297, "step": 261900 }, { "epoch": 74.34288958274198, "grad_norm": 0.004123189020901918, "learning_rate": 2.5686914561453307e-05, "loss": 0.001250237785279751, "step": 261910 }, { "epoch": 74.34572807266534, "grad_norm": 0.023139281198382378, "learning_rate": 2.568407607152995e-05, "loss": 0.0001329166814684868, "step": 261920 }, { "epoch": 74.3485665625887, "grad_norm": 0.04719500616192818, "learning_rate": 2.5681237581606586e-05, "loss": 0.00020243655890226365, "step": 261930 }, { "epoch": 74.35140505251206, "grad_norm": 0.0017426597187295556, "learning_rate": 2.5678399091683224e-05, "loss": 0.0010728208348155023, "step": 261940 }, { "epoch": 74.35424354243543, "grad_norm": 0.011695396155118942, "learning_rate": 2.5675560601759862e-05, "loss": 0.0003621179610490799, "step": 261950 }, { "epoch": 74.35708203235879, "grad_norm": 0.008096052333712578, "learning_rate": 2.5672722111836507e-05, "loss": 0.0002836663275957108, "step": 261960 }, { "epoch": 74.35992052228215, "grad_norm": 0.0130595862865448, "learning_rate": 2.5669883621913145e-05, "loss": 0.00035673025995492936, "step": 261970 }, { "epoch": 74.3627590122055, "grad_norm": 0.016440846025943756, "learning_rate": 2.566704513198978e-05, "loss": 0.0003712479025125504, "step": 261980 }, { "epoch": 74.36559750212886, "grad_norm": 0.006613820791244507, "learning_rate": 2.5664206642066424e-05, "loss": 0.0007983675226569176, "step": 261990 }, { "epoch": 74.36843599205223, "grad_norm": 0.022445237264037132, "learning_rate": 2.5661368152143062e-05, "loss": 0.0011261608451604843, "step": 262000 }, { "epoch": 74.36843599205223, "eval_accuracy": 0.9814332040440008, "eval_loss": 0.07180297374725342, "eval_runtime": 35.7601, "eval_samples_per_second": 439.791, "eval_steps_per_second": 6.879, "step": 262000 }, { "epoch": 74.37127448197559, "grad_norm": 0.0026289166416972876, "learning_rate": 2.56585296622197e-05, "loss": 0.00023089032620191575, "step": 262010 }, { "epoch": 74.37411297189895, "grad_norm": 0.002578032435849309, "learning_rate": 2.565569117229634e-05, "loss": 5.033090710639954e-05, "step": 262020 }, { "epoch": 74.37695146182232, "grad_norm": 0.00776064395904541, "learning_rate": 2.565285268237298e-05, "loss": 7.333084940910339e-05, "step": 262030 }, { "epoch": 74.37978995174566, "grad_norm": 1.4581772089004517, "learning_rate": 2.5650014192449618e-05, "loss": 0.0003895465284585953, "step": 262040 }, { "epoch": 74.38262844166903, "grad_norm": 0.005280865356326103, "learning_rate": 2.5647175702526256e-05, "loss": 0.00011051483452320098, "step": 262050 }, { "epoch": 74.38546693159239, "grad_norm": 0.0007646549493074417, "learning_rate": 2.5644337212602897e-05, "loss": 2.9431283473968505e-05, "step": 262060 }, { "epoch": 74.38830542151575, "grad_norm": 0.07753007858991623, "learning_rate": 2.5641498722679535e-05, "loss": 5.400683730840683e-05, "step": 262070 }, { "epoch": 74.39114391143912, "grad_norm": 0.02020013891160488, "learning_rate": 2.5638660232756173e-05, "loss": 0.00011456478387117386, "step": 262080 }, { "epoch": 74.39398240136248, "grad_norm": 0.0058907573111355305, "learning_rate": 2.5635821742832818e-05, "loss": 0.00013158228248357772, "step": 262090 }, { "epoch": 74.39682089128584, "grad_norm": 0.04314425215125084, "learning_rate": 2.5632983252909452e-05, "loss": 0.004335552453994751, "step": 262100 }, { "epoch": 74.39965938120919, "grad_norm": 0.0006548468372784555, "learning_rate": 2.563014476298609e-05, "loss": 0.0245049387216568, "step": 262110 }, { "epoch": 74.40249787113255, "grad_norm": 0.0043706875294446945, "learning_rate": 2.5627306273062735e-05, "loss": 0.00029515605419874193, "step": 262120 }, { "epoch": 74.40533636105592, "grad_norm": 0.010981646366417408, "learning_rate": 2.5624467783139373e-05, "loss": 0.0006165126338601113, "step": 262130 }, { "epoch": 74.40817485097928, "grad_norm": 0.007940271869301796, "learning_rate": 2.562162929321601e-05, "loss": 0.0019354086369276046, "step": 262140 }, { "epoch": 74.41101334090264, "grad_norm": 0.005383737850934267, "learning_rate": 2.5618790803292646e-05, "loss": 0.00017376691102981566, "step": 262150 }, { "epoch": 74.413851830826, "grad_norm": 0.0038878635969012976, "learning_rate": 2.561595231336929e-05, "loss": 0.00018281955271959305, "step": 262160 }, { "epoch": 74.41669032074937, "grad_norm": 0.0027960496954619884, "learning_rate": 2.561311382344593e-05, "loss": 0.0003392362967133522, "step": 262170 }, { "epoch": 74.41952881067272, "grad_norm": 0.025499261915683746, "learning_rate": 2.5610275333522566e-05, "loss": 0.00010601412504911423, "step": 262180 }, { "epoch": 74.42236730059608, "grad_norm": 0.0395333357155323, "learning_rate": 2.5607436843599208e-05, "loss": 0.015801376104354857, "step": 262190 }, { "epoch": 74.42520579051944, "grad_norm": 0.0027060932479798794, "learning_rate": 2.5604598353675846e-05, "loss": 8.587036281824112e-05, "step": 262200 }, { "epoch": 74.4280442804428, "grad_norm": 0.00725987832993269, "learning_rate": 2.5601759863752484e-05, "loss": 0.0010024292394518852, "step": 262210 }, { "epoch": 74.43088277036617, "grad_norm": 0.004444032907485962, "learning_rate": 2.5598921373829125e-05, "loss": 0.0006415680050849915, "step": 262220 }, { "epoch": 74.43372126028953, "grad_norm": 0.34071627259254456, "learning_rate": 2.5596082883905763e-05, "loss": 0.001013435423374176, "step": 262230 }, { "epoch": 74.4365597502129, "grad_norm": 2.115173578262329, "learning_rate": 2.55932443939824e-05, "loss": 0.0003742694854736328, "step": 262240 }, { "epoch": 74.43939824013624, "grad_norm": 0.01173253171145916, "learning_rate": 2.559040590405904e-05, "loss": 0.0001472311094403267, "step": 262250 }, { "epoch": 74.4422367300596, "grad_norm": 0.0035001819487661123, "learning_rate": 2.5587567414135684e-05, "loss": 3.681071102619171e-05, "step": 262260 }, { "epoch": 74.44507521998297, "grad_norm": 0.0014475042698904872, "learning_rate": 2.558472892421232e-05, "loss": 0.0001739460974931717, "step": 262270 }, { "epoch": 74.44791370990633, "grad_norm": 0.013566524721682072, "learning_rate": 2.5581890434288956e-05, "loss": 9.724311530590057e-05, "step": 262280 }, { "epoch": 74.4507521998297, "grad_norm": 0.0050255111418664455, "learning_rate": 2.55790519443656e-05, "loss": 0.00023461561650037766, "step": 262290 }, { "epoch": 74.45359068975306, "grad_norm": 0.14539796113967896, "learning_rate": 2.557621345444224e-05, "loss": 0.0002926049754023552, "step": 262300 }, { "epoch": 74.4564291796764, "grad_norm": 12.28657054901123, "learning_rate": 2.5573374964518877e-05, "loss": 0.0015391750261187554, "step": 262310 }, { "epoch": 74.45926766959977, "grad_norm": 0.016317181289196014, "learning_rate": 2.557053647459552e-05, "loss": 0.0053802113980054855, "step": 262320 }, { "epoch": 74.46210615952313, "grad_norm": 0.12285836040973663, "learning_rate": 2.5567697984672157e-05, "loss": 0.0003086347132921219, "step": 262330 }, { "epoch": 74.4649446494465, "grad_norm": 0.003708733944222331, "learning_rate": 2.5564859494748795e-05, "loss": 0.0012915262952446938, "step": 262340 }, { "epoch": 74.46778313936986, "grad_norm": 0.016418201848864555, "learning_rate": 2.5562021004825433e-05, "loss": 0.00014277007430791856, "step": 262350 }, { "epoch": 74.47062162929322, "grad_norm": 0.004108394030481577, "learning_rate": 2.5559182514902074e-05, "loss": 0.0003376806154847145, "step": 262360 }, { "epoch": 74.47346011921658, "grad_norm": 0.002647643443197012, "learning_rate": 2.5556344024978712e-05, "loss": 0.0001513764262199402, "step": 262370 }, { "epoch": 74.47629860913993, "grad_norm": 0.007572723552584648, "learning_rate": 2.555350553505535e-05, "loss": 4.834495484828949e-05, "step": 262380 }, { "epoch": 74.4791370990633, "grad_norm": 0.0040962728671729565, "learning_rate": 2.5550667045131995e-05, "loss": 5.6364387273788455e-05, "step": 262390 }, { "epoch": 74.48197558898666, "grad_norm": 0.27546200156211853, "learning_rate": 2.554782855520863e-05, "loss": 0.0001501096412539482, "step": 262400 }, { "epoch": 74.48481407891002, "grad_norm": 0.002385615138337016, "learning_rate": 2.5544990065285267e-05, "loss": 0.006309086084365844, "step": 262410 }, { "epoch": 74.48765256883338, "grad_norm": 0.0031301567796617746, "learning_rate": 2.5542151575361912e-05, "loss": 7.628723978996277e-05, "step": 262420 }, { "epoch": 74.49049105875675, "grad_norm": 0.0035333377309143543, "learning_rate": 2.553931308543855e-05, "loss": 4.4242851436138154e-05, "step": 262430 }, { "epoch": 74.49332954868011, "grad_norm": 0.0007714618113823235, "learning_rate": 2.5536474595515188e-05, "loss": 0.00012118779122829437, "step": 262440 }, { "epoch": 74.49616803860346, "grad_norm": 0.00040424714097753167, "learning_rate": 2.5533636105591823e-05, "loss": 3.3608078956604005e-05, "step": 262450 }, { "epoch": 74.49900652852682, "grad_norm": 0.05882369354367256, "learning_rate": 2.5530797615668467e-05, "loss": 3.664698451757431e-05, "step": 262460 }, { "epoch": 74.50184501845018, "grad_norm": 2.9391562938690186, "learning_rate": 2.5527959125745105e-05, "loss": 0.00037063825875520705, "step": 262470 }, { "epoch": 74.50468350837355, "grad_norm": 0.013026207685470581, "learning_rate": 2.5525120635821743e-05, "loss": 0.004054905101656914, "step": 262480 }, { "epoch": 74.50752199829691, "grad_norm": 0.005962715018540621, "learning_rate": 2.5522282145898385e-05, "loss": 0.00018731597810983658, "step": 262490 }, { "epoch": 74.51036048822027, "grad_norm": 0.002907736226916313, "learning_rate": 2.5519443655975023e-05, "loss": 3.3043883740901946e-05, "step": 262500 }, { "epoch": 74.51036048822027, "eval_accuracy": 0.9858205633623705, "eval_loss": 0.057471420615911484, "eval_runtime": 35.9804, "eval_samples_per_second": 437.099, "eval_steps_per_second": 6.837, "step": 262500 }, { "epoch": 74.51319897814363, "grad_norm": 0.01857050135731697, "learning_rate": 2.551660516605166e-05, "loss": 3.0330754816532136e-05, "step": 262510 }, { "epoch": 74.51603746806698, "grad_norm": 0.007148245815187693, "learning_rate": 2.5513766676128302e-05, "loss": 0.00014765355736017226, "step": 262520 }, { "epoch": 74.51887595799035, "grad_norm": 0.41661912202835083, "learning_rate": 2.551092818620494e-05, "loss": 0.00043045952916145327, "step": 262530 }, { "epoch": 74.52171444791371, "grad_norm": 0.009557627141475677, "learning_rate": 2.5508089696281578e-05, "loss": 0.0012989621609449387, "step": 262540 }, { "epoch": 74.52455293783707, "grad_norm": 0.0022798306308686733, "learning_rate": 2.5505251206358216e-05, "loss": 0.00021412950009107589, "step": 262550 }, { "epoch": 74.52739142776043, "grad_norm": 0.017691465094685555, "learning_rate": 2.550241271643486e-05, "loss": 0.00048585459589958193, "step": 262560 }, { "epoch": 74.5302299176838, "grad_norm": 0.2645139694213867, "learning_rate": 2.5499574226511495e-05, "loss": 0.0002528805285692215, "step": 262570 }, { "epoch": 74.53306840760715, "grad_norm": 0.0038268903736025095, "learning_rate": 2.5496735736588133e-05, "loss": 0.0001256728544831276, "step": 262580 }, { "epoch": 74.53590689753051, "grad_norm": 0.012734957970678806, "learning_rate": 2.5493897246664778e-05, "loss": 0.0005817815661430358, "step": 262590 }, { "epoch": 74.53874538745387, "grad_norm": 0.1459779590368271, "learning_rate": 2.5491058756741416e-05, "loss": 4.934407770633698e-05, "step": 262600 }, { "epoch": 74.54158387737724, "grad_norm": 0.00115402159281075, "learning_rate": 2.5488220266818054e-05, "loss": 0.00012251324951648713, "step": 262610 }, { "epoch": 74.5444223673006, "grad_norm": 0.001488060224801302, "learning_rate": 2.5485381776894695e-05, "loss": 3.134980797767639e-05, "step": 262620 }, { "epoch": 74.54726085722396, "grad_norm": 1.9672529697418213, "learning_rate": 2.5482543286971333e-05, "loss": 0.0003156902268528938, "step": 262630 }, { "epoch": 74.55009934714732, "grad_norm": 0.03950069099664688, "learning_rate": 2.547970479704797e-05, "loss": 0.00010702349245548249, "step": 262640 }, { "epoch": 74.55293783707067, "grad_norm": 0.007090823259204626, "learning_rate": 2.547686630712461e-05, "loss": 3.0311942100524903e-05, "step": 262650 }, { "epoch": 74.55577632699404, "grad_norm": 0.11723928153514862, "learning_rate": 2.547402781720125e-05, "loss": 9.70562919974327e-05, "step": 262660 }, { "epoch": 74.5586148169174, "grad_norm": 0.0020763312932103872, "learning_rate": 2.547118932727789e-05, "loss": 2.0810216665267944e-05, "step": 262670 }, { "epoch": 74.56145330684076, "grad_norm": 5.441959381103516, "learning_rate": 2.5468350837354527e-05, "loss": 0.0012084724381566047, "step": 262680 }, { "epoch": 74.56429179676412, "grad_norm": 0.010430639609694481, "learning_rate": 2.5465512347431168e-05, "loss": 0.0010677758604288102, "step": 262690 }, { "epoch": 74.56713028668749, "grad_norm": 0.004347166977822781, "learning_rate": 2.5462673857507806e-05, "loss": 0.0003890169784426689, "step": 262700 }, { "epoch": 74.56996877661085, "grad_norm": 0.011770673096179962, "learning_rate": 2.5459835367584444e-05, "loss": 0.00018730778247117997, "step": 262710 }, { "epoch": 74.5728072665342, "grad_norm": 0.00329620111733675, "learning_rate": 2.545699687766109e-05, "loss": 6.342437118291854e-05, "step": 262720 }, { "epoch": 74.57564575645756, "grad_norm": 0.02337299846112728, "learning_rate": 2.5454158387737727e-05, "loss": 0.0001636827364563942, "step": 262730 }, { "epoch": 74.57848424638092, "grad_norm": 0.14662733674049377, "learning_rate": 2.545131989781436e-05, "loss": 9.368713945150376e-05, "step": 262740 }, { "epoch": 74.58132273630429, "grad_norm": 0.0015547135844826698, "learning_rate": 2.5448481407891e-05, "loss": 5.422551184892654e-05, "step": 262750 }, { "epoch": 74.58416122622765, "grad_norm": 0.286818265914917, "learning_rate": 2.5445642917967644e-05, "loss": 0.00022845249623060226, "step": 262760 }, { "epoch": 74.58699971615101, "grad_norm": 0.0017055915668606758, "learning_rate": 2.5442804428044282e-05, "loss": 2.7138367295265196e-05, "step": 262770 }, { "epoch": 74.58983820607436, "grad_norm": 0.014684469439089298, "learning_rate": 2.543996593812092e-05, "loss": 8.220840245485306e-05, "step": 262780 }, { "epoch": 74.59267669599772, "grad_norm": 0.0045242574997246265, "learning_rate": 2.543712744819756e-05, "loss": 2.0066834986209868e-05, "step": 262790 }, { "epoch": 74.59551518592109, "grad_norm": 0.014140551909804344, "learning_rate": 2.54342889582742e-05, "loss": 3.1935796141624453e-05, "step": 262800 }, { "epoch": 74.59835367584445, "grad_norm": 0.012218253687024117, "learning_rate": 2.5431450468350837e-05, "loss": 1.345910131931305e-05, "step": 262810 }, { "epoch": 74.60119216576781, "grad_norm": 0.030922014266252518, "learning_rate": 2.542861197842748e-05, "loss": 4.2283721268177035e-05, "step": 262820 }, { "epoch": 74.60403065569118, "grad_norm": 0.009161100722849369, "learning_rate": 2.5425773488504117e-05, "loss": 6.359908729791641e-05, "step": 262830 }, { "epoch": 74.60686914561454, "grad_norm": 0.001508289366029203, "learning_rate": 2.5422934998580755e-05, "loss": 1.8591806292533874e-05, "step": 262840 }, { "epoch": 74.60970763553789, "grad_norm": 0.015273502096533775, "learning_rate": 2.5420096508657393e-05, "loss": 2.9781274497509004e-05, "step": 262850 }, { "epoch": 74.61254612546125, "grad_norm": 0.001529933768324554, "learning_rate": 2.5417258018734034e-05, "loss": 3.2442435622215274e-05, "step": 262860 }, { "epoch": 74.61538461538461, "grad_norm": 0.0071279993280768394, "learning_rate": 2.5414419528810672e-05, "loss": 4.9466267228126526e-05, "step": 262870 }, { "epoch": 74.61822310530798, "grad_norm": 0.013009969145059586, "learning_rate": 2.541158103888731e-05, "loss": 9.060222655534744e-05, "step": 262880 }, { "epoch": 74.62106159523134, "grad_norm": 0.06743021309375763, "learning_rate": 2.5408742548963955e-05, "loss": 3.163646906614304e-05, "step": 262890 }, { "epoch": 74.6239000851547, "grad_norm": 0.05597076565027237, "learning_rate": 2.5405904059040593e-05, "loss": 2.3472495377063753e-05, "step": 262900 }, { "epoch": 74.62673857507806, "grad_norm": 0.0570199228823185, "learning_rate": 2.540306556911723e-05, "loss": 3.9264000952243806e-05, "step": 262910 }, { "epoch": 74.62957706500141, "grad_norm": 0.0037743856664747, "learning_rate": 2.5400227079193872e-05, "loss": 2.0543113350868225e-05, "step": 262920 }, { "epoch": 74.63241555492478, "grad_norm": 0.012355453334748745, "learning_rate": 2.539738858927051e-05, "loss": 5.272850394248963e-05, "step": 262930 }, { "epoch": 74.63525404484814, "grad_norm": 0.0014028878649696708, "learning_rate": 2.5394550099347148e-05, "loss": 1.703929156064987e-05, "step": 262940 }, { "epoch": 74.6380925347715, "grad_norm": 0.010979410260915756, "learning_rate": 2.5391711609423786e-05, "loss": 1.5117786824703217e-05, "step": 262950 }, { "epoch": 74.64093102469486, "grad_norm": 0.0007930730935186148, "learning_rate": 2.5388873119500428e-05, "loss": 1.0626763105392456e-05, "step": 262960 }, { "epoch": 74.64376951461823, "grad_norm": 0.006184583064168692, "learning_rate": 2.5386034629577066e-05, "loss": 3.828499466180801e-05, "step": 262970 }, { "epoch": 74.64660800454159, "grad_norm": 0.006093475501984358, "learning_rate": 2.5383196139653704e-05, "loss": 0.00010775905102491378, "step": 262980 }, { "epoch": 74.64944649446494, "grad_norm": 0.0028045738581568003, "learning_rate": 2.5380357649730345e-05, "loss": 1.9553303718566895e-05, "step": 262990 }, { "epoch": 74.6522849843883, "grad_norm": 0.02110123634338379, "learning_rate": 2.5377519159806983e-05, "loss": 0.000126664899289608, "step": 263000 }, { "epoch": 74.6522849843883, "eval_accuracy": 0.9873466013861512, "eval_loss": 0.05245589464902878, "eval_runtime": 35.4299, "eval_samples_per_second": 443.891, "eval_steps_per_second": 6.943, "step": 263000 }, { "epoch": 74.65512347431167, "grad_norm": 0.01605304144322872, "learning_rate": 2.537468066988362e-05, "loss": 7.188376039266587e-05, "step": 263010 }, { "epoch": 74.65796196423503, "grad_norm": 0.004310134798288345, "learning_rate": 2.5371842179960266e-05, "loss": 2.14405357837677e-05, "step": 263020 }, { "epoch": 74.66080045415839, "grad_norm": 0.00795859657227993, "learning_rate": 2.5369003690036904e-05, "loss": 2.1517835557460784e-05, "step": 263030 }, { "epoch": 74.66363894408175, "grad_norm": 0.0013225775910541415, "learning_rate": 2.5366165200113538e-05, "loss": 3.3971108496189115e-05, "step": 263040 }, { "epoch": 74.6664774340051, "grad_norm": 0.001831045956350863, "learning_rate": 2.5363326710190183e-05, "loss": 3.112070262432098e-05, "step": 263050 }, { "epoch": 74.66931592392847, "grad_norm": 0.0011172490194439888, "learning_rate": 2.536048822026682e-05, "loss": 1.4863908290863037e-05, "step": 263060 }, { "epoch": 74.67215441385183, "grad_norm": 0.08533458411693573, "learning_rate": 2.535764973034346e-05, "loss": 3.2787024974823e-05, "step": 263070 }, { "epoch": 74.67499290377519, "grad_norm": 0.0029439900536090136, "learning_rate": 2.5354811240420097e-05, "loss": 1.428648829460144e-05, "step": 263080 }, { "epoch": 74.67783139369855, "grad_norm": 0.0076530929654836655, "learning_rate": 2.535197275049674e-05, "loss": 1.3092160224914551e-05, "step": 263090 }, { "epoch": 74.68066988362192, "grad_norm": 0.007436764892190695, "learning_rate": 2.5349134260573376e-05, "loss": 3.668610006570816e-05, "step": 263100 }, { "epoch": 74.68350837354528, "grad_norm": 0.00023222618619911373, "learning_rate": 2.5346295770650014e-05, "loss": 2.322923392057419e-05, "step": 263110 }, { "epoch": 74.68634686346863, "grad_norm": 0.0006410355563275516, "learning_rate": 2.5343457280726656e-05, "loss": 1.023169606924057e-05, "step": 263120 }, { "epoch": 74.68918535339199, "grad_norm": 0.026438066735863686, "learning_rate": 2.5340618790803294e-05, "loss": 1.778900623321533e-05, "step": 263130 }, { "epoch": 74.69202384331535, "grad_norm": 0.0021558229345828295, "learning_rate": 2.5337780300879932e-05, "loss": 3.345385193824768e-05, "step": 263140 }, { "epoch": 74.69486233323872, "grad_norm": 0.0020199837163090706, "learning_rate": 2.5334941810956576e-05, "loss": 1.6449764370918273e-05, "step": 263150 }, { "epoch": 74.69770082316208, "grad_norm": 0.027785640209913254, "learning_rate": 2.533210332103321e-05, "loss": 2.0195543766021728e-05, "step": 263160 }, { "epoch": 74.70053931308544, "grad_norm": 0.004875251557677984, "learning_rate": 2.532926483110985e-05, "loss": 2.830158919095993e-05, "step": 263170 }, { "epoch": 74.7033778030088, "grad_norm": 0.0136678172275424, "learning_rate": 2.5326426341186487e-05, "loss": 3.2156147062778474e-05, "step": 263180 }, { "epoch": 74.70621629293215, "grad_norm": 0.0027557597495615482, "learning_rate": 2.5323587851263132e-05, "loss": 1.2668035924434663e-05, "step": 263190 }, { "epoch": 74.70905478285552, "grad_norm": 0.0032439783681184053, "learning_rate": 2.532074936133977e-05, "loss": 3.4009851515293124e-05, "step": 263200 }, { "epoch": 74.71189327277888, "grad_norm": 0.0127792377024889, "learning_rate": 2.5317910871416404e-05, "loss": 2.765543758869171e-05, "step": 263210 }, { "epoch": 74.71473176270224, "grad_norm": 0.01606140099465847, "learning_rate": 2.531507238149305e-05, "loss": 3.903694450855255e-05, "step": 263220 }, { "epoch": 74.7175702526256, "grad_norm": 0.003204119158908725, "learning_rate": 2.5312233891569687e-05, "loss": 3.5643577575683594e-05, "step": 263230 }, { "epoch": 74.72040874254897, "grad_norm": 0.001875908114016056, "learning_rate": 2.5309395401646325e-05, "loss": 3.759823739528656e-05, "step": 263240 }, { "epoch": 74.72324723247232, "grad_norm": 0.0021326441783457994, "learning_rate": 2.5306556911722967e-05, "loss": 4.778187721967697e-05, "step": 263250 }, { "epoch": 74.72608572239568, "grad_norm": 0.005919062998145819, "learning_rate": 2.5303718421799604e-05, "loss": 1.255236566066742e-05, "step": 263260 }, { "epoch": 74.72892421231904, "grad_norm": 0.002122822916135192, "learning_rate": 2.5300879931876242e-05, "loss": 8.862800896167756e-05, "step": 263270 }, { "epoch": 74.7317627022424, "grad_norm": 0.038143884390592575, "learning_rate": 2.529804144195288e-05, "loss": 4.1466206312179565e-05, "step": 263280 }, { "epoch": 74.73460119216577, "grad_norm": 0.003091036807745695, "learning_rate": 2.5295202952029522e-05, "loss": 2.9098056256771087e-05, "step": 263290 }, { "epoch": 74.73743968208913, "grad_norm": 0.061767321079969406, "learning_rate": 2.529236446210616e-05, "loss": 2.4537742137908936e-05, "step": 263300 }, { "epoch": 74.7402781720125, "grad_norm": 0.0020460360683500767, "learning_rate": 2.5289525972182798e-05, "loss": 3.724731504917145e-05, "step": 263310 }, { "epoch": 74.74311666193584, "grad_norm": 0.014085185714066029, "learning_rate": 2.5286687482259443e-05, "loss": 2.2246502339839935e-05, "step": 263320 }, { "epoch": 74.7459551518592, "grad_norm": 0.0021079126745462418, "learning_rate": 2.5283848992336077e-05, "loss": 7.011741399765015e-05, "step": 263330 }, { "epoch": 74.74879364178257, "grad_norm": 0.005160616710782051, "learning_rate": 2.5281010502412715e-05, "loss": 4.2183510959148404e-05, "step": 263340 }, { "epoch": 74.75163213170593, "grad_norm": 0.020094191655516624, "learning_rate": 2.527817201248936e-05, "loss": 2.246703952550888e-05, "step": 263350 }, { "epoch": 74.7544706216293, "grad_norm": 0.006669085472822189, "learning_rate": 2.5275333522565998e-05, "loss": 1.9190460443496703e-05, "step": 263360 }, { "epoch": 74.75730911155266, "grad_norm": 0.004179840907454491, "learning_rate": 2.5272495032642636e-05, "loss": 6.624814122915267e-05, "step": 263370 }, { "epoch": 74.76014760147602, "grad_norm": 0.006736247334629297, "learning_rate": 2.526965654271927e-05, "loss": 2.5502406060695648e-05, "step": 263380 }, { "epoch": 74.76298609139937, "grad_norm": 0.0032310595270246267, "learning_rate": 2.5266818052795915e-05, "loss": 2.913139760494232e-05, "step": 263390 }, { "epoch": 74.76582458132273, "grad_norm": 0.003979822620749474, "learning_rate": 2.5263979562872553e-05, "loss": 2.495795488357544e-05, "step": 263400 }, { "epoch": 74.7686630712461, "grad_norm": 0.0033049227204173803, "learning_rate": 2.526114107294919e-05, "loss": 2.617873251438141e-05, "step": 263410 }, { "epoch": 74.77150156116946, "grad_norm": 0.01966002769768238, "learning_rate": 2.5258302583025833e-05, "loss": 2.0231679081916808e-05, "step": 263420 }, { "epoch": 74.77434005109282, "grad_norm": 0.0027461142744868994, "learning_rate": 2.525546409310247e-05, "loss": 3.618989139795303e-05, "step": 263430 }, { "epoch": 74.77717854101618, "grad_norm": 0.021158572286367416, "learning_rate": 2.525262560317911e-05, "loss": 3.654006868600845e-05, "step": 263440 }, { "epoch": 74.78001703093955, "grad_norm": 0.004377278033643961, "learning_rate": 2.5249787113255753e-05, "loss": 1.610312610864639e-05, "step": 263450 }, { "epoch": 74.7828555208629, "grad_norm": 0.0015922533348202705, "learning_rate": 2.5246948623332388e-05, "loss": 4.043951630592346e-05, "step": 263460 }, { "epoch": 74.78569401078626, "grad_norm": 0.002459760755300522, "learning_rate": 2.5244110133409026e-05, "loss": 1.5478767454624177e-05, "step": 263470 }, { "epoch": 74.78853250070962, "grad_norm": 0.003548542270436883, "learning_rate": 2.5241271643485664e-05, "loss": 3.279969096183777e-05, "step": 263480 }, { "epoch": 74.79137099063298, "grad_norm": 0.006592580582946539, "learning_rate": 2.523843315356231e-05, "loss": 2.828482538461685e-05, "step": 263490 }, { "epoch": 74.79420948055635, "grad_norm": 0.0012131306575611234, "learning_rate": 2.5235594663638947e-05, "loss": 2.1352432668209076e-05, "step": 263500 }, { "epoch": 74.79420948055635, "eval_accuracy": 0.9872194315508361, "eval_loss": 0.0532657653093338, "eval_runtime": 35.6564, "eval_samples_per_second": 441.071, "eval_steps_per_second": 6.899, "step": 263500 }, { "epoch": 74.79704797047971, "grad_norm": 0.001863250508904457, "learning_rate": 2.523275617371558e-05, "loss": 3.895312547683716e-05, "step": 263510 }, { "epoch": 74.79988646040306, "grad_norm": 0.0018889672355726361, "learning_rate": 2.5229917683792226e-05, "loss": 2.201814204454422e-05, "step": 263520 }, { "epoch": 74.80272495032642, "grad_norm": 0.02105017751455307, "learning_rate": 2.5227079193868864e-05, "loss": 4.781316965818405e-05, "step": 263530 }, { "epoch": 74.80556344024978, "grad_norm": 0.003946194890886545, "learning_rate": 2.5224240703945502e-05, "loss": 4.032421857118607e-05, "step": 263540 }, { "epoch": 74.80840193017315, "grad_norm": 0.0015905542531982064, "learning_rate": 2.5221402214022143e-05, "loss": 1.9055232405662538e-05, "step": 263550 }, { "epoch": 74.81124042009651, "grad_norm": 0.0034960811026394367, "learning_rate": 2.521856372409878e-05, "loss": 1.3533607125282288e-05, "step": 263560 }, { "epoch": 74.81407891001987, "grad_norm": 0.001684701070189476, "learning_rate": 2.521572523417542e-05, "loss": 9.089335799217224e-06, "step": 263570 }, { "epoch": 74.81691739994324, "grad_norm": 0.00218298495747149, "learning_rate": 2.5212886744252057e-05, "loss": 1.5057437121868133e-05, "step": 263580 }, { "epoch": 74.81975588986658, "grad_norm": 0.002644282067194581, "learning_rate": 2.52100482543287e-05, "loss": 1.5512853860855102e-05, "step": 263590 }, { "epoch": 74.82259437978995, "grad_norm": 0.00709612388163805, "learning_rate": 2.5207209764405337e-05, "loss": 1.2790225446224212e-05, "step": 263600 }, { "epoch": 74.82543286971331, "grad_norm": 0.0005547325708903372, "learning_rate": 2.5204371274481975e-05, "loss": 2.034846693277359e-05, "step": 263610 }, { "epoch": 74.82827135963667, "grad_norm": 0.004877915605902672, "learning_rate": 2.520153278455862e-05, "loss": 2.8233975172042847e-05, "step": 263620 }, { "epoch": 74.83110984956004, "grad_norm": 0.009411581791937351, "learning_rate": 2.5198694294635254e-05, "loss": 0.00019703339785337448, "step": 263630 }, { "epoch": 74.8339483394834, "grad_norm": 0.016870135441422462, "learning_rate": 2.519613965370423e-05, "loss": 0.003328825905919075, "step": 263640 }, { "epoch": 74.83678682940676, "grad_norm": 0.007936110720038414, "learning_rate": 2.519330116378087e-05, "loss": 0.0013862794265151023, "step": 263650 }, { "epoch": 74.83962531933011, "grad_norm": 0.002816863590851426, "learning_rate": 2.5190462673857507e-05, "loss": 0.0026133690029382707, "step": 263660 }, { "epoch": 74.84246380925347, "grad_norm": 0.03405928239226341, "learning_rate": 2.518762418393415e-05, "loss": 0.0003123138099908829, "step": 263670 }, { "epoch": 74.84530229917684, "grad_norm": 0.010891158133745193, "learning_rate": 2.5184785694010786e-05, "loss": 0.0035738013684749603, "step": 263680 }, { "epoch": 74.8481407891002, "grad_norm": 0.01185453962534666, "learning_rate": 2.5181947204087424e-05, "loss": 8.049700409173965e-05, "step": 263690 }, { "epoch": 74.85097927902356, "grad_norm": 0.0036269519478082657, "learning_rate": 2.517910871416407e-05, "loss": 0.0003914188593626022, "step": 263700 }, { "epoch": 74.85381776894693, "grad_norm": 0.011512331664562225, "learning_rate": 2.5176270224240707e-05, "loss": 0.0001460343599319458, "step": 263710 }, { "epoch": 74.85665625887029, "grad_norm": 0.0043488843366503716, "learning_rate": 2.5173431734317345e-05, "loss": 1.9377470016479494e-05, "step": 263720 }, { "epoch": 74.85949474879364, "grad_norm": 0.0021462601143866777, "learning_rate": 2.5170593244393986e-05, "loss": 3.3633038401603696e-05, "step": 263730 }, { "epoch": 74.862333238717, "grad_norm": 0.02018420584499836, "learning_rate": 2.5167754754470624e-05, "loss": 4.961136728525162e-05, "step": 263740 }, { "epoch": 74.86517172864036, "grad_norm": 0.007796843536198139, "learning_rate": 2.5164916264547262e-05, "loss": 0.00010754652321338653, "step": 263750 }, { "epoch": 74.86801021856373, "grad_norm": 0.0031166246626526117, "learning_rate": 2.51620777746239e-05, "loss": 2.7128495275974274e-05, "step": 263760 }, { "epoch": 74.87084870848709, "grad_norm": 0.0028435378335416317, "learning_rate": 2.515923928470054e-05, "loss": 5.364324897527695e-05, "step": 263770 }, { "epoch": 74.87368719841045, "grad_norm": 0.004800813272595406, "learning_rate": 2.515640079477718e-05, "loss": 1.9790790975093842e-05, "step": 263780 }, { "epoch": 74.8765256883338, "grad_norm": 0.00631321407854557, "learning_rate": 2.5153562304853817e-05, "loss": 3.6398693919181826e-05, "step": 263790 }, { "epoch": 74.87936417825716, "grad_norm": 0.006382049061357975, "learning_rate": 2.515072381493046e-05, "loss": 1.3940595090389252e-05, "step": 263800 }, { "epoch": 74.88220266818053, "grad_norm": 0.000603646389208734, "learning_rate": 2.5147885325007097e-05, "loss": 2.63286754488945e-05, "step": 263810 }, { "epoch": 74.88504115810389, "grad_norm": 0.007348811719566584, "learning_rate": 2.5145046835083735e-05, "loss": 2.3265555500984193e-05, "step": 263820 }, { "epoch": 74.88787964802725, "grad_norm": 0.0008262904011644423, "learning_rate": 2.514220834516038e-05, "loss": 2.016425132751465e-05, "step": 263830 }, { "epoch": 74.89071813795061, "grad_norm": 0.005318641662597656, "learning_rate": 2.5139369855237017e-05, "loss": 1.9514746963977813e-05, "step": 263840 }, { "epoch": 74.89355662787398, "grad_norm": 0.019873039796948433, "learning_rate": 2.5136531365313652e-05, "loss": 3.98293137550354e-05, "step": 263850 }, { "epoch": 74.89639511779733, "grad_norm": 0.017581691965460777, "learning_rate": 2.513369287539029e-05, "loss": 4.25390899181366e-05, "step": 263860 }, { "epoch": 74.89923360772069, "grad_norm": 0.0033038360998034477, "learning_rate": 2.5130854385466935e-05, "loss": 1.870095729827881e-05, "step": 263870 }, { "epoch": 74.90207209764405, "grad_norm": 0.004156054463237524, "learning_rate": 2.5128015895543573e-05, "loss": 5.861576646566391e-05, "step": 263880 }, { "epoch": 74.90491058756741, "grad_norm": 0.002039708197116852, "learning_rate": 2.512517740562021e-05, "loss": 6.317775696516038e-05, "step": 263890 }, { "epoch": 74.90774907749078, "grad_norm": 0.0038582556881010532, "learning_rate": 2.5122338915696852e-05, "loss": 2.373885363340378e-05, "step": 263900 }, { "epoch": 74.91058756741414, "grad_norm": 0.026587622240185738, "learning_rate": 2.511950042577349e-05, "loss": 3.104321658611298e-05, "step": 263910 }, { "epoch": 74.9134260573375, "grad_norm": 0.003382294438779354, "learning_rate": 2.5116661935850128e-05, "loss": 5.077887326478958e-05, "step": 263920 }, { "epoch": 74.91626454726085, "grad_norm": 0.0019346550107002258, "learning_rate": 2.511382344592677e-05, "loss": 1.6862712800502778e-05, "step": 263930 }, { "epoch": 74.91910303718421, "grad_norm": 0.0034597464837133884, "learning_rate": 2.5110984956003407e-05, "loss": 4.347749054431915e-05, "step": 263940 }, { "epoch": 74.92194152710758, "grad_norm": 0.00969072338193655, "learning_rate": 2.5108146466080045e-05, "loss": 2.1057575941085817e-05, "step": 263950 }, { "epoch": 74.92478001703094, "grad_norm": 0.0033839845564216375, "learning_rate": 2.5105307976156683e-05, "loss": 2.2599659860134125e-05, "step": 263960 }, { "epoch": 74.9276185069543, "grad_norm": 0.004070873837918043, "learning_rate": 2.5102469486233325e-05, "loss": 2.816244959831238e-05, "step": 263970 }, { "epoch": 74.93045699687767, "grad_norm": 0.009060787037014961, "learning_rate": 2.5099630996309963e-05, "loss": 2.6381202042102812e-05, "step": 263980 }, { "epoch": 74.93329548680101, "grad_norm": 0.06656210124492645, "learning_rate": 2.50967925063866e-05, "loss": 2.4749524891376494e-05, "step": 263990 }, { "epoch": 74.93613397672438, "grad_norm": 0.006848298013210297, "learning_rate": 2.5093954016463246e-05, "loss": 5.307961255311966e-05, "step": 264000 }, { "epoch": 74.93613397672438, "eval_accuracy": 0.9867107522095759, "eval_loss": 0.05821870267391205, "eval_runtime": 35.8492, "eval_samples_per_second": 438.698, "eval_steps_per_second": 6.862, "step": 264000 }, { "epoch": 74.93897246664774, "grad_norm": 0.0019092122092843056, "learning_rate": 2.5091115526539884e-05, "loss": 8.854866027832031e-05, "step": 264010 }, { "epoch": 74.9418109565711, "grad_norm": 0.006947056390345097, "learning_rate": 2.508827703661652e-05, "loss": 2.6032887399196623e-05, "step": 264020 }, { "epoch": 74.94464944649447, "grad_norm": 0.004269746132194996, "learning_rate": 2.5085438546693163e-05, "loss": 0.0003829440101981163, "step": 264030 }, { "epoch": 74.94748793641783, "grad_norm": 4.524357795715332, "learning_rate": 2.50826000567698e-05, "loss": 0.0010606957599520682, "step": 264040 }, { "epoch": 74.95032642634119, "grad_norm": 0.07536736130714417, "learning_rate": 2.507976156684644e-05, "loss": 7.174741476774215e-05, "step": 264050 }, { "epoch": 74.95316491626454, "grad_norm": 0.007602308876812458, "learning_rate": 2.5076923076923077e-05, "loss": 0.0010063625872135163, "step": 264060 }, { "epoch": 74.9560034061879, "grad_norm": 0.09786123037338257, "learning_rate": 2.5074084586999718e-05, "loss": 6.257183849811554e-05, "step": 264070 }, { "epoch": 74.95884189611127, "grad_norm": 0.04223140701651573, "learning_rate": 2.5071246097076356e-05, "loss": 0.00015453435480594635, "step": 264080 }, { "epoch": 74.96168038603463, "grad_norm": 0.0022142089437693357, "learning_rate": 2.5068407607152994e-05, "loss": 2.3672357201576233e-05, "step": 264090 }, { "epoch": 74.96451887595799, "grad_norm": 0.015132863074541092, "learning_rate": 2.5065569117229636e-05, "loss": 0.00021330118179321289, "step": 264100 }, { "epoch": 74.96735736588136, "grad_norm": 0.025890668854117393, "learning_rate": 2.5062730627306274e-05, "loss": 0.0002605469897389412, "step": 264110 }, { "epoch": 74.97019585580472, "grad_norm": 0.14287739992141724, "learning_rate": 2.505989213738291e-05, "loss": 7.816590368747711e-05, "step": 264120 }, { "epoch": 74.97303434572807, "grad_norm": 0.004065782763063908, "learning_rate": 2.5057053647459556e-05, "loss": 0.00010837335139513016, "step": 264130 }, { "epoch": 74.97587283565143, "grad_norm": 0.0043926602229475975, "learning_rate": 2.5054215157536194e-05, "loss": 2.8155744075775146e-05, "step": 264140 }, { "epoch": 74.97871132557479, "grad_norm": 0.007914570160210133, "learning_rate": 2.505137666761283e-05, "loss": 0.00011501256376504898, "step": 264150 }, { "epoch": 74.98154981549816, "grad_norm": 0.0026453377213329077, "learning_rate": 2.5048538177689467e-05, "loss": 6.533302366733551e-05, "step": 264160 }, { "epoch": 74.98438830542152, "grad_norm": 0.0016919095069169998, "learning_rate": 2.504569968776611e-05, "loss": 5.5176392197608945e-05, "step": 264170 }, { "epoch": 74.98722679534488, "grad_norm": 0.00557989114895463, "learning_rate": 2.504286119784275e-05, "loss": 4.062335938215256e-05, "step": 264180 }, { "epoch": 74.99006528526824, "grad_norm": 0.001422625849954784, "learning_rate": 2.5040022707919388e-05, "loss": 0.00014872625470161439, "step": 264190 }, { "epoch": 74.99290377519159, "grad_norm": 0.016024962067604065, "learning_rate": 2.503718421799603e-05, "loss": 4.918333142995834e-05, "step": 264200 }, { "epoch": 74.99574226511496, "grad_norm": 0.0025654048658907413, "learning_rate": 2.5034345728072667e-05, "loss": 0.001407506875693798, "step": 264210 }, { "epoch": 74.99858075503832, "grad_norm": 0.002340245060622692, "learning_rate": 2.5031507238149305e-05, "loss": 0.003607667237520218, "step": 264220 }, { "epoch": 75.00141924496168, "grad_norm": 0.12771841883659363, "learning_rate": 2.5028668748225946e-05, "loss": 0.0001228648005053401, "step": 264230 }, { "epoch": 75.00425773488504, "grad_norm": 0.0020804093219339848, "learning_rate": 2.5025830258302584e-05, "loss": 0.0007938649505376816, "step": 264240 }, { "epoch": 75.00709622480841, "grad_norm": 0.0016081129433587193, "learning_rate": 2.5022991768379222e-05, "loss": 0.004008086770772934, "step": 264250 }, { "epoch": 75.00993471473176, "grad_norm": 0.04115382954478264, "learning_rate": 2.502015327845586e-05, "loss": 0.003194461762905121, "step": 264260 }, { "epoch": 75.01277320465512, "grad_norm": 0.003965567797422409, "learning_rate": 2.50173147885325e-05, "loss": 0.0044534094631671906, "step": 264270 }, { "epoch": 75.01561169457848, "grad_norm": 4.7350921630859375, "learning_rate": 2.501447629860914e-05, "loss": 0.0007097000256180763, "step": 264280 }, { "epoch": 75.01845018450184, "grad_norm": 0.03454846516251564, "learning_rate": 2.5011637808685778e-05, "loss": 0.0005835540592670441, "step": 264290 }, { "epoch": 75.02128867442521, "grad_norm": 0.009599254466593266, "learning_rate": 2.5008799318762422e-05, "loss": 0.0023849474266171454, "step": 264300 }, { "epoch": 75.02412716434857, "grad_norm": 1.4423589706420898, "learning_rate": 2.500596082883906e-05, "loss": 0.0006033828482031822, "step": 264310 }, { "epoch": 75.02696565427193, "grad_norm": 0.03142941743135452, "learning_rate": 2.5003122338915695e-05, "loss": 0.0007456695660948753, "step": 264320 }, { "epoch": 75.02980414419528, "grad_norm": 0.04066045209765434, "learning_rate": 2.500028384899234e-05, "loss": 0.003912574797868729, "step": 264330 }, { "epoch": 75.03264263411864, "grad_norm": 0.7051121592521667, "learning_rate": 2.4997445359068978e-05, "loss": 0.0002552583813667297, "step": 264340 }, { "epoch": 75.03548112404201, "grad_norm": 0.014090167358517647, "learning_rate": 2.4994606869145616e-05, "loss": 0.00011731907725334167, "step": 264350 }, { "epoch": 75.03831961396537, "grad_norm": 0.008642659522593021, "learning_rate": 2.4991768379222254e-05, "loss": 0.00038819126784801484, "step": 264360 }, { "epoch": 75.04115810388873, "grad_norm": 0.0037764578592032194, "learning_rate": 2.4988929889298895e-05, "loss": 5.772765725851059e-05, "step": 264370 }, { "epoch": 75.0439965938121, "grad_norm": 0.009147181175649166, "learning_rate": 2.4986091399375533e-05, "loss": 0.0008979441598057746, "step": 264380 }, { "epoch": 75.04683508373546, "grad_norm": 0.008911887183785439, "learning_rate": 2.4983252909452174e-05, "loss": 7.725004106760026e-05, "step": 264390 }, { "epoch": 75.04967357365881, "grad_norm": 0.1501513421535492, "learning_rate": 2.4980414419528812e-05, "loss": 0.0020598329603672026, "step": 264400 }, { "epoch": 75.05251206358217, "grad_norm": 0.2081843614578247, "learning_rate": 2.497757592960545e-05, "loss": 0.00023884568363428116, "step": 264410 }, { "epoch": 75.05535055350553, "grad_norm": 0.020934803411364555, "learning_rate": 2.4974737439682092e-05, "loss": 9.191799908876419e-05, "step": 264420 }, { "epoch": 75.0581890434289, "grad_norm": 0.0035995652433484793, "learning_rate": 2.497189894975873e-05, "loss": 0.000613982230424881, "step": 264430 }, { "epoch": 75.06102753335226, "grad_norm": 0.0036515684332698584, "learning_rate": 2.4969060459835368e-05, "loss": 0.0001340499147772789, "step": 264440 }, { "epoch": 75.06386602327562, "grad_norm": 0.022315099835395813, "learning_rate": 2.4966221969912006e-05, "loss": 0.0009671125560998916, "step": 264450 }, { "epoch": 75.06670451319899, "grad_norm": 0.02613736316561699, "learning_rate": 2.4963383479988647e-05, "loss": 0.0001241544261574745, "step": 264460 }, { "epoch": 75.06954300312233, "grad_norm": 0.002362308092415333, "learning_rate": 2.496054499006529e-05, "loss": 0.0020448967814445497, "step": 264470 }, { "epoch": 75.0723814930457, "grad_norm": 0.006396125070750713, "learning_rate": 2.4957706500141927e-05, "loss": 8.131656795740128e-05, "step": 264480 }, { "epoch": 75.07521998296906, "grad_norm": 0.008520659990608692, "learning_rate": 2.4954868010218564e-05, "loss": 0.0002076217904686928, "step": 264490 }, { "epoch": 75.07805847289242, "grad_norm": 0.004287525545805693, "learning_rate": 2.4952029520295202e-05, "loss": 0.0013998201116919517, "step": 264500 }, { "epoch": 75.07805847289242, "eval_accuracy": 0.9835950912443568, "eval_loss": 0.06696724891662598, "eval_runtime": 34.9638, "eval_samples_per_second": 449.809, "eval_steps_per_second": 7.036, "step": 264500 }, { "epoch": 75.08089696281579, "grad_norm": 0.010072953999042511, "learning_rate": 2.4949191030371844e-05, "loss": 8.711740374565125e-05, "step": 264510 }, { "epoch": 75.08373545273915, "grad_norm": 3.573160409927368, "learning_rate": 2.4946352540448485e-05, "loss": 0.0017786027863621711, "step": 264520 }, { "epoch": 75.0865739426625, "grad_norm": 0.00368303875438869, "learning_rate": 2.494351405052512e-05, "loss": 3.368519246578216e-05, "step": 264530 }, { "epoch": 75.08941243258586, "grad_norm": 0.0035082404501736164, "learning_rate": 2.494067556060176e-05, "loss": 0.0016895072534680367, "step": 264540 }, { "epoch": 75.09225092250922, "grad_norm": 0.05363570153713226, "learning_rate": 2.49378370706784e-05, "loss": 0.00015647634863853456, "step": 264550 }, { "epoch": 75.09508941243259, "grad_norm": 0.008803211152553558, "learning_rate": 2.493499858075504e-05, "loss": 3.328379243612289e-05, "step": 264560 }, { "epoch": 75.09792790235595, "grad_norm": 0.07845015078783035, "learning_rate": 2.493216009083168e-05, "loss": 0.00021941065788269042, "step": 264570 }, { "epoch": 75.10076639227931, "grad_norm": 0.08583743125200272, "learning_rate": 2.4929321600908317e-05, "loss": 3.0716322362422944e-05, "step": 264580 }, { "epoch": 75.10360488220267, "grad_norm": 0.001655885949730873, "learning_rate": 2.4926483110984958e-05, "loss": 0.0002752136439085007, "step": 264590 }, { "epoch": 75.10644337212602, "grad_norm": 0.010121304541826248, "learning_rate": 2.4923644621061596e-05, "loss": 0.0002748120576143265, "step": 264600 }, { "epoch": 75.10928186204939, "grad_norm": 0.0013226212467998266, "learning_rate": 2.4920806131138237e-05, "loss": 0.0003420768305659294, "step": 264610 }, { "epoch": 75.11212035197275, "grad_norm": 0.003992409445345402, "learning_rate": 2.4917967641214875e-05, "loss": 0.002790331467986107, "step": 264620 }, { "epoch": 75.11495884189611, "grad_norm": 0.005645047407597303, "learning_rate": 2.4915129151291513e-05, "loss": 8.408296853303909e-05, "step": 264630 }, { "epoch": 75.11779733181947, "grad_norm": 0.0009462023735977709, "learning_rate": 2.4912290661368155e-05, "loss": 0.0036205127835273743, "step": 264640 }, { "epoch": 75.12063582174284, "grad_norm": 0.02307259291410446, "learning_rate": 2.4909452171444793e-05, "loss": 8.873287588357925e-05, "step": 264650 }, { "epoch": 75.1234743116662, "grad_norm": 0.07248048484325409, "learning_rate": 2.490661368152143e-05, "loss": 0.000526944175362587, "step": 264660 }, { "epoch": 75.12631280158955, "grad_norm": 0.05904165282845497, "learning_rate": 2.4903775191598072e-05, "loss": 0.0026661574840545653, "step": 264670 }, { "epoch": 75.12915129151291, "grad_norm": 0.005670507438480854, "learning_rate": 2.490093670167471e-05, "loss": 0.0006585324183106423, "step": 264680 }, { "epoch": 75.13198978143627, "grad_norm": 0.0028563039377331734, "learning_rate": 2.489809821175135e-05, "loss": 0.01098138615489006, "step": 264690 }, { "epoch": 75.13482827135964, "grad_norm": 0.1112760677933693, "learning_rate": 2.4895259721827986e-05, "loss": 0.00013556908816099168, "step": 264700 }, { "epoch": 75.137666761283, "grad_norm": 0.007821685634553432, "learning_rate": 2.4892421231904627e-05, "loss": 0.00104575976729393, "step": 264710 }, { "epoch": 75.14050525120636, "grad_norm": 0.005968992132693529, "learning_rate": 2.488958274198127e-05, "loss": 0.00032982658594846723, "step": 264720 }, { "epoch": 75.14334374112971, "grad_norm": 0.004417577758431435, "learning_rate": 2.4886744252057907e-05, "loss": 0.002218068763613701, "step": 264730 }, { "epoch": 75.14618223105307, "grad_norm": 0.018133260309696198, "learning_rate": 2.4883905762134545e-05, "loss": 0.0019510159268975257, "step": 264740 }, { "epoch": 75.14902072097644, "grad_norm": 0.007547490764409304, "learning_rate": 2.4881067272211183e-05, "loss": 0.0002994611859321594, "step": 264750 }, { "epoch": 75.1518592108998, "grad_norm": 0.009469782002270222, "learning_rate": 2.4878228782287824e-05, "loss": 6.426870822906494e-05, "step": 264760 }, { "epoch": 75.15469770082316, "grad_norm": 0.023052381351590157, "learning_rate": 2.4875390292364465e-05, "loss": 0.00014067552983760835, "step": 264770 }, { "epoch": 75.15753619074653, "grad_norm": 0.030120378360152245, "learning_rate": 2.4872551802441103e-05, "loss": 3.2160244882106784e-05, "step": 264780 }, { "epoch": 75.16037468066989, "grad_norm": 0.010063649155199528, "learning_rate": 2.486971331251774e-05, "loss": 9.636171162128449e-05, "step": 264790 }, { "epoch": 75.16321317059324, "grad_norm": 0.043441418558359146, "learning_rate": 2.486687482259438e-05, "loss": 6.373412907123566e-05, "step": 264800 }, { "epoch": 75.1660516605166, "grad_norm": 0.0033035618253052235, "learning_rate": 2.486403633267102e-05, "loss": 4.640519618988037e-05, "step": 264810 }, { "epoch": 75.16889015043996, "grad_norm": 0.0019271522760391235, "learning_rate": 2.4861197842747662e-05, "loss": 9.036790579557419e-05, "step": 264820 }, { "epoch": 75.17172864036333, "grad_norm": 0.0143160754814744, "learning_rate": 2.4858359352824297e-05, "loss": 3.36475670337677e-05, "step": 264830 }, { "epoch": 75.17456713028669, "grad_norm": 0.007042176555842161, "learning_rate": 2.4855520862900938e-05, "loss": 5.553606897592545e-05, "step": 264840 }, { "epoch": 75.17740562021005, "grad_norm": 0.015606226399540901, "learning_rate": 2.4852682372977576e-05, "loss": 3.346148878335953e-05, "step": 264850 }, { "epoch": 75.18024411013342, "grad_norm": 0.015413887798786163, "learning_rate": 2.4849843883054217e-05, "loss": 8.151009678840637e-05, "step": 264860 }, { "epoch": 75.18308260005676, "grad_norm": 0.0028804317116737366, "learning_rate": 2.4847005393130855e-05, "loss": 4.695989191532135e-05, "step": 264870 }, { "epoch": 75.18592108998013, "grad_norm": 0.014143195934593678, "learning_rate": 2.4844166903207493e-05, "loss": 4.1147321462631224e-05, "step": 264880 }, { "epoch": 75.18875957990349, "grad_norm": 0.036028191447257996, "learning_rate": 2.4841328413284135e-05, "loss": 9.173173457384109e-05, "step": 264890 }, { "epoch": 75.19159806982685, "grad_norm": 0.015378902666270733, "learning_rate": 2.4838489923360773e-05, "loss": 3.630369901657105e-05, "step": 264900 }, { "epoch": 75.19443655975022, "grad_norm": 0.007811025250703096, "learning_rate": 2.483565143343741e-05, "loss": 4.806257784366608e-05, "step": 264910 }, { "epoch": 75.19727504967358, "grad_norm": 0.005240852478891611, "learning_rate": 2.4832812943514052e-05, "loss": 6.146654486656189e-05, "step": 264920 }, { "epoch": 75.20011353959694, "grad_norm": 0.026384148746728897, "learning_rate": 2.482997445359069e-05, "loss": 3.5830587148666385e-05, "step": 264930 }, { "epoch": 75.20295202952029, "grad_norm": 0.0010154839837923646, "learning_rate": 2.482713596366733e-05, "loss": 3.42274084687233e-05, "step": 264940 }, { "epoch": 75.20579051944365, "grad_norm": 0.01299804076552391, "learning_rate": 2.482429747374397e-05, "loss": 2.9275938868522643e-05, "step": 264950 }, { "epoch": 75.20862900936702, "grad_norm": 0.010312333703041077, "learning_rate": 2.4821458983820607e-05, "loss": 3.736000508069992e-05, "step": 264960 }, { "epoch": 75.21146749929038, "grad_norm": 0.006441653706133366, "learning_rate": 2.481862049389725e-05, "loss": 2.353079617023468e-05, "step": 264970 }, { "epoch": 75.21430598921374, "grad_norm": 0.019765378907322884, "learning_rate": 2.4815782003973887e-05, "loss": 3.0465610325336457e-05, "step": 264980 }, { "epoch": 75.2171444791371, "grad_norm": 0.0008601847221143544, "learning_rate": 2.4812943514050528e-05, "loss": 1.6734935343265533e-05, "step": 264990 }, { "epoch": 75.21998296906045, "grad_norm": 0.00943543016910553, "learning_rate": 2.4810105024127163e-05, "loss": 0.00012740343809127807, "step": 265000 }, { "epoch": 75.21998296906045, "eval_accuracy": 0.986011318115343, "eval_loss": 0.055050093680620193, "eval_runtime": 35.7259, "eval_samples_per_second": 440.213, "eval_steps_per_second": 6.886, "step": 265000 }, { "epoch": 75.22282145898382, "grad_norm": 0.007155078928917646, "learning_rate": 2.4807266534203804e-05, "loss": 6.146933883428573e-05, "step": 265010 }, { "epoch": 75.22565994890718, "grad_norm": 0.008754266425967216, "learning_rate": 2.4804428044280446e-05, "loss": 4.717353731393814e-05, "step": 265020 }, { "epoch": 75.22849843883054, "grad_norm": 0.0009296353673562407, "learning_rate": 2.4801589554357084e-05, "loss": 2.8936006128787994e-05, "step": 265030 }, { "epoch": 75.2313369287539, "grad_norm": 0.004050164483487606, "learning_rate": 2.479875106443372e-05, "loss": 2.1219998598098754e-05, "step": 265040 }, { "epoch": 75.23417541867727, "grad_norm": 0.0007837331504561007, "learning_rate": 2.479591257451036e-05, "loss": 0.0010336682200431824, "step": 265050 }, { "epoch": 75.23701390860063, "grad_norm": 0.010437887161970139, "learning_rate": 2.4793074084587e-05, "loss": 4.831142723560333e-05, "step": 265060 }, { "epoch": 75.23985239852398, "grad_norm": 0.11212024837732315, "learning_rate": 2.4790235594663642e-05, "loss": 6.499160081148147e-05, "step": 265070 }, { "epoch": 75.24269088844734, "grad_norm": 0.013381054624915123, "learning_rate": 2.478739710474028e-05, "loss": 0.00020045321434736252, "step": 265080 }, { "epoch": 75.2455293783707, "grad_norm": 0.00120843097101897, "learning_rate": 2.4784558614816918e-05, "loss": 0.0001573115587234497, "step": 265090 }, { "epoch": 75.24836786829407, "grad_norm": 0.022802401334047318, "learning_rate": 2.4781720124893556e-05, "loss": 1.9402429461479188e-05, "step": 265100 }, { "epoch": 75.25120635821743, "grad_norm": 0.012122389860451221, "learning_rate": 2.4778881634970198e-05, "loss": 3.194771707057953e-05, "step": 265110 }, { "epoch": 75.2540448481408, "grad_norm": 0.02879955805838108, "learning_rate": 2.4776043145046836e-05, "loss": 0.00010635554790496826, "step": 265120 }, { "epoch": 75.25688333806416, "grad_norm": 0.0022254865616559982, "learning_rate": 2.4773204655123474e-05, "loss": 2.9101409018039705e-05, "step": 265130 }, { "epoch": 75.2597218279875, "grad_norm": 0.03347911685705185, "learning_rate": 2.4770366165200115e-05, "loss": 1.9509904086589812e-05, "step": 265140 }, { "epoch": 75.26256031791087, "grad_norm": 4.790339469909668, "learning_rate": 2.4767527675276753e-05, "loss": 0.001065213605761528, "step": 265150 }, { "epoch": 75.26539880783423, "grad_norm": 0.0787016898393631, "learning_rate": 2.4764689185353394e-05, "loss": 5.298200994729996e-05, "step": 265160 }, { "epoch": 75.2682372977576, "grad_norm": 0.005348565522581339, "learning_rate": 2.4761850695430032e-05, "loss": 0.00010721329599618911, "step": 265170 }, { "epoch": 75.27107578768096, "grad_norm": 0.007674565073102713, "learning_rate": 2.475901220550667e-05, "loss": 0.000139734148979187, "step": 265180 }, { "epoch": 75.27391427760432, "grad_norm": 0.09054815769195557, "learning_rate": 2.475617371558331e-05, "loss": 0.00010278541594743729, "step": 265190 }, { "epoch": 75.27675276752768, "grad_norm": 0.005406536161899567, "learning_rate": 2.475333522565995e-05, "loss": 4.783552139997482e-05, "step": 265200 }, { "epoch": 75.27959125745103, "grad_norm": 0.018385659903287888, "learning_rate": 2.4750496735736588e-05, "loss": 2.252291887998581e-05, "step": 265210 }, { "epoch": 75.2824297473744, "grad_norm": 0.10757183283567429, "learning_rate": 2.474765824581323e-05, "loss": 8.863359689712525e-05, "step": 265220 }, { "epoch": 75.28526823729776, "grad_norm": 0.043668925762176514, "learning_rate": 2.4744819755889867e-05, "loss": 0.00014579035341739656, "step": 265230 }, { "epoch": 75.28810672722112, "grad_norm": 0.005266502965241671, "learning_rate": 2.474198126596651e-05, "loss": 0.00030032098293304444, "step": 265240 }, { "epoch": 75.29094521714448, "grad_norm": 0.2770242989063263, "learning_rate": 2.4739142776043146e-05, "loss": 0.0008665308356285095, "step": 265250 }, { "epoch": 75.29378370706785, "grad_norm": 0.001995956525206566, "learning_rate": 2.4736304286119784e-05, "loss": 4.043746739625931e-05, "step": 265260 }, { "epoch": 75.2966221969912, "grad_norm": 0.012903411872684956, "learning_rate": 2.4733465796196426e-05, "loss": 4.6659447252750394e-05, "step": 265270 }, { "epoch": 75.29946068691456, "grad_norm": 0.0028595176991075277, "learning_rate": 2.4730627306273064e-05, "loss": 9.332895278930664e-05, "step": 265280 }, { "epoch": 75.30229917683792, "grad_norm": 0.16650038957595825, "learning_rate": 2.4727788816349705e-05, "loss": 4.5524165034294126e-05, "step": 265290 }, { "epoch": 75.30513766676128, "grad_norm": 0.004184670280665159, "learning_rate": 2.472495032642634e-05, "loss": 1.785196363925934e-05, "step": 265300 }, { "epoch": 75.30797615668465, "grad_norm": 0.010438719764351845, "learning_rate": 2.472211183650298e-05, "loss": 8.154120296239853e-05, "step": 265310 }, { "epoch": 75.31081464660801, "grad_norm": 0.000556471582967788, "learning_rate": 2.4719273346579622e-05, "loss": 5.2335858345031736e-05, "step": 265320 }, { "epoch": 75.31365313653137, "grad_norm": 0.005008012056350708, "learning_rate": 2.471643485665626e-05, "loss": 2.8801895678043365e-05, "step": 265330 }, { "epoch": 75.31649162645472, "grad_norm": 0.005156711675226688, "learning_rate": 2.47135963667329e-05, "loss": 4.946812987327576e-05, "step": 265340 }, { "epoch": 75.31933011637808, "grad_norm": 0.04784989356994629, "learning_rate": 2.4710757876809536e-05, "loss": 6.233230233192444e-05, "step": 265350 }, { "epoch": 75.32216860630145, "grad_norm": 0.0033656377345323563, "learning_rate": 2.4707919386886178e-05, "loss": 6.041713058948517e-05, "step": 265360 }, { "epoch": 75.32500709622481, "grad_norm": 0.0014882100513204932, "learning_rate": 2.470508089696282e-05, "loss": 2.0280107855796813e-05, "step": 265370 }, { "epoch": 75.32784558614817, "grad_norm": 0.010186800733208656, "learning_rate": 2.4702242407039454e-05, "loss": 6.181783974170684e-05, "step": 265380 }, { "epoch": 75.33068407607153, "grad_norm": 0.002209191443398595, "learning_rate": 2.4699403917116095e-05, "loss": 2.2025592625141143e-05, "step": 265390 }, { "epoch": 75.3335225659949, "grad_norm": 0.0008696388686075807, "learning_rate": 2.4696565427192736e-05, "loss": 2.157166600227356e-05, "step": 265400 }, { "epoch": 75.33636105591825, "grad_norm": 0.0009345423895865679, "learning_rate": 2.4693726937269374e-05, "loss": 2.990458160638809e-05, "step": 265410 }, { "epoch": 75.33919954584161, "grad_norm": 0.001911739120259881, "learning_rate": 2.4690888447346012e-05, "loss": 2.5387480854988097e-05, "step": 265420 }, { "epoch": 75.34203803576497, "grad_norm": 0.009634388610720634, "learning_rate": 2.468804995742265e-05, "loss": 3.361180424690247e-05, "step": 265430 }, { "epoch": 75.34487652568833, "grad_norm": 0.003913936670869589, "learning_rate": 2.4685211467499292e-05, "loss": 1.3712234795093536e-05, "step": 265440 }, { "epoch": 75.3477150156117, "grad_norm": 0.0009705406846478581, "learning_rate": 2.4682372977575933e-05, "loss": 1.86234712600708e-05, "step": 265450 }, { "epoch": 75.35055350553506, "grad_norm": 0.0004125739505980164, "learning_rate": 2.467953448765257e-05, "loss": 1.4943629503250122e-05, "step": 265460 }, { "epoch": 75.35339199545841, "grad_norm": 0.006848564371466637, "learning_rate": 2.467669599772921e-05, "loss": 0.00010987501591444015, "step": 265470 }, { "epoch": 75.35623048538177, "grad_norm": 0.0052061863243579865, "learning_rate": 2.4673857507805847e-05, "loss": 6.619971245527267e-05, "step": 265480 }, { "epoch": 75.35906897530514, "grad_norm": 0.04786271974444389, "learning_rate": 2.467101901788249e-05, "loss": 3.8739107549190524e-05, "step": 265490 }, { "epoch": 75.3619074652285, "grad_norm": 0.0025242632254958153, "learning_rate": 2.466818052795913e-05, "loss": 7.195193320512772e-05, "step": 265500 }, { "epoch": 75.3619074652285, "eval_accuracy": 0.9851211292681376, "eval_loss": 0.05985916033387184, "eval_runtime": 35.9611, "eval_samples_per_second": 437.333, "eval_steps_per_second": 6.841, "step": 265500 }, { "epoch": 75.36474595515186, "grad_norm": 0.01845167763531208, "learning_rate": 2.4665342038035764e-05, "loss": 3.6748871207237246e-05, "step": 265510 }, { "epoch": 75.36758444507522, "grad_norm": 0.020116988569498062, "learning_rate": 2.4662503548112406e-05, "loss": 3.851577639579773e-05, "step": 265520 }, { "epoch": 75.37042293499859, "grad_norm": 0.0054169343784451485, "learning_rate": 2.4659665058189044e-05, "loss": 3.1982921063899995e-05, "step": 265530 }, { "epoch": 75.37326142492194, "grad_norm": 0.0023953767959028482, "learning_rate": 2.4656826568265685e-05, "loss": 2.774316817522049e-05, "step": 265540 }, { "epoch": 75.3760999148453, "grad_norm": 0.006104728672653437, "learning_rate": 2.4653988078342323e-05, "loss": 3.758687525987625e-05, "step": 265550 }, { "epoch": 75.37893840476866, "grad_norm": 0.006923032458871603, "learning_rate": 2.465114958841896e-05, "loss": 9.300746023654938e-05, "step": 265560 }, { "epoch": 75.38177689469202, "grad_norm": 0.011204336769878864, "learning_rate": 2.4648311098495603e-05, "loss": 3.4133344888687135e-05, "step": 265570 }, { "epoch": 75.38461538461539, "grad_norm": 0.011776944622397423, "learning_rate": 2.464547260857224e-05, "loss": 2.246890217065811e-05, "step": 265580 }, { "epoch": 75.38745387453875, "grad_norm": 0.006007109768688679, "learning_rate": 2.464263411864888e-05, "loss": 0.0006697041913866997, "step": 265590 }, { "epoch": 75.39029236446211, "grad_norm": 0.03561611846089363, "learning_rate": 2.463979562872552e-05, "loss": 0.000805281475186348, "step": 265600 }, { "epoch": 75.39313085438546, "grad_norm": 0.011207795701920986, "learning_rate": 2.4636957138802158e-05, "loss": 0.0007886011153459549, "step": 265610 }, { "epoch": 75.39596934430882, "grad_norm": 0.009547390975058079, "learning_rate": 2.46341186488788e-05, "loss": 8.470788598060607e-05, "step": 265620 }, { "epoch": 75.39880783423219, "grad_norm": 0.00705804955214262, "learning_rate": 2.4631280158955437e-05, "loss": 3.8868561387062075e-05, "step": 265630 }, { "epoch": 75.40164632415555, "grad_norm": 0.09248783439397812, "learning_rate": 2.4628441669032075e-05, "loss": 4.914868623018265e-05, "step": 265640 }, { "epoch": 75.40448481407891, "grad_norm": 0.008521215990185738, "learning_rate": 2.4625603179108717e-05, "loss": 7.974840700626373e-05, "step": 265650 }, { "epoch": 75.40732330400228, "grad_norm": 0.015263271518051624, "learning_rate": 2.4622764689185355e-05, "loss": 0.0029522024095058443, "step": 265660 }, { "epoch": 75.41016179392564, "grad_norm": 0.10492800921201706, "learning_rate": 2.4619926199261996e-05, "loss": 0.00013729967176914215, "step": 265670 }, { "epoch": 75.41300028384899, "grad_norm": 0.03630050644278526, "learning_rate": 2.461708770933863e-05, "loss": 3.4716539084911345e-05, "step": 265680 }, { "epoch": 75.41583877377235, "grad_norm": 0.0026670119259506464, "learning_rate": 2.4614249219415272e-05, "loss": 0.00031727589666843416, "step": 265690 }, { "epoch": 75.41867726369571, "grad_norm": 0.10141519457101822, "learning_rate": 2.4611410729491913e-05, "loss": 0.00010945964604616165, "step": 265700 }, { "epoch": 75.42151575361908, "grad_norm": 0.0011858991347253323, "learning_rate": 2.4608856088560887e-05, "loss": 0.008773370832204818, "step": 265710 }, { "epoch": 75.42435424354244, "grad_norm": 0.003941011149436235, "learning_rate": 2.4606017598637528e-05, "loss": 0.0011985322460532188, "step": 265720 }, { "epoch": 75.4271927334658, "grad_norm": 0.027245720848441124, "learning_rate": 2.4603179108714162e-05, "loss": 0.00018493104726076127, "step": 265730 }, { "epoch": 75.43003122338915, "grad_norm": 0.03736017644405365, "learning_rate": 2.4600340618790804e-05, "loss": 0.0003907740116119385, "step": 265740 }, { "epoch": 75.43286971331251, "grad_norm": 0.04983735829591751, "learning_rate": 2.4597502128867445e-05, "loss": 0.00010309759527444839, "step": 265750 }, { "epoch": 75.43570820323588, "grad_norm": 0.03952373191714287, "learning_rate": 2.4594663638944083e-05, "loss": 0.00013019926846027374, "step": 265760 }, { "epoch": 75.43854669315924, "grad_norm": 0.0024414630606770515, "learning_rate": 2.459182514902072e-05, "loss": 0.0002645164728164673, "step": 265770 }, { "epoch": 75.4413851830826, "grad_norm": 0.012633740901947021, "learning_rate": 2.4588986659097363e-05, "loss": 5.6677311658859256e-05, "step": 265780 }, { "epoch": 75.44422367300596, "grad_norm": 0.022136686369776726, "learning_rate": 2.4586148169174e-05, "loss": 0.00028748977929353713, "step": 265790 }, { "epoch": 75.44706216292933, "grad_norm": 0.011735343374311924, "learning_rate": 2.4583309679250642e-05, "loss": 0.00038306619971990586, "step": 265800 }, { "epoch": 75.44990065285268, "grad_norm": 0.000759800139348954, "learning_rate": 2.4580471189327277e-05, "loss": 9.920839220285415e-05, "step": 265810 }, { "epoch": 75.45273914277604, "grad_norm": 0.012433077208697796, "learning_rate": 2.4577632699403918e-05, "loss": 3.3117830753326416e-05, "step": 265820 }, { "epoch": 75.4555776326994, "grad_norm": 0.023530185222625732, "learning_rate": 2.457479420948056e-05, "loss": 4.0617585182189944e-05, "step": 265830 }, { "epoch": 75.45841612262276, "grad_norm": 0.003565141698345542, "learning_rate": 2.4571955719557197e-05, "loss": 6.120707839727402e-05, "step": 265840 }, { "epoch": 75.46125461254613, "grad_norm": 0.0024755129124969244, "learning_rate": 2.4569117229633835e-05, "loss": 3.8393400609493254e-05, "step": 265850 }, { "epoch": 75.46409310246949, "grad_norm": 0.03357122465968132, "learning_rate": 2.4566278739710473e-05, "loss": 5.616433918476105e-05, "step": 265860 }, { "epoch": 75.46693159239285, "grad_norm": 0.0062748733907938, "learning_rate": 2.4563440249787115e-05, "loss": 0.0004211077466607094, "step": 265870 }, { "epoch": 75.4697700823162, "grad_norm": 0.01697366312146187, "learning_rate": 2.4560601759863756e-05, "loss": 0.0016681628301739692, "step": 265880 }, { "epoch": 75.47260857223957, "grad_norm": 0.009963378310203552, "learning_rate": 2.4557763269940394e-05, "loss": 4.200916737318039e-05, "step": 265890 }, { "epoch": 75.47544706216293, "grad_norm": 0.03288717195391655, "learning_rate": 2.4554924780017032e-05, "loss": 3.17474827170372e-05, "step": 265900 }, { "epoch": 75.47828555208629, "grad_norm": 0.0030922614969313145, "learning_rate": 2.455208629009367e-05, "loss": 7.543042302131653e-05, "step": 265910 }, { "epoch": 75.48112404200965, "grad_norm": 0.06102864071726799, "learning_rate": 2.454924780017031e-05, "loss": 6.12078234553337e-05, "step": 265920 }, { "epoch": 75.48396253193302, "grad_norm": 0.0008448145817965269, "learning_rate": 2.4546409310246953e-05, "loss": 7.365085184574127e-05, "step": 265930 }, { "epoch": 75.48680102185637, "grad_norm": 0.050435133278369904, "learning_rate": 2.4543570820323587e-05, "loss": 0.0001939050853252411, "step": 265940 }, { "epoch": 75.48963951177973, "grad_norm": 0.0013185171410441399, "learning_rate": 2.454073233040023e-05, "loss": 9.764786809682847e-05, "step": 265950 }, { "epoch": 75.49247800170309, "grad_norm": 0.03276343271136284, "learning_rate": 2.4537893840476867e-05, "loss": 4.9017556011676786e-05, "step": 265960 }, { "epoch": 75.49531649162645, "grad_norm": 0.004515005275607109, "learning_rate": 2.4535055350553508e-05, "loss": 5.9598684310913085e-05, "step": 265970 }, { "epoch": 75.49815498154982, "grad_norm": 0.008141128346323967, "learning_rate": 2.4532216860630146e-05, "loss": 5.357265472412109e-05, "step": 265980 }, { "epoch": 75.50099347147318, "grad_norm": 0.014764193445444107, "learning_rate": 2.4529378370706784e-05, "loss": 7.28679820895195e-05, "step": 265990 }, { "epoch": 75.50383196139654, "grad_norm": 0.005904674995690584, "learning_rate": 2.4526539880783425e-05, "loss": 1.4668889343738556e-05, "step": 266000 }, { "epoch": 75.50383196139654, "eval_accuracy": 0.9853118840211101, "eval_loss": 0.061051443219184875, "eval_runtime": 36.2017, "eval_samples_per_second": 434.427, "eval_steps_per_second": 6.795, "step": 266000 }, { "epoch": 75.50667045131989, "grad_norm": 0.018691983073949814, "learning_rate": 2.4523701390860063e-05, "loss": 3.811214119195938e-05, "step": 266010 }, { "epoch": 75.50950894124325, "grad_norm": 0.048595160245895386, "learning_rate": 2.45208629009367e-05, "loss": 3.8845837116241456e-05, "step": 266020 }, { "epoch": 75.51234743116662, "grad_norm": 0.004983989521861076, "learning_rate": 2.4518024411013343e-05, "loss": 1.1751241981983184e-05, "step": 266030 }, { "epoch": 75.51518592108998, "grad_norm": 0.009483322501182556, "learning_rate": 2.451518592108998e-05, "loss": 1.9924528896808624e-05, "step": 266040 }, { "epoch": 75.51802441101334, "grad_norm": 0.01626541279256344, "learning_rate": 2.4512347431166622e-05, "loss": 1.724213361740112e-05, "step": 266050 }, { "epoch": 75.5208629009367, "grad_norm": 0.002599108498543501, "learning_rate": 2.450950894124326e-05, "loss": 2.975519746541977e-05, "step": 266060 }, { "epoch": 75.52370139086007, "grad_norm": 0.017827734351158142, "learning_rate": 2.4506670451319898e-05, "loss": 3.377571702003479e-05, "step": 266070 }, { "epoch": 75.52653988078342, "grad_norm": 0.0006439038552343845, "learning_rate": 2.450383196139654e-05, "loss": 3.175176680088043e-05, "step": 266080 }, { "epoch": 75.52937837070678, "grad_norm": 0.03434772044420242, "learning_rate": 2.4500993471473177e-05, "loss": 4.1721388697624207e-05, "step": 266090 }, { "epoch": 75.53221686063014, "grad_norm": 0.05977645888924599, "learning_rate": 2.449815498154982e-05, "loss": 3.726705908775329e-05, "step": 266100 }, { "epoch": 75.5350553505535, "grad_norm": 0.007451475132256746, "learning_rate": 2.4495316491626453e-05, "loss": 1.5814229846000673e-05, "step": 266110 }, { "epoch": 75.53789384047687, "grad_norm": 0.00269610364921391, "learning_rate": 2.4492478001703095e-05, "loss": 1.9856728613376617e-05, "step": 266120 }, { "epoch": 75.54073233040023, "grad_norm": 0.3705885112285614, "learning_rate": 2.4489639511779736e-05, "loss": 8.527114987373352e-05, "step": 266130 }, { "epoch": 75.5435708203236, "grad_norm": 0.005308316554874182, "learning_rate": 2.4486801021856374e-05, "loss": 2.5103799998760223e-05, "step": 266140 }, { "epoch": 75.54640931024694, "grad_norm": 0.021668381989002228, "learning_rate": 2.4483962531933012e-05, "loss": 3.520324826240539e-05, "step": 266150 }, { "epoch": 75.5492478001703, "grad_norm": 0.0031862081959843636, "learning_rate": 2.448112404200965e-05, "loss": 3.3865496516227725e-05, "step": 266160 }, { "epoch": 75.55208629009367, "grad_norm": 0.2901354730129242, "learning_rate": 2.447828555208629e-05, "loss": 4.869904369115829e-05, "step": 266170 }, { "epoch": 75.55492478001703, "grad_norm": 0.007236134260892868, "learning_rate": 2.4475447062162933e-05, "loss": 1.9667297601699828e-05, "step": 266180 }, { "epoch": 75.5577632699404, "grad_norm": 0.005839643068611622, "learning_rate": 2.447260857223957e-05, "loss": 2.2722966969013213e-05, "step": 266190 }, { "epoch": 75.56060175986376, "grad_norm": 0.003326262114569545, "learning_rate": 2.446977008231621e-05, "loss": 2.728700637817383e-05, "step": 266200 }, { "epoch": 75.5634402497871, "grad_norm": 0.004107012879103422, "learning_rate": 2.4466931592392847e-05, "loss": 0.0010577831417322159, "step": 266210 }, { "epoch": 75.56627873971047, "grad_norm": 2.4106743335723877, "learning_rate": 2.4464093102469488e-05, "loss": 0.00047725308686494825, "step": 266220 }, { "epoch": 75.56911722963383, "grad_norm": 0.00017133171786554158, "learning_rate": 2.4461254612546126e-05, "loss": 8.092820644378662e-06, "step": 266230 }, { "epoch": 75.5719557195572, "grad_norm": 0.0033574814442545176, "learning_rate": 2.4458416122622764e-05, "loss": 3.7375278770923616e-05, "step": 266240 }, { "epoch": 75.57479420948056, "grad_norm": 0.011961924843490124, "learning_rate": 2.4455577632699406e-05, "loss": 0.004871893674135208, "step": 266250 }, { "epoch": 75.57763269940392, "grad_norm": 0.00579812191426754, "learning_rate": 2.4452739142776044e-05, "loss": 4.2918138206005096e-05, "step": 266260 }, { "epoch": 75.58047118932728, "grad_norm": 0.00190162833314389, "learning_rate": 2.4449900652852685e-05, "loss": 0.0002993587404489517, "step": 266270 }, { "epoch": 75.58330967925063, "grad_norm": 0.09142577648162842, "learning_rate": 2.4447062162929323e-05, "loss": 0.00039986483752727506, "step": 266280 }, { "epoch": 75.586148169174, "grad_norm": 0.005388717167079449, "learning_rate": 2.444422367300596e-05, "loss": 6.51167705655098e-05, "step": 266290 }, { "epoch": 75.58898665909736, "grad_norm": 0.012071244418621063, "learning_rate": 2.4441385183082602e-05, "loss": 0.00018498972058296205, "step": 266300 }, { "epoch": 75.59182514902072, "grad_norm": 6.52514123916626, "learning_rate": 2.443854669315924e-05, "loss": 0.0008010728284716606, "step": 266310 }, { "epoch": 75.59466363894408, "grad_norm": 0.02456376515328884, "learning_rate": 2.4435708203235878e-05, "loss": 8.634328842163085e-05, "step": 266320 }, { "epoch": 75.59750212886745, "grad_norm": 14.266124725341797, "learning_rate": 2.443286971331252e-05, "loss": 0.00338621512055397, "step": 266330 }, { "epoch": 75.60034061879081, "grad_norm": 19.804759979248047, "learning_rate": 2.4430031223389158e-05, "loss": 0.018966446816921233, "step": 266340 }, { "epoch": 75.60317910871416, "grad_norm": 0.005849967245012522, "learning_rate": 2.44271927334658e-05, "loss": 0.00011151973158121108, "step": 266350 }, { "epoch": 75.60601759863752, "grad_norm": 0.003925866447389126, "learning_rate": 2.4424354243542437e-05, "loss": 2.5472044944763185e-05, "step": 266360 }, { "epoch": 75.60885608856088, "grad_norm": 0.15965910255908966, "learning_rate": 2.4421515753619075e-05, "loss": 8.682049810886383e-05, "step": 266370 }, { "epoch": 75.61169457848425, "grad_norm": 0.007030264008790255, "learning_rate": 2.4418677263695716e-05, "loss": 3.674384206533432e-05, "step": 266380 }, { "epoch": 75.61453306840761, "grad_norm": 0.005437014624476433, "learning_rate": 2.4415838773772354e-05, "loss": 0.00018944144248962403, "step": 266390 }, { "epoch": 75.61737155833097, "grad_norm": 0.011569843627512455, "learning_rate": 2.4413000283848996e-05, "loss": 0.0005323274061083793, "step": 266400 }, { "epoch": 75.62021004825434, "grad_norm": 0.45022228360176086, "learning_rate": 2.441016179392563e-05, "loss": 0.00010713804513216019, "step": 266410 }, { "epoch": 75.62304853817768, "grad_norm": 0.007066155783832073, "learning_rate": 2.440732330400227e-05, "loss": 2.201888710260391e-05, "step": 266420 }, { "epoch": 75.62588702810105, "grad_norm": 0.07198032736778259, "learning_rate": 2.4404484814078913e-05, "loss": 0.0004193853586912155, "step": 266430 }, { "epoch": 75.62872551802441, "grad_norm": 0.04777287691831589, "learning_rate": 2.440164632415555e-05, "loss": 0.00011775195598602295, "step": 266440 }, { "epoch": 75.63156400794777, "grad_norm": 0.08807249367237091, "learning_rate": 2.439880783423219e-05, "loss": 0.0001323424279689789, "step": 266450 }, { "epoch": 75.63440249787114, "grad_norm": 0.0023956107906997204, "learning_rate": 2.4395969344308827e-05, "loss": 8.517894893884659e-05, "step": 266460 }, { "epoch": 75.6372409877945, "grad_norm": 0.09402260929346085, "learning_rate": 2.439313085438547e-05, "loss": 0.0005966002121567726, "step": 266470 }, { "epoch": 75.64007947771785, "grad_norm": 0.06045260652899742, "learning_rate": 2.439029236446211e-05, "loss": 4.655271768569946e-05, "step": 266480 }, { "epoch": 75.64291796764121, "grad_norm": 0.07502672076225281, "learning_rate": 2.4387453874538744e-05, "loss": 0.000553237646818161, "step": 266490 }, { "epoch": 75.64575645756457, "grad_norm": 0.1712312549352646, "learning_rate": 2.4384615384615386e-05, "loss": 0.003723447397351265, "step": 266500 }, { "epoch": 75.64575645756457, "eval_accuracy": 0.9837222610796719, "eval_loss": 0.07254166156053543, "eval_runtime": 35.5655, "eval_samples_per_second": 442.199, "eval_steps_per_second": 6.917, "step": 266500 }, { "epoch": 75.64859494748794, "grad_norm": 0.03890785947442055, "learning_rate": 2.4381776894692024e-05, "loss": 0.00011792462319135666, "step": 266510 }, { "epoch": 75.6514334374113, "grad_norm": 0.000776830012910068, "learning_rate": 2.4378938404768665e-05, "loss": 3.1296908855438235e-05, "step": 266520 }, { "epoch": 75.65427192733466, "grad_norm": 0.030254622921347618, "learning_rate": 2.4376099914845303e-05, "loss": 8.975937962532043e-05, "step": 266530 }, { "epoch": 75.65711041725802, "grad_norm": 0.06359956413507462, "learning_rate": 2.437326142492194e-05, "loss": 3.648903220891953e-05, "step": 266540 }, { "epoch": 75.65994890718137, "grad_norm": 0.0017056994838640094, "learning_rate": 2.4370422934998582e-05, "loss": 3.9404816925525664e-05, "step": 266550 }, { "epoch": 75.66278739710474, "grad_norm": 0.0025729387998580933, "learning_rate": 2.436758444507522e-05, "loss": 4.77990135550499e-05, "step": 266560 }, { "epoch": 75.6656258870281, "grad_norm": 0.012150534428656101, "learning_rate": 2.4364745955151862e-05, "loss": 5.848873406648636e-05, "step": 266570 }, { "epoch": 75.66846437695146, "grad_norm": 0.007897076196968555, "learning_rate": 2.43619074652285e-05, "loss": 4.778485745191574e-05, "step": 266580 }, { "epoch": 75.67130286687483, "grad_norm": 0.022384565323591232, "learning_rate": 2.4359068975305138e-05, "loss": 3.463774919509888e-05, "step": 266590 }, { "epoch": 75.67414135679819, "grad_norm": 0.003007178660482168, "learning_rate": 2.435623048538178e-05, "loss": 3.287196159362793e-05, "step": 266600 }, { "epoch": 75.67697984672155, "grad_norm": 0.030290622264146805, "learning_rate": 2.4353391995458417e-05, "loss": 2.3207999765872957e-05, "step": 266610 }, { "epoch": 75.6798183366449, "grad_norm": 0.0014420708175748587, "learning_rate": 2.4350553505535055e-05, "loss": 1.7835944890975953e-05, "step": 266620 }, { "epoch": 75.68265682656826, "grad_norm": 0.004606684669852257, "learning_rate": 2.4347715015611696e-05, "loss": 1.7600134015083313e-05, "step": 266630 }, { "epoch": 75.68549531649163, "grad_norm": 0.004772561136633158, "learning_rate": 2.4344876525688334e-05, "loss": 1.8069148063659667e-05, "step": 266640 }, { "epoch": 75.68833380641499, "grad_norm": 0.00705950241535902, "learning_rate": 2.4342038035764976e-05, "loss": 2.1545402705669404e-05, "step": 266650 }, { "epoch": 75.69117229633835, "grad_norm": 0.002784017939120531, "learning_rate": 2.4339199545841614e-05, "loss": 2.6074983179569244e-05, "step": 266660 }, { "epoch": 75.69401078626171, "grad_norm": 0.004241444636136293, "learning_rate": 2.4336361055918252e-05, "loss": 2.249479293823242e-05, "step": 266670 }, { "epoch": 75.69684927618506, "grad_norm": 0.0048109181225299835, "learning_rate": 2.4333522565994893e-05, "loss": 0.00013936199247837066, "step": 266680 }, { "epoch": 75.69968776610843, "grad_norm": 0.00038574033533222973, "learning_rate": 2.433068407607153e-05, "loss": 0.00027682166546583176, "step": 266690 }, { "epoch": 75.70252625603179, "grad_norm": 0.02254292368888855, "learning_rate": 2.432784558614817e-05, "loss": 9.0746209025383e-05, "step": 266700 }, { "epoch": 75.70536474595515, "grad_norm": 0.01624121330678463, "learning_rate": 2.4325007096224807e-05, "loss": 3.616269677877426e-05, "step": 266710 }, { "epoch": 75.70820323587851, "grad_norm": 0.003833223134279251, "learning_rate": 2.432216860630145e-05, "loss": 3.297906368970871e-05, "step": 266720 }, { "epoch": 75.71104172580188, "grad_norm": 0.002977067371830344, "learning_rate": 2.431933011637809e-05, "loss": 2.9066950082778932e-05, "step": 266730 }, { "epoch": 75.71388021572524, "grad_norm": 0.0013214136706665158, "learning_rate": 2.4316491626454728e-05, "loss": 5.8696605265140533e-05, "step": 266740 }, { "epoch": 75.71671870564859, "grad_norm": 0.00597646739333868, "learning_rate": 2.4313653136531366e-05, "loss": 3.371983766555786e-05, "step": 266750 }, { "epoch": 75.71955719557195, "grad_norm": 0.0023230358492583036, "learning_rate": 2.4310814646608004e-05, "loss": 1.2703798711299897e-05, "step": 266760 }, { "epoch": 75.72239568549531, "grad_norm": 0.17442725598812103, "learning_rate": 2.4307976156684645e-05, "loss": 0.0009780526161193848, "step": 266770 }, { "epoch": 75.72523417541868, "grad_norm": 0.08130190521478653, "learning_rate": 2.4305137666761287e-05, "loss": 0.00015308409929275512, "step": 266780 }, { "epoch": 75.72807266534204, "grad_norm": 0.016958126798272133, "learning_rate": 2.430229917683792e-05, "loss": 0.0004129938781261444, "step": 266790 }, { "epoch": 75.7309111552654, "grad_norm": 0.002669412875548005, "learning_rate": 2.4299460686914563e-05, "loss": 9.439326822757721e-05, "step": 266800 }, { "epoch": 75.73374964518877, "grad_norm": 0.0036232525017112494, "learning_rate": 2.42966221969912e-05, "loss": 0.00017589423805475235, "step": 266810 }, { "epoch": 75.73658813511211, "grad_norm": 0.0020311346743255854, "learning_rate": 2.4293783707067842e-05, "loss": 0.00016358606517314912, "step": 266820 }, { "epoch": 75.73942662503548, "grad_norm": 0.027887754142284393, "learning_rate": 2.429094521714448e-05, "loss": 4.9031712114810945e-05, "step": 266830 }, { "epoch": 75.74226511495884, "grad_norm": 0.040228743106126785, "learning_rate": 2.4288106727221118e-05, "loss": 6.842538714408874e-05, "step": 266840 }, { "epoch": 75.7451036048822, "grad_norm": 0.0014802237274125218, "learning_rate": 2.428526823729776e-05, "loss": 0.00017151348292827607, "step": 266850 }, { "epoch": 75.74794209480557, "grad_norm": 0.025813018903136253, "learning_rate": 2.4282429747374397e-05, "loss": 4.395265132188797e-05, "step": 266860 }, { "epoch": 75.75078058472893, "grad_norm": 0.024692906066775322, "learning_rate": 2.427959125745104e-05, "loss": 4.129447042942047e-05, "step": 266870 }, { "epoch": 75.75361907465229, "grad_norm": 0.003907546866685152, "learning_rate": 2.4276752767527677e-05, "loss": 4.8862770199775696e-05, "step": 266880 }, { "epoch": 75.75645756457564, "grad_norm": 0.007144210394471884, "learning_rate": 2.4273914277604315e-05, "loss": 3.0215829610824585e-05, "step": 266890 }, { "epoch": 75.759296054499, "grad_norm": 0.0015469681238755584, "learning_rate": 2.4271075787680956e-05, "loss": 2.348553389310837e-05, "step": 266900 }, { "epoch": 75.76213454442237, "grad_norm": 0.012722620740532875, "learning_rate": 2.4268237297757594e-05, "loss": 6.827954202890397e-05, "step": 266910 }, { "epoch": 75.76497303434573, "grad_norm": 0.0017851627198979259, "learning_rate": 2.4265398807834232e-05, "loss": 2.660490572452545e-05, "step": 266920 }, { "epoch": 75.76781152426909, "grad_norm": 0.003375664120540023, "learning_rate": 2.4262560317910873e-05, "loss": 1.8235668540000917e-05, "step": 266930 }, { "epoch": 75.77065001419246, "grad_norm": 0.005746066104620695, "learning_rate": 2.425972182798751e-05, "loss": 3.647040575742722e-05, "step": 266940 }, { "epoch": 75.7734885041158, "grad_norm": 0.009613899514079094, "learning_rate": 2.4256883338064153e-05, "loss": 2.7197971940040587e-05, "step": 266950 }, { "epoch": 75.77632699403917, "grad_norm": 0.0030926100444048643, "learning_rate": 2.4254044848140787e-05, "loss": 0.00039370860904455187, "step": 266960 }, { "epoch": 75.77916548396253, "grad_norm": 11.654796600341797, "learning_rate": 2.425120635821743e-05, "loss": 0.002887924388051033, "step": 266970 }, { "epoch": 75.78200397388589, "grad_norm": 0.02050977200269699, "learning_rate": 2.424836786829407e-05, "loss": 4.811659455299378e-05, "step": 266980 }, { "epoch": 75.78484246380926, "grad_norm": 0.06511954218149185, "learning_rate": 2.4245529378370708e-05, "loss": 4.694424569606781e-05, "step": 266990 }, { "epoch": 75.78768095373262, "grad_norm": 0.003937454894185066, "learning_rate": 2.4242690888447346e-05, "loss": 0.0002169949933886528, "step": 267000 }, { "epoch": 75.78768095373262, "eval_accuracy": 0.9832771666560692, "eval_loss": 0.06780792772769928, "eval_runtime": 35.666, "eval_samples_per_second": 440.952, "eval_steps_per_second": 6.897, "step": 267000 }, { "epoch": 75.79051944365598, "grad_norm": 0.0586620569229126, "learning_rate": 2.4239852398523984e-05, "loss": 4.0436722338199614e-05, "step": 267010 }, { "epoch": 75.79335793357933, "grad_norm": 0.007115322630852461, "learning_rate": 2.4237013908600625e-05, "loss": 6.884504109621048e-05, "step": 267020 }, { "epoch": 75.79619642350269, "grad_norm": 0.0013985962141305208, "learning_rate": 2.4234175418677267e-05, "loss": 0.0002624373883008957, "step": 267030 }, { "epoch": 75.79903491342606, "grad_norm": 0.024547526612877846, "learning_rate": 2.4231336928753905e-05, "loss": 0.004240916296839714, "step": 267040 }, { "epoch": 75.80187340334942, "grad_norm": 0.002493110951036215, "learning_rate": 2.4228498438830543e-05, "loss": 0.0029948927462100984, "step": 267050 }, { "epoch": 75.80471189327278, "grad_norm": 1.4071146249771118, "learning_rate": 2.422565994890718e-05, "loss": 0.0028067400678992273, "step": 267060 }, { "epoch": 75.80755038319614, "grad_norm": 0.007182986009865999, "learning_rate": 2.4222821458983822e-05, "loss": 0.0038061171770095827, "step": 267070 }, { "epoch": 75.8103888731195, "grad_norm": 0.00812749844044447, "learning_rate": 2.4219982969060463e-05, "loss": 0.006090797483921051, "step": 267080 }, { "epoch": 75.81322736304286, "grad_norm": 0.050624921917915344, "learning_rate": 2.4217144479137098e-05, "loss": 5.2523240447044374e-05, "step": 267090 }, { "epoch": 75.81606585296622, "grad_norm": 11.36500358581543, "learning_rate": 2.421430598921374e-05, "loss": 0.0010767152532935142, "step": 267100 }, { "epoch": 75.81890434288958, "grad_norm": 0.0018284342950209975, "learning_rate": 2.421146749929038e-05, "loss": 0.001549774780869484, "step": 267110 }, { "epoch": 75.82174283281294, "grad_norm": 0.022225243970751762, "learning_rate": 2.420862900936702e-05, "loss": 1.9786879420280457e-05, "step": 267120 }, { "epoch": 75.82458132273631, "grad_norm": 0.0023713423870503902, "learning_rate": 2.4205790519443657e-05, "loss": 0.00017648190259933472, "step": 267130 }, { "epoch": 75.82741981265967, "grad_norm": 0.0031418721191585064, "learning_rate": 2.4202952029520295e-05, "loss": 6.352812051773072e-05, "step": 267140 }, { "epoch": 75.83025830258302, "grad_norm": 0.0040947082452476025, "learning_rate": 2.4200113539596936e-05, "loss": 0.0001913744956254959, "step": 267150 }, { "epoch": 75.83309679250638, "grad_norm": 0.0030380075331777334, "learning_rate": 2.4197275049673578e-05, "loss": 0.002417224273085594, "step": 267160 }, { "epoch": 75.83593528242974, "grad_norm": 0.04353579133749008, "learning_rate": 2.4194436559750212e-05, "loss": 5.924161523580551e-05, "step": 267170 }, { "epoch": 75.83877377235311, "grad_norm": 0.01477645430713892, "learning_rate": 2.419188191881919e-05, "loss": 0.009441088885068893, "step": 267180 }, { "epoch": 75.84161226227647, "grad_norm": 0.007495791185647249, "learning_rate": 2.4189043428895827e-05, "loss": 3.133472055196762e-05, "step": 267190 }, { "epoch": 75.84445075219983, "grad_norm": 0.07643265277147293, "learning_rate": 2.4186204938972468e-05, "loss": 6.2493234872818e-05, "step": 267200 }, { "epoch": 75.8472892421232, "grad_norm": 3.4767589569091797, "learning_rate": 2.418336644904911e-05, "loss": 0.0005276283249258995, "step": 267210 }, { "epoch": 75.85012773204654, "grad_norm": 0.7684476375579834, "learning_rate": 2.4180527959125744e-05, "loss": 0.00023531820625066757, "step": 267220 }, { "epoch": 75.85296622196991, "grad_norm": 0.16707845032215118, "learning_rate": 2.4177689469202385e-05, "loss": 0.0001971593126654625, "step": 267230 }, { "epoch": 75.85580471189327, "grad_norm": 0.18921683728694916, "learning_rate": 2.4174850979279023e-05, "loss": 0.00013774093240499498, "step": 267240 }, { "epoch": 75.85864320181663, "grad_norm": 0.019507132470607758, "learning_rate": 2.4172012489355665e-05, "loss": 0.0009211773052811622, "step": 267250 }, { "epoch": 75.86148169174, "grad_norm": 0.0005258637829683721, "learning_rate": 2.4169173999432303e-05, "loss": 0.008728303015232086, "step": 267260 }, { "epoch": 75.86432018166336, "grad_norm": 0.027403220534324646, "learning_rate": 2.416633550950894e-05, "loss": 0.00014980155974626542, "step": 267270 }, { "epoch": 75.86715867158672, "grad_norm": 0.00549418805167079, "learning_rate": 2.4163497019585582e-05, "loss": 0.0001684403046965599, "step": 267280 }, { "epoch": 75.86999716151007, "grad_norm": 0.0760556161403656, "learning_rate": 2.416065852966222e-05, "loss": 4.4877268373966216e-05, "step": 267290 }, { "epoch": 75.87283565143343, "grad_norm": 0.05645966902375221, "learning_rate": 2.415782003973886e-05, "loss": 4.532914608716965e-05, "step": 267300 }, { "epoch": 75.8756741413568, "grad_norm": 0.012145765125751495, "learning_rate": 2.41549815498155e-05, "loss": 3.2498128712177275e-05, "step": 267310 }, { "epoch": 75.87851263128016, "grad_norm": 0.02183924801647663, "learning_rate": 2.4152143059892137e-05, "loss": 8.097998797893525e-05, "step": 267320 }, { "epoch": 75.88135112120352, "grad_norm": 0.0034671956673264503, "learning_rate": 2.414930456996878e-05, "loss": 5.252454429864884e-05, "step": 267330 }, { "epoch": 75.88418961112689, "grad_norm": 0.02356666512787342, "learning_rate": 2.4146466080045417e-05, "loss": 3.145243972539902e-05, "step": 267340 }, { "epoch": 75.88702810105025, "grad_norm": 0.005552166607230902, "learning_rate": 2.4143627590122055e-05, "loss": 7.895100861787796e-05, "step": 267350 }, { "epoch": 75.8898665909736, "grad_norm": 0.06718844175338745, "learning_rate": 2.4140789100198696e-05, "loss": 5.1095522940158845e-05, "step": 267360 }, { "epoch": 75.89270508089696, "grad_norm": 0.003187594236806035, "learning_rate": 2.4137950610275334e-05, "loss": 0.0003950875252485275, "step": 267370 }, { "epoch": 75.89554357082032, "grad_norm": 0.016028717160224915, "learning_rate": 2.4135112120351976e-05, "loss": 0.00017640534788370131, "step": 267380 }, { "epoch": 75.89838206074369, "grad_norm": 0.004325282759964466, "learning_rate": 2.413227363042861e-05, "loss": 0.0006828064098954201, "step": 267390 }, { "epoch": 75.90122055066705, "grad_norm": 0.15384399890899658, "learning_rate": 2.412943514050525e-05, "loss": 0.005887992680072784, "step": 267400 }, { "epoch": 75.90405904059041, "grad_norm": 0.06187831237912178, "learning_rate": 2.4126596650581893e-05, "loss": 9.547043591737748e-05, "step": 267410 }, { "epoch": 75.90689753051376, "grad_norm": 0.028852136805653572, "learning_rate": 2.412375816065853e-05, "loss": 0.0030895108357071877, "step": 267420 }, { "epoch": 75.90973602043712, "grad_norm": 0.008672240190207958, "learning_rate": 2.412091967073517e-05, "loss": 0.00018656477332115173, "step": 267430 }, { "epoch": 75.91257451036049, "grad_norm": 0.0303290244191885, "learning_rate": 2.4118081180811807e-05, "loss": 7.44270160794258e-05, "step": 267440 }, { "epoch": 75.91541300028385, "grad_norm": 0.012160705402493477, "learning_rate": 2.4115242690888448e-05, "loss": 0.004313748329877853, "step": 267450 }, { "epoch": 75.91825149020721, "grad_norm": 0.022529790177941322, "learning_rate": 2.411240420096509e-05, "loss": 0.0009216593578457832, "step": 267460 }, { "epoch": 75.92108998013057, "grad_norm": 0.0018429403426125646, "learning_rate": 2.4109565711041728e-05, "loss": 4.0646828711032865e-05, "step": 267470 }, { "epoch": 75.92392847005394, "grad_norm": 0.017024308443069458, "learning_rate": 2.4106727221118366e-05, "loss": 0.00017393808811903, "step": 267480 }, { "epoch": 75.92676695997729, "grad_norm": 0.007733451668173075, "learning_rate": 2.4103888731195007e-05, "loss": 0.0010996831580996514, "step": 267490 }, { "epoch": 75.92960544990065, "grad_norm": 0.018558554351329803, "learning_rate": 2.4101050241271645e-05, "loss": 4.87281009554863e-05, "step": 267500 }, { "epoch": 75.92960544990065, "eval_accuracy": 0.9846760348445349, "eval_loss": 0.06296966969966888, "eval_runtime": 35.565, "eval_samples_per_second": 442.204, "eval_steps_per_second": 6.917, "step": 267500 }, { "epoch": 75.93244393982401, "grad_norm": 0.004625377710908651, "learning_rate": 2.4098211751348286e-05, "loss": 0.00016657691448926926, "step": 267510 }, { "epoch": 75.93528242974737, "grad_norm": 0.03266195207834244, "learning_rate": 2.409537326142492e-05, "loss": 9.481329470872879e-05, "step": 267520 }, { "epoch": 75.93812091967074, "grad_norm": 0.11873200535774231, "learning_rate": 2.4092534771501562e-05, "loss": 0.0004579391330480576, "step": 267530 }, { "epoch": 75.9409594095941, "grad_norm": 0.010703971609473228, "learning_rate": 2.4089696281578204e-05, "loss": 4.4607371091842654e-05, "step": 267540 }, { "epoch": 75.94379789951746, "grad_norm": 0.010389323346316814, "learning_rate": 2.408685779165484e-05, "loss": 0.0006732683628797531, "step": 267550 }, { "epoch": 75.94663638944081, "grad_norm": 0.0039005083963274956, "learning_rate": 2.408401930173148e-05, "loss": 0.0004418499767780304, "step": 267560 }, { "epoch": 75.94947487936417, "grad_norm": 0.061584871262311935, "learning_rate": 2.4081180811808118e-05, "loss": 0.0005676189437508583, "step": 267570 }, { "epoch": 75.95231336928754, "grad_norm": 0.00390587723813951, "learning_rate": 2.407834232188476e-05, "loss": 0.00030297953635454177, "step": 267580 }, { "epoch": 75.9551518592109, "grad_norm": 0.19058352708816528, "learning_rate": 2.40755038319614e-05, "loss": 0.00018126443028450013, "step": 267590 }, { "epoch": 75.95799034913426, "grad_norm": 0.003702235408127308, "learning_rate": 2.4072665342038035e-05, "loss": 0.00014908108860254288, "step": 267600 }, { "epoch": 75.96082883905763, "grad_norm": 0.00504331337288022, "learning_rate": 2.4069826852114676e-05, "loss": 7.8473798930645e-05, "step": 267610 }, { "epoch": 75.96366732898099, "grad_norm": 0.008029392920434475, "learning_rate": 2.4066988362191314e-05, "loss": 6.557051092386246e-05, "step": 267620 }, { "epoch": 75.96650581890434, "grad_norm": 0.1699458658695221, "learning_rate": 2.4064149872267956e-05, "loss": 8.349679410457611e-05, "step": 267630 }, { "epoch": 75.9693443088277, "grad_norm": 0.010655646212399006, "learning_rate": 2.4061311382344594e-05, "loss": 5.691312253475189e-05, "step": 267640 }, { "epoch": 75.97218279875106, "grad_norm": 0.0030325623229146004, "learning_rate": 2.405847289242123e-05, "loss": 6.374605000019074e-05, "step": 267650 }, { "epoch": 75.97502128867443, "grad_norm": 0.016109423711895943, "learning_rate": 2.4055634402497873e-05, "loss": 3.932733088731766e-05, "step": 267660 }, { "epoch": 75.97785977859779, "grad_norm": 0.0023073451593518257, "learning_rate": 2.405279591257451e-05, "loss": 0.00040111541748046877, "step": 267670 }, { "epoch": 75.98069826852115, "grad_norm": 0.06393657624721527, "learning_rate": 2.4049957422651152e-05, "loss": 0.0009070083498954773, "step": 267680 }, { "epoch": 75.9835367584445, "grad_norm": 0.04104577749967575, "learning_rate": 2.404711893272779e-05, "loss": 0.000160251185297966, "step": 267690 }, { "epoch": 75.98637524836786, "grad_norm": 0.23130980134010315, "learning_rate": 2.404428044280443e-05, "loss": 0.0007076440379023552, "step": 267700 }, { "epoch": 75.98921373829123, "grad_norm": 0.2004590630531311, "learning_rate": 2.404144195288107e-05, "loss": 0.00018083564937114714, "step": 267710 }, { "epoch": 75.99205222821459, "grad_norm": 0.03970911353826523, "learning_rate": 2.4038603462957708e-05, "loss": 0.0009061861783266068, "step": 267720 }, { "epoch": 75.99489071813795, "grad_norm": 0.00474888551980257, "learning_rate": 2.4035764973034346e-05, "loss": 3.346558660268784e-05, "step": 267730 }, { "epoch": 75.99772920806132, "grad_norm": 0.004232160747051239, "learning_rate": 2.4032926483110987e-05, "loss": 3.0300207436084746e-05, "step": 267740 }, { "epoch": 76.00056769798468, "grad_norm": 0.1404624730348587, "learning_rate": 2.4030087993187625e-05, "loss": 9.214869351126254e-05, "step": 267750 }, { "epoch": 76.00340618790803, "grad_norm": 0.007183436769992113, "learning_rate": 2.4027249503264266e-05, "loss": 3.7604570388793944e-05, "step": 267760 }, { "epoch": 76.00624467783139, "grad_norm": 0.0016905664233490825, "learning_rate": 2.4024411013340904e-05, "loss": 9.038429707288742e-05, "step": 267770 }, { "epoch": 76.00908316775475, "grad_norm": 0.005521358922123909, "learning_rate": 2.4021572523417542e-05, "loss": 3.617722541093826e-05, "step": 267780 }, { "epoch": 76.01192165767812, "grad_norm": 0.8601905703544617, "learning_rate": 2.4018734033494184e-05, "loss": 0.00011896342039108276, "step": 267790 }, { "epoch": 76.01476014760148, "grad_norm": 0.009960722178220749, "learning_rate": 2.4015895543570822e-05, "loss": 1.6148202121257782e-05, "step": 267800 }, { "epoch": 76.01759863752484, "grad_norm": 0.1185631975531578, "learning_rate": 2.401305705364746e-05, "loss": 8.527860045433044e-05, "step": 267810 }, { "epoch": 76.0204371274482, "grad_norm": 0.0014911583857610822, "learning_rate": 2.4010218563724098e-05, "loss": 0.0001184862107038498, "step": 267820 }, { "epoch": 76.02327561737155, "grad_norm": 0.0009232297306880355, "learning_rate": 2.400738007380074e-05, "loss": 3.112778067588806e-05, "step": 267830 }, { "epoch": 76.02611410729492, "grad_norm": 0.01733594946563244, "learning_rate": 2.400454158387738e-05, "loss": 3.642980009317398e-05, "step": 267840 }, { "epoch": 76.02895259721828, "grad_norm": 0.0006621201173402369, "learning_rate": 2.400170309395402e-05, "loss": 5.2347779273986816e-05, "step": 267850 }, { "epoch": 76.03179108714164, "grad_norm": 0.0007596213254146278, "learning_rate": 2.3998864604030656e-05, "loss": 1.4739483594894409e-05, "step": 267860 }, { "epoch": 76.034629577065, "grad_norm": 0.03564734011888504, "learning_rate": 2.3996026114107294e-05, "loss": 6.399434059858322e-05, "step": 267870 }, { "epoch": 76.03746806698837, "grad_norm": 0.031815726310014725, "learning_rate": 2.3993187624183936e-05, "loss": 3.420431166887283e-05, "step": 267880 }, { "epoch": 76.04030655691172, "grad_norm": 0.0027346815913915634, "learning_rate": 2.3990349134260577e-05, "loss": 6.593335419893264e-05, "step": 267890 }, { "epoch": 76.04314504683508, "grad_norm": 0.025059601292014122, "learning_rate": 2.3987510644337212e-05, "loss": 2.540815621614456e-05, "step": 267900 }, { "epoch": 76.04598353675844, "grad_norm": 0.008023502305150032, "learning_rate": 2.3984672154413853e-05, "loss": 3.1886249780654906e-05, "step": 267910 }, { "epoch": 76.0488220266818, "grad_norm": 0.0018459747079759836, "learning_rate": 2.398183366449049e-05, "loss": 2.2337958216667176e-05, "step": 267920 }, { "epoch": 76.05166051660517, "grad_norm": 0.002058825921267271, "learning_rate": 2.3978995174567133e-05, "loss": 6.8662129342556e-05, "step": 267930 }, { "epoch": 76.05449900652853, "grad_norm": 0.07618125528097153, "learning_rate": 2.397615668464377e-05, "loss": 0.00011191684752702713, "step": 267940 }, { "epoch": 76.0573374964519, "grad_norm": 0.009737367741763592, "learning_rate": 2.397331819472041e-05, "loss": 0.00019629299640655518, "step": 267950 }, { "epoch": 76.06017598637524, "grad_norm": 0.027592021971940994, "learning_rate": 2.397047970479705e-05, "loss": 4.549603909254074e-05, "step": 267960 }, { "epoch": 76.0630144762986, "grad_norm": 0.8052910566329956, "learning_rate": 2.3967641214873688e-05, "loss": 0.00017586275935173036, "step": 267970 }, { "epoch": 76.06585296622197, "grad_norm": 0.007371912244707346, "learning_rate": 2.396480272495033e-05, "loss": 7.395222783088685e-05, "step": 267980 }, { "epoch": 76.06869145614533, "grad_norm": 0.0014383804518729448, "learning_rate": 2.3961964235026967e-05, "loss": 0.0004139423370361328, "step": 267990 }, { "epoch": 76.0715299460687, "grad_norm": 0.015305754728615284, "learning_rate": 2.3959125745103605e-05, "loss": 2.5362521409988403e-05, "step": 268000 }, { "epoch": 76.0715299460687, "eval_accuracy": 0.9846760348445349, "eval_loss": 0.06346560269594193, "eval_runtime": 39.3987, "eval_samples_per_second": 399.176, "eval_steps_per_second": 6.244, "step": 268000 }, { "epoch": 76.07436843599206, "grad_norm": 0.0007201344706118107, "learning_rate": 2.3956287255180247e-05, "loss": 0.0008132642135024071, "step": 268010 }, { "epoch": 76.07720692591542, "grad_norm": 0.0694526731967926, "learning_rate": 2.3953448765256885e-05, "loss": 0.0001184169203042984, "step": 268020 }, { "epoch": 76.08004541583877, "grad_norm": 0.003807042259722948, "learning_rate": 2.3950610275333523e-05, "loss": 0.00013196058571338654, "step": 268030 }, { "epoch": 76.08288390576213, "grad_norm": 0.009457730688154697, "learning_rate": 2.3947771785410164e-05, "loss": 0.00037768203765153885, "step": 268040 }, { "epoch": 76.0857223956855, "grad_norm": 0.006250835955142975, "learning_rate": 2.3944933295486802e-05, "loss": 0.00014723893254995347, "step": 268050 }, { "epoch": 76.08856088560886, "grad_norm": 0.02061651460826397, "learning_rate": 2.3942094805563443e-05, "loss": 0.00014234259724617005, "step": 268060 }, { "epoch": 76.09139937553222, "grad_norm": 0.00784446019679308, "learning_rate": 2.3939256315640078e-05, "loss": 0.00042633041739463805, "step": 268070 }, { "epoch": 76.09423786545558, "grad_norm": 0.015520570799708366, "learning_rate": 2.393641782571672e-05, "loss": 8.460432291030884e-05, "step": 268080 }, { "epoch": 76.09707635537895, "grad_norm": 0.0010651183547452092, "learning_rate": 2.393357933579336e-05, "loss": 0.000204608216881752, "step": 268090 }, { "epoch": 76.0999148453023, "grad_norm": 0.07624209672212601, "learning_rate": 2.393074084587e-05, "loss": 0.00026051867753267287, "step": 268100 }, { "epoch": 76.10275333522566, "grad_norm": 0.9472465515136719, "learning_rate": 2.3927902355946637e-05, "loss": 0.0001849208027124405, "step": 268110 }, { "epoch": 76.10559182514902, "grad_norm": 0.010210330598056316, "learning_rate": 2.3925063866023275e-05, "loss": 8.900910615921021e-05, "step": 268120 }, { "epoch": 76.10843031507238, "grad_norm": 0.019491156563162804, "learning_rate": 2.3922225376099916e-05, "loss": 4.8329681158065795e-05, "step": 268130 }, { "epoch": 76.11126880499575, "grad_norm": 0.006829170975834131, "learning_rate": 2.3919386886176557e-05, "loss": 6.458312273025513e-05, "step": 268140 }, { "epoch": 76.11410729491911, "grad_norm": 0.0015042508020997047, "learning_rate": 2.3916548396253195e-05, "loss": 1.7739832401275635e-05, "step": 268150 }, { "epoch": 76.11694578484246, "grad_norm": 0.13765278458595276, "learning_rate": 2.3913709906329833e-05, "loss": 3.7104077637195584e-05, "step": 268160 }, { "epoch": 76.11978427476582, "grad_norm": 0.002790615661069751, "learning_rate": 2.391087141640647e-05, "loss": 3.3902376890182496e-05, "step": 268170 }, { "epoch": 76.12262276468918, "grad_norm": 0.013561136089265347, "learning_rate": 2.3908032926483113e-05, "loss": 0.000950128585100174, "step": 268180 }, { "epoch": 76.12546125461255, "grad_norm": 0.03954155370593071, "learning_rate": 2.3905194436559754e-05, "loss": 5.345083773136139e-05, "step": 268190 }, { "epoch": 76.12829974453591, "grad_norm": 0.0012396943056955934, "learning_rate": 2.390235594663639e-05, "loss": 0.00021523647010326385, "step": 268200 }, { "epoch": 76.13113823445927, "grad_norm": 0.002015146892517805, "learning_rate": 2.389951745671303e-05, "loss": 0.0002329394221305847, "step": 268210 }, { "epoch": 76.13397672438263, "grad_norm": 0.09184136986732483, "learning_rate": 2.3896678966789668e-05, "loss": 9.922627359628677e-05, "step": 268220 }, { "epoch": 76.13681521430598, "grad_norm": 5.7511444091796875, "learning_rate": 2.389384047686631e-05, "loss": 0.0007028946653008461, "step": 268230 }, { "epoch": 76.13965370422935, "grad_norm": 0.0021430589258670807, "learning_rate": 2.3891001986942947e-05, "loss": 0.0002448774874210358, "step": 268240 }, { "epoch": 76.14249219415271, "grad_norm": 0.17045308649539948, "learning_rate": 2.3888163497019585e-05, "loss": 0.005621185153722763, "step": 268250 }, { "epoch": 76.14533068407607, "grad_norm": 0.023757832124829292, "learning_rate": 2.3885325007096227e-05, "loss": 2.197548747062683e-05, "step": 268260 }, { "epoch": 76.14816917399943, "grad_norm": 0.003990934696048498, "learning_rate": 2.3882486517172865e-05, "loss": 0.0018938088789582253, "step": 268270 }, { "epoch": 76.1510076639228, "grad_norm": 0.010157479904592037, "learning_rate": 2.3879648027249503e-05, "loss": 0.00022908933460712432, "step": 268280 }, { "epoch": 76.15384615384616, "grad_norm": 0.008258541114628315, "learning_rate": 2.3876809537326144e-05, "loss": 0.001047302782535553, "step": 268290 }, { "epoch": 76.15668464376951, "grad_norm": 0.03174851834774017, "learning_rate": 2.3873971047402782e-05, "loss": 0.00048097185790538786, "step": 268300 }, { "epoch": 76.15952313369287, "grad_norm": 0.0005102794384583831, "learning_rate": 2.3871132557479423e-05, "loss": 9.106006473302841e-05, "step": 268310 }, { "epoch": 76.16236162361623, "grad_norm": 0.0010330730583518744, "learning_rate": 2.386829406755606e-05, "loss": 0.0007335074245929718, "step": 268320 }, { "epoch": 76.1652001135396, "grad_norm": 0.020449569448828697, "learning_rate": 2.38654555776327e-05, "loss": 0.0059955775737762455, "step": 268330 }, { "epoch": 76.16803860346296, "grad_norm": 0.2761836349964142, "learning_rate": 2.386261708770934e-05, "loss": 0.00031180623918771746, "step": 268340 }, { "epoch": 76.17087709338632, "grad_norm": 0.005483266897499561, "learning_rate": 2.385977859778598e-05, "loss": 5.278401076793671e-05, "step": 268350 }, { "epoch": 76.17371558330967, "grad_norm": 0.41503193974494934, "learning_rate": 2.385694010786262e-05, "loss": 0.0001264391466975212, "step": 268360 }, { "epoch": 76.17655407323304, "grad_norm": 0.003257379401475191, "learning_rate": 2.3854101617939255e-05, "loss": 7.542297244071961e-05, "step": 268370 }, { "epoch": 76.1793925631564, "grad_norm": 0.004387697670608759, "learning_rate": 2.3851263128015896e-05, "loss": 6.26659020781517e-05, "step": 268380 }, { "epoch": 76.18223105307976, "grad_norm": 0.021505245938897133, "learning_rate": 2.3848424638092538e-05, "loss": 0.0004635566845536232, "step": 268390 }, { "epoch": 76.18506954300312, "grad_norm": 0.028295908123254776, "learning_rate": 2.3845586148169176e-05, "loss": 0.00012810099869966506, "step": 268400 }, { "epoch": 76.18790803292649, "grad_norm": 0.018133962526917458, "learning_rate": 2.3842747658245814e-05, "loss": 7.819514721632003e-05, "step": 268410 }, { "epoch": 76.19074652284985, "grad_norm": 0.006716306786984205, "learning_rate": 2.383990916832245e-05, "loss": 0.0001247614622116089, "step": 268420 }, { "epoch": 76.1935850127732, "grad_norm": 0.07000630348920822, "learning_rate": 2.3837070678399093e-05, "loss": 3.339685499668121e-05, "step": 268430 }, { "epoch": 76.19642350269656, "grad_norm": 0.0011659201700240374, "learning_rate": 2.3834232188475734e-05, "loss": 0.0021239059045910836, "step": 268440 }, { "epoch": 76.19926199261992, "grad_norm": 0.0021535041742026806, "learning_rate": 2.3831393698552372e-05, "loss": 4.285108298063278e-05, "step": 268450 }, { "epoch": 76.20210048254329, "grad_norm": 0.05942239239811897, "learning_rate": 2.382855520862901e-05, "loss": 4.646796733140945e-05, "step": 268460 }, { "epoch": 76.20493897246665, "grad_norm": 0.006504606921225786, "learning_rate": 2.3825716718705648e-05, "loss": 2.6308558881282805e-05, "step": 268470 }, { "epoch": 76.20777746239001, "grad_norm": 0.004407866392284632, "learning_rate": 2.382287822878229e-05, "loss": 1.4042668044567109e-05, "step": 268480 }, { "epoch": 76.21061595231338, "grad_norm": 0.0019775470718741417, "learning_rate": 2.3820039738858928e-05, "loss": 5.83326444029808e-05, "step": 268490 }, { "epoch": 76.21345444223672, "grad_norm": 0.001982973190024495, "learning_rate": 2.3817201248935566e-05, "loss": 3.4055113792419435e-05, "step": 268500 }, { "epoch": 76.21345444223672, "eval_accuracy": 0.9847396197621924, "eval_loss": 0.0588245764374733, "eval_runtime": 35.5174, "eval_samples_per_second": 442.796, "eval_steps_per_second": 6.926, "step": 268500 }, { "epoch": 76.21629293216009, "grad_norm": 0.012852534651756287, "learning_rate": 2.3814362759012207e-05, "loss": 3.9105862379074095e-05, "step": 268510 }, { "epoch": 76.21913142208345, "grad_norm": 0.00626622186973691, "learning_rate": 2.3811524269088845e-05, "loss": 2.2327154874801634e-05, "step": 268520 }, { "epoch": 76.22196991200681, "grad_norm": 0.004851898178458214, "learning_rate": 2.3808685779165486e-05, "loss": 0.0002556825056672096, "step": 268530 }, { "epoch": 76.22480840193018, "grad_norm": 0.01666136644780636, "learning_rate": 2.3805847289242124e-05, "loss": 1.8611177802085875e-05, "step": 268540 }, { "epoch": 76.22764689185354, "grad_norm": 1.5642403364181519, "learning_rate": 2.3803008799318762e-05, "loss": 0.00022795554250478744, "step": 268550 }, { "epoch": 76.2304853817769, "grad_norm": 0.0018831134075298905, "learning_rate": 2.3800170309395404e-05, "loss": 7.242802530527115e-05, "step": 268560 }, { "epoch": 76.23332387170025, "grad_norm": 0.008897606283426285, "learning_rate": 2.379733181947204e-05, "loss": 0.00011331122368574142, "step": 268570 }, { "epoch": 76.23616236162361, "grad_norm": 0.003859810996800661, "learning_rate": 2.379449332954868e-05, "loss": 5.067102611064911e-05, "step": 268580 }, { "epoch": 76.23900085154698, "grad_norm": 0.0014993860386312008, "learning_rate": 2.379165483962532e-05, "loss": 5.4786913096904755e-05, "step": 268590 }, { "epoch": 76.24183934147034, "grad_norm": 0.0014883133117109537, "learning_rate": 2.378881634970196e-05, "loss": 3.4267082810401917e-05, "step": 268600 }, { "epoch": 76.2446778313937, "grad_norm": 0.006436859257519245, "learning_rate": 2.37859778597786e-05, "loss": 4.9023516476154326e-05, "step": 268610 }, { "epoch": 76.24751632131706, "grad_norm": 0.003389048855751753, "learning_rate": 2.378313936985524e-05, "loss": 5.929283797740936e-05, "step": 268620 }, { "epoch": 76.25035481124041, "grad_norm": 2.146702289581299, "learning_rate": 2.3780300879931876e-05, "loss": 0.00043095126748085024, "step": 268630 }, { "epoch": 76.25319330116378, "grad_norm": 0.01345468033105135, "learning_rate": 2.3777462390008518e-05, "loss": 0.00033936649560928345, "step": 268640 }, { "epoch": 76.25603179108714, "grad_norm": 0.023291263729333878, "learning_rate": 2.3774623900085156e-05, "loss": 4.069078713655472e-05, "step": 268650 }, { "epoch": 76.2588702810105, "grad_norm": 0.0968066155910492, "learning_rate": 2.3771785410161797e-05, "loss": 6.818119436502457e-05, "step": 268660 }, { "epoch": 76.26170877093386, "grad_norm": 0.017209287732839584, "learning_rate": 2.376894692023843e-05, "loss": 5.120355635881424e-05, "step": 268670 }, { "epoch": 76.26454726085723, "grad_norm": 0.004562600050121546, "learning_rate": 2.3766108430315073e-05, "loss": 3.413204103708267e-05, "step": 268680 }, { "epoch": 76.26738575078059, "grad_norm": 0.002626247936859727, "learning_rate": 2.3763269940391714e-05, "loss": 2.7882307767868043e-05, "step": 268690 }, { "epoch": 76.27022424070394, "grad_norm": 0.022449860349297523, "learning_rate": 2.3760431450468352e-05, "loss": 2.7045421302318572e-05, "step": 268700 }, { "epoch": 76.2730627306273, "grad_norm": 0.006620341446250677, "learning_rate": 2.375759296054499e-05, "loss": 4.9716047942638395e-05, "step": 268710 }, { "epoch": 76.27590122055067, "grad_norm": 0.043753333389759064, "learning_rate": 2.375475447062163e-05, "loss": 0.0003812357783317566, "step": 268720 }, { "epoch": 76.27873971047403, "grad_norm": 0.010068610310554504, "learning_rate": 2.375191598069827e-05, "loss": 0.0001186024397611618, "step": 268730 }, { "epoch": 76.28157820039739, "grad_norm": 0.004660740029066801, "learning_rate": 2.374907749077491e-05, "loss": 2.7507171034812926e-05, "step": 268740 }, { "epoch": 76.28441669032075, "grad_norm": 0.0030386371072381735, "learning_rate": 2.3746239000851546e-05, "loss": 0.00010250378400087357, "step": 268750 }, { "epoch": 76.28725518024412, "grad_norm": 0.1019442155957222, "learning_rate": 2.3743400510928187e-05, "loss": 3.0350685119628906e-05, "step": 268760 }, { "epoch": 76.29009367016747, "grad_norm": 0.0013573190663009882, "learning_rate": 2.374056202100483e-05, "loss": 3.603361546993256e-05, "step": 268770 }, { "epoch": 76.29293216009083, "grad_norm": 0.09665095061063766, "learning_rate": 2.3737723531081466e-05, "loss": 5.218219012022018e-05, "step": 268780 }, { "epoch": 76.29577065001419, "grad_norm": 0.08607485145330429, "learning_rate": 2.3734885041158104e-05, "loss": 5.084257572889328e-05, "step": 268790 }, { "epoch": 76.29860913993755, "grad_norm": 0.0015215823659673333, "learning_rate": 2.3732046551234742e-05, "loss": 7.020514458417892e-05, "step": 268800 }, { "epoch": 76.30144762986092, "grad_norm": 0.003319254145026207, "learning_rate": 2.3729208061311384e-05, "loss": 3.752857446670532e-05, "step": 268810 }, { "epoch": 76.30428611978428, "grad_norm": 0.001400437206029892, "learning_rate": 2.3726369571388025e-05, "loss": 7.716082036495209e-05, "step": 268820 }, { "epoch": 76.30712460970764, "grad_norm": 0.0015383538557216525, "learning_rate": 2.3723531081464663e-05, "loss": 1.7021223902702333e-05, "step": 268830 }, { "epoch": 76.30996309963099, "grad_norm": 0.8471470475196838, "learning_rate": 2.37206925915413e-05, "loss": 0.00012035705149173736, "step": 268840 }, { "epoch": 76.31280158955435, "grad_norm": 0.004898324143141508, "learning_rate": 2.371785410161794e-05, "loss": 5.1656179130077363e-05, "step": 268850 }, { "epoch": 76.31564007947772, "grad_norm": 0.003212723881006241, "learning_rate": 2.371501561169458e-05, "loss": 1.3263151049613953e-05, "step": 268860 }, { "epoch": 76.31847856940108, "grad_norm": 0.004640086088329554, "learning_rate": 2.3712177121771222e-05, "loss": 2.1641515195369722e-05, "step": 268870 }, { "epoch": 76.32131705932444, "grad_norm": 0.0037079460453242064, "learning_rate": 2.3709338631847856e-05, "loss": 0.0014342011883854866, "step": 268880 }, { "epoch": 76.3241555492478, "grad_norm": 0.009256750345230103, "learning_rate": 2.3706500141924498e-05, "loss": 1.794174313545227e-05, "step": 268890 }, { "epoch": 76.32699403917115, "grad_norm": 0.0013259539846330881, "learning_rate": 2.3703661652001136e-05, "loss": 2.1077878773212434e-05, "step": 268900 }, { "epoch": 76.32983252909452, "grad_norm": 0.0016660490073263645, "learning_rate": 2.3700823162077777e-05, "loss": 2.6587210595607757e-05, "step": 268910 }, { "epoch": 76.33267101901788, "grad_norm": 0.03182622417807579, "learning_rate": 2.3697984672154415e-05, "loss": 3.975182771682739e-05, "step": 268920 }, { "epoch": 76.33550950894124, "grad_norm": 0.00043740245746448636, "learning_rate": 2.3695146182231053e-05, "loss": 5.984678864479065e-05, "step": 268930 }, { "epoch": 76.3383479988646, "grad_norm": 0.002459320006892085, "learning_rate": 2.3692307692307695e-05, "loss": 3.218669444322586e-05, "step": 268940 }, { "epoch": 76.34118648878797, "grad_norm": 0.002667237538844347, "learning_rate": 2.3689469202384333e-05, "loss": 2.7710385620594024e-05, "step": 268950 }, { "epoch": 76.34402497871133, "grad_norm": 0.004114913754165173, "learning_rate": 2.368663071246097e-05, "loss": 2.5162473320961e-05, "step": 268960 }, { "epoch": 76.34686346863468, "grad_norm": 0.0014541358686983585, "learning_rate": 2.3683792222537612e-05, "loss": 2.0178034901618958e-05, "step": 268970 }, { "epoch": 76.34970195855804, "grad_norm": 0.007780621759593487, "learning_rate": 2.368095373261425e-05, "loss": 1.0416470468044281e-05, "step": 268980 }, { "epoch": 76.3525404484814, "grad_norm": 0.0007047276012599468, "learning_rate": 2.367811524269089e-05, "loss": 3.552045673131943e-05, "step": 268990 }, { "epoch": 76.35537893840477, "grad_norm": 0.01057931687682867, "learning_rate": 2.367527675276753e-05, "loss": 2.9935315251350402e-05, "step": 269000 }, { "epoch": 76.35537893840477, "eval_accuracy": 0.9869015069625485, "eval_loss": 0.055418383330106735, "eval_runtime": 35.4739, "eval_samples_per_second": 443.34, "eval_steps_per_second": 6.935, "step": 269000 }, { "epoch": 76.35821742832813, "grad_norm": 0.005279028322547674, "learning_rate": 2.3672438262844167e-05, "loss": 2.5239773094654083e-05, "step": 269010 }, { "epoch": 76.3610559182515, "grad_norm": 0.007040626369416714, "learning_rate": 2.366959977292081e-05, "loss": 2.457704395055771e-05, "step": 269020 }, { "epoch": 76.36389440817486, "grad_norm": 0.000465044955490157, "learning_rate": 2.3666761282997447e-05, "loss": 2.546105533838272e-05, "step": 269030 }, { "epoch": 76.3667328980982, "grad_norm": 0.002186079975217581, "learning_rate": 2.3663922793074088e-05, "loss": 2.532191574573517e-05, "step": 269040 }, { "epoch": 76.36957138802157, "grad_norm": 0.00821111910045147, "learning_rate": 2.3661084303150723e-05, "loss": 3.384221345186234e-05, "step": 269050 }, { "epoch": 76.37240987794493, "grad_norm": 0.005061687435954809, "learning_rate": 2.3658245813227364e-05, "loss": 2.817101776599884e-05, "step": 269060 }, { "epoch": 76.3752483678683, "grad_norm": 0.005806256085634232, "learning_rate": 2.3655407323304005e-05, "loss": 2.7033127844333647e-05, "step": 269070 }, { "epoch": 76.37808685779166, "grad_norm": 0.021746832877397537, "learning_rate": 2.3652568833380643e-05, "loss": 3.8843974471092225e-05, "step": 269080 }, { "epoch": 76.38092534771502, "grad_norm": 0.035423118621110916, "learning_rate": 2.364973034345728e-05, "loss": 2.093985676765442e-05, "step": 269090 }, { "epoch": 76.38376383763837, "grad_norm": 0.014067986980080605, "learning_rate": 2.364689185353392e-05, "loss": 1.8627382814884186e-05, "step": 269100 }, { "epoch": 76.38660232756173, "grad_norm": 0.002385932020843029, "learning_rate": 2.364405336361056e-05, "loss": 1.0797940194606782e-05, "step": 269110 }, { "epoch": 76.3894408174851, "grad_norm": 0.002345742890611291, "learning_rate": 2.3641214873687202e-05, "loss": 3.08426097035408e-05, "step": 269120 }, { "epoch": 76.39227930740846, "grad_norm": 0.0008431544410996139, "learning_rate": 2.363837638376384e-05, "loss": 1.3338029384613037e-05, "step": 269130 }, { "epoch": 76.39511779733182, "grad_norm": 0.0009573519346304238, "learning_rate": 2.3635537893840478e-05, "loss": 2.3359619081020356e-05, "step": 269140 }, { "epoch": 76.39795628725518, "grad_norm": 0.007242520339787006, "learning_rate": 2.3632699403917116e-05, "loss": 1.5055015683174134e-05, "step": 269150 }, { "epoch": 76.40079477717855, "grad_norm": 0.07288237661123276, "learning_rate": 2.3629860913993757e-05, "loss": 1.9515492022037505e-05, "step": 269160 }, { "epoch": 76.4036332671019, "grad_norm": 0.23242810368537903, "learning_rate": 2.3627022424070395e-05, "loss": 6.0502998530864714e-05, "step": 269170 }, { "epoch": 76.40647175702526, "grad_norm": 0.0009614910231903195, "learning_rate": 2.3624183934147033e-05, "loss": 9.414553642272949e-05, "step": 269180 }, { "epoch": 76.40931024694862, "grad_norm": 0.007865602150559425, "learning_rate": 2.3621345444223675e-05, "loss": 2.8429366648197173e-05, "step": 269190 }, { "epoch": 76.41214873687198, "grad_norm": 0.0002979285200126469, "learning_rate": 2.3618506954300313e-05, "loss": 5.340352654457092e-05, "step": 269200 }, { "epoch": 76.41498722679535, "grad_norm": 0.0042718625627458096, "learning_rate": 2.3615668464376954e-05, "loss": 3.264695405960083e-05, "step": 269210 }, { "epoch": 76.41782571671871, "grad_norm": 0.0014346285024657845, "learning_rate": 2.3612829974453592e-05, "loss": 1.417025923728943e-05, "step": 269220 }, { "epoch": 76.42066420664207, "grad_norm": 0.0015802260022610426, "learning_rate": 2.360999148453023e-05, "loss": 1.2547336518764496e-05, "step": 269230 }, { "epoch": 76.42350269656542, "grad_norm": 0.0017951837508007884, "learning_rate": 2.360715299460687e-05, "loss": 1.6824714839458467e-05, "step": 269240 }, { "epoch": 76.42634118648878, "grad_norm": 0.005283729638904333, "learning_rate": 2.360431450468351e-05, "loss": 1.666862517595291e-05, "step": 269250 }, { "epoch": 76.42917967641215, "grad_norm": 0.0018317580688744783, "learning_rate": 2.3601476014760147e-05, "loss": 1.2761354446411132e-05, "step": 269260 }, { "epoch": 76.43201816633551, "grad_norm": 0.0010984832188114524, "learning_rate": 2.359863752483679e-05, "loss": 3.5472400486469266e-05, "step": 269270 }, { "epoch": 76.43485665625887, "grad_norm": 0.005904218181967735, "learning_rate": 2.3595799034913427e-05, "loss": 1.7224438488483428e-05, "step": 269280 }, { "epoch": 76.43769514618224, "grad_norm": 0.000551421195268631, "learning_rate": 2.3592960544990068e-05, "loss": 1.284480094909668e-05, "step": 269290 }, { "epoch": 76.4405336361056, "grad_norm": 0.055895864963531494, "learning_rate": 2.3590122055066706e-05, "loss": 4.1711144149303436e-05, "step": 269300 }, { "epoch": 76.44337212602895, "grad_norm": 0.0014526299200952053, "learning_rate": 2.3587283565143344e-05, "loss": 1.3286620378494262e-05, "step": 269310 }, { "epoch": 76.44621061595231, "grad_norm": 0.008850526995956898, "learning_rate": 2.3584445075219985e-05, "loss": 4.8493966460227966e-05, "step": 269320 }, { "epoch": 76.44904910587567, "grad_norm": 0.0042239767499268055, "learning_rate": 2.3581606585296623e-05, "loss": 1.3713166117668152e-05, "step": 269330 }, { "epoch": 76.45188759579904, "grad_norm": 0.0021039799321442842, "learning_rate": 2.357876809537326e-05, "loss": 1.5566125512123108e-05, "step": 269340 }, { "epoch": 76.4547260857224, "grad_norm": 0.0026251645758748055, "learning_rate": 2.35759296054499e-05, "loss": 3.254879266023636e-05, "step": 269350 }, { "epoch": 76.45756457564576, "grad_norm": 0.0035894252359867096, "learning_rate": 2.357309111552654e-05, "loss": 1.2315809726715088e-05, "step": 269360 }, { "epoch": 76.46040306556911, "grad_norm": 0.01043967716395855, "learning_rate": 2.3570252625603182e-05, "loss": 2.811867743730545e-05, "step": 269370 }, { "epoch": 76.46324155549247, "grad_norm": 0.0024121992755681276, "learning_rate": 2.356741413567982e-05, "loss": 2.731774002313614e-05, "step": 269380 }, { "epoch": 76.46608004541584, "grad_norm": 0.010387810878455639, "learning_rate": 2.3564575645756458e-05, "loss": 1.4765188097953796e-05, "step": 269390 }, { "epoch": 76.4689185353392, "grad_norm": 0.009078859351575375, "learning_rate": 2.3561737155833096e-05, "loss": 2.6787817478179932e-05, "step": 269400 }, { "epoch": 76.47175702526256, "grad_norm": 0.019415926188230515, "learning_rate": 2.3558898665909738e-05, "loss": 1.634117215871811e-05, "step": 269410 }, { "epoch": 76.47459551518592, "grad_norm": 0.07805802673101425, "learning_rate": 2.355606017598638e-05, "loss": 3.690458834171295e-05, "step": 269420 }, { "epoch": 76.47743400510929, "grad_norm": 0.0013479405315592885, "learning_rate": 2.3553221686063014e-05, "loss": 9.201653301715852e-06, "step": 269430 }, { "epoch": 76.48027249503264, "grad_norm": 0.0057387626729905605, "learning_rate": 2.3550383196139655e-05, "loss": 1.8944591283798216e-05, "step": 269440 }, { "epoch": 76.483110984956, "grad_norm": 0.007139576133340597, "learning_rate": 2.3547544706216293e-05, "loss": 1.3852678239345551e-05, "step": 269450 }, { "epoch": 76.48594947487936, "grad_norm": 0.002605186076834798, "learning_rate": 2.3544706216292934e-05, "loss": 2.4332664906978607e-05, "step": 269460 }, { "epoch": 76.48878796480273, "grad_norm": 0.0021689448039978743, "learning_rate": 2.3541867726369572e-05, "loss": 0.0001159192994236946, "step": 269470 }, { "epoch": 76.49162645472609, "grad_norm": 0.0017222667811438441, "learning_rate": 2.353902923644621e-05, "loss": 0.0002171400934457779, "step": 269480 }, { "epoch": 76.49446494464945, "grad_norm": 0.07696570456027985, "learning_rate": 2.353619074652285e-05, "loss": 0.000520799309015274, "step": 269490 }, { "epoch": 76.49730343457281, "grad_norm": 0.0023420117795467377, "learning_rate": 2.353335225659949e-05, "loss": 0.0007561778649687767, "step": 269500 }, { "epoch": 76.49730343457281, "eval_accuracy": 0.9862656577859732, "eval_loss": 0.058692604303359985, "eval_runtime": 35.5932, "eval_samples_per_second": 441.854, "eval_steps_per_second": 6.911, "step": 269500 }, { "epoch": 76.50014192449616, "grad_norm": 0.05542995035648346, "learning_rate": 2.353051376667613e-05, "loss": 4.6872906386852266e-05, "step": 269510 }, { "epoch": 76.50298041441953, "grad_norm": 0.008485659025609493, "learning_rate": 2.352767527675277e-05, "loss": 0.0031257834285497665, "step": 269520 }, { "epoch": 76.50581890434289, "grad_norm": 0.005937663372606039, "learning_rate": 2.3524836786829407e-05, "loss": 0.0005294766277074813, "step": 269530 }, { "epoch": 76.50865739426625, "grad_norm": 0.028064552694559097, "learning_rate": 2.3521998296906048e-05, "loss": 0.00037918053567409515, "step": 269540 }, { "epoch": 76.51149588418961, "grad_norm": 0.16438548266887665, "learning_rate": 2.3519159806982686e-05, "loss": 0.00021373145282268524, "step": 269550 }, { "epoch": 76.51433437411298, "grad_norm": 0.01718107983469963, "learning_rate": 2.3516321317059324e-05, "loss": 5.742162466049194e-05, "step": 269560 }, { "epoch": 76.51717286403633, "grad_norm": 0.048077140003442764, "learning_rate": 2.3513482827135966e-05, "loss": 0.00011112652719020843, "step": 269570 }, { "epoch": 76.52001135395969, "grad_norm": 0.012503717094659805, "learning_rate": 2.3510644337212604e-05, "loss": 7.275044918060302e-05, "step": 269580 }, { "epoch": 76.52284984388305, "grad_norm": 0.0015766513533890247, "learning_rate": 2.3507805847289245e-05, "loss": 2.7298927307128906e-05, "step": 269590 }, { "epoch": 76.52568833380641, "grad_norm": 1.5313022136688232, "learning_rate": 2.350496735736588e-05, "loss": 0.0005425747483968734, "step": 269600 }, { "epoch": 76.52852682372978, "grad_norm": 2.710380792617798, "learning_rate": 2.350212886744252e-05, "loss": 0.0033219315111637114, "step": 269610 }, { "epoch": 76.53136531365314, "grad_norm": 0.002281260909512639, "learning_rate": 2.3499290377519162e-05, "loss": 7.774252444505691e-05, "step": 269620 }, { "epoch": 76.5342038035765, "grad_norm": 0.08842812478542328, "learning_rate": 2.34964518875958e-05, "loss": 0.00018980056047439574, "step": 269630 }, { "epoch": 76.53704229349985, "grad_norm": 0.012497439049184322, "learning_rate": 2.349361339767244e-05, "loss": 4.6768225729465485e-05, "step": 269640 }, { "epoch": 76.53988078342321, "grad_norm": 0.04154753312468529, "learning_rate": 2.3490774907749076e-05, "loss": 6.568729877471924e-05, "step": 269650 }, { "epoch": 76.54271927334658, "grad_norm": 0.19845768809318542, "learning_rate": 2.3487936417825718e-05, "loss": 7.035769522190094e-05, "step": 269660 }, { "epoch": 76.54555776326994, "grad_norm": 0.002880751620978117, "learning_rate": 2.348509792790236e-05, "loss": 0.00048057809472084044, "step": 269670 }, { "epoch": 76.5483962531933, "grad_norm": 0.8629123568534851, "learning_rate": 2.3482259437978997e-05, "loss": 0.0002836456522345543, "step": 269680 }, { "epoch": 76.55123474311667, "grad_norm": 0.0071768988855183125, "learning_rate": 2.3479420948055635e-05, "loss": 2.987906336784363e-05, "step": 269690 }, { "epoch": 76.55407323304003, "grad_norm": 0.002621095161885023, "learning_rate": 2.3476582458132273e-05, "loss": 3.253333270549774e-05, "step": 269700 }, { "epoch": 76.55691172296338, "grad_norm": 0.007946405559778214, "learning_rate": 2.3473743968208914e-05, "loss": 5.5966898798942566e-05, "step": 269710 }, { "epoch": 76.55975021288674, "grad_norm": 0.0008022324764169753, "learning_rate": 2.3470905478285556e-05, "loss": 0.00012452416121959685, "step": 269720 }, { "epoch": 76.5625887028101, "grad_norm": 0.003954213112592697, "learning_rate": 2.346806698836219e-05, "loss": 0.0020605005323886872, "step": 269730 }, { "epoch": 76.56542719273347, "grad_norm": 0.0030235820449888706, "learning_rate": 2.3465228498438832e-05, "loss": 0.0031335391104221344, "step": 269740 }, { "epoch": 76.56826568265683, "grad_norm": 0.7859320044517517, "learning_rate": 2.346239000851547e-05, "loss": 0.0005116730928421021, "step": 269750 }, { "epoch": 76.57110417258019, "grad_norm": 0.028433669358491898, "learning_rate": 2.345955151859211e-05, "loss": 0.00018807798624038697, "step": 269760 }, { "epoch": 76.57394266250355, "grad_norm": 0.005881054792553186, "learning_rate": 2.345671302866875e-05, "loss": 0.0016502240672707559, "step": 269770 }, { "epoch": 76.5767811524269, "grad_norm": 0.016250144690275192, "learning_rate": 2.3453874538745387e-05, "loss": 0.0023533161729574203, "step": 269780 }, { "epoch": 76.57961964235027, "grad_norm": 0.013712506741285324, "learning_rate": 2.345103604882203e-05, "loss": 0.005345232412219047, "step": 269790 }, { "epoch": 76.58245813227363, "grad_norm": 0.029500892385840416, "learning_rate": 2.3448197558898666e-05, "loss": 0.00026852525770664214, "step": 269800 }, { "epoch": 76.58529662219699, "grad_norm": 0.00647670216858387, "learning_rate": 2.3445359068975304e-05, "loss": 0.00036938227713108064, "step": 269810 }, { "epoch": 76.58813511212036, "grad_norm": 0.01883183792233467, "learning_rate": 2.3442520579051946e-05, "loss": 0.0003628140315413475, "step": 269820 }, { "epoch": 76.59097360204372, "grad_norm": 0.0681917816400528, "learning_rate": 2.3439682089128584e-05, "loss": 0.0006553022190928459, "step": 269830 }, { "epoch": 76.59381209196707, "grad_norm": 0.013938012532889843, "learning_rate": 2.3436843599205225e-05, "loss": 0.008162843436002732, "step": 269840 }, { "epoch": 76.59665058189043, "grad_norm": 0.010017698630690575, "learning_rate": 2.3434005109281863e-05, "loss": 7.948875427246093e-05, "step": 269850 }, { "epoch": 76.59948907181379, "grad_norm": 0.007212966214865446, "learning_rate": 2.34311666193585e-05, "loss": 0.00017278660088777543, "step": 269860 }, { "epoch": 76.60232756173716, "grad_norm": 0.0024288222193717957, "learning_rate": 2.3428328129435143e-05, "loss": 0.0003459973260760307, "step": 269870 }, { "epoch": 76.60516605166052, "grad_norm": 0.09728541225194931, "learning_rate": 2.342548963951178e-05, "loss": 0.00013825614005327224, "step": 269880 }, { "epoch": 76.60800454158388, "grad_norm": 0.003462087595835328, "learning_rate": 2.3422651149588422e-05, "loss": 0.00023854188621044158, "step": 269890 }, { "epoch": 76.61084303150724, "grad_norm": 0.0019909730181097984, "learning_rate": 2.3419812659665056e-05, "loss": 0.00035072248429059984, "step": 269900 }, { "epoch": 76.61368152143059, "grad_norm": 0.007904160767793655, "learning_rate": 2.3416974169741698e-05, "loss": 0.00016837306320667268, "step": 269910 }, { "epoch": 76.61652001135396, "grad_norm": 0.02288665995001793, "learning_rate": 2.341413567981834e-05, "loss": 0.00017859898507595063, "step": 269920 }, { "epoch": 76.61935850127732, "grad_norm": 0.019312897697091103, "learning_rate": 2.3411297189894977e-05, "loss": 0.0003953559324145317, "step": 269930 }, { "epoch": 76.62219699120068, "grad_norm": 0.18531285226345062, "learning_rate": 2.3408458699971615e-05, "loss": 0.00012109819799661636, "step": 269940 }, { "epoch": 76.62503548112404, "grad_norm": 0.023114141076803207, "learning_rate": 2.3405620210048253e-05, "loss": 7.218308746814728e-05, "step": 269950 }, { "epoch": 76.6278739710474, "grad_norm": 0.005823547951877117, "learning_rate": 2.3402781720124895e-05, "loss": 9.62061807513237e-05, "step": 269960 }, { "epoch": 76.63071246097077, "grad_norm": 0.0044481330551207066, "learning_rate": 2.3399943230201536e-05, "loss": 3.0263327062129974e-05, "step": 269970 }, { "epoch": 76.63355095089412, "grad_norm": 0.021296627819538116, "learning_rate": 2.3397104740278174e-05, "loss": 6.650760769844055e-05, "step": 269980 }, { "epoch": 76.63638944081748, "grad_norm": 3.0654616355895996, "learning_rate": 2.3394266250354812e-05, "loss": 0.0004050547257065773, "step": 269990 }, { "epoch": 76.63922793074084, "grad_norm": 0.0017359944758936763, "learning_rate": 2.339142776043145e-05, "loss": 9.930692613124847e-05, "step": 270000 }, { "epoch": 76.63922793074084, "eval_accuracy": 0.9851211292681376, "eval_loss": 0.062506765127182, "eval_runtime": 35.8116, "eval_samples_per_second": 439.159, "eval_steps_per_second": 6.869, "step": 270000 }, { "epoch": 76.64206642066421, "grad_norm": 0.07024214416742325, "learning_rate": 2.338858927050809e-05, "loss": 6.662383675575257e-05, "step": 270010 }, { "epoch": 76.64490491058757, "grad_norm": 0.004816880449652672, "learning_rate": 2.338575078058473e-05, "loss": 2.1336786448955537e-05, "step": 270020 }, { "epoch": 76.64774340051093, "grad_norm": 0.016482533887028694, "learning_rate": 2.3382912290661367e-05, "loss": 2.9537826776504517e-05, "step": 270030 }, { "epoch": 76.6505818904343, "grad_norm": 0.0010835245484486222, "learning_rate": 2.338007380073801e-05, "loss": 0.0021634815260767938, "step": 270040 }, { "epoch": 76.65342038035764, "grad_norm": 0.024671223014593124, "learning_rate": 2.3377235310814647e-05, "loss": 0.0001210276037454605, "step": 270050 }, { "epoch": 76.65625887028101, "grad_norm": 0.004676370415836573, "learning_rate": 2.3374396820891288e-05, "loss": 0.0003544153645634651, "step": 270060 }, { "epoch": 76.65909736020437, "grad_norm": 0.06146597862243652, "learning_rate": 2.3371558330967926e-05, "loss": 0.00010231882333755493, "step": 270070 }, { "epoch": 76.66193585012773, "grad_norm": 0.005994785111397505, "learning_rate": 2.3368719841044564e-05, "loss": 9.620692580938339e-05, "step": 270080 }, { "epoch": 76.6647743400511, "grad_norm": 1.115889072418213, "learning_rate": 2.3365881351121205e-05, "loss": 0.00027184989303350446, "step": 270090 }, { "epoch": 76.66761282997446, "grad_norm": 0.0021441972348839045, "learning_rate": 2.3363042861197847e-05, "loss": 0.0002328813076019287, "step": 270100 }, { "epoch": 76.67045131989781, "grad_norm": 0.0036801656242460012, "learning_rate": 2.336020437127448e-05, "loss": 3.378205001354217e-05, "step": 270110 }, { "epoch": 76.67328980982117, "grad_norm": 0.14438225328922272, "learning_rate": 2.3357365881351123e-05, "loss": 0.00024438723921775817, "step": 270120 }, { "epoch": 76.67612829974453, "grad_norm": 0.007450880017131567, "learning_rate": 2.335452739142776e-05, "loss": 0.0002497546374797821, "step": 270130 }, { "epoch": 76.6789667896679, "grad_norm": 0.001524748164229095, "learning_rate": 2.3351688901504402e-05, "loss": 0.00026070307940244674, "step": 270140 }, { "epoch": 76.68180527959126, "grad_norm": 0.03332792595028877, "learning_rate": 2.334885041158104e-05, "loss": 9.344182908535003e-05, "step": 270150 }, { "epoch": 76.68464376951462, "grad_norm": 0.1495731920003891, "learning_rate": 2.3346011921657678e-05, "loss": 5.938950926065445e-05, "step": 270160 }, { "epoch": 76.68748225943799, "grad_norm": 0.015841854736208916, "learning_rate": 2.334317343173432e-05, "loss": 0.00010065045207738877, "step": 270170 }, { "epoch": 76.69032074936133, "grad_norm": 0.03634214028716087, "learning_rate": 2.3340334941810957e-05, "loss": 0.006727471947669983, "step": 270180 }, { "epoch": 76.6931592392847, "grad_norm": 0.004625824745744467, "learning_rate": 2.33374964518876e-05, "loss": 0.00015559550374746322, "step": 270190 }, { "epoch": 76.69599772920806, "grad_norm": 0.004389577079564333, "learning_rate": 2.3334657961964237e-05, "loss": 4.784055054187775e-05, "step": 270200 }, { "epoch": 76.69883621913142, "grad_norm": 0.09772299230098724, "learning_rate": 2.3331819472040875e-05, "loss": 4.45585697889328e-05, "step": 270210 }, { "epoch": 76.70167470905479, "grad_norm": 0.02380155585706234, "learning_rate": 2.3328980982117516e-05, "loss": 6.588660180568695e-05, "step": 270220 }, { "epoch": 76.70451319897815, "grad_norm": 0.037297479808330536, "learning_rate": 2.3326142492194154e-05, "loss": 0.0010618282482028007, "step": 270230 }, { "epoch": 76.70735168890151, "grad_norm": 0.0013517679180949926, "learning_rate": 2.3323304002270792e-05, "loss": 8.91704112291336e-05, "step": 270240 }, { "epoch": 76.71019017882486, "grad_norm": 0.006368220783770084, "learning_rate": 2.3320465512347433e-05, "loss": 0.0001360587775707245, "step": 270250 }, { "epoch": 76.71302866874822, "grad_norm": 0.0008514040382578969, "learning_rate": 2.331762702242407e-05, "loss": 2.8142519295215606e-05, "step": 270260 }, { "epoch": 76.71586715867159, "grad_norm": 0.007439995650202036, "learning_rate": 2.3314788532500713e-05, "loss": 5.6134164333343505e-05, "step": 270270 }, { "epoch": 76.71870564859495, "grad_norm": 0.006227471865713596, "learning_rate": 2.3311950042577347e-05, "loss": 0.0002314327284693718, "step": 270280 }, { "epoch": 76.72154413851831, "grad_norm": 0.009318776428699493, "learning_rate": 2.330911155265399e-05, "loss": 6.123613566160202e-05, "step": 270290 }, { "epoch": 76.72438262844167, "grad_norm": 0.003184034489095211, "learning_rate": 2.330627306273063e-05, "loss": 2.9526837170124055e-05, "step": 270300 }, { "epoch": 76.72722111836502, "grad_norm": 0.01272000465542078, "learning_rate": 2.3303434572807268e-05, "loss": 4.011373966932297e-05, "step": 270310 }, { "epoch": 76.73005960828839, "grad_norm": 0.004122544080018997, "learning_rate": 2.3300596082883906e-05, "loss": 8.81873071193695e-05, "step": 270320 }, { "epoch": 76.73289809821175, "grad_norm": 0.004456841386854649, "learning_rate": 2.3297757592960544e-05, "loss": 1.7260201275348663e-05, "step": 270330 }, { "epoch": 76.73573658813511, "grad_norm": 0.005786072928458452, "learning_rate": 2.3294919103037185e-05, "loss": 1.873578876256943e-05, "step": 270340 }, { "epoch": 76.73857507805847, "grad_norm": 0.0007761708693578839, "learning_rate": 2.3292080613113827e-05, "loss": 2.3893825709819793e-05, "step": 270350 }, { "epoch": 76.74141356798184, "grad_norm": 0.00865889247506857, "learning_rate": 2.3289242123190465e-05, "loss": 2.3039989173412323e-05, "step": 270360 }, { "epoch": 76.7442520579052, "grad_norm": 0.07270244508981705, "learning_rate": 2.3286403633267103e-05, "loss": 4.718881100416183e-05, "step": 270370 }, { "epoch": 76.74709054782855, "grad_norm": 0.014721941202878952, "learning_rate": 2.328356514334374e-05, "loss": 4.7176145017147064e-05, "step": 270380 }, { "epoch": 76.74992903775191, "grad_norm": 0.03389611467719078, "learning_rate": 2.3280726653420382e-05, "loss": 2.9390305280685423e-05, "step": 270390 }, { "epoch": 76.75276752767527, "grad_norm": 0.009802393615245819, "learning_rate": 2.3277888163497024e-05, "loss": 2.8472952544689177e-05, "step": 270400 }, { "epoch": 76.75560601759864, "grad_norm": 0.00936646107584238, "learning_rate": 2.3275049673573658e-05, "loss": 3.4607574343681334e-05, "step": 270410 }, { "epoch": 76.758444507522, "grad_norm": 0.00397148821502924, "learning_rate": 2.32722111836503e-05, "loss": 1.7768330872058867e-05, "step": 270420 }, { "epoch": 76.76128299744536, "grad_norm": 0.008760307915508747, "learning_rate": 2.3269372693726938e-05, "loss": 3.0878931283950806e-05, "step": 270430 }, { "epoch": 76.76412148736873, "grad_norm": 0.02730792947113514, "learning_rate": 2.326653420380358e-05, "loss": 2.058204263448715e-05, "step": 270440 }, { "epoch": 76.76695997729207, "grad_norm": 0.002840672852471471, "learning_rate": 2.3263695713880217e-05, "loss": 2.444963902235031e-05, "step": 270450 }, { "epoch": 76.76979846721544, "grad_norm": 0.01578807458281517, "learning_rate": 2.3260857223956855e-05, "loss": 2.0538456737995148e-05, "step": 270460 }, { "epoch": 76.7726369571388, "grad_norm": 0.13849836587905884, "learning_rate": 2.3258018734033496e-05, "loss": 4.5649707317352295e-05, "step": 270470 }, { "epoch": 76.77547544706216, "grad_norm": 0.0020882238168269396, "learning_rate": 2.3255180244110134e-05, "loss": 2.5964900851249693e-05, "step": 270480 }, { "epoch": 76.77831393698553, "grad_norm": 0.0018106857314705849, "learning_rate": 2.3252341754186772e-05, "loss": 1.911036670207977e-05, "step": 270490 }, { "epoch": 76.78115242690889, "grad_norm": 0.0032113417983055115, "learning_rate": 2.3249503264263414e-05, "loss": 4.702638834714889e-05, "step": 270500 }, { "epoch": 76.78115242690889, "eval_accuracy": 0.9857569784447129, "eval_loss": 0.05564150586724281, "eval_runtime": 35.7831, "eval_samples_per_second": 439.509, "eval_steps_per_second": 6.875, "step": 270500 }, { "epoch": 76.78399091683225, "grad_norm": 0.006065260618925095, "learning_rate": 2.324666477434005e-05, "loss": 4.050135612487793e-05, "step": 270510 }, { "epoch": 76.7868294067556, "grad_norm": 0.0016499076737090945, "learning_rate": 2.3243826284416693e-05, "loss": 9.202957153320313e-06, "step": 270520 }, { "epoch": 76.78966789667896, "grad_norm": 0.000997760915197432, "learning_rate": 2.324098779449333e-05, "loss": 2.55681574344635e-05, "step": 270530 }, { "epoch": 76.79250638660233, "grad_norm": 0.006220439914613962, "learning_rate": 2.323814930456997e-05, "loss": 5.886200815439224e-05, "step": 270540 }, { "epoch": 76.79534487652569, "grad_norm": 0.011583272367715836, "learning_rate": 2.323531081464661e-05, "loss": 5.126390606164932e-05, "step": 270550 }, { "epoch": 76.79818336644905, "grad_norm": 0.08728073537349701, "learning_rate": 2.3232472324723248e-05, "loss": 3.810338675975799e-05, "step": 270560 }, { "epoch": 76.80102185637242, "grad_norm": 0.00240108254365623, "learning_rate": 2.322963383479989e-05, "loss": 2.4735182523727418e-05, "step": 270570 }, { "epoch": 76.80386034629576, "grad_norm": 0.0028189413715153933, "learning_rate": 2.3226795344876524e-05, "loss": 3.11337411403656e-05, "step": 270580 }, { "epoch": 76.80669883621913, "grad_norm": 0.008630488067865372, "learning_rate": 2.3223956854953166e-05, "loss": 1.792609691619873e-05, "step": 270590 }, { "epoch": 76.80953732614249, "grad_norm": 0.002580889267846942, "learning_rate": 2.3221118365029807e-05, "loss": 2.886597067117691e-05, "step": 270600 }, { "epoch": 76.81237581606585, "grad_norm": 0.001767507172189653, "learning_rate": 2.3218279875106445e-05, "loss": 9.17898491024971e-05, "step": 270610 }, { "epoch": 76.81521430598922, "grad_norm": 0.0018888760823756456, "learning_rate": 2.3215441385183083e-05, "loss": 7.016398012638092e-05, "step": 270620 }, { "epoch": 76.81805279591258, "grad_norm": 0.004399670287966728, "learning_rate": 2.321260289525972e-05, "loss": 0.00011311396956443786, "step": 270630 }, { "epoch": 76.82089128583594, "grad_norm": 0.24823029339313507, "learning_rate": 2.3209764405336362e-05, "loss": 5.929470062255859e-05, "step": 270640 }, { "epoch": 76.82372977575929, "grad_norm": 0.05454858019948006, "learning_rate": 2.3206925915413004e-05, "loss": 6.604120135307312e-05, "step": 270650 }, { "epoch": 76.82656826568265, "grad_norm": 0.0012485120678320527, "learning_rate": 2.3204087425489642e-05, "loss": 3.303084522485733e-05, "step": 270660 }, { "epoch": 76.82940675560602, "grad_norm": 0.004843070171773434, "learning_rate": 2.320124893556628e-05, "loss": 3.832504153251648e-05, "step": 270670 }, { "epoch": 76.83224524552938, "grad_norm": 0.00534519599750638, "learning_rate": 2.3198410445642918e-05, "loss": 0.0002261437475681305, "step": 270680 }, { "epoch": 76.83508373545274, "grad_norm": 0.022299710661172867, "learning_rate": 2.319557195571956e-05, "loss": 7.056277245283127e-05, "step": 270690 }, { "epoch": 76.8379222253761, "grad_norm": 0.006786315701901913, "learning_rate": 2.3192733465796197e-05, "loss": 0.0001463158056139946, "step": 270700 }, { "epoch": 76.84076071529947, "grad_norm": 0.0014686709037050605, "learning_rate": 2.3189894975872835e-05, "loss": 8.02328810095787e-05, "step": 270710 }, { "epoch": 76.84359920522282, "grad_norm": 0.00430584279820323, "learning_rate": 2.3187056485949476e-05, "loss": 3.4564174711704254e-05, "step": 270720 }, { "epoch": 76.84643769514618, "grad_norm": 0.0010409972164779902, "learning_rate": 2.3184217996026114e-05, "loss": 4.428848624229431e-05, "step": 270730 }, { "epoch": 76.84927618506954, "grad_norm": 0.017642877995967865, "learning_rate": 2.3181379506102756e-05, "loss": 0.00013948250561952592, "step": 270740 }, { "epoch": 76.8521146749929, "grad_norm": 0.0014042125549167395, "learning_rate": 2.3178541016179394e-05, "loss": 7.176734507083893e-05, "step": 270750 }, { "epoch": 76.85495316491627, "grad_norm": 0.0032876520417630672, "learning_rate": 2.3175702526256032e-05, "loss": 2.3012608289718627e-05, "step": 270760 }, { "epoch": 76.85779165483963, "grad_norm": 0.16292637586593628, "learning_rate": 2.3172864036332673e-05, "loss": 7.86392018198967e-05, "step": 270770 }, { "epoch": 76.86063014476298, "grad_norm": 0.003482020692899823, "learning_rate": 2.317002554640931e-05, "loss": 2.6262551546096802e-05, "step": 270780 }, { "epoch": 76.86346863468634, "grad_norm": 0.010358206927776337, "learning_rate": 2.316718705648595e-05, "loss": 2.3704953491687775e-05, "step": 270790 }, { "epoch": 76.8663071246097, "grad_norm": 0.013776988722383976, "learning_rate": 2.316434856656259e-05, "loss": 5.7524070143699646e-05, "step": 270800 }, { "epoch": 76.86914561453307, "grad_norm": 0.013554815202951431, "learning_rate": 2.316151007663923e-05, "loss": 1.3650581240653992e-05, "step": 270810 }, { "epoch": 76.87198410445643, "grad_norm": 0.003023760858923197, "learning_rate": 2.315867158671587e-05, "loss": 1.2156181037425995e-05, "step": 270820 }, { "epoch": 76.8748225943798, "grad_norm": 0.007847055792808533, "learning_rate": 2.3155833096792508e-05, "loss": 2.1587871015071868e-05, "step": 270830 }, { "epoch": 76.87766108430316, "grad_norm": 0.00238964450545609, "learning_rate": 2.3152994606869146e-05, "loss": 2.926662564277649e-05, "step": 270840 }, { "epoch": 76.8804995742265, "grad_norm": 0.0006596436141990125, "learning_rate": 2.3150156116945787e-05, "loss": 2.6817992329597472e-05, "step": 270850 }, { "epoch": 76.88333806414987, "grad_norm": 0.0016395221464335918, "learning_rate": 2.3147317627022425e-05, "loss": 1.2875162065029145e-05, "step": 270860 }, { "epoch": 76.88617655407323, "grad_norm": 0.01053901482373476, "learning_rate": 2.3144479137099067e-05, "loss": 2.7552060782909393e-05, "step": 270870 }, { "epoch": 76.8890150439966, "grad_norm": 0.001564144273288548, "learning_rate": 2.31416406471757e-05, "loss": 5.437508225440979e-05, "step": 270880 }, { "epoch": 76.89185353391996, "grad_norm": 0.00824740156531334, "learning_rate": 2.3138802157252343e-05, "loss": 1.8023140728473664e-05, "step": 270890 }, { "epoch": 76.89469202384332, "grad_norm": 0.0020655791740864515, "learning_rate": 2.3135963667328984e-05, "loss": 1.107398420572281e-05, "step": 270900 }, { "epoch": 76.89753051376668, "grad_norm": 0.0014228217769414186, "learning_rate": 2.3133125177405622e-05, "loss": 1.8283352255821227e-05, "step": 270910 }, { "epoch": 76.90036900369003, "grad_norm": 0.24212680757045746, "learning_rate": 2.313028668748226e-05, "loss": 0.0006783587858080864, "step": 270920 }, { "epoch": 76.9032074936134, "grad_norm": 0.020447537302970886, "learning_rate": 2.3127448197558898e-05, "loss": 0.0033985212445259093, "step": 270930 }, { "epoch": 76.90604598353676, "grad_norm": 0.5111569166183472, "learning_rate": 2.312460970763554e-05, "loss": 0.00029075182974338534, "step": 270940 }, { "epoch": 76.90888447346012, "grad_norm": 0.022775832563638687, "learning_rate": 2.312177121771218e-05, "loss": 0.0002206442877650261, "step": 270950 }, { "epoch": 76.91172296338348, "grad_norm": 0.012978332117199898, "learning_rate": 2.3118932727788815e-05, "loss": 0.00025996938347816467, "step": 270960 }, { "epoch": 76.91456145330685, "grad_norm": 0.003016156842932105, "learning_rate": 2.3116094237865457e-05, "loss": 8.763447403907776e-05, "step": 270970 }, { "epoch": 76.91739994323021, "grad_norm": 0.0037071581464260817, "learning_rate": 2.3113255747942095e-05, "loss": 0.0005134554579854011, "step": 270980 }, { "epoch": 76.92023843315356, "grad_norm": 0.14160890877246857, "learning_rate": 2.3110417258018736e-05, "loss": 3.939960151910782e-05, "step": 270990 }, { "epoch": 76.92307692307692, "grad_norm": 0.0031731894705444574, "learning_rate": 2.3107578768095374e-05, "loss": 9.807907044887543e-05, "step": 271000 }, { "epoch": 76.92307692307692, "eval_accuracy": 0.9855662236917403, "eval_loss": 0.05998048186302185, "eval_runtime": 36.0771, "eval_samples_per_second": 435.927, "eval_steps_per_second": 6.819, "step": 271000 }, { "epoch": 76.92591541300028, "grad_norm": 0.0023258384317159653, "learning_rate": 2.3104740278172012e-05, "loss": 4.538111388683319e-05, "step": 271010 }, { "epoch": 76.92875390292365, "grad_norm": 0.001016818918287754, "learning_rate": 2.3101901788248653e-05, "loss": 0.0002111166715621948, "step": 271020 }, { "epoch": 76.93159239284701, "grad_norm": 0.006670498754829168, "learning_rate": 2.309906329832529e-05, "loss": 0.0001610688865184784, "step": 271030 }, { "epoch": 76.93443088277037, "grad_norm": 0.004318575374782085, "learning_rate": 2.3096224808401933e-05, "loss": 6.369892507791519e-05, "step": 271040 }, { "epoch": 76.93726937269372, "grad_norm": 0.002076472621411085, "learning_rate": 2.309338631847857e-05, "loss": 0.00016834046691656114, "step": 271050 }, { "epoch": 76.94010786261708, "grad_norm": 0.0015107074286788702, "learning_rate": 2.309054782855521e-05, "loss": 4.437994211912155e-05, "step": 271060 }, { "epoch": 76.94294635254045, "grad_norm": 0.015102040022611618, "learning_rate": 2.308770933863185e-05, "loss": 0.00013199225068092347, "step": 271070 }, { "epoch": 76.94578484246381, "grad_norm": 0.0032620306592434645, "learning_rate": 2.3084870848708488e-05, "loss": 4.998072981834412e-05, "step": 271080 }, { "epoch": 76.94862333238717, "grad_norm": 0.00209617312066257, "learning_rate": 2.3082032358785126e-05, "loss": 7.739048451185226e-05, "step": 271090 }, { "epoch": 76.95146182231053, "grad_norm": 0.0020146905444562435, "learning_rate": 2.3079193868861767e-05, "loss": 0.0001624109223484993, "step": 271100 }, { "epoch": 76.9543003122339, "grad_norm": 0.01178759429603815, "learning_rate": 2.3076355378938405e-05, "loss": 0.00011615529656410218, "step": 271110 }, { "epoch": 76.95713880215725, "grad_norm": 0.001495665404945612, "learning_rate": 2.3073516889015047e-05, "loss": 5.74721023440361e-05, "step": 271120 }, { "epoch": 76.95997729208061, "grad_norm": 0.0014498558593913913, "learning_rate": 2.3070678399091685e-05, "loss": 7.799733430147171e-05, "step": 271130 }, { "epoch": 76.96281578200397, "grad_norm": 0.010511750355362892, "learning_rate": 2.3067839909168323e-05, "loss": 0.0009009508416056633, "step": 271140 }, { "epoch": 76.96565427192733, "grad_norm": 0.00867635291069746, "learning_rate": 2.3065001419244964e-05, "loss": 5.749184638261795e-05, "step": 271150 }, { "epoch": 76.9684927618507, "grad_norm": 0.0008338121115230024, "learning_rate": 2.3062162929321602e-05, "loss": 7.438752800226212e-05, "step": 271160 }, { "epoch": 76.97133125177406, "grad_norm": 0.015339458361268044, "learning_rate": 2.305932443939824e-05, "loss": 2.729389816522598e-05, "step": 271170 }, { "epoch": 76.97416974169742, "grad_norm": 0.0023641297593712807, "learning_rate": 2.3056485949474878e-05, "loss": 4.2175129055976865e-05, "step": 271180 }, { "epoch": 76.97700823162077, "grad_norm": 0.004863742273300886, "learning_rate": 2.305364745955152e-05, "loss": 3.612637519836426e-05, "step": 271190 }, { "epoch": 76.97984672154413, "grad_norm": 0.002665505977347493, "learning_rate": 2.305080896962816e-05, "loss": 2.471320331096649e-05, "step": 271200 }, { "epoch": 76.9826852114675, "grad_norm": 0.005253246985375881, "learning_rate": 2.30479704797048e-05, "loss": 1.2711621820926666e-05, "step": 271210 }, { "epoch": 76.98552370139086, "grad_norm": 0.00897938758134842, "learning_rate": 2.3045131989781437e-05, "loss": 2.0558014512062073e-05, "step": 271220 }, { "epoch": 76.98836219131422, "grad_norm": 0.00353022082708776, "learning_rate": 2.3042293499858075e-05, "loss": 0.000479867123067379, "step": 271230 }, { "epoch": 76.99120068123759, "grad_norm": 0.017836060374975204, "learning_rate": 2.3039455009934716e-05, "loss": 0.002111095190048218, "step": 271240 }, { "epoch": 76.99403917116095, "grad_norm": 0.003464785870164633, "learning_rate": 2.3036616520011357e-05, "loss": 0.00018092021346092225, "step": 271250 }, { "epoch": 76.9968776610843, "grad_norm": 0.0009151760605163872, "learning_rate": 2.303406187908033e-05, "loss": 0.0033032864332199095, "step": 271260 }, { "epoch": 76.99971615100766, "grad_norm": 0.003737372811883688, "learning_rate": 2.303122338915697e-05, "loss": 0.0004908459261059761, "step": 271270 }, { "epoch": 77.00255464093102, "grad_norm": 0.0033489603083580732, "learning_rate": 2.302838489923361e-05, "loss": 4.849884426221252e-05, "step": 271280 }, { "epoch": 77.00539313085439, "grad_norm": 0.2469228059053421, "learning_rate": 2.3025546409310248e-05, "loss": 8.486341685056686e-05, "step": 271290 }, { "epoch": 77.00823162077775, "grad_norm": 0.0013637954834848642, "learning_rate": 2.302270791938689e-05, "loss": 0.00026959329843521116, "step": 271300 }, { "epoch": 77.01107011070111, "grad_norm": 0.003910786937922239, "learning_rate": 2.3019869429463524e-05, "loss": 9.08493995666504e-05, "step": 271310 }, { "epoch": 77.01390860062446, "grad_norm": 0.0028212727047502995, "learning_rate": 2.3017030939540165e-05, "loss": 8.238647133111954e-05, "step": 271320 }, { "epoch": 77.01674709054782, "grad_norm": 0.04542429372668266, "learning_rate": 2.3014192449616807e-05, "loss": 3.860257565975189e-05, "step": 271330 }, { "epoch": 77.01958558047119, "grad_norm": 0.0012781417462974787, "learning_rate": 2.3011353959693445e-05, "loss": 5.491189658641815e-05, "step": 271340 }, { "epoch": 77.02242407039455, "grad_norm": 0.06655097007751465, "learning_rate": 2.3008515469770083e-05, "loss": 2.8625503182411195e-05, "step": 271350 }, { "epoch": 77.02526256031791, "grad_norm": 0.008395766839385033, "learning_rate": 2.300567697984672e-05, "loss": 1.802220940589905e-05, "step": 271360 }, { "epoch": 77.02810105024128, "grad_norm": 0.0019389069639146328, "learning_rate": 2.3002838489923362e-05, "loss": 2.121254801750183e-05, "step": 271370 }, { "epoch": 77.03093954016464, "grad_norm": 0.01864500716328621, "learning_rate": 2.3000000000000003e-05, "loss": 5.2502378821372986e-05, "step": 271380 }, { "epoch": 77.03377803008799, "grad_norm": 0.006939888000488281, "learning_rate": 2.2997161510076638e-05, "loss": 3.193523734807968e-05, "step": 271390 }, { "epoch": 77.03661652001135, "grad_norm": 0.004349410068243742, "learning_rate": 2.299432302015328e-05, "loss": 4.631653428077698e-05, "step": 271400 }, { "epoch": 77.03945500993471, "grad_norm": 0.011109918355941772, "learning_rate": 2.2991484530229917e-05, "loss": 3.814306110143662e-05, "step": 271410 }, { "epoch": 77.04229349985808, "grad_norm": 0.0019221880938857794, "learning_rate": 2.298864604030656e-05, "loss": 8.37668776512146e-05, "step": 271420 }, { "epoch": 77.04513198978144, "grad_norm": 0.0010091941803693771, "learning_rate": 2.2985807550383197e-05, "loss": 3.1787529587745665e-05, "step": 271430 }, { "epoch": 77.0479704797048, "grad_norm": 0.0013634538045153022, "learning_rate": 2.2982969060459835e-05, "loss": 1.5617161989212035e-05, "step": 271440 }, { "epoch": 77.05080896962816, "grad_norm": 0.0017740955809131265, "learning_rate": 2.2980130570536476e-05, "loss": 3.18136066198349e-05, "step": 271450 }, { "epoch": 77.05364745955151, "grad_norm": 0.0013245795853435993, "learning_rate": 2.2977292080613114e-05, "loss": 5.5992230772972104e-05, "step": 271460 }, { "epoch": 77.05648594947488, "grad_norm": 0.002513387007638812, "learning_rate": 2.2974453590689755e-05, "loss": 9.003523737192154e-05, "step": 271470 }, { "epoch": 77.05932443939824, "grad_norm": 0.1312398910522461, "learning_rate": 2.2971615100766393e-05, "loss": 0.00020090322941541673, "step": 271480 }, { "epoch": 77.0621629293216, "grad_norm": 0.006904202979058027, "learning_rate": 2.296877661084303e-05, "loss": 3.727506846189499e-05, "step": 271490 }, { "epoch": 77.06500141924496, "grad_norm": 0.0501374825835228, "learning_rate": 2.2965938120919673e-05, "loss": 2.887081354856491e-05, "step": 271500 }, { "epoch": 77.06500141924496, "eval_accuracy": 0.9844852800915623, "eval_loss": 0.06490615010261536, "eval_runtime": 35.3696, "eval_samples_per_second": 444.648, "eval_steps_per_second": 6.955, "step": 271500 }, { "epoch": 77.06783990916833, "grad_norm": 0.005145286209881306, "learning_rate": 2.296309963099631e-05, "loss": 0.0009053802117705345, "step": 271510 }, { "epoch": 77.07067839909168, "grad_norm": 0.058292124420404434, "learning_rate": 2.296026114107295e-05, "loss": 4.922784864902496e-05, "step": 271520 }, { "epoch": 77.07351688901504, "grad_norm": 0.002868081210181117, "learning_rate": 2.295742265114959e-05, "loss": 0.0004941048100590706, "step": 271530 }, { "epoch": 77.0763553789384, "grad_norm": 2.8403773307800293, "learning_rate": 2.2954584161226228e-05, "loss": 0.003983336687088013, "step": 271540 }, { "epoch": 77.07919386886176, "grad_norm": 0.0051037766970694065, "learning_rate": 2.295174567130287e-05, "loss": 0.003900972381234169, "step": 271550 }, { "epoch": 77.08203235878513, "grad_norm": 3.255206823348999, "learning_rate": 2.2948907181379507e-05, "loss": 0.001240421086549759, "step": 271560 }, { "epoch": 77.08487084870849, "grad_norm": 0.09566228836774826, "learning_rate": 2.2946068691456145e-05, "loss": 0.002630399912595749, "step": 271570 }, { "epoch": 77.08770933863185, "grad_norm": 0.7381330728530884, "learning_rate": 2.2943514050525122e-05, "loss": 0.0040235757827758786, "step": 271580 }, { "epoch": 77.0905478285552, "grad_norm": 0.7073355317115784, "learning_rate": 2.294067556060176e-05, "loss": 0.00019149500876665116, "step": 271590 }, { "epoch": 77.09338631847857, "grad_norm": 0.004766655154526234, "learning_rate": 2.29378370706784e-05, "loss": 9.679961949586869e-05, "step": 271600 }, { "epoch": 77.09622480840193, "grad_norm": 9.676045417785645, "learning_rate": 2.293499858075504e-05, "loss": 0.0014627592638134957, "step": 271610 }, { "epoch": 77.09906329832529, "grad_norm": 0.005013293121010065, "learning_rate": 2.2932160090831677e-05, "loss": 4.195161163806915e-05, "step": 271620 }, { "epoch": 77.10190178824865, "grad_norm": 0.020997457206249237, "learning_rate": 2.292932160090832e-05, "loss": 0.0003977013751864433, "step": 271630 }, { "epoch": 77.10474027817202, "grad_norm": 0.012420403771102428, "learning_rate": 2.2926483110984957e-05, "loss": 2.6918575167655943e-05, "step": 271640 }, { "epoch": 77.10757876809538, "grad_norm": 0.04321796074509621, "learning_rate": 2.2923644621061595e-05, "loss": 6.199702620506287e-05, "step": 271650 }, { "epoch": 77.11041725801873, "grad_norm": 0.009433507919311523, "learning_rate": 2.2920806131138236e-05, "loss": 0.0001010676845908165, "step": 271660 }, { "epoch": 77.11325574794209, "grad_norm": 0.006939398590475321, "learning_rate": 2.2917967641214874e-05, "loss": 2.6968494057655335e-05, "step": 271670 }, { "epoch": 77.11609423786545, "grad_norm": 0.002205867087468505, "learning_rate": 2.2915129151291515e-05, "loss": 6.597265601158143e-05, "step": 271680 }, { "epoch": 77.11893272778882, "grad_norm": 0.002501808339729905, "learning_rate": 2.2912290661368153e-05, "loss": 3.6679580807685853e-05, "step": 271690 }, { "epoch": 77.12177121771218, "grad_norm": 0.009525882080197334, "learning_rate": 2.290945217144479e-05, "loss": 5.557239055633545e-05, "step": 271700 }, { "epoch": 77.12460970763554, "grad_norm": 0.010849544778466225, "learning_rate": 2.2906613681521433e-05, "loss": 2.0115822553634644e-05, "step": 271710 }, { "epoch": 77.1274481975589, "grad_norm": 4.641879081726074, "learning_rate": 2.290377519159807e-05, "loss": 0.0014257820323109627, "step": 271720 }, { "epoch": 77.13028668748225, "grad_norm": 2.532846689224243, "learning_rate": 2.2900936701674712e-05, "loss": 0.0003125492483377457, "step": 271730 }, { "epoch": 77.13312517740562, "grad_norm": 1.458115816116333, "learning_rate": 2.2898098211751347e-05, "loss": 0.00023749787360429763, "step": 271740 }, { "epoch": 77.13596366732898, "grad_norm": 0.030530860647559166, "learning_rate": 2.2895259721827988e-05, "loss": 0.00012552943080663682, "step": 271750 }, { "epoch": 77.13880215725234, "grad_norm": 0.017234142869710922, "learning_rate": 2.289242123190463e-05, "loss": 8.973758667707443e-05, "step": 271760 }, { "epoch": 77.1416406471757, "grad_norm": 0.0029433006420731544, "learning_rate": 2.2889582741981268e-05, "loss": 0.000347677618265152, "step": 271770 }, { "epoch": 77.14447913709907, "grad_norm": 0.013677381910383701, "learning_rate": 2.2886744252057905e-05, "loss": 0.002315747179090977, "step": 271780 }, { "epoch": 77.14731762702242, "grad_norm": 0.6045483350753784, "learning_rate": 2.2883905762134543e-05, "loss": 0.0014623042196035385, "step": 271790 }, { "epoch": 77.15015611694578, "grad_norm": 5.199056625366211, "learning_rate": 2.2881067272211185e-05, "loss": 0.0007514428347349167, "step": 271800 }, { "epoch": 77.15299460686914, "grad_norm": 0.02566665969789028, "learning_rate": 2.2878228782287826e-05, "loss": 6.0366839170455934e-05, "step": 271810 }, { "epoch": 77.1558330967925, "grad_norm": 0.041058674454689026, "learning_rate": 2.287539029236446e-05, "loss": 0.0001042831689119339, "step": 271820 }, { "epoch": 77.15867158671587, "grad_norm": 0.010448617860674858, "learning_rate": 2.2872551802441102e-05, "loss": 0.00023144781589508058, "step": 271830 }, { "epoch": 77.16151007663923, "grad_norm": 0.0021792124025523663, "learning_rate": 2.286971331251774e-05, "loss": 3.343243151903153e-05, "step": 271840 }, { "epoch": 77.1643485665626, "grad_norm": 0.7143957018852234, "learning_rate": 2.286687482259438e-05, "loss": 0.004753376543521881, "step": 271850 }, { "epoch": 77.16718705648594, "grad_norm": 0.0019470860715955496, "learning_rate": 2.286403633267102e-05, "loss": 0.00011851582676172256, "step": 271860 }, { "epoch": 77.1700255464093, "grad_norm": 0.014058203436434269, "learning_rate": 2.2861197842747658e-05, "loss": 0.00054401233792305, "step": 271870 }, { "epoch": 77.17286403633267, "grad_norm": 0.04019547253847122, "learning_rate": 2.28583593528243e-05, "loss": 5.30412420630455e-05, "step": 271880 }, { "epoch": 77.17570252625603, "grad_norm": 0.012020103633403778, "learning_rate": 2.2855520862900937e-05, "loss": 1.8435530364513397e-05, "step": 271890 }, { "epoch": 77.1785410161794, "grad_norm": 0.030467044562101364, "learning_rate": 2.2852682372977578e-05, "loss": 4.8502534627914426e-05, "step": 271900 }, { "epoch": 77.18137950610276, "grad_norm": 0.0009320618701167405, "learning_rate": 2.2849843883054216e-05, "loss": 5.549117922782898e-05, "step": 271910 }, { "epoch": 77.18421799602612, "grad_norm": 0.015466323122382164, "learning_rate": 2.2847005393130854e-05, "loss": 8.216947317123413e-05, "step": 271920 }, { "epoch": 77.18705648594947, "grad_norm": 0.0037861098535358906, "learning_rate": 2.2844166903207496e-05, "loss": 2.620033919811249e-05, "step": 271930 }, { "epoch": 77.18989497587283, "grad_norm": 0.001067622099071741, "learning_rate": 2.2841328413284134e-05, "loss": 2.4254806339740755e-05, "step": 271940 }, { "epoch": 77.1927334657962, "grad_norm": 0.021581295877695084, "learning_rate": 2.283848992336077e-05, "loss": 3.196634352207184e-05, "step": 271950 }, { "epoch": 77.19557195571956, "grad_norm": 0.0068866354413330555, "learning_rate": 2.2835651433437413e-05, "loss": 3.434810787439346e-05, "step": 271960 }, { "epoch": 77.19841044564292, "grad_norm": 0.001026597456075251, "learning_rate": 2.283281294351405e-05, "loss": 3.0295364558696748e-05, "step": 271970 }, { "epoch": 77.20124893556628, "grad_norm": 0.004064358305186033, "learning_rate": 2.2829974453590692e-05, "loss": 0.0003274831920862198, "step": 271980 }, { "epoch": 77.20408742548965, "grad_norm": 0.012442789040505886, "learning_rate": 2.282713596366733e-05, "loss": 6.74733892083168e-05, "step": 271990 }, { "epoch": 77.206925915413, "grad_norm": 0.0013142561074346304, "learning_rate": 2.2824297473743968e-05, "loss": 6.183311343193054e-05, "step": 272000 }, { "epoch": 77.206925915413, "eval_accuracy": 0.9856933935270554, "eval_loss": 0.060780491679906845, "eval_runtime": 35.7222, "eval_samples_per_second": 440.258, "eval_steps_per_second": 6.886, "step": 272000 }, { "epoch": 77.20976440533636, "grad_norm": 0.043037015944719315, "learning_rate": 2.282145898382061e-05, "loss": 0.00012730453163385392, "step": 272010 }, { "epoch": 77.21260289525972, "grad_norm": 0.06244143098592758, "learning_rate": 2.2818620493897248e-05, "loss": 5.730707198381424e-05, "step": 272020 }, { "epoch": 77.21544138518308, "grad_norm": 0.0060256896540522575, "learning_rate": 2.2815782003973886e-05, "loss": 2.464279532432556e-05, "step": 272030 }, { "epoch": 77.21827987510645, "grad_norm": 0.02057739533483982, "learning_rate": 2.2812943514050524e-05, "loss": 9.567271918058396e-05, "step": 272040 }, { "epoch": 77.22111836502981, "grad_norm": 0.06987816095352173, "learning_rate": 2.2810105024127165e-05, "loss": 3.2641924917697906e-05, "step": 272050 }, { "epoch": 77.22395685495316, "grad_norm": 0.011946449987590313, "learning_rate": 2.2807266534203806e-05, "loss": 9.934250265359878e-05, "step": 272060 }, { "epoch": 77.22679534487652, "grad_norm": 0.005356386303901672, "learning_rate": 2.2804428044280444e-05, "loss": 2.7965940535068513e-05, "step": 272070 }, { "epoch": 77.22963383479988, "grad_norm": 0.0013684754958376288, "learning_rate": 2.2801589554357082e-05, "loss": 1.3879500329494476e-05, "step": 272080 }, { "epoch": 77.23247232472325, "grad_norm": 0.0013025365769863129, "learning_rate": 2.279875106443372e-05, "loss": 5.533546209335327e-05, "step": 272090 }, { "epoch": 77.23531081464661, "grad_norm": 0.005679137073457241, "learning_rate": 2.2795912574510362e-05, "loss": 8.270516991615295e-05, "step": 272100 }, { "epoch": 77.23814930456997, "grad_norm": 0.0030788923613727093, "learning_rate": 2.2793074084587003e-05, "loss": 4.413202404975891e-05, "step": 272110 }, { "epoch": 77.24098779449334, "grad_norm": 0.0016606138087809086, "learning_rate": 2.2790235594663638e-05, "loss": 3.211162984371185e-05, "step": 272120 }, { "epoch": 77.24382628441668, "grad_norm": 0.00246390700340271, "learning_rate": 2.278739710474028e-05, "loss": 1.1678040027618409e-05, "step": 272130 }, { "epoch": 77.24666477434005, "grad_norm": 0.03906668350100517, "learning_rate": 2.2784558614816917e-05, "loss": 3.501400351524353e-05, "step": 272140 }, { "epoch": 77.24950326426341, "grad_norm": 0.00156927271746099, "learning_rate": 2.278172012489356e-05, "loss": 5.4333917796611785e-05, "step": 272150 }, { "epoch": 77.25234175418677, "grad_norm": 0.0017057302175089717, "learning_rate": 2.2778881634970196e-05, "loss": 0.00013862308114767074, "step": 272160 }, { "epoch": 77.25518024411014, "grad_norm": 0.0011152857914566994, "learning_rate": 2.2776043145046834e-05, "loss": 8.945949375629425e-05, "step": 272170 }, { "epoch": 77.2580187340335, "grad_norm": 0.010627525858581066, "learning_rate": 2.2773204655123476e-05, "loss": 8.237417787313462e-05, "step": 272180 }, { "epoch": 77.26085722395686, "grad_norm": 0.013106838800013065, "learning_rate": 2.2770366165200117e-05, "loss": 0.0002599811181426048, "step": 272190 }, { "epoch": 77.26369571388021, "grad_norm": 0.009174310602247715, "learning_rate": 2.2767527675276755e-05, "loss": 5.716215819120407e-05, "step": 272200 }, { "epoch": 77.26653420380357, "grad_norm": 0.008637665770947933, "learning_rate": 2.2764689185353393e-05, "loss": 0.0004503257572650909, "step": 272210 }, { "epoch": 77.26937269372694, "grad_norm": 0.0437418632209301, "learning_rate": 2.276185069543003e-05, "loss": 9.706281125545501e-05, "step": 272220 }, { "epoch": 77.2722111836503, "grad_norm": 0.008223151788115501, "learning_rate": 2.2759012205506672e-05, "loss": 5.383361130952835e-05, "step": 272230 }, { "epoch": 77.27504967357366, "grad_norm": 0.006804970093071461, "learning_rate": 2.275617371558331e-05, "loss": 6.394293159246444e-05, "step": 272240 }, { "epoch": 77.27788816349702, "grad_norm": 0.1680215746164322, "learning_rate": 2.275333522565995e-05, "loss": 6.52652233839035e-05, "step": 272250 }, { "epoch": 77.28072665342037, "grad_norm": 0.004202220123261213, "learning_rate": 2.275049673573659e-05, "loss": 2.529416233301163e-05, "step": 272260 }, { "epoch": 77.28356514334374, "grad_norm": 0.0019829587545245886, "learning_rate": 2.2747658245813228e-05, "loss": 1.6679428517818452e-05, "step": 272270 }, { "epoch": 77.2864036332671, "grad_norm": 0.004426958039402962, "learning_rate": 2.274481975588987e-05, "loss": 2.4299696087837218e-05, "step": 272280 }, { "epoch": 77.28924212319046, "grad_norm": 0.010676288977265358, "learning_rate": 2.2741981265966507e-05, "loss": 1.8513761460781097e-05, "step": 272290 }, { "epoch": 77.29208061311382, "grad_norm": 0.004265394061803818, "learning_rate": 2.2739142776043145e-05, "loss": 1.6965530812740325e-05, "step": 272300 }, { "epoch": 77.29491910303719, "grad_norm": 0.0009538179729133844, "learning_rate": 2.2736304286119787e-05, "loss": 2.021249383687973e-05, "step": 272310 }, { "epoch": 77.29775759296055, "grad_norm": 0.0016588722355663776, "learning_rate": 2.2733465796196425e-05, "loss": 1.1038966476917267e-05, "step": 272320 }, { "epoch": 77.3005960828839, "grad_norm": 0.051062967628240585, "learning_rate": 2.2730627306273063e-05, "loss": 2.8271786868572234e-05, "step": 272330 }, { "epoch": 77.30343457280726, "grad_norm": 0.005179556552320719, "learning_rate": 2.2727788816349704e-05, "loss": 1.0371394455432893e-05, "step": 272340 }, { "epoch": 77.30627306273063, "grad_norm": 0.00044054302270524204, "learning_rate": 2.2724950326426342e-05, "loss": 1.814626157283783e-05, "step": 272350 }, { "epoch": 77.30911155265399, "grad_norm": 0.004269956611096859, "learning_rate": 2.2722111836502983e-05, "loss": 1.9240938127040862e-05, "step": 272360 }, { "epoch": 77.31195004257735, "grad_norm": 0.005236954428255558, "learning_rate": 2.271927334657962e-05, "loss": 1.6231276094913483e-05, "step": 272370 }, { "epoch": 77.31478853250071, "grad_norm": 0.012469621375203133, "learning_rate": 2.271643485665626e-05, "loss": 3.076549619436264e-05, "step": 272380 }, { "epoch": 77.31762702242408, "grad_norm": 0.000998887699097395, "learning_rate": 2.27135963667329e-05, "loss": 3.049299120903015e-05, "step": 272390 }, { "epoch": 77.32046551234743, "grad_norm": 0.0013781265588477254, "learning_rate": 2.271075787680954e-05, "loss": 2.8129108250141145e-05, "step": 272400 }, { "epoch": 77.32330400227079, "grad_norm": 0.0018616345478221774, "learning_rate": 2.270791938688618e-05, "loss": 9.365212172269821e-05, "step": 272410 }, { "epoch": 77.32614249219415, "grad_norm": 0.014067240059375763, "learning_rate": 2.2705080896962815e-05, "loss": 3.570225089788437e-05, "step": 272420 }, { "epoch": 77.32898098211751, "grad_norm": 0.01802808977663517, "learning_rate": 2.2702242407039456e-05, "loss": 2.456456422805786e-05, "step": 272430 }, { "epoch": 77.33181947204088, "grad_norm": 0.006597515661269426, "learning_rate": 2.2699403917116097e-05, "loss": 1.4445930719375611e-05, "step": 272440 }, { "epoch": 77.33465796196424, "grad_norm": 0.0016597695648670197, "learning_rate": 2.2696565427192735e-05, "loss": 3.882516175508499e-05, "step": 272450 }, { "epoch": 77.3374964518876, "grad_norm": 0.001933731255121529, "learning_rate": 2.2693726937269373e-05, "loss": 2.780817449092865e-05, "step": 272460 }, { "epoch": 77.34033494181095, "grad_norm": 0.0015496131964027882, "learning_rate": 2.269088844734601e-05, "loss": 1.874975860118866e-05, "step": 272470 }, { "epoch": 77.34317343173431, "grad_norm": 0.0014753907453268766, "learning_rate": 2.2688049957422653e-05, "loss": 3.462694585323334e-05, "step": 272480 }, { "epoch": 77.34601192165768, "grad_norm": 0.0023780015762895346, "learning_rate": 2.2685211467499294e-05, "loss": 1.5620701014995574e-05, "step": 272490 }, { "epoch": 77.34885041158104, "grad_norm": 0.0019014774588868022, "learning_rate": 2.268237297757593e-05, "loss": 2.512820065021515e-05, "step": 272500 }, { "epoch": 77.34885041158104, "eval_accuracy": 0.986965091880206, "eval_loss": 0.05227120220661163, "eval_runtime": 36.2357, "eval_samples_per_second": 434.019, "eval_steps_per_second": 6.789, "step": 272500 }, { "epoch": 77.3516889015044, "grad_norm": 0.002360337181016803, "learning_rate": 2.267953448765257e-05, "loss": 2.9310211539268492e-05, "step": 272510 }, { "epoch": 77.35452739142777, "grad_norm": 0.0013088699197396636, "learning_rate": 2.2676695997729208e-05, "loss": 2.7681514620780944e-05, "step": 272520 }, { "epoch": 77.35736588135111, "grad_norm": 0.005150010343641043, "learning_rate": 2.267385750780585e-05, "loss": 1.6260147094726563e-05, "step": 272530 }, { "epoch": 77.36020437127448, "grad_norm": 0.006279855500906706, "learning_rate": 2.2671019017882487e-05, "loss": 7.332116365432739e-05, "step": 272540 }, { "epoch": 77.36304286119784, "grad_norm": 0.003178427228704095, "learning_rate": 2.2668180527959125e-05, "loss": 1.3522244989871978e-05, "step": 272550 }, { "epoch": 77.3658813511212, "grad_norm": 0.0015732619212940335, "learning_rate": 2.2665342038035767e-05, "loss": 5.822628736495972e-05, "step": 272560 }, { "epoch": 77.36871984104457, "grad_norm": 0.002947760047391057, "learning_rate": 2.2662503548112405e-05, "loss": 0.00023593883961439132, "step": 272570 }, { "epoch": 77.37155833096793, "grad_norm": 0.0019603590480983257, "learning_rate": 2.2659665058189046e-05, "loss": 0.005095641687512397, "step": 272580 }, { "epoch": 77.37439682089129, "grad_norm": 0.005143502727150917, "learning_rate": 2.2656826568265684e-05, "loss": 3.870148211717606e-05, "step": 272590 }, { "epoch": 77.37723531081464, "grad_norm": 9.224297523498535, "learning_rate": 2.2653988078342322e-05, "loss": 0.0018342429772019385, "step": 272600 }, { "epoch": 77.380073800738, "grad_norm": 0.38431188464164734, "learning_rate": 2.2651149588418963e-05, "loss": 0.0001091761514544487, "step": 272610 }, { "epoch": 77.38291229066137, "grad_norm": 0.011584758758544922, "learning_rate": 2.26483110984956e-05, "loss": 2.9623880982398988e-05, "step": 272620 }, { "epoch": 77.38575078058473, "grad_norm": 0.002868643496185541, "learning_rate": 2.264547260857224e-05, "loss": 1.8659234046936036e-05, "step": 272630 }, { "epoch": 77.38858927050809, "grad_norm": 0.016662951558828354, "learning_rate": 2.264263411864888e-05, "loss": 5.385465919971466e-05, "step": 272640 }, { "epoch": 77.39142776043145, "grad_norm": 0.0034726278390735388, "learning_rate": 2.263979562872552e-05, "loss": 6.24561682343483e-05, "step": 272650 }, { "epoch": 77.39426625035482, "grad_norm": 0.002662711078301072, "learning_rate": 2.263695713880216e-05, "loss": 3.599058836698532e-05, "step": 272660 }, { "epoch": 77.39710474027817, "grad_norm": 0.002606204943731427, "learning_rate": 2.2634118648878798e-05, "loss": 4.085302352905273e-05, "step": 272670 }, { "epoch": 77.39994323020153, "grad_norm": 0.0064167785458266735, "learning_rate": 2.2631280158955436e-05, "loss": 3.688596189022064e-05, "step": 272680 }, { "epoch": 77.40278172012489, "grad_norm": 0.002159043215215206, "learning_rate": 2.2628441669032077e-05, "loss": 2.5020539760589598e-05, "step": 272690 }, { "epoch": 77.40562021004826, "grad_norm": 0.0010014629224315286, "learning_rate": 2.2625603179108715e-05, "loss": 2.5946088135242464e-05, "step": 272700 }, { "epoch": 77.40845869997162, "grad_norm": 0.0035404351074248552, "learning_rate": 2.2622764689185353e-05, "loss": 1.3750605285167694e-05, "step": 272710 }, { "epoch": 77.41129718989498, "grad_norm": 0.015423335134983063, "learning_rate": 2.261992619926199e-05, "loss": 2.1232850849628447e-05, "step": 272720 }, { "epoch": 77.41413567981834, "grad_norm": 0.01176750473678112, "learning_rate": 2.2617087709338633e-05, "loss": 2.1506473422050477e-05, "step": 272730 }, { "epoch": 77.41697416974169, "grad_norm": 0.0014753697905689478, "learning_rate": 2.2614249219415274e-05, "loss": 2.0876899361610414e-05, "step": 272740 }, { "epoch": 77.41981265966506, "grad_norm": 0.003173029748722911, "learning_rate": 2.2611410729491912e-05, "loss": 1.36522576212883e-05, "step": 272750 }, { "epoch": 77.42265114958842, "grad_norm": 0.003864034079015255, "learning_rate": 2.260857223956855e-05, "loss": 2.0562298595905303e-05, "step": 272760 }, { "epoch": 77.42548963951178, "grad_norm": 0.0037391758523881435, "learning_rate": 2.2605733749645188e-05, "loss": 2.8908252716064453e-05, "step": 272770 }, { "epoch": 77.42832812943514, "grad_norm": 0.0009323362610302866, "learning_rate": 2.260289525972183e-05, "loss": 1.95113942027092e-05, "step": 272780 }, { "epoch": 77.4311666193585, "grad_norm": 0.0008527092286385596, "learning_rate": 2.260005676979847e-05, "loss": 9.519979357719421e-06, "step": 272790 }, { "epoch": 77.43400510928186, "grad_norm": 0.01569034717977047, "learning_rate": 2.2597218279875105e-05, "loss": 2.1203793585300447e-05, "step": 272800 }, { "epoch": 77.43684359920522, "grad_norm": 0.008394889533519745, "learning_rate": 2.2594379789951747e-05, "loss": 2.92263925075531e-05, "step": 272810 }, { "epoch": 77.43968208912858, "grad_norm": 0.0034811445511877537, "learning_rate": 2.2591541300028385e-05, "loss": 3.9359182119369505e-05, "step": 272820 }, { "epoch": 77.44252057905194, "grad_norm": 0.0006713751936331391, "learning_rate": 2.2588702810105026e-05, "loss": 2.6675499975681304e-05, "step": 272830 }, { "epoch": 77.4453590689753, "grad_norm": 0.029058024287223816, "learning_rate": 2.2585864320181664e-05, "loss": 6.898734718561173e-05, "step": 272840 }, { "epoch": 77.44819755889867, "grad_norm": 0.00740710599347949, "learning_rate": 2.2583025830258302e-05, "loss": 2.6524625718593596e-05, "step": 272850 }, { "epoch": 77.45103604882203, "grad_norm": 0.03378129377961159, "learning_rate": 2.2580187340334944e-05, "loss": 2.6132166385650635e-05, "step": 272860 }, { "epoch": 77.45387453874538, "grad_norm": 0.0032941740937530994, "learning_rate": 2.257734885041158e-05, "loss": 1.7558038234710693e-05, "step": 272870 }, { "epoch": 77.45671302866874, "grad_norm": 0.011508445255458355, "learning_rate": 2.2574510360488223e-05, "loss": 2.001263201236725e-05, "step": 272880 }, { "epoch": 77.45955151859211, "grad_norm": 0.0036874122451990843, "learning_rate": 2.257167187056486e-05, "loss": 2.0584836602210998e-05, "step": 272890 }, { "epoch": 77.46239000851547, "grad_norm": 0.16130070388317108, "learning_rate": 2.25688333806415e-05, "loss": 5.416981875896454e-05, "step": 272900 }, { "epoch": 77.46522849843883, "grad_norm": 0.0019491014536470175, "learning_rate": 2.256599489071814e-05, "loss": 1.2225285172462463e-05, "step": 272910 }, { "epoch": 77.4680669883622, "grad_norm": 0.011757954023778439, "learning_rate": 2.2563156400794778e-05, "loss": 2.1094270050525665e-05, "step": 272920 }, { "epoch": 77.47090547828556, "grad_norm": 0.0012718841899186373, "learning_rate": 2.2560317910871416e-05, "loss": 2.335458993911743e-05, "step": 272930 }, { "epoch": 77.47374396820891, "grad_norm": 0.000716762151569128, "learning_rate": 2.2557479420948058e-05, "loss": 1.2713298201560973e-05, "step": 272940 }, { "epoch": 77.47658245813227, "grad_norm": 0.0006563130882568657, "learning_rate": 2.2554640931024696e-05, "loss": 1.7213821411132813e-05, "step": 272950 }, { "epoch": 77.47942094805563, "grad_norm": 0.0014670531963929534, "learning_rate": 2.2551802441101337e-05, "loss": 1.4850497245788575e-05, "step": 272960 }, { "epoch": 77.482259437979, "grad_norm": 0.011147643439471722, "learning_rate": 2.254896395117797e-05, "loss": 2.374015748500824e-05, "step": 272970 }, { "epoch": 77.48509792790236, "grad_norm": 0.0014257591683417559, "learning_rate": 2.2546125461254613e-05, "loss": 8.892640471458435e-06, "step": 272980 }, { "epoch": 77.48793641782572, "grad_norm": 0.0069803278893232346, "learning_rate": 2.2543286971331254e-05, "loss": 1.585446298122406e-05, "step": 272990 }, { "epoch": 77.49077490774907, "grad_norm": 0.011315039359033108, "learning_rate": 2.2540448481407892e-05, "loss": 1.5850365161895753e-05, "step": 273000 }, { "epoch": 77.49077490774907, "eval_accuracy": 0.9871558466331786, "eval_loss": 0.05250488966703415, "eval_runtime": 35.7459, "eval_samples_per_second": 439.966, "eval_steps_per_second": 6.882, "step": 273000 }, { "epoch": 77.49361339767243, "grad_norm": 0.0023935039062052965, "learning_rate": 2.253760999148453e-05, "loss": 1.008007675409317e-05, "step": 273010 }, { "epoch": 77.4964518875958, "grad_norm": 0.002008492359891534, "learning_rate": 2.2534771501561168e-05, "loss": 1.1097826063632966e-05, "step": 273020 }, { "epoch": 77.49929037751916, "grad_norm": 0.003691214369609952, "learning_rate": 2.253193301163781e-05, "loss": 3.22556123137474e-05, "step": 273030 }, { "epoch": 77.50212886744252, "grad_norm": 0.0018880991265177727, "learning_rate": 2.252909452171445e-05, "loss": 1.7622858285903932e-05, "step": 273040 }, { "epoch": 77.50496735736589, "grad_norm": 0.0034464490599930286, "learning_rate": 2.252625603179109e-05, "loss": 6.416346877813339e-05, "step": 273050 }, { "epoch": 77.50780584728925, "grad_norm": 0.0013990751467645168, "learning_rate": 2.2523417541867727e-05, "loss": 0.0003711951896548271, "step": 273060 }, { "epoch": 77.5106443372126, "grad_norm": 0.002814744133502245, "learning_rate": 2.2520579051944365e-05, "loss": 0.0001800447702407837, "step": 273070 }, { "epoch": 77.51348282713596, "grad_norm": 0.05756504088640213, "learning_rate": 2.2517740562021006e-05, "loss": 0.0014646397903561592, "step": 273080 }, { "epoch": 77.51632131705932, "grad_norm": 0.006867547053843737, "learning_rate": 2.2514902072097648e-05, "loss": 7.966216653585435e-05, "step": 273090 }, { "epoch": 77.51915980698269, "grad_norm": 0.012981590814888477, "learning_rate": 2.2512063582174282e-05, "loss": 5.4753944277763365e-05, "step": 273100 }, { "epoch": 77.52199829690605, "grad_norm": 0.003977746237069368, "learning_rate": 2.2509225092250924e-05, "loss": 0.0006632836535573006, "step": 273110 }, { "epoch": 77.52483678682941, "grad_norm": 0.004161823075264692, "learning_rate": 2.2506386602327562e-05, "loss": 7.291920483112335e-05, "step": 273120 }, { "epoch": 77.52767527675277, "grad_norm": 0.010787720791995525, "learning_rate": 2.2503548112404203e-05, "loss": 7.96884298324585e-05, "step": 273130 }, { "epoch": 77.53051376667612, "grad_norm": 0.033935535699129105, "learning_rate": 2.250070962248084e-05, "loss": 8.99285078048706e-05, "step": 273140 }, { "epoch": 77.53335225659949, "grad_norm": 0.0034595003817230463, "learning_rate": 2.249787113255748e-05, "loss": 4.119891673326492e-05, "step": 273150 }, { "epoch": 77.53619074652285, "grad_norm": 0.0029517514631152153, "learning_rate": 2.249503264263412e-05, "loss": 2.402961254119873e-05, "step": 273160 }, { "epoch": 77.53902923644621, "grad_norm": 0.015194268897175789, "learning_rate": 2.249219415271076e-05, "loss": 6.391871720552444e-05, "step": 273170 }, { "epoch": 77.54186772636957, "grad_norm": 0.02992132492363453, "learning_rate": 2.2489355662787396e-05, "loss": 3.904830664396286e-05, "step": 273180 }, { "epoch": 77.54470621629294, "grad_norm": 0.11787249892950058, "learning_rate": 2.2486517172864038e-05, "loss": 8.221585303544999e-05, "step": 273190 }, { "epoch": 77.5475447062163, "grad_norm": 0.0011880301171913743, "learning_rate": 2.2483678682940676e-05, "loss": 1.277681440114975e-05, "step": 273200 }, { "epoch": 77.55038319613965, "grad_norm": 0.012711089104413986, "learning_rate": 2.2480840193017317e-05, "loss": 1.583639532327652e-05, "step": 273210 }, { "epoch": 77.55322168606301, "grad_norm": 0.0010770594235509634, "learning_rate": 2.2478001703093955e-05, "loss": 1.804772764444351e-05, "step": 273220 }, { "epoch": 77.55606017598637, "grad_norm": 0.00594246294349432, "learning_rate": 2.2475163213170593e-05, "loss": 1.5644356608390808e-05, "step": 273230 }, { "epoch": 77.55889866590974, "grad_norm": 0.0018588524544611573, "learning_rate": 2.2472324723247234e-05, "loss": 1.5993602573871613e-05, "step": 273240 }, { "epoch": 77.5617371558331, "grad_norm": 0.009392051957547665, "learning_rate": 2.2469486233323872e-05, "loss": 2.3670494556427002e-05, "step": 273250 }, { "epoch": 77.56457564575646, "grad_norm": 0.003893459914252162, "learning_rate": 2.2466647743400514e-05, "loss": 1.2963637709617615e-05, "step": 273260 }, { "epoch": 77.56741413567981, "grad_norm": 0.16166679561138153, "learning_rate": 2.246380925347715e-05, "loss": 6.530582904815674e-05, "step": 273270 }, { "epoch": 77.57025262560317, "grad_norm": 0.0077712018974125385, "learning_rate": 2.246097076355379e-05, "loss": 5.137939006090164e-05, "step": 273280 }, { "epoch": 77.57309111552654, "grad_norm": 0.07671002298593521, "learning_rate": 2.245813227363043e-05, "loss": 3.221072256565094e-05, "step": 273290 }, { "epoch": 77.5759296054499, "grad_norm": 0.0010997224599123001, "learning_rate": 2.245529378370707e-05, "loss": 1.2636557221412658e-05, "step": 273300 }, { "epoch": 77.57876809537326, "grad_norm": 0.0010996247874572873, "learning_rate": 2.2452455293783707e-05, "loss": 1.9683688879013062e-05, "step": 273310 }, { "epoch": 77.58160658529663, "grad_norm": 0.0009865351021289825, "learning_rate": 2.2449616803860345e-05, "loss": 1.2744031846523284e-05, "step": 273320 }, { "epoch": 77.58444507521999, "grad_norm": 0.0016621119575574994, "learning_rate": 2.2446778313936987e-05, "loss": 1.1539459228515625e-05, "step": 273330 }, { "epoch": 77.58728356514334, "grad_norm": 0.008297459222376347, "learning_rate": 2.2443939824013628e-05, "loss": 1.9309110939502716e-05, "step": 273340 }, { "epoch": 77.5901220550667, "grad_norm": 0.026010064408183098, "learning_rate": 2.2441101334090266e-05, "loss": 1.3498589396476746e-05, "step": 273350 }, { "epoch": 77.59296054499006, "grad_norm": 0.010798588395118713, "learning_rate": 2.2438262844166904e-05, "loss": 1.3613700866699219e-05, "step": 273360 }, { "epoch": 77.59579903491343, "grad_norm": 0.016846483573317528, "learning_rate": 2.2435424354243542e-05, "loss": 5.0570443272590636e-05, "step": 273370 }, { "epoch": 77.59863752483679, "grad_norm": 0.0044111451134085655, "learning_rate": 2.2432585864320183e-05, "loss": 2.6934966444969178e-05, "step": 273380 }, { "epoch": 77.60147601476015, "grad_norm": 0.003366771386936307, "learning_rate": 2.242974737439682e-05, "loss": 1.4516524970531464e-05, "step": 273390 }, { "epoch": 77.60431450468351, "grad_norm": 0.0008932722266763449, "learning_rate": 2.242690888447346e-05, "loss": 1.5456974506378173e-05, "step": 273400 }, { "epoch": 77.60715299460686, "grad_norm": 0.002020491287112236, "learning_rate": 2.24240703945501e-05, "loss": 1.7328932881355285e-05, "step": 273410 }, { "epoch": 77.60999148453023, "grad_norm": 0.0009134382125921547, "learning_rate": 2.242123190462674e-05, "loss": 1.6718544065952302e-05, "step": 273420 }, { "epoch": 77.61282997445359, "grad_norm": 0.01558243203908205, "learning_rate": 2.241839341470338e-05, "loss": 1.277681440114975e-05, "step": 273430 }, { "epoch": 77.61566846437695, "grad_norm": 0.0020923870615661144, "learning_rate": 2.2415554924780018e-05, "loss": 1.646149903535843e-05, "step": 273440 }, { "epoch": 77.61850695430032, "grad_norm": 0.007205531001091003, "learning_rate": 2.2412716434856656e-05, "loss": 2.5989487767219543e-05, "step": 273450 }, { "epoch": 77.62134544422368, "grad_norm": 0.4377900958061218, "learning_rate": 2.2409877944933297e-05, "loss": 8.056387305259705e-05, "step": 273460 }, { "epoch": 77.62418393414703, "grad_norm": 0.0005862712860107422, "learning_rate": 2.240703945500994e-05, "loss": 2.5861337780952455e-05, "step": 273470 }, { "epoch": 77.62702242407039, "grad_norm": 0.00929416436702013, "learning_rate": 2.2404200965086573e-05, "loss": 5.6173093616962434e-05, "step": 273480 }, { "epoch": 77.62986091399375, "grad_norm": 0.003988861571997404, "learning_rate": 2.2401362475163215e-05, "loss": 2.8624385595321656e-05, "step": 273490 }, { "epoch": 77.63269940391712, "grad_norm": 0.0048758648335933685, "learning_rate": 2.2398523985239853e-05, "loss": 2.701207995414734e-05, "step": 273500 }, { "epoch": 77.63269940391712, "eval_accuracy": 0.9867743371272334, "eval_loss": 0.05512353405356407, "eval_runtime": 35.3297, "eval_samples_per_second": 445.149, "eval_steps_per_second": 6.963, "step": 273500 }, { "epoch": 77.63553789384048, "grad_norm": 0.8350512385368347, "learning_rate": 2.2395685495316494e-05, "loss": 0.00039597079157829287, "step": 273510 }, { "epoch": 77.63837638376384, "grad_norm": 0.005180487409234047, "learning_rate": 2.2392847005393132e-05, "loss": 0.0012008899822831154, "step": 273520 }, { "epoch": 77.6412148736872, "grad_norm": 0.004106196109205484, "learning_rate": 2.239000851546977e-05, "loss": 0.0001208597794175148, "step": 273530 }, { "epoch": 77.64405336361055, "grad_norm": 0.0970403254032135, "learning_rate": 2.238717002554641e-05, "loss": 0.00048079900443553925, "step": 273540 }, { "epoch": 77.64689185353392, "grad_norm": 5.5302886962890625, "learning_rate": 2.238433153562305e-05, "loss": 0.008237004280090332, "step": 273550 }, { "epoch": 77.64973034345728, "grad_norm": 0.02525976672768593, "learning_rate": 2.238149304569969e-05, "loss": 0.0007334830239415169, "step": 273560 }, { "epoch": 77.65256883338064, "grad_norm": 11.335063934326172, "learning_rate": 2.237865455577633e-05, "loss": 0.0029235797002911566, "step": 273570 }, { "epoch": 77.655407323304, "grad_norm": 0.0244901180267334, "learning_rate": 2.2375816065852967e-05, "loss": 0.0032756298780441282, "step": 273580 }, { "epoch": 77.65824581322737, "grad_norm": 0.052874986082315445, "learning_rate": 2.2372977575929608e-05, "loss": 0.0034227591007947923, "step": 273590 }, { "epoch": 77.66108430315073, "grad_norm": 0.0015272137243300676, "learning_rate": 2.2370139086006246e-05, "loss": 0.001527145691215992, "step": 273600 }, { "epoch": 77.66392279307408, "grad_norm": 10.113699913024902, "learning_rate": 2.2367300596082884e-05, "loss": 0.002784351445734501, "step": 273610 }, { "epoch": 77.66676128299744, "grad_norm": 0.07857436686754227, "learning_rate": 2.2364462106159525e-05, "loss": 0.0006528588011860848, "step": 273620 }, { "epoch": 77.6695997729208, "grad_norm": 0.0023574845399707556, "learning_rate": 2.2361623616236163e-05, "loss": 0.0017976531758904456, "step": 273630 }, { "epoch": 77.67243826284417, "grad_norm": 2.121873617172241, "learning_rate": 2.2358785126312805e-05, "loss": 0.002815025672316551, "step": 273640 }, { "epoch": 77.67527675276753, "grad_norm": 0.03574462980031967, "learning_rate": 2.235594663638944e-05, "loss": 0.0038645148277282717, "step": 273650 }, { "epoch": 77.6781152426909, "grad_norm": 0.046489156782627106, "learning_rate": 2.235310814646608e-05, "loss": 0.001892034523189068, "step": 273660 }, { "epoch": 77.68095373261426, "grad_norm": 0.005411892663687468, "learning_rate": 2.2350269656542722e-05, "loss": 0.007391972094774246, "step": 273670 }, { "epoch": 77.6837922225376, "grad_norm": 0.02115769125521183, "learning_rate": 2.234743116661936e-05, "loss": 0.0010009920224547386, "step": 273680 }, { "epoch": 77.68663071246097, "grad_norm": 0.012486145831644535, "learning_rate": 2.2344592676695998e-05, "loss": 0.000502157025039196, "step": 273690 }, { "epoch": 77.68946920238433, "grad_norm": 0.24282518029212952, "learning_rate": 2.2341754186772636e-05, "loss": 7.756166160106659e-05, "step": 273700 }, { "epoch": 77.6923076923077, "grad_norm": 0.04969274252653122, "learning_rate": 2.2338915696849277e-05, "loss": 7.753372192382812e-05, "step": 273710 }, { "epoch": 77.69514618223106, "grad_norm": 0.0022013115230947733, "learning_rate": 2.233607720692592e-05, "loss": 0.0008016908541321754, "step": 273720 }, { "epoch": 77.69798467215442, "grad_norm": 0.205114483833313, "learning_rate": 2.2333238717002557e-05, "loss": 0.0007122978568077087, "step": 273730 }, { "epoch": 77.70082316207777, "grad_norm": 0.002728167222812772, "learning_rate": 2.2330400227079195e-05, "loss": 0.0001878904178738594, "step": 273740 }, { "epoch": 77.70366165200113, "grad_norm": 1.5136985778808594, "learning_rate": 2.2327561737155833e-05, "loss": 0.00023533590137958527, "step": 273750 }, { "epoch": 77.7065001419245, "grad_norm": 0.20484937727451324, "learning_rate": 2.2324723247232474e-05, "loss": 9.203627705574035e-05, "step": 273760 }, { "epoch": 77.70933863184786, "grad_norm": 0.11897235363721848, "learning_rate": 2.2321884757309116e-05, "loss": 0.001892387308180332, "step": 273770 }, { "epoch": 77.71217712177122, "grad_norm": 0.04815559834241867, "learning_rate": 2.231904626738575e-05, "loss": 2.9142387211322786e-05, "step": 273780 }, { "epoch": 77.71501561169458, "grad_norm": 0.0015971018001437187, "learning_rate": 2.231620777746239e-05, "loss": 0.000959588959813118, "step": 273790 }, { "epoch": 77.71785410161795, "grad_norm": 0.02316119894385338, "learning_rate": 2.231336928753903e-05, "loss": 0.0012715060263872146, "step": 273800 }, { "epoch": 77.7206925915413, "grad_norm": 0.04595073312520981, "learning_rate": 2.231053079761567e-05, "loss": 6.054677069187164e-05, "step": 273810 }, { "epoch": 77.72353108146466, "grad_norm": 0.02265152707695961, "learning_rate": 2.230769230769231e-05, "loss": 9.719915688037873e-05, "step": 273820 }, { "epoch": 77.72636957138802, "grad_norm": 0.006466009188443422, "learning_rate": 2.2304853817768947e-05, "loss": 8.994005620479583e-05, "step": 273830 }, { "epoch": 77.72920806131138, "grad_norm": 0.07150452584028244, "learning_rate": 2.2302015327845588e-05, "loss": 5.231127142906189e-05, "step": 273840 }, { "epoch": 77.73204655123475, "grad_norm": 0.0018912034574896097, "learning_rate": 2.2299176837922226e-05, "loss": 0.0030454883351922037, "step": 273850 }, { "epoch": 77.73488504115811, "grad_norm": 0.037293691188097, "learning_rate": 2.2296338347998864e-05, "loss": 0.001933116465806961, "step": 273860 }, { "epoch": 77.73772353108147, "grad_norm": 0.46897387504577637, "learning_rate": 2.2293499858075506e-05, "loss": 0.0004956740885972977, "step": 273870 }, { "epoch": 77.74056202100482, "grad_norm": 0.0440821647644043, "learning_rate": 2.2290661368152144e-05, "loss": 4.962347447872162e-05, "step": 273880 }, { "epoch": 77.74340051092818, "grad_norm": 0.010956991463899612, "learning_rate": 2.2287822878228785e-05, "loss": 0.00013673510402441026, "step": 273890 }, { "epoch": 77.74623900085155, "grad_norm": 0.004168002866208553, "learning_rate": 2.2284984388305423e-05, "loss": 7.136929780244827e-05, "step": 273900 }, { "epoch": 77.74907749077491, "grad_norm": 0.004728814586997032, "learning_rate": 2.228214589838206e-05, "loss": 0.0006155187264084816, "step": 273910 }, { "epoch": 77.75191598069827, "grad_norm": 0.005260768346488476, "learning_rate": 2.2279307408458702e-05, "loss": 5.842093378305435e-05, "step": 273920 }, { "epoch": 77.75475447062163, "grad_norm": 0.003372836858034134, "learning_rate": 2.227646891853534e-05, "loss": 0.0005366247147321701, "step": 273930 }, { "epoch": 77.757592960545, "grad_norm": 0.006014436483383179, "learning_rate": 2.227363042861198e-05, "loss": 6.063152104616165e-05, "step": 273940 }, { "epoch": 77.76043145046835, "grad_norm": 0.021416248753666878, "learning_rate": 2.2270791938688616e-05, "loss": 6.580613553524018e-05, "step": 273950 }, { "epoch": 77.76326994039171, "grad_norm": 0.39950400590896606, "learning_rate": 2.2267953448765258e-05, "loss": 0.0001363394781947136, "step": 273960 }, { "epoch": 77.76610843031507, "grad_norm": 0.002270472701638937, "learning_rate": 2.22651149588419e-05, "loss": 0.00010429993271827697, "step": 273970 }, { "epoch": 77.76894692023843, "grad_norm": 0.02094152197241783, "learning_rate": 2.2262276468918537e-05, "loss": 5.536116659641266e-05, "step": 273980 }, { "epoch": 77.7717854101618, "grad_norm": 0.00841517373919487, "learning_rate": 2.2259437978995175e-05, "loss": 3.835912793874741e-05, "step": 273990 }, { "epoch": 77.77462390008516, "grad_norm": 0.0021769937593489885, "learning_rate": 2.2256599489071813e-05, "loss": 2.823732793331146e-05, "step": 274000 }, { "epoch": 77.77462390008516, "eval_accuracy": 0.986011318115343, "eval_loss": 0.05798258259892464, "eval_runtime": 35.8902, "eval_samples_per_second": 438.197, "eval_steps_per_second": 6.854, "step": 274000 }, { "epoch": 77.77746239000851, "grad_norm": 0.03078044205904007, "learning_rate": 2.2253760999148454e-05, "loss": 5.100555717945099e-05, "step": 274010 }, { "epoch": 77.78030087993187, "grad_norm": 0.004174785688519478, "learning_rate": 2.2250922509225096e-05, "loss": 0.00010249167680740357, "step": 274020 }, { "epoch": 77.78313936985523, "grad_norm": 0.19152987003326416, "learning_rate": 2.2248084019301734e-05, "loss": 6.95783644914627e-05, "step": 274030 }, { "epoch": 77.7859778597786, "grad_norm": 0.004855554550886154, "learning_rate": 2.224524552937837e-05, "loss": 7.469542324543e-05, "step": 274040 }, { "epoch": 77.78881634970196, "grad_norm": 0.008547944016754627, "learning_rate": 2.224240703945501e-05, "loss": 4.4383108615875244e-05, "step": 274050 }, { "epoch": 77.79165483962532, "grad_norm": 0.015859736129641533, "learning_rate": 2.223956854953165e-05, "loss": 0.00010587982833385468, "step": 274060 }, { "epoch": 77.79449332954869, "grad_norm": 0.003983561880886555, "learning_rate": 2.223673005960829e-05, "loss": 0.0010455021634697913, "step": 274070 }, { "epoch": 77.79733181947203, "grad_norm": 0.006059488747268915, "learning_rate": 2.2233891569684927e-05, "loss": 3.195535391569138e-05, "step": 274080 }, { "epoch": 77.8001703093954, "grad_norm": 0.015251253731548786, "learning_rate": 2.223105307976157e-05, "loss": 8.142311125993728e-05, "step": 274090 }, { "epoch": 77.80300879931876, "grad_norm": 0.029775578528642654, "learning_rate": 2.2228214589838206e-05, "loss": 5.61075285077095e-05, "step": 274100 }, { "epoch": 77.80584728924212, "grad_norm": 0.007975282147526741, "learning_rate": 2.2225376099914848e-05, "loss": 0.00011945851147174836, "step": 274110 }, { "epoch": 77.80868577916549, "grad_norm": 0.01052657701075077, "learning_rate": 2.2222537609991486e-05, "loss": 3.518052399158478e-05, "step": 274120 }, { "epoch": 77.81152426908885, "grad_norm": 0.009130487218499184, "learning_rate": 2.2219699120068124e-05, "loss": 0.00016078576445579528, "step": 274130 }, { "epoch": 77.81436275901221, "grad_norm": 0.024380750954151154, "learning_rate": 2.2216860630144765e-05, "loss": 0.0008545020595192909, "step": 274140 }, { "epoch": 77.81720124893556, "grad_norm": 0.008665863424539566, "learning_rate": 2.2214022140221403e-05, "loss": 0.0005217621102929116, "step": 274150 }, { "epoch": 77.82003973885892, "grad_norm": 0.006773811765015125, "learning_rate": 2.221118365029804e-05, "loss": 3.3101439476013185e-05, "step": 274160 }, { "epoch": 77.82287822878229, "grad_norm": 0.011111459694802761, "learning_rate": 2.2208345160374682e-05, "loss": 2.21986323595047e-05, "step": 274170 }, { "epoch": 77.82571671870565, "grad_norm": 0.05113824084401131, "learning_rate": 2.220550667045132e-05, "loss": 4.201158881187439e-05, "step": 274180 }, { "epoch": 77.82855520862901, "grad_norm": 0.0037117544561624527, "learning_rate": 2.2202668180527962e-05, "loss": 4.656817764043808e-05, "step": 274190 }, { "epoch": 77.83139369855238, "grad_norm": 0.005610077176243067, "learning_rate": 2.21998296906046e-05, "loss": 0.00021477770060300826, "step": 274200 }, { "epoch": 77.83423218847572, "grad_norm": 0.11246929317712784, "learning_rate": 2.2196991200681238e-05, "loss": 8.091367781162261e-05, "step": 274210 }, { "epoch": 77.83707067839909, "grad_norm": 0.00844427291303873, "learning_rate": 2.219415271075788e-05, "loss": 9.046513587236404e-05, "step": 274220 }, { "epoch": 77.83990916832245, "grad_norm": 0.5001529455184937, "learning_rate": 2.2191314220834517e-05, "loss": 0.00010978076606988907, "step": 274230 }, { "epoch": 77.84274765824581, "grad_norm": 0.0013038284378126264, "learning_rate": 2.218847573091116e-05, "loss": 5.8037228882312775e-05, "step": 274240 }, { "epoch": 77.84558614816918, "grad_norm": 0.20076844096183777, "learning_rate": 2.2185637240987793e-05, "loss": 5.4113753139972684e-05, "step": 274250 }, { "epoch": 77.84842463809254, "grad_norm": 0.04019399359822273, "learning_rate": 2.2182798751064434e-05, "loss": 5.635488778352737e-05, "step": 274260 }, { "epoch": 77.8512631280159, "grad_norm": 0.01695646531879902, "learning_rate": 2.2179960261141076e-05, "loss": 4.7302059829235074e-05, "step": 274270 }, { "epoch": 77.85410161793925, "grad_norm": 0.0013930590357631445, "learning_rate": 2.2177121771217714e-05, "loss": 2.4440884590148926e-05, "step": 274280 }, { "epoch": 77.85694010786261, "grad_norm": 0.02591363899409771, "learning_rate": 2.2174283281294352e-05, "loss": 3.2141990959644315e-05, "step": 274290 }, { "epoch": 77.85977859778598, "grad_norm": 0.1040387898683548, "learning_rate": 2.217144479137099e-05, "loss": 4.234109073877335e-05, "step": 274300 }, { "epoch": 77.86261708770934, "grad_norm": 0.0068994020111858845, "learning_rate": 2.216860630144763e-05, "loss": 3.8228556513786315e-05, "step": 274310 }, { "epoch": 77.8654555776327, "grad_norm": 0.007199369370937347, "learning_rate": 2.2165767811524273e-05, "loss": 4.0201470255851746e-05, "step": 274320 }, { "epoch": 77.86829406755606, "grad_norm": 0.007718768436461687, "learning_rate": 2.2162929321600907e-05, "loss": 4.032794386148453e-05, "step": 274330 }, { "epoch": 77.87113255747943, "grad_norm": 0.00512794079259038, "learning_rate": 2.216009083167755e-05, "loss": 2.5491230189800262e-05, "step": 274340 }, { "epoch": 77.87397104740278, "grad_norm": 0.0071336692199110985, "learning_rate": 2.2157252341754187e-05, "loss": 1.559443771839142e-05, "step": 274350 }, { "epoch": 77.87680953732614, "grad_norm": 0.002818835200741887, "learning_rate": 2.2154413851830828e-05, "loss": 1.9828230142593385e-05, "step": 274360 }, { "epoch": 77.8796480272495, "grad_norm": 0.003007416380569339, "learning_rate": 2.2151575361907466e-05, "loss": 2.7476251125335695e-05, "step": 274370 }, { "epoch": 77.88248651717286, "grad_norm": 0.005121611058712006, "learning_rate": 2.2148736871984104e-05, "loss": 1.6489438712596895e-05, "step": 274380 }, { "epoch": 77.88532500709623, "grad_norm": 0.012535826303064823, "learning_rate": 2.2145898382060745e-05, "loss": 1.7751194536685944e-05, "step": 274390 }, { "epoch": 77.88816349701959, "grad_norm": 0.5035597681999207, "learning_rate": 2.2143059892137383e-05, "loss": 0.0006620319560170174, "step": 274400 }, { "epoch": 77.89100198694295, "grad_norm": 0.012940874323248863, "learning_rate": 2.2140221402214025e-05, "loss": 0.0005410285666584969, "step": 274410 }, { "epoch": 77.8938404768663, "grad_norm": 0.029179932549595833, "learning_rate": 2.2137382912290663e-05, "loss": 0.00634535476565361, "step": 274420 }, { "epoch": 77.89667896678966, "grad_norm": 0.0018780773971229792, "learning_rate": 2.21345444223673e-05, "loss": 4.0479563176631926e-05, "step": 274430 }, { "epoch": 77.89951745671303, "grad_norm": 0.001622101990506053, "learning_rate": 2.2131705932443942e-05, "loss": 0.00010919608175754547, "step": 274440 }, { "epoch": 77.90235594663639, "grad_norm": 0.008544531650841236, "learning_rate": 2.212886744252058e-05, "loss": 2.8705038130283355e-05, "step": 274450 }, { "epoch": 77.90519443655975, "grad_norm": 0.0011692732805386186, "learning_rate": 2.2126028952597218e-05, "loss": 2.788826823234558e-05, "step": 274460 }, { "epoch": 77.90803292648312, "grad_norm": 0.0006529862876050174, "learning_rate": 2.212319046267386e-05, "loss": 1.6387924551963808e-05, "step": 274470 }, { "epoch": 77.91087141640647, "grad_norm": 0.12447113543748856, "learning_rate": 2.2120351972750497e-05, "loss": 4.7450140118598935e-05, "step": 274480 }, { "epoch": 77.91370990632983, "grad_norm": 0.006329019088298082, "learning_rate": 2.211751348282714e-05, "loss": 2.51229852437973e-05, "step": 274490 }, { "epoch": 77.91654839625319, "grad_norm": 0.0038001625798642635, "learning_rate": 2.2114674992903777e-05, "loss": 0.00023461077362298965, "step": 274500 }, { "epoch": 77.91654839625319, "eval_accuracy": 0.9867743371272334, "eval_loss": 0.053684160113334656, "eval_runtime": 35.967, "eval_samples_per_second": 437.262, "eval_steps_per_second": 6.84, "step": 274500 }, { "epoch": 77.91938688617655, "grad_norm": 0.0021206021774560213, "learning_rate": 2.2111836502980415e-05, "loss": 3.740452229976654e-05, "step": 274510 }, { "epoch": 77.92222537609992, "grad_norm": 0.027004819363355637, "learning_rate": 2.2108998013057056e-05, "loss": 6.017852574586868e-05, "step": 274520 }, { "epoch": 77.92506386602328, "grad_norm": 0.005567003972828388, "learning_rate": 2.2106159523133694e-05, "loss": 0.00010601356625556946, "step": 274530 }, { "epoch": 77.92790235594664, "grad_norm": 0.03283842280507088, "learning_rate": 2.2103321033210332e-05, "loss": 2.449899911880493e-05, "step": 274540 }, { "epoch": 77.93074084586999, "grad_norm": 0.0038126714061945677, "learning_rate": 2.210048254328697e-05, "loss": 3.0519999563694e-05, "step": 274550 }, { "epoch": 77.93357933579335, "grad_norm": 0.024507727473974228, "learning_rate": 2.209764405336361e-05, "loss": 2.4258345365524293e-05, "step": 274560 }, { "epoch": 77.93641782571672, "grad_norm": 0.005614748224616051, "learning_rate": 2.2094805563440253e-05, "loss": 3.337841480970383e-05, "step": 274570 }, { "epoch": 77.93925631564008, "grad_norm": 0.0022167451679706573, "learning_rate": 2.209196707351689e-05, "loss": 0.00017591454088687896, "step": 274580 }, { "epoch": 77.94209480556344, "grad_norm": 0.001976271392777562, "learning_rate": 2.208912858359353e-05, "loss": 2.3688003420829772e-05, "step": 274590 }, { "epoch": 77.9449332954868, "grad_norm": 0.004611494485288858, "learning_rate": 2.2086290093670167e-05, "loss": 0.0001432415097951889, "step": 274600 }, { "epoch": 77.94777178541017, "grad_norm": 0.0017445511184632778, "learning_rate": 2.2083451603746808e-05, "loss": 0.000737956166267395, "step": 274610 }, { "epoch": 77.95061027533352, "grad_norm": 0.00472560990601778, "learning_rate": 2.208061311382345e-05, "loss": 4.8551894724369046e-05, "step": 274620 }, { "epoch": 77.95344876525688, "grad_norm": 0.0011255211429670453, "learning_rate": 2.2077774623900084e-05, "loss": 6.880685687065125e-05, "step": 274630 }, { "epoch": 77.95628725518024, "grad_norm": 0.021424459293484688, "learning_rate": 2.2074936133976725e-05, "loss": 1.6941875219345093e-05, "step": 274640 }, { "epoch": 77.9591257451036, "grad_norm": 0.004077176563441753, "learning_rate": 2.2072097644053363e-05, "loss": 8.635241538286209e-05, "step": 274650 }, { "epoch": 77.96196423502697, "grad_norm": 0.061681102961301804, "learning_rate": 2.2069259154130005e-05, "loss": 5.749911069869995e-05, "step": 274660 }, { "epoch": 77.96480272495033, "grad_norm": 0.004529668018221855, "learning_rate": 2.2066420664206643e-05, "loss": 3.3311359584331515e-05, "step": 274670 }, { "epoch": 77.96764121487368, "grad_norm": 0.06121145933866501, "learning_rate": 2.206358217428328e-05, "loss": 8.350834250450134e-05, "step": 274680 }, { "epoch": 77.97047970479704, "grad_norm": 0.005408271215856075, "learning_rate": 2.2060743684359922e-05, "loss": 2.622511237859726e-05, "step": 274690 }, { "epoch": 77.9733181947204, "grad_norm": 0.0042176032438874245, "learning_rate": 2.205790519443656e-05, "loss": 2.3706443607807158e-05, "step": 274700 }, { "epoch": 77.97615668464377, "grad_norm": 0.001129354815930128, "learning_rate": 2.2055066704513198e-05, "loss": 2.143271267414093e-05, "step": 274710 }, { "epoch": 77.97899517456713, "grad_norm": 0.0009350103791803122, "learning_rate": 2.205222821458984e-05, "loss": 3.622937947511673e-05, "step": 274720 }, { "epoch": 77.9818336644905, "grad_norm": 0.001573448651470244, "learning_rate": 2.2049389724666477e-05, "loss": 1.1311471462249756e-05, "step": 274730 }, { "epoch": 77.98467215441386, "grad_norm": 0.0007594219641759992, "learning_rate": 2.204655123474312e-05, "loss": 3.250986337661743e-05, "step": 274740 }, { "epoch": 77.9875106443372, "grad_norm": 0.014441720210015774, "learning_rate": 2.2043712744819757e-05, "loss": 2.9772520065307617e-05, "step": 274750 }, { "epoch": 77.99034913426057, "grad_norm": 0.028887085616588593, "learning_rate": 2.2040874254896395e-05, "loss": 2.4263747036457062e-05, "step": 274760 }, { "epoch": 77.99318762418393, "grad_norm": 0.004560191184282303, "learning_rate": 2.2038035764973036e-05, "loss": 2.2027641534805298e-05, "step": 274770 }, { "epoch": 77.9960261141073, "grad_norm": 0.0006500357994809747, "learning_rate": 2.2035197275049674e-05, "loss": 2.2172555327415465e-05, "step": 274780 }, { "epoch": 77.99886460403066, "grad_norm": 0.008483157493174076, "learning_rate": 2.2032358785126316e-05, "loss": 3.074668347835541e-05, "step": 274790 }, { "epoch": 78.00170309395402, "grad_norm": 0.0007837286684662104, "learning_rate": 2.2029520295202954e-05, "loss": 2.0454704645089806e-05, "step": 274800 }, { "epoch": 78.00454158387738, "grad_norm": 0.0033600428141653538, "learning_rate": 2.202668180527959e-05, "loss": 1.4066696166992188e-05, "step": 274810 }, { "epoch": 78.00738007380073, "grad_norm": 0.0012405599700286984, "learning_rate": 2.2023843315356233e-05, "loss": 4.0502287447452545e-05, "step": 274820 }, { "epoch": 78.0102185637241, "grad_norm": 0.0037004181649535894, "learning_rate": 2.202100482543287e-05, "loss": 2.317260950803757e-05, "step": 274830 }, { "epoch": 78.01305705364746, "grad_norm": 0.06006808206439018, "learning_rate": 2.201816633550951e-05, "loss": 3.670528531074524e-05, "step": 274840 }, { "epoch": 78.01589554357082, "grad_norm": 0.0062566595152020454, "learning_rate": 2.201532784558615e-05, "loss": 3.118235617876053e-05, "step": 274850 }, { "epoch": 78.01873403349418, "grad_norm": 0.004055518191307783, "learning_rate": 2.2012489355662788e-05, "loss": 2.829674631357193e-05, "step": 274860 }, { "epoch": 78.02157252341755, "grad_norm": 0.02173498459160328, "learning_rate": 2.200965086573943e-05, "loss": 1.331120729446411e-05, "step": 274870 }, { "epoch": 78.02441101334091, "grad_norm": 0.0014706533402204514, "learning_rate": 2.2006812375816068e-05, "loss": 1.5827082097530366e-05, "step": 274880 }, { "epoch": 78.02724950326426, "grad_norm": 0.008191538974642754, "learning_rate": 2.2003973885892706e-05, "loss": 1.7451681196689604e-05, "step": 274890 }, { "epoch": 78.03008799318762, "grad_norm": 0.0058960202150046825, "learning_rate": 2.2001135395969347e-05, "loss": 1.443568617105484e-05, "step": 274900 }, { "epoch": 78.03292648311098, "grad_norm": 0.003297819523140788, "learning_rate": 2.1998296906045985e-05, "loss": 4.878714680671692e-05, "step": 274910 }, { "epoch": 78.03576497303435, "grad_norm": 0.010890877805650234, "learning_rate": 2.1995458416122623e-05, "loss": 1.1808425188064576e-05, "step": 274920 }, { "epoch": 78.03860346295771, "grad_norm": 0.0025526913814246655, "learning_rate": 2.199261992619926e-05, "loss": 1.4373846352100372e-05, "step": 274930 }, { "epoch": 78.04144195288107, "grad_norm": 0.005421640817075968, "learning_rate": 2.1989781436275902e-05, "loss": 2.3770332336425782e-05, "step": 274940 }, { "epoch": 78.04428044280442, "grad_norm": 0.07094369828701019, "learning_rate": 2.1986942946352544e-05, "loss": 2.8298236429691315e-05, "step": 274950 }, { "epoch": 78.04711893272778, "grad_norm": 0.020080694928765297, "learning_rate": 2.198410445642918e-05, "loss": 2.5220587849617006e-05, "step": 274960 }, { "epoch": 78.04995742265115, "grad_norm": 0.00303045567125082, "learning_rate": 2.198126596650582e-05, "loss": 1.6133300960063934e-05, "step": 274970 }, { "epoch": 78.05279591257451, "grad_norm": 0.004169152583926916, "learning_rate": 2.1978427476582458e-05, "loss": 1.982543617486954e-05, "step": 274980 }, { "epoch": 78.05563440249787, "grad_norm": 0.0036371275782585144, "learning_rate": 2.19755889866591e-05, "loss": 4.208460450172424e-05, "step": 274990 }, { "epoch": 78.05847289242124, "grad_norm": 0.0031760241836309433, "learning_rate": 2.197275049673574e-05, "loss": 1.842472702264786e-05, "step": 275000 }, { "epoch": 78.05847289242124, "eval_accuracy": 0.9881732053156991, "eval_loss": 0.04936003312468529, "eval_runtime": 35.1557, "eval_samples_per_second": 447.353, "eval_steps_per_second": 6.997, "step": 275000 }, { "epoch": 78.0613113823446, "grad_norm": 0.0004920706851407886, "learning_rate": 2.1969912006812375e-05, "loss": 1.824125647544861e-05, "step": 275010 }, { "epoch": 78.06414987226795, "grad_norm": 0.003634587163105607, "learning_rate": 2.1967073516889016e-05, "loss": 1.2639351189136505e-05, "step": 275020 }, { "epoch": 78.06698836219131, "grad_norm": 0.02025315910577774, "learning_rate": 2.1964235026965654e-05, "loss": 1.806449145078659e-05, "step": 275030 }, { "epoch": 78.06982685211467, "grad_norm": 0.0003331840271130204, "learning_rate": 2.1961396537042296e-05, "loss": 1.4549866318702697e-05, "step": 275040 }, { "epoch": 78.07266534203804, "grad_norm": 0.0024085224140435457, "learning_rate": 2.1958558047118934e-05, "loss": 1.942608505487442e-05, "step": 275050 }, { "epoch": 78.0755038319614, "grad_norm": 0.0009715055348351598, "learning_rate": 2.195571955719557e-05, "loss": 1.2131594121456147e-05, "step": 275060 }, { "epoch": 78.07834232188476, "grad_norm": 0.0037367430049926043, "learning_rate": 2.1952881067272213e-05, "loss": 1.5615858137607576e-05, "step": 275070 }, { "epoch": 78.08118081180812, "grad_norm": 0.009652766399085522, "learning_rate": 2.195004257734885e-05, "loss": 1.8487125635147096e-05, "step": 275080 }, { "epoch": 78.08401930173147, "grad_norm": 0.003992829937487841, "learning_rate": 2.1947204087425492e-05, "loss": 1.3404525816440582e-05, "step": 275090 }, { "epoch": 78.08685779165484, "grad_norm": 0.006787613499909639, "learning_rate": 2.194436559750213e-05, "loss": 1.9505433738231658e-05, "step": 275100 }, { "epoch": 78.0896962815782, "grad_norm": 0.003208908485248685, "learning_rate": 2.194152710757877e-05, "loss": 2.2782385349273683e-05, "step": 275110 }, { "epoch": 78.09253477150156, "grad_norm": 0.0003129859105683863, "learning_rate": 2.193868861765541e-05, "loss": 1.6789138317108155e-05, "step": 275120 }, { "epoch": 78.09537326142492, "grad_norm": 0.0007337589049711823, "learning_rate": 2.1935850127732048e-05, "loss": 3.288239240646362e-05, "step": 275130 }, { "epoch": 78.09821175134829, "grad_norm": 0.000909984577447176, "learning_rate": 2.1933011637808686e-05, "loss": 2.3788772523403168e-05, "step": 275140 }, { "epoch": 78.10105024127165, "grad_norm": 0.002224692841991782, "learning_rate": 2.1930173147885327e-05, "loss": 1.441948115825653e-05, "step": 275150 }, { "epoch": 78.103888731195, "grad_norm": 0.0013880300102755427, "learning_rate": 2.1927334657961965e-05, "loss": 1.6370229423046113e-05, "step": 275160 }, { "epoch": 78.10672722111836, "grad_norm": 0.0005963274161331356, "learning_rate": 2.1924496168038606e-05, "loss": 2.1683983504772186e-05, "step": 275170 }, { "epoch": 78.10956571104172, "grad_norm": 0.026792198419570923, "learning_rate": 2.192165767811524e-05, "loss": 0.00017360933125019074, "step": 275180 }, { "epoch": 78.11240420096509, "grad_norm": 0.1796657145023346, "learning_rate": 2.1918819188191882e-05, "loss": 8.937213569879532e-05, "step": 275190 }, { "epoch": 78.11524269088845, "grad_norm": 0.8660899996757507, "learning_rate": 2.1915980698268524e-05, "loss": 0.0003120696172118187, "step": 275200 }, { "epoch": 78.11808118081181, "grad_norm": 0.0019540279172360897, "learning_rate": 2.1913142208345162e-05, "loss": 0.00065480787307024, "step": 275210 }, { "epoch": 78.12091967073516, "grad_norm": 0.003855746006593108, "learning_rate": 2.19103037184218e-05, "loss": 0.007668378949165344, "step": 275220 }, { "epoch": 78.12375816065853, "grad_norm": 0.020761258900165558, "learning_rate": 2.1907465228498438e-05, "loss": 0.005112326890230179, "step": 275230 }, { "epoch": 78.12659665058189, "grad_norm": 0.02436342090368271, "learning_rate": 2.190462673857508e-05, "loss": 0.0005147797986865043, "step": 275240 }, { "epoch": 78.12943514050525, "grad_norm": 0.0008312254212796688, "learning_rate": 2.190178824865172e-05, "loss": 0.002294977568089962, "step": 275250 }, { "epoch": 78.13227363042861, "grad_norm": 0.0016335488762706518, "learning_rate": 2.189894975872836e-05, "loss": 0.004531579464673996, "step": 275260 }, { "epoch": 78.13511212035198, "grad_norm": 0.0015707621350884438, "learning_rate": 2.1896111268804996e-05, "loss": 0.001253238506615162, "step": 275270 }, { "epoch": 78.13795061027534, "grad_norm": 0.012545128352940083, "learning_rate": 2.1893272778881634e-05, "loss": 0.0005130395293235779, "step": 275280 }, { "epoch": 78.14078910019869, "grad_norm": 0.01583576761186123, "learning_rate": 2.1890434288958276e-05, "loss": 0.0031846106052398683, "step": 275290 }, { "epoch": 78.14362759012205, "grad_norm": 0.0009946698555722833, "learning_rate": 2.1887595799034917e-05, "loss": 4.02987003326416e-05, "step": 275300 }, { "epoch": 78.14646608004541, "grad_norm": 0.0064287083223462105, "learning_rate": 2.1884757309111552e-05, "loss": 3.616493195295334e-05, "step": 275310 }, { "epoch": 78.14930456996878, "grad_norm": 0.06156017631292343, "learning_rate": 2.1881918819188193e-05, "loss": 0.0002973735332489014, "step": 275320 }, { "epoch": 78.15214305989214, "grad_norm": 0.011094595305621624, "learning_rate": 2.187908032926483e-05, "loss": 0.00038988757878541944, "step": 275330 }, { "epoch": 78.1549815498155, "grad_norm": 0.007233661599457264, "learning_rate": 2.1876241839341473e-05, "loss": 3.425013273954392e-05, "step": 275340 }, { "epoch": 78.15782003973887, "grad_norm": 0.0015084246406331658, "learning_rate": 2.187340334941811e-05, "loss": 1.935753971338272e-05, "step": 275350 }, { "epoch": 78.16065852966221, "grad_norm": 0.10078307241201401, "learning_rate": 2.187056485949475e-05, "loss": 4.2026303708553316e-05, "step": 275360 }, { "epoch": 78.16349701958558, "grad_norm": 0.003134796628728509, "learning_rate": 2.186772636957139e-05, "loss": 2.0113959908485413e-05, "step": 275370 }, { "epoch": 78.16633550950894, "grad_norm": 0.058716632425785065, "learning_rate": 2.1864887879648028e-05, "loss": 3.751646727323532e-05, "step": 275380 }, { "epoch": 78.1691739994323, "grad_norm": 0.00970109086483717, "learning_rate": 2.1862049389724666e-05, "loss": 2.5383196771144867e-05, "step": 275390 }, { "epoch": 78.17201248935567, "grad_norm": 0.007515472825616598, "learning_rate": 2.1859210899801307e-05, "loss": 2.856161445379257e-05, "step": 275400 }, { "epoch": 78.17485097927903, "grad_norm": 10.774479866027832, "learning_rate": 2.1856372409877945e-05, "loss": 0.012024689465761185, "step": 275410 }, { "epoch": 78.17768946920238, "grad_norm": 0.0018591207917779684, "learning_rate": 2.1853533919954587e-05, "loss": 1.5731342136859895e-05, "step": 275420 }, { "epoch": 78.18052795912574, "grad_norm": 0.04006224870681763, "learning_rate": 2.1850695430031225e-05, "loss": 4.298314452171326e-05, "step": 275430 }, { "epoch": 78.1833664490491, "grad_norm": 0.0347086563706398, "learning_rate": 2.1847856940107863e-05, "loss": 0.000486406683921814, "step": 275440 }, { "epoch": 78.18620493897247, "grad_norm": 0.002680589910596609, "learning_rate": 2.1845018450184504e-05, "loss": 1.4541484415531158e-05, "step": 275450 }, { "epoch": 78.18904342889583, "grad_norm": 0.2551484704017639, "learning_rate": 2.1842179960261142e-05, "loss": 4.89240512251854e-05, "step": 275460 }, { "epoch": 78.19188191881919, "grad_norm": 0.00029436603654175997, "learning_rate": 2.1839341470337783e-05, "loss": 0.00030326433479785917, "step": 275470 }, { "epoch": 78.19472040874255, "grad_norm": 0.020139042288064957, "learning_rate": 2.1836502980414418e-05, "loss": 6.743855774402619e-05, "step": 275480 }, { "epoch": 78.1975588986659, "grad_norm": 0.007147625088691711, "learning_rate": 2.183366449049106e-05, "loss": 7.960554212331772e-05, "step": 275490 }, { "epoch": 78.20039738858927, "grad_norm": 0.7060343027114868, "learning_rate": 2.18308260005677e-05, "loss": 0.00010668579488992691, "step": 275500 }, { "epoch": 78.20039738858927, "eval_accuracy": 0.9867743371272334, "eval_loss": 0.05616002529859543, "eval_runtime": 35.8649, "eval_samples_per_second": 438.506, "eval_steps_per_second": 6.859, "step": 275500 }, { "epoch": 78.20323587851263, "grad_norm": 0.014651339501142502, "learning_rate": 2.182798751064434e-05, "loss": 0.0005977962166070939, "step": 275510 }, { "epoch": 78.20607436843599, "grad_norm": 0.29191190004348755, "learning_rate": 2.1825149020720977e-05, "loss": 7.134787738323211e-05, "step": 275520 }, { "epoch": 78.20891285835935, "grad_norm": 0.0019820090383291245, "learning_rate": 2.1822310530797615e-05, "loss": 0.0001156805083155632, "step": 275530 }, { "epoch": 78.21175134828272, "grad_norm": 0.008134103380143642, "learning_rate": 2.1819472040874256e-05, "loss": 5.7398155331611636e-05, "step": 275540 }, { "epoch": 78.21458983820608, "grad_norm": 0.20744632184505463, "learning_rate": 2.1816633550950897e-05, "loss": 6.253812462091446e-05, "step": 275550 }, { "epoch": 78.21742832812943, "grad_norm": 0.002395356772467494, "learning_rate": 2.1813795061027535e-05, "loss": 0.0001385549083352089, "step": 275560 }, { "epoch": 78.22026681805279, "grad_norm": 0.0023386385291814804, "learning_rate": 2.1810956571104173e-05, "loss": 0.00012811776250600814, "step": 275570 }, { "epoch": 78.22310530797616, "grad_norm": 0.31762343645095825, "learning_rate": 2.180811808118081e-05, "loss": 5.424786359071731e-05, "step": 275580 }, { "epoch": 78.22594379789952, "grad_norm": 0.0035371598787605762, "learning_rate": 2.1805279591257453e-05, "loss": 0.00060440544039011, "step": 275590 }, { "epoch": 78.22878228782288, "grad_norm": 0.0029977774247527122, "learning_rate": 2.180244110133409e-05, "loss": 4.527196288108826e-05, "step": 275600 }, { "epoch": 78.23162077774624, "grad_norm": 0.004168340936303139, "learning_rate": 2.179960261141073e-05, "loss": 4.333574324846268e-05, "step": 275610 }, { "epoch": 78.2344592676696, "grad_norm": 0.0025315394159406424, "learning_rate": 2.179676412148737e-05, "loss": 0.00014133527874946595, "step": 275620 }, { "epoch": 78.23729775759296, "grad_norm": 0.001899438677355647, "learning_rate": 2.1793925631564008e-05, "loss": 4.6524964272975924e-05, "step": 275630 }, { "epoch": 78.24013624751632, "grad_norm": 0.0013734732056036592, "learning_rate": 2.179108714164065e-05, "loss": 2.6912428438663483e-05, "step": 275640 }, { "epoch": 78.24297473743968, "grad_norm": 0.0156604815274477, "learning_rate": 2.1788248651717287e-05, "loss": 0.001906396821141243, "step": 275650 }, { "epoch": 78.24581322736304, "grad_norm": 0.0032177669927477837, "learning_rate": 2.1785410161793925e-05, "loss": 8.982587605714798e-05, "step": 275660 }, { "epoch": 78.2486517172864, "grad_norm": 0.009552840143442154, "learning_rate": 2.1782571671870567e-05, "loss": 0.0001298239454627037, "step": 275670 }, { "epoch": 78.25149020720977, "grad_norm": 0.003176533617079258, "learning_rate": 2.1779733181947205e-05, "loss": 1.930445432662964e-05, "step": 275680 }, { "epoch": 78.25432869713312, "grad_norm": 0.0013591013848781586, "learning_rate": 2.1776894692023843e-05, "loss": 4.369616508483887e-05, "step": 275690 }, { "epoch": 78.25716718705648, "grad_norm": 0.005482431501150131, "learning_rate": 2.1774056202100484e-05, "loss": 1.5264563262462617e-05, "step": 275700 }, { "epoch": 78.26000567697984, "grad_norm": 0.0005226925713941455, "learning_rate": 2.1771217712177122e-05, "loss": 6.414391100406647e-05, "step": 275710 }, { "epoch": 78.26284416690321, "grad_norm": 0.0027519753202795982, "learning_rate": 2.1768379222253763e-05, "loss": 0.0002690097317099571, "step": 275720 }, { "epoch": 78.26568265682657, "grad_norm": 0.013125256635248661, "learning_rate": 2.17655407323304e-05, "loss": 2.7955323457717895e-05, "step": 275730 }, { "epoch": 78.26852114674993, "grad_norm": 1.690688967704773, "learning_rate": 2.176270224240704e-05, "loss": 0.00015252158045768737, "step": 275740 }, { "epoch": 78.2713596366733, "grad_norm": 0.0029400356579571962, "learning_rate": 2.175986375248368e-05, "loss": 6.615519523620606e-05, "step": 275750 }, { "epoch": 78.27419812659664, "grad_norm": 0.04031193628907204, "learning_rate": 2.175702526256032e-05, "loss": 8.332207798957824e-05, "step": 275760 }, { "epoch": 78.27703661652001, "grad_norm": 0.003116050735116005, "learning_rate": 2.175418677263696e-05, "loss": 0.00012832079082727432, "step": 275770 }, { "epoch": 78.27987510644337, "grad_norm": 0.0019780464936047792, "learning_rate": 2.1751348282713595e-05, "loss": 3.386437892913818e-05, "step": 275780 }, { "epoch": 78.28271359636673, "grad_norm": 0.009441366419196129, "learning_rate": 2.1748509792790236e-05, "loss": 4.1417032480239865e-05, "step": 275790 }, { "epoch": 78.2855520862901, "grad_norm": 0.055990591645240784, "learning_rate": 2.1745671302866878e-05, "loss": 6.162095814943314e-05, "step": 275800 }, { "epoch": 78.28839057621346, "grad_norm": 1.471897840499878, "learning_rate": 2.1742832812943516e-05, "loss": 0.0007402427494525909, "step": 275810 }, { "epoch": 78.29122906613682, "grad_norm": 0.004117550794035196, "learning_rate": 2.1739994323020154e-05, "loss": 7.959865033626556e-05, "step": 275820 }, { "epoch": 78.29406755606017, "grad_norm": 0.0007486271788366139, "learning_rate": 2.173715583309679e-05, "loss": 0.0005357200279831886, "step": 275830 }, { "epoch": 78.29690604598353, "grad_norm": 0.003880411386489868, "learning_rate": 2.1734317343173433e-05, "loss": 0.00017812978476285934, "step": 275840 }, { "epoch": 78.2997445359069, "grad_norm": 0.002207340905442834, "learning_rate": 2.1731478853250074e-05, "loss": 3.1162798404693606e-05, "step": 275850 }, { "epoch": 78.30258302583026, "grad_norm": 0.0015802272828295827, "learning_rate": 2.172864036332671e-05, "loss": 0.00012242440134286882, "step": 275860 }, { "epoch": 78.30542151575362, "grad_norm": 0.003210242372006178, "learning_rate": 2.172580187340335e-05, "loss": 7.0965476334095e-05, "step": 275870 }, { "epoch": 78.30826000567698, "grad_norm": 0.028756223618984222, "learning_rate": 2.1722963383479988e-05, "loss": 0.0004038482904434204, "step": 275880 }, { "epoch": 78.31109849560033, "grad_norm": 0.0012365877628326416, "learning_rate": 2.172012489355663e-05, "loss": 3.138110041618347e-05, "step": 275890 }, { "epoch": 78.3139369855237, "grad_norm": 0.0034258796367794275, "learning_rate": 2.1717286403633268e-05, "loss": 0.00025699697434902193, "step": 275900 }, { "epoch": 78.31677547544706, "grad_norm": 0.12965743243694305, "learning_rate": 2.1714447913709906e-05, "loss": 0.00016184225678443908, "step": 275910 }, { "epoch": 78.31961396537042, "grad_norm": 0.002090183785185218, "learning_rate": 2.1711609423786547e-05, "loss": 0.0005344750359654426, "step": 275920 }, { "epoch": 78.32245245529379, "grad_norm": 0.06253083795309067, "learning_rate": 2.1708770933863185e-05, "loss": 0.0001877404749393463, "step": 275930 }, { "epoch": 78.32529094521715, "grad_norm": 0.008878180757164955, "learning_rate": 2.1705932443939826e-05, "loss": 0.00013905148953199386, "step": 275940 }, { "epoch": 78.32812943514051, "grad_norm": 0.0020975121296942234, "learning_rate": 2.1703093954016464e-05, "loss": 7.719434797763825e-05, "step": 275950 }, { "epoch": 78.33096792506386, "grad_norm": 0.0016894378932192922, "learning_rate": 2.1700255464093102e-05, "loss": 8.598826825618744e-05, "step": 275960 }, { "epoch": 78.33380641498722, "grad_norm": 0.001502709579654038, "learning_rate": 2.1697416974169744e-05, "loss": 0.00040031522512435914, "step": 275970 }, { "epoch": 78.33664490491059, "grad_norm": 0.09232940524816513, "learning_rate": 2.169457848424638e-05, "loss": 0.00023426581174135208, "step": 275980 }, { "epoch": 78.33948339483395, "grad_norm": 0.02738749235868454, "learning_rate": 2.169173999432302e-05, "loss": 3.4351646900177005e-05, "step": 275990 }, { "epoch": 78.34232188475731, "grad_norm": 0.006452503614127636, "learning_rate": 2.168890150439966e-05, "loss": 0.0032000113278627396, "step": 276000 }, { "epoch": 78.34232188475731, "eval_accuracy": 0.9857569784447129, "eval_loss": 0.059838347136974335, "eval_runtime": 35.4059, "eval_samples_per_second": 444.192, "eval_steps_per_second": 6.948, "step": 276000 }, { "epoch": 78.34516037468067, "grad_norm": 0.027575038373470306, "learning_rate": 2.16860630144763e-05, "loss": 5.8380886912345886e-05, "step": 276010 }, { "epoch": 78.34799886460404, "grad_norm": 0.008039894513785839, "learning_rate": 2.168322452455294e-05, "loss": 2.766065299510956e-05, "step": 276020 }, { "epoch": 78.35083735452739, "grad_norm": 0.010400095023214817, "learning_rate": 2.168038603462958e-05, "loss": 4.1309930384159085e-05, "step": 276030 }, { "epoch": 78.35367584445075, "grad_norm": 0.002346796216443181, "learning_rate": 2.1677547544706216e-05, "loss": 7.654670625925064e-05, "step": 276040 }, { "epoch": 78.35651433437411, "grad_norm": 0.01907450333237648, "learning_rate": 2.1674709054782858e-05, "loss": 3.351029008626938e-05, "step": 276050 }, { "epoch": 78.35935282429747, "grad_norm": 0.005797024816274643, "learning_rate": 2.1671870564859496e-05, "loss": 2.658627927303314e-05, "step": 276060 }, { "epoch": 78.36219131422084, "grad_norm": 0.0015110826352611184, "learning_rate": 2.1669032074936134e-05, "loss": 5.459357053041458e-05, "step": 276070 }, { "epoch": 78.3650298041442, "grad_norm": 0.0037870227824896574, "learning_rate": 2.1666193585012775e-05, "loss": 3.193393349647522e-05, "step": 276080 }, { "epoch": 78.36786829406756, "grad_norm": 0.007657102774828672, "learning_rate": 2.1663355095089413e-05, "loss": 1.6162917017936706e-05, "step": 276090 }, { "epoch": 78.37070678399091, "grad_norm": 0.0014054074417799711, "learning_rate": 2.1660516605166054e-05, "loss": 2.7380138635635377e-05, "step": 276100 }, { "epoch": 78.37354527391427, "grad_norm": 0.05396163836121559, "learning_rate": 2.1657678115242692e-05, "loss": 3.622528165578842e-05, "step": 276110 }, { "epoch": 78.37638376383764, "grad_norm": 0.01564119942486286, "learning_rate": 2.165483962531933e-05, "loss": 2.3861043155193328e-05, "step": 276120 }, { "epoch": 78.379222253761, "grad_norm": 0.031052205711603165, "learning_rate": 2.1652001135395972e-05, "loss": 1.9061937928199767e-05, "step": 276130 }, { "epoch": 78.38206074368436, "grad_norm": 0.0026911108288913965, "learning_rate": 2.164916264547261e-05, "loss": 4.064813256263733e-05, "step": 276140 }, { "epoch": 78.38489923360773, "grad_norm": 0.006329639349132776, "learning_rate": 2.164632415554925e-05, "loss": 1.406744122505188e-05, "step": 276150 }, { "epoch": 78.38773772353107, "grad_norm": 0.008753979578614235, "learning_rate": 2.1643485665625886e-05, "loss": 8.151847869157791e-05, "step": 276160 }, { "epoch": 78.39057621345444, "grad_norm": 0.004839002620428801, "learning_rate": 2.1640647175702527e-05, "loss": 1.8562935292720794e-05, "step": 276170 }, { "epoch": 78.3934147033778, "grad_norm": 0.04729785397648811, "learning_rate": 2.163780868577917e-05, "loss": 3.418736159801483e-05, "step": 276180 }, { "epoch": 78.39625319330116, "grad_norm": 0.0005934537621214986, "learning_rate": 2.1634970195855806e-05, "loss": 3.679394721984863e-05, "step": 276190 }, { "epoch": 78.39909168322453, "grad_norm": 0.005325534846633673, "learning_rate": 2.1632131705932444e-05, "loss": 1.275818794965744e-05, "step": 276200 }, { "epoch": 78.40193017314789, "grad_norm": 0.00840358342975378, "learning_rate": 2.1629293216009082e-05, "loss": 1.8995627760887147e-05, "step": 276210 }, { "epoch": 78.40476866307125, "grad_norm": 0.016406187787652016, "learning_rate": 2.1626454726085724e-05, "loss": 2.2691860795021058e-05, "step": 276220 }, { "epoch": 78.4076071529946, "grad_norm": 0.00027128838701173663, "learning_rate": 2.1623616236162365e-05, "loss": 2.3174844682216644e-05, "step": 276230 }, { "epoch": 78.41044564291796, "grad_norm": 0.0010535336332395673, "learning_rate": 2.1620777746239003e-05, "loss": 3.6720745265483855e-05, "step": 276240 }, { "epoch": 78.41328413284133, "grad_norm": 0.001936865272000432, "learning_rate": 2.161793925631564e-05, "loss": 5.38799911737442e-05, "step": 276250 }, { "epoch": 78.41612262276469, "grad_norm": 0.022307347506284714, "learning_rate": 2.161510076639228e-05, "loss": 3.2769888639450075e-05, "step": 276260 }, { "epoch": 78.41896111268805, "grad_norm": 0.0017920030513778329, "learning_rate": 2.161226227646892e-05, "loss": 0.00019825268536806108, "step": 276270 }, { "epoch": 78.42179960261142, "grad_norm": 0.03369733691215515, "learning_rate": 2.160942378654556e-05, "loss": 0.00013624150305986406, "step": 276280 }, { "epoch": 78.42463809253478, "grad_norm": 0.0023832444567233324, "learning_rate": 2.1606585296622196e-05, "loss": 3.476887941360474e-05, "step": 276290 }, { "epoch": 78.42747658245813, "grad_norm": 0.00806689914315939, "learning_rate": 2.1603746806698838e-05, "loss": 6.945505738258362e-05, "step": 276300 }, { "epoch": 78.43031507238149, "grad_norm": 0.005916165187954903, "learning_rate": 2.1600908316775476e-05, "loss": 0.00011366736143827438, "step": 276310 }, { "epoch": 78.43315356230485, "grad_norm": 0.02291463129222393, "learning_rate": 2.1598069826852117e-05, "loss": 0.0007387185469269753, "step": 276320 }, { "epoch": 78.43599205222822, "grad_norm": 0.17845873534679413, "learning_rate": 2.1595231336928755e-05, "loss": 0.0009916316717863082, "step": 276330 }, { "epoch": 78.43883054215158, "grad_norm": 0.00495107239112258, "learning_rate": 2.1592392847005393e-05, "loss": 0.0003216346725821495, "step": 276340 }, { "epoch": 78.44166903207494, "grad_norm": 0.0025964234955608845, "learning_rate": 2.1589554357082035e-05, "loss": 7.553864270448685e-05, "step": 276350 }, { "epoch": 78.4445075219983, "grad_norm": 0.03683730959892273, "learning_rate": 2.1586715867158673e-05, "loss": 9.761769324541092e-05, "step": 276360 }, { "epoch": 78.44734601192165, "grad_norm": 0.03209395334124565, "learning_rate": 2.158387737723531e-05, "loss": 3.762766718864441e-05, "step": 276370 }, { "epoch": 78.45018450184502, "grad_norm": 0.004182604141533375, "learning_rate": 2.1581038887311952e-05, "loss": 0.00016911067068576814, "step": 276380 }, { "epoch": 78.45302299176838, "grad_norm": 0.602600634098053, "learning_rate": 2.157820039738859e-05, "loss": 6.425213068723678e-05, "step": 276390 }, { "epoch": 78.45586148169174, "grad_norm": 0.013608909212052822, "learning_rate": 2.157536190746523e-05, "loss": 5.866885185241699e-05, "step": 276400 }, { "epoch": 78.4586999716151, "grad_norm": 0.0024059766437858343, "learning_rate": 2.157252341754187e-05, "loss": 7.078051567077637e-05, "step": 276410 }, { "epoch": 78.46153846153847, "grad_norm": 0.007401955313980579, "learning_rate": 2.1569684927618507e-05, "loss": 2.7852319180965424e-05, "step": 276420 }, { "epoch": 78.46437695146182, "grad_norm": 0.013655123300850391, "learning_rate": 2.156684643769515e-05, "loss": 1.99219211935997e-05, "step": 276430 }, { "epoch": 78.46721544138518, "grad_norm": 0.006843505427241325, "learning_rate": 2.1564007947771787e-05, "loss": 3.872029483318329e-05, "step": 276440 }, { "epoch": 78.47005393130854, "grad_norm": 0.004049949813634157, "learning_rate": 2.1561169457848428e-05, "loss": 8.189510554075241e-05, "step": 276450 }, { "epoch": 78.4728924212319, "grad_norm": 0.008664168417453766, "learning_rate": 2.1558330967925063e-05, "loss": 7.063616067171097e-05, "step": 276460 }, { "epoch": 78.47573091115527, "grad_norm": 0.0010508557315915823, "learning_rate": 2.1555492478001704e-05, "loss": 0.00018311887979507446, "step": 276470 }, { "epoch": 78.47856940107863, "grad_norm": 0.012340126559138298, "learning_rate": 2.1552653988078345e-05, "loss": 0.0005357136949896812, "step": 276480 }, { "epoch": 78.481407891002, "grad_norm": 0.003483504755422473, "learning_rate": 2.1549815498154983e-05, "loss": 7.665734738111496e-05, "step": 276490 }, { "epoch": 78.48424638092534, "grad_norm": 0.004724954720586538, "learning_rate": 2.154697700823162e-05, "loss": 0.0017160005867481233, "step": 276500 }, { "epoch": 78.48424638092534, "eval_accuracy": 0.9856933935270554, "eval_loss": 0.06094194948673248, "eval_runtime": 35.9941, "eval_samples_per_second": 436.932, "eval_steps_per_second": 6.834, "step": 276500 }, { "epoch": 78.4870848708487, "grad_norm": 0.006314704194664955, "learning_rate": 2.154413851830826e-05, "loss": 4.735458642244339e-05, "step": 276510 }, { "epoch": 78.48992336077207, "grad_norm": 0.019226161763072014, "learning_rate": 2.15413000283849e-05, "loss": 0.00024100001901388167, "step": 276520 }, { "epoch": 78.49276185069543, "grad_norm": 0.004643563646823168, "learning_rate": 2.1538461538461542e-05, "loss": 5.5097788572311404e-05, "step": 276530 }, { "epoch": 78.4956003406188, "grad_norm": 0.006956690456718206, "learning_rate": 2.1535623048538177e-05, "loss": 0.00010350495576858521, "step": 276540 }, { "epoch": 78.49843883054216, "grad_norm": 0.01569577120244503, "learning_rate": 2.1532784558614818e-05, "loss": 4.7891028225421904e-05, "step": 276550 }, { "epoch": 78.50127732046552, "grad_norm": 0.37260475754737854, "learning_rate": 2.1529946068691456e-05, "loss": 0.00010033603757619858, "step": 276560 }, { "epoch": 78.50411581038887, "grad_norm": 0.008109734393656254, "learning_rate": 2.1527107578768097e-05, "loss": 6.557554006576538e-05, "step": 276570 }, { "epoch": 78.50695430031223, "grad_norm": 0.0022774047683924437, "learning_rate": 2.1524269088844735e-05, "loss": 2.3579597473144533e-05, "step": 276580 }, { "epoch": 78.5097927902356, "grad_norm": 0.0006696851924061775, "learning_rate": 2.1521430598921373e-05, "loss": 6.008967757225037e-05, "step": 276590 }, { "epoch": 78.51263128015896, "grad_norm": 0.008992358110845089, "learning_rate": 2.1518592108998015e-05, "loss": 2.4883449077606203e-05, "step": 276600 }, { "epoch": 78.51546977008232, "grad_norm": 0.010510887019336224, "learning_rate": 2.1515753619074653e-05, "loss": 5.392078310251236e-05, "step": 276610 }, { "epoch": 78.51830826000568, "grad_norm": 0.006271944846957922, "learning_rate": 2.1512915129151294e-05, "loss": 2.4934299290180206e-05, "step": 276620 }, { "epoch": 78.52114674992903, "grad_norm": 0.0011086518643423915, "learning_rate": 2.1510076639227932e-05, "loss": 2.064742147922516e-05, "step": 276630 }, { "epoch": 78.5239852398524, "grad_norm": 0.016096239909529686, "learning_rate": 2.150723814930457e-05, "loss": 3.3725984394550323e-05, "step": 276640 }, { "epoch": 78.52682372977576, "grad_norm": 0.0014012412866577506, "learning_rate": 2.150439965938121e-05, "loss": 2.2834353148937226e-05, "step": 276650 }, { "epoch": 78.52966221969912, "grad_norm": 0.10828351229429245, "learning_rate": 2.150156116945785e-05, "loss": 4.355181008577347e-05, "step": 276660 }, { "epoch": 78.53250070962248, "grad_norm": 0.0065732053481042385, "learning_rate": 2.1498722679534487e-05, "loss": 1.4256127178668975e-05, "step": 276670 }, { "epoch": 78.53533919954585, "grad_norm": 0.009081453084945679, "learning_rate": 2.149588418961113e-05, "loss": 2.8396211564540864e-05, "step": 276680 }, { "epoch": 78.53817768946921, "grad_norm": 0.0010130038717761636, "learning_rate": 2.1493045699687767e-05, "loss": 3.945119678974152e-05, "step": 276690 }, { "epoch": 78.54101617939256, "grad_norm": 0.0013796723214909434, "learning_rate": 2.1490207209764408e-05, "loss": 2.6926957070827483e-05, "step": 276700 }, { "epoch": 78.54385466931592, "grad_norm": 0.02155921421945095, "learning_rate": 2.1487368719841046e-05, "loss": 1.776423305273056e-05, "step": 276710 }, { "epoch": 78.54669315923928, "grad_norm": 0.0040605319663882256, "learning_rate": 2.1484530229917684e-05, "loss": 1.750010997056961e-05, "step": 276720 }, { "epoch": 78.54953164916265, "grad_norm": 0.0026876635383814573, "learning_rate": 2.1481691739994325e-05, "loss": 1.2510269880294799e-05, "step": 276730 }, { "epoch": 78.55237013908601, "grad_norm": 0.00788064207881689, "learning_rate": 2.1478853250070963e-05, "loss": 2.3398175835609435e-05, "step": 276740 }, { "epoch": 78.55520862900937, "grad_norm": 0.006993299350142479, "learning_rate": 2.14760147601476e-05, "loss": 0.00019395332783460616, "step": 276750 }, { "epoch": 78.55804711893273, "grad_norm": 0.0031117009930312634, "learning_rate": 2.147317627022424e-05, "loss": 0.0001408768817782402, "step": 276760 }, { "epoch": 78.56088560885608, "grad_norm": 0.0005050731706432998, "learning_rate": 2.147033778030088e-05, "loss": 0.0001548096537590027, "step": 276770 }, { "epoch": 78.56372409877945, "grad_norm": 0.024073462933301926, "learning_rate": 2.1467499290377522e-05, "loss": 3.985408693552017e-05, "step": 276780 }, { "epoch": 78.56656258870281, "grad_norm": 0.005054935347288847, "learning_rate": 2.146466080045416e-05, "loss": 0.00022168289870023729, "step": 276790 }, { "epoch": 78.56940107862617, "grad_norm": 5.803516387939453, "learning_rate": 2.1461822310530798e-05, "loss": 0.0016782917082309723, "step": 276800 }, { "epoch": 78.57223956854953, "grad_norm": 0.002971700858324766, "learning_rate": 2.1458983820607436e-05, "loss": 0.0014508109539747237, "step": 276810 }, { "epoch": 78.5750780584729, "grad_norm": 0.015191640704870224, "learning_rate": 2.1456145330684078e-05, "loss": 1.8207915127277373e-05, "step": 276820 }, { "epoch": 78.57791654839626, "grad_norm": 0.0010896167950704694, "learning_rate": 2.145330684076072e-05, "loss": 8.773617446422577e-05, "step": 276830 }, { "epoch": 78.58075503831961, "grad_norm": 0.01460184808820486, "learning_rate": 2.1450468350837354e-05, "loss": 2.457592636346817e-05, "step": 276840 }, { "epoch": 78.58359352824297, "grad_norm": 0.0026099260430783033, "learning_rate": 2.1447629860913995e-05, "loss": 1.7722323536872864e-05, "step": 276850 }, { "epoch": 78.58643201816633, "grad_norm": 0.003638142952695489, "learning_rate": 2.1444791370990633e-05, "loss": 3.992300480604172e-05, "step": 276860 }, { "epoch": 78.5892705080897, "grad_norm": 0.004397358745336533, "learning_rate": 2.1441952881067274e-05, "loss": 1.9506923854351045e-05, "step": 276870 }, { "epoch": 78.59210899801306, "grad_norm": 0.01694781892001629, "learning_rate": 2.1439114391143912e-05, "loss": 3.015287220478058e-05, "step": 276880 }, { "epoch": 78.59494748793642, "grad_norm": 0.009743047878146172, "learning_rate": 2.143627590122055e-05, "loss": 5.089696496725082e-05, "step": 276890 }, { "epoch": 78.59778597785977, "grad_norm": 0.0023628249764442444, "learning_rate": 2.143343741129719e-05, "loss": 2.6285089552402498e-05, "step": 276900 }, { "epoch": 78.60062446778313, "grad_norm": 0.0015557199949398637, "learning_rate": 2.143059892137383e-05, "loss": 1.0107643902301788e-05, "step": 276910 }, { "epoch": 78.6034629577065, "grad_norm": 0.014500112272799015, "learning_rate": 2.142776043145047e-05, "loss": 6.67862594127655e-05, "step": 276920 }, { "epoch": 78.60630144762986, "grad_norm": 0.0014731361297890544, "learning_rate": 2.142492194152711e-05, "loss": 1.536812633275986e-05, "step": 276930 }, { "epoch": 78.60913993755322, "grad_norm": 0.012170016765594482, "learning_rate": 2.1422083451603747e-05, "loss": 2.1319091320037842e-05, "step": 276940 }, { "epoch": 78.61197842747659, "grad_norm": 0.003219262231141329, "learning_rate": 2.141924496168039e-05, "loss": 4.027672111988068e-05, "step": 276950 }, { "epoch": 78.61481691739995, "grad_norm": 0.0002839925291482359, "learning_rate": 2.1416406471757026e-05, "loss": 2.694372087717056e-05, "step": 276960 }, { "epoch": 78.6176554073233, "grad_norm": 0.062194015830755234, "learning_rate": 2.1413567981833664e-05, "loss": 3.0860118567943576e-05, "step": 276970 }, { "epoch": 78.62049389724666, "grad_norm": 0.046429578214883804, "learning_rate": 2.1410729491910306e-05, "loss": 2.054814249277115e-05, "step": 276980 }, { "epoch": 78.62333238717002, "grad_norm": 0.0034000584855675697, "learning_rate": 2.1407891001986944e-05, "loss": 2.6601366698741913e-05, "step": 276990 }, { "epoch": 78.62617087709339, "grad_norm": 0.005852679722011089, "learning_rate": 2.1405052512063585e-05, "loss": 5.5648013949394225e-05, "step": 277000 }, { "epoch": 78.62617087709339, "eval_accuracy": 0.9872194315508361, "eval_loss": 0.052299726754426956, "eval_runtime": 35.3627, "eval_samples_per_second": 444.734, "eval_steps_per_second": 6.956, "step": 277000 }, { "epoch": 78.62900936701675, "grad_norm": 0.000997699098661542, "learning_rate": 2.140221402214022e-05, "loss": 1.946277916431427e-05, "step": 277010 }, { "epoch": 78.63184785694011, "grad_norm": 0.753031313419342, "learning_rate": 2.139937553221686e-05, "loss": 0.0002473689615726471, "step": 277020 }, { "epoch": 78.63468634686348, "grad_norm": 0.016389599069952965, "learning_rate": 2.1396537042293502e-05, "loss": 0.00044542979449033735, "step": 277030 }, { "epoch": 78.63752483678682, "grad_norm": 0.007434281520545483, "learning_rate": 2.139369855237014e-05, "loss": 0.0001871895045042038, "step": 277040 }, { "epoch": 78.64036332671019, "grad_norm": 0.050448887050151825, "learning_rate": 2.139086006244678e-05, "loss": 7.7715702354908e-05, "step": 277050 }, { "epoch": 78.64320181663355, "grad_norm": 0.023820975795388222, "learning_rate": 2.1388021572523416e-05, "loss": 0.00043665897101163864, "step": 277060 }, { "epoch": 78.64604030655691, "grad_norm": 0.018948396667838097, "learning_rate": 2.1385183082600058e-05, "loss": 6.668530404567719e-05, "step": 277070 }, { "epoch": 78.64887879648028, "grad_norm": 0.00267637986689806, "learning_rate": 2.13823445926767e-05, "loss": 0.0001541653648018837, "step": 277080 }, { "epoch": 78.65171728640364, "grad_norm": 0.06800585985183716, "learning_rate": 2.1379506102753337e-05, "loss": 7.404610514640808e-05, "step": 277090 }, { "epoch": 78.65455577632699, "grad_norm": 0.0012730701128020883, "learning_rate": 2.1376667612829975e-05, "loss": 3.088023513555527e-05, "step": 277100 }, { "epoch": 78.65739426625035, "grad_norm": 0.0006868411437608302, "learning_rate": 2.1373829122906613e-05, "loss": 2.689845860004425e-05, "step": 277110 }, { "epoch": 78.66023275617371, "grad_norm": 0.002812118036672473, "learning_rate": 2.1370990632983254e-05, "loss": 3.185644745826721e-05, "step": 277120 }, { "epoch": 78.66307124609708, "grad_norm": 0.394640177488327, "learning_rate": 2.1368152143059896e-05, "loss": 0.00010283049196004867, "step": 277130 }, { "epoch": 78.66590973602044, "grad_norm": 0.0065993983298540115, "learning_rate": 2.136531365313653e-05, "loss": 0.00034877397119998933, "step": 277140 }, { "epoch": 78.6687482259438, "grad_norm": 0.11017755419015884, "learning_rate": 2.1362475163213172e-05, "loss": 4.10204753279686e-05, "step": 277150 }, { "epoch": 78.67158671586716, "grad_norm": 0.01004171185195446, "learning_rate": 2.135963667328981e-05, "loss": 2.904050052165985e-05, "step": 277160 }, { "epoch": 78.67442520579051, "grad_norm": 0.022917933762073517, "learning_rate": 2.135679818336645e-05, "loss": 5.923658609390259e-05, "step": 277170 }, { "epoch": 78.67726369571388, "grad_norm": 0.028608860448002815, "learning_rate": 2.135395969344309e-05, "loss": 0.00012238509953022003, "step": 277180 }, { "epoch": 78.68010218563724, "grad_norm": 0.16730579733848572, "learning_rate": 2.1351121203519727e-05, "loss": 0.00038619861006736753, "step": 277190 }, { "epoch": 78.6829406755606, "grad_norm": 0.0010742867598310113, "learning_rate": 2.134828271359637e-05, "loss": 4.541967064142227e-05, "step": 277200 }, { "epoch": 78.68577916548396, "grad_norm": 0.15172189474105835, "learning_rate": 2.1345444223673006e-05, "loss": 6.366949528455735e-05, "step": 277210 }, { "epoch": 78.68861765540733, "grad_norm": 0.0032911747694015503, "learning_rate": 2.1342605733749644e-05, "loss": 2.1764449775218965e-05, "step": 277220 }, { "epoch": 78.69145614533069, "grad_norm": 0.005126433912664652, "learning_rate": 2.1339767243826286e-05, "loss": 2.7806311845779418e-05, "step": 277230 }, { "epoch": 78.69429463525404, "grad_norm": 0.012477393262088299, "learning_rate": 2.1336928753902924e-05, "loss": 0.0003509635105729103, "step": 277240 }, { "epoch": 78.6971331251774, "grad_norm": 0.017468931153416634, "learning_rate": 2.1334090263979565e-05, "loss": 0.00034048669040203093, "step": 277250 }, { "epoch": 78.69997161510076, "grad_norm": 0.22219255566596985, "learning_rate": 2.1331251774056203e-05, "loss": 5.21492213010788e-05, "step": 277260 }, { "epoch": 78.70281010502413, "grad_norm": 0.0025014816783368587, "learning_rate": 2.132841328413284e-05, "loss": 9.754244238138198e-05, "step": 277270 }, { "epoch": 78.70564859494749, "grad_norm": 0.002019779523834586, "learning_rate": 2.1325574794209483e-05, "loss": 2.941358834505081e-05, "step": 277280 }, { "epoch": 78.70848708487085, "grad_norm": 0.012696734629571438, "learning_rate": 2.132273630428612e-05, "loss": 2.5066547095775604e-05, "step": 277290 }, { "epoch": 78.71132557479422, "grad_norm": 0.02331140823662281, "learning_rate": 2.1319897814362762e-05, "loss": 2.9376894235610963e-05, "step": 277300 }, { "epoch": 78.71416406471756, "grad_norm": 0.0031498221214860678, "learning_rate": 2.1317059324439396e-05, "loss": 2.8193369507789613e-05, "step": 277310 }, { "epoch": 78.71700255464093, "grad_norm": 0.003148697316646576, "learning_rate": 2.1314220834516038e-05, "loss": 4.630833864212036e-05, "step": 277320 }, { "epoch": 78.71984104456429, "grad_norm": 0.018818378448486328, "learning_rate": 2.131138234459268e-05, "loss": 0.0005057942122220993, "step": 277330 }, { "epoch": 78.72267953448765, "grad_norm": 0.010882114060223103, "learning_rate": 2.1308543854669317e-05, "loss": 0.00018952470272779465, "step": 277340 }, { "epoch": 78.72551802441102, "grad_norm": 0.011903831735253334, "learning_rate": 2.1305705364745955e-05, "loss": 0.00022726990282535554, "step": 277350 }, { "epoch": 78.72835651433438, "grad_norm": 0.019008630886673927, "learning_rate": 2.1302866874822593e-05, "loss": 0.0003583662211894989, "step": 277360 }, { "epoch": 78.73119500425773, "grad_norm": 0.5173274278640747, "learning_rate": 2.1300028384899235e-05, "loss": 0.0008064195513725281, "step": 277370 }, { "epoch": 78.73403349418109, "grad_norm": 0.2273377776145935, "learning_rate": 2.1297189894975876e-05, "loss": 7.051415741443634e-05, "step": 277380 }, { "epoch": 78.73687198410445, "grad_norm": 0.0017937954980880022, "learning_rate": 2.1294351405052514e-05, "loss": 3.896653652191162e-05, "step": 277390 }, { "epoch": 78.73971047402782, "grad_norm": 0.0021729315631091595, "learning_rate": 2.1291512915129152e-05, "loss": 0.00012603141367435456, "step": 277400 }, { "epoch": 78.74254896395118, "grad_norm": 0.18016138672828674, "learning_rate": 2.1288674425205793e-05, "loss": 8.708816021680832e-05, "step": 277410 }, { "epoch": 78.74538745387454, "grad_norm": 0.9948338866233826, "learning_rate": 2.128583593528243e-05, "loss": 0.00013790111988782884, "step": 277420 }, { "epoch": 78.7482259437979, "grad_norm": 0.010926368646323681, "learning_rate": 2.128299744535907e-05, "loss": 4.277899861335754e-05, "step": 277430 }, { "epoch": 78.75106443372125, "grad_norm": 0.008277084678411484, "learning_rate": 2.1280158955435707e-05, "loss": 0.0007126530632376671, "step": 277440 }, { "epoch": 78.75390292364462, "grad_norm": 0.010479321703314781, "learning_rate": 2.127732046551235e-05, "loss": 0.00017502885311841964, "step": 277450 }, { "epoch": 78.75674141356798, "grad_norm": 0.010785236954689026, "learning_rate": 2.127448197558899e-05, "loss": 0.00013577733188867568, "step": 277460 }, { "epoch": 78.75957990349134, "grad_norm": 0.026364825665950775, "learning_rate": 2.1271643485665628e-05, "loss": 0.00019957348704338073, "step": 277470 }, { "epoch": 78.7624183934147, "grad_norm": 0.003624275792390108, "learning_rate": 2.1268804995742266e-05, "loss": 7.17945396900177e-05, "step": 277480 }, { "epoch": 78.76525688333807, "grad_norm": 0.008457791060209274, "learning_rate": 2.1265966505818904e-05, "loss": 0.00017161723226308823, "step": 277490 }, { "epoch": 78.76809537326143, "grad_norm": 0.11698398739099503, "learning_rate": 2.1263128015895545e-05, "loss": 0.0006714129820466041, "step": 277500 }, { "epoch": 78.76809537326143, "eval_accuracy": 0.9856933935270554, "eval_loss": 0.06143480911850929, "eval_runtime": 35.8204, "eval_samples_per_second": 439.052, "eval_steps_per_second": 6.868, "step": 277500 }, { "epoch": 78.77093386318478, "grad_norm": 0.001198489568196237, "learning_rate": 2.1260289525972187e-05, "loss": 4.986729472875595e-05, "step": 277510 }, { "epoch": 78.77377235310814, "grad_norm": 0.0019617322832345963, "learning_rate": 2.125745103604882e-05, "loss": 0.0005502471700310707, "step": 277520 }, { "epoch": 78.7766108430315, "grad_norm": 0.002345732180401683, "learning_rate": 2.1254612546125463e-05, "loss": 3.279075026512146e-05, "step": 277530 }, { "epoch": 78.77944933295487, "grad_norm": 0.025465548038482666, "learning_rate": 2.12517740562021e-05, "loss": 9.401310235261917e-05, "step": 277540 }, { "epoch": 78.78228782287823, "grad_norm": 0.004710640292614698, "learning_rate": 2.1248935566278742e-05, "loss": 6.0574524104595186e-05, "step": 277550 }, { "epoch": 78.7851263128016, "grad_norm": 0.002289371332153678, "learning_rate": 2.124609707635538e-05, "loss": 2.2841431200504302e-05, "step": 277560 }, { "epoch": 78.78796480272496, "grad_norm": 0.003636875655502081, "learning_rate": 2.1243258586432018e-05, "loss": 5.0689838826656344e-05, "step": 277570 }, { "epoch": 78.7908032926483, "grad_norm": 0.003410456934943795, "learning_rate": 2.124042009650866e-05, "loss": 2.6602670550346376e-05, "step": 277580 }, { "epoch": 78.79364178257167, "grad_norm": 0.018536299467086792, "learning_rate": 2.1237581606585297e-05, "loss": 2.318304032087326e-05, "step": 277590 }, { "epoch": 78.79648027249503, "grad_norm": 0.0004834143037442118, "learning_rate": 2.1234743116661935e-05, "loss": 2.0181015133857728e-05, "step": 277600 }, { "epoch": 78.7993187624184, "grad_norm": 0.02257700078189373, "learning_rate": 2.1231904626738577e-05, "loss": 3.182552754878998e-05, "step": 277610 }, { "epoch": 78.80215725234176, "grad_norm": 0.002307716989889741, "learning_rate": 2.1229066136815215e-05, "loss": 3.897696733474731e-05, "step": 277620 }, { "epoch": 78.80499574226512, "grad_norm": 0.00482964888215065, "learning_rate": 2.1226227646891856e-05, "loss": 2.011805772781372e-05, "step": 277630 }, { "epoch": 78.80783423218847, "grad_norm": 0.0107607776299119, "learning_rate": 2.1223389156968494e-05, "loss": 3.2708048820495605e-05, "step": 277640 }, { "epoch": 78.81067272211183, "grad_norm": 0.014721193350851536, "learning_rate": 2.1220550667045132e-05, "loss": 5.4031051695346834e-05, "step": 277650 }, { "epoch": 78.8135112120352, "grad_norm": 0.0037748904433101416, "learning_rate": 2.1217712177121773e-05, "loss": 5.531869828701019e-05, "step": 277660 }, { "epoch": 78.81634970195856, "grad_norm": 0.005003413651138544, "learning_rate": 2.121487368719841e-05, "loss": 9.875185787677765e-06, "step": 277670 }, { "epoch": 78.81918819188192, "grad_norm": 0.006060135085135698, "learning_rate": 2.1212035197275053e-05, "loss": 3.489963710308075e-05, "step": 277680 }, { "epoch": 78.82202668180528, "grad_norm": 0.002233265433460474, "learning_rate": 2.1209196707351687e-05, "loss": 1.1048465967178345e-05, "step": 277690 }, { "epoch": 78.82486517172865, "grad_norm": 0.002476554596796632, "learning_rate": 2.120635821742833e-05, "loss": 2.1053478121757507e-05, "step": 277700 }, { "epoch": 78.827703661652, "grad_norm": 0.0011827856069430709, "learning_rate": 2.120351972750497e-05, "loss": 4.849322140216827e-05, "step": 277710 }, { "epoch": 78.83054215157536, "grad_norm": 0.004908084869384766, "learning_rate": 2.1200681237581608e-05, "loss": 1.688636839389801e-05, "step": 277720 }, { "epoch": 78.83338064149872, "grad_norm": 0.01528698019683361, "learning_rate": 2.1197842747658246e-05, "loss": 7.21704214811325e-05, "step": 277730 }, { "epoch": 78.83621913142208, "grad_norm": 0.005432081874459982, "learning_rate": 2.1195004257734884e-05, "loss": 2.8700381517410278e-05, "step": 277740 }, { "epoch": 78.83905762134545, "grad_norm": 0.0035681300796568394, "learning_rate": 2.1192165767811525e-05, "loss": 0.0005049986764788627, "step": 277750 }, { "epoch": 78.84189611126881, "grad_norm": 0.004915240220725536, "learning_rate": 2.1189327277888167e-05, "loss": 0.00013014525175094604, "step": 277760 }, { "epoch": 78.84473460119217, "grad_norm": 0.07859182357788086, "learning_rate": 2.1186488787964805e-05, "loss": 2.5495514273643492e-05, "step": 277770 }, { "epoch": 78.84757309111552, "grad_norm": 0.00042357403435744345, "learning_rate": 2.1183650298041443e-05, "loss": 4.675090312957764e-05, "step": 277780 }, { "epoch": 78.85041158103888, "grad_norm": 0.0015654437011107802, "learning_rate": 2.118081180811808e-05, "loss": 1.6327761113643646e-05, "step": 277790 }, { "epoch": 78.85325007096225, "grad_norm": 0.03619042783975601, "learning_rate": 2.1177973318194722e-05, "loss": 3.32830473780632e-05, "step": 277800 }, { "epoch": 78.85608856088561, "grad_norm": 0.008737333118915558, "learning_rate": 2.117513482827136e-05, "loss": 1.529380679130554e-05, "step": 277810 }, { "epoch": 78.85892705080897, "grad_norm": 0.004026961047202349, "learning_rate": 2.1172296338347998e-05, "loss": 3.451108932495117e-05, "step": 277820 }, { "epoch": 78.86176554073234, "grad_norm": 0.0020802216604351997, "learning_rate": 2.116945784842464e-05, "loss": 1.546051353216171e-05, "step": 277830 }, { "epoch": 78.86460403065568, "grad_norm": 0.009689856320619583, "learning_rate": 2.1166619358501278e-05, "loss": 5.245208740234375e-05, "step": 277840 }, { "epoch": 78.86744252057905, "grad_norm": 0.0011117917019873857, "learning_rate": 2.116378086857792e-05, "loss": 1.7099082469940186e-05, "step": 277850 }, { "epoch": 78.87028101050241, "grad_norm": 0.0017848311690613627, "learning_rate": 2.1160942378654557e-05, "loss": 2.6216544210910796e-05, "step": 277860 }, { "epoch": 78.87311950042577, "grad_norm": 0.0040672542527318, "learning_rate": 2.1158103888731195e-05, "loss": 3.2524578273296356e-05, "step": 277870 }, { "epoch": 78.87595799034914, "grad_norm": Infinity, "learning_rate": 2.1155265398807836e-05, "loss": 0.004633947461843491, "step": 277880 }, { "epoch": 78.8787964802725, "grad_norm": 0.0028650849126279354, "learning_rate": 2.1152710757876813e-05, "loss": 0.0006190584972500801, "step": 277890 }, { "epoch": 78.88163497019586, "grad_norm": 7.491851329803467, "learning_rate": 2.1150156116945786e-05, "loss": 0.004408992826938629, "step": 277900 }, { "epoch": 78.88447346011921, "grad_norm": 0.007317090407013893, "learning_rate": 2.1147317627022424e-05, "loss": 0.0006704114377498627, "step": 277910 }, { "epoch": 78.88731195004257, "grad_norm": 0.48153719305992126, "learning_rate": 2.1144479137099062e-05, "loss": 0.022649028897285463, "step": 277920 }, { "epoch": 78.89015043996594, "grad_norm": 0.03244597464799881, "learning_rate": 2.1141640647175703e-05, "loss": 0.00017420742660760878, "step": 277930 }, { "epoch": 78.8929889298893, "grad_norm": 0.010008308105170727, "learning_rate": 2.1138802157252345e-05, "loss": 0.007871569693088531, "step": 277940 }, { "epoch": 78.89582741981266, "grad_norm": 0.028146892786026, "learning_rate": 2.1135963667328983e-05, "loss": 0.00345056988298893, "step": 277950 }, { "epoch": 78.89866590973602, "grad_norm": 0.00776858301833272, "learning_rate": 2.113312517740562e-05, "loss": 0.0027644861489534377, "step": 277960 }, { "epoch": 78.90150439965939, "grad_norm": 0.013901561498641968, "learning_rate": 2.113028668748226e-05, "loss": 0.00020320136100053786, "step": 277970 }, { "epoch": 78.90434288958274, "grad_norm": 0.03931991383433342, "learning_rate": 2.11274481975589e-05, "loss": 7.81310722231865e-05, "step": 277980 }, { "epoch": 78.9071813795061, "grad_norm": 0.0021226247772574425, "learning_rate": 2.112460970763554e-05, "loss": 0.0008694693446159363, "step": 277990 }, { "epoch": 78.91001986942946, "grad_norm": 0.480803519487381, "learning_rate": 2.1121771217712176e-05, "loss": 0.00014777444303035737, "step": 278000 }, { "epoch": 78.91001986942946, "eval_accuracy": 0.9823869778088637, "eval_loss": 0.07554083317518234, "eval_runtime": 35.5217, "eval_samples_per_second": 442.744, "eval_steps_per_second": 6.925, "step": 278000 }, { "epoch": 78.91285835935282, "grad_norm": 0.016033904626965523, "learning_rate": 2.1118932727788817e-05, "loss": 0.0001064501702785492, "step": 278010 }, { "epoch": 78.91569684927619, "grad_norm": 0.0005088847246952355, "learning_rate": 2.1116094237865455e-05, "loss": 3.2065436244010925e-05, "step": 278020 }, { "epoch": 78.91853533919955, "grad_norm": 0.0038839250337332487, "learning_rate": 2.1113255747942097e-05, "loss": 9.748078882694244e-05, "step": 278030 }, { "epoch": 78.92137382912291, "grad_norm": 9.57472038269043, "learning_rate": 2.1110417258018735e-05, "loss": 0.001474701054394245, "step": 278040 }, { "epoch": 78.92421231904626, "grad_norm": 0.006355457939207554, "learning_rate": 2.1107578768095373e-05, "loss": 0.0005518974736332894, "step": 278050 }, { "epoch": 78.92705080896962, "grad_norm": 0.024061908945441246, "learning_rate": 2.1104740278172014e-05, "loss": 5.5983103811740877e-05, "step": 278060 }, { "epoch": 78.92988929889299, "grad_norm": 0.0031897653825581074, "learning_rate": 2.1101901788248652e-05, "loss": 4.16858121752739e-05, "step": 278070 }, { "epoch": 78.93272778881635, "grad_norm": 0.004513618536293507, "learning_rate": 2.109906329832529e-05, "loss": 4.8578903079032895e-05, "step": 278080 }, { "epoch": 78.93556627873971, "grad_norm": 0.021200338378548622, "learning_rate": 2.109622480840193e-05, "loss": 7.935389876365662e-05, "step": 278090 }, { "epoch": 78.93840476866308, "grad_norm": 0.02625785768032074, "learning_rate": 2.109338631847857e-05, "loss": 0.00030712652951478956, "step": 278100 }, { "epoch": 78.94124325858643, "grad_norm": 0.25884270668029785, "learning_rate": 2.109054782855521e-05, "loss": 0.0001218484714627266, "step": 278110 }, { "epoch": 78.94408174850979, "grad_norm": 0.417813777923584, "learning_rate": 2.108770933863185e-05, "loss": 0.0005534367635846138, "step": 278120 }, { "epoch": 78.94692023843315, "grad_norm": 0.1713506579399109, "learning_rate": 2.1084870848708487e-05, "loss": 7.879380136728286e-05, "step": 278130 }, { "epoch": 78.94975872835651, "grad_norm": 0.012881558388471603, "learning_rate": 2.1082032358785128e-05, "loss": 0.00032359529286623, "step": 278140 }, { "epoch": 78.95259721827988, "grad_norm": 0.006097471807152033, "learning_rate": 2.1079193868861766e-05, "loss": 1.9627809524536133e-05, "step": 278150 }, { "epoch": 78.95543570820324, "grad_norm": 0.005774075165390968, "learning_rate": 2.1076355378938408e-05, "loss": 0.00011238791048526764, "step": 278160 }, { "epoch": 78.9582741981266, "grad_norm": 0.002144763246178627, "learning_rate": 2.1073516889015046e-05, "loss": 5.606096237897873e-05, "step": 278170 }, { "epoch": 78.96111268804995, "grad_norm": 0.008417048491537571, "learning_rate": 2.1070678399091683e-05, "loss": 4.3276511132717135e-05, "step": 278180 }, { "epoch": 78.96395117797331, "grad_norm": 0.010577458888292313, "learning_rate": 2.1067839909168325e-05, "loss": 6.626900285482407e-05, "step": 278190 }, { "epoch": 78.96678966789668, "grad_norm": 0.00047251026262529194, "learning_rate": 2.1065001419244963e-05, "loss": 6.381440907716751e-05, "step": 278200 }, { "epoch": 78.96962815782004, "grad_norm": 2.8739213943481445, "learning_rate": 2.10621629293216e-05, "loss": 0.00032305531203746796, "step": 278210 }, { "epoch": 78.9724666477434, "grad_norm": 0.0144611531868577, "learning_rate": 2.1059324439398242e-05, "loss": 0.0015254948288202287, "step": 278220 }, { "epoch": 78.97530513766677, "grad_norm": 0.0054060351103544235, "learning_rate": 2.105648594947488e-05, "loss": 9.116679430007935e-05, "step": 278230 }, { "epoch": 78.97814362759013, "grad_norm": 0.05688755214214325, "learning_rate": 2.105364745955152e-05, "loss": 0.0011996090412139892, "step": 278240 }, { "epoch": 78.98098211751348, "grad_norm": 0.07764909416437149, "learning_rate": 2.105080896962816e-05, "loss": 0.0002631610259413719, "step": 278250 }, { "epoch": 78.98382060743684, "grad_norm": 0.006361846812069416, "learning_rate": 2.1047970479704798e-05, "loss": 0.008542591333389282, "step": 278260 }, { "epoch": 78.9866590973602, "grad_norm": 0.005821028258651495, "learning_rate": 2.104513198978144e-05, "loss": 0.0007986553013324737, "step": 278270 }, { "epoch": 78.98949758728357, "grad_norm": 0.011330491863191128, "learning_rate": 2.1042293499858077e-05, "loss": 0.0006408471614122391, "step": 278280 }, { "epoch": 78.99233607720693, "grad_norm": 0.3057596981525421, "learning_rate": 2.1039455009934715e-05, "loss": 5.811825394630432e-05, "step": 278290 }, { "epoch": 78.99517456713029, "grad_norm": 0.016198541969060898, "learning_rate": 2.1036616520011353e-05, "loss": 0.0032142847776412964, "step": 278300 }, { "epoch": 78.99801305705365, "grad_norm": 0.0025581195950508118, "learning_rate": 2.1033778030087994e-05, "loss": 0.00018297024071216582, "step": 278310 }, { "epoch": 79.000851546977, "grad_norm": 0.10167727619409561, "learning_rate": 2.1030939540164636e-05, "loss": 3.434914106037468e-05, "step": 278320 }, { "epoch": 79.00369003690037, "grad_norm": 0.06647836416959763, "learning_rate": 2.1028101050241274e-05, "loss": 5.186963826417923e-05, "step": 278330 }, { "epoch": 79.00652852682373, "grad_norm": 0.026488129049539566, "learning_rate": 2.102526256031791e-05, "loss": 8.410066366195678e-05, "step": 278340 }, { "epoch": 79.00936701674709, "grad_norm": 0.015518460422754288, "learning_rate": 2.102242407039455e-05, "loss": 2.4609640240669252e-05, "step": 278350 }, { "epoch": 79.01220550667045, "grad_norm": 0.004447609186172485, "learning_rate": 2.101958558047119e-05, "loss": 0.00025353282690048217, "step": 278360 }, { "epoch": 79.01504399659382, "grad_norm": 5.140649318695068, "learning_rate": 2.1016747090547832e-05, "loss": 0.0008824100717902183, "step": 278370 }, { "epoch": 79.01788248651717, "grad_norm": 0.0026577808894217014, "learning_rate": 2.1013908600624467e-05, "loss": 2.3432262241840363e-05, "step": 278380 }, { "epoch": 79.02072097644053, "grad_norm": 0.003594697220250964, "learning_rate": 2.101107011070111e-05, "loss": 9.407103061676025e-05, "step": 278390 }, { "epoch": 79.02355946636389, "grad_norm": 0.014376549050211906, "learning_rate": 2.1008231620777746e-05, "loss": 1.8918514251708983e-05, "step": 278400 }, { "epoch": 79.02639795628725, "grad_norm": 0.033634383231401443, "learning_rate": 2.1005393130854388e-05, "loss": 1.676473766565323e-05, "step": 278410 }, { "epoch": 79.02923644621062, "grad_norm": 0.009742188267409801, "learning_rate": 2.1002554640931026e-05, "loss": 1.8651410937309264e-05, "step": 278420 }, { "epoch": 79.03207493613398, "grad_norm": 0.0014199689030647278, "learning_rate": 2.0999716151007664e-05, "loss": 2.3317337036132812e-05, "step": 278430 }, { "epoch": 79.03491342605734, "grad_norm": 0.006210260558873415, "learning_rate": 2.0996877661084305e-05, "loss": 1.550409942865372e-05, "step": 278440 }, { "epoch": 79.03775191598069, "grad_norm": 0.003720544744282961, "learning_rate": 2.0994039171160943e-05, "loss": 1.0113418102264405e-05, "step": 278450 }, { "epoch": 79.04059040590406, "grad_norm": 0.006242755800485611, "learning_rate": 2.0991200681237584e-05, "loss": 3.0077248811721802e-05, "step": 278460 }, { "epoch": 79.04342889582742, "grad_norm": 0.006571725010871887, "learning_rate": 2.0988362191314222e-05, "loss": 1.7653591930866243e-05, "step": 278470 }, { "epoch": 79.04626738575078, "grad_norm": 0.0011681988835334778, "learning_rate": 2.098552370139086e-05, "loss": 1.6642920672893525e-05, "step": 278480 }, { "epoch": 79.04910587567414, "grad_norm": 0.041572168469429016, "learning_rate": 2.0982685211467502e-05, "loss": 2.128053456544876e-05, "step": 278490 }, { "epoch": 79.0519443655975, "grad_norm": 0.001899812021292746, "learning_rate": 2.097984672154414e-05, "loss": 1.1155381798744202e-05, "step": 278500 }, { "epoch": 79.0519443655975, "eval_accuracy": 0.9870922617155211, "eval_loss": 0.053126007318496704, "eval_runtime": 35.1503, "eval_samples_per_second": 447.422, "eval_steps_per_second": 6.999, "step": 278500 }, { "epoch": 79.05478285552087, "grad_norm": 0.000574699544813484, "learning_rate": 2.0977008231620778e-05, "loss": 1.9862875342369078e-05, "step": 278510 }, { "epoch": 79.05762134544422, "grad_norm": 0.0072406805120408535, "learning_rate": 2.097416974169742e-05, "loss": 5.959346890449524e-05, "step": 278520 }, { "epoch": 79.06045983536758, "grad_norm": 0.0009358797688037157, "learning_rate": 2.0971331251774057e-05, "loss": 1.9973143935203552e-05, "step": 278530 }, { "epoch": 79.06329832529094, "grad_norm": 0.027022583410143852, "learning_rate": 2.09684927618507e-05, "loss": 4.1182152926921844e-05, "step": 278540 }, { "epoch": 79.0661368152143, "grad_norm": 0.009398938156664371, "learning_rate": 2.0965654271927333e-05, "loss": 0.0004428522661328316, "step": 278550 }, { "epoch": 79.06897530513767, "grad_norm": 6.0500688552856445, "learning_rate": 2.0962815782003974e-05, "loss": 0.0009860137477517128, "step": 278560 }, { "epoch": 79.07181379506103, "grad_norm": 0.007410776801407337, "learning_rate": 2.0959977292080616e-05, "loss": 0.000741385854780674, "step": 278570 }, { "epoch": 79.07465228498438, "grad_norm": 0.006247620563954115, "learning_rate": 2.0957138802157254e-05, "loss": 5.16137108206749e-05, "step": 278580 }, { "epoch": 79.07749077490774, "grad_norm": 0.001142855267971754, "learning_rate": 2.0954300312233892e-05, "loss": 0.0009515151381492615, "step": 278590 }, { "epoch": 79.08032926483111, "grad_norm": 0.006187325343489647, "learning_rate": 2.095146182231053e-05, "loss": 0.0008418688550591469, "step": 278600 }, { "epoch": 79.08316775475447, "grad_norm": 0.0043062856420874596, "learning_rate": 2.094862333238717e-05, "loss": 5.529876798391342e-05, "step": 278610 }, { "epoch": 79.08600624467783, "grad_norm": 3.544539451599121, "learning_rate": 2.0945784842463813e-05, "loss": 0.003383653610944748, "step": 278620 }, { "epoch": 79.0888447346012, "grad_norm": 0.052254483103752136, "learning_rate": 2.094294635254045e-05, "loss": 6.665326654911041e-05, "step": 278630 }, { "epoch": 79.09168322452456, "grad_norm": 0.029597440734505653, "learning_rate": 2.094010786261709e-05, "loss": 9.693987667560577e-05, "step": 278640 }, { "epoch": 79.09452171444791, "grad_norm": 0.026736853644251823, "learning_rate": 2.0937269372693726e-05, "loss": 0.001734045334160328, "step": 278650 }, { "epoch": 79.09736020437127, "grad_norm": 0.00994840171188116, "learning_rate": 2.0934430882770368e-05, "loss": 0.00023868344724178314, "step": 278660 }, { "epoch": 79.10019869429463, "grad_norm": 0.0017448232974857092, "learning_rate": 2.093159239284701e-05, "loss": 0.0032319970428943633, "step": 278670 }, { "epoch": 79.103037184218, "grad_norm": 0.017981024459004402, "learning_rate": 2.0928753902923644e-05, "loss": 0.0002672340720891953, "step": 278680 }, { "epoch": 79.10587567414136, "grad_norm": 0.12494487315416336, "learning_rate": 2.0925915413000285e-05, "loss": 0.0009625459089875222, "step": 278690 }, { "epoch": 79.10871416406472, "grad_norm": 0.6213298439979553, "learning_rate": 2.0923076923076923e-05, "loss": 0.0003815708681941032, "step": 278700 }, { "epoch": 79.11155265398808, "grad_norm": 0.1132315918803215, "learning_rate": 2.0920238433153565e-05, "loss": 0.00011567715555429458, "step": 278710 }, { "epoch": 79.11439114391143, "grad_norm": 0.0027050827629864216, "learning_rate": 2.0917399943230203e-05, "loss": 8.899886161088944e-05, "step": 278720 }, { "epoch": 79.1172296338348, "grad_norm": 0.007126845885068178, "learning_rate": 2.091456145330684e-05, "loss": 4.6284496784210204e-05, "step": 278730 }, { "epoch": 79.12006812375816, "grad_norm": 0.0020604950841516256, "learning_rate": 2.0911722963383482e-05, "loss": 2.394486218690872e-05, "step": 278740 }, { "epoch": 79.12290661368152, "grad_norm": 0.006936684250831604, "learning_rate": 2.090888447346012e-05, "loss": 4.3303519487380984e-05, "step": 278750 }, { "epoch": 79.12574510360488, "grad_norm": 2.41756272315979, "learning_rate": 2.0906045983536758e-05, "loss": 0.0003839416429400444, "step": 278760 }, { "epoch": 79.12858359352825, "grad_norm": 0.0021845223382115364, "learning_rate": 2.09032074936134e-05, "loss": 3.039799630641937e-05, "step": 278770 }, { "epoch": 79.13142208345161, "grad_norm": 0.002067985711619258, "learning_rate": 2.0900369003690037e-05, "loss": 4.0397234261035916e-05, "step": 278780 }, { "epoch": 79.13426057337496, "grad_norm": 0.03203170374035835, "learning_rate": 2.089753051376668e-05, "loss": 3.331899642944336e-05, "step": 278790 }, { "epoch": 79.13709906329832, "grad_norm": 0.001534786308184266, "learning_rate": 2.0894692023843317e-05, "loss": 3.089364618062973e-05, "step": 278800 }, { "epoch": 79.13993755322169, "grad_norm": 0.009415646083652973, "learning_rate": 2.0891853533919955e-05, "loss": 6.072353571653366e-05, "step": 278810 }, { "epoch": 79.14277604314505, "grad_norm": 0.006202016957104206, "learning_rate": 2.0889015043996596e-05, "loss": 0.00011152289807796479, "step": 278820 }, { "epoch": 79.14561453306841, "grad_norm": 0.1584450900554657, "learning_rate": 2.0886176554073234e-05, "loss": 4.859287291765213e-05, "step": 278830 }, { "epoch": 79.14845302299177, "grad_norm": 0.0015978164738044143, "learning_rate": 2.0883338064149875e-05, "loss": 4.16237860918045e-05, "step": 278840 }, { "epoch": 79.15129151291512, "grad_norm": 0.004772151354700327, "learning_rate": 2.088049957422651e-05, "loss": 1.6954727470874786e-05, "step": 278850 }, { "epoch": 79.15413000283849, "grad_norm": 0.0006889955839142203, "learning_rate": 2.087766108430315e-05, "loss": 4.883743822574616e-05, "step": 278860 }, { "epoch": 79.15696849276185, "grad_norm": 0.0037172825541347265, "learning_rate": 2.0874822594379793e-05, "loss": 4.122033715248108e-05, "step": 278870 }, { "epoch": 79.15980698268521, "grad_norm": 0.011849923059344292, "learning_rate": 2.087198410445643e-05, "loss": 2.690982073545456e-05, "step": 278880 }, { "epoch": 79.16264547260857, "grad_norm": 0.0034706597216427326, "learning_rate": 2.086914561453307e-05, "loss": 2.209711819887161e-05, "step": 278890 }, { "epoch": 79.16548396253194, "grad_norm": 0.025075258687138557, "learning_rate": 2.0866307124609707e-05, "loss": 2.536233514547348e-05, "step": 278900 }, { "epoch": 79.1683224524553, "grad_norm": 0.00575411319732666, "learning_rate": 2.0863468634686348e-05, "loss": 2.1055527031421662e-05, "step": 278910 }, { "epoch": 79.17116094237865, "grad_norm": 0.001263648271560669, "learning_rate": 2.086063014476299e-05, "loss": 1.4666281640529632e-05, "step": 278920 }, { "epoch": 79.17399943230201, "grad_norm": 0.0010746917687356472, "learning_rate": 2.0857791654839627e-05, "loss": 4.020277410745621e-05, "step": 278930 }, { "epoch": 79.17683792222537, "grad_norm": 0.0019057673634961247, "learning_rate": 2.0854953164916265e-05, "loss": 0.00010390076786279679, "step": 278940 }, { "epoch": 79.17967641214874, "grad_norm": 0.05026492476463318, "learning_rate": 2.0852114674992903e-05, "loss": 2.6858784258365632e-05, "step": 278950 }, { "epoch": 79.1825149020721, "grad_norm": 0.028362223878502846, "learning_rate": 2.0849276185069545e-05, "loss": 4.269126802682877e-05, "step": 278960 }, { "epoch": 79.18535339199546, "grad_norm": 0.05944199860095978, "learning_rate": 2.0846437695146183e-05, "loss": 3.035757690668106e-05, "step": 278970 }, { "epoch": 79.18819188191883, "grad_norm": 0.000447695842012763, "learning_rate": 2.084359920522282e-05, "loss": 3.0447542667388915e-05, "step": 278980 }, { "epoch": 79.19103037184217, "grad_norm": 0.0030981875024735928, "learning_rate": 2.0840760715299462e-05, "loss": 4.761107265949249e-05, "step": 278990 }, { "epoch": 79.19386886176554, "grad_norm": 0.004902961198240519, "learning_rate": 2.08379222253761e-05, "loss": 2.04259529709816e-05, "step": 279000 }, { "epoch": 79.19386886176554, "eval_accuracy": 0.9874101863038087, "eval_loss": 0.05383085086941719, "eval_runtime": 35.7049, "eval_samples_per_second": 440.471, "eval_steps_per_second": 6.89, "step": 279000 }, { "epoch": 79.1967073516889, "grad_norm": 0.0071294791996479034, "learning_rate": 2.083508373545274e-05, "loss": 1.676175743341446e-05, "step": 279010 }, { "epoch": 79.19954584161226, "grad_norm": 0.0008928981842473149, "learning_rate": 2.083224524552938e-05, "loss": 2.5777332484722138e-05, "step": 279020 }, { "epoch": 79.20238433153563, "grad_norm": 0.0042856596410274506, "learning_rate": 2.0829406755606017e-05, "loss": 1.8004700541496278e-05, "step": 279030 }, { "epoch": 79.20522282145899, "grad_norm": 0.06514091044664383, "learning_rate": 2.082656826568266e-05, "loss": 4.9593672156333926e-05, "step": 279040 }, { "epoch": 79.20806131138234, "grad_norm": 0.007262133527547121, "learning_rate": 2.0823729775759297e-05, "loss": 7.458366453647614e-05, "step": 279050 }, { "epoch": 79.2108998013057, "grad_norm": 0.026940859854221344, "learning_rate": 2.0820891285835935e-05, "loss": 4.3997541069984436e-05, "step": 279060 }, { "epoch": 79.21373829122906, "grad_norm": 0.0013485682429745793, "learning_rate": 2.0818052795912576e-05, "loss": 1.2793578207492828e-05, "step": 279070 }, { "epoch": 79.21657678115243, "grad_norm": 0.032321639358997345, "learning_rate": 2.0815214305989214e-05, "loss": 4.175640642642975e-05, "step": 279080 }, { "epoch": 79.21941527107579, "grad_norm": 0.005999539513140917, "learning_rate": 2.0812375816065855e-05, "loss": 3.9443932473659514e-05, "step": 279090 }, { "epoch": 79.22225376099915, "grad_norm": 0.0028701145201921463, "learning_rate": 2.0809537326142493e-05, "loss": 2.1709129214286804e-05, "step": 279100 }, { "epoch": 79.22509225092251, "grad_norm": 0.0012323070550337434, "learning_rate": 2.080669883621913e-05, "loss": 1.4862976968288422e-05, "step": 279110 }, { "epoch": 79.22793074084586, "grad_norm": 0.0019530784338712692, "learning_rate": 2.0803860346295773e-05, "loss": 4.777088761329651e-05, "step": 279120 }, { "epoch": 79.23076923076923, "grad_norm": 0.014551818370819092, "learning_rate": 2.080102185637241e-05, "loss": 2.9869377613067627e-05, "step": 279130 }, { "epoch": 79.23360772069259, "grad_norm": 0.009374252520501614, "learning_rate": 2.0798183366449052e-05, "loss": 1.3319216668605804e-05, "step": 279140 }, { "epoch": 79.23644621061595, "grad_norm": 0.004324145615100861, "learning_rate": 2.0795344876525687e-05, "loss": 2.2102519869804384e-05, "step": 279150 }, { "epoch": 79.23928470053932, "grad_norm": 0.0030776599887758493, "learning_rate": 2.0792506386602328e-05, "loss": 9.623542428016663e-05, "step": 279160 }, { "epoch": 79.24212319046268, "grad_norm": 0.003871927037835121, "learning_rate": 2.078966789667897e-05, "loss": 2.3351982235908508e-05, "step": 279170 }, { "epoch": 79.24496168038604, "grad_norm": 0.0018656476167961955, "learning_rate": 2.0786829406755608e-05, "loss": 0.00042268484830856325, "step": 279180 }, { "epoch": 79.24780017030939, "grad_norm": 0.0015719798393547535, "learning_rate": 2.0783990916832246e-05, "loss": 0.0003195330500602722, "step": 279190 }, { "epoch": 79.25063866023275, "grad_norm": 0.00968187116086483, "learning_rate": 2.0781152426908883e-05, "loss": 2.678520977497101e-05, "step": 279200 }, { "epoch": 79.25347715015612, "grad_norm": 0.11787788569927216, "learning_rate": 2.0778313936985525e-05, "loss": 0.00021033529192209243, "step": 279210 }, { "epoch": 79.25631564007948, "grad_norm": 0.0031044811476022005, "learning_rate": 2.0775475447062166e-05, "loss": 2.2934190928936006e-05, "step": 279220 }, { "epoch": 79.25915413000284, "grad_norm": 0.005767439492046833, "learning_rate": 2.07726369571388e-05, "loss": 7.256790995597839e-05, "step": 279230 }, { "epoch": 79.2619926199262, "grad_norm": 0.004233460873365402, "learning_rate": 2.0769798467215442e-05, "loss": 1.1130794882774353e-05, "step": 279240 }, { "epoch": 79.26483110984957, "grad_norm": 0.012478482909500599, "learning_rate": 2.076695997729208e-05, "loss": 3.863070160150528e-05, "step": 279250 }, { "epoch": 79.26766959977292, "grad_norm": 0.000286525406409055, "learning_rate": 2.076412148736872e-05, "loss": 4.2057409882545474e-05, "step": 279260 }, { "epoch": 79.27050808969628, "grad_norm": 0.021550942212343216, "learning_rate": 2.076128299744536e-05, "loss": 2.230815589427948e-05, "step": 279270 }, { "epoch": 79.27334657961964, "grad_norm": 0.0023616282269358635, "learning_rate": 2.0758444507521998e-05, "loss": 1.829788088798523e-05, "step": 279280 }, { "epoch": 79.276185069543, "grad_norm": 0.005156514700502157, "learning_rate": 2.075560601759864e-05, "loss": 2.9848888516426086e-05, "step": 279290 }, { "epoch": 79.27902355946637, "grad_norm": 0.0005217656726017594, "learning_rate": 2.0752767527675277e-05, "loss": 1.0996311902999879e-05, "step": 279300 }, { "epoch": 79.28186204938973, "grad_norm": 0.002479717368260026, "learning_rate": 2.0749929037751918e-05, "loss": 2.0115077495574953e-05, "step": 279310 }, { "epoch": 79.28470053931308, "grad_norm": 0.01578538864850998, "learning_rate": 2.0747090547828556e-05, "loss": 3.0169077217578887e-05, "step": 279320 }, { "epoch": 79.28753902923644, "grad_norm": 0.0013453244464471936, "learning_rate": 2.0744252057905194e-05, "loss": 8.789803832769394e-05, "step": 279330 }, { "epoch": 79.2903775191598, "grad_norm": 0.009222890250384808, "learning_rate": 2.0741413567981836e-05, "loss": 4.962366074323654e-05, "step": 279340 }, { "epoch": 79.29321600908317, "grad_norm": 0.004725913982838392, "learning_rate": 2.0738575078058474e-05, "loss": 1.1538900434970855e-05, "step": 279350 }, { "epoch": 79.29605449900653, "grad_norm": 0.001607635640539229, "learning_rate": 2.073573658813511e-05, "loss": 5.9732794761657713e-05, "step": 279360 }, { "epoch": 79.2988929889299, "grad_norm": 0.02775111421942711, "learning_rate": 2.0732898098211753e-05, "loss": 9.091757237911224e-05, "step": 279370 }, { "epoch": 79.30173147885326, "grad_norm": 0.1665821522474289, "learning_rate": 2.073005960828839e-05, "loss": 4.940330982208252e-05, "step": 279380 }, { "epoch": 79.3045699687766, "grad_norm": 0.03776297718286514, "learning_rate": 2.0727221118365032e-05, "loss": 6.554052233695984e-05, "step": 279390 }, { "epoch": 79.30740845869997, "grad_norm": 0.005161263979971409, "learning_rate": 2.072438262844167e-05, "loss": 4.283152520656586e-05, "step": 279400 }, { "epoch": 79.31024694862333, "grad_norm": 0.0022110261488705873, "learning_rate": 2.072154413851831e-05, "loss": 1.8101930618286132e-05, "step": 279410 }, { "epoch": 79.3130854385467, "grad_norm": 0.0335657075047493, "learning_rate": 2.071870564859495e-05, "loss": 2.0056962966918945e-05, "step": 279420 }, { "epoch": 79.31592392847006, "grad_norm": 0.001846419065259397, "learning_rate": 2.0715867158671588e-05, "loss": 1.5103071928024291e-05, "step": 279430 }, { "epoch": 79.31876241839342, "grad_norm": 0.0015460600843653083, "learning_rate": 2.0713028668748226e-05, "loss": 7.072463631629944e-06, "step": 279440 }, { "epoch": 79.32160090831678, "grad_norm": 0.034176673740148544, "learning_rate": 2.0710190178824864e-05, "loss": 1.5587545931339264e-05, "step": 279450 }, { "epoch": 79.32443939824013, "grad_norm": 0.01461587194353342, "learning_rate": 2.0707351688901505e-05, "loss": 2.8542801737785338e-05, "step": 279460 }, { "epoch": 79.3272778881635, "grad_norm": 0.0008953953511081636, "learning_rate": 2.0704513198978146e-05, "loss": 3.486592322587967e-05, "step": 279470 }, { "epoch": 79.33011637808686, "grad_norm": 0.001151828095316887, "learning_rate": 2.0701674709054784e-05, "loss": 1.2030452489852906e-05, "step": 279480 }, { "epoch": 79.33295486801022, "grad_norm": 0.0009641346405260265, "learning_rate": 2.0698836219131422e-05, "loss": 3.1169690191745755e-05, "step": 279490 }, { "epoch": 79.33579335793358, "grad_norm": 0.011678953655064106, "learning_rate": 2.0695997729208064e-05, "loss": 1.420825719833374e-05, "step": 279500 }, { "epoch": 79.33579335793358, "eval_accuracy": 0.9872194315508361, "eval_loss": 0.05133616924285889, "eval_runtime": 35.9997, "eval_samples_per_second": 436.865, "eval_steps_per_second": 6.833, "step": 279500 }, { "epoch": 79.33863184785695, "grad_norm": 0.0018660506466403604, "learning_rate": 2.0693159239284702e-05, "loss": 1.970045268535614e-05, "step": 279510 }, { "epoch": 79.34147033778031, "grad_norm": 0.003843186656013131, "learning_rate": 2.0690320749361343e-05, "loss": 6.330739706754685e-05, "step": 279520 }, { "epoch": 79.34430882770366, "grad_norm": 0.002167057478800416, "learning_rate": 2.0687482259437978e-05, "loss": 1.992080360651016e-05, "step": 279530 }, { "epoch": 79.34714731762702, "grad_norm": 0.0005134328384883702, "learning_rate": 2.068464376951462e-05, "loss": 1.6012415289878846e-05, "step": 279540 }, { "epoch": 79.34998580755038, "grad_norm": 0.006567187607288361, "learning_rate": 2.068180527959126e-05, "loss": 1.5719421207904814e-05, "step": 279550 }, { "epoch": 79.35282429747375, "grad_norm": 0.00479811942204833, "learning_rate": 2.06789667896679e-05, "loss": 1.9605644047260286e-05, "step": 279560 }, { "epoch": 79.35566278739711, "grad_norm": 0.0006001695874147117, "learning_rate": 2.0676128299744536e-05, "loss": 2.30325385928154e-05, "step": 279570 }, { "epoch": 79.35850127732047, "grad_norm": 0.008555669337511063, "learning_rate": 2.0673289809821174e-05, "loss": 2.2939220070838928e-05, "step": 279580 }, { "epoch": 79.36133976724382, "grad_norm": 0.004219334106892347, "learning_rate": 2.0670451319897816e-05, "loss": 1.6824714839458467e-05, "step": 279590 }, { "epoch": 79.36417825716718, "grad_norm": 0.0006413043010979891, "learning_rate": 2.0667612829974457e-05, "loss": 1.7832405865192415e-05, "step": 279600 }, { "epoch": 79.36701674709055, "grad_norm": 0.010613431222736835, "learning_rate": 2.0664774340051095e-05, "loss": 1.3063289225101471e-05, "step": 279610 }, { "epoch": 79.36985523701391, "grad_norm": 0.0015290819574147463, "learning_rate": 2.0661935850127733e-05, "loss": 1.975279301404953e-05, "step": 279620 }, { "epoch": 79.37269372693727, "grad_norm": 0.0013244481524452567, "learning_rate": 2.065909736020437e-05, "loss": 1.0079890489578248e-05, "step": 279630 }, { "epoch": 79.37553221686063, "grad_norm": 0.035536449402570724, "learning_rate": 2.0656258870281012e-05, "loss": 3.2304227352142335e-05, "step": 279640 }, { "epoch": 79.378370706784, "grad_norm": 0.001251806621439755, "learning_rate": 2.065342038035765e-05, "loss": 1.4643743634223938e-05, "step": 279650 }, { "epoch": 79.38120919670735, "grad_norm": 0.0070482902228832245, "learning_rate": 2.065058189043429e-05, "loss": 4.2363815009593966e-05, "step": 279660 }, { "epoch": 79.38404768663071, "grad_norm": 0.0027785359416157007, "learning_rate": 2.064774340051093e-05, "loss": 9.722448885440826e-06, "step": 279670 }, { "epoch": 79.38688617655407, "grad_norm": 0.001587797305546701, "learning_rate": 2.0644904910587568e-05, "loss": 9.490735828876496e-06, "step": 279680 }, { "epoch": 79.38972466647743, "grad_norm": 0.0007349245715886354, "learning_rate": 2.064206642066421e-05, "loss": 1.8346309661865235e-05, "step": 279690 }, { "epoch": 79.3925631564008, "grad_norm": 0.005375022068619728, "learning_rate": 2.0639227930740847e-05, "loss": 1.1692196130752563e-05, "step": 279700 }, { "epoch": 79.39540164632416, "grad_norm": 0.007989154197275639, "learning_rate": 2.0636389440817485e-05, "loss": 1.7652846872806548e-05, "step": 279710 }, { "epoch": 79.39824013624752, "grad_norm": 0.0032500470988452435, "learning_rate": 2.0633550950894127e-05, "loss": 0.00043697450309991835, "step": 279720 }, { "epoch": 79.40107862617087, "grad_norm": 0.0014336813474074006, "learning_rate": 2.0630712460970765e-05, "loss": 1.261737197637558e-05, "step": 279730 }, { "epoch": 79.40391711609423, "grad_norm": 0.000976408482529223, "learning_rate": 2.0627873971047403e-05, "loss": 0.0032129906117916106, "step": 279740 }, { "epoch": 79.4067556060176, "grad_norm": 0.003477465594187379, "learning_rate": 2.0625035481124044e-05, "loss": 1.4651380479335784e-05, "step": 279750 }, { "epoch": 79.40959409594096, "grad_norm": 0.0018173020798712969, "learning_rate": 2.0622196991200682e-05, "loss": 1.962278038263321e-05, "step": 279760 }, { "epoch": 79.41243258586432, "grad_norm": 0.0018691527657210827, "learning_rate": 2.0619358501277323e-05, "loss": 1.9497424364089966e-05, "step": 279770 }, { "epoch": 79.41527107578769, "grad_norm": 0.4562208354473114, "learning_rate": 2.061652001135396e-05, "loss": 7.021613419055939e-05, "step": 279780 }, { "epoch": 79.41810956571103, "grad_norm": 0.004590698983520269, "learning_rate": 2.06136815214306e-05, "loss": 3.1745806336402896e-05, "step": 279790 }, { "epoch": 79.4209480556344, "grad_norm": 0.020333638414740562, "learning_rate": 2.061084303150724e-05, "loss": 2.2323243319988252e-05, "step": 279800 }, { "epoch": 79.42378654555776, "grad_norm": 0.0009955576388165355, "learning_rate": 2.060800454158388e-05, "loss": 2.1516904234886168e-05, "step": 279810 }, { "epoch": 79.42662503548112, "grad_norm": 0.003786770161241293, "learning_rate": 2.0605166051660517e-05, "loss": 4.1857175529003146e-05, "step": 279820 }, { "epoch": 79.42946352540449, "grad_norm": 0.0033055846579372883, "learning_rate": 2.0602327561737155e-05, "loss": 1.8362514674663542e-05, "step": 279830 }, { "epoch": 79.43230201532785, "grad_norm": 0.004232621751725674, "learning_rate": 2.0599489071813796e-05, "loss": 2.0844489336013793e-05, "step": 279840 }, { "epoch": 79.43514050525121, "grad_norm": 0.032166365534067154, "learning_rate": 2.0596650581890437e-05, "loss": 2.0544975996017456e-05, "step": 279850 }, { "epoch": 79.43797899517456, "grad_norm": 0.021454889327287674, "learning_rate": 2.0593812091967075e-05, "loss": 3.215502947568893e-05, "step": 279860 }, { "epoch": 79.44081748509792, "grad_norm": 0.0026832139119505882, "learning_rate": 2.0590973602043713e-05, "loss": 6.410162895917893e-05, "step": 279870 }, { "epoch": 79.44365597502129, "grad_norm": 0.0027363246772438288, "learning_rate": 2.058813511212035e-05, "loss": 8.635967969894409e-06, "step": 279880 }, { "epoch": 79.44649446494465, "grad_norm": 0.0009926892817020416, "learning_rate": 2.0585296622196993e-05, "loss": 9.446591138839722e-06, "step": 279890 }, { "epoch": 79.44933295486801, "grad_norm": 0.01196245290338993, "learning_rate": 2.0582458132273634e-05, "loss": 1.9237957894802092e-05, "step": 279900 }, { "epoch": 79.45217144479138, "grad_norm": 0.02925526350736618, "learning_rate": 2.057961964235027e-05, "loss": 3.098752349615097e-05, "step": 279910 }, { "epoch": 79.45500993471474, "grad_norm": 0.2633715271949768, "learning_rate": 2.057678115242691e-05, "loss": 5.832649767398834e-05, "step": 279920 }, { "epoch": 79.45784842463809, "grad_norm": 0.001168263959698379, "learning_rate": 2.0573942662503548e-05, "loss": 1.0318495333194732e-05, "step": 279930 }, { "epoch": 79.46068691456145, "grad_norm": 0.002478854265064001, "learning_rate": 2.057110417258019e-05, "loss": 1.7581693828105927e-05, "step": 279940 }, { "epoch": 79.46352540448481, "grad_norm": 0.005781521555036306, "learning_rate": 2.0568265682656827e-05, "loss": 0.0009434908628463745, "step": 279950 }, { "epoch": 79.46636389440818, "grad_norm": 0.006472598295658827, "learning_rate": 2.0565427192733465e-05, "loss": 3.887172788381576e-05, "step": 279960 }, { "epoch": 79.46920238433154, "grad_norm": 0.009691625833511353, "learning_rate": 2.0562872551802442e-05, "loss": 0.006930913031101227, "step": 279970 }, { "epoch": 79.4720408742549, "grad_norm": 0.004641084000468254, "learning_rate": 2.0560034061879083e-05, "loss": 9.407103061676025e-06, "step": 279980 }, { "epoch": 79.47487936417826, "grad_norm": 0.025216015055775642, "learning_rate": 2.055719557195572e-05, "loss": 3.4028850495815274e-05, "step": 279990 }, { "epoch": 79.47771785410161, "grad_norm": 15.801149368286133, "learning_rate": 2.055435708203236e-05, "loss": 0.004272207245230674, "step": 280000 }, { "epoch": 79.47771785410161, "eval_accuracy": 0.9860749030330006, "eval_loss": 0.05783560872077942, "eval_runtime": 35.7409, "eval_samples_per_second": 440.028, "eval_steps_per_second": 6.883, "step": 280000 }, { "epoch": 79.48055634402498, "grad_norm": 0.03740852326154709, "learning_rate": 2.0551518592108997e-05, "loss": 0.00017086174339056014, "step": 280010 }, { "epoch": 79.48339483394834, "grad_norm": 0.01793588697910309, "learning_rate": 2.054868010218564e-05, "loss": 0.00024270135909318924, "step": 280020 }, { "epoch": 79.4862333238717, "grad_norm": 0.008082223124802113, "learning_rate": 2.054584161226228e-05, "loss": 7.197372615337372e-05, "step": 280030 }, { "epoch": 79.48907181379506, "grad_norm": 0.01153172180056572, "learning_rate": 2.0543003122338918e-05, "loss": 4.047621041536331e-05, "step": 280040 }, { "epoch": 79.49191030371843, "grad_norm": 0.02636824920773506, "learning_rate": 2.0540164632415556e-05, "loss": 3.0922889709472654e-05, "step": 280050 }, { "epoch": 79.49474879364178, "grad_norm": 0.0062414612621068954, "learning_rate": 2.0537326142492194e-05, "loss": 7.150098681449891e-05, "step": 280060 }, { "epoch": 79.49758728356514, "grad_norm": 0.0018713782774284482, "learning_rate": 2.0534487652568835e-05, "loss": 1.3360939919948579e-05, "step": 280070 }, { "epoch": 79.5004257734885, "grad_norm": 0.07793035358190536, "learning_rate": 2.0531649162645473e-05, "loss": 5.8577023446559905e-05, "step": 280080 }, { "epoch": 79.50326426341186, "grad_norm": 0.010270140133798122, "learning_rate": 2.052881067272211e-05, "loss": 2.394411712884903e-05, "step": 280090 }, { "epoch": 79.50610275333523, "grad_norm": 0.019139675423502922, "learning_rate": 2.0525972182798753e-05, "loss": 0.00013203676789999008, "step": 280100 }, { "epoch": 79.50894124325859, "grad_norm": 0.011400614865124226, "learning_rate": 2.052313369287539e-05, "loss": 0.00035374443978071214, "step": 280110 }, { "epoch": 79.51177973318195, "grad_norm": 0.0039251395501196384, "learning_rate": 2.0520295202952032e-05, "loss": 5.8548152446746825e-05, "step": 280120 }, { "epoch": 79.5146182231053, "grad_norm": 0.0674012079834938, "learning_rate": 2.051745671302867e-05, "loss": 9.868070483207703e-05, "step": 280130 }, { "epoch": 79.51745671302866, "grad_norm": 0.06772097945213318, "learning_rate": 2.0514618223105308e-05, "loss": 0.00012590624392032622, "step": 280140 }, { "epoch": 79.52029520295203, "grad_norm": 0.011552395299077034, "learning_rate": 2.051177973318195e-05, "loss": 0.0001557065173983574, "step": 280150 }, { "epoch": 79.52313369287539, "grad_norm": 0.008434305898845196, "learning_rate": 2.0508941243258587e-05, "loss": 7.555298507213592e-05, "step": 280160 }, { "epoch": 79.52597218279875, "grad_norm": 2.944122791290283, "learning_rate": 2.0506102753335225e-05, "loss": 0.000375048816204071, "step": 280170 }, { "epoch": 79.52881067272212, "grad_norm": 0.0869642049074173, "learning_rate": 2.0503264263411867e-05, "loss": 3.114603459835053e-05, "step": 280180 }, { "epoch": 79.53164916264548, "grad_norm": 0.024759335443377495, "learning_rate": 2.0500425773488505e-05, "loss": 3.2576359808444975e-05, "step": 280190 }, { "epoch": 79.53448765256883, "grad_norm": 0.005790839437395334, "learning_rate": 2.0497587283565146e-05, "loss": 7.82577320933342e-05, "step": 280200 }, { "epoch": 79.53732614249219, "grad_norm": 0.004166852217167616, "learning_rate": 2.0494748793641784e-05, "loss": 3.9586052298545835e-05, "step": 280210 }, { "epoch": 79.54016463241555, "grad_norm": 0.001389546087011695, "learning_rate": 2.0491910303718422e-05, "loss": 0.0001110197976231575, "step": 280220 }, { "epoch": 79.54300312233892, "grad_norm": 0.0028377906419336796, "learning_rate": 2.0489071813795063e-05, "loss": 2.3861415684223175e-05, "step": 280230 }, { "epoch": 79.54584161226228, "grad_norm": 0.0017485716380178928, "learning_rate": 2.04862333238717e-05, "loss": 1.8429197371006013e-05, "step": 280240 }, { "epoch": 79.54868010218564, "grad_norm": 0.0010637976229190826, "learning_rate": 2.0483394833948343e-05, "loss": 2.1461956202983855e-05, "step": 280250 }, { "epoch": 79.551518592109, "grad_norm": 0.20919594168663025, "learning_rate": 2.0480556344024977e-05, "loss": 4.21036034822464e-05, "step": 280260 }, { "epoch": 79.55435708203235, "grad_norm": 0.0037901715841144323, "learning_rate": 2.047771785410162e-05, "loss": 1.5153177082538605e-05, "step": 280270 }, { "epoch": 79.55719557195572, "grad_norm": 0.028598062694072723, "learning_rate": 2.047487936417826e-05, "loss": 2.4093687534332277e-05, "step": 280280 }, { "epoch": 79.56003406187908, "grad_norm": 0.003065940923988819, "learning_rate": 2.0472040874254898e-05, "loss": 2.1513737738132477e-05, "step": 280290 }, { "epoch": 79.56287255180244, "grad_norm": 0.0022703029680997133, "learning_rate": 2.0469202384331536e-05, "loss": 1.6593560576438904e-05, "step": 280300 }, { "epoch": 79.5657110417258, "grad_norm": 0.010565439239144325, "learning_rate": 2.0466363894408174e-05, "loss": 1.2682192027568817e-05, "step": 280310 }, { "epoch": 79.56854953164917, "grad_norm": 0.0030709034763276577, "learning_rate": 2.0463525404484815e-05, "loss": 3.27644869685173e-05, "step": 280320 }, { "epoch": 79.57138802157252, "grad_norm": 0.0019100293284282088, "learning_rate": 2.0460686914561457e-05, "loss": 1.430753618478775e-05, "step": 280330 }, { "epoch": 79.57422651149588, "grad_norm": 0.0024456626269966364, "learning_rate": 2.045784842463809e-05, "loss": 1.6803108155727387e-05, "step": 280340 }, { "epoch": 79.57706500141924, "grad_norm": 0.018074771389365196, "learning_rate": 2.0455009934714733e-05, "loss": 0.0003667498007416725, "step": 280350 }, { "epoch": 79.5799034913426, "grad_norm": 0.008788328617811203, "learning_rate": 2.045217144479137e-05, "loss": 0.00018761400133371353, "step": 280360 }, { "epoch": 79.58274198126597, "grad_norm": 0.006770283915102482, "learning_rate": 2.0449332954868012e-05, "loss": 3.4153833985328676e-05, "step": 280370 }, { "epoch": 79.58558047118933, "grad_norm": 0.003642918076366186, "learning_rate": 2.044649446494465e-05, "loss": 0.00012674201279878617, "step": 280380 }, { "epoch": 79.5884189611127, "grad_norm": 0.0019158173818141222, "learning_rate": 2.0443655975021288e-05, "loss": 0.00010042265057563782, "step": 280390 }, { "epoch": 79.59125745103604, "grad_norm": 0.018876979127526283, "learning_rate": 2.044081748509793e-05, "loss": 2.341456711292267e-05, "step": 280400 }, { "epoch": 79.5940959409594, "grad_norm": 0.01628878526389599, "learning_rate": 2.0437978995174568e-05, "loss": 2.0314007997512817e-05, "step": 280410 }, { "epoch": 79.59693443088277, "grad_norm": 10.316728591918945, "learning_rate": 2.043514050525121e-05, "loss": 0.004426739364862442, "step": 280420 }, { "epoch": 79.59977292080613, "grad_norm": 0.0010587360011413693, "learning_rate": 2.0432302015327847e-05, "loss": 2.2232718765735627e-05, "step": 280430 }, { "epoch": 79.6026114107295, "grad_norm": 0.012364261783659458, "learning_rate": 2.0429463525404485e-05, "loss": 3.5625137388706205e-05, "step": 280440 }, { "epoch": 79.60544990065286, "grad_norm": 0.001473550801165402, "learning_rate": 2.0426625035481126e-05, "loss": 0.00012633204460144043, "step": 280450 }, { "epoch": 79.60828839057622, "grad_norm": 0.019319994375109673, "learning_rate": 2.0423786545557764e-05, "loss": 3.45500186085701e-05, "step": 280460 }, { "epoch": 79.61112688049957, "grad_norm": 0.0016043243231251836, "learning_rate": 2.0420948055634402e-05, "loss": 4.307031631469727e-05, "step": 280470 }, { "epoch": 79.61396537042293, "grad_norm": 0.0035444123204797506, "learning_rate": 2.0418109565711044e-05, "loss": 1.6702152788639068e-05, "step": 280480 }, { "epoch": 79.6168038603463, "grad_norm": 0.0026847035624086857, "learning_rate": 2.041527107578768e-05, "loss": 3.483220934867859e-05, "step": 280490 }, { "epoch": 79.61964235026966, "grad_norm": 0.012612566351890564, "learning_rate": 2.0412432585864323e-05, "loss": 1.9737333059310912e-05, "step": 280500 }, { "epoch": 79.61964235026966, "eval_accuracy": 0.985884148280028, "eval_loss": 0.054660532623529434, "eval_runtime": 35.3948, "eval_samples_per_second": 444.33, "eval_steps_per_second": 6.95, "step": 280500 }, { "epoch": 79.62248084019302, "grad_norm": 0.021207889541983604, "learning_rate": 2.040959409594096e-05, "loss": 4.0889158844947816e-05, "step": 280510 }, { "epoch": 79.62531933011638, "grad_norm": 0.0030105668120086193, "learning_rate": 2.04067556060176e-05, "loss": 1.6140565276145934e-05, "step": 280520 }, { "epoch": 79.62815782003973, "grad_norm": 0.00041594335925765336, "learning_rate": 2.040391711609424e-05, "loss": 4.9044564366340634e-05, "step": 280530 }, { "epoch": 79.6309963099631, "grad_norm": 0.024792898446321487, "learning_rate": 2.0401078626170878e-05, "loss": 2.5921687483787537e-05, "step": 280540 }, { "epoch": 79.63383479988646, "grad_norm": 0.003434069687500596, "learning_rate": 2.0398240136247516e-05, "loss": 4.277452826499939e-05, "step": 280550 }, { "epoch": 79.63667328980982, "grad_norm": 0.0009229037677869201, "learning_rate": 2.0395401646324154e-05, "loss": 3.0406005680561066e-05, "step": 280560 }, { "epoch": 79.63951177973318, "grad_norm": 0.049041785299777985, "learning_rate": 2.0392563156400796e-05, "loss": 2.2408924996852876e-05, "step": 280570 }, { "epoch": 79.64235026965655, "grad_norm": 0.0020957086235284805, "learning_rate": 2.0389724666477437e-05, "loss": 1.6347691416740418e-05, "step": 280580 }, { "epoch": 79.64518875957991, "grad_norm": 0.0024504626635462046, "learning_rate": 2.0386886176554075e-05, "loss": 2.2073276340961456e-05, "step": 280590 }, { "epoch": 79.64802724950326, "grad_norm": 0.0032766021322458982, "learning_rate": 2.0384047686630713e-05, "loss": 0.00011635720729827881, "step": 280600 }, { "epoch": 79.65086573942662, "grad_norm": 0.011558557860553265, "learning_rate": 2.038120919670735e-05, "loss": 0.0001408921554684639, "step": 280610 }, { "epoch": 79.65370422934998, "grad_norm": 0.0038728825747966766, "learning_rate": 2.0378370706783992e-05, "loss": 2.1816790103912352e-05, "step": 280620 }, { "epoch": 79.65654271927335, "grad_norm": 0.007617305964231491, "learning_rate": 2.0375532216860634e-05, "loss": 3.658086061477661e-05, "step": 280630 }, { "epoch": 79.65938120919671, "grad_norm": 0.024363892152905464, "learning_rate": 2.037269372693727e-05, "loss": 0.0018382223322987556, "step": 280640 }, { "epoch": 79.66221969912007, "grad_norm": 0.01550520583987236, "learning_rate": 2.036985523701391e-05, "loss": 1.6685761511325837e-05, "step": 280650 }, { "epoch": 79.66505818904344, "grad_norm": 0.06573118269443512, "learning_rate": 2.0367016747090548e-05, "loss": 0.001066979207098484, "step": 280660 }, { "epoch": 79.66789667896678, "grad_norm": 0.014403525739908218, "learning_rate": 2.036417825716719e-05, "loss": 9.518973529338836e-05, "step": 280670 }, { "epoch": 79.67073516889015, "grad_norm": 0.010319763794541359, "learning_rate": 2.0361339767243827e-05, "loss": 0.00011127442121505737, "step": 280680 }, { "epoch": 79.67357365881351, "grad_norm": 0.017561862245202065, "learning_rate": 2.0358501277320465e-05, "loss": 3.9342232048511506e-05, "step": 280690 }, { "epoch": 79.67641214873687, "grad_norm": 0.10764268040657043, "learning_rate": 2.0355662787397106e-05, "loss": 8.020512759685517e-05, "step": 280700 }, { "epoch": 79.67925063866024, "grad_norm": 0.004118610639125109, "learning_rate": 2.0352824297473744e-05, "loss": 1.0691024363040924e-05, "step": 280710 }, { "epoch": 79.6820891285836, "grad_norm": 0.014690232463181019, "learning_rate": 2.0349985807550386e-05, "loss": 3.6483816802501676e-05, "step": 280720 }, { "epoch": 79.68492761850696, "grad_norm": 0.016232805326581, "learning_rate": 2.0347147317627024e-05, "loss": 1.575108617544174e-05, "step": 280730 }, { "epoch": 79.68776610843031, "grad_norm": 0.017561305314302444, "learning_rate": 2.0344308827703662e-05, "loss": 2.284068614244461e-05, "step": 280740 }, { "epoch": 79.69060459835367, "grad_norm": 0.03252040594816208, "learning_rate": 2.0341470337780303e-05, "loss": 0.00022871680557727813, "step": 280750 }, { "epoch": 79.69344308827704, "grad_norm": 0.006984027102589607, "learning_rate": 2.033863184785694e-05, "loss": 0.00013995151966810225, "step": 280760 }, { "epoch": 79.6962815782004, "grad_norm": 0.014164007268846035, "learning_rate": 2.033579335793358e-05, "loss": 1.5300512313842773e-05, "step": 280770 }, { "epoch": 79.69912006812376, "grad_norm": 1.4045487642288208, "learning_rate": 2.033295486801022e-05, "loss": 0.00034192707389593127, "step": 280780 }, { "epoch": 79.70195855804712, "grad_norm": 0.0032222853042185307, "learning_rate": 2.033011637808686e-05, "loss": 0.008452010154724122, "step": 280790 }, { "epoch": 79.70479704797047, "grad_norm": 0.7175098061561584, "learning_rate": 2.03272778881635e-05, "loss": 0.0014939181506633758, "step": 280800 }, { "epoch": 79.70763553789384, "grad_norm": 0.0757066160440445, "learning_rate": 2.0324439398240134e-05, "loss": 0.0002550097182393074, "step": 280810 }, { "epoch": 79.7104740278172, "grad_norm": 0.006987649016082287, "learning_rate": 2.0321600908316776e-05, "loss": 0.00020536985248327256, "step": 280820 }, { "epoch": 79.71331251774056, "grad_norm": 0.0038770849350839853, "learning_rate": 2.0318762418393417e-05, "loss": 2.432558685541153e-05, "step": 280830 }, { "epoch": 79.71615100766392, "grad_norm": 0.07005435228347778, "learning_rate": 2.0315923928470055e-05, "loss": 0.0001894606277346611, "step": 280840 }, { "epoch": 79.71898949758729, "grad_norm": 0.002425870858132839, "learning_rate": 2.0313085438546693e-05, "loss": 0.0002046145498752594, "step": 280850 }, { "epoch": 79.72182798751065, "grad_norm": 0.0034499720204621553, "learning_rate": 2.031024694862333e-05, "loss": 3.5179778933525085e-05, "step": 280860 }, { "epoch": 79.724666477434, "grad_norm": 0.053830835968256, "learning_rate": 2.0307408458699973e-05, "loss": 0.0011140096932649613, "step": 280870 }, { "epoch": 79.72750496735736, "grad_norm": 0.03295903280377388, "learning_rate": 2.0304569968776614e-05, "loss": 0.0002891272306442261, "step": 280880 }, { "epoch": 79.73034345728072, "grad_norm": 0.047222573310136795, "learning_rate": 2.0301731478853252e-05, "loss": 6.411410868167877e-05, "step": 280890 }, { "epoch": 79.73318194720409, "grad_norm": 0.32887181639671326, "learning_rate": 2.029889298892989e-05, "loss": 9.746141731739044e-05, "step": 280900 }, { "epoch": 79.73602043712745, "grad_norm": 0.0014275122666731477, "learning_rate": 2.0296054499006528e-05, "loss": 0.0002510292455554008, "step": 280910 }, { "epoch": 79.73885892705081, "grad_norm": 0.005220280960202217, "learning_rate": 2.029321600908317e-05, "loss": 0.00011449139565229416, "step": 280920 }, { "epoch": 79.74169741697418, "grad_norm": 0.0045799496583640575, "learning_rate": 2.0290377519159807e-05, "loss": 3.5843253135681154e-05, "step": 280930 }, { "epoch": 79.74453590689753, "grad_norm": 0.13805986940860748, "learning_rate": 2.0287539029236445e-05, "loss": 0.0001555768772959709, "step": 280940 }, { "epoch": 79.74737439682089, "grad_norm": 0.0023390355054289103, "learning_rate": 2.0284700539313087e-05, "loss": 4.224199801683426e-05, "step": 280950 }, { "epoch": 79.75021288674425, "grad_norm": 0.006875454913824797, "learning_rate": 2.0281862049389725e-05, "loss": 2.571437507867813e-05, "step": 280960 }, { "epoch": 79.75305137666761, "grad_norm": 0.0009254126925952733, "learning_rate": 2.0279023559466366e-05, "loss": 5.0733424723148345e-05, "step": 280970 }, { "epoch": 79.75588986659098, "grad_norm": 0.013288444839417934, "learning_rate": 2.0276185069543004e-05, "loss": 2.3442879319190978e-05, "step": 280980 }, { "epoch": 79.75872835651434, "grad_norm": 0.004856517072767019, "learning_rate": 2.0273346579619642e-05, "loss": 1.3937614858150482e-05, "step": 280990 }, { "epoch": 79.76156684643769, "grad_norm": 0.0031798218842595816, "learning_rate": 2.0270508089696283e-05, "loss": 4.571676254272461e-05, "step": 281000 }, { "epoch": 79.76156684643769, "eval_accuracy": 0.9863928276212882, "eval_loss": 0.052759479731321335, "eval_runtime": 36.1331, "eval_samples_per_second": 435.252, "eval_steps_per_second": 6.808, "step": 281000 }, { "epoch": 79.76440533636105, "grad_norm": 0.0063055818900465965, "learning_rate": 2.026766959977292e-05, "loss": 5.324259400367737e-05, "step": 281010 }, { "epoch": 79.76724382628441, "grad_norm": 0.018012546002864838, "learning_rate": 2.026483110984956e-05, "loss": 2.4958699941635133e-05, "step": 281020 }, { "epoch": 79.77008231620778, "grad_norm": 0.006126358173787594, "learning_rate": 2.02619926199262e-05, "loss": 3.387480974197388e-05, "step": 281030 }, { "epoch": 79.77292080613114, "grad_norm": 0.0019068910041823983, "learning_rate": 2.025915413000284e-05, "loss": 1.6984157264232634e-05, "step": 281040 }, { "epoch": 79.7757592960545, "grad_norm": 0.006047877017408609, "learning_rate": 2.025631564007948e-05, "loss": 2.374090254306793e-05, "step": 281050 }, { "epoch": 79.77859778597787, "grad_norm": 0.003345098113641143, "learning_rate": 2.0253477150156118e-05, "loss": 4.855450242757797e-05, "step": 281060 }, { "epoch": 79.78143627590121, "grad_norm": 0.003848272142931819, "learning_rate": 2.0250638660232756e-05, "loss": 2.6218406856060027e-05, "step": 281070 }, { "epoch": 79.78427476582458, "grad_norm": 0.0015606610104441643, "learning_rate": 2.0247800170309397e-05, "loss": 2.1225400269031523e-05, "step": 281080 }, { "epoch": 79.78711325574794, "grad_norm": 0.0028762647416442633, "learning_rate": 2.0244961680386035e-05, "loss": 2.3475848138332367e-05, "step": 281090 }, { "epoch": 79.7899517456713, "grad_norm": 0.24010424315929413, "learning_rate": 2.0242123190462677e-05, "loss": 4.9711577594280244e-05, "step": 281100 }, { "epoch": 79.79279023559467, "grad_norm": 0.003680333960801363, "learning_rate": 2.023928470053931e-05, "loss": 2.0571984350681305e-05, "step": 281110 }, { "epoch": 79.79562872551803, "grad_norm": 0.009224876761436462, "learning_rate": 2.0236446210615953e-05, "loss": 2.4433806538581847e-05, "step": 281120 }, { "epoch": 79.79846721544139, "grad_norm": 0.0016138376668095589, "learning_rate": 2.0233607720692594e-05, "loss": 1.637618988752365e-05, "step": 281130 }, { "epoch": 79.80130570536474, "grad_norm": 0.010451880283653736, "learning_rate": 2.0230769230769232e-05, "loss": 2.260003238916397e-05, "step": 281140 }, { "epoch": 79.8041441952881, "grad_norm": 0.003466714173555374, "learning_rate": 2.022793074084587e-05, "loss": 0.00011829584836959839, "step": 281150 }, { "epoch": 79.80698268521147, "grad_norm": 0.004533274564892054, "learning_rate": 2.0225092250922508e-05, "loss": 2.6835128664970398e-05, "step": 281160 }, { "epoch": 79.80982117513483, "grad_norm": 0.013917772099375725, "learning_rate": 2.022225376099915e-05, "loss": 4.331506788730621e-05, "step": 281170 }, { "epoch": 79.81265966505819, "grad_norm": 0.18858739733695984, "learning_rate": 2.021941527107579e-05, "loss": 5.0350278615951535e-05, "step": 281180 }, { "epoch": 79.81549815498155, "grad_norm": 0.00029219919815659523, "learning_rate": 2.021657678115243e-05, "loss": 1.8887408077716828e-05, "step": 281190 }, { "epoch": 79.81833664490492, "grad_norm": 0.0019326237961649895, "learning_rate": 2.0213738291229067e-05, "loss": 2.7888827025890352e-05, "step": 281200 }, { "epoch": 79.82117513482827, "grad_norm": 0.07431917637586594, "learning_rate": 2.0210899801305708e-05, "loss": 2.8841011226177215e-05, "step": 281210 }, { "epoch": 79.82401362475163, "grad_norm": 0.00909978523850441, "learning_rate": 2.0208061311382346e-05, "loss": 3.265943378210068e-05, "step": 281220 }, { "epoch": 79.82685211467499, "grad_norm": 0.003120960434898734, "learning_rate": 2.0205222821458984e-05, "loss": 1.7009861767292023e-05, "step": 281230 }, { "epoch": 79.82969060459835, "grad_norm": 0.011328312568366528, "learning_rate": 2.0202384331535622e-05, "loss": 1.4226697385311126e-05, "step": 281240 }, { "epoch": 79.83252909452172, "grad_norm": 0.02596723660826683, "learning_rate": 2.0199545841612263e-05, "loss": 2.843756228685379e-05, "step": 281250 }, { "epoch": 79.83536758444508, "grad_norm": 0.010840503498911858, "learning_rate": 2.0196707351688905e-05, "loss": 3.67235392332077e-05, "step": 281260 }, { "epoch": 79.83820607436843, "grad_norm": 0.0025573307648301125, "learning_rate": 2.0193868861765543e-05, "loss": 1.6934610903263093e-05, "step": 281270 }, { "epoch": 79.84104456429179, "grad_norm": 0.0021249756682664156, "learning_rate": 2.019103037184218e-05, "loss": 1.9726529717445374e-05, "step": 281280 }, { "epoch": 79.84388305421515, "grad_norm": 0.0008747928077355027, "learning_rate": 2.018819188191882e-05, "loss": 2.7336739003658293e-05, "step": 281290 }, { "epoch": 79.84672154413852, "grad_norm": 0.0008977303514257073, "learning_rate": 2.018535339199546e-05, "loss": 1.1324137449264527e-05, "step": 281300 }, { "epoch": 79.84956003406188, "grad_norm": 0.0008700024336576462, "learning_rate": 2.01825149020721e-05, "loss": 1.5820376574993134e-05, "step": 281310 }, { "epoch": 79.85239852398524, "grad_norm": 0.003278435440734029, "learning_rate": 2.0179676412148736e-05, "loss": 1.3683177530765534e-05, "step": 281320 }, { "epoch": 79.8552370139086, "grad_norm": 0.005296408198773861, "learning_rate": 2.0176837922225377e-05, "loss": 6.616152822971343e-05, "step": 281330 }, { "epoch": 79.85807550383196, "grad_norm": 0.06847696006298065, "learning_rate": 2.0173999432302015e-05, "loss": 3.1474605202674864e-05, "step": 281340 }, { "epoch": 79.86091399375532, "grad_norm": 0.0019954347517341375, "learning_rate": 2.0171160942378657e-05, "loss": 2.111140638589859e-05, "step": 281350 }, { "epoch": 79.86375248367868, "grad_norm": 0.0020280799362808466, "learning_rate": 2.0168322452455295e-05, "loss": 1.8092989921569825e-05, "step": 281360 }, { "epoch": 79.86659097360204, "grad_norm": 0.001603327807970345, "learning_rate": 2.0165483962531933e-05, "loss": 2.5014951825141905e-05, "step": 281370 }, { "epoch": 79.8694294635254, "grad_norm": 0.004573903512209654, "learning_rate": 2.0162645472608574e-05, "loss": 6.259679794311524e-05, "step": 281380 }, { "epoch": 79.87226795344877, "grad_norm": 0.0011022959370166063, "learning_rate": 2.0159806982685212e-05, "loss": 9.468570351600646e-06, "step": 281390 }, { "epoch": 79.87510644337213, "grad_norm": 0.02143675461411476, "learning_rate": 2.015696849276185e-05, "loss": 2.1507777273654937e-05, "step": 281400 }, { "epoch": 79.87794493329548, "grad_norm": 0.0045280177146196365, "learning_rate": 2.015413000283849e-05, "loss": 1.299045979976654e-05, "step": 281410 }, { "epoch": 79.88078342321884, "grad_norm": 0.0034920780453830957, "learning_rate": 2.015129151291513e-05, "loss": 4.222095012664795e-05, "step": 281420 }, { "epoch": 79.8836219131422, "grad_norm": 0.02169874869287014, "learning_rate": 2.014845302299177e-05, "loss": 2.140142023563385e-05, "step": 281430 }, { "epoch": 79.88646040306557, "grad_norm": 0.001210011076182127, "learning_rate": 2.014561453306841e-05, "loss": 2.692136913537979e-05, "step": 281440 }, { "epoch": 79.88929889298893, "grad_norm": 0.0010772020323202014, "learning_rate": 2.0142776043145047e-05, "loss": 1.1344440281391144e-05, "step": 281450 }, { "epoch": 79.8921373829123, "grad_norm": 0.019970854744315147, "learning_rate": 2.0139937553221688e-05, "loss": 2.6128068566322326e-05, "step": 281460 }, { "epoch": 79.89497587283566, "grad_norm": 0.001136095612309873, "learning_rate": 2.0137099063298326e-05, "loss": 1.1313706636428833e-05, "step": 281470 }, { "epoch": 79.89781436275901, "grad_norm": 0.006394742522388697, "learning_rate": 2.0134260573374968e-05, "loss": 3.0530430376529696e-05, "step": 281480 }, { "epoch": 79.90065285268237, "grad_norm": 0.010350468568503857, "learning_rate": 2.0131422083451602e-05, "loss": 1.911874860525131e-05, "step": 281490 }, { "epoch": 79.90349134260573, "grad_norm": 0.0007871090201660991, "learning_rate": 2.0128583593528244e-05, "loss": 1.7582625150680543e-05, "step": 281500 }, { "epoch": 79.90349134260573, "eval_accuracy": 0.9872830164684937, "eval_loss": 0.05098092928528786, "eval_runtime": 36.0235, "eval_samples_per_second": 436.576, "eval_steps_per_second": 6.829, "step": 281500 }, { "epoch": 79.9063298325291, "grad_norm": 0.0015984720084816217, "learning_rate": 2.0125745103604885e-05, "loss": 1.7588213086128236e-05, "step": 281510 }, { "epoch": 79.90916832245246, "grad_norm": 0.006060895510017872, "learning_rate": 2.0122906613681523e-05, "loss": 2.6949122548103334e-05, "step": 281520 }, { "epoch": 79.91200681237582, "grad_norm": 0.0033870371989905834, "learning_rate": 2.012006812375816e-05, "loss": 1.3600476086139679e-05, "step": 281530 }, { "epoch": 79.91484530229917, "grad_norm": 0.044133082032203674, "learning_rate": 2.01172296338348e-05, "loss": 2.3464113473892213e-05, "step": 281540 }, { "epoch": 79.91768379222253, "grad_norm": 0.0033877426758408546, "learning_rate": 2.011439114391144e-05, "loss": 1.5476718544960022e-05, "step": 281550 }, { "epoch": 79.9205222821459, "grad_norm": 0.005172175820916891, "learning_rate": 2.011155265398808e-05, "loss": 1.452658325433731e-05, "step": 281560 }, { "epoch": 79.92336077206926, "grad_norm": 0.00043727015145123005, "learning_rate": 2.010871416406472e-05, "loss": 1.4653243124485015e-05, "step": 281570 }, { "epoch": 79.92619926199262, "grad_norm": 0.0038819455076009035, "learning_rate": 2.0105875674141358e-05, "loss": 1.8327683210372925e-05, "step": 281580 }, { "epoch": 79.92903775191598, "grad_norm": 0.001962891547009349, "learning_rate": 2.0103037184217996e-05, "loss": 1.1024996638298035e-05, "step": 281590 }, { "epoch": 79.93187624183935, "grad_norm": 0.00682072201743722, "learning_rate": 2.0100198694294637e-05, "loss": 2.2123195230960845e-05, "step": 281600 }, { "epoch": 79.9347147317627, "grad_norm": 0.0022590344306081533, "learning_rate": 2.0097360204371275e-05, "loss": 2.0315684378147125e-05, "step": 281610 }, { "epoch": 79.93755322168606, "grad_norm": 0.08234090358018875, "learning_rate": 2.0094521714447913e-05, "loss": 2.969764173030853e-05, "step": 281620 }, { "epoch": 79.94039171160942, "grad_norm": 0.011940794996917248, "learning_rate": 2.0091683224524554e-05, "loss": 1.777186989784241e-05, "step": 281630 }, { "epoch": 79.94323020153278, "grad_norm": 0.03529172018170357, "learning_rate": 2.0088844734601192e-05, "loss": 1.827031373977661e-05, "step": 281640 }, { "epoch": 79.94606869145615, "grad_norm": 0.016605660319328308, "learning_rate": 2.0086006244677834e-05, "loss": 1.5907175838947297e-05, "step": 281650 }, { "epoch": 79.94890718137951, "grad_norm": 0.00035840526106767356, "learning_rate": 2.0083167754754472e-05, "loss": 8.490495383739472e-06, "step": 281660 }, { "epoch": 79.95174567130287, "grad_norm": 0.0011850034352391958, "learning_rate": 2.008032926483111e-05, "loss": 1.6671791672706605e-05, "step": 281670 }, { "epoch": 79.95458416122622, "grad_norm": 0.007295381743460894, "learning_rate": 2.007749077490775e-05, "loss": 1.535937190055847e-05, "step": 281680 }, { "epoch": 79.95742265114959, "grad_norm": 0.00400262838229537, "learning_rate": 2.007465228498439e-05, "loss": 1.3855844736099244e-05, "step": 281690 }, { "epoch": 79.96026114107295, "grad_norm": 0.0003563838836271316, "learning_rate": 2.0071813795061027e-05, "loss": 2.8907880187034606e-05, "step": 281700 }, { "epoch": 79.96309963099631, "grad_norm": 0.0017147755715996027, "learning_rate": 2.006897530513767e-05, "loss": 1.9494444131851196e-05, "step": 281710 }, { "epoch": 79.96593812091967, "grad_norm": 0.005840607453137636, "learning_rate": 2.0066136815214306e-05, "loss": 1.3232044875621795e-05, "step": 281720 }, { "epoch": 79.96877661084304, "grad_norm": 0.005426195915788412, "learning_rate": 2.0063298325290948e-05, "loss": 2.3043341934680937e-05, "step": 281730 }, { "epoch": 79.97161510076639, "grad_norm": 0.012386355549097061, "learning_rate": 2.0060459835367586e-05, "loss": 3.328379243612289e-05, "step": 281740 }, { "epoch": 79.97445359068975, "grad_norm": 0.09811152517795563, "learning_rate": 2.0057621345444224e-05, "loss": 2.882983535528183e-05, "step": 281750 }, { "epoch": 79.97729208061311, "grad_norm": 0.0009527429356239736, "learning_rate": 2.0054782855520865e-05, "loss": 2.1072477102279664e-05, "step": 281760 }, { "epoch": 79.98013057053647, "grad_norm": 0.001906834077090025, "learning_rate": 2.0051944365597503e-05, "loss": 2.6232749223709107e-05, "step": 281770 }, { "epoch": 79.98296906045984, "grad_norm": 0.0027012841310352087, "learning_rate": 2.0049105875674144e-05, "loss": 1.1064298450946809e-05, "step": 281780 }, { "epoch": 79.9858075503832, "grad_norm": 0.002087418921291828, "learning_rate": 2.004626738575078e-05, "loss": 1.763906329870224e-05, "step": 281790 }, { "epoch": 79.98864604030656, "grad_norm": 0.0014239499578252435, "learning_rate": 2.004342889582742e-05, "loss": 1.3745203614234924e-05, "step": 281800 }, { "epoch": 79.99148453022991, "grad_norm": 0.00017720347386784852, "learning_rate": 2.0040590405904062e-05, "loss": 1.0796450078487397e-05, "step": 281810 }, { "epoch": 79.99432302015327, "grad_norm": 0.006879121530801058, "learning_rate": 2.00377519159807e-05, "loss": 1.774989068508148e-05, "step": 281820 }, { "epoch": 79.99716151007664, "grad_norm": 0.0017038638470694423, "learning_rate": 2.0034913426057338e-05, "loss": 1.430213451385498e-05, "step": 281830 }, { "epoch": 80.0, "grad_norm": 0.0395050123333931, "learning_rate": 2.0032074936133976e-05, "loss": 2.4569712695665658e-05, "step": 281840 }, { "epoch": 80.00283848992336, "grad_norm": 0.005122131668031216, "learning_rate": 2.0029236446210617e-05, "loss": 6.489455699920654e-06, "step": 281850 }, { "epoch": 80.00567697984673, "grad_norm": 0.0014246872160583735, "learning_rate": 2.002639795628726e-05, "loss": 1.28844752907753e-05, "step": 281860 }, { "epoch": 80.00851546977009, "grad_norm": 0.0038832188583910465, "learning_rate": 2.0023559466363893e-05, "loss": 7.984042167663574e-06, "step": 281870 }, { "epoch": 80.01135395969344, "grad_norm": 0.0007799058221280575, "learning_rate": 2.0020720976440535e-05, "loss": 7.293745875358582e-06, "step": 281880 }, { "epoch": 80.0141924496168, "grad_norm": 0.0007048560655675828, "learning_rate": 2.0017882486517172e-05, "loss": 3.102961927652359e-05, "step": 281890 }, { "epoch": 80.01703093954016, "grad_norm": 0.0007624770514667034, "learning_rate": 2.0015043996593814e-05, "loss": 1.0217539966106415e-05, "step": 281900 }, { "epoch": 80.01986942946353, "grad_norm": 0.002160588977858424, "learning_rate": 2.0012205506670452e-05, "loss": 2.0534731447696686e-05, "step": 281910 }, { "epoch": 80.02270791938689, "grad_norm": 0.0007597374496981502, "learning_rate": 2.000936701674709e-05, "loss": 1.4910474419593811e-05, "step": 281920 }, { "epoch": 80.02554640931025, "grad_norm": 0.005946943070739508, "learning_rate": 2.000652852682373e-05, "loss": 2.4188868701457976e-05, "step": 281930 }, { "epoch": 80.02838489923361, "grad_norm": 0.002185862511396408, "learning_rate": 2.000369003690037e-05, "loss": 8.248910307884217e-06, "step": 281940 }, { "epoch": 80.03122338915696, "grad_norm": 0.001376515836454928, "learning_rate": 2.000085154697701e-05, "loss": 1.814942806959152e-05, "step": 281950 }, { "epoch": 80.03406187908033, "grad_norm": 0.0005399108049459755, "learning_rate": 1.999801305705365e-05, "loss": 1.568179577589035e-05, "step": 281960 }, { "epoch": 80.03690036900369, "grad_norm": 0.04894169792532921, "learning_rate": 1.9995174567130287e-05, "loss": 3.594271838665008e-05, "step": 281970 }, { "epoch": 80.03973885892705, "grad_norm": 0.00438341498374939, "learning_rate": 1.9992336077206928e-05, "loss": 1.9061379134655e-05, "step": 281980 }, { "epoch": 80.04257734885041, "grad_norm": 0.0018458300037309527, "learning_rate": 1.9989497587283566e-05, "loss": 8.09989869594574e-06, "step": 281990 }, { "epoch": 80.04541583877378, "grad_norm": 0.008335358463227749, "learning_rate": 1.9986659097360204e-05, "loss": 1.6166456043720244e-05, "step": 282000 }, { "epoch": 80.04541583877378, "eval_accuracy": 0.9874737712214663, "eval_loss": 0.04884262755513191, "eval_runtime": 35.0496, "eval_samples_per_second": 448.707, "eval_steps_per_second": 7.019, "step": 282000 }, { "epoch": 80.04825432869713, "grad_norm": 0.011251780204474926, "learning_rate": 1.9983820607436845e-05, "loss": 1.3251230120658874e-05, "step": 282010 }, { "epoch": 80.05109281862049, "grad_norm": 0.0033713625743985176, "learning_rate": 1.9980982117513483e-05, "loss": 5.196593701839447e-06, "step": 282020 }, { "epoch": 80.05393130854385, "grad_norm": 0.0014136824756860733, "learning_rate": 1.9978143627590125e-05, "loss": 1.688823103904724e-05, "step": 282030 }, { "epoch": 80.05676979846722, "grad_norm": 0.0028228156734257936, "learning_rate": 1.9975305137666763e-05, "loss": 1.628231257200241e-05, "step": 282040 }, { "epoch": 80.05960828839058, "grad_norm": 0.0010163818951696157, "learning_rate": 1.99724666477434e-05, "loss": 1.1428818106651306e-05, "step": 282050 }, { "epoch": 80.06244677831394, "grad_norm": 0.006712102331221104, "learning_rate": 1.9969628157820042e-05, "loss": 1.1336803436279296e-05, "step": 282060 }, { "epoch": 80.0652852682373, "grad_norm": 0.002493436448276043, "learning_rate": 1.996678966789668e-05, "loss": 1.263301819562912e-05, "step": 282070 }, { "epoch": 80.06812375816065, "grad_norm": 0.0007421349873766303, "learning_rate": 1.9963951177973318e-05, "loss": 3.0996464192867276e-05, "step": 282080 }, { "epoch": 80.07096224808402, "grad_norm": 0.001482599414885044, "learning_rate": 1.9961112688049956e-05, "loss": 1.0250695049762726e-05, "step": 282090 }, { "epoch": 80.07380073800738, "grad_norm": 0.00223570060916245, "learning_rate": 1.9958274198126597e-05, "loss": 7.189996540546417e-06, "step": 282100 }, { "epoch": 80.07663922793074, "grad_norm": 0.0035129161551594734, "learning_rate": 1.995543570820324e-05, "loss": 8.120760321617127e-06, "step": 282110 }, { "epoch": 80.0794777178541, "grad_norm": 0.0002052016498055309, "learning_rate": 1.9952597218279877e-05, "loss": 1.0968931019306182e-05, "step": 282120 }, { "epoch": 80.08231620777747, "grad_norm": 0.0012502026511356235, "learning_rate": 1.9949758728356515e-05, "loss": 1.1252984404563903e-05, "step": 282130 }, { "epoch": 80.08515469770083, "grad_norm": 0.0012725520646199584, "learning_rate": 1.9946920238433153e-05, "loss": 1.1789426207542419e-05, "step": 282140 }, { "epoch": 80.08799318762418, "grad_norm": 0.002380599733442068, "learning_rate": 1.9944081748509794e-05, "loss": 7.197819650173188e-06, "step": 282150 }, { "epoch": 80.09083167754754, "grad_norm": 0.03313722088932991, "learning_rate": 1.9941243258586435e-05, "loss": 1.4916807413101196e-05, "step": 282160 }, { "epoch": 80.0936701674709, "grad_norm": 0.018614474684000015, "learning_rate": 1.993840476866307e-05, "loss": 1.4937669038772584e-05, "step": 282170 }, { "epoch": 80.09650865739427, "grad_norm": 0.000908602902200073, "learning_rate": 1.993556627873971e-05, "loss": 4.181917756795883e-05, "step": 282180 }, { "epoch": 80.09934714731763, "grad_norm": 0.005206328351050615, "learning_rate": 1.993272778881635e-05, "loss": 1.0566972196102143e-05, "step": 282190 }, { "epoch": 80.10218563724099, "grad_norm": 0.001730646239593625, "learning_rate": 1.992988929889299e-05, "loss": 5.22516667842865e-05, "step": 282200 }, { "epoch": 80.10502412716434, "grad_norm": 0.0012044466566294432, "learning_rate": 1.992705080896963e-05, "loss": 1.3955309987068176e-05, "step": 282210 }, { "epoch": 80.1078626170877, "grad_norm": 0.005500028375536203, "learning_rate": 1.9924212319046267e-05, "loss": 1.7525069415569307e-05, "step": 282220 }, { "epoch": 80.11070110701107, "grad_norm": 0.0018946523778140545, "learning_rate": 1.9921373829122908e-05, "loss": 3.4043006598949434e-05, "step": 282230 }, { "epoch": 80.11353959693443, "grad_norm": 0.0010196521179750562, "learning_rate": 1.9918535339199546e-05, "loss": 1.337788999080658e-05, "step": 282240 }, { "epoch": 80.1163780868578, "grad_norm": 0.0012347950832918286, "learning_rate": 1.9915696849276187e-05, "loss": 2.597793936729431e-05, "step": 282250 }, { "epoch": 80.11921657678116, "grad_norm": 0.005539819598197937, "learning_rate": 1.9912858359352825e-05, "loss": 1.2330152094364166e-05, "step": 282260 }, { "epoch": 80.12205506670452, "grad_norm": 0.0032959487289190292, "learning_rate": 1.9910019869429463e-05, "loss": 1.3524852693080903e-05, "step": 282270 }, { "epoch": 80.12489355662787, "grad_norm": 0.006550466641783714, "learning_rate": 1.9907181379506105e-05, "loss": 2.2923387587070464e-05, "step": 282280 }, { "epoch": 80.12773204655123, "grad_norm": 0.0022483021020889282, "learning_rate": 1.9904342889582743e-05, "loss": 8.940137922763824e-06, "step": 282290 }, { "epoch": 80.1305705364746, "grad_norm": 0.01147159282118082, "learning_rate": 1.990150439965938e-05, "loss": 9.442120790481567e-06, "step": 282300 }, { "epoch": 80.13340902639796, "grad_norm": 0.0009557073353789747, "learning_rate": 1.9898665909736022e-05, "loss": 1.2123212218284608e-05, "step": 282310 }, { "epoch": 80.13624751632132, "grad_norm": 0.0009801259730011225, "learning_rate": 1.989582741981266e-05, "loss": 9.37972217798233e-06, "step": 282320 }, { "epoch": 80.13908600624468, "grad_norm": 0.0025350807700306177, "learning_rate": 1.98929889298893e-05, "loss": 1.5517883002758027e-05, "step": 282330 }, { "epoch": 80.14192449616804, "grad_norm": 0.0011113464133813977, "learning_rate": 1.9890150439965936e-05, "loss": 8.27014446258545e-06, "step": 282340 }, { "epoch": 80.1447629860914, "grad_norm": 0.0017696230206638575, "learning_rate": 1.9887311950042577e-05, "loss": 1.3512745499610902e-05, "step": 282350 }, { "epoch": 80.14760147601476, "grad_norm": 0.0007310761720873415, "learning_rate": 1.988447346011922e-05, "loss": 1.1830031871795654e-05, "step": 282360 }, { "epoch": 80.15043996593812, "grad_norm": 0.0015489396173506975, "learning_rate": 1.9881634970195857e-05, "loss": 1.1531636118888855e-05, "step": 282370 }, { "epoch": 80.15327845586148, "grad_norm": 0.051986176520586014, "learning_rate": 1.9878796480272495e-05, "loss": 2.3947283625602723e-05, "step": 282380 }, { "epoch": 80.15611694578485, "grad_norm": 0.0042860545217990875, "learning_rate": 1.9875957990349133e-05, "loss": 1.4674291014671325e-05, "step": 282390 }, { "epoch": 80.15895543570821, "grad_norm": 0.005534681957215071, "learning_rate": 1.9873119500425774e-05, "loss": 1.5840120613574982e-05, "step": 282400 }, { "epoch": 80.16179392563157, "grad_norm": 0.009478827007114887, "learning_rate": 1.9870281010502416e-05, "loss": 1.752357929944992e-05, "step": 282410 }, { "epoch": 80.16463241555492, "grad_norm": 0.004074523225426674, "learning_rate": 1.9867442520579054e-05, "loss": 1.0311789810657502e-05, "step": 282420 }, { "epoch": 80.16747090547828, "grad_norm": 0.0031315386295318604, "learning_rate": 1.986460403065569e-05, "loss": 1.2413226068019867e-05, "step": 282430 }, { "epoch": 80.17030939540165, "grad_norm": 0.0008533121435903013, "learning_rate": 1.986176554073233e-05, "loss": 2.0444951951503752e-05, "step": 282440 }, { "epoch": 80.17314788532501, "grad_norm": 0.004018109757453203, "learning_rate": 1.985892705080897e-05, "loss": 7.592327892780304e-06, "step": 282450 }, { "epoch": 80.17598637524837, "grad_norm": 0.001406927709467709, "learning_rate": 1.9856088560885612e-05, "loss": 1.1384859681129455e-05, "step": 282460 }, { "epoch": 80.17882486517173, "grad_norm": 0.0012655139435082674, "learning_rate": 1.9853250070962247e-05, "loss": 1.1606141924858093e-05, "step": 282470 }, { "epoch": 80.18166335509508, "grad_norm": 0.0023980706464499235, "learning_rate": 1.9850411581038888e-05, "loss": 9.76845622062683e-06, "step": 282480 }, { "epoch": 80.18450184501845, "grad_norm": 0.0017808392876759171, "learning_rate": 1.984757309111553e-05, "loss": 1.4120526611804961e-05, "step": 282490 }, { "epoch": 80.18734033494181, "grad_norm": 0.0016533685848116875, "learning_rate": 1.9844734601192168e-05, "loss": 8.401647210121155e-06, "step": 282500 }, { "epoch": 80.18734033494181, "eval_accuracy": 0.9874737712214663, "eval_loss": 0.04905133321881294, "eval_runtime": 35.8343, "eval_samples_per_second": 438.881, "eval_steps_per_second": 6.865, "step": 282500 }, { "epoch": 80.19017882486517, "grad_norm": 0.019200408831238747, "learning_rate": 1.9841896111268806e-05, "loss": 1.711789518594742e-05, "step": 282510 }, { "epoch": 80.19301731478853, "grad_norm": 0.0009926484199240804, "learning_rate": 1.9839057621345444e-05, "loss": 9.264424443244934e-06, "step": 282520 }, { "epoch": 80.1958558047119, "grad_norm": 0.001637552515603602, "learning_rate": 1.9836219131422085e-05, "loss": 1.3796053826808929e-05, "step": 282530 }, { "epoch": 80.19869429463526, "grad_norm": 0.0006899838335812092, "learning_rate": 1.9833380641498726e-05, "loss": 1.2759491801261902e-05, "step": 282540 }, { "epoch": 80.20153278455861, "grad_norm": 0.0055356877855956554, "learning_rate": 1.983054215157536e-05, "loss": 2.6158802211284637e-05, "step": 282550 }, { "epoch": 80.20437127448197, "grad_norm": 0.0009212405420839787, "learning_rate": 1.9827703661652002e-05, "loss": 1.457761973142624e-05, "step": 282560 }, { "epoch": 80.20720976440533, "grad_norm": 0.0011684877099469304, "learning_rate": 1.982486517172864e-05, "loss": 1.5616044402122496e-05, "step": 282570 }, { "epoch": 80.2100482543287, "grad_norm": 0.0011452827602624893, "learning_rate": 1.982202668180528e-05, "loss": 1.3579800724983216e-05, "step": 282580 }, { "epoch": 80.21288674425206, "grad_norm": 0.0011643933830782771, "learning_rate": 1.981918819188192e-05, "loss": 1.4023110270500184e-05, "step": 282590 }, { "epoch": 80.21572523417542, "grad_norm": 0.002875453559681773, "learning_rate": 1.9816349701958558e-05, "loss": 9.263493120670319e-06, "step": 282600 }, { "epoch": 80.21856372409879, "grad_norm": 0.0013152670580893755, "learning_rate": 1.98135112120352e-05, "loss": 1.0397285223007202e-05, "step": 282610 }, { "epoch": 80.22140221402213, "grad_norm": 0.002177555812522769, "learning_rate": 1.9810672722111837e-05, "loss": 1.571383327245712e-05, "step": 282620 }, { "epoch": 80.2242407039455, "grad_norm": 0.001881889533251524, "learning_rate": 1.980783423218848e-05, "loss": 8.991360664367675e-06, "step": 282630 }, { "epoch": 80.22707919386886, "grad_norm": 0.003101675771176815, "learning_rate": 1.9804995742265116e-05, "loss": 1.4223530888557434e-05, "step": 282640 }, { "epoch": 80.22991768379222, "grad_norm": 0.0011376733891665936, "learning_rate": 1.9802157252341754e-05, "loss": 8.611194789409638e-06, "step": 282650 }, { "epoch": 80.23275617371559, "grad_norm": 0.0032853414304554462, "learning_rate": 1.9799318762418396e-05, "loss": 1.209750771522522e-05, "step": 282660 }, { "epoch": 80.23559466363895, "grad_norm": 0.0008111572824418545, "learning_rate": 1.9796480272495034e-05, "loss": 1.0253489017486573e-05, "step": 282670 }, { "epoch": 80.23843315356231, "grad_norm": 0.00048417079960927367, "learning_rate": 1.979364178257167e-05, "loss": 1.4369748532772064e-05, "step": 282680 }, { "epoch": 80.24127164348566, "grad_norm": 0.000797167478594929, "learning_rate": 1.9790803292648313e-05, "loss": 7.588602602481842e-06, "step": 282690 }, { "epoch": 80.24411013340902, "grad_norm": 0.0011177535634487867, "learning_rate": 1.978796480272495e-05, "loss": 7.264502346515655e-06, "step": 282700 }, { "epoch": 80.24694862333239, "grad_norm": 0.0022084233351051807, "learning_rate": 1.9785126312801592e-05, "loss": 2.1644309163093568e-05, "step": 282710 }, { "epoch": 80.24978711325575, "grad_norm": 0.001263202866539359, "learning_rate": 1.978228782287823e-05, "loss": 1.4389492571353913e-05, "step": 282720 }, { "epoch": 80.25262560317911, "grad_norm": 0.001130787655711174, "learning_rate": 1.977944933295487e-05, "loss": 1.4178641140460969e-05, "step": 282730 }, { "epoch": 80.25546409310247, "grad_norm": 0.002281002001836896, "learning_rate": 1.977661084303151e-05, "loss": 8.860789239406586e-06, "step": 282740 }, { "epoch": 80.25830258302582, "grad_norm": 0.0014683735789731145, "learning_rate": 1.9773772353108148e-05, "loss": 1.0973401367664337e-05, "step": 282750 }, { "epoch": 80.26114107294919, "grad_norm": 0.0010187398875132203, "learning_rate": 1.9770933863184786e-05, "loss": 1.117512583732605e-05, "step": 282760 }, { "epoch": 80.26397956287255, "grad_norm": 0.0012029220815747976, "learning_rate": 1.9768095373261424e-05, "loss": 7.314607501029968e-06, "step": 282770 }, { "epoch": 80.26681805279591, "grad_norm": 0.000489089114125818, "learning_rate": 1.9765256883338065e-05, "loss": 9.935908019542694e-06, "step": 282780 }, { "epoch": 80.26965654271928, "grad_norm": 0.0013648751191794872, "learning_rate": 1.9762418393414706e-05, "loss": 1.2453086674213409e-05, "step": 282790 }, { "epoch": 80.27249503264264, "grad_norm": 0.0014810170978307724, "learning_rate": 1.9759579903491344e-05, "loss": 8.982792496681214e-06, "step": 282800 }, { "epoch": 80.275333522566, "grad_norm": 0.00470730708912015, "learning_rate": 1.9756741413567982e-05, "loss": 1.0282173752784729e-05, "step": 282810 }, { "epoch": 80.27817201248935, "grad_norm": 0.0009747603326104581, "learning_rate": 1.975390292364462e-05, "loss": 8.645281195640563e-06, "step": 282820 }, { "epoch": 80.28101050241271, "grad_norm": 0.0029441777151077986, "learning_rate": 1.9751064433721262e-05, "loss": 1.1473707854747772e-05, "step": 282830 }, { "epoch": 80.28384899233608, "grad_norm": 0.0008112153154797852, "learning_rate": 1.9748225943797903e-05, "loss": 1.4010444283485413e-05, "step": 282840 }, { "epoch": 80.28668748225944, "grad_norm": 0.007893956266343594, "learning_rate": 1.9745387453874538e-05, "loss": 2.8219446539878847e-05, "step": 282850 }, { "epoch": 80.2895259721828, "grad_norm": 0.012568448670208454, "learning_rate": 1.974254896395118e-05, "loss": 1.2986920773983002e-05, "step": 282860 }, { "epoch": 80.29236446210616, "grad_norm": 0.000691570807248354, "learning_rate": 1.9739710474027817e-05, "loss": 1.95501372218132e-05, "step": 282870 }, { "epoch": 80.29520295202953, "grad_norm": 0.0004147737054154277, "learning_rate": 1.973687198410446e-05, "loss": 1.726187765598297e-05, "step": 282880 }, { "epoch": 80.29804144195288, "grad_norm": 0.0013880273327231407, "learning_rate": 1.9734033494181097e-05, "loss": 1.5646591782569887e-05, "step": 282890 }, { "epoch": 80.30087993187624, "grad_norm": 0.0017655760748311877, "learning_rate": 1.9731195004257735e-05, "loss": 1.8097646534442902e-05, "step": 282900 }, { "epoch": 80.3037184217996, "grad_norm": 0.0008921234402805567, "learning_rate": 1.9728356514334376e-05, "loss": 8.307211101055145e-06, "step": 282910 }, { "epoch": 80.30655691172296, "grad_norm": 0.0017630219226703048, "learning_rate": 1.9725518024411014e-05, "loss": 2.6554986834526063e-05, "step": 282920 }, { "epoch": 80.30939540164633, "grad_norm": 0.0014361763605847955, "learning_rate": 1.9722679534487655e-05, "loss": 1.4261528849601746e-05, "step": 282930 }, { "epoch": 80.31223389156969, "grad_norm": 0.001892282161861658, "learning_rate": 1.972012489355663e-05, "loss": 0.0007869362831115723, "step": 282940 }, { "epoch": 80.31507238149304, "grad_norm": 0.4491569697856903, "learning_rate": 1.9717286403633266e-05, "loss": 8.282214403152466e-05, "step": 282950 }, { "epoch": 80.3179108714164, "grad_norm": 0.32461726665496826, "learning_rate": 1.9714447913709908e-05, "loss": 8.776579052209854e-05, "step": 282960 }, { "epoch": 80.32074936133976, "grad_norm": 0.06150040403008461, "learning_rate": 1.971160942378655e-05, "loss": 0.00012567266821861267, "step": 282970 }, { "epoch": 80.32358785126313, "grad_norm": 0.013122114352881908, "learning_rate": 1.9708770933863184e-05, "loss": 9.574051946401597e-05, "step": 282980 }, { "epoch": 80.32642634118649, "grad_norm": 0.0044446527026593685, "learning_rate": 1.9705932443939825e-05, "loss": 0.0014853281900286674, "step": 282990 }, { "epoch": 80.32926483110985, "grad_norm": 0.02714652009308338, "learning_rate": 1.9703093954016463e-05, "loss": 0.00021296627819538115, "step": 283000 }, { "epoch": 80.32926483110985, "eval_accuracy": 0.9858205633623705, "eval_loss": 0.05777212232351303, "eval_runtime": 35.4031, "eval_samples_per_second": 444.226, "eval_steps_per_second": 6.949, "step": 283000 }, { "epoch": 80.33210332103322, "grad_norm": 0.0012216611066833138, "learning_rate": 1.9700255464093104e-05, "loss": 9.506307542324066e-05, "step": 283010 }, { "epoch": 80.33494181095656, "grad_norm": 0.01570688560605049, "learning_rate": 1.9697416974169742e-05, "loss": 0.00012708734720945358, "step": 283020 }, { "epoch": 80.33778030087993, "grad_norm": 0.00724784517660737, "learning_rate": 1.969457848424638e-05, "loss": 3.557447344064713e-05, "step": 283030 }, { "epoch": 80.34061879080329, "grad_norm": 0.001649864250794053, "learning_rate": 1.9691739994323022e-05, "loss": 2.333410084247589e-05, "step": 283040 }, { "epoch": 80.34345728072665, "grad_norm": 0.003069588914513588, "learning_rate": 1.968890150439966e-05, "loss": 5.197394639253616e-05, "step": 283050 }, { "epoch": 80.34629577065002, "grad_norm": 0.0033221880439668894, "learning_rate": 1.96860630144763e-05, "loss": 0.00011081714183092117, "step": 283060 }, { "epoch": 80.34913426057338, "grad_norm": 0.0021052113734185696, "learning_rate": 1.968322452455294e-05, "loss": 1.8524006009101868e-05, "step": 283070 }, { "epoch": 80.35197275049674, "grad_norm": 0.0014933214988559484, "learning_rate": 1.9680386034629577e-05, "loss": 1.909695565700531e-05, "step": 283080 }, { "epoch": 80.35481124042009, "grad_norm": 0.002021810272708535, "learning_rate": 1.967754754470622e-05, "loss": 1.0314583778381348e-05, "step": 283090 }, { "epoch": 80.35764973034345, "grad_norm": 0.08868247270584106, "learning_rate": 1.9674709054782857e-05, "loss": 5.741603672504425e-05, "step": 283100 }, { "epoch": 80.36048822026682, "grad_norm": 0.16209590435028076, "learning_rate": 1.9671870564859495e-05, "loss": 5.2171386778354643e-05, "step": 283110 }, { "epoch": 80.36332671019018, "grad_norm": 0.013762989081442356, "learning_rate": 1.9669032074936136e-05, "loss": 2.997554838657379e-05, "step": 283120 }, { "epoch": 80.36616520011354, "grad_norm": 0.0061326115392148495, "learning_rate": 1.9666193585012774e-05, "loss": 1.8132105469703675e-05, "step": 283130 }, { "epoch": 80.3690036900369, "grad_norm": 0.00118284544441849, "learning_rate": 1.9663355095089415e-05, "loss": 4.506763070821762e-05, "step": 283140 }, { "epoch": 80.37184217996027, "grad_norm": 0.0021669166162610054, "learning_rate": 1.9660516605166053e-05, "loss": 0.00022495370358228685, "step": 283150 }, { "epoch": 80.37468066988362, "grad_norm": 0.049735501408576965, "learning_rate": 1.965767811524269e-05, "loss": 0.00014295540750026704, "step": 283160 }, { "epoch": 80.37751915980698, "grad_norm": 0.019453037530183792, "learning_rate": 1.9654839625319333e-05, "loss": 0.010117460042238235, "step": 283170 }, { "epoch": 80.38035764973034, "grad_norm": 0.22964170575141907, "learning_rate": 1.965200113539597e-05, "loss": 0.006384300440549851, "step": 283180 }, { "epoch": 80.3831961396537, "grad_norm": 0.0015435511013492942, "learning_rate": 1.964916264547261e-05, "loss": 0.0007460976019501686, "step": 283190 }, { "epoch": 80.38603462957707, "grad_norm": 0.009939259849488735, "learning_rate": 1.9646324155549247e-05, "loss": 0.00026317816227674486, "step": 283200 }, { "epoch": 80.38887311950043, "grad_norm": 0.010484222322702408, "learning_rate": 1.9643485665625888e-05, "loss": 0.0010540613904595375, "step": 283210 }, { "epoch": 80.39171160942378, "grad_norm": 0.004848931450396776, "learning_rate": 1.964064717570253e-05, "loss": 0.0048472538590431215, "step": 283220 }, { "epoch": 80.39455009934714, "grad_norm": 2.3543360233306885, "learning_rate": 1.9637808685779167e-05, "loss": 0.0004162827506661415, "step": 283230 }, { "epoch": 80.3973885892705, "grad_norm": 0.055403806269168854, "learning_rate": 1.9634970195855805e-05, "loss": 0.00026719067245721816, "step": 283240 }, { "epoch": 80.40022707919387, "grad_norm": 0.11045314371585846, "learning_rate": 1.9632131705932443e-05, "loss": 0.00013410262763500214, "step": 283250 }, { "epoch": 80.40306556911723, "grad_norm": 0.11585811525583267, "learning_rate": 1.9629293216009085e-05, "loss": 8.797980844974518e-05, "step": 283260 }, { "epoch": 80.4059040590406, "grad_norm": 0.0424322709441185, "learning_rate": 1.9626454726085726e-05, "loss": 0.00012364257127046586, "step": 283270 }, { "epoch": 80.40874254896396, "grad_norm": 0.017371581867337227, "learning_rate": 1.962361623616236e-05, "loss": 0.000129568949341774, "step": 283280 }, { "epoch": 80.4115810388873, "grad_norm": 0.003794969990849495, "learning_rate": 1.9620777746239002e-05, "loss": 0.0006956744939088822, "step": 283290 }, { "epoch": 80.41441952881067, "grad_norm": 0.003312731394544244, "learning_rate": 1.961793925631564e-05, "loss": 0.0006877569481730461, "step": 283300 }, { "epoch": 80.41725801873403, "grad_norm": 0.004374770447611809, "learning_rate": 1.961510076639228e-05, "loss": 0.0007939267903566361, "step": 283310 }, { "epoch": 80.4200965086574, "grad_norm": 0.003861719975247979, "learning_rate": 1.961226227646892e-05, "loss": 0.0002162521705031395, "step": 283320 }, { "epoch": 80.42293499858076, "grad_norm": 0.010144712403416634, "learning_rate": 1.9609423786545557e-05, "loss": 9.81312245130539e-05, "step": 283330 }, { "epoch": 80.42577348850412, "grad_norm": 0.043060436844825745, "learning_rate": 1.96065852966222e-05, "loss": 0.0006225224584341049, "step": 283340 }, { "epoch": 80.42861197842748, "grad_norm": 0.017913132905960083, "learning_rate": 1.9603746806698837e-05, "loss": 0.0009795419871807098, "step": 283350 }, { "epoch": 80.43145046835083, "grad_norm": 0.03529990464448929, "learning_rate": 1.9600908316775478e-05, "loss": 0.00023893453180789948, "step": 283360 }, { "epoch": 80.4342889582742, "grad_norm": 1.516191005706787, "learning_rate": 1.9598069826852116e-05, "loss": 0.0003618573769927025, "step": 283370 }, { "epoch": 80.43712744819756, "grad_norm": 0.24748387932777405, "learning_rate": 1.9595231336928754e-05, "loss": 0.00015176665037870408, "step": 283380 }, { "epoch": 80.43996593812092, "grad_norm": 11.85282039642334, "learning_rate": 1.9592392847005395e-05, "loss": 0.003117693029344082, "step": 283390 }, { "epoch": 80.44280442804428, "grad_norm": 0.02336529642343521, "learning_rate": 1.9589554357082033e-05, "loss": 9.12228599190712e-05, "step": 283400 }, { "epoch": 80.44564291796765, "grad_norm": 0.007106421981006861, "learning_rate": 1.958671586715867e-05, "loss": 0.004612171649932861, "step": 283410 }, { "epoch": 80.448481407891, "grad_norm": 1.7173539400100708, "learning_rate": 1.9583877377235313e-05, "loss": 0.00031021181493997576, "step": 283420 }, { "epoch": 80.45131989781436, "grad_norm": 0.005252385511994362, "learning_rate": 1.958103888731195e-05, "loss": 0.00044507719576358795, "step": 283430 }, { "epoch": 80.45415838773772, "grad_norm": 0.0014449643203988671, "learning_rate": 1.9578200397388592e-05, "loss": 6.866473704576492e-05, "step": 283440 }, { "epoch": 80.45699687766108, "grad_norm": 0.08434896171092987, "learning_rate": 1.9575361907465227e-05, "loss": 0.0002232234925031662, "step": 283450 }, { "epoch": 80.45983536758445, "grad_norm": 0.0033790594898164272, "learning_rate": 1.9572523417541868e-05, "loss": 0.00019882619380950928, "step": 283460 }, { "epoch": 80.46267385750781, "grad_norm": 0.0025651762261986732, "learning_rate": 1.956968492761851e-05, "loss": 0.0008536815643310547, "step": 283470 }, { "epoch": 80.46551234743117, "grad_norm": 0.06021850183606148, "learning_rate": 1.9566846437695147e-05, "loss": 0.000102878175675869, "step": 283480 }, { "epoch": 80.46835083735452, "grad_norm": 0.0361897349357605, "learning_rate": 1.9564007947771785e-05, "loss": 0.0002914292737841606, "step": 283490 }, { "epoch": 80.47118932727788, "grad_norm": 0.07987362891435623, "learning_rate": 1.9561169457848423e-05, "loss": 8.34830105304718e-05, "step": 283500 }, { "epoch": 80.47118932727788, "eval_accuracy": 0.9859477331976855, "eval_loss": 0.05742231756448746, "eval_runtime": 35.3754, "eval_samples_per_second": 444.574, "eval_steps_per_second": 6.954, "step": 283500 }, { "epoch": 80.47402781720125, "grad_norm": 0.0026413975283503532, "learning_rate": 1.9558330967925065e-05, "loss": 0.00011997632682323456, "step": 283510 }, { "epoch": 80.47686630712461, "grad_norm": 0.013352343812584877, "learning_rate": 1.9555492478001706e-05, "loss": 3.617182374000549e-05, "step": 283520 }, { "epoch": 80.47970479704797, "grad_norm": 0.002188019687309861, "learning_rate": 1.9552653988078344e-05, "loss": 3.4634210169315335e-05, "step": 283530 }, { "epoch": 80.48254328697134, "grad_norm": 0.008692426607012749, "learning_rate": 1.9549815498154982e-05, "loss": 0.0001649497076869011, "step": 283540 }, { "epoch": 80.4853817768947, "grad_norm": 0.02118721790611744, "learning_rate": 1.954697700823162e-05, "loss": 7.421039044857025e-05, "step": 283550 }, { "epoch": 80.48822026681805, "grad_norm": 0.002162529155611992, "learning_rate": 1.954413851830826e-05, "loss": 6.320718675851822e-05, "step": 283560 }, { "epoch": 80.49105875674141, "grad_norm": 0.004714068025350571, "learning_rate": 1.9541300028384903e-05, "loss": 4.227515310049057e-05, "step": 283570 }, { "epoch": 80.49389724666477, "grad_norm": 0.014321920461952686, "learning_rate": 1.9538461538461537e-05, "loss": 2.6786699891090393e-05, "step": 283580 }, { "epoch": 80.49673573658814, "grad_norm": 0.04517778381705284, "learning_rate": 1.953562304853818e-05, "loss": 3.740731626749039e-05, "step": 283590 }, { "epoch": 80.4995742265115, "grad_norm": 0.00263022817671299, "learning_rate": 1.9532784558614817e-05, "loss": 2.426113933324814e-05, "step": 283600 }, { "epoch": 80.50241271643486, "grad_norm": 0.005182694643735886, "learning_rate": 1.9529946068691458e-05, "loss": 6.7836232483387e-05, "step": 283610 }, { "epoch": 80.50525120635822, "grad_norm": 0.001458306796848774, "learning_rate": 1.9527107578768096e-05, "loss": 3.447216004133224e-05, "step": 283620 }, { "epoch": 80.50808969628157, "grad_norm": 0.007420403882861137, "learning_rate": 1.9524269088844734e-05, "loss": 1.5763193368911742e-05, "step": 283630 }, { "epoch": 80.51092818620494, "grad_norm": 0.00562315946444869, "learning_rate": 1.9521430598921376e-05, "loss": 3.999825567007065e-05, "step": 283640 }, { "epoch": 80.5137666761283, "grad_norm": 0.0014330089325085282, "learning_rate": 1.9518592108998014e-05, "loss": 0.00036886222660541533, "step": 283650 }, { "epoch": 80.51660516605166, "grad_norm": 0.004672331269830465, "learning_rate": 1.951575361907465e-05, "loss": 6.067100912332535e-05, "step": 283660 }, { "epoch": 80.51944365597502, "grad_norm": 0.09113195538520813, "learning_rate": 1.9512915129151293e-05, "loss": 0.00011424496769905091, "step": 283670 }, { "epoch": 80.52228214589839, "grad_norm": 0.0005255496362224221, "learning_rate": 1.951007663922793e-05, "loss": 1.6406737267971037e-05, "step": 283680 }, { "epoch": 80.52512063582174, "grad_norm": 0.019360048696398735, "learning_rate": 1.9507238149304572e-05, "loss": 5.232468247413635e-05, "step": 283690 }, { "epoch": 80.5279591257451, "grad_norm": 0.011917711235582829, "learning_rate": 1.950439965938121e-05, "loss": 3.587566316127777e-05, "step": 283700 }, { "epoch": 80.53079761566846, "grad_norm": 0.017577029764652252, "learning_rate": 1.9501561169457848e-05, "loss": 2.729315310716629e-05, "step": 283710 }, { "epoch": 80.53363610559182, "grad_norm": 0.006750219501554966, "learning_rate": 1.949872267953449e-05, "loss": 8.058715611696244e-05, "step": 283720 }, { "epoch": 80.53647459551519, "grad_norm": 0.0021374928764998913, "learning_rate": 1.9495884189611128e-05, "loss": 1.985393464565277e-05, "step": 283730 }, { "epoch": 80.53931308543855, "grad_norm": 0.0764959380030632, "learning_rate": 1.949304569968777e-05, "loss": 6.353762000799179e-05, "step": 283740 }, { "epoch": 80.54215157536191, "grad_norm": 0.05084817856550217, "learning_rate": 1.9490207209764404e-05, "loss": 3.9949826896190646e-05, "step": 283750 }, { "epoch": 80.54499006528526, "grad_norm": 0.002355494536459446, "learning_rate": 1.9487368719841045e-05, "loss": 7.736217230558395e-05, "step": 283760 }, { "epoch": 80.54782855520862, "grad_norm": 0.049371425062417984, "learning_rate": 1.9484530229917686e-05, "loss": 3.560278564691543e-05, "step": 283770 }, { "epoch": 80.55066704513199, "grad_norm": 0.009748018346726894, "learning_rate": 1.9481691739994324e-05, "loss": 2.3746304214000703e-05, "step": 283780 }, { "epoch": 80.55350553505535, "grad_norm": 0.0014545497251674533, "learning_rate": 1.9478853250070962e-05, "loss": 5.9013441205024717e-05, "step": 283790 }, { "epoch": 80.55634402497871, "grad_norm": 0.004485609941184521, "learning_rate": 1.94760147601476e-05, "loss": 3.274399787187576e-05, "step": 283800 }, { "epoch": 80.55918251490208, "grad_norm": 0.0017891175812110305, "learning_rate": 1.947317627022424e-05, "loss": 1.8683448433876038e-05, "step": 283810 }, { "epoch": 80.56202100482544, "grad_norm": 0.004690824542194605, "learning_rate": 1.9470337780300883e-05, "loss": 1.830440014600754e-05, "step": 283820 }, { "epoch": 80.56485949474879, "grad_norm": 0.012297208420932293, "learning_rate": 1.946749929037752e-05, "loss": 4.47295606136322e-05, "step": 283830 }, { "epoch": 80.56769798467215, "grad_norm": 0.0005978646804578602, "learning_rate": 1.946466080045416e-05, "loss": 0.00013262126594781876, "step": 283840 }, { "epoch": 80.57053647459551, "grad_norm": 0.0009370818152092397, "learning_rate": 1.9461822310530797e-05, "loss": 0.0018988508731126786, "step": 283850 }, { "epoch": 80.57337496451888, "grad_norm": 0.044990312308073044, "learning_rate": 1.945898382060744e-05, "loss": 3.23878601193428e-05, "step": 283860 }, { "epoch": 80.57621345444224, "grad_norm": 0.006456183269619942, "learning_rate": 1.9456145330684076e-05, "loss": 5.346164107322693e-05, "step": 283870 }, { "epoch": 80.5790519443656, "grad_norm": 0.0018409995827823877, "learning_rate": 1.9453306840760714e-05, "loss": 2.0220689475536346e-05, "step": 283880 }, { "epoch": 80.58189043428897, "grad_norm": 0.010569685138761997, "learning_rate": 1.9450468350837356e-05, "loss": 5.2751787006855014e-05, "step": 283890 }, { "epoch": 80.58472892421231, "grad_norm": 0.0013714444357901812, "learning_rate": 1.9447629860913994e-05, "loss": 3.324691206216812e-05, "step": 283900 }, { "epoch": 80.58756741413568, "grad_norm": 0.0027143515180796385, "learning_rate": 1.9444791370990635e-05, "loss": 1.9625015556812287e-05, "step": 283910 }, { "epoch": 80.59040590405904, "grad_norm": 0.002322859363630414, "learning_rate": 1.9441952881067273e-05, "loss": 1.3658776879310608e-05, "step": 283920 }, { "epoch": 80.5932443939824, "grad_norm": 0.021091727539896965, "learning_rate": 1.943911439114391e-05, "loss": 4.811231046915054e-05, "step": 283930 }, { "epoch": 80.59608288390577, "grad_norm": 0.10575748234987259, "learning_rate": 1.9436275901220552e-05, "loss": 4.696119576692581e-05, "step": 283940 }, { "epoch": 80.59892137382913, "grad_norm": 0.005002531688660383, "learning_rate": 1.943343741129719e-05, "loss": 2.61625275015831e-05, "step": 283950 }, { "epoch": 80.60175986375248, "grad_norm": 0.010658815503120422, "learning_rate": 1.943059892137383e-05, "loss": 2.3862533271312714e-05, "step": 283960 }, { "epoch": 80.60459835367584, "grad_norm": 0.0013230015756562352, "learning_rate": 1.942776043145047e-05, "loss": 1.54886394739151e-05, "step": 283970 }, { "epoch": 80.6074368435992, "grad_norm": 0.06286446750164032, "learning_rate": 1.9424921941527108e-05, "loss": 3.553386777639389e-05, "step": 283980 }, { "epoch": 80.61027533352257, "grad_norm": 0.009682166390120983, "learning_rate": 1.942208345160375e-05, "loss": 2.027619630098343e-05, "step": 283990 }, { "epoch": 80.61311382344593, "grad_norm": 0.006453599315136671, "learning_rate": 1.9419244961680387e-05, "loss": 3.658849745988846e-05, "step": 284000 }, { "epoch": 80.61311382344593, "eval_accuracy": 0.9870286767978635, "eval_loss": 0.052404917776584625, "eval_runtime": 35.7557, "eval_samples_per_second": 439.846, "eval_steps_per_second": 6.88, "step": 284000 }, { "epoch": 80.61595231336929, "grad_norm": 0.0018842873396351933, "learning_rate": 1.9416406471757025e-05, "loss": 9.364448487758636e-06, "step": 284010 }, { "epoch": 80.61879080329265, "grad_norm": 0.005830444395542145, "learning_rate": 1.9413567981833666e-05, "loss": 1.6460567712783815e-05, "step": 284020 }, { "epoch": 80.621629293216, "grad_norm": 0.0048731472343206406, "learning_rate": 1.9410729491910304e-05, "loss": 3.600064665079117e-05, "step": 284030 }, { "epoch": 80.62446778313937, "grad_norm": 0.0017451999010518193, "learning_rate": 1.9407891001986946e-05, "loss": 2.1554157137870787e-05, "step": 284040 }, { "epoch": 80.62730627306273, "grad_norm": 0.011760943569242954, "learning_rate": 1.940505251206358e-05, "loss": 0.00042345598340034487, "step": 284050 }, { "epoch": 80.63014476298609, "grad_norm": 0.03586691617965698, "learning_rate": 1.9402214022140222e-05, "loss": 0.0001864314079284668, "step": 284060 }, { "epoch": 80.63298325290945, "grad_norm": 0.002891644835472107, "learning_rate": 1.9399375532216863e-05, "loss": 2.0032748579978943e-05, "step": 284070 }, { "epoch": 80.63582174283282, "grad_norm": 0.0029341112822294235, "learning_rate": 1.93965370422935e-05, "loss": 1.9900500774383545e-05, "step": 284080 }, { "epoch": 80.63866023275618, "grad_norm": 0.0012347400188446045, "learning_rate": 1.939369855237014e-05, "loss": 0.0001553663983941078, "step": 284090 }, { "epoch": 80.64149872267953, "grad_norm": 0.0011371554573997855, "learning_rate": 1.9390860062446777e-05, "loss": 1.887790858745575e-05, "step": 284100 }, { "epoch": 80.64433721260289, "grad_norm": 0.002386554377153516, "learning_rate": 1.938802157252342e-05, "loss": 0.00021147243678569793, "step": 284110 }, { "epoch": 80.64717570252625, "grad_norm": 0.06097172945737839, "learning_rate": 1.938518308260006e-05, "loss": 0.00021035484969615935, "step": 284120 }, { "epoch": 80.65001419244962, "grad_norm": 1.7764596939086914, "learning_rate": 1.9382344592676695e-05, "loss": 0.00034977328032255174, "step": 284130 }, { "epoch": 80.65285268237298, "grad_norm": 0.000814924540463835, "learning_rate": 1.9379506102753336e-05, "loss": 7.80126079916954e-05, "step": 284140 }, { "epoch": 80.65569117229634, "grad_norm": 0.008649351075291634, "learning_rate": 1.9376667612829974e-05, "loss": 3.619883209466934e-05, "step": 284150 }, { "epoch": 80.65852966221969, "grad_norm": 3.3787271976470947, "learning_rate": 1.9373829122906615e-05, "loss": 0.000456228107213974, "step": 284160 }, { "epoch": 80.66136815214305, "grad_norm": 0.007045924197882414, "learning_rate": 1.9370990632983253e-05, "loss": 0.00013948995620012284, "step": 284170 }, { "epoch": 80.66420664206642, "grad_norm": 0.009851045906543732, "learning_rate": 1.936815214305989e-05, "loss": 4.2918696999549864e-05, "step": 284180 }, { "epoch": 80.66704513198978, "grad_norm": 0.008788729086518288, "learning_rate": 1.9365313653136533e-05, "loss": 0.006033638492226601, "step": 284190 }, { "epoch": 80.66988362191314, "grad_norm": 0.003300946205854416, "learning_rate": 1.9362475163213174e-05, "loss": 1.764800399541855e-05, "step": 284200 }, { "epoch": 80.6727221118365, "grad_norm": 0.0022373350802809, "learning_rate": 1.9359636673289812e-05, "loss": 1.944638788700104e-05, "step": 284210 }, { "epoch": 80.67556060175987, "grad_norm": 0.0017381769139319658, "learning_rate": 1.935679818336645e-05, "loss": 4.5851431787014006e-05, "step": 284220 }, { "epoch": 80.67839909168322, "grad_norm": 0.0059103225357830524, "learning_rate": 1.9353959693443088e-05, "loss": 1.9133836030960082e-05, "step": 284230 }, { "epoch": 80.68123758160658, "grad_norm": 0.0013407691149041057, "learning_rate": 1.935112120351973e-05, "loss": 2.2026896476745606e-05, "step": 284240 }, { "epoch": 80.68407607152994, "grad_norm": 0.0013712850632146, "learning_rate": 1.934828271359637e-05, "loss": 8.869059383869171e-05, "step": 284250 }, { "epoch": 80.6869145614533, "grad_norm": 0.0014738183235749602, "learning_rate": 1.9345444223673005e-05, "loss": 3.922842442989349e-05, "step": 284260 }, { "epoch": 80.68975305137667, "grad_norm": 0.00311077362857759, "learning_rate": 1.9342605733749647e-05, "loss": 3.2156333327293395e-05, "step": 284270 }, { "epoch": 80.69259154130003, "grad_norm": 0.009509770199656487, "learning_rate": 1.9339767243826285e-05, "loss": 0.00016642455011606216, "step": 284280 }, { "epoch": 80.6954300312234, "grad_norm": 0.0529690645635128, "learning_rate": 1.9336928753902926e-05, "loss": 5.23347407579422e-05, "step": 284290 }, { "epoch": 80.69826852114674, "grad_norm": 0.08246760815382004, "learning_rate": 1.9334090263979564e-05, "loss": 8.748993277549744e-05, "step": 284300 }, { "epoch": 80.7011070110701, "grad_norm": 0.012613828293979168, "learning_rate": 1.9331251774056202e-05, "loss": 4.2255222797393796e-05, "step": 284310 }, { "epoch": 80.70394550099347, "grad_norm": 0.0004385569191072136, "learning_rate": 1.9328413284132843e-05, "loss": 4.240870475769043e-05, "step": 284320 }, { "epoch": 80.70678399091683, "grad_norm": 0.0003796062374021858, "learning_rate": 1.932557479420948e-05, "loss": 2.5621429085731506e-05, "step": 284330 }, { "epoch": 80.7096224808402, "grad_norm": 0.017664780840277672, "learning_rate": 1.932273630428612e-05, "loss": 1.6162358224391938e-05, "step": 284340 }, { "epoch": 80.71246097076356, "grad_norm": 0.0011390880681574345, "learning_rate": 1.931989781436276e-05, "loss": 2.587437629699707e-05, "step": 284350 }, { "epoch": 80.71529946068692, "grad_norm": 0.011765364557504654, "learning_rate": 1.93170593244394e-05, "loss": 9.817052632570267e-05, "step": 284360 }, { "epoch": 80.71813795061027, "grad_norm": 0.18141569197177887, "learning_rate": 1.931422083451604e-05, "loss": 6.013102829456329e-05, "step": 284370 }, { "epoch": 80.72097644053363, "grad_norm": 0.3193526566028595, "learning_rate": 1.9311382344592678e-05, "loss": 8.903555572032928e-05, "step": 284380 }, { "epoch": 80.723814930457, "grad_norm": 0.004972080234438181, "learning_rate": 1.9308543854669316e-05, "loss": 1.3998337090015412e-05, "step": 284390 }, { "epoch": 80.72665342038036, "grad_norm": 0.00482054241001606, "learning_rate": 1.9305705364745957e-05, "loss": 1.692064106464386e-05, "step": 284400 }, { "epoch": 80.72949191030372, "grad_norm": 0.0019364507170394063, "learning_rate": 1.9302866874822595e-05, "loss": 2.7105584740638734e-05, "step": 284410 }, { "epoch": 80.73233040022708, "grad_norm": 0.006456919480115175, "learning_rate": 1.9300028384899237e-05, "loss": 2.0663999021053314e-05, "step": 284420 }, { "epoch": 80.73516889015043, "grad_norm": 0.0008680199389345944, "learning_rate": 1.929718989497587e-05, "loss": 9.028427302837373e-06, "step": 284430 }, { "epoch": 80.7380073800738, "grad_norm": 0.0016182780964300036, "learning_rate": 1.9294351405052513e-05, "loss": 3.4344382584095e-05, "step": 284440 }, { "epoch": 80.74084586999716, "grad_norm": 0.007968052290380001, "learning_rate": 1.9291512915129154e-05, "loss": 1.3569556176662446e-05, "step": 284450 }, { "epoch": 80.74368435992052, "grad_norm": 0.0016977479681372643, "learning_rate": 1.9288674425205792e-05, "loss": 1.8386170268058777e-05, "step": 284460 }, { "epoch": 80.74652284984388, "grad_norm": 0.0017460314556956291, "learning_rate": 1.928583593528243e-05, "loss": 9.43802297115326e-06, "step": 284470 }, { "epoch": 80.74936133976725, "grad_norm": 0.011997719295322895, "learning_rate": 1.9282997445359068e-05, "loss": 1.634843647480011e-05, "step": 284480 }, { "epoch": 80.75219982969061, "grad_norm": 0.00512357335537672, "learning_rate": 1.928015895543571e-05, "loss": 1.955479383468628e-05, "step": 284490 }, { "epoch": 80.75503831961396, "grad_norm": 0.0030096471309661865, "learning_rate": 1.927732046551235e-05, "loss": 5.383864045143128e-05, "step": 284500 }, { "epoch": 80.75503831961396, "eval_accuracy": 0.9862656577859732, "eval_loss": 0.05294109880924225, "eval_runtime": 35.395, "eval_samples_per_second": 444.328, "eval_steps_per_second": 6.95, "step": 284500 }, { "epoch": 80.75787680953732, "grad_norm": 0.001716318540275097, "learning_rate": 1.927448197558899e-05, "loss": 8.66670161485672e-06, "step": 284510 }, { "epoch": 80.76071529946068, "grad_norm": 0.0024229229893535376, "learning_rate": 1.9271643485665627e-05, "loss": 1.7579272389411925e-05, "step": 284520 }, { "epoch": 80.76355378938405, "grad_norm": 0.07207883894443512, "learning_rate": 1.9268804995742265e-05, "loss": 5.194302648305893e-05, "step": 284530 }, { "epoch": 80.76639227930741, "grad_norm": 0.0010096024489030242, "learning_rate": 1.9265966505818906e-05, "loss": 2.028457820415497e-05, "step": 284540 }, { "epoch": 80.76923076923077, "grad_norm": 0.0010737852426245809, "learning_rate": 1.9263128015895544e-05, "loss": 1.8611736595630646e-05, "step": 284550 }, { "epoch": 80.77206925915414, "grad_norm": 0.0009593295981176198, "learning_rate": 1.9260289525972182e-05, "loss": 1.2169033288955688e-05, "step": 284560 }, { "epoch": 80.77490774907749, "grad_norm": 0.003699293127283454, "learning_rate": 1.9257451036048824e-05, "loss": 1.9428879022598265e-05, "step": 284570 }, { "epoch": 80.77774623900085, "grad_norm": 0.005352908279746771, "learning_rate": 1.925461254612546e-05, "loss": 2.3667514324188232e-05, "step": 284580 }, { "epoch": 80.78058472892421, "grad_norm": 0.0015002908185124397, "learning_rate": 1.9251774056202103e-05, "loss": 1.6494281589984892e-05, "step": 284590 }, { "epoch": 80.78342321884757, "grad_norm": 0.0010955720208585262, "learning_rate": 1.924893556627874e-05, "loss": 1.8616952002048492e-05, "step": 284600 }, { "epoch": 80.78626170877094, "grad_norm": 0.002444178331643343, "learning_rate": 1.924609707635538e-05, "loss": 1.5276670455932618e-05, "step": 284610 }, { "epoch": 80.7891001986943, "grad_norm": 0.005773458164185286, "learning_rate": 1.924325858643202e-05, "loss": 2.4209730327129364e-05, "step": 284620 }, { "epoch": 80.79193868861765, "grad_norm": 0.0018709703581407666, "learning_rate": 1.9240420096508658e-05, "loss": 6.9534406065940855e-06, "step": 284630 }, { "epoch": 80.79477717854101, "grad_norm": 0.0051488266326487064, "learning_rate": 1.9237581606585296e-05, "loss": 7.940325886011124e-05, "step": 284640 }, { "epoch": 80.79761566846437, "grad_norm": 0.0030943031888455153, "learning_rate": 1.9234743116661938e-05, "loss": 4.811398684978485e-05, "step": 284650 }, { "epoch": 80.80045415838774, "grad_norm": 0.00033470976632088423, "learning_rate": 1.9231904626738576e-05, "loss": 1.7413869500160217e-05, "step": 284660 }, { "epoch": 80.8032926483111, "grad_norm": 0.0029319515451788902, "learning_rate": 1.9229066136815217e-05, "loss": 1.8950924277305604e-05, "step": 284670 }, { "epoch": 80.80613113823446, "grad_norm": 0.00830126740038395, "learning_rate": 1.9226227646891855e-05, "loss": 2.3502483963966368e-05, "step": 284680 }, { "epoch": 80.80896962815783, "grad_norm": 0.001540178433060646, "learning_rate": 1.9223389156968493e-05, "loss": 2.7870945632457734e-05, "step": 284690 }, { "epoch": 80.81180811808117, "grad_norm": 0.003936118446290493, "learning_rate": 1.9220550667045134e-05, "loss": 1.4220550656318665e-05, "step": 284700 }, { "epoch": 80.81464660800454, "grad_norm": 0.00041233672527596354, "learning_rate": 1.9217712177121772e-05, "loss": 1.891888678073883e-05, "step": 284710 }, { "epoch": 80.8174850979279, "grad_norm": 0.005703164264559746, "learning_rate": 1.9214873687198414e-05, "loss": 3.0242465436458586e-05, "step": 284720 }, { "epoch": 80.82032358785126, "grad_norm": 0.017503900453448296, "learning_rate": 1.9212035197275048e-05, "loss": 4.4383294880390165e-05, "step": 284730 }, { "epoch": 80.82316207777463, "grad_norm": 0.0022645459976047277, "learning_rate": 1.920919670735169e-05, "loss": 8.2472525537014e-05, "step": 284740 }, { "epoch": 80.82600056769799, "grad_norm": 0.0033197160810232162, "learning_rate": 1.920635821742833e-05, "loss": 2.659279853105545e-05, "step": 284750 }, { "epoch": 80.82883905762135, "grad_norm": 0.0025984260719269514, "learning_rate": 1.920351972750497e-05, "loss": 2.2151507437229156e-05, "step": 284760 }, { "epoch": 80.8316775475447, "grad_norm": 0.0018361841794103384, "learning_rate": 1.9200681237581607e-05, "loss": 3.5760924220085144e-05, "step": 284770 }, { "epoch": 80.83451603746806, "grad_norm": 0.0012746206484735012, "learning_rate": 1.9197842747658245e-05, "loss": 0.001260816864669323, "step": 284780 }, { "epoch": 80.83735452739143, "grad_norm": 0.003278645221143961, "learning_rate": 1.9195004257734886e-05, "loss": 2.532508224248886e-05, "step": 284790 }, { "epoch": 80.84019301731479, "grad_norm": 0.031650543212890625, "learning_rate": 1.9192165767811528e-05, "loss": 2.361815422773361e-05, "step": 284800 }, { "epoch": 80.84303150723815, "grad_norm": 0.009619396179914474, "learning_rate": 1.9189327277888162e-05, "loss": 5.189254879951477e-05, "step": 284810 }, { "epoch": 80.84586999716151, "grad_norm": 0.002100695390254259, "learning_rate": 1.9186488787964804e-05, "loss": 0.0005475534126162529, "step": 284820 }, { "epoch": 80.84870848708488, "grad_norm": 0.0019152103923261166, "learning_rate": 1.918365029804144e-05, "loss": 2.9217451810836792e-05, "step": 284830 }, { "epoch": 80.85154697700823, "grad_norm": 0.0060312217101454735, "learning_rate": 1.9180811808118083e-05, "loss": 0.00022184792906045913, "step": 284840 }, { "epoch": 80.85438546693159, "grad_norm": 0.016772570088505745, "learning_rate": 1.917797331819472e-05, "loss": 2.0992383360862733e-05, "step": 284850 }, { "epoch": 80.85722395685495, "grad_norm": 0.0013141882373020053, "learning_rate": 1.917513482827136e-05, "loss": 0.0012759115546941758, "step": 284860 }, { "epoch": 80.86006244677831, "grad_norm": 0.007976123131811619, "learning_rate": 1.9172296338348e-05, "loss": 5.464367568492889e-05, "step": 284870 }, { "epoch": 80.86290093670168, "grad_norm": 0.004396217875182629, "learning_rate": 1.916945784842464e-05, "loss": 0.00015204045921564103, "step": 284880 }, { "epoch": 80.86573942662504, "grad_norm": 0.0033183738123625517, "learning_rate": 1.916661935850128e-05, "loss": 4.406031221151352e-05, "step": 284890 }, { "epoch": 80.86857791654839, "grad_norm": 0.0010638893581926823, "learning_rate": 1.9163780868577918e-05, "loss": 3.6363862454891206e-05, "step": 284900 }, { "epoch": 80.87141640647175, "grad_norm": 0.007786772679537535, "learning_rate": 1.9160942378654556e-05, "loss": 2.04259529709816e-05, "step": 284910 }, { "epoch": 80.87425489639512, "grad_norm": 0.005166396964341402, "learning_rate": 1.9158103888731197e-05, "loss": 1.47942453622818e-05, "step": 284920 }, { "epoch": 80.87709338631848, "grad_norm": 0.06098214164376259, "learning_rate": 1.9155265398807835e-05, "loss": 3.831833600997925e-05, "step": 284930 }, { "epoch": 80.87993187624184, "grad_norm": 0.00795820914208889, "learning_rate": 1.9152426908884473e-05, "loss": 2.364572137594223e-05, "step": 284940 }, { "epoch": 80.8827703661652, "grad_norm": 0.00943947397172451, "learning_rate": 1.9149588418961114e-05, "loss": 2.959687262773514e-05, "step": 284950 }, { "epoch": 80.88560885608857, "grad_norm": 0.00684537785127759, "learning_rate": 1.9146749929037752e-05, "loss": 2.4911202490329743e-05, "step": 284960 }, { "epoch": 80.88844734601192, "grad_norm": 0.001589752035215497, "learning_rate": 1.9143911439114394e-05, "loss": 1.0129064321517944e-05, "step": 284970 }, { "epoch": 80.89128583593528, "grad_norm": 0.005655441898852587, "learning_rate": 1.9141072949191032e-05, "loss": 1.864098012447357e-05, "step": 284980 }, { "epoch": 80.89412432585864, "grad_norm": 0.011894753202795982, "learning_rate": 1.913823445926767e-05, "loss": 2.1410733461380004e-05, "step": 284990 }, { "epoch": 80.896962815782, "grad_norm": 0.0074319192208349705, "learning_rate": 1.913539596934431e-05, "loss": 1.7582252621650695e-05, "step": 285000 }, { "epoch": 80.896962815782, "eval_accuracy": 0.9871558466331786, "eval_loss": 0.05239196494221687, "eval_runtime": 35.4227, "eval_samples_per_second": 443.981, "eval_steps_per_second": 6.945, "step": 285000 }, { "epoch": 80.89980130570537, "grad_norm": 0.011752213351428509, "learning_rate": 1.913255747942095e-05, "loss": 4.02817502617836e-05, "step": 285010 }, { "epoch": 80.90263979562873, "grad_norm": 0.006102048326283693, "learning_rate": 1.9129718989497587e-05, "loss": 2.8436817228794097e-05, "step": 285020 }, { "epoch": 80.90547828555209, "grad_norm": 0.01568003185093403, "learning_rate": 1.9126880499574225e-05, "loss": 2.5636516511440277e-05, "step": 285030 }, { "epoch": 80.90831677547544, "grad_norm": 0.0019295192323625088, "learning_rate": 1.9124042009650866e-05, "loss": 2.4945102632045744e-05, "step": 285040 }, { "epoch": 80.9111552653988, "grad_norm": 0.0073494440875947475, "learning_rate": 1.9121203519727508e-05, "loss": 1.9588321447372436e-05, "step": 285050 }, { "epoch": 80.91399375532217, "grad_norm": 0.0037547482643276453, "learning_rate": 1.9118365029804146e-05, "loss": 1.5509501099586487e-05, "step": 285060 }, { "epoch": 80.91683224524553, "grad_norm": 0.0006807704921811819, "learning_rate": 1.9115526539880784e-05, "loss": 1.9085034728050233e-05, "step": 285070 }, { "epoch": 80.91967073516889, "grad_norm": 0.0008588955388404429, "learning_rate": 1.9112688049957422e-05, "loss": 2.6965886354446412e-05, "step": 285080 }, { "epoch": 80.92250922509226, "grad_norm": 0.018689362332224846, "learning_rate": 1.9109849560034063e-05, "loss": 1.730937510728836e-05, "step": 285090 }, { "epoch": 80.92534771501562, "grad_norm": 1.8148826360702515, "learning_rate": 1.9107011070110705e-05, "loss": 0.000227375328540802, "step": 285100 }, { "epoch": 80.92818620493897, "grad_norm": 1.3798458576202393, "learning_rate": 1.910417258018734e-05, "loss": 0.00021742042154073715, "step": 285110 }, { "epoch": 80.93102469486233, "grad_norm": 0.0005088530597276986, "learning_rate": 1.910133409026398e-05, "loss": 0.0011846417561173438, "step": 285120 }, { "epoch": 80.9338631847857, "grad_norm": 0.004591942299157381, "learning_rate": 1.909849560034062e-05, "loss": 2.2813864052295685e-05, "step": 285130 }, { "epoch": 80.93670167470906, "grad_norm": 0.09562883526086807, "learning_rate": 1.909565711041726e-05, "loss": 5.462579429149628e-05, "step": 285140 }, { "epoch": 80.93954016463242, "grad_norm": 0.08886750042438507, "learning_rate": 1.9092818620493898e-05, "loss": 0.00010324567556381225, "step": 285150 }, { "epoch": 80.94237865455578, "grad_norm": 0.001528289751149714, "learning_rate": 1.9089980130570536e-05, "loss": 0.0023025128990411758, "step": 285160 }, { "epoch": 80.94521714447913, "grad_norm": 0.0027631395496428013, "learning_rate": 1.9087141640647177e-05, "loss": 0.0008384134620428085, "step": 285170 }, { "epoch": 80.9480556344025, "grad_norm": 0.00832153670489788, "learning_rate": 1.9084303150723815e-05, "loss": 1.3599358499050141e-05, "step": 285180 }, { "epoch": 80.95089412432586, "grad_norm": 0.00660673389211297, "learning_rate": 1.9081464660800457e-05, "loss": 9.181182831525802e-05, "step": 285190 }, { "epoch": 80.95373261424922, "grad_norm": 0.00805452186614275, "learning_rate": 1.9078626170877095e-05, "loss": 5.075465887784958e-05, "step": 285200 }, { "epoch": 80.95657110417258, "grad_norm": 0.007184869609773159, "learning_rate": 1.9075787680953733e-05, "loss": 0.00010658185929059982, "step": 285210 }, { "epoch": 80.95940959409594, "grad_norm": 0.003944877069443464, "learning_rate": 1.9072949191030374e-05, "loss": 0.001351816952228546, "step": 285220 }, { "epoch": 80.96224808401931, "grad_norm": 0.0019839347805827856, "learning_rate": 1.9070110701107012e-05, "loss": 5.093533545732498e-05, "step": 285230 }, { "epoch": 80.96508657394266, "grad_norm": 0.0018968413351103663, "learning_rate": 1.906727221118365e-05, "loss": 3.193728625774384e-05, "step": 285240 }, { "epoch": 80.96792506386602, "grad_norm": 0.00795601587742567, "learning_rate": 1.906443372126029e-05, "loss": 6.535034626722335e-05, "step": 285250 }, { "epoch": 80.97076355378938, "grad_norm": 0.0016019707545638084, "learning_rate": 1.906159523133693e-05, "loss": 6.535351276397705e-05, "step": 285260 }, { "epoch": 80.97360204371275, "grad_norm": 0.019133547320961952, "learning_rate": 1.905875674141357e-05, "loss": 0.0003447510302066803, "step": 285270 }, { "epoch": 80.97644053363611, "grad_norm": 0.012915031984448433, "learning_rate": 1.9055918251490205e-05, "loss": 0.00015251245349645615, "step": 285280 }, { "epoch": 80.97927902355947, "grad_norm": 0.012539342045783997, "learning_rate": 1.9053079761566847e-05, "loss": 0.00015334375202655793, "step": 285290 }, { "epoch": 80.98211751348283, "grad_norm": 0.014119812287390232, "learning_rate": 1.9050241271643488e-05, "loss": 0.0005785580724477768, "step": 285300 }, { "epoch": 80.98495600340618, "grad_norm": 0.033291589468717575, "learning_rate": 1.9047402781720126e-05, "loss": 0.00015410352498292922, "step": 285310 }, { "epoch": 80.98779449332955, "grad_norm": 0.0010235439985990524, "learning_rate": 1.9044564291796764e-05, "loss": 3.4051574766635893e-05, "step": 285320 }, { "epoch": 80.99063298325291, "grad_norm": 0.0015607503009960055, "learning_rate": 1.9041725801873402e-05, "loss": 1.4824420213699341e-05, "step": 285330 }, { "epoch": 80.99347147317627, "grad_norm": 0.0007951011066325009, "learning_rate": 1.9038887311950043e-05, "loss": 5.5786408483982083e-05, "step": 285340 }, { "epoch": 80.99630996309963, "grad_norm": 0.0036839263048022985, "learning_rate": 1.9036048822026685e-05, "loss": 2.4648942053318022e-05, "step": 285350 }, { "epoch": 80.999148453023, "grad_norm": 0.006965104024857283, "learning_rate": 1.9033210332103323e-05, "loss": 7.202867418527603e-05, "step": 285360 }, { "epoch": 81.00198694294635, "grad_norm": 0.0013855923898518085, "learning_rate": 1.903037184217996e-05, "loss": 2.60717497440055e-05, "step": 285370 }, { "epoch": 81.00482543286971, "grad_norm": 0.003212584648281336, "learning_rate": 1.90275333522566e-05, "loss": 1.700911670923233e-05, "step": 285380 }, { "epoch": 81.00766392279307, "grad_norm": 0.003839696291834116, "learning_rate": 1.902469486233324e-05, "loss": 2.8009526431560516e-05, "step": 285390 }, { "epoch": 81.01050241271643, "grad_norm": 0.004150889813899994, "learning_rate": 1.9021856372409878e-05, "loss": 1.636482775211334e-05, "step": 285400 }, { "epoch": 81.0133409026398, "grad_norm": 0.0037734711077064276, "learning_rate": 1.9019017882486516e-05, "loss": 2.8882734477519988e-05, "step": 285410 }, { "epoch": 81.01617939256316, "grad_norm": 0.006676630116999149, "learning_rate": 1.9016179392563157e-05, "loss": 1.3499893248081207e-05, "step": 285420 }, { "epoch": 81.01901788248652, "grad_norm": 0.0033306328114122152, "learning_rate": 1.9013340902639795e-05, "loss": 1.9798055291175842e-05, "step": 285430 }, { "epoch": 81.02185637240987, "grad_norm": 0.004243526607751846, "learning_rate": 1.9010502412716437e-05, "loss": 1.6124919056892395e-05, "step": 285440 }, { "epoch": 81.02469486233323, "grad_norm": 0.011348764412105083, "learning_rate": 1.9007663922793075e-05, "loss": 1.4645233750343323e-05, "step": 285450 }, { "epoch": 81.0275333522566, "grad_norm": 0.0019629551097750664, "learning_rate": 1.9004825432869713e-05, "loss": 1.47918239235878e-05, "step": 285460 }, { "epoch": 81.03037184217996, "grad_norm": 0.002710591536015272, "learning_rate": 1.9001986942946354e-05, "loss": 1.3430602848529816e-05, "step": 285470 }, { "epoch": 81.03321033210332, "grad_norm": 0.011001027189195156, "learning_rate": 1.8999148453022995e-05, "loss": 3.0193664133548737e-05, "step": 285480 }, { "epoch": 81.03604882202669, "grad_norm": 0.00025005984934978187, "learning_rate": 1.899630996309963e-05, "loss": 1.4176592230796814e-05, "step": 285490 }, { "epoch": 81.03888731195005, "grad_norm": 0.000967858883086592, "learning_rate": 1.899347147317627e-05, "loss": 2.4453923106193543e-05, "step": 285500 }, { "epoch": 81.03888731195005, "eval_accuracy": 0.9875373561391237, "eval_loss": 0.05177297815680504, "eval_runtime": 40.7077, "eval_samples_per_second": 386.34, "eval_steps_per_second": 6.043, "step": 285500 }, { "epoch": 81.0417258018734, "grad_norm": 0.0015055297408252954, "learning_rate": 1.899063298325291e-05, "loss": 2.2841617465019226e-05, "step": 285510 }, { "epoch": 81.04456429179676, "grad_norm": 0.014144614338874817, "learning_rate": 1.898779449332955e-05, "loss": 1.197177916765213e-05, "step": 285520 }, { "epoch": 81.04740278172012, "grad_norm": 0.0048371306620538235, "learning_rate": 1.898495600340619e-05, "loss": 1.3942830264568329e-05, "step": 285530 }, { "epoch": 81.05024127164349, "grad_norm": 0.0018994436832144856, "learning_rate": 1.8982117513482827e-05, "loss": 1.6379356384277344e-05, "step": 285540 }, { "epoch": 81.05307976156685, "grad_norm": 0.0442471019923687, "learning_rate": 1.8979279023559468e-05, "loss": 3.877989947795868e-05, "step": 285550 }, { "epoch": 81.05591825149021, "grad_norm": 0.011102864518761635, "learning_rate": 1.8976440533636106e-05, "loss": 0.0004997439682483673, "step": 285560 }, { "epoch": 81.05875674141357, "grad_norm": 0.004493041429668665, "learning_rate": 1.8973602043712748e-05, "loss": 9.762756526470185e-05, "step": 285570 }, { "epoch": 81.06159523133692, "grad_norm": 0.0017703683115541935, "learning_rate": 1.8970763553789386e-05, "loss": 4.828684031963348e-05, "step": 285580 }, { "epoch": 81.06443372126029, "grad_norm": 0.0007903585792519152, "learning_rate": 1.8967925063866024e-05, "loss": 3.425981849431991e-05, "step": 285590 }, { "epoch": 81.06727221118365, "grad_norm": 0.015409216284751892, "learning_rate": 1.8965086573942665e-05, "loss": 6.936341524124145e-05, "step": 285600 }, { "epoch": 81.07011070110701, "grad_norm": 0.003175349673256278, "learning_rate": 1.8962248084019303e-05, "loss": 6.942395120859146e-05, "step": 285610 }, { "epoch": 81.07294919103038, "grad_norm": 0.025682931765913963, "learning_rate": 1.895940959409594e-05, "loss": 1.5569478273391722e-05, "step": 285620 }, { "epoch": 81.07578768095374, "grad_norm": 0.018721893429756165, "learning_rate": 1.8956571104172582e-05, "loss": 8.067097514867783e-05, "step": 285630 }, { "epoch": 81.07862617087709, "grad_norm": 0.028133900836110115, "learning_rate": 1.895373261424922e-05, "loss": 6.642788648605346e-05, "step": 285640 }, { "epoch": 81.08146466080045, "grad_norm": 0.02226395159959793, "learning_rate": 1.895089412432586e-05, "loss": 2.0020082592964174e-05, "step": 285650 }, { "epoch": 81.08430315072381, "grad_norm": 0.028099017217755318, "learning_rate": 1.8948055634402496e-05, "loss": 2.7233175933361055e-05, "step": 285660 }, { "epoch": 81.08714164064718, "grad_norm": 0.0010367146460339427, "learning_rate": 1.8945217144479138e-05, "loss": 6.121788173913955e-05, "step": 285670 }, { "epoch": 81.08998013057054, "grad_norm": 0.016659852117300034, "learning_rate": 1.894237865455578e-05, "loss": 1.666974276304245e-05, "step": 285680 }, { "epoch": 81.0928186204939, "grad_norm": 0.0011145196622237563, "learning_rate": 1.8939540164632417e-05, "loss": 3.125164657831192e-05, "step": 285690 }, { "epoch": 81.09565711041726, "grad_norm": 0.005532930139452219, "learning_rate": 1.8936701674709055e-05, "loss": 1.1081807315349579e-05, "step": 285700 }, { "epoch": 81.09849560034061, "grad_norm": 0.002834843937307596, "learning_rate": 1.8933863184785693e-05, "loss": 1.6244500875473024e-05, "step": 285710 }, { "epoch": 81.10133409026398, "grad_norm": 0.0008281400077976286, "learning_rate": 1.8931024694862334e-05, "loss": 1.2736395001411438e-05, "step": 285720 }, { "epoch": 81.10417258018734, "grad_norm": 0.00045030107139609754, "learning_rate": 1.8928186204938976e-05, "loss": 1.919697970151901e-05, "step": 285730 }, { "epoch": 81.1070110701107, "grad_norm": 0.0005027637234888971, "learning_rate": 1.8925347715015614e-05, "loss": 1.3032928109169006e-05, "step": 285740 }, { "epoch": 81.10984956003406, "grad_norm": 0.10009705275297165, "learning_rate": 1.892250922509225e-05, "loss": 2.430025488138199e-05, "step": 285750 }, { "epoch": 81.11268804995743, "grad_norm": 0.0009051103261299431, "learning_rate": 1.891967073516889e-05, "loss": 1.821443438529968e-05, "step": 285760 }, { "epoch": 81.11552653988079, "grad_norm": 0.006431158632040024, "learning_rate": 1.891683224524553e-05, "loss": 1.5463680028915405e-05, "step": 285770 }, { "epoch": 81.11836502980414, "grad_norm": 0.008769252337515354, "learning_rate": 1.8913993755322172e-05, "loss": 1.1651404201984406e-05, "step": 285780 }, { "epoch": 81.1212035197275, "grad_norm": 0.0012325274292379618, "learning_rate": 1.8911155265398807e-05, "loss": 2.26801261305809e-05, "step": 285790 }, { "epoch": 81.12404200965086, "grad_norm": 0.0008388670976273715, "learning_rate": 1.890831677547545e-05, "loss": 1.2083165347576142e-05, "step": 285800 }, { "epoch": 81.12688049957423, "grad_norm": 0.007179551757872105, "learning_rate": 1.8905478285552086e-05, "loss": 1.711566001176834e-05, "step": 285810 }, { "epoch": 81.12971898949759, "grad_norm": 0.0012794923968613148, "learning_rate": 1.8902639795628728e-05, "loss": 2.0507536828517913e-05, "step": 285820 }, { "epoch": 81.13255747942095, "grad_norm": 0.0011574729578569531, "learning_rate": 1.8899801305705366e-05, "loss": 1.1659227311611176e-05, "step": 285830 }, { "epoch": 81.13539596934432, "grad_norm": 0.0008727122913114727, "learning_rate": 1.8896962815782004e-05, "loss": 1.766663044691086e-05, "step": 285840 }, { "epoch": 81.13823445926766, "grad_norm": 0.0019633404444903135, "learning_rate": 1.8894124325858645e-05, "loss": 1.1090748012065888e-05, "step": 285850 }, { "epoch": 81.14107294919103, "grad_norm": 0.004857672844082117, "learning_rate": 1.8891285835935283e-05, "loss": 8.453056216239929e-06, "step": 285860 }, { "epoch": 81.14391143911439, "grad_norm": 0.004411458503454924, "learning_rate": 1.888844734601192e-05, "loss": 1.606326550245285e-05, "step": 285870 }, { "epoch": 81.14674992903775, "grad_norm": 0.0009092538966797292, "learning_rate": 1.8885608856088562e-05, "loss": 1.1921115219593049e-05, "step": 285880 }, { "epoch": 81.14958841896112, "grad_norm": 0.0004512554151006043, "learning_rate": 1.88827703661652e-05, "loss": 8.022654801607133e-05, "step": 285890 }, { "epoch": 81.15242690888448, "grad_norm": 0.0038454746827483177, "learning_rate": 1.8879931876241842e-05, "loss": 2.269148826599121e-05, "step": 285900 }, { "epoch": 81.15526539880783, "grad_norm": 0.0026476962957531214, "learning_rate": 1.887709338631848e-05, "loss": 0.00010802187025547028, "step": 285910 }, { "epoch": 81.15810388873119, "grad_norm": 0.14371508359909058, "learning_rate": 1.8874254896395118e-05, "loss": 4.059355705976486e-05, "step": 285920 }, { "epoch": 81.16094237865455, "grad_norm": 0.053249165415763855, "learning_rate": 1.887141640647176e-05, "loss": 5.183275789022446e-05, "step": 285930 }, { "epoch": 81.16378086857792, "grad_norm": 0.003575024427846074, "learning_rate": 1.8868577916548397e-05, "loss": 1.216791570186615e-05, "step": 285940 }, { "epoch": 81.16661935850128, "grad_norm": 0.007199988700449467, "learning_rate": 1.886573942662504e-05, "loss": 1.628994941711426e-05, "step": 285950 }, { "epoch": 81.16945784842464, "grad_norm": 0.0025675075594335794, "learning_rate": 1.8862900936701673e-05, "loss": 2.9052607715129852e-05, "step": 285960 }, { "epoch": 81.172296338348, "grad_norm": 0.001740945503115654, "learning_rate": 1.8860062446778314e-05, "loss": 1.2152083218097687e-05, "step": 285970 }, { "epoch": 81.17513482827135, "grad_norm": 0.0041473861783742905, "learning_rate": 1.8857223956854956e-05, "loss": 2.1521002054214478e-05, "step": 285980 }, { "epoch": 81.17797331819472, "grad_norm": 0.0030757326167076826, "learning_rate": 1.8854385466931594e-05, "loss": 1.4227069914340974e-05, "step": 285990 }, { "epoch": 81.18081180811808, "grad_norm": 0.00010528161510592327, "learning_rate": 1.8851546977008232e-05, "loss": 1.1499226093292237e-05, "step": 286000 }, { "epoch": 81.18081180811808, "eval_accuracy": 0.9865199974566033, "eval_loss": 0.05557217076420784, "eval_runtime": 35.8729, "eval_samples_per_second": 438.408, "eval_steps_per_second": 6.858, "step": 286000 }, { "epoch": 81.18365029804144, "grad_norm": 0.002999783493578434, "learning_rate": 1.884870848708487e-05, "loss": 3.7765130400657654e-05, "step": 286010 }, { "epoch": 81.1864887879648, "grad_norm": 0.002220551948994398, "learning_rate": 1.884586999716151e-05, "loss": 1.644529402256012e-05, "step": 286020 }, { "epoch": 81.18932727788817, "grad_norm": 0.0020157499238848686, "learning_rate": 1.8843031507238153e-05, "loss": 5.650799721479416e-05, "step": 286030 }, { "epoch": 81.19216576781153, "grad_norm": 0.001081197871826589, "learning_rate": 1.884019301731479e-05, "loss": 1.8233992159366606e-05, "step": 286040 }, { "epoch": 81.19500425773488, "grad_norm": 0.001700687687844038, "learning_rate": 1.883735452739143e-05, "loss": 1.331530511379242e-05, "step": 286050 }, { "epoch": 81.19784274765824, "grad_norm": 0.003985812421888113, "learning_rate": 1.8834516037468066e-05, "loss": 1.7536431550979613e-05, "step": 286060 }, { "epoch": 81.2006812375816, "grad_norm": 0.0036469141487032175, "learning_rate": 1.8831677547544708e-05, "loss": 1.0212324559688568e-05, "step": 286070 }, { "epoch": 81.20351972750497, "grad_norm": 0.06732100993394852, "learning_rate": 1.8828839057621346e-05, "loss": 1.9499287009239197e-05, "step": 286080 }, { "epoch": 81.20635821742833, "grad_norm": 0.0009199007763527334, "learning_rate": 1.8826000567697984e-05, "loss": 3.080274909734726e-05, "step": 286090 }, { "epoch": 81.2091967073517, "grad_norm": 0.004839306231588125, "learning_rate": 1.8823162077774625e-05, "loss": 1.1280551552772522e-05, "step": 286100 }, { "epoch": 81.21203519727504, "grad_norm": 0.008249304257333279, "learning_rate": 1.8820323587851263e-05, "loss": 1.3046152889728547e-05, "step": 286110 }, { "epoch": 81.2148736871984, "grad_norm": 0.0030294773168861866, "learning_rate": 1.8817485097927905e-05, "loss": 2.773292362689972e-05, "step": 286120 }, { "epoch": 81.21771217712177, "grad_norm": 0.0009307425934821367, "learning_rate": 1.8814646608004543e-05, "loss": 9.077601134777069e-06, "step": 286130 }, { "epoch": 81.22055066704513, "grad_norm": 0.009658437222242355, "learning_rate": 1.881180811808118e-05, "loss": 1.233946532011032e-05, "step": 286140 }, { "epoch": 81.2233891569685, "grad_norm": 0.006278397981077433, "learning_rate": 1.8808969628157822e-05, "loss": 1.4452636241912841e-05, "step": 286150 }, { "epoch": 81.22622764689186, "grad_norm": 0.0023043763358145952, "learning_rate": 1.880613113823446e-05, "loss": 7.760711014270782e-06, "step": 286160 }, { "epoch": 81.22906613681522, "grad_norm": 0.000801486661657691, "learning_rate": 1.8803292648311098e-05, "loss": 1.1742115020751953e-05, "step": 286170 }, { "epoch": 81.23190462673857, "grad_norm": 0.0013092693407088518, "learning_rate": 1.880045415838774e-05, "loss": 1.6955099999904634e-05, "step": 286180 }, { "epoch": 81.23474311666193, "grad_norm": 0.014996721409261227, "learning_rate": 1.8797615668464377e-05, "loss": 1.663845032453537e-05, "step": 286190 }, { "epoch": 81.2375816065853, "grad_norm": 0.0017475822241976857, "learning_rate": 1.879477717854102e-05, "loss": 2.115778625011444e-05, "step": 286200 }, { "epoch": 81.24042009650866, "grad_norm": 0.03817865625023842, "learning_rate": 1.8791938688617657e-05, "loss": 1.5774369239807128e-05, "step": 286210 }, { "epoch": 81.24325858643202, "grad_norm": 0.0008001840906217694, "learning_rate": 1.8789100198694295e-05, "loss": 2.001337707042694e-05, "step": 286220 }, { "epoch": 81.24609707635538, "grad_norm": 0.0033952912781387568, "learning_rate": 1.8786261708770936e-05, "loss": 1.0561570525169372e-05, "step": 286230 }, { "epoch": 81.24893556627875, "grad_norm": 0.001083230716176331, "learning_rate": 1.8783423218847574e-05, "loss": 1.595504581928253e-05, "step": 286240 }, { "epoch": 81.2517740562021, "grad_norm": 0.00286016589961946, "learning_rate": 1.8780584728924215e-05, "loss": 1.5024840831756591e-05, "step": 286250 }, { "epoch": 81.25461254612546, "grad_norm": 0.001082881004549563, "learning_rate": 1.877774623900085e-05, "loss": 8.704513311386108e-06, "step": 286260 }, { "epoch": 81.25745103604882, "grad_norm": 0.0018975295824930072, "learning_rate": 1.877490774907749e-05, "loss": 2.2610649466514587e-05, "step": 286270 }, { "epoch": 81.26028952597218, "grad_norm": 0.0012142442865297198, "learning_rate": 1.8772069259154133e-05, "loss": 1.0571815073490142e-05, "step": 286280 }, { "epoch": 81.26312801589555, "grad_norm": 0.005328564438968897, "learning_rate": 1.876923076923077e-05, "loss": 1.1822208762168884e-05, "step": 286290 }, { "epoch": 81.26596650581891, "grad_norm": 0.002242245012894273, "learning_rate": 1.876639227930741e-05, "loss": 1.3593025505542755e-05, "step": 286300 }, { "epoch": 81.26880499574227, "grad_norm": 0.0032011899165809155, "learning_rate": 1.8763553789384047e-05, "loss": 6.73588365316391e-06, "step": 286310 }, { "epoch": 81.27164348566562, "grad_norm": 0.0012940653832629323, "learning_rate": 1.8760715299460688e-05, "loss": 7.128901779651642e-06, "step": 286320 }, { "epoch": 81.27448197558898, "grad_norm": 0.0014903099508956075, "learning_rate": 1.875787680953733e-05, "loss": 1.1603906750679016e-05, "step": 286330 }, { "epoch": 81.27732046551235, "grad_norm": 0.002755678491666913, "learning_rate": 1.8755038319613964e-05, "loss": 1.1741369962692261e-05, "step": 286340 }, { "epoch": 81.28015895543571, "grad_norm": 0.0029886660631746054, "learning_rate": 1.8752199829690605e-05, "loss": 1.2735649943351745e-05, "step": 286350 }, { "epoch": 81.28299744535907, "grad_norm": 0.0018299035727977753, "learning_rate": 1.8749361339767243e-05, "loss": 1.0478124022483825e-05, "step": 286360 }, { "epoch": 81.28583593528244, "grad_norm": 0.024379799142479897, "learning_rate": 1.8746522849843885e-05, "loss": 1.578181982040405e-05, "step": 286370 }, { "epoch": 81.28867442520578, "grad_norm": 0.01358880940824747, "learning_rate": 1.8743684359920523e-05, "loss": 1.3097189366817474e-05, "step": 286380 }, { "epoch": 81.29151291512915, "grad_norm": 0.0032862855587154627, "learning_rate": 1.874084586999716e-05, "loss": 1.1021271347999573e-05, "step": 286390 }, { "epoch": 81.29435140505251, "grad_norm": 0.0003825776802841574, "learning_rate": 1.8738007380073802e-05, "loss": 1.1126697063446046e-05, "step": 286400 }, { "epoch": 81.29718989497587, "grad_norm": 0.013791554607450962, "learning_rate": 1.873516889015044e-05, "loss": 1.3164617121219635e-05, "step": 286410 }, { "epoch": 81.30002838489924, "grad_norm": 0.011170154437422752, "learning_rate": 1.873233040022708e-05, "loss": 1.2775324285030365e-05, "step": 286420 }, { "epoch": 81.3028668748226, "grad_norm": 0.0018294417532160878, "learning_rate": 1.872949191030372e-05, "loss": 0.00018396973609924316, "step": 286430 }, { "epoch": 81.30570536474596, "grad_norm": 0.027154590934515, "learning_rate": 1.8726653420380357e-05, "loss": 0.0012651430442929268, "step": 286440 }, { "epoch": 81.30854385466931, "grad_norm": 0.0036151448730379343, "learning_rate": 1.8723814930457e-05, "loss": 9.26293432712555e-06, "step": 286450 }, { "epoch": 81.31138234459267, "grad_norm": 0.0038319635204970837, "learning_rate": 1.8720976440533637e-05, "loss": 1.9065290689468384e-05, "step": 286460 }, { "epoch": 81.31422083451604, "grad_norm": 0.0011048143496736884, "learning_rate": 1.8718137950610275e-05, "loss": 7.5643882155418394e-06, "step": 286470 }, { "epoch": 81.3170593244394, "grad_norm": 0.000465274672023952, "learning_rate": 1.8715299460686916e-05, "loss": 1.0466761887073517e-05, "step": 286480 }, { "epoch": 81.31989781436276, "grad_norm": 0.0006773764034733176, "learning_rate": 1.8712460970763554e-05, "loss": 1.1949241161346435e-05, "step": 286490 }, { "epoch": 81.32273630428612, "grad_norm": 0.0021289538126438856, "learning_rate": 1.8709622480840195e-05, "loss": 1.2886524200439452e-05, "step": 286500 }, { "epoch": 81.32273630428612, "eval_accuracy": 0.9872194315508361, "eval_loss": 0.05240129306912422, "eval_runtime": 35.5154, "eval_samples_per_second": 442.822, "eval_steps_per_second": 6.927, "step": 286500 }, { "epoch": 81.32557479420949, "grad_norm": 0.0010907694231718779, "learning_rate": 1.8706783990916833e-05, "loss": 4.6741217374801636e-05, "step": 286510 }, { "epoch": 81.32841328413284, "grad_norm": 0.03417312726378441, "learning_rate": 1.870394550099347e-05, "loss": 1.5286728739738464e-05, "step": 286520 }, { "epoch": 81.3312517740562, "grad_norm": 0.00037514802534133196, "learning_rate": 1.8701107011070113e-05, "loss": 1.406986266374588e-05, "step": 286530 }, { "epoch": 81.33409026397956, "grad_norm": 0.0006419955170713365, "learning_rate": 1.869826852114675e-05, "loss": 9.284727275371551e-06, "step": 286540 }, { "epoch": 81.33692875390292, "grad_norm": 0.0018256028415635228, "learning_rate": 1.869543003122339e-05, "loss": 7.164720445871354e-05, "step": 286550 }, { "epoch": 81.33976724382629, "grad_norm": 0.002111246110871434, "learning_rate": 1.8692591541300027e-05, "loss": 0.00025096070021390915, "step": 286560 }, { "epoch": 81.34260573374965, "grad_norm": 0.013552837073802948, "learning_rate": 1.8689753051376668e-05, "loss": 1.9565224647521973e-05, "step": 286570 }, { "epoch": 81.34544422367301, "grad_norm": 0.0011816787300631404, "learning_rate": 1.868691456145331e-05, "loss": 0.0003514908254146576, "step": 286580 }, { "epoch": 81.34828271359636, "grad_norm": 0.007277514319866896, "learning_rate": 1.8684076071529948e-05, "loss": 0.0007814383134245872, "step": 286590 }, { "epoch": 81.35112120351972, "grad_norm": 0.019373638555407524, "learning_rate": 1.8681237581606586e-05, "loss": 0.00760510191321373, "step": 286600 }, { "epoch": 81.35395969344309, "grad_norm": 0.002268032170832157, "learning_rate": 1.8678682940675562e-05, "loss": 0.022590574622154237, "step": 286610 }, { "epoch": 81.35679818336645, "grad_norm": 0.0011166390031576157, "learning_rate": 1.86758444507522e-05, "loss": 0.0023207526654005052, "step": 286620 }, { "epoch": 81.35963667328981, "grad_norm": 25.076486587524414, "learning_rate": 1.867300596082884e-05, "loss": 0.007056169956922531, "step": 286630 }, { "epoch": 81.36247516321318, "grad_norm": 0.024054808542132378, "learning_rate": 1.867016747090548e-05, "loss": 0.0002465277910232544, "step": 286640 }, { "epoch": 81.36531365313652, "grad_norm": 0.4948144257068634, "learning_rate": 1.8667328980982117e-05, "loss": 0.0028407499194145203, "step": 286650 }, { "epoch": 81.36815214305989, "grad_norm": 0.03292900696396828, "learning_rate": 1.866449049105876e-05, "loss": 0.00010453350841999054, "step": 286660 }, { "epoch": 81.37099063298325, "grad_norm": 0.0031628909055143595, "learning_rate": 1.8661652001135397e-05, "loss": 0.0015165120363235473, "step": 286670 }, { "epoch": 81.37382912290661, "grad_norm": 0.01684965379536152, "learning_rate": 1.8658813511212038e-05, "loss": 9.335596114397049e-05, "step": 286680 }, { "epoch": 81.37666761282998, "grad_norm": 0.021484503522515297, "learning_rate": 1.8655975021288673e-05, "loss": 0.0003015158697962761, "step": 286690 }, { "epoch": 81.37950610275334, "grad_norm": 0.009705581702291965, "learning_rate": 1.8653136531365314e-05, "loss": 0.0001326562836766243, "step": 286700 }, { "epoch": 81.3823445926767, "grad_norm": 0.05960075184702873, "learning_rate": 1.8650298041441955e-05, "loss": 0.00018140580505132675, "step": 286710 }, { "epoch": 81.38518308260005, "grad_norm": 0.005301442462950945, "learning_rate": 1.8647459551518593e-05, "loss": 9.48360189795494e-05, "step": 286720 }, { "epoch": 81.38802157252341, "grad_norm": 0.005008009262382984, "learning_rate": 1.864462106159523e-05, "loss": 2.8334744274616242e-05, "step": 286730 }, { "epoch": 81.39086006244678, "grad_norm": 0.0023287911899387836, "learning_rate": 1.864178257167187e-05, "loss": 1.4089234173297883e-05, "step": 286740 }, { "epoch": 81.39369855237014, "grad_norm": 0.09918241947889328, "learning_rate": 1.863894408174851e-05, "loss": 4.0938891470432284e-05, "step": 286750 }, { "epoch": 81.3965370422935, "grad_norm": 0.0016656817169860005, "learning_rate": 1.8636105591825152e-05, "loss": 6.534997373819351e-05, "step": 286760 }, { "epoch": 81.39937553221687, "grad_norm": 0.016831405460834503, "learning_rate": 1.8633267101901787e-05, "loss": 2.0163692533969878e-05, "step": 286770 }, { "epoch": 81.40221402214023, "grad_norm": 0.005408619996160269, "learning_rate": 1.8630428611978428e-05, "loss": 4.155393689870834e-05, "step": 286780 }, { "epoch": 81.40505251206358, "grad_norm": 0.0064782639965415, "learning_rate": 1.8627590122055066e-05, "loss": 4.710555076599121e-05, "step": 286790 }, { "epoch": 81.40789100198694, "grad_norm": 0.002815141575410962, "learning_rate": 1.8624751632131708e-05, "loss": 1.6523338854312896e-05, "step": 286800 }, { "epoch": 81.4107294919103, "grad_norm": 0.0026398745831102133, "learning_rate": 1.8621913142208346e-05, "loss": 2.5380775332450868e-05, "step": 286810 }, { "epoch": 81.41356798183367, "grad_norm": 0.0017618476413190365, "learning_rate": 1.8619074652284984e-05, "loss": 1.9260868430137635e-05, "step": 286820 }, { "epoch": 81.41640647175703, "grad_norm": 0.00583680858835578, "learning_rate": 1.8616236162361625e-05, "loss": 1.3106130063533783e-05, "step": 286830 }, { "epoch": 81.41924496168039, "grad_norm": 0.11434178054332733, "learning_rate": 1.8613397672438263e-05, "loss": 4.799030721187591e-05, "step": 286840 }, { "epoch": 81.42208345160374, "grad_norm": 0.003172295866534114, "learning_rate": 1.8610559182514904e-05, "loss": 4.214923828840256e-05, "step": 286850 }, { "epoch": 81.4249219415271, "grad_norm": 0.05402405932545662, "learning_rate": 1.8607720692591542e-05, "loss": 2.8106756508350374e-05, "step": 286860 }, { "epoch": 81.42776043145047, "grad_norm": 0.003217903431504965, "learning_rate": 1.860488220266818e-05, "loss": 4.6156533062458036e-05, "step": 286870 }, { "epoch": 81.43059892137383, "grad_norm": 0.004249671474099159, "learning_rate": 1.860204371274482e-05, "loss": 2.3297034204006195e-05, "step": 286880 }, { "epoch": 81.43343741129719, "grad_norm": 0.009916613809764385, "learning_rate": 1.859920522282146e-05, "loss": 1.3977289199829102e-05, "step": 286890 }, { "epoch": 81.43627590122055, "grad_norm": 0.0015100068412721157, "learning_rate": 1.8596366732898098e-05, "loss": 6.649699062108994e-05, "step": 286900 }, { "epoch": 81.43911439114392, "grad_norm": 0.0018741677049547434, "learning_rate": 1.859352824297474e-05, "loss": 2.8460100293159484e-05, "step": 286910 }, { "epoch": 81.44195288106727, "grad_norm": 0.0025398824363946915, "learning_rate": 1.8590689753051377e-05, "loss": 2.1466612815856932e-05, "step": 286920 }, { "epoch": 81.44479137099063, "grad_norm": 0.003574756206944585, "learning_rate": 1.8587851263128018e-05, "loss": 2.150833606719971e-05, "step": 286930 }, { "epoch": 81.44762986091399, "grad_norm": 0.015115329995751381, "learning_rate": 1.8585012773204656e-05, "loss": 1.339782029390335e-05, "step": 286940 }, { "epoch": 81.45046835083735, "grad_norm": 0.0025799828581511974, "learning_rate": 1.8582174283281294e-05, "loss": 1.4527887105941773e-05, "step": 286950 }, { "epoch": 81.45330684076072, "grad_norm": 0.023582622408866882, "learning_rate": 1.8579335793357936e-05, "loss": 1.3349950313568115e-05, "step": 286960 }, { "epoch": 81.45614533068408, "grad_norm": 0.04634072259068489, "learning_rate": 1.8576497303434574e-05, "loss": 0.00015228241682052612, "step": 286970 }, { "epoch": 81.45898382060744, "grad_norm": 0.005613063927739859, "learning_rate": 1.857365881351121e-05, "loss": 0.00019580889493227006, "step": 286980 }, { "epoch": 81.46182231053079, "grad_norm": 1.3893786668777466, "learning_rate": 1.8571104172580188e-05, "loss": 0.005128800868988037, "step": 286990 }, { "epoch": 81.46466080045415, "grad_norm": 0.04752803221344948, "learning_rate": 1.8568265682656826e-05, "loss": 3.3188052475452424e-05, "step": 287000 }, { "epoch": 81.46466080045415, "eval_accuracy": 0.9846124499268774, "eval_loss": 0.06582488864660263, "eval_runtime": 35.9571, "eval_samples_per_second": 437.382, "eval_steps_per_second": 6.841, "step": 287000 }, { "epoch": 81.46749929037752, "grad_norm": 0.001858257339335978, "learning_rate": 1.8565427192733468e-05, "loss": 3.804527223110199e-05, "step": 287010 }, { "epoch": 81.47033778030088, "grad_norm": 0.0018692304147407413, "learning_rate": 1.8562588702810106e-05, "loss": 9.1545470058918e-05, "step": 287020 }, { "epoch": 81.47317627022424, "grad_norm": 0.2365543097257614, "learning_rate": 1.8559750212886744e-05, "loss": 0.00012606624513864518, "step": 287030 }, { "epoch": 81.4760147601476, "grad_norm": 0.09766840189695358, "learning_rate": 1.8556911722963385e-05, "loss": 0.0025966653600335123, "step": 287040 }, { "epoch": 81.47885325007097, "grad_norm": 0.02991832047700882, "learning_rate": 1.8554073233040023e-05, "loss": 0.006349093466997147, "step": 287050 }, { "epoch": 81.48169173999432, "grad_norm": 0.08647076785564423, "learning_rate": 1.8551234743116664e-05, "loss": 3.245975822210312e-05, "step": 287060 }, { "epoch": 81.48453022991768, "grad_norm": 0.005324722733348608, "learning_rate": 1.8548396253193302e-05, "loss": 1.4300271868705749e-05, "step": 287070 }, { "epoch": 81.48736871984104, "grad_norm": 0.002600046107545495, "learning_rate": 1.854555776326994e-05, "loss": 0.00011352915316820145, "step": 287080 }, { "epoch": 81.4902072097644, "grad_norm": 0.0042981370352208614, "learning_rate": 1.854271927334658e-05, "loss": 0.00011145025491714478, "step": 287090 }, { "epoch": 81.49304569968777, "grad_norm": 0.005997739732265472, "learning_rate": 1.853988078342322e-05, "loss": 0.0011953480541706085, "step": 287100 }, { "epoch": 81.49588418961113, "grad_norm": 0.0010704559972509742, "learning_rate": 1.853704229349986e-05, "loss": 3.536958247423172e-05, "step": 287110 }, { "epoch": 81.49872267953448, "grad_norm": 1.6179553270339966, "learning_rate": 1.8534203803576496e-05, "loss": 0.003970866650342941, "step": 287120 }, { "epoch": 81.50156116945784, "grad_norm": 0.14434729516506195, "learning_rate": 1.8531365313653137e-05, "loss": 0.00024796798825263975, "step": 287130 }, { "epoch": 81.5043996593812, "grad_norm": 0.00686925183981657, "learning_rate": 1.852852682372978e-05, "loss": 0.009299396723508834, "step": 287140 }, { "epoch": 81.50723814930457, "grad_norm": 0.007579219993203878, "learning_rate": 1.8525688333806416e-05, "loss": 7.80981034040451e-05, "step": 287150 }, { "epoch": 81.51007663922793, "grad_norm": 0.009729122743010521, "learning_rate": 1.8522849843883054e-05, "loss": 0.00010343994945287705, "step": 287160 }, { "epoch": 81.5129151291513, "grad_norm": 0.012812496162950993, "learning_rate": 1.8520011353959692e-05, "loss": 8.52653756737709e-05, "step": 287170 }, { "epoch": 81.51575361907466, "grad_norm": 0.010425584390759468, "learning_rate": 1.8517172864036334e-05, "loss": 0.00012772548943758012, "step": 287180 }, { "epoch": 81.518592108998, "grad_norm": 0.00356274819932878, "learning_rate": 1.8514334374112975e-05, "loss": 7.61277973651886e-05, "step": 287190 }, { "epoch": 81.52143059892137, "grad_norm": 0.0019901765044778585, "learning_rate": 1.8511495884189613e-05, "loss": 4.1253864765167236e-05, "step": 287200 }, { "epoch": 81.52426908884473, "grad_norm": 0.018371354788541794, "learning_rate": 1.850865739426625e-05, "loss": 0.0003565773367881775, "step": 287210 }, { "epoch": 81.5271075787681, "grad_norm": 0.11840970069169998, "learning_rate": 1.850581890434289e-05, "loss": 0.002014154940843582, "step": 287220 }, { "epoch": 81.52994606869146, "grad_norm": 0.5569551587104797, "learning_rate": 1.850298041441953e-05, "loss": 0.001358208619058132, "step": 287230 }, { "epoch": 81.53278455861482, "grad_norm": 0.010044434107840061, "learning_rate": 1.850014192449617e-05, "loss": 0.0002049831673502922, "step": 287240 }, { "epoch": 81.53562304853818, "grad_norm": 0.009901650249958038, "learning_rate": 1.8497303434572806e-05, "loss": 0.0001561865210533142, "step": 287250 }, { "epoch": 81.53846153846153, "grad_norm": 0.11204380542039871, "learning_rate": 1.8494464944649448e-05, "loss": 6.042104214429855e-05, "step": 287260 }, { "epoch": 81.5413000283849, "grad_norm": 0.01137592177838087, "learning_rate": 1.8491626454726086e-05, "loss": 0.0004233483225107193, "step": 287270 }, { "epoch": 81.54413851830826, "grad_norm": 0.0022267920430749655, "learning_rate": 1.8488787964802727e-05, "loss": 0.005824643746018409, "step": 287280 }, { "epoch": 81.54697700823162, "grad_norm": 0.0005277649033814669, "learning_rate": 1.8485949474879365e-05, "loss": 1.4351680874824524e-05, "step": 287290 }, { "epoch": 81.54981549815498, "grad_norm": 0.0018749844748526812, "learning_rate": 1.8483110984956003e-05, "loss": 0.00011705663055181504, "step": 287300 }, { "epoch": 81.55265398807835, "grad_norm": 0.001363388611935079, "learning_rate": 1.8480272495032644e-05, "loss": 0.00032468214631080626, "step": 287310 }, { "epoch": 81.5554924780017, "grad_norm": 0.010333205573260784, "learning_rate": 1.8477434005109282e-05, "loss": 0.002761143445968628, "step": 287320 }, { "epoch": 81.55833096792506, "grad_norm": 0.05894985422492027, "learning_rate": 1.847459551518592e-05, "loss": 0.0001667110249400139, "step": 287330 }, { "epoch": 81.56116945784842, "grad_norm": 0.39668741822242737, "learning_rate": 1.8471757025262562e-05, "loss": 7.575284689664841e-05, "step": 287340 }, { "epoch": 81.56400794777178, "grad_norm": 0.0014453172916546464, "learning_rate": 1.84689185353392e-05, "loss": 2.5400333106517793e-05, "step": 287350 }, { "epoch": 81.56684643769515, "grad_norm": 0.005056543275713921, "learning_rate": 1.846608004541584e-05, "loss": 0.0005392108112573623, "step": 287360 }, { "epoch": 81.56968492761851, "grad_norm": 0.001948133809491992, "learning_rate": 1.846324155549248e-05, "loss": 4.448220133781433e-05, "step": 287370 }, { "epoch": 81.57252341754187, "grad_norm": 0.10527033358812332, "learning_rate": 1.8460403065569117e-05, "loss": 7.445495575666427e-05, "step": 287380 }, { "epoch": 81.57536190746522, "grad_norm": 0.03275391459465027, "learning_rate": 1.845756457564576e-05, "loss": 0.00028014052659273145, "step": 287390 }, { "epoch": 81.57820039738858, "grad_norm": 0.0010427262168377638, "learning_rate": 1.8454726085722396e-05, "loss": 1.5336461365222932e-05, "step": 287400 }, { "epoch": 81.58103888731195, "grad_norm": 0.00796889141201973, "learning_rate": 1.8451887595799038e-05, "loss": 0.0007940312847495079, "step": 287410 }, { "epoch": 81.58387737723531, "grad_norm": 0.003382675349712372, "learning_rate": 1.8449049105875672e-05, "loss": 6.735362112522125e-05, "step": 287420 }, { "epoch": 81.58671586715867, "grad_norm": 0.03493930399417877, "learning_rate": 1.8446210615952314e-05, "loss": 0.00020554214715957643, "step": 287430 }, { "epoch": 81.58955435708204, "grad_norm": 0.015748897567391396, "learning_rate": 1.8443372126028955e-05, "loss": 3.634132444858551e-05, "step": 287440 }, { "epoch": 81.5923928470054, "grad_norm": 0.0012962158070877194, "learning_rate": 1.8440533636105593e-05, "loss": 2.683158963918686e-05, "step": 287450 }, { "epoch": 81.59523133692875, "grad_norm": 0.005951074883341789, "learning_rate": 1.843769514618223e-05, "loss": 7.365904748439789e-05, "step": 287460 }, { "epoch": 81.59806982685211, "grad_norm": 0.0004132955218665302, "learning_rate": 1.843485665625887e-05, "loss": 0.00010030195116996765, "step": 287470 }, { "epoch": 81.60090831677547, "grad_norm": 0.0005668801604770124, "learning_rate": 1.843201816633551e-05, "loss": 0.0009575529024004936, "step": 287480 }, { "epoch": 81.60374680669884, "grad_norm": 0.02160286344587803, "learning_rate": 1.8429179676412152e-05, "loss": 2.162586897611618e-05, "step": 287490 }, { "epoch": 81.6065852966222, "grad_norm": 0.018603196367621422, "learning_rate": 1.8426341186488786e-05, "loss": 0.0001675676554441452, "step": 287500 }, { "epoch": 81.6065852966222, "eval_accuracy": 0.986011318115343, "eval_loss": 0.05955271050333977, "eval_runtime": 35.5944, "eval_samples_per_second": 441.839, "eval_steps_per_second": 6.911, "step": 287500 }, { "epoch": 81.60942378654556, "grad_norm": 1.1563079357147217, "learning_rate": 1.8423502696565428e-05, "loss": 0.00015498157590627671, "step": 287510 }, { "epoch": 81.61226227646893, "grad_norm": 0.0057112374342978, "learning_rate": 1.8420664206642066e-05, "loss": 0.0001338137313723564, "step": 287520 }, { "epoch": 81.61510076639227, "grad_norm": 0.002784068463370204, "learning_rate": 1.8417825716718707e-05, "loss": 5.861911922693253e-05, "step": 287530 }, { "epoch": 81.61793925631564, "grad_norm": 0.0063302163034677505, "learning_rate": 1.8414987226795345e-05, "loss": 6.585866212844848e-05, "step": 287540 }, { "epoch": 81.620777746239, "grad_norm": 0.0011758230393752456, "learning_rate": 1.8412148736871983e-05, "loss": 0.0004254870116710663, "step": 287550 }, { "epoch": 81.62361623616236, "grad_norm": 0.009540705941617489, "learning_rate": 1.8409310246948625e-05, "loss": 3.892090171575546e-05, "step": 287560 }, { "epoch": 81.62645472608573, "grad_norm": 0.0056779831647872925, "learning_rate": 1.8406471757025266e-05, "loss": 0.0002224709838628769, "step": 287570 }, { "epoch": 81.62929321600909, "grad_norm": 0.8700964450836182, "learning_rate": 1.8403633267101904e-05, "loss": 0.0001528942957520485, "step": 287580 }, { "epoch": 81.63213170593244, "grad_norm": 0.005305394995957613, "learning_rate": 1.8400794777178542e-05, "loss": 0.001127566397190094, "step": 287590 }, { "epoch": 81.6349701958558, "grad_norm": 0.0024739468935877085, "learning_rate": 1.839795628725518e-05, "loss": 3.158282488584519e-05, "step": 287600 }, { "epoch": 81.63780868577916, "grad_norm": 0.008562954142689705, "learning_rate": 1.839511779733182e-05, "loss": 0.0008086742833256722, "step": 287610 }, { "epoch": 81.64064717570253, "grad_norm": 0.0006605299422517419, "learning_rate": 1.839227930740846e-05, "loss": 7.611270993947982e-05, "step": 287620 }, { "epoch": 81.64348566562589, "grad_norm": 0.02014424465596676, "learning_rate": 1.8389440817485097e-05, "loss": 6.794203072786331e-05, "step": 287630 }, { "epoch": 81.64632415554925, "grad_norm": 0.001076839049346745, "learning_rate": 1.838660232756174e-05, "loss": 0.00017944108694791793, "step": 287640 }, { "epoch": 81.64916264547261, "grad_norm": 0.04311511293053627, "learning_rate": 1.8383763837638377e-05, "loss": 0.0001534579321742058, "step": 287650 }, { "epoch": 81.65200113539596, "grad_norm": 0.001306328922510147, "learning_rate": 1.8380925347715018e-05, "loss": 0.0002064347267150879, "step": 287660 }, { "epoch": 81.65483962531933, "grad_norm": 0.007252998650074005, "learning_rate": 1.8378086857791656e-05, "loss": 7.576849311590195e-05, "step": 287670 }, { "epoch": 81.65767811524269, "grad_norm": 0.018114488571882248, "learning_rate": 1.8375248367868294e-05, "loss": 4.552733153104782e-05, "step": 287680 }, { "epoch": 81.66051660516605, "grad_norm": 0.022696293890476227, "learning_rate": 1.8372409877944935e-05, "loss": 0.0015632286667823792, "step": 287690 }, { "epoch": 81.66335509508941, "grad_norm": 0.0038944343104958534, "learning_rate": 1.8369571388021573e-05, "loss": 1.9277259707450866e-05, "step": 287700 }, { "epoch": 81.66619358501278, "grad_norm": 0.006708736065775156, "learning_rate": 1.836673289809821e-05, "loss": 2.491716295480728e-05, "step": 287710 }, { "epoch": 81.66903207493614, "grad_norm": 0.09708798676729202, "learning_rate": 1.8363894408174853e-05, "loss": 6.015393882989884e-05, "step": 287720 }, { "epoch": 81.67187056485949, "grad_norm": 0.014907920733094215, "learning_rate": 1.836105591825149e-05, "loss": 9.417496621608735e-05, "step": 287730 }, { "epoch": 81.67470905478285, "grad_norm": 0.009281367063522339, "learning_rate": 1.8358217428328132e-05, "loss": 0.0001418398693203926, "step": 287740 }, { "epoch": 81.67754754470621, "grad_norm": 0.07575779408216476, "learning_rate": 1.835537893840477e-05, "loss": 4.899948835372925e-05, "step": 287750 }, { "epoch": 81.68038603462958, "grad_norm": 0.11177762597799301, "learning_rate": 1.8352540448481408e-05, "loss": 8.99510458111763e-05, "step": 287760 }, { "epoch": 81.68322452455294, "grad_norm": 0.005851022433489561, "learning_rate": 1.834970195855805e-05, "loss": 1.5933439135551454e-05, "step": 287770 }, { "epoch": 81.6860630144763, "grad_norm": 0.00873697642236948, "learning_rate": 1.8346863468634687e-05, "loss": 7.111802697181701e-05, "step": 287780 }, { "epoch": 81.68890150439967, "grad_norm": 0.017542801797389984, "learning_rate": 1.834402497871133e-05, "loss": 5.933977663516998e-05, "step": 287790 }, { "epoch": 81.69173999432302, "grad_norm": 0.003286984981968999, "learning_rate": 1.8341186488787963e-05, "loss": 3.6255083978176116e-05, "step": 287800 }, { "epoch": 81.69457848424638, "grad_norm": 0.0010818958980962634, "learning_rate": 1.8338347998864605e-05, "loss": 1.2067705392837525e-05, "step": 287810 }, { "epoch": 81.69741697416974, "grad_norm": 0.0008925640722736716, "learning_rate": 1.8335509508941246e-05, "loss": 0.0004039183259010315, "step": 287820 }, { "epoch": 81.7002554640931, "grad_norm": 0.0034397996496409178, "learning_rate": 1.8332671019017884e-05, "loss": 2.4112127721309663e-05, "step": 287830 }, { "epoch": 81.70309395401647, "grad_norm": 0.010747067630290985, "learning_rate": 1.8329832529094522e-05, "loss": 0.00010229460895061493, "step": 287840 }, { "epoch": 81.70593244393983, "grad_norm": 0.0012168815592303872, "learning_rate": 1.832699403917116e-05, "loss": 3.5845115780830385e-05, "step": 287850 }, { "epoch": 81.70877093386318, "grad_norm": 0.5401164293289185, "learning_rate": 1.83241555492478e-05, "loss": 0.000183197483420372, "step": 287860 }, { "epoch": 81.71160942378654, "grad_norm": 0.005303825251758099, "learning_rate": 1.8321317059324443e-05, "loss": 1.94525346159935e-05, "step": 287870 }, { "epoch": 81.7144479137099, "grad_norm": 0.03429344296455383, "learning_rate": 1.8318478569401077e-05, "loss": 2.632569521665573e-05, "step": 287880 }, { "epoch": 81.71728640363327, "grad_norm": 0.004764224402606487, "learning_rate": 1.831564007947772e-05, "loss": 4.5779161155223846e-05, "step": 287890 }, { "epoch": 81.72012489355663, "grad_norm": 0.001805078238248825, "learning_rate": 1.8312801589554357e-05, "loss": 3.753956407308579e-05, "step": 287900 }, { "epoch": 81.72296338347999, "grad_norm": 0.009638993069529533, "learning_rate": 1.8309963099630998e-05, "loss": 2.0931102335453032e-05, "step": 287910 }, { "epoch": 81.72580187340336, "grad_norm": 0.0008121840073727071, "learning_rate": 1.8307124609707636e-05, "loss": 2.0688958466053008e-05, "step": 287920 }, { "epoch": 81.7286403633267, "grad_norm": 0.0033584285993129015, "learning_rate": 1.8304286119784274e-05, "loss": 1.051872968673706e-05, "step": 287930 }, { "epoch": 81.73147885325007, "grad_norm": 0.0074615636840462685, "learning_rate": 1.8301447629860915e-05, "loss": 2.212896943092346e-05, "step": 287940 }, { "epoch": 81.73431734317343, "grad_norm": 0.0049674478359520435, "learning_rate": 1.8298609139937553e-05, "loss": 2.1524354815483092e-05, "step": 287950 }, { "epoch": 81.73715583309679, "grad_norm": 0.0018541377503424883, "learning_rate": 1.8295770650014195e-05, "loss": 2.8524361550807952e-05, "step": 287960 }, { "epoch": 81.73999432302016, "grad_norm": 0.0013914147857576609, "learning_rate": 1.8292932160090833e-05, "loss": 5.974136292934418e-05, "step": 287970 }, { "epoch": 81.74283281294352, "grad_norm": 0.0008819756912998855, "learning_rate": 1.829009367016747e-05, "loss": 3.116633743047714e-05, "step": 287980 }, { "epoch": 81.74567130286688, "grad_norm": 0.001074009109288454, "learning_rate": 1.8287255180244112e-05, "loss": 5.856472998857498e-05, "step": 287990 }, { "epoch": 81.74850979279023, "grad_norm": 0.0014604163588955998, "learning_rate": 1.828441669032075e-05, "loss": 3.057904541492462e-05, "step": 288000 }, { "epoch": 81.74850979279023, "eval_accuracy": 0.9865199974566033, "eval_loss": 0.057627953588962555, "eval_runtime": 37.4388, "eval_samples_per_second": 420.072, "eval_steps_per_second": 6.571, "step": 288000 }, { "epoch": 81.7513482827136, "grad_norm": 0.013495735824108124, "learning_rate": 1.8281578200397388e-05, "loss": 3.9838626980781557e-05, "step": 288010 }, { "epoch": 81.75418677263696, "grad_norm": 0.0020666676573455334, "learning_rate": 1.827873971047403e-05, "loss": 4.837382584810257e-05, "step": 288020 }, { "epoch": 81.75702526256032, "grad_norm": 0.0010813961271196604, "learning_rate": 1.8275901220550668e-05, "loss": 1.176297664642334e-05, "step": 288030 }, { "epoch": 81.75986375248368, "grad_norm": 0.009540116414427757, "learning_rate": 1.827306273062731e-05, "loss": 1.9659847021102904e-05, "step": 288040 }, { "epoch": 81.76270224240704, "grad_norm": 0.0011737653985619545, "learning_rate": 1.8270224240703947e-05, "loss": 1.9041262567043306e-05, "step": 288050 }, { "epoch": 81.7655407323304, "grad_norm": 0.005739912390708923, "learning_rate": 1.8267385750780585e-05, "loss": 2.5464966893196105e-05, "step": 288060 }, { "epoch": 81.76837922225376, "grad_norm": 0.0012743892148137093, "learning_rate": 1.8264547260857226e-05, "loss": 4.3147243559360504e-05, "step": 288070 }, { "epoch": 81.77121771217712, "grad_norm": 0.0007161519606597722, "learning_rate": 1.8261708770933864e-05, "loss": 1.791995018720627e-05, "step": 288080 }, { "epoch": 81.77405620210048, "grad_norm": 0.0007935583707876503, "learning_rate": 1.8258870281010502e-05, "loss": 2.705603837966919e-05, "step": 288090 }, { "epoch": 81.77689469202384, "grad_norm": 0.012571658939123154, "learning_rate": 1.825603179108714e-05, "loss": 2.1890364587306975e-05, "step": 288100 }, { "epoch": 81.77973318194721, "grad_norm": 0.1576426476240158, "learning_rate": 1.825319330116378e-05, "loss": 3.8537010550498965e-05, "step": 288110 }, { "epoch": 81.78257167187057, "grad_norm": 0.008565215393900871, "learning_rate": 1.8250354811240423e-05, "loss": 1.6099587082862853e-05, "step": 288120 }, { "epoch": 81.78541016179392, "grad_norm": 0.004488150589168072, "learning_rate": 1.824751632131706e-05, "loss": 1.270156353712082e-05, "step": 288130 }, { "epoch": 81.78824865171728, "grad_norm": 0.00822166446596384, "learning_rate": 1.82446778313937e-05, "loss": 1.8317811191082e-05, "step": 288140 }, { "epoch": 81.79108714164065, "grad_norm": 0.00727640837430954, "learning_rate": 1.8241839341470337e-05, "loss": 0.00037304963916540146, "step": 288150 }, { "epoch": 81.79392563156401, "grad_norm": 0.006839977111667395, "learning_rate": 1.823900085154698e-05, "loss": 0.0001576123759150505, "step": 288160 }, { "epoch": 81.79676412148737, "grad_norm": 0.8587371110916138, "learning_rate": 1.823616236162362e-05, "loss": 0.0001618584617972374, "step": 288170 }, { "epoch": 81.79960261141073, "grad_norm": 0.035652436316013336, "learning_rate": 1.8233323871700254e-05, "loss": 1.5772134065628052e-05, "step": 288180 }, { "epoch": 81.8024411013341, "grad_norm": 0.005787517875432968, "learning_rate": 1.8230485381776896e-05, "loss": 6.326921284198761e-05, "step": 288190 }, { "epoch": 81.80527959125745, "grad_norm": 0.011698960326611996, "learning_rate": 1.8227646891853534e-05, "loss": 2.4313293397426606e-05, "step": 288200 }, { "epoch": 81.80811808118081, "grad_norm": 0.0011943562421947718, "learning_rate": 1.8224808401930175e-05, "loss": 0.00020780134946107864, "step": 288210 }, { "epoch": 81.81095657110417, "grad_norm": 0.062045108526945114, "learning_rate": 1.8221969912006813e-05, "loss": 3.960337489843369e-05, "step": 288220 }, { "epoch": 81.81379506102753, "grad_norm": 0.03761361911892891, "learning_rate": 1.821913142208345e-05, "loss": 0.0002493062987923622, "step": 288230 }, { "epoch": 81.8166335509509, "grad_norm": 0.0052765002474188805, "learning_rate": 1.8216292932160092e-05, "loss": 1.5139020979404449e-05, "step": 288240 }, { "epoch": 81.81947204087426, "grad_norm": 0.021946316584944725, "learning_rate": 1.821345444223673e-05, "loss": 0.0001306626945734024, "step": 288250 }, { "epoch": 81.82231053079762, "grad_norm": 0.0037731430493295193, "learning_rate": 1.8210615952313372e-05, "loss": 8.285399526357651e-05, "step": 288260 }, { "epoch": 81.82514902072097, "grad_norm": 0.0081380819901824, "learning_rate": 1.820777746239001e-05, "loss": 2.4846941232681275e-05, "step": 288270 }, { "epoch": 81.82798751064433, "grad_norm": 0.00413241982460022, "learning_rate": 1.8204938972466648e-05, "loss": 2.230741083621979e-05, "step": 288280 }, { "epoch": 81.8308260005677, "grad_norm": 0.06876467168331146, "learning_rate": 1.820210048254329e-05, "loss": 0.0008757343515753746, "step": 288290 }, { "epoch": 81.83366449049106, "grad_norm": 0.47814691066741943, "learning_rate": 1.8199261992619927e-05, "loss": 7.054172456264496e-05, "step": 288300 }, { "epoch": 81.83650298041442, "grad_norm": 0.0040609510615468025, "learning_rate": 1.8196423502696565e-05, "loss": 2.7994997799396516e-05, "step": 288310 }, { "epoch": 81.83934147033779, "grad_norm": 0.0013029547408223152, "learning_rate": 1.8193585012773206e-05, "loss": 0.00029827523976564405, "step": 288320 }, { "epoch": 81.84217996026113, "grad_norm": 3.972280979156494, "learning_rate": 1.8190746522849844e-05, "loss": 0.00141570121049881, "step": 288330 }, { "epoch": 81.8450184501845, "grad_norm": 0.01201534178107977, "learning_rate": 1.8187908032926486e-05, "loss": 0.00010883118957281113, "step": 288340 }, { "epoch": 81.84785694010786, "grad_norm": 0.007176599465310574, "learning_rate": 1.818506954300312e-05, "loss": 0.006064050644636154, "step": 288350 }, { "epoch": 81.85069543003122, "grad_norm": 0.03608078882098198, "learning_rate": 1.8182231053079762e-05, "loss": 7.792934775352478e-05, "step": 288360 }, { "epoch": 81.85353391995459, "grad_norm": 0.04941477254033089, "learning_rate": 1.8179392563156403e-05, "loss": 0.00390201210975647, "step": 288370 }, { "epoch": 81.85637240987795, "grad_norm": 0.006309337913990021, "learning_rate": 1.817655407323304e-05, "loss": 4.9565546214580535e-05, "step": 288380 }, { "epoch": 81.85921089980131, "grad_norm": 0.006760995835065842, "learning_rate": 1.817371558330968e-05, "loss": 6.061121821403504e-05, "step": 288390 }, { "epoch": 81.86204938972466, "grad_norm": 0.00631245831027627, "learning_rate": 1.8170877093386317e-05, "loss": 2.9809586703777313e-05, "step": 288400 }, { "epoch": 81.86488787964802, "grad_norm": 0.014634696766734123, "learning_rate": 1.816803860346296e-05, "loss": 2.0518898963928223e-05, "step": 288410 }, { "epoch": 81.86772636957139, "grad_norm": 0.004562285263091326, "learning_rate": 1.81652001135396e-05, "loss": 7.871147245168686e-05, "step": 288420 }, { "epoch": 81.87056485949475, "grad_norm": 0.010415961965918541, "learning_rate": 1.8162361623616238e-05, "loss": 3.419481217861176e-05, "step": 288430 }, { "epoch": 81.87340334941811, "grad_norm": 0.004622498992830515, "learning_rate": 1.8159523133692876e-05, "loss": 7.226727902889251e-05, "step": 288440 }, { "epoch": 81.87624183934147, "grad_norm": 0.0020924012642353773, "learning_rate": 1.8156684643769514e-05, "loss": 1.3203173875808715e-05, "step": 288450 }, { "epoch": 81.87908032926484, "grad_norm": 0.014119481667876244, "learning_rate": 1.8153846153846155e-05, "loss": 2.109445631504059e-05, "step": 288460 }, { "epoch": 81.88191881918819, "grad_norm": 0.04581049829721451, "learning_rate": 1.8151007663922797e-05, "loss": 4.0489807724952696e-05, "step": 288470 }, { "epoch": 81.88475730911155, "grad_norm": 0.0032105508726090193, "learning_rate": 1.814816917399943e-05, "loss": 1.0290369391441346e-05, "step": 288480 }, { "epoch": 81.88759579903491, "grad_norm": 0.017619725316762924, "learning_rate": 1.8145330684076073e-05, "loss": 1.849234104156494e-05, "step": 288490 }, { "epoch": 81.89043428895828, "grad_norm": 0.009971735067665577, "learning_rate": 1.814249219415271e-05, "loss": 8.213762193918229e-05, "step": 288500 }, { "epoch": 81.89043428895828, "eval_accuracy": 0.9871558466331786, "eval_loss": 0.05843665823340416, "eval_runtime": 36.2355, "eval_samples_per_second": 434.022, "eval_steps_per_second": 6.789, "step": 288500 }, { "epoch": 81.89327277888164, "grad_norm": 0.0011541516287252307, "learning_rate": 1.8139653704229352e-05, "loss": 1.974720507860184e-05, "step": 288510 }, { "epoch": 81.896111268805, "grad_norm": 0.025016602128744125, "learning_rate": 1.813681521430599e-05, "loss": 9.757541120052338e-05, "step": 288520 }, { "epoch": 81.89894975872835, "grad_norm": 0.0019300916464999318, "learning_rate": 1.8133976724382628e-05, "loss": 0.0001054517924785614, "step": 288530 }, { "epoch": 81.90178824865171, "grad_norm": 0.017368417233228683, "learning_rate": 1.813113823445927e-05, "loss": 2.826489508152008e-05, "step": 288540 }, { "epoch": 81.90462673857508, "grad_norm": 0.004438039381057024, "learning_rate": 1.8128299744535907e-05, "loss": 2.2441707551479338e-05, "step": 288550 }, { "epoch": 81.90746522849844, "grad_norm": 0.016720931977033615, "learning_rate": 1.8125461254612545e-05, "loss": 0.00010235682129859925, "step": 288560 }, { "epoch": 81.9103037184218, "grad_norm": 0.0004495041794143617, "learning_rate": 1.8122622764689187e-05, "loss": 2.1042115986347198e-05, "step": 288570 }, { "epoch": 81.91314220834516, "grad_norm": 0.010576210916042328, "learning_rate": 1.8119784274765825e-05, "loss": 2.0502880215644836e-05, "step": 288580 }, { "epoch": 81.91598069826853, "grad_norm": 0.04519074410200119, "learning_rate": 1.8116945784842466e-05, "loss": 6.679948419332504e-05, "step": 288590 }, { "epoch": 81.91881918819188, "grad_norm": 0.0017313237767666578, "learning_rate": 1.8114107294919104e-05, "loss": 1.2720003724098206e-05, "step": 288600 }, { "epoch": 81.92165767811524, "grad_norm": 0.011813746765255928, "learning_rate": 1.8111268804995742e-05, "loss": 2.072323113679886e-05, "step": 288610 }, { "epoch": 81.9244961680386, "grad_norm": 0.002445642603561282, "learning_rate": 1.8108430315072383e-05, "loss": 1.0781921446323395e-05, "step": 288620 }, { "epoch": 81.92733465796196, "grad_norm": 0.009002143517136574, "learning_rate": 1.810559182514902e-05, "loss": 0.0002573922276496887, "step": 288630 }, { "epoch": 81.93017314788533, "grad_norm": 0.003681160043925047, "learning_rate": 1.8102753335225663e-05, "loss": 0.00015327557921409606, "step": 288640 }, { "epoch": 81.93301163780869, "grad_norm": 0.027641136199235916, "learning_rate": 1.8099914845302297e-05, "loss": 5.871430039405823e-05, "step": 288650 }, { "epoch": 81.93585012773205, "grad_norm": 0.015061940997838974, "learning_rate": 1.809707635537894e-05, "loss": 1.6133859753608702e-05, "step": 288660 }, { "epoch": 81.9386886176554, "grad_norm": 0.004233133047819138, "learning_rate": 1.809423786545558e-05, "loss": 3.300737589597702e-05, "step": 288670 }, { "epoch": 81.94152710757876, "grad_norm": 0.0013659859541803598, "learning_rate": 1.8091399375532218e-05, "loss": 3.451965749263763e-05, "step": 288680 }, { "epoch": 81.94436559750213, "grad_norm": 0.019563671201467514, "learning_rate": 1.8088560885608856e-05, "loss": 5.504488945007324e-05, "step": 288690 }, { "epoch": 81.94720408742549, "grad_norm": 0.006594452075660229, "learning_rate": 1.8085722395685494e-05, "loss": 2.4089962244033815e-05, "step": 288700 }, { "epoch": 81.95004257734885, "grad_norm": 0.0028469727840274572, "learning_rate": 1.8082883905762135e-05, "loss": 5.475860089063644e-05, "step": 288710 }, { "epoch": 81.95288106727222, "grad_norm": 0.00238772202283144, "learning_rate": 1.8080045415838777e-05, "loss": 4.9103982746601105e-05, "step": 288720 }, { "epoch": 81.95571955719558, "grad_norm": 0.0023108297027647495, "learning_rate": 1.8077206925915415e-05, "loss": 0.0005261659622192383, "step": 288730 }, { "epoch": 81.95855804711893, "grad_norm": 0.06783691793680191, "learning_rate": 1.8074368435992053e-05, "loss": 3.480017185211182e-05, "step": 288740 }, { "epoch": 81.96139653704229, "grad_norm": 0.0015508861979469657, "learning_rate": 1.807152994606869e-05, "loss": 0.00010443255305290223, "step": 288750 }, { "epoch": 81.96423502696565, "grad_norm": 0.007658268325030804, "learning_rate": 1.8068691456145332e-05, "loss": 1.4895014464855194e-05, "step": 288760 }, { "epoch": 81.96707351688902, "grad_norm": 0.0010797700379043818, "learning_rate": 1.806585296622197e-05, "loss": 1.7121993005275725e-05, "step": 288770 }, { "epoch": 81.96991200681238, "grad_norm": 0.03023510053753853, "learning_rate": 1.8063014476298608e-05, "loss": 3.440883010625839e-05, "step": 288780 }, { "epoch": 81.97275049673574, "grad_norm": 0.004064709879457951, "learning_rate": 1.806017598637525e-05, "loss": 3.017168492078781e-05, "step": 288790 }, { "epoch": 81.97558898665909, "grad_norm": 0.032426681369543076, "learning_rate": 1.8057337496451887e-05, "loss": 7.061492651700973e-05, "step": 288800 }, { "epoch": 81.97842747658245, "grad_norm": 0.016656793653964996, "learning_rate": 1.805449900652853e-05, "loss": 2.6148371398448943e-05, "step": 288810 }, { "epoch": 81.98126596650582, "grad_norm": 0.0009588073007762432, "learning_rate": 1.8051660516605167e-05, "loss": 1.7655454576015474e-05, "step": 288820 }, { "epoch": 81.98410445642918, "grad_norm": 0.0013639758108183742, "learning_rate": 1.8048822026681805e-05, "loss": 1.950729638338089e-05, "step": 288830 }, { "epoch": 81.98694294635254, "grad_norm": 0.0030571320094168186, "learning_rate": 1.8045983536758446e-05, "loss": 2.6636384427547453e-05, "step": 288840 }, { "epoch": 81.9897814362759, "grad_norm": 0.002990373410284519, "learning_rate": 1.8043145046835084e-05, "loss": 1.4185719192028045e-05, "step": 288850 }, { "epoch": 81.99261992619927, "grad_norm": 0.024935679510235786, "learning_rate": 1.8040306556911722e-05, "loss": 1.4130212366580964e-05, "step": 288860 }, { "epoch": 81.99545841612262, "grad_norm": 0.0015530480304732919, "learning_rate": 1.8037468066988363e-05, "loss": 1.6536563634872437e-05, "step": 288870 }, { "epoch": 81.99829690604598, "grad_norm": 0.0018689959542825818, "learning_rate": 1.8034629577065e-05, "loss": 3.1366944313049316e-05, "step": 288880 }, { "epoch": 82.00113539596934, "grad_norm": 0.002636493882164359, "learning_rate": 1.8031791087141643e-05, "loss": 3.7621569936163725e-05, "step": 288890 }, { "epoch": 82.0039738858927, "grad_norm": 0.0024733429308980703, "learning_rate": 1.802895259721828e-05, "loss": 9.650364518165588e-06, "step": 288900 }, { "epoch": 82.00681237581607, "grad_norm": 0.0037517512682825327, "learning_rate": 1.802611410729492e-05, "loss": 1.9642896950244905e-05, "step": 288910 }, { "epoch": 82.00965086573943, "grad_norm": 0.0007212692289613187, "learning_rate": 1.802327561737156e-05, "loss": 1.4477036893367768e-05, "step": 288920 }, { "epoch": 82.0124893556628, "grad_norm": 0.0022064174991101027, "learning_rate": 1.8020437127448198e-05, "loss": 1.7777644097805022e-05, "step": 288930 }, { "epoch": 82.01532784558614, "grad_norm": 0.0012261916417628527, "learning_rate": 1.801759863752484e-05, "loss": 1.5463307499885558e-05, "step": 288940 }, { "epoch": 82.0181663355095, "grad_norm": 0.006138665601611137, "learning_rate": 1.8014760147601478e-05, "loss": 1.4878809452056885e-05, "step": 288950 }, { "epoch": 82.02100482543287, "grad_norm": 0.0480022206902504, "learning_rate": 1.8011921657678115e-05, "loss": 2.0505674183368682e-05, "step": 288960 }, { "epoch": 82.02384331535623, "grad_norm": 0.0019191722385585308, "learning_rate": 1.8009083167754757e-05, "loss": 1.836400479078293e-05, "step": 288970 }, { "epoch": 82.0266818052796, "grad_norm": 0.05224478617310524, "learning_rate": 1.8006244677831395e-05, "loss": 1.9840523600578307e-05, "step": 288980 }, { "epoch": 82.02952029520296, "grad_norm": 0.0021594634745270014, "learning_rate": 1.8003406187908033e-05, "loss": 1.1886842548847198e-05, "step": 288990 }, { "epoch": 82.03235878512632, "grad_norm": 0.0022227433510124683, "learning_rate": 1.8000567697984674e-05, "loss": 1.4497153460979462e-05, "step": 289000 }, { "epoch": 82.03235878512632, "eval_accuracy": 0.9881096203980416, "eval_loss": 0.053075965493917465, "eval_runtime": 34.8835, "eval_samples_per_second": 450.843, "eval_steps_per_second": 7.052, "step": 289000 }, { "epoch": 82.03519727504967, "grad_norm": 0.007325160317122936, "learning_rate": 1.7997729208061312e-05, "loss": 1.4784373342990876e-05, "step": 289010 }, { "epoch": 82.03803576497303, "grad_norm": 0.0013882351340726018, "learning_rate": 1.7994890718137954e-05, "loss": 5.534477531909943e-06, "step": 289020 }, { "epoch": 82.0408742548964, "grad_norm": 0.008536609821021557, "learning_rate": 1.7992052228214588e-05, "loss": 1.3169832527637482e-05, "step": 289030 }, { "epoch": 82.04371274481976, "grad_norm": 0.002479142975062132, "learning_rate": 1.798921373829123e-05, "loss": 1.163054257631302e-05, "step": 289040 }, { "epoch": 82.04655123474312, "grad_norm": 0.001008432824164629, "learning_rate": 1.798637524836787e-05, "loss": 1.8216855823993684e-05, "step": 289050 }, { "epoch": 82.04938972466648, "grad_norm": 0.0009284315747208893, "learning_rate": 1.798353675844451e-05, "loss": 7.691234350204467e-06, "step": 289060 }, { "epoch": 82.05222821458983, "grad_norm": 0.0004167939187027514, "learning_rate": 1.7980698268521147e-05, "loss": 2.7753040194511412e-05, "step": 289070 }, { "epoch": 82.0550667045132, "grad_norm": 0.0010819623712450266, "learning_rate": 1.7977859778597785e-05, "loss": 7.884949445724487e-06, "step": 289080 }, { "epoch": 82.05790519443656, "grad_norm": 0.00384760950691998, "learning_rate": 1.7975021288674426e-05, "loss": 1.2675113976001739e-05, "step": 289090 }, { "epoch": 82.06074368435992, "grad_norm": 0.0030442269053310156, "learning_rate": 1.7972182798751068e-05, "loss": 8.770264685153961e-06, "step": 289100 }, { "epoch": 82.06358217428328, "grad_norm": 0.0020944103598594666, "learning_rate": 1.7969344308827706e-05, "loss": 1.026540994644165e-05, "step": 289110 }, { "epoch": 82.06642066420665, "grad_norm": 0.007885090075433254, "learning_rate": 1.7966505818904344e-05, "loss": 1.6254745423793794e-05, "step": 289120 }, { "epoch": 82.06925915413001, "grad_norm": 0.0013259630650281906, "learning_rate": 1.796366732898098e-05, "loss": 3.653205931186676e-06, "step": 289130 }, { "epoch": 82.07209764405336, "grad_norm": 0.00082092807861045, "learning_rate": 1.7960828839057623e-05, "loss": 1.537855714559555e-05, "step": 289140 }, { "epoch": 82.07493613397672, "grad_norm": 0.00477113900706172, "learning_rate": 1.7957990349134264e-05, "loss": 7.651932537555695e-06, "step": 289150 }, { "epoch": 82.07777462390008, "grad_norm": 0.0056443470530211926, "learning_rate": 1.79551518592109e-05, "loss": 1.1050142347812653e-05, "step": 289160 }, { "epoch": 82.08061311382345, "grad_norm": 0.004331658594310284, "learning_rate": 1.795231336928754e-05, "loss": 1.5102140605449676e-05, "step": 289170 }, { "epoch": 82.08345160374681, "grad_norm": 0.03687672317028046, "learning_rate": 1.7949474879364178e-05, "loss": 3.414079546928406e-05, "step": 289180 }, { "epoch": 82.08629009367017, "grad_norm": 0.0033888998441398144, "learning_rate": 1.794663638944082e-05, "loss": 4.3910741806030276e-05, "step": 289190 }, { "epoch": 82.08912858359353, "grad_norm": 0.0015524053014814854, "learning_rate": 1.7943797899517458e-05, "loss": 1.2330524623394012e-05, "step": 289200 }, { "epoch": 82.09196707351688, "grad_norm": 0.0028316311072558165, "learning_rate": 1.7940959409594096e-05, "loss": 1.0464712977409362e-05, "step": 289210 }, { "epoch": 82.09480556344025, "grad_norm": 0.04339379444718361, "learning_rate": 1.7938120919670737e-05, "loss": 3.737844526767731e-05, "step": 289220 }, { "epoch": 82.09764405336361, "grad_norm": 0.001971668330952525, "learning_rate": 1.7935282429747375e-05, "loss": 2.6742741465568542e-05, "step": 289230 }, { "epoch": 82.10048254328697, "grad_norm": 0.003071041079238057, "learning_rate": 1.7932443939824013e-05, "loss": 1.949351280927658e-05, "step": 289240 }, { "epoch": 82.10332103321034, "grad_norm": 0.004384030122309923, "learning_rate": 1.7929605449900654e-05, "loss": 1.1884421110153198e-05, "step": 289250 }, { "epoch": 82.1061595231337, "grad_norm": 0.0007688089972361922, "learning_rate": 1.7926766959977292e-05, "loss": 1.1008791625499726e-05, "step": 289260 }, { "epoch": 82.10899801305705, "grad_norm": 0.056394096463918686, "learning_rate": 1.7923928470053934e-05, "loss": 1.8055364489555358e-05, "step": 289270 }, { "epoch": 82.11183650298041, "grad_norm": 0.0015386886661872268, "learning_rate": 1.7921089980130572e-05, "loss": 8.304789662361146e-06, "step": 289280 }, { "epoch": 82.11467499290377, "grad_norm": 0.023650990799069405, "learning_rate": 1.791825149020721e-05, "loss": 1.635141670703888e-05, "step": 289290 }, { "epoch": 82.11751348282714, "grad_norm": 0.0007540472433902323, "learning_rate": 1.791541300028385e-05, "loss": 1.0613352060317993e-05, "step": 289300 }, { "epoch": 82.1203519727505, "grad_norm": 0.01137526985257864, "learning_rate": 1.791257451036049e-05, "loss": 1.0648556053638458e-05, "step": 289310 }, { "epoch": 82.12319046267386, "grad_norm": 0.0022471221163868904, "learning_rate": 1.790973602043713e-05, "loss": 1.669079065322876e-05, "step": 289320 }, { "epoch": 82.12602895259722, "grad_norm": 0.0016478687757626176, "learning_rate": 1.7906897530513765e-05, "loss": 8.684955537319183e-06, "step": 289330 }, { "epoch": 82.12886744252057, "grad_norm": 0.0008385838009417057, "learning_rate": 1.7904059040590406e-05, "loss": 8.715689182281494e-06, "step": 289340 }, { "epoch": 82.13170593244394, "grad_norm": 0.001849012915045023, "learning_rate": 1.7901220550667048e-05, "loss": 7.210299372673035e-06, "step": 289350 }, { "epoch": 82.1345444223673, "grad_norm": 0.007051368243992329, "learning_rate": 1.7898382060743686e-05, "loss": 1.3162940740585327e-05, "step": 289360 }, { "epoch": 82.13738291229066, "grad_norm": 0.001796492375433445, "learning_rate": 1.7895543570820324e-05, "loss": 4.1153281927108765e-05, "step": 289370 }, { "epoch": 82.14022140221402, "grad_norm": 0.01688830554485321, "learning_rate": 1.7892705080896962e-05, "loss": 1.5833787620067598e-05, "step": 289380 }, { "epoch": 82.14305989213739, "grad_norm": 0.0059095327742397785, "learning_rate": 1.7889866590973603e-05, "loss": 1.1341273784637452e-05, "step": 289390 }, { "epoch": 82.14589838206075, "grad_norm": 0.008053891360759735, "learning_rate": 1.7887028101050244e-05, "loss": 3.184638917446137e-05, "step": 289400 }, { "epoch": 82.1487368719841, "grad_norm": 0.0010977124329656363, "learning_rate": 1.7884189611126882e-05, "loss": 1.2965872883796692e-05, "step": 289410 }, { "epoch": 82.15157536190746, "grad_norm": 0.00020475649216677994, "learning_rate": 1.788135112120352e-05, "loss": 9.986385703086852e-06, "step": 289420 }, { "epoch": 82.15441385183082, "grad_norm": 0.007951888255774975, "learning_rate": 1.787851263128016e-05, "loss": 1.4452263712882996e-05, "step": 289430 }, { "epoch": 82.15725234175419, "grad_norm": 0.002699899487197399, "learning_rate": 1.78756741413568e-05, "loss": 1.2028031051158905e-05, "step": 289440 }, { "epoch": 82.16009083167755, "grad_norm": 0.42846810817718506, "learning_rate": 1.7872835651433438e-05, "loss": 8.508730679750443e-05, "step": 289450 }, { "epoch": 82.16292932160091, "grad_norm": 0.0008689822861924767, "learning_rate": 1.7869997161510076e-05, "loss": 1.2245774269104003e-05, "step": 289460 }, { "epoch": 82.16576781152428, "grad_norm": 0.0008755746530368924, "learning_rate": 1.7867158671586717e-05, "loss": 1.2123771011829376e-05, "step": 289470 }, { "epoch": 82.16860630144762, "grad_norm": 0.0016753077507019043, "learning_rate": 1.7864320181663355e-05, "loss": 9.537395089864731e-05, "step": 289480 }, { "epoch": 82.17144479137099, "grad_norm": 0.007691546343266964, "learning_rate": 1.7861481691739997e-05, "loss": 3.518275916576385e-05, "step": 289490 }, { "epoch": 82.17428328129435, "grad_norm": 0.0022558702621608973, "learning_rate": 1.7858643201816635e-05, "loss": 2.6211701333522795e-05, "step": 289500 }, { "epoch": 82.17428328129435, "eval_accuracy": 0.9874101863038087, "eval_loss": 0.05618131533265114, "eval_runtime": 35.5167, "eval_samples_per_second": 442.806, "eval_steps_per_second": 6.926, "step": 289500 }, { "epoch": 82.17712177121771, "grad_norm": 0.00344542576931417, "learning_rate": 1.7855804711893273e-05, "loss": 1.2014061212539673e-05, "step": 289510 }, { "epoch": 82.17996026114108, "grad_norm": 0.0014060774119570851, "learning_rate": 1.7852966221969914e-05, "loss": 1.1455826461315155e-05, "step": 289520 }, { "epoch": 82.18279875106444, "grad_norm": 0.002727795159444213, "learning_rate": 1.7850127732046552e-05, "loss": 1.3552606105804443e-05, "step": 289530 }, { "epoch": 82.18563724098779, "grad_norm": 0.00041513776523061097, "learning_rate": 1.784728924212319e-05, "loss": 1.4398433268070222e-05, "step": 289540 }, { "epoch": 82.18847573091115, "grad_norm": 0.0021025268360972404, "learning_rate": 1.784445075219983e-05, "loss": 8.287951350212097e-05, "step": 289550 }, { "epoch": 82.19131422083451, "grad_norm": 0.020587127655744553, "learning_rate": 1.784161226227647e-05, "loss": 3.3847056329250336e-05, "step": 289560 }, { "epoch": 82.19415271075788, "grad_norm": 0.0018232687143608928, "learning_rate": 1.783877377235311e-05, "loss": 1.2310408055782319e-05, "step": 289570 }, { "epoch": 82.19699120068124, "grad_norm": 0.012190507724881172, "learning_rate": 1.783593528242975e-05, "loss": 2.2804923355579375e-05, "step": 289580 }, { "epoch": 82.1998296906046, "grad_norm": 0.02348177134990692, "learning_rate": 1.7833096792506387e-05, "loss": 1.8808431923389436e-05, "step": 289590 }, { "epoch": 82.20266818052797, "grad_norm": 0.001118206069804728, "learning_rate": 1.7830258302583028e-05, "loss": 2.6295334100723268e-05, "step": 289600 }, { "epoch": 82.20550667045131, "grad_norm": 0.0008581670699641109, "learning_rate": 1.7827419812659666e-05, "loss": 5.9355422854423524e-05, "step": 289610 }, { "epoch": 82.20834516037468, "grad_norm": 0.0026130948681384325, "learning_rate": 1.7824581322736307e-05, "loss": 1.3890489935874938e-05, "step": 289620 }, { "epoch": 82.21118365029804, "grad_norm": 0.0009550384129397571, "learning_rate": 1.7821742832812942e-05, "loss": 1.5424005687236787e-05, "step": 289630 }, { "epoch": 82.2140221402214, "grad_norm": 0.005848814733326435, "learning_rate": 1.7818904342889583e-05, "loss": 1.0518543422222138e-05, "step": 289640 }, { "epoch": 82.21686063014477, "grad_norm": 0.0031011321116238832, "learning_rate": 1.7816065852966225e-05, "loss": 2.6107020676136018e-05, "step": 289650 }, { "epoch": 82.21969912006813, "grad_norm": 0.0008435424533672631, "learning_rate": 1.7813227363042863e-05, "loss": 9.190663695335388e-06, "step": 289660 }, { "epoch": 82.22253760999149, "grad_norm": 0.002499853726476431, "learning_rate": 1.78103888731195e-05, "loss": 2.0891614258289338e-05, "step": 289670 }, { "epoch": 82.22537609991484, "grad_norm": 0.006394925992935896, "learning_rate": 1.780755038319614e-05, "loss": 8.174777030944825e-06, "step": 289680 }, { "epoch": 82.2282145898382, "grad_norm": 0.0010563459945842624, "learning_rate": 1.780471189327278e-05, "loss": 7.28461891412735e-06, "step": 289690 }, { "epoch": 82.23105307976157, "grad_norm": 0.00404254300519824, "learning_rate": 1.780187340334942e-05, "loss": 4.0357932448387146e-05, "step": 289700 }, { "epoch": 82.23389156968493, "grad_norm": 0.0021975762210786343, "learning_rate": 1.7799034913426056e-05, "loss": 1.795124262571335e-05, "step": 289710 }, { "epoch": 82.23673005960829, "grad_norm": 0.014024103060364723, "learning_rate": 1.7796196423502697e-05, "loss": 1.1481530964374543e-05, "step": 289720 }, { "epoch": 82.23956854953165, "grad_norm": 0.011706752702593803, "learning_rate": 1.7793357933579335e-05, "loss": 1.0632164776325227e-05, "step": 289730 }, { "epoch": 82.242407039455, "grad_norm": 0.0033374184276908636, "learning_rate": 1.7790519443655977e-05, "loss": 2.0636245608329773e-05, "step": 289740 }, { "epoch": 82.24524552937837, "grad_norm": 0.0022859894670546055, "learning_rate": 1.7787680953732615e-05, "loss": 1.983996480703354e-05, "step": 289750 }, { "epoch": 82.24808401930173, "grad_norm": 0.0019960012286901474, "learning_rate": 1.7784842463809253e-05, "loss": 1.2290850281715394e-05, "step": 289760 }, { "epoch": 82.25092250922509, "grad_norm": 0.0006426101317629218, "learning_rate": 1.7782003973885894e-05, "loss": 2.434235066175461e-05, "step": 289770 }, { "epoch": 82.25376099914845, "grad_norm": 0.00136626570019871, "learning_rate": 1.7779165483962532e-05, "loss": 8.665770292282104e-06, "step": 289780 }, { "epoch": 82.25659948907182, "grad_norm": 0.0016848333179950714, "learning_rate": 1.7776326994039173e-05, "loss": 1.1792033910751343e-05, "step": 289790 }, { "epoch": 82.25943797899518, "grad_norm": 0.0013099535135552287, "learning_rate": 1.777348850411581e-05, "loss": 5.899928510189056e-06, "step": 289800 }, { "epoch": 82.26227646891853, "grad_norm": 0.01086276862770319, "learning_rate": 1.777065001419245e-05, "loss": 1.18304044008255e-05, "step": 289810 }, { "epoch": 82.26511495884189, "grad_norm": 0.0020153108052909374, "learning_rate": 1.776781152426909e-05, "loss": 7.678940892219544e-06, "step": 289820 }, { "epoch": 82.26795344876525, "grad_norm": 0.0006477105780504644, "learning_rate": 1.776497303434573e-05, "loss": 9.969621896743774e-06, "step": 289830 }, { "epoch": 82.27079193868862, "grad_norm": 0.001994789345189929, "learning_rate": 1.7762134544422367e-05, "loss": 6.0087069869041445e-06, "step": 289840 }, { "epoch": 82.27363042861198, "grad_norm": 0.0029438436031341553, "learning_rate": 1.7759296054499008e-05, "loss": 1.850072294473648e-05, "step": 289850 }, { "epoch": 82.27646891853534, "grad_norm": 0.0018526572966948152, "learning_rate": 1.7756457564575646e-05, "loss": 7.536262273788452e-06, "step": 289860 }, { "epoch": 82.2793074084587, "grad_norm": 0.0031629169825464487, "learning_rate": 1.7753619074652287e-05, "loss": 1.86307355761528e-05, "step": 289870 }, { "epoch": 82.28214589838205, "grad_norm": 0.0007995313499122858, "learning_rate": 1.7750780584728925e-05, "loss": 1.0155700147151947e-05, "step": 289880 }, { "epoch": 82.28498438830542, "grad_norm": 0.0014055969659239054, "learning_rate": 1.7747942094805563e-05, "loss": 9.106472134590149e-06, "step": 289890 }, { "epoch": 82.28782287822878, "grad_norm": 0.003984510898590088, "learning_rate": 1.7745103604882205e-05, "loss": 1.1066347360610962e-05, "step": 289900 }, { "epoch": 82.29066136815214, "grad_norm": 0.0006696299533359706, "learning_rate": 1.7742265114958843e-05, "loss": 8.276291191577912e-06, "step": 289910 }, { "epoch": 82.2934998580755, "grad_norm": 0.0013343002647161484, "learning_rate": 1.773942662503548e-05, "loss": 9.083189070224762e-06, "step": 289920 }, { "epoch": 82.29633834799887, "grad_norm": 0.0025231088511645794, "learning_rate": 1.773658813511212e-05, "loss": 6.270594894886017e-06, "step": 289930 }, { "epoch": 82.29917683792223, "grad_norm": 0.005365937482565641, "learning_rate": 1.773374964518876e-05, "loss": 8.272938430309296e-06, "step": 289940 }, { "epoch": 82.30201532784558, "grad_norm": 0.0011138884583488107, "learning_rate": 1.77309111552654e-05, "loss": 1.1370703577995301e-05, "step": 289950 }, { "epoch": 82.30485381776894, "grad_norm": 0.010852992534637451, "learning_rate": 1.772807266534204e-05, "loss": 1.2726709246635438e-05, "step": 289960 }, { "epoch": 82.3076923076923, "grad_norm": 0.0071370904333889484, "learning_rate": 1.7725234175418677e-05, "loss": 2.7803704142570496e-05, "step": 289970 }, { "epoch": 82.31053079761567, "grad_norm": 0.0022913829889148474, "learning_rate": 1.7722395685495315e-05, "loss": 1.1363625526428223e-05, "step": 289980 }, { "epoch": 82.31336928753903, "grad_norm": 0.0011966770980507135, "learning_rate": 1.7719557195571957e-05, "loss": 1.1014007031917573e-05, "step": 289990 }, { "epoch": 82.3162077774624, "grad_norm": 0.00399529654532671, "learning_rate": 1.7716718705648598e-05, "loss": 9.835138916969299e-06, "step": 290000 }, { "epoch": 82.3162077774624, "eval_accuracy": 0.9877281108920964, "eval_loss": 0.052793119102716446, "eval_runtime": 36.1951, "eval_samples_per_second": 434.506, "eval_steps_per_second": 6.796, "step": 290000 }, { "epoch": 82.31904626738574, "grad_norm": 0.0025295319501310587, "learning_rate": 1.7713880215725233e-05, "loss": 9.103491902351379e-06, "step": 290010 }, { "epoch": 82.3218847573091, "grad_norm": 0.000799448462203145, "learning_rate": 1.7711041725801874e-05, "loss": 1.1840835213661194e-05, "step": 290020 }, { "epoch": 82.32472324723247, "grad_norm": 0.0013872137060388923, "learning_rate": 1.7708203235878512e-05, "loss": 1.1018849909305572e-05, "step": 290030 }, { "epoch": 82.32756173715583, "grad_norm": 0.0031858347356319427, "learning_rate": 1.7705364745955154e-05, "loss": 1.3185292482376099e-05, "step": 290040 }, { "epoch": 82.3304002270792, "grad_norm": 0.006588018964976072, "learning_rate": 1.770252625603179e-05, "loss": 1.2829527258872986e-05, "step": 290050 }, { "epoch": 82.33323871700256, "grad_norm": 0.001428307848982513, "learning_rate": 1.769968776610843e-05, "loss": 8.794479072093964e-06, "step": 290060 }, { "epoch": 82.33607720692592, "grad_norm": 0.001298220013268292, "learning_rate": 1.769684927618507e-05, "loss": 8.079037070274352e-06, "step": 290070 }, { "epoch": 82.33891569684927, "grad_norm": 0.00438068900257349, "learning_rate": 1.769401078626171e-05, "loss": 1.2828782200813293e-05, "step": 290080 }, { "epoch": 82.34175418677263, "grad_norm": 0.0017749677645042539, "learning_rate": 1.769117229633835e-05, "loss": 1.0840781033039094e-05, "step": 290090 }, { "epoch": 82.344592676696, "grad_norm": 0.0008869463345035911, "learning_rate": 1.7688333806414988e-05, "loss": 1.2121349573135377e-05, "step": 290100 }, { "epoch": 82.34743116661936, "grad_norm": 0.001485146931372583, "learning_rate": 1.7685495316491626e-05, "loss": 1.8673390150070192e-05, "step": 290110 }, { "epoch": 82.35026965654272, "grad_norm": 0.013990889303386211, "learning_rate": 1.7682656826568268e-05, "loss": 1.6351975500583648e-05, "step": 290120 }, { "epoch": 82.35310814646608, "grad_norm": 0.0018399786204099655, "learning_rate": 1.7679818336644906e-05, "loss": 4.028081893920898e-05, "step": 290130 }, { "epoch": 82.35594663638945, "grad_norm": 0.00037309579784050584, "learning_rate": 1.7676979846721544e-05, "loss": 1.4781951904296875e-05, "step": 290140 }, { "epoch": 82.3587851263128, "grad_norm": 0.0008985360618680716, "learning_rate": 1.7674141356798185e-05, "loss": 1.3947859406471252e-05, "step": 290150 }, { "epoch": 82.36162361623616, "grad_norm": 0.042951468378305435, "learning_rate": 1.7671302866874823e-05, "loss": 2.9667094349861144e-05, "step": 290160 }, { "epoch": 82.36446210615952, "grad_norm": 0.002257185522466898, "learning_rate": 1.7668464376951464e-05, "loss": 2.4498440325260162e-05, "step": 290170 }, { "epoch": 82.36730059608288, "grad_norm": 0.003420493798330426, "learning_rate": 1.7665625887028102e-05, "loss": 2.348758280277252e-05, "step": 290180 }, { "epoch": 82.37013908600625, "grad_norm": 0.00029525082209147513, "learning_rate": 1.766278739710474e-05, "loss": 9.360909461975098e-06, "step": 290190 }, { "epoch": 82.37297757592961, "grad_norm": 0.010540662333369255, "learning_rate": 1.765994890718138e-05, "loss": 1.677908003330231e-05, "step": 290200 }, { "epoch": 82.37581606585297, "grad_norm": 0.02775215171277523, "learning_rate": 1.765711041725802e-05, "loss": 0.00010993406176567078, "step": 290210 }, { "epoch": 82.37865455577632, "grad_norm": 0.004769919440150261, "learning_rate": 1.7654271927334658e-05, "loss": 0.00022785346955060958, "step": 290220 }, { "epoch": 82.38149304569968, "grad_norm": 0.004060169216245413, "learning_rate": 1.76514334374113e-05, "loss": 0.00012720730155706405, "step": 290230 }, { "epoch": 82.38433153562305, "grad_norm": 0.007199675310403109, "learning_rate": 1.7648594947487937e-05, "loss": 0.0005120454356074333, "step": 290240 }, { "epoch": 82.38717002554641, "grad_norm": 0.4080602526664734, "learning_rate": 1.764575645756458e-05, "loss": 0.002153703570365906, "step": 290250 }, { "epoch": 82.39000851546977, "grad_norm": 0.12752540409564972, "learning_rate": 1.7642917967641216e-05, "loss": 0.0013297729194164277, "step": 290260 }, { "epoch": 82.39284700539314, "grad_norm": 0.005199226085096598, "learning_rate": 1.7640079477717854e-05, "loss": 0.007339802384376526, "step": 290270 }, { "epoch": 82.39568549531649, "grad_norm": 0.490189790725708, "learning_rate": 1.7637240987794496e-05, "loss": 0.0009326012805104256, "step": 290280 }, { "epoch": 82.39852398523985, "grad_norm": 14.669075965881348, "learning_rate": 1.7634402497871134e-05, "loss": 0.0035248931497335436, "step": 290290 }, { "epoch": 82.40136247516321, "grad_norm": 0.04620184376835823, "learning_rate": 1.7631564007947775e-05, "loss": 0.0007105572149157524, "step": 290300 }, { "epoch": 82.40420096508657, "grad_norm": 0.008708338253200054, "learning_rate": 1.762872551802441e-05, "loss": 0.00012453924864530563, "step": 290310 }, { "epoch": 82.40703945500994, "grad_norm": 0.03011120669543743, "learning_rate": 1.762588702810105e-05, "loss": 0.002846446819603443, "step": 290320 }, { "epoch": 82.4098779449333, "grad_norm": 0.0004927797126583755, "learning_rate": 1.7623048538177692e-05, "loss": 5.25202602148056e-05, "step": 290330 }, { "epoch": 82.41271643485666, "grad_norm": 12.573472023010254, "learning_rate": 1.762021004825433e-05, "loss": 0.005464616417884827, "step": 290340 }, { "epoch": 82.41555492478001, "grad_norm": 15.200692176818848, "learning_rate": 1.761737155833097e-05, "loss": 0.002930902875959873, "step": 290350 }, { "epoch": 82.41839341470337, "grad_norm": 0.20242159068584442, "learning_rate": 1.7614533068407606e-05, "loss": 3.5381689667701724e-05, "step": 290360 }, { "epoch": 82.42123190462674, "grad_norm": 0.02245601825416088, "learning_rate": 1.7611694578484248e-05, "loss": 0.0069811508059501644, "step": 290370 }, { "epoch": 82.4240703945501, "grad_norm": 0.009347489103674889, "learning_rate": 1.760885608856089e-05, "loss": 5.579143762588501e-05, "step": 290380 }, { "epoch": 82.42690888447346, "grad_norm": 0.00636763172224164, "learning_rate": 1.7606017598637524e-05, "loss": 2.430696040391922e-05, "step": 290390 }, { "epoch": 82.42974737439683, "grad_norm": 0.004753479268401861, "learning_rate": 1.7603179108714165e-05, "loss": 1.1940672993659974e-05, "step": 290400 }, { "epoch": 82.43258586432019, "grad_norm": 0.0018262373050674796, "learning_rate": 1.7600340618790803e-05, "loss": 0.004041790217161179, "step": 290410 }, { "epoch": 82.43542435424354, "grad_norm": 0.006863721180707216, "learning_rate": 1.7597502128867444e-05, "loss": 5.6848861277103424e-05, "step": 290420 }, { "epoch": 82.4382628441669, "grad_norm": 0.0021717497147619724, "learning_rate": 1.7594663638944082e-05, "loss": 2.6716850697994232e-05, "step": 290430 }, { "epoch": 82.44110133409026, "grad_norm": 0.0019052594434469938, "learning_rate": 1.759182514902072e-05, "loss": 0.0009440302848815918, "step": 290440 }, { "epoch": 82.44393982401363, "grad_norm": 0.00046960252802819014, "learning_rate": 1.7588986659097362e-05, "loss": 1.8878467381000517e-05, "step": 290450 }, { "epoch": 82.44677831393699, "grad_norm": 0.015180226415395737, "learning_rate": 1.7586148169174e-05, "loss": 1.818891614675522e-05, "step": 290460 }, { "epoch": 82.44961680386035, "grad_norm": 0.0019344870233908296, "learning_rate": 1.758330967925064e-05, "loss": 3.2168440520763396e-05, "step": 290470 }, { "epoch": 82.4524552937837, "grad_norm": 0.022852936759591103, "learning_rate": 1.758047118932728e-05, "loss": 0.0003778308629989624, "step": 290480 }, { "epoch": 82.45529378370706, "grad_norm": 0.002108577871695161, "learning_rate": 1.7577632699403917e-05, "loss": 1.6056932508945465e-05, "step": 290490 }, { "epoch": 82.45813227363043, "grad_norm": 0.01792205683887005, "learning_rate": 1.757479420948056e-05, "loss": 4.524216055870056e-05, "step": 290500 }, { "epoch": 82.45813227363043, "eval_accuracy": 0.9843581102562472, "eval_loss": 0.06738296896219254, "eval_runtime": 35.6517, "eval_samples_per_second": 441.129, "eval_steps_per_second": 6.9, "step": 290500 }, { "epoch": 82.46097076355379, "grad_norm": 0.004278857726603746, "learning_rate": 1.7571955719557197e-05, "loss": 4.18221578001976e-05, "step": 290510 }, { "epoch": 82.46380925347715, "grad_norm": 0.007216992788016796, "learning_rate": 1.7569117229633835e-05, "loss": 1.3935752213001251e-05, "step": 290520 }, { "epoch": 82.46664774340051, "grad_norm": 0.008454910479485989, "learning_rate": 1.7566278739710476e-05, "loss": 6.225276738405227e-05, "step": 290530 }, { "epoch": 82.46948623332388, "grad_norm": 0.0024063962046056986, "learning_rate": 1.7563440249787114e-05, "loss": 3.758855164051056e-05, "step": 290540 }, { "epoch": 82.47232472324723, "grad_norm": 0.0014625159092247486, "learning_rate": 1.7560601759863755e-05, "loss": 2.8769485652446747e-05, "step": 290550 }, { "epoch": 82.47516321317059, "grad_norm": 0.009016413241624832, "learning_rate": 1.7557763269940393e-05, "loss": 6.283614784479142e-05, "step": 290560 }, { "epoch": 82.47800170309395, "grad_norm": 0.007374242879450321, "learning_rate": 1.755492478001703e-05, "loss": 1.4578737318515777e-05, "step": 290570 }, { "epoch": 82.48084019301731, "grad_norm": 0.2865677773952484, "learning_rate": 1.7552086290093673e-05, "loss": 8.931737393140793e-05, "step": 290580 }, { "epoch": 82.48367868294068, "grad_norm": 0.004048585891723633, "learning_rate": 1.754924780017031e-05, "loss": 0.00013366062194108963, "step": 290590 }, { "epoch": 82.48651717286404, "grad_norm": 0.0015719837974756956, "learning_rate": 1.754640931024695e-05, "loss": 2.0816735923290252e-05, "step": 290600 }, { "epoch": 82.4893556627874, "grad_norm": 0.0030938286799937487, "learning_rate": 1.7543570820323587e-05, "loss": 2.454947680234909e-05, "step": 290610 }, { "epoch": 82.49219415271075, "grad_norm": 0.01146309357136488, "learning_rate": 1.7540732330400228e-05, "loss": 2.4642236530780794e-05, "step": 290620 }, { "epoch": 82.49503264263411, "grad_norm": 0.0045545874163508415, "learning_rate": 1.753789384047687e-05, "loss": 3.173723816871643e-05, "step": 290630 }, { "epoch": 82.49787113255748, "grad_norm": 0.03397361934185028, "learning_rate": 1.7535055350553507e-05, "loss": 3.691110759973526e-05, "step": 290640 }, { "epoch": 82.50070962248084, "grad_norm": 0.0076637910678982735, "learning_rate": 1.7532216860630145e-05, "loss": 0.0010022474452853203, "step": 290650 }, { "epoch": 82.5035481124042, "grad_norm": 0.003955052699893713, "learning_rate": 1.7529378370706783e-05, "loss": 0.00013047680258750915, "step": 290660 }, { "epoch": 82.50638660232757, "grad_norm": 0.0038569041062146425, "learning_rate": 1.7526539880783425e-05, "loss": 6.509982049465179e-05, "step": 290670 }, { "epoch": 82.50922509225093, "grad_norm": 0.002424046862870455, "learning_rate": 1.7523701390860066e-05, "loss": 4.2539648711681365e-05, "step": 290680 }, { "epoch": 82.51206358217428, "grad_norm": 0.019609272480010986, "learning_rate": 1.75208629009367e-05, "loss": 2.1517835557460784e-05, "step": 290690 }, { "epoch": 82.51490207209764, "grad_norm": 0.0014131433563306928, "learning_rate": 1.7518024411013342e-05, "loss": 1.624021679162979e-05, "step": 290700 }, { "epoch": 82.517740562021, "grad_norm": 0.0020774444565176964, "learning_rate": 1.751518592108998e-05, "loss": 2.101082354784012e-05, "step": 290710 }, { "epoch": 82.52057905194437, "grad_norm": 0.08099626004695892, "learning_rate": 1.751234743116662e-05, "loss": 3.7723593413829805e-05, "step": 290720 }, { "epoch": 82.52341754186773, "grad_norm": 0.004683698993176222, "learning_rate": 1.750950894124326e-05, "loss": 1.8565356731414796e-05, "step": 290730 }, { "epoch": 82.52625603179109, "grad_norm": 0.001745110028423369, "learning_rate": 1.7506670451319897e-05, "loss": 2.6981905102729796e-05, "step": 290740 }, { "epoch": 82.52909452171444, "grad_norm": 0.013214832171797752, "learning_rate": 1.750383196139654e-05, "loss": 2.040006220340729e-05, "step": 290750 }, { "epoch": 82.5319330116378, "grad_norm": 0.07065624743700027, "learning_rate": 1.7500993471473177e-05, "loss": 2.7331523597240448e-05, "step": 290760 }, { "epoch": 82.53477150156117, "grad_norm": 0.0038450625725090504, "learning_rate": 1.7498154981549815e-05, "loss": 2.7098879218101502e-05, "step": 290770 }, { "epoch": 82.53760999148453, "grad_norm": 0.02497211843729019, "learning_rate": 1.7495316491626456e-05, "loss": 3.714598715305328e-05, "step": 290780 }, { "epoch": 82.54044848140789, "grad_norm": 0.00546979857608676, "learning_rate": 1.7492478001703094e-05, "loss": 1.3840384781360626e-05, "step": 290790 }, { "epoch": 82.54328697133126, "grad_norm": 0.0018206326058134437, "learning_rate": 1.7489639511779735e-05, "loss": 5.96262514591217e-05, "step": 290800 }, { "epoch": 82.54612546125462, "grad_norm": 0.01585419476032257, "learning_rate": 1.7486801021856373e-05, "loss": 1.7921067774295806e-05, "step": 290810 }, { "epoch": 82.54896395117797, "grad_norm": 0.006262145005166531, "learning_rate": 1.748396253193301e-05, "loss": 1.2660957872867585e-05, "step": 290820 }, { "epoch": 82.55180244110133, "grad_norm": 0.022886473685503006, "learning_rate": 1.7481124042009653e-05, "loss": 1.1923350393772126e-05, "step": 290830 }, { "epoch": 82.5546409310247, "grad_norm": 0.0035183478612452745, "learning_rate": 1.747828555208629e-05, "loss": 1.9102916121482848e-05, "step": 290840 }, { "epoch": 82.55747942094806, "grad_norm": 0.0005244649364612997, "learning_rate": 1.7475447062162932e-05, "loss": 1.4517642557621003e-05, "step": 290850 }, { "epoch": 82.56031791087142, "grad_norm": 0.00683516263961792, "learning_rate": 1.7472608572239567e-05, "loss": 1.1001899838447571e-05, "step": 290860 }, { "epoch": 82.56315640079478, "grad_norm": 0.005748220719397068, "learning_rate": 1.7469770082316208e-05, "loss": 3.5985186696052554e-05, "step": 290870 }, { "epoch": 82.56599489071814, "grad_norm": 0.04866720363497734, "learning_rate": 1.746693159239285e-05, "loss": 1.5462376177310942e-05, "step": 290880 }, { "epoch": 82.5688333806415, "grad_norm": 0.002941154409199953, "learning_rate": 1.7464093102469487e-05, "loss": 0.00018540415912866593, "step": 290890 }, { "epoch": 82.57167187056486, "grad_norm": 0.04032627120614052, "learning_rate": 1.7461254612546125e-05, "loss": 2.1895021200180052e-05, "step": 290900 }, { "epoch": 82.57451036048822, "grad_norm": 0.02014247700572014, "learning_rate": 1.7458416122622763e-05, "loss": 3.68485227227211e-05, "step": 290910 }, { "epoch": 82.57734885041158, "grad_norm": 0.038176946341991425, "learning_rate": 1.7455577632699405e-05, "loss": 2.0223110914230348e-05, "step": 290920 }, { "epoch": 82.58018734033494, "grad_norm": 0.007590907160192728, "learning_rate": 1.7452739142776046e-05, "loss": 1.49579718708992e-05, "step": 290930 }, { "epoch": 82.58302583025831, "grad_norm": 0.03830573707818985, "learning_rate": 1.7449900652852684e-05, "loss": 3.0054710805416107e-05, "step": 290940 }, { "epoch": 82.58586432018166, "grad_norm": 0.007556887809187174, "learning_rate": 1.7447062162929322e-05, "loss": 2.1286122500896453e-05, "step": 290950 }, { "epoch": 82.58870281010502, "grad_norm": 0.006738942116498947, "learning_rate": 1.744422367300596e-05, "loss": 1.4076568186283112e-05, "step": 290960 }, { "epoch": 82.59154130002838, "grad_norm": 0.005457004997879267, "learning_rate": 1.74413851830826e-05, "loss": 1.362282782793045e-05, "step": 290970 }, { "epoch": 82.59437978995174, "grad_norm": 0.010878023691475391, "learning_rate": 1.743854669315924e-05, "loss": 1.4649517834186553e-05, "step": 290980 }, { "epoch": 82.59721827987511, "grad_norm": 0.0013977993512526155, "learning_rate": 1.7435708203235877e-05, "loss": 1.6617216169834136e-05, "step": 290990 }, { "epoch": 82.60005676979847, "grad_norm": 0.006155961658805609, "learning_rate": 1.743286971331252e-05, "loss": 5.3802691400051114e-05, "step": 291000 }, { "epoch": 82.60005676979847, "eval_accuracy": 0.9873466013861512, "eval_loss": 0.054172057658433914, "eval_runtime": 35.8576, "eval_samples_per_second": 438.596, "eval_steps_per_second": 6.86, "step": 291000 }, { "epoch": 82.60289525972183, "grad_norm": 0.0009905307088047266, "learning_rate": 1.7430031223389157e-05, "loss": 2.2097863256931306e-05, "step": 291010 }, { "epoch": 82.60573374964518, "grad_norm": 0.0006283179391175508, "learning_rate": 1.7427192733465798e-05, "loss": 1.2758374214172364e-05, "step": 291020 }, { "epoch": 82.60857223956855, "grad_norm": 0.0013331917580217123, "learning_rate": 1.7424354243542436e-05, "loss": 1.3556703925132751e-05, "step": 291030 }, { "epoch": 82.61141072949191, "grad_norm": 0.007265887688845396, "learning_rate": 1.7421515753619074e-05, "loss": 1.9854679703712463e-05, "step": 291040 }, { "epoch": 82.61424921941527, "grad_norm": 0.0013344574254006147, "learning_rate": 1.7418677263695716e-05, "loss": 5.353093147277832e-05, "step": 291050 }, { "epoch": 82.61708770933863, "grad_norm": 0.010618298314511776, "learning_rate": 1.7415838773772354e-05, "loss": 3.139320760965347e-05, "step": 291060 }, { "epoch": 82.619926199262, "grad_norm": 0.0006356539088301361, "learning_rate": 1.741300028384899e-05, "loss": 1.8979422748088837e-05, "step": 291070 }, { "epoch": 82.62276468918536, "grad_norm": 0.0003009786014445126, "learning_rate": 1.7410161793925633e-05, "loss": 2.4478882551193237e-05, "step": 291080 }, { "epoch": 82.62560317910871, "grad_norm": 0.004823788534849882, "learning_rate": 1.740732330400227e-05, "loss": 1.0250508785247802e-05, "step": 291090 }, { "epoch": 82.62844166903207, "grad_norm": 0.008464633487164974, "learning_rate": 1.7404484814078912e-05, "loss": 1.480039209127426e-05, "step": 291100 }, { "epoch": 82.63128015895543, "grad_norm": 0.0006535882130265236, "learning_rate": 1.740164632415555e-05, "loss": 9.580329060554504e-06, "step": 291110 }, { "epoch": 82.6341186488788, "grad_norm": 0.01477288268506527, "learning_rate": 1.7398807834232188e-05, "loss": 1.6245245933532716e-05, "step": 291120 }, { "epoch": 82.63695713880216, "grad_norm": 0.0012481010053306818, "learning_rate": 1.739596934430883e-05, "loss": 1.346934586763382e-05, "step": 291130 }, { "epoch": 82.63979562872552, "grad_norm": 0.0035555842332541943, "learning_rate": 1.7393130854385468e-05, "loss": 1.5272758901119232e-05, "step": 291140 }, { "epoch": 82.64263411864889, "grad_norm": 0.020686691626906395, "learning_rate": 1.739029236446211e-05, "loss": 1.5841983258724213e-05, "step": 291150 }, { "epoch": 82.64547260857223, "grad_norm": 0.0011514112120494246, "learning_rate": 1.7387453874538744e-05, "loss": 1.679975539445877e-05, "step": 291160 }, { "epoch": 82.6483110984956, "grad_norm": 0.018070608377456665, "learning_rate": 1.7384615384615385e-05, "loss": 1.8906965851783753e-05, "step": 291170 }, { "epoch": 82.65114958841896, "grad_norm": 0.0009066356578841805, "learning_rate": 1.7381776894692026e-05, "loss": 1.1010095477104187e-05, "step": 291180 }, { "epoch": 82.65398807834232, "grad_norm": 0.031096220016479492, "learning_rate": 1.7378938404768664e-05, "loss": 2.2890232503414155e-05, "step": 291190 }, { "epoch": 82.65682656826569, "grad_norm": 0.001836021663621068, "learning_rate": 1.7376099914845302e-05, "loss": 1.5490315854549407e-05, "step": 291200 }, { "epoch": 82.65966505818905, "grad_norm": 0.0032338681630790234, "learning_rate": 1.737326142492194e-05, "loss": 1.4997832477092742e-05, "step": 291210 }, { "epoch": 82.6625035481124, "grad_norm": 0.003538718679919839, "learning_rate": 1.737042293499858e-05, "loss": 1.6713887453079225e-05, "step": 291220 }, { "epoch": 82.66534203803576, "grad_norm": 0.004052731208503246, "learning_rate": 1.7367584445075223e-05, "loss": 1.9720755517482758e-05, "step": 291230 }, { "epoch": 82.66818052795912, "grad_norm": 0.00613973755389452, "learning_rate": 1.7364745955151858e-05, "loss": 1.2187659740447998e-05, "step": 291240 }, { "epoch": 82.67101901788249, "grad_norm": 0.01628563180565834, "learning_rate": 1.73619074652285e-05, "loss": 1.8792785704135893e-05, "step": 291250 }, { "epoch": 82.67385750780585, "grad_norm": 0.0057900878600776196, "learning_rate": 1.7359068975305137e-05, "loss": 1.9727647304534913e-05, "step": 291260 }, { "epoch": 82.67669599772921, "grad_norm": 0.0008248106460087001, "learning_rate": 1.735623048538178e-05, "loss": 1.0813400149345397e-05, "step": 291270 }, { "epoch": 82.67953448765257, "grad_norm": 0.00477262120693922, "learning_rate": 1.7353391995458416e-05, "loss": 2.872161567211151e-05, "step": 291280 }, { "epoch": 82.68237297757592, "grad_norm": 0.006049261894077063, "learning_rate": 1.7350553505535054e-05, "loss": 1.1977925896644593e-05, "step": 291290 }, { "epoch": 82.68521146749929, "grad_norm": 0.00028539003687910736, "learning_rate": 1.7347715015611696e-05, "loss": 1.57870352268219e-05, "step": 291300 }, { "epoch": 82.68804995742265, "grad_norm": 0.005825815722346306, "learning_rate": 1.7344876525688334e-05, "loss": 1.6706250607967377e-05, "step": 291310 }, { "epoch": 82.69088844734601, "grad_norm": 0.0007621750701218843, "learning_rate": 1.7342038035764975e-05, "loss": 2.617575228214264e-05, "step": 291320 }, { "epoch": 82.69372693726937, "grad_norm": 0.0013814723351970315, "learning_rate": 1.7339199545841613e-05, "loss": 9.922683238983154e-06, "step": 291330 }, { "epoch": 82.69656542719274, "grad_norm": 0.0006613894365727901, "learning_rate": 1.733636105591825e-05, "loss": 1.07545405626297e-05, "step": 291340 }, { "epoch": 82.6994039171161, "grad_norm": 0.004884431138634682, "learning_rate": 1.7333522565994892e-05, "loss": 1.3258308172225952e-05, "step": 291350 }, { "epoch": 82.70224240703945, "grad_norm": 0.0030618617311120033, "learning_rate": 1.733068407607153e-05, "loss": 1.1958181858062744e-05, "step": 291360 }, { "epoch": 82.70508089696281, "grad_norm": 0.005691984668374062, "learning_rate": 1.732784558614817e-05, "loss": 8.890591561794282e-06, "step": 291370 }, { "epoch": 82.70791938688618, "grad_norm": 0.0009467456839047372, "learning_rate": 1.732500709622481e-05, "loss": 7.73537904024124e-06, "step": 291380 }, { "epoch": 82.71075787680954, "grad_norm": 0.004490801598876715, "learning_rate": 1.7322168606301448e-05, "loss": 7.740054279565811e-05, "step": 291390 }, { "epoch": 82.7135963667329, "grad_norm": 0.0036864476278424263, "learning_rate": 1.731933011637809e-05, "loss": 2.070106565952301e-05, "step": 291400 }, { "epoch": 82.71643485665626, "grad_norm": 0.007126172073185444, "learning_rate": 1.7316491626454727e-05, "loss": 2.5549717247486114e-05, "step": 291410 }, { "epoch": 82.71927334657963, "grad_norm": 0.00034374435199424624, "learning_rate": 1.7313653136531365e-05, "loss": 2.2940710186958314e-05, "step": 291420 }, { "epoch": 82.72211183650298, "grad_norm": 0.001651161815971136, "learning_rate": 1.7310814646608007e-05, "loss": 1.584254205226898e-05, "step": 291430 }, { "epoch": 82.72495032642634, "grad_norm": 0.0018907251069322228, "learning_rate": 1.7307976156684644e-05, "loss": 1.4397129416465759e-05, "step": 291440 }, { "epoch": 82.7277888163497, "grad_norm": 0.019253265112638474, "learning_rate": 1.7305137666761282e-05, "loss": 1.5252269804477692e-05, "step": 291450 }, { "epoch": 82.73062730627306, "grad_norm": 0.002320083323866129, "learning_rate": 1.730229917683792e-05, "loss": 1.2140907347202302e-05, "step": 291460 }, { "epoch": 82.73346579619643, "grad_norm": 0.005610703025013208, "learning_rate": 1.7299460686914562e-05, "loss": 1.712292432785034e-05, "step": 291470 }, { "epoch": 82.73630428611979, "grad_norm": 0.011961828917264938, "learning_rate": 1.7296622196991203e-05, "loss": 1.8889829516410827e-05, "step": 291480 }, { "epoch": 82.73914277604314, "grad_norm": 0.006352677009999752, "learning_rate": 1.729378370706784e-05, "loss": 1.6584806144237518e-05, "step": 291490 }, { "epoch": 82.7419812659665, "grad_norm": 0.004855662118643522, "learning_rate": 1.729094521714448e-05, "loss": 1.3463012874126434e-05, "step": 291500 }, { "epoch": 82.7419812659665, "eval_accuracy": 0.9879824505627265, "eval_loss": 0.05124958977103233, "eval_runtime": 36.1278, "eval_samples_per_second": 435.316, "eval_steps_per_second": 6.809, "step": 291500 }, { "epoch": 82.74481975588986, "grad_norm": 0.002802562201395631, "learning_rate": 1.728810672722112e-05, "loss": 2.6168860495090483e-05, "step": 291510 }, { "epoch": 82.74765824581323, "grad_norm": 0.0016746012261137366, "learning_rate": 1.728526823729776e-05, "loss": 1.3014674186706543e-05, "step": 291520 }, { "epoch": 82.75049673573659, "grad_norm": 0.005548429675400257, "learning_rate": 1.72824297473744e-05, "loss": 1.7908960580825804e-05, "step": 291530 }, { "epoch": 82.75333522565995, "grad_norm": 0.0027132651302963495, "learning_rate": 1.7279591257451035e-05, "loss": 1.3335049152374267e-05, "step": 291540 }, { "epoch": 82.75617371558332, "grad_norm": 0.0007013906142674387, "learning_rate": 1.7276752767527676e-05, "loss": 7.167644798755646e-06, "step": 291550 }, { "epoch": 82.75901220550666, "grad_norm": 0.0052689481526613235, "learning_rate": 1.7273914277604317e-05, "loss": 9.936466813087464e-06, "step": 291560 }, { "epoch": 82.76185069543003, "grad_norm": 0.0014057098887860775, "learning_rate": 1.7271075787680955e-05, "loss": 1.3032928109169006e-05, "step": 291570 }, { "epoch": 82.76468918535339, "grad_norm": 0.0011017596116289496, "learning_rate": 1.7268237297757593e-05, "loss": 1.698695123195648e-05, "step": 291580 }, { "epoch": 82.76752767527675, "grad_norm": 0.0024303654208779335, "learning_rate": 1.726539880783423e-05, "loss": 1.712776720523834e-05, "step": 291590 }, { "epoch": 82.77036616520012, "grad_norm": 0.0008410704904235899, "learning_rate": 1.7262560317910873e-05, "loss": 5.334056913852691e-06, "step": 291600 }, { "epoch": 82.77320465512348, "grad_norm": 0.0012826727470383048, "learning_rate": 1.7259721827987514e-05, "loss": 8.032098412513733e-06, "step": 291610 }, { "epoch": 82.77604314504684, "grad_norm": 0.01933041214942932, "learning_rate": 1.7256883338064152e-05, "loss": 1.7777830362319946e-05, "step": 291620 }, { "epoch": 82.77888163497019, "grad_norm": 0.0007485548267140985, "learning_rate": 1.725404484814079e-05, "loss": 6.9338828325271605e-06, "step": 291630 }, { "epoch": 82.78172012489355, "grad_norm": 0.0009134348947554827, "learning_rate": 1.7251206358217428e-05, "loss": 1.021958887577057e-05, "step": 291640 }, { "epoch": 82.78455861481692, "grad_norm": 0.006542829796671867, "learning_rate": 1.724836786829407e-05, "loss": 1.0171905159950256e-05, "step": 291650 }, { "epoch": 82.78739710474028, "grad_norm": 0.0019031293923035264, "learning_rate": 1.7245529378370707e-05, "loss": 8.756667375564575e-06, "step": 291660 }, { "epoch": 82.79023559466364, "grad_norm": 0.0060427226126194, "learning_rate": 1.7242690888447345e-05, "loss": 1.163184642791748e-05, "step": 291670 }, { "epoch": 82.793074084587, "grad_norm": 0.006752148270606995, "learning_rate": 1.7239852398523987e-05, "loss": 1.2837350368499756e-05, "step": 291680 }, { "epoch": 82.79591257451035, "grad_norm": 0.0030903187580406666, "learning_rate": 1.7237013908600625e-05, "loss": 1.2441724538803101e-05, "step": 291690 }, { "epoch": 82.79875106443372, "grad_norm": 0.001607949729077518, "learning_rate": 1.7234175418677266e-05, "loss": 5.187839269638061e-06, "step": 291700 }, { "epoch": 82.80158955435708, "grad_norm": 0.013560274615883827, "learning_rate": 1.7231336928753904e-05, "loss": 1.806821674108505e-05, "step": 291710 }, { "epoch": 82.80442804428044, "grad_norm": 0.0023673733230680227, "learning_rate": 1.7228498438830542e-05, "loss": 1.8262863159179686e-05, "step": 291720 }, { "epoch": 82.8072665342038, "grad_norm": 0.003125940915197134, "learning_rate": 1.7225659948907183e-05, "loss": 1.6887485980987548e-05, "step": 291730 }, { "epoch": 82.81010502412717, "grad_norm": 0.0011066466104239225, "learning_rate": 1.722282145898382e-05, "loss": 9.896792471408844e-06, "step": 291740 }, { "epoch": 82.81294351405053, "grad_norm": 0.0003482213069219142, "learning_rate": 1.721998296906046e-05, "loss": 8.646771311759948e-06, "step": 291750 }, { "epoch": 82.81578200397388, "grad_norm": 0.0010758255375549197, "learning_rate": 1.72171444791371e-05, "loss": 1.0474584996700287e-05, "step": 291760 }, { "epoch": 82.81862049389724, "grad_norm": 0.0029994871001690626, "learning_rate": 1.721430598921374e-05, "loss": 1.3143569231033325e-05, "step": 291770 }, { "epoch": 82.8214589838206, "grad_norm": 0.00031353184022009373, "learning_rate": 1.721146749929038e-05, "loss": 1.3162195682525634e-05, "step": 291780 }, { "epoch": 82.82429747374397, "grad_norm": 0.0020154837984591722, "learning_rate": 1.7208629009367018e-05, "loss": 2.16212123632431e-05, "step": 291790 }, { "epoch": 82.82713596366733, "grad_norm": 0.000660784135106951, "learning_rate": 1.7205790519443656e-05, "loss": 5.979090929031372e-06, "step": 291800 }, { "epoch": 82.8299744535907, "grad_norm": 0.001260610530152917, "learning_rate": 1.7202952029520297e-05, "loss": 1.0462850332260131e-05, "step": 291810 }, { "epoch": 82.83281294351406, "grad_norm": 0.0026913227047771215, "learning_rate": 1.7200113539596935e-05, "loss": 1.2398511171340942e-05, "step": 291820 }, { "epoch": 82.8356514334374, "grad_norm": 0.001439109561033547, "learning_rate": 1.7197275049673577e-05, "loss": 8.76113772392273e-06, "step": 291830 }, { "epoch": 82.83848992336077, "grad_norm": 0.014869431033730507, "learning_rate": 1.719443655975021e-05, "loss": 9.728409349918365e-06, "step": 291840 }, { "epoch": 82.84132841328413, "grad_norm": 0.0005369781865738332, "learning_rate": 1.7191598069826853e-05, "loss": 8.909404277801513e-06, "step": 291850 }, { "epoch": 82.8441669032075, "grad_norm": 0.0005990783683955669, "learning_rate": 1.7188759579903494e-05, "loss": 9.778514504432679e-06, "step": 291860 }, { "epoch": 82.84700539313086, "grad_norm": 0.0014475315110757947, "learning_rate": 1.7185921089980132e-05, "loss": 9.601190686225892e-06, "step": 291870 }, { "epoch": 82.84984388305422, "grad_norm": 0.00604784581810236, "learning_rate": 1.718308260005677e-05, "loss": 1.6783922910690306e-05, "step": 291880 }, { "epoch": 82.85268237297758, "grad_norm": 0.0016397135332226753, "learning_rate": 1.7180244110133408e-05, "loss": 1.697540283203125e-05, "step": 291890 }, { "epoch": 82.85552086290093, "grad_norm": 0.00028589015710167587, "learning_rate": 1.717740562021005e-05, "loss": 1.1829845607280732e-05, "step": 291900 }, { "epoch": 82.8583593528243, "grad_norm": 0.0014598045963793993, "learning_rate": 1.717456713028669e-05, "loss": 9.399279952049255e-06, "step": 291910 }, { "epoch": 82.86119784274766, "grad_norm": 0.006240673828870058, "learning_rate": 1.7171728640363325e-05, "loss": 9.594298899173736e-06, "step": 291920 }, { "epoch": 82.86403633267102, "grad_norm": 0.004542795941233635, "learning_rate": 1.7168890150439967e-05, "loss": 9.438395500183105e-06, "step": 291930 }, { "epoch": 82.86687482259438, "grad_norm": 0.00230382289737463, "learning_rate": 1.7166051660516605e-05, "loss": 1.124013215303421e-05, "step": 291940 }, { "epoch": 82.86971331251775, "grad_norm": 0.0004425622755661607, "learning_rate": 1.7163213170593246e-05, "loss": 7.321871817111969e-06, "step": 291950 }, { "epoch": 82.8725518024411, "grad_norm": 0.0034711735788732767, "learning_rate": 1.7160374680669884e-05, "loss": 9.100884199142456e-06, "step": 291960 }, { "epoch": 82.87539029236446, "grad_norm": 0.0023789869155734777, "learning_rate": 1.7157536190746522e-05, "loss": 1.4639273285865783e-05, "step": 291970 }, { "epoch": 82.87822878228782, "grad_norm": 0.0005999173154123127, "learning_rate": 1.7154697700823164e-05, "loss": 1.0669417679309846e-05, "step": 291980 }, { "epoch": 82.88106727221118, "grad_norm": 0.002714053960517049, "learning_rate": 1.71518592108998e-05, "loss": 1.589227467775345e-05, "step": 291990 }, { "epoch": 82.88390576213455, "grad_norm": 0.0010774758411571383, "learning_rate": 1.7149020720976443e-05, "loss": 7.61616975069046e-06, "step": 292000 }, { "epoch": 82.88390576213455, "eval_accuracy": 0.9881732053156991, "eval_loss": 0.05011169984936714, "eval_runtime": 35.5576, "eval_samples_per_second": 442.296, "eval_steps_per_second": 6.918, "step": 292000 }, { "epoch": 82.88674425205791, "grad_norm": 0.007970881648361683, "learning_rate": 1.714618223105308e-05, "loss": 5.780905485153198e-06, "step": 292010 }, { "epoch": 82.88958274198127, "grad_norm": 0.0025105830281972885, "learning_rate": 1.714334374112972e-05, "loss": 8.970815688371658e-05, "step": 292020 }, { "epoch": 82.89242123190462, "grad_norm": 0.004675464238971472, "learning_rate": 1.714050525120636e-05, "loss": 1.1599436402320861e-05, "step": 292030 }, { "epoch": 82.89525972182798, "grad_norm": 0.001479236874729395, "learning_rate": 1.7137666761282998e-05, "loss": 2.2192485630512238e-05, "step": 292040 }, { "epoch": 82.89809821175135, "grad_norm": 0.001104352530092001, "learning_rate": 1.7134828271359636e-05, "loss": 1.503489911556244e-05, "step": 292050 }, { "epoch": 82.90093670167471, "grad_norm": 0.001430871314369142, "learning_rate": 1.7131989781436278e-05, "loss": 1.785196363925934e-05, "step": 292060 }, { "epoch": 82.90377519159807, "grad_norm": 0.00325192348100245, "learning_rate": 1.7129151291512916e-05, "loss": 1.1166743934154511e-05, "step": 292070 }, { "epoch": 82.90661368152143, "grad_norm": 0.07540178298950195, "learning_rate": 1.7126312801589557e-05, "loss": 4.3322332203388214e-05, "step": 292080 }, { "epoch": 82.9094521714448, "grad_norm": 0.0004640457045752555, "learning_rate": 1.7123474311666195e-05, "loss": 2.3602508008480072e-05, "step": 292090 }, { "epoch": 82.91229066136815, "grad_norm": 0.0017029065638780594, "learning_rate": 1.7120635821742833e-05, "loss": 7.033906877040863e-06, "step": 292100 }, { "epoch": 82.91512915129151, "grad_norm": 0.004235580563545227, "learning_rate": 1.7117797331819474e-05, "loss": 1.1724047362804413e-05, "step": 292110 }, { "epoch": 82.91796764121487, "grad_norm": 0.0017399959033355117, "learning_rate": 1.7114958841896112e-05, "loss": 1.74669548869133e-05, "step": 292120 }, { "epoch": 82.92080613113824, "grad_norm": 0.0005127392360009253, "learning_rate": 1.711212035197275e-05, "loss": 1.1146068572998047e-05, "step": 292130 }, { "epoch": 82.9236446210616, "grad_norm": 0.005111328326165676, "learning_rate": 1.7109281862049388e-05, "loss": 9.880401194095612e-06, "step": 292140 }, { "epoch": 82.92648311098496, "grad_norm": 0.011802366934716702, "learning_rate": 1.710644337212603e-05, "loss": 4.207659512758255e-05, "step": 292150 }, { "epoch": 82.92932160090831, "grad_norm": 0.005835528951138258, "learning_rate": 1.710360488220267e-05, "loss": 1.851096749305725e-05, "step": 292160 }, { "epoch": 82.93216009083167, "grad_norm": 0.0009443629533052444, "learning_rate": 1.710076639227931e-05, "loss": 1.501571387052536e-05, "step": 292170 }, { "epoch": 82.93499858075504, "grad_norm": 0.000693035195581615, "learning_rate": 1.7097927902355947e-05, "loss": 9.64682549238205e-06, "step": 292180 }, { "epoch": 82.9378370706784, "grad_norm": 0.009850853122770786, "learning_rate": 1.7095089412432585e-05, "loss": 1.1647306382656098e-05, "step": 292190 }, { "epoch": 82.94067556060176, "grad_norm": 0.03991926461458206, "learning_rate": 1.7092250922509226e-05, "loss": 1.7406605184078217e-05, "step": 292200 }, { "epoch": 82.94351405052512, "grad_norm": 0.0006512213731184602, "learning_rate": 1.7089412432585868e-05, "loss": 9.277649223804473e-06, "step": 292210 }, { "epoch": 82.94635254044849, "grad_norm": 0.0009435707470402122, "learning_rate": 1.7086573942662502e-05, "loss": 1.0920688509941101e-05, "step": 292220 }, { "epoch": 82.94919103037184, "grad_norm": 0.004448465071618557, "learning_rate": 1.7083735452739144e-05, "loss": 1.538824290037155e-05, "step": 292230 }, { "epoch": 82.9520295202952, "grad_norm": 0.002556731691583991, "learning_rate": 1.708089696281578e-05, "loss": 2.1170638501644134e-05, "step": 292240 }, { "epoch": 82.95486801021856, "grad_norm": 0.00031408751965500414, "learning_rate": 1.7078058472892423e-05, "loss": 7.392093539237976e-06, "step": 292250 }, { "epoch": 82.95770650014192, "grad_norm": 0.0008550803759135306, "learning_rate": 1.707521998296906e-05, "loss": 7.602758705615997e-06, "step": 292260 }, { "epoch": 82.96054499006529, "grad_norm": 0.009476838633418083, "learning_rate": 1.70723814930457e-05, "loss": 1.0996870696544647e-05, "step": 292270 }, { "epoch": 82.96338347998865, "grad_norm": 0.0005361236399039626, "learning_rate": 1.706954300312234e-05, "loss": 8.42958688735962e-06, "step": 292280 }, { "epoch": 82.96622196991201, "grad_norm": 0.0005297033931128681, "learning_rate": 1.706670451319898e-05, "loss": 9.331293404102325e-06, "step": 292290 }, { "epoch": 82.96906045983536, "grad_norm": 0.006086449138820171, "learning_rate": 1.706386602327562e-05, "loss": 9.936466813087464e-06, "step": 292300 }, { "epoch": 82.97189894975872, "grad_norm": 0.0014153742231428623, "learning_rate": 1.7061027533352258e-05, "loss": 9.004957973957062e-06, "step": 292310 }, { "epoch": 82.97473743968209, "grad_norm": 0.0013788017677143216, "learning_rate": 1.7058189043428896e-05, "loss": 8.737854659557343e-06, "step": 292320 }, { "epoch": 82.97757592960545, "grad_norm": 0.0006678533973172307, "learning_rate": 1.7055350553505537e-05, "loss": 8.386000990867614e-06, "step": 292330 }, { "epoch": 82.98041441952881, "grad_norm": 0.007260105572640896, "learning_rate": 1.7052512063582175e-05, "loss": 1.760106533765793e-05, "step": 292340 }, { "epoch": 82.98325290945218, "grad_norm": 0.0019319442799314857, "learning_rate": 1.7049673573658813e-05, "loss": 9.616464376449585e-06, "step": 292350 }, { "epoch": 82.98609139937554, "grad_norm": 0.0026715893764048815, "learning_rate": 1.7046835083735454e-05, "loss": 2.971123903989792e-05, "step": 292360 }, { "epoch": 82.98892988929889, "grad_norm": 0.0007611477049067616, "learning_rate": 1.7043996593812092e-05, "loss": 8.644349873065948e-06, "step": 292370 }, { "epoch": 82.99176837922225, "grad_norm": 0.00789925642311573, "learning_rate": 1.7041158103888734e-05, "loss": 1.6310252249240875e-05, "step": 292380 }, { "epoch": 82.99460686914561, "grad_norm": 0.015529247932136059, "learning_rate": 1.703831961396537e-05, "loss": 1.2170709669589997e-05, "step": 292390 }, { "epoch": 82.99744535906898, "grad_norm": 0.0013881066115573049, "learning_rate": 1.703548112404201e-05, "loss": 1.82516872882843e-05, "step": 292400 }, { "epoch": 83.00028384899234, "grad_norm": 0.0016057664761319757, "learning_rate": 1.703264263411865e-05, "loss": 1.06004998087883e-05, "step": 292410 }, { "epoch": 83.0031223389157, "grad_norm": 0.00265583791770041, "learning_rate": 1.702980414419529e-05, "loss": 8.090399205684662e-06, "step": 292420 }, { "epoch": 83.00596082883905, "grad_norm": 0.0016202006954699755, "learning_rate": 1.7026965654271927e-05, "loss": 7.858499884605408e-06, "step": 292430 }, { "epoch": 83.00879931876241, "grad_norm": 0.0015941213350743055, "learning_rate": 1.7024127164348565e-05, "loss": 7.2427093982696535e-06, "step": 292440 }, { "epoch": 83.01163780868578, "grad_norm": 0.0011264861095696688, "learning_rate": 1.7021288674425206e-05, "loss": 6.468780338764191e-06, "step": 292450 }, { "epoch": 83.01447629860914, "grad_norm": 0.0008752350113354623, "learning_rate": 1.7018450184501848e-05, "loss": 8.420087397098541e-06, "step": 292460 }, { "epoch": 83.0173147885325, "grad_norm": 0.005837364587932825, "learning_rate": 1.7015611694578486e-05, "loss": 7.038749754428864e-06, "step": 292470 }, { "epoch": 83.02015327845587, "grad_norm": 0.004360250663012266, "learning_rate": 1.7012773204655124e-05, "loss": 8.96267592906952e-06, "step": 292480 }, { "epoch": 83.02299176837923, "grad_norm": 0.0014496492221951485, "learning_rate": 1.7009934714731762e-05, "loss": 9.506754577159881e-06, "step": 292490 }, { "epoch": 83.02583025830258, "grad_norm": 0.0004344753688201308, "learning_rate": 1.7007096224808403e-05, "loss": 1.183226704597473e-05, "step": 292500 }, { "epoch": 83.02583025830258, "eval_accuracy": 0.9884911299039868, "eval_loss": 0.049627114087343216, "eval_runtime": 35.1644, "eval_samples_per_second": 447.242, "eval_steps_per_second": 6.996, "step": 292500 }, { "epoch": 83.02866874822594, "grad_norm": 0.001596523099578917, "learning_rate": 1.7004257734885045e-05, "loss": 6.685592234134674e-06, "step": 292510 }, { "epoch": 83.0315072381493, "grad_norm": 0.0008089299662970006, "learning_rate": 1.700141924496168e-05, "loss": 1.2742914259433747e-05, "step": 292520 }, { "epoch": 83.03434572807267, "grad_norm": 0.002712167100980878, "learning_rate": 1.699858075503832e-05, "loss": 8.980371057987213e-06, "step": 292530 }, { "epoch": 83.03718421799603, "grad_norm": 0.0006047881906852126, "learning_rate": 1.699574226511496e-05, "loss": 9.365193545818328e-06, "step": 292540 }, { "epoch": 83.04002270791939, "grad_norm": 0.0007571641472168267, "learning_rate": 1.69929037751916e-05, "loss": 9.651854634284973e-06, "step": 292550 }, { "epoch": 83.04286119784275, "grad_norm": 0.001292139757424593, "learning_rate": 1.6990065285268238e-05, "loss": 1.0966882109642029e-05, "step": 292560 }, { "epoch": 83.0456996877661, "grad_norm": 0.005691870115697384, "learning_rate": 1.6987226795344876e-05, "loss": 8.194148540496826e-06, "step": 292570 }, { "epoch": 83.04853817768947, "grad_norm": 0.0004780337621923536, "learning_rate": 1.6984388305421517e-05, "loss": 9.451620280742646e-06, "step": 292580 }, { "epoch": 83.05137666761283, "grad_norm": 0.004361179657280445, "learning_rate": 1.6981549815498155e-05, "loss": 1.3761036098003387e-05, "step": 292590 }, { "epoch": 83.05421515753619, "grad_norm": 0.002216510009020567, "learning_rate": 1.6978711325574793e-05, "loss": 1.3323873281478881e-05, "step": 292600 }, { "epoch": 83.05705364745955, "grad_norm": 0.001752857700921595, "learning_rate": 1.6975872835651435e-05, "loss": 1.4028698205947877e-05, "step": 292610 }, { "epoch": 83.05989213738292, "grad_norm": 0.004448493476957083, "learning_rate": 1.6973034345728073e-05, "loss": 3.752615302801132e-05, "step": 292620 }, { "epoch": 83.06273062730628, "grad_norm": 0.0013307675253599882, "learning_rate": 1.6970195855804714e-05, "loss": 9.841658174991607e-06, "step": 292630 }, { "epoch": 83.06556911722963, "grad_norm": 0.004949029069393873, "learning_rate": 1.6967357365881352e-05, "loss": 1.5337951481342316e-05, "step": 292640 }, { "epoch": 83.06840760715299, "grad_norm": 0.025952624157071114, "learning_rate": 1.696451887595799e-05, "loss": 1.1668726801872254e-05, "step": 292650 }, { "epoch": 83.07124609707635, "grad_norm": 0.05294441804289818, "learning_rate": 1.696168038603463e-05, "loss": 1.938100904226303e-05, "step": 292660 }, { "epoch": 83.07408458699972, "grad_norm": 0.0006298029911704361, "learning_rate": 1.695884189611127e-05, "loss": 2.564620226621628e-05, "step": 292670 }, { "epoch": 83.07692307692308, "grad_norm": 0.002629996510222554, "learning_rate": 1.695600340618791e-05, "loss": 1.786462962627411e-05, "step": 292680 }, { "epoch": 83.07976156684644, "grad_norm": 0.0034663793630898, "learning_rate": 1.6953164916264545e-05, "loss": 1.3282708823680878e-05, "step": 292690 }, { "epoch": 83.08260005676979, "grad_norm": 0.007972238585352898, "learning_rate": 1.6950326426341187e-05, "loss": 9.363330900669097e-06, "step": 292700 }, { "epoch": 83.08543854669315, "grad_norm": 0.0018902660813182592, "learning_rate": 1.6947487936417828e-05, "loss": 2.5638565421104432e-05, "step": 292710 }, { "epoch": 83.08827703661652, "grad_norm": 0.0011702599003911018, "learning_rate": 1.6944649446494466e-05, "loss": 8.337385952472687e-06, "step": 292720 }, { "epoch": 83.09111552653988, "grad_norm": 0.0007658949471078813, "learning_rate": 1.6941810956571104e-05, "loss": 5.8002769947052e-06, "step": 292730 }, { "epoch": 83.09395401646324, "grad_norm": 0.0009689959697425365, "learning_rate": 1.6938972466647742e-05, "loss": 7.52769410610199e-06, "step": 292740 }, { "epoch": 83.0967925063866, "grad_norm": 0.0007785246125422418, "learning_rate": 1.6936133976724383e-05, "loss": 9.565800428390504e-06, "step": 292750 }, { "epoch": 83.09963099630997, "grad_norm": 0.002554302802309394, "learning_rate": 1.6933295486801025e-05, "loss": 2.081897109746933e-05, "step": 292760 }, { "epoch": 83.10246948623332, "grad_norm": 0.009027904830873013, "learning_rate": 1.6930456996877663e-05, "loss": 1.0242499411106109e-05, "step": 292770 }, { "epoch": 83.10530797615668, "grad_norm": 0.003855033777654171, "learning_rate": 1.69276185069543e-05, "loss": 8.22022557258606e-06, "step": 292780 }, { "epoch": 83.10814646608004, "grad_norm": 0.049288101494312286, "learning_rate": 1.6924780017030942e-05, "loss": 1.3378448784351349e-05, "step": 292790 }, { "epoch": 83.1109849560034, "grad_norm": 0.0021356006618589163, "learning_rate": 1.692194152710758e-05, "loss": 1.3165920972824096e-05, "step": 292800 }, { "epoch": 83.11382344592677, "grad_norm": 0.00229712319560349, "learning_rate": 1.6919103037184218e-05, "loss": 2.0811520516872407e-05, "step": 292810 }, { "epoch": 83.11666193585013, "grad_norm": 0.0028112041763961315, "learning_rate": 1.6916264547260856e-05, "loss": 6.284564733505249e-06, "step": 292820 }, { "epoch": 83.1195004257735, "grad_norm": 0.004114639479666948, "learning_rate": 1.6913426057337497e-05, "loss": 7.3429197072982785e-06, "step": 292830 }, { "epoch": 83.12233891569684, "grad_norm": 0.00186105293687433, "learning_rate": 1.691058756741414e-05, "loss": 1.1682324111461639e-05, "step": 292840 }, { "epoch": 83.1251774056202, "grad_norm": 0.0008288318640552461, "learning_rate": 1.6907749077490777e-05, "loss": 1.0266713798046111e-05, "step": 292850 }, { "epoch": 83.12801589554357, "grad_norm": 0.001109809149056673, "learning_rate": 1.6904910587567415e-05, "loss": 2.4496018886566163e-05, "step": 292860 }, { "epoch": 83.13085438546693, "grad_norm": 0.010540173389017582, "learning_rate": 1.6902072097644053e-05, "loss": 6.698407232761384e-05, "step": 292870 }, { "epoch": 83.1336928753903, "grad_norm": 0.006201327312737703, "learning_rate": 1.6899233607720694e-05, "loss": 1.2751109898090362e-05, "step": 292880 }, { "epoch": 83.13653136531366, "grad_norm": 0.0026777605526149273, "learning_rate": 1.6896395117797336e-05, "loss": 0.00017474796622991563, "step": 292890 }, { "epoch": 83.139369855237, "grad_norm": 0.04006519541144371, "learning_rate": 1.689355662787397e-05, "loss": 9.56263393163681e-05, "step": 292900 }, { "epoch": 83.14220834516037, "grad_norm": 0.000734262983314693, "learning_rate": 1.689071813795061e-05, "loss": 6.244909018278122e-05, "step": 292910 }, { "epoch": 83.14504683508373, "grad_norm": 0.0007367433281615376, "learning_rate": 1.688787964802725e-05, "loss": 9.136274456977844e-06, "step": 292920 }, { "epoch": 83.1478853250071, "grad_norm": 0.0027930201031267643, "learning_rate": 1.688504115810389e-05, "loss": 2.6093609631061553e-05, "step": 292930 }, { "epoch": 83.15072381493046, "grad_norm": 0.0013139619259163737, "learning_rate": 1.688220266818053e-05, "loss": 2.6860833168029784e-05, "step": 292940 }, { "epoch": 83.15356230485382, "grad_norm": 0.005516470409929752, "learning_rate": 1.6879364178257167e-05, "loss": 1.125577837228775e-05, "step": 292950 }, { "epoch": 83.15640079477718, "grad_norm": 0.0008628535433672369, "learning_rate": 1.6876525688333808e-05, "loss": 1.558307558298111e-05, "step": 292960 }, { "epoch": 83.15923928470053, "grad_norm": 0.005475703161209822, "learning_rate": 1.6873687198410446e-05, "loss": 2.8713606297969818e-05, "step": 292970 }, { "epoch": 83.1620777746239, "grad_norm": 0.0007282037404365838, "learning_rate": 1.6870848708487088e-05, "loss": 8.03595408797264e-05, "step": 292980 }, { "epoch": 83.16491626454726, "grad_norm": Infinity, "learning_rate": 1.6868010218563726e-05, "loss": 0.000820419006049633, "step": 292990 }, { "epoch": 83.16775475447062, "grad_norm": 0.00044268282363191247, "learning_rate": 1.68654555776327e-05, "loss": 1.4326535165309906e-05, "step": 293000 }, { "epoch": 83.16775475447062, "eval_accuracy": 0.9860749030330006, "eval_loss": 0.05587715655565262, "eval_runtime": 35.1643, "eval_samples_per_second": 447.243, "eval_steps_per_second": 6.996, "step": 293000 }, { "epoch": 83.17059324439398, "grad_norm": 0.0018561093602329493, "learning_rate": 1.686261708770934e-05, "loss": 6.278101354837418e-05, "step": 293010 }, { "epoch": 83.17343173431735, "grad_norm": 0.0017248669173568487, "learning_rate": 1.6859778597785978e-05, "loss": 1.6336515545845032e-05, "step": 293020 }, { "epoch": 83.17627022424071, "grad_norm": 0.0036326448898762465, "learning_rate": 1.6856940107862616e-05, "loss": 2.9404833912849427e-05, "step": 293030 }, { "epoch": 83.17910871416406, "grad_norm": 0.013685723766684532, "learning_rate": 1.6854101617939257e-05, "loss": 2.110730856657028e-05, "step": 293040 }, { "epoch": 83.18194720408742, "grad_norm": 0.0031992497388273478, "learning_rate": 1.6851263128015895e-05, "loss": 2.908073365688324e-05, "step": 293050 }, { "epoch": 83.18478569401078, "grad_norm": 0.002183271571993828, "learning_rate": 1.6848424638092537e-05, "loss": 5.136672407388687e-05, "step": 293060 }, { "epoch": 83.18762418393415, "grad_norm": 0.02727111615240574, "learning_rate": 1.6845586148169175e-05, "loss": 1.9040703773498534e-05, "step": 293070 }, { "epoch": 83.19046267385751, "grad_norm": 0.004321068990975618, "learning_rate": 1.6842747658245813e-05, "loss": 2.2557936608791353e-05, "step": 293080 }, { "epoch": 83.19330116378087, "grad_norm": 0.0039589740335941315, "learning_rate": 1.6839909168322454e-05, "loss": 1.3531558215618133e-05, "step": 293090 }, { "epoch": 83.19613965370424, "grad_norm": 0.0015860919374972582, "learning_rate": 1.6837070678399092e-05, "loss": 1.9047968089580535e-05, "step": 293100 }, { "epoch": 83.19897814362758, "grad_norm": 0.05558442324399948, "learning_rate": 1.6834232188475734e-05, "loss": 2.4617277085781096e-05, "step": 293110 }, { "epoch": 83.20181663355095, "grad_norm": 0.007570362649857998, "learning_rate": 1.6831393698552368e-05, "loss": 1.6816891729831696e-05, "step": 293120 }, { "epoch": 83.20465512347431, "grad_norm": 0.004340591374784708, "learning_rate": 1.682855520862901e-05, "loss": 1.3677217066287994e-05, "step": 293130 }, { "epoch": 83.20749361339767, "grad_norm": 0.003164685098454356, "learning_rate": 1.682571671870565e-05, "loss": 1.0666809976100921e-05, "step": 293140 }, { "epoch": 83.21033210332104, "grad_norm": 0.0976419448852539, "learning_rate": 1.682287822878229e-05, "loss": 2.621430903673172e-05, "step": 293150 }, { "epoch": 83.2131705932444, "grad_norm": 0.0017723491182550788, "learning_rate": 1.6820039738858927e-05, "loss": 1.2573786079883575e-05, "step": 293160 }, { "epoch": 83.21600908316775, "grad_norm": 0.0031591500155627728, "learning_rate": 1.6817201248935565e-05, "loss": 3.163572400808334e-05, "step": 293170 }, { "epoch": 83.21884757309111, "grad_norm": 0.0021119697485119104, "learning_rate": 1.6814362759012206e-05, "loss": 1.0906718671321869e-05, "step": 293180 }, { "epoch": 83.22168606301447, "grad_norm": 0.0010659856488928199, "learning_rate": 1.6811524269088848e-05, "loss": 9.207986295223236e-06, "step": 293190 }, { "epoch": 83.22452455293784, "grad_norm": 0.015091524459421635, "learning_rate": 1.6808685779165486e-05, "loss": 1.2916885316371917e-05, "step": 293200 }, { "epoch": 83.2273630428612, "grad_norm": 0.0035099589731544256, "learning_rate": 1.6805847289242124e-05, "loss": 2.0586885511875153e-05, "step": 293210 }, { "epoch": 83.23020153278456, "grad_norm": 0.0013108461862429976, "learning_rate": 1.6803008799318765e-05, "loss": 1.0634958744049073e-05, "step": 293220 }, { "epoch": 83.23304002270793, "grad_norm": 0.0019620417151600122, "learning_rate": 1.6800170309395403e-05, "loss": 5.491059273481369e-05, "step": 293230 }, { "epoch": 83.23587851263127, "grad_norm": 0.004242185968905687, "learning_rate": 1.679733181947204e-05, "loss": 1.4073215425014496e-05, "step": 293240 }, { "epoch": 83.23871700255464, "grad_norm": 0.011939385905861855, "learning_rate": 1.679449332954868e-05, "loss": 1.934915781021118e-05, "step": 293250 }, { "epoch": 83.241555492478, "grad_norm": 0.004645003471523523, "learning_rate": 1.679165483962532e-05, "loss": 5.511194467544556e-05, "step": 293260 }, { "epoch": 83.24439398240136, "grad_norm": 0.0012421385617926717, "learning_rate": 1.678881634970196e-05, "loss": 2.7839653193950652e-05, "step": 293270 }, { "epoch": 83.24723247232473, "grad_norm": 0.09271064400672913, "learning_rate": 1.67859778597786e-05, "loss": 4.420876502990723e-05, "step": 293280 }, { "epoch": 83.25007096224809, "grad_norm": 0.00047588266897946596, "learning_rate": 1.6783139369855238e-05, "loss": 2.0918436348438263e-05, "step": 293290 }, { "epoch": 83.25290945217145, "grad_norm": 0.00239614793099463, "learning_rate": 1.6780300879931876e-05, "loss": 1.1158362030982972e-05, "step": 293300 }, { "epoch": 83.2557479420948, "grad_norm": 0.0004973722388967872, "learning_rate": 1.6777462390008517e-05, "loss": 1.0258518159389496e-05, "step": 293310 }, { "epoch": 83.25858643201816, "grad_norm": 0.0016364722978323698, "learning_rate": 1.677462390008516e-05, "loss": 5.500577390193939e-06, "step": 293320 }, { "epoch": 83.26142492194153, "grad_norm": 0.0010730709182098508, "learning_rate": 1.6771785410161793e-05, "loss": 2.8160400688648224e-05, "step": 293330 }, { "epoch": 83.26426341186489, "grad_norm": 0.005139329470694065, "learning_rate": 1.6768946920238434e-05, "loss": 1.1928007006645203e-05, "step": 293340 }, { "epoch": 83.26710190178825, "grad_norm": 0.00225365930236876, "learning_rate": 1.6766108430315072e-05, "loss": 1.698993146419525e-05, "step": 293350 }, { "epoch": 83.26994039171161, "grad_norm": 0.003392324550077319, "learning_rate": 1.6763269940391714e-05, "loss": 6.90799206495285e-06, "step": 293360 }, { "epoch": 83.27277888163498, "grad_norm": 0.001289211679250002, "learning_rate": 1.676043145046835e-05, "loss": 1.3832561671733857e-05, "step": 293370 }, { "epoch": 83.27561737155833, "grad_norm": 0.0018930750666186213, "learning_rate": 1.675759296054499e-05, "loss": 1.1808052659034728e-05, "step": 293380 }, { "epoch": 83.27845586148169, "grad_norm": 0.0009550870745442808, "learning_rate": 1.675475447062163e-05, "loss": 1.0844133794307708e-05, "step": 293390 }, { "epoch": 83.28129435140505, "grad_norm": 0.001204514061100781, "learning_rate": 1.675191598069827e-05, "loss": 0.0003641998395323753, "step": 293400 }, { "epoch": 83.28413284132841, "grad_norm": 0.01421587448567152, "learning_rate": 1.674907749077491e-05, "loss": 0.00011590085923671723, "step": 293410 }, { "epoch": 83.28697133125178, "grad_norm": 0.004404174163937569, "learning_rate": 1.674623900085155e-05, "loss": 2.389475703239441e-05, "step": 293420 }, { "epoch": 83.28980982117514, "grad_norm": 0.025539960712194443, "learning_rate": 1.6743400510928186e-05, "loss": 4.270486533641815e-05, "step": 293430 }, { "epoch": 83.29264831109849, "grad_norm": 0.04170786216855049, "learning_rate": 1.6740562021004828e-05, "loss": 6.92499801516533e-05, "step": 293440 }, { "epoch": 83.29548680102185, "grad_norm": 0.00983918271958828, "learning_rate": 1.6737723531081466e-05, "loss": 4.3254345655441284e-05, "step": 293450 }, { "epoch": 83.29832529094521, "grad_norm": 0.001856851507909596, "learning_rate": 1.6734885041158104e-05, "loss": 2.3771077394485474e-05, "step": 293460 }, { "epoch": 83.30116378086858, "grad_norm": 0.028402531519532204, "learning_rate": 1.6732046551234745e-05, "loss": 2.584382891654968e-05, "step": 293470 }, { "epoch": 83.30400227079194, "grad_norm": 0.0017430575098842382, "learning_rate": 1.6729208061311383e-05, "loss": 2.757906913757324e-05, "step": 293480 }, { "epoch": 83.3068407607153, "grad_norm": 0.0004542274691630155, "learning_rate": 1.6726369571388024e-05, "loss": 1.9694864749908448e-05, "step": 293490 }, { "epoch": 83.30967925063867, "grad_norm": 0.0024674562737345695, "learning_rate": 1.672353108146466e-05, "loss": 7.674656808376313e-06, "step": 293500 }, { "epoch": 83.30967925063867, "eval_accuracy": 0.9870286767978635, "eval_loss": 0.05626044422388077, "eval_runtime": 35.8142, "eval_samples_per_second": 439.127, "eval_steps_per_second": 6.869, "step": 293500 }, { "epoch": 83.31251774056201, "grad_norm": 0.008212593384087086, "learning_rate": 1.67206925915413e-05, "loss": 1.8551014363765717e-05, "step": 293510 }, { "epoch": 83.31535623048538, "grad_norm": 0.004527374170720577, "learning_rate": 1.6717854101617942e-05, "loss": 5.547851324081421e-05, "step": 293520 }, { "epoch": 83.31819472040874, "grad_norm": 0.003926399629563093, "learning_rate": 1.671501561169458e-05, "loss": 2.690572291612625e-05, "step": 293530 }, { "epoch": 83.3210332103321, "grad_norm": 0.011682821437716484, "learning_rate": 1.6712177121771218e-05, "loss": 2.099405974149704e-05, "step": 293540 }, { "epoch": 83.32387170025547, "grad_norm": 0.0021331359166651964, "learning_rate": 1.6709338631847856e-05, "loss": 3.9770640432834627e-05, "step": 293550 }, { "epoch": 83.32671019017883, "grad_norm": 0.011414071545004845, "learning_rate": 1.6706500141924497e-05, "loss": 1.615900546312332e-05, "step": 293560 }, { "epoch": 83.32954868010219, "grad_norm": 0.0023956450168043375, "learning_rate": 1.670366165200114e-05, "loss": 1.372266560792923e-05, "step": 293570 }, { "epoch": 83.33238717002554, "grad_norm": 0.0004687366308644414, "learning_rate": 1.6700823162077776e-05, "loss": 1.4056265354156493e-05, "step": 293580 }, { "epoch": 83.3352256599489, "grad_norm": 0.08657485246658325, "learning_rate": 1.6697984672154414e-05, "loss": 2.9441341757774354e-05, "step": 293590 }, { "epoch": 83.33806414987227, "grad_norm": 0.0026416636537760496, "learning_rate": 1.6695146182231052e-05, "loss": 1.5222840011119842e-05, "step": 293600 }, { "epoch": 83.34090263979563, "grad_norm": 0.0023122846614569426, "learning_rate": 1.6692307692307694e-05, "loss": 1.0657869279384612e-05, "step": 293610 }, { "epoch": 83.34374112971899, "grad_norm": 0.004573432728648186, "learning_rate": 1.6689469202384335e-05, "loss": 1.0580196976661682e-05, "step": 293620 }, { "epoch": 83.34657961964236, "grad_norm": 0.00019039222388528287, "learning_rate": 1.668663071246097e-05, "loss": 1.1947564780712128e-05, "step": 293630 }, { "epoch": 83.3494181095657, "grad_norm": 0.0007482998771592975, "learning_rate": 1.668379222253761e-05, "loss": 8.801184594631196e-06, "step": 293640 }, { "epoch": 83.35225659948907, "grad_norm": 0.0007941755466163158, "learning_rate": 1.668095373261425e-05, "loss": 1.1830590665340424e-05, "step": 293650 }, { "epoch": 83.35509508941243, "grad_norm": 0.011415484361350536, "learning_rate": 1.667811524269089e-05, "loss": 1.3613514602184295e-05, "step": 293660 }, { "epoch": 83.35793357933579, "grad_norm": 0.003393770195543766, "learning_rate": 1.667527675276753e-05, "loss": 1.2688525021076203e-05, "step": 293670 }, { "epoch": 83.36077206925916, "grad_norm": 0.0023004307877272367, "learning_rate": 1.6672438262844166e-05, "loss": 1.2788549065589905e-05, "step": 293680 }, { "epoch": 83.36361055918252, "grad_norm": 0.0016075483290478587, "learning_rate": 1.6669599772920808e-05, "loss": 1.1345185339450836e-05, "step": 293690 }, { "epoch": 83.36644904910588, "grad_norm": 0.000918945821467787, "learning_rate": 1.6666761282997446e-05, "loss": 9.790249168872834e-06, "step": 293700 }, { "epoch": 83.36928753902923, "grad_norm": 0.00194178381934762, "learning_rate": 1.6663922793074084e-05, "loss": 9.994208812713624e-06, "step": 293710 }, { "epoch": 83.3721260289526, "grad_norm": 0.00037692487239837646, "learning_rate": 1.6661084303150725e-05, "loss": 1.8595531582832336e-05, "step": 293720 }, { "epoch": 83.37496451887596, "grad_norm": 0.0008848505676724017, "learning_rate": 1.6658245813227363e-05, "loss": 1.0740943253040314e-05, "step": 293730 }, { "epoch": 83.37780300879932, "grad_norm": 0.0009345185244455934, "learning_rate": 1.6655407323304005e-05, "loss": 9.058043360710144e-06, "step": 293740 }, { "epoch": 83.38064149872268, "grad_norm": 0.012477679178118706, "learning_rate": 1.6652568833380643e-05, "loss": 1.1408701539039612e-05, "step": 293750 }, { "epoch": 83.38347998864604, "grad_norm": 0.0063416012562811375, "learning_rate": 1.664973034345728e-05, "loss": 2.7814321219921113e-05, "step": 293760 }, { "epoch": 83.38631847856941, "grad_norm": 0.002326343907043338, "learning_rate": 1.6646891853533922e-05, "loss": 1.3890117406845092e-05, "step": 293770 }, { "epoch": 83.38915696849276, "grad_norm": 0.003792396979406476, "learning_rate": 1.664405336361056e-05, "loss": 2.2073276340961456e-05, "step": 293780 }, { "epoch": 83.39199545841612, "grad_norm": 0.007358253002166748, "learning_rate": 1.66412148736872e-05, "loss": 7.730908691883087e-06, "step": 293790 }, { "epoch": 83.39483394833948, "grad_norm": 0.003901312593370676, "learning_rate": 1.6638376383763836e-05, "loss": 8.831731975078583e-06, "step": 293800 }, { "epoch": 83.39767243826284, "grad_norm": 0.0013407596852630377, "learning_rate": 1.6635537893840477e-05, "loss": 2.7213431894779206e-05, "step": 293810 }, { "epoch": 83.40051092818621, "grad_norm": 0.08187597990036011, "learning_rate": 1.663269940391712e-05, "loss": 3.1492859125137326e-05, "step": 293820 }, { "epoch": 83.40334941810957, "grad_norm": 0.001525243860669434, "learning_rate": 1.6629860913993757e-05, "loss": 1.5102699398994445e-05, "step": 293830 }, { "epoch": 83.40618790803293, "grad_norm": 0.07234930992126465, "learning_rate": 1.6627022424070395e-05, "loss": 4.247818142175674e-05, "step": 293840 }, { "epoch": 83.40902639795628, "grad_norm": 0.0017283366760239005, "learning_rate": 1.6624183934147033e-05, "loss": 1.1411122977733612e-05, "step": 293850 }, { "epoch": 83.41186488787964, "grad_norm": 0.02017992176115513, "learning_rate": 1.6621345444223674e-05, "loss": 1.2204237282276154e-05, "step": 293860 }, { "epoch": 83.41470337780301, "grad_norm": 0.001067965873517096, "learning_rate": 1.6618506954300315e-05, "loss": 6.845593452453613e-06, "step": 293870 }, { "epoch": 83.41754186772637, "grad_norm": 0.0009624693193472922, "learning_rate": 1.6615668464376953e-05, "loss": 1.1093541979789734e-05, "step": 293880 }, { "epoch": 83.42038035764973, "grad_norm": 0.007799492683261633, "learning_rate": 1.661282997445359e-05, "loss": 1.3361312448978424e-05, "step": 293890 }, { "epoch": 83.4232188475731, "grad_norm": 0.003304106183350086, "learning_rate": 1.660999148453023e-05, "loss": 6.648898124694824e-06, "step": 293900 }, { "epoch": 83.42605733749645, "grad_norm": 0.0015847515314817429, "learning_rate": 1.660715299460687e-05, "loss": 1.4821067452430725e-05, "step": 293910 }, { "epoch": 83.42889582741981, "grad_norm": 0.0009781613480299711, "learning_rate": 1.660431450468351e-05, "loss": 1.2983940541744232e-05, "step": 293920 }, { "epoch": 83.43173431734317, "grad_norm": 0.0019562742672860622, "learning_rate": 1.6601476014760147e-05, "loss": 1.0211579501628876e-05, "step": 293930 }, { "epoch": 83.43457280726653, "grad_norm": 0.0007796752033755183, "learning_rate": 1.6598637524836788e-05, "loss": 1.3387762010097504e-05, "step": 293940 }, { "epoch": 83.4374112971899, "grad_norm": 0.0007384990458376706, "learning_rate": 1.6595799034913426e-05, "loss": 1.3245642185211181e-05, "step": 293950 }, { "epoch": 83.44024978711326, "grad_norm": 0.003936964552849531, "learning_rate": 1.6592960544990067e-05, "loss": 1.0047852993011474e-05, "step": 293960 }, { "epoch": 83.44308827703662, "grad_norm": 0.01101703941822052, "learning_rate": 1.6590122055066705e-05, "loss": 1.1784955859184265e-05, "step": 293970 }, { "epoch": 83.44592676695997, "grad_norm": 0.00048352539306506515, "learning_rate": 1.6587283565143343e-05, "loss": 8.19433480501175e-06, "step": 293980 }, { "epoch": 83.44876525688333, "grad_norm": 0.0014152565272524953, "learning_rate": 1.6584445075219985e-05, "loss": 1.1462904512882233e-05, "step": 293990 }, { "epoch": 83.4516037468067, "grad_norm": 0.005573977716267109, "learning_rate": 1.6581606585296623e-05, "loss": 1.9420497119426726e-05, "step": 294000 }, { "epoch": 83.4516037468067, "eval_accuracy": 0.9886182997393018, "eval_loss": 0.05101526156067848, "eval_runtime": 35.0302, "eval_samples_per_second": 448.955, "eval_steps_per_second": 7.023, "step": 294000 }, { "epoch": 83.45444223673006, "grad_norm": 0.00017667606880422682, "learning_rate": 1.657876809537326e-05, "loss": 6.5837055444717406e-06, "step": 294010 }, { "epoch": 83.45728072665342, "grad_norm": 0.0001906512916320935, "learning_rate": 1.6575929605449902e-05, "loss": 7.991865277290345e-06, "step": 294020 }, { "epoch": 83.46011921657679, "grad_norm": 0.007082962431013584, "learning_rate": 1.657309111552654e-05, "loss": 1.2945383787155151e-05, "step": 294030 }, { "epoch": 83.46295770650015, "grad_norm": 0.005844367202371359, "learning_rate": 1.657025262560318e-05, "loss": 9.074434638023377e-06, "step": 294040 }, { "epoch": 83.4657961964235, "grad_norm": 0.0025028157979249954, "learning_rate": 1.656741413567982e-05, "loss": 1.1092610657215119e-05, "step": 294050 }, { "epoch": 83.46863468634686, "grad_norm": 0.0028316546231508255, "learning_rate": 1.6564575645756457e-05, "loss": 8.182227611541748e-06, "step": 294060 }, { "epoch": 83.47147317627022, "grad_norm": 0.0014632755191996694, "learning_rate": 1.65617371558331e-05, "loss": 6.191991269588471e-06, "step": 294070 }, { "epoch": 83.47431166619359, "grad_norm": 0.001527746208012104, "learning_rate": 1.6558898665909737e-05, "loss": 1.1505372822284698e-05, "step": 294080 }, { "epoch": 83.47715015611695, "grad_norm": 0.002981561701744795, "learning_rate": 1.6556060175986378e-05, "loss": 8.508563041687012e-06, "step": 294090 }, { "epoch": 83.47998864604031, "grad_norm": 0.00422977888956666, "learning_rate": 1.6553221686063013e-05, "loss": 9.039975702762604e-06, "step": 294100 }, { "epoch": 83.48282713596367, "grad_norm": 0.0012768751475960016, "learning_rate": 1.6550383196139654e-05, "loss": 1.005474478006363e-05, "step": 294110 }, { "epoch": 83.48566562588702, "grad_norm": 0.0005102668073959649, "learning_rate": 1.6547544706216296e-05, "loss": 1.6641244292259218e-05, "step": 294120 }, { "epoch": 83.48850411581039, "grad_norm": 0.0010232306085526943, "learning_rate": 1.6544706216292933e-05, "loss": 1.9381940364837647e-05, "step": 294130 }, { "epoch": 83.49134260573375, "grad_norm": 0.00396536011248827, "learning_rate": 1.654186772636957e-05, "loss": 9.366683661937713e-06, "step": 294140 }, { "epoch": 83.49418109565711, "grad_norm": 0.004469689913094044, "learning_rate": 1.653902923644621e-05, "loss": 8.395873010158539e-06, "step": 294150 }, { "epoch": 83.49701958558047, "grad_norm": 0.0053767054341733456, "learning_rate": 1.653619074652285e-05, "loss": 8.223764598369599e-06, "step": 294160 }, { "epoch": 83.49985807550384, "grad_norm": 0.0002766972756944597, "learning_rate": 1.6533352256599492e-05, "loss": 5.193613469600678e-06, "step": 294170 }, { "epoch": 83.50269656542719, "grad_norm": 0.000434800807852298, "learning_rate": 1.6530513766676127e-05, "loss": 3.768540918827057e-05, "step": 294180 }, { "epoch": 83.50553505535055, "grad_norm": 0.0016498557524755597, "learning_rate": 1.6527675276752768e-05, "loss": 4.9461983144283296e-05, "step": 294190 }, { "epoch": 83.50837354527391, "grad_norm": 0.004801791161298752, "learning_rate": 1.6524836786829406e-05, "loss": 0.00012872423976659775, "step": 294200 }, { "epoch": 83.51121203519727, "grad_norm": 0.000588586088269949, "learning_rate": 1.6522282145898383e-05, "loss": 0.005475197732448578, "step": 294210 }, { "epoch": 83.51405052512064, "grad_norm": 0.0036785672418773174, "learning_rate": 1.6519443655975024e-05, "loss": 5.056336522102356e-05, "step": 294220 }, { "epoch": 83.516889015044, "grad_norm": 0.0098478514701128, "learning_rate": 1.651660516605166e-05, "loss": 0.0001469094306230545, "step": 294230 }, { "epoch": 83.51972750496736, "grad_norm": 0.020519347861409187, "learning_rate": 1.65137666761283e-05, "loss": 9.687282145023345e-05, "step": 294240 }, { "epoch": 83.52256599489071, "grad_norm": 0.003143136389553547, "learning_rate": 1.651092818620494e-05, "loss": 5.548689514398575e-05, "step": 294250 }, { "epoch": 83.52540448481408, "grad_norm": 0.011472545564174652, "learning_rate": 1.650808969628158e-05, "loss": 3.6522559821605685e-05, "step": 294260 }, { "epoch": 83.52824297473744, "grad_norm": 0.09713811427354813, "learning_rate": 1.6505251206358217e-05, "loss": 0.00022816210985183717, "step": 294270 }, { "epoch": 83.5310814646608, "grad_norm": 0.006456742994487286, "learning_rate": 1.6502412716434855e-05, "loss": 0.00019249469041824341, "step": 294280 }, { "epoch": 83.53391995458416, "grad_norm": 0.23956961929798126, "learning_rate": 1.6499574226511497e-05, "loss": 0.0055481776595115665, "step": 294290 }, { "epoch": 83.53675844450753, "grad_norm": 0.21136532723903656, "learning_rate": 1.6496735736588138e-05, "loss": 0.00017112046480178834, "step": 294300 }, { "epoch": 83.53959693443089, "grad_norm": 0.0037039618473500013, "learning_rate": 1.6493897246664776e-05, "loss": 0.00021933633834123612, "step": 294310 }, { "epoch": 83.54243542435424, "grad_norm": 0.0032921298407018185, "learning_rate": 1.6491058756741414e-05, "loss": 2.3226626217365266e-05, "step": 294320 }, { "epoch": 83.5452739142776, "grad_norm": 0.2599877715110779, "learning_rate": 1.6488220266818052e-05, "loss": 0.0004726598039269447, "step": 294330 }, { "epoch": 83.54811240420096, "grad_norm": 0.013426629826426506, "learning_rate": 1.6485381776894694e-05, "loss": 4.200953990221024e-05, "step": 294340 }, { "epoch": 83.55095089412433, "grad_norm": 0.04118511080741882, "learning_rate": 1.648254328697133e-05, "loss": 0.00012072194367647171, "step": 294350 }, { "epoch": 83.55378938404769, "grad_norm": 0.035157401114702225, "learning_rate": 1.647970479704797e-05, "loss": 0.00012333709746599198, "step": 294360 }, { "epoch": 83.55662787397105, "grad_norm": 0.0065515004098415375, "learning_rate": 1.647686630712461e-05, "loss": 0.0002746792510151863, "step": 294370 }, { "epoch": 83.5594663638944, "grad_norm": 0.011203020811080933, "learning_rate": 1.647402781720125e-05, "loss": 0.0001163974404335022, "step": 294380 }, { "epoch": 83.56230485381776, "grad_norm": 0.005925189703702927, "learning_rate": 1.647118932727789e-05, "loss": 0.0008657468482851982, "step": 294390 }, { "epoch": 83.56514334374113, "grad_norm": 0.01163907814770937, "learning_rate": 1.6468350837354528e-05, "loss": 0.00288446843624115, "step": 294400 }, { "epoch": 83.56798183366449, "grad_norm": 0.0016878715250641108, "learning_rate": 1.6465512347431166e-05, "loss": 5.6034885346889496e-05, "step": 294410 }, { "epoch": 83.57082032358785, "grad_norm": 0.0758977085351944, "learning_rate": 1.6462673857507808e-05, "loss": 0.0016207365319132806, "step": 294420 }, { "epoch": 83.57365881351122, "grad_norm": 0.07878220826387405, "learning_rate": 1.6459835367584446e-05, "loss": 0.00012437589466571808, "step": 294430 }, { "epoch": 83.57649730343458, "grad_norm": 0.013281099498271942, "learning_rate": 1.6456996877661084e-05, "loss": 5.637593567371368e-05, "step": 294440 }, { "epoch": 83.57933579335793, "grad_norm": 0.0011263012420386076, "learning_rate": 1.6454158387737725e-05, "loss": 0.00026344861835241317, "step": 294450 }, { "epoch": 83.58217428328129, "grad_norm": 0.4608611762523651, "learning_rate": 1.6451319897814363e-05, "loss": 0.0033474206924438477, "step": 294460 }, { "epoch": 83.58501277320465, "grad_norm": 0.00605285307392478, "learning_rate": 1.6448481407891004e-05, "loss": 0.00011156518012285232, "step": 294470 }, { "epoch": 83.58785126312802, "grad_norm": 0.005928810685873032, "learning_rate": 1.6445642917967642e-05, "loss": 3.9505958557128904e-05, "step": 294480 }, { "epoch": 83.59068975305138, "grad_norm": 0.049453847110271454, "learning_rate": 1.644280442804428e-05, "loss": 7.345713675022125e-05, "step": 294490 }, { "epoch": 83.59352824297474, "grad_norm": 0.0016493068542331457, "learning_rate": 1.643996593812092e-05, "loss": 3.4976005554199216e-05, "step": 294500 }, { "epoch": 83.59352824297474, "eval_accuracy": 0.9864564125389458, "eval_loss": 0.05752965807914734, "eval_runtime": 35.3782, "eval_samples_per_second": 444.54, "eval_steps_per_second": 6.953, "step": 294500 }, { "epoch": 83.5963667328981, "grad_norm": 0.003542973194271326, "learning_rate": 1.643712744819756e-05, "loss": 7.088575512170792e-05, "step": 294510 }, { "epoch": 83.59920522282145, "grad_norm": 0.048593465238809586, "learning_rate": 1.64342889582742e-05, "loss": 4.496872425079346e-05, "step": 294520 }, { "epoch": 83.60204371274482, "grad_norm": 0.0025877251755446196, "learning_rate": 1.6431450468350836e-05, "loss": 0.00013285931199789048, "step": 294530 }, { "epoch": 83.60488220266818, "grad_norm": 4.4503254890441895, "learning_rate": 1.6428611978427477e-05, "loss": 0.000714765302836895, "step": 294540 }, { "epoch": 83.60772069259154, "grad_norm": 0.0019995023030787706, "learning_rate": 1.642577348850412e-05, "loss": 0.00023515522480010986, "step": 294550 }, { "epoch": 83.6105591825149, "grad_norm": 0.001752162235789001, "learning_rate": 1.6422934998580756e-05, "loss": 0.0004311397671699524, "step": 294560 }, { "epoch": 83.61339767243827, "grad_norm": 0.012521056458353996, "learning_rate": 1.6420096508657394e-05, "loss": 6.386470049619674e-05, "step": 294570 }, { "epoch": 83.61623616236163, "grad_norm": 0.09599446505308151, "learning_rate": 1.6417258018734032e-05, "loss": 5.173962563276291e-05, "step": 294580 }, { "epoch": 83.61907465228498, "grad_norm": 0.007167758885771036, "learning_rate": 1.6414419528810674e-05, "loss": 5.263183265924454e-05, "step": 294590 }, { "epoch": 83.62191314220834, "grad_norm": 0.04822216555476189, "learning_rate": 1.6411581038887315e-05, "loss": 3.111306577920914e-05, "step": 294600 }, { "epoch": 83.6247516321317, "grad_norm": 0.028668293729424477, "learning_rate": 1.640874254896395e-05, "loss": 0.00015612095594406128, "step": 294610 }, { "epoch": 83.62759012205507, "grad_norm": 0.026784980669617653, "learning_rate": 1.640590405904059e-05, "loss": 2.4829618632793425e-05, "step": 294620 }, { "epoch": 83.63042861197843, "grad_norm": 0.009934070520102978, "learning_rate": 1.640306556911723e-05, "loss": 2.3131817579269408e-05, "step": 294630 }, { "epoch": 83.6332671019018, "grad_norm": 0.21898531913757324, "learning_rate": 1.640022707919387e-05, "loss": 8.128434419631957e-05, "step": 294640 }, { "epoch": 83.63610559182514, "grad_norm": 0.0051409536972641945, "learning_rate": 1.639738858927051e-05, "loss": 3.8189440965652464e-05, "step": 294650 }, { "epoch": 83.6389440817485, "grad_norm": 0.005205738823860884, "learning_rate": 1.6394550099347146e-05, "loss": 6.050020456314087e-05, "step": 294660 }, { "epoch": 83.64178257167187, "grad_norm": 0.0027388951275497675, "learning_rate": 1.6391711609423788e-05, "loss": 1.0774470865726471e-05, "step": 294670 }, { "epoch": 83.64462106159523, "grad_norm": 0.003525169100612402, "learning_rate": 1.6388873119500426e-05, "loss": 7.95384868979454e-05, "step": 294680 }, { "epoch": 83.6474595515186, "grad_norm": 0.0014646850759163499, "learning_rate": 1.6386034629577067e-05, "loss": 9.645260870456695e-05, "step": 294690 }, { "epoch": 83.65029804144196, "grad_norm": 0.00524092186242342, "learning_rate": 1.6383196139653705e-05, "loss": 2.2632814943790435e-05, "step": 294700 }, { "epoch": 83.65313653136532, "grad_norm": 0.0162255447357893, "learning_rate": 1.6380357649730343e-05, "loss": 0.0005649793893098831, "step": 294710 }, { "epoch": 83.65597502128867, "grad_norm": 0.001733741839416325, "learning_rate": 1.6377519159806984e-05, "loss": 0.00014613475650548935, "step": 294720 }, { "epoch": 83.65881351121203, "grad_norm": 0.005860069300979376, "learning_rate": 1.6374680669883622e-05, "loss": 6.110221147537231e-05, "step": 294730 }, { "epoch": 83.6616520011354, "grad_norm": 0.018187616020441055, "learning_rate": 1.637184217996026e-05, "loss": 0.00017237309366464616, "step": 294740 }, { "epoch": 83.66449049105876, "grad_norm": 0.005997293628752232, "learning_rate": 1.6369003690036902e-05, "loss": 5.713403224945068e-05, "step": 294750 }, { "epoch": 83.66732898098212, "grad_norm": 0.003594451118260622, "learning_rate": 1.636616520011354e-05, "loss": 5.047321319580078e-05, "step": 294760 }, { "epoch": 83.67016747090548, "grad_norm": 0.021033652126789093, "learning_rate": 1.636332671019018e-05, "loss": 2.869460731744766e-05, "step": 294770 }, { "epoch": 83.67300596082885, "grad_norm": 0.0009137570159509778, "learning_rate": 1.636048822026682e-05, "loss": 0.00026205647736787796, "step": 294780 }, { "epoch": 83.6758444507522, "grad_norm": 0.0054022944532334805, "learning_rate": 1.6357649730343457e-05, "loss": 2.2801384329795837e-05, "step": 294790 }, { "epoch": 83.67868294067556, "grad_norm": 0.02466205693781376, "learning_rate": 1.63548112404201e-05, "loss": 0.0008076552301645278, "step": 294800 }, { "epoch": 83.68152143059892, "grad_norm": 0.00653120456263423, "learning_rate": 1.6351972750496736e-05, "loss": 3.469139337539673e-05, "step": 294810 }, { "epoch": 83.68435992052228, "grad_norm": 0.03111533261835575, "learning_rate": 1.6349134260573374e-05, "loss": 0.00024027302861213685, "step": 294820 }, { "epoch": 83.68719841044565, "grad_norm": 0.0184610765427351, "learning_rate": 1.6346295770650012e-05, "loss": 0.00010290779173374175, "step": 294830 }, { "epoch": 83.69003690036901, "grad_norm": 0.01712733879685402, "learning_rate": 1.6343457280726654e-05, "loss": 0.00019211750477552414, "step": 294840 }, { "epoch": 83.69287539029236, "grad_norm": 0.00379774603061378, "learning_rate": 1.6340618790803295e-05, "loss": 7.159821689128876e-05, "step": 294850 }, { "epoch": 83.69571388021572, "grad_norm": 0.002352698240429163, "learning_rate": 1.6337780300879933e-05, "loss": 3.498103469610214e-05, "step": 294860 }, { "epoch": 83.69855237013908, "grad_norm": 0.0030766502022743225, "learning_rate": 1.633494181095657e-05, "loss": 1.906082034111023e-05, "step": 294870 }, { "epoch": 83.70139086006245, "grad_norm": 0.004248383920639753, "learning_rate": 1.6332103321033213e-05, "loss": 3.436524420976639e-05, "step": 294880 }, { "epoch": 83.70422934998581, "grad_norm": 0.0019630210008472204, "learning_rate": 1.632926483110985e-05, "loss": 0.00024292506277561187, "step": 294890 }, { "epoch": 83.70706783990917, "grad_norm": 0.011783953756093979, "learning_rate": 1.6326426341186492e-05, "loss": 3.6633946001529695e-05, "step": 294900 }, { "epoch": 83.70990632983253, "grad_norm": 0.0013806665083393455, "learning_rate": 1.6323587851263127e-05, "loss": 3.380924463272095e-05, "step": 294910 }, { "epoch": 83.71274481975588, "grad_norm": 0.0035022965166717768, "learning_rate": 1.6320749361339768e-05, "loss": 2.1072104573249817e-05, "step": 294920 }, { "epoch": 83.71558330967925, "grad_norm": 0.002431063447147608, "learning_rate": 1.631791087141641e-05, "loss": 0.0001340636983513832, "step": 294930 }, { "epoch": 83.71842179960261, "grad_norm": 0.004574817605316639, "learning_rate": 1.6315072381493047e-05, "loss": 2.0180828869342804e-05, "step": 294940 }, { "epoch": 83.72126028952597, "grad_norm": 0.009490628726780415, "learning_rate": 1.6312233891569685e-05, "loss": 7.076337933540345e-05, "step": 294950 }, { "epoch": 83.72409877944933, "grad_norm": 0.014624539762735367, "learning_rate": 1.6309395401646323e-05, "loss": 2.218242734670639e-05, "step": 294960 }, { "epoch": 83.7269372693727, "grad_norm": 0.013449238613247871, "learning_rate": 1.6306556911722965e-05, "loss": 3.147702664136886e-05, "step": 294970 }, { "epoch": 83.72977575929606, "grad_norm": 0.005378904286772013, "learning_rate": 1.6303718421799606e-05, "loss": 6.386209279298782e-05, "step": 294980 }, { "epoch": 83.73261424921941, "grad_norm": 0.008807038888335228, "learning_rate": 1.6300879931876244e-05, "loss": 2.8769299387931823e-05, "step": 294990 }, { "epoch": 83.73545273914277, "grad_norm": 0.0037083185743540525, "learning_rate": 1.6298041441952882e-05, "loss": 1.620464026927948e-05, "step": 295000 }, { "epoch": 83.73545273914277, "eval_accuracy": 0.9862656577859732, "eval_loss": 0.058482956141233444, "eval_runtime": 35.7226, "eval_samples_per_second": 440.254, "eval_steps_per_second": 6.886, "step": 295000 }, { "epoch": 83.73829122906614, "grad_norm": 0.0007960214279592037, "learning_rate": 1.629520295202952e-05, "loss": 4.902109503746033e-05, "step": 295010 }, { "epoch": 83.7411297189895, "grad_norm": 0.0009506786591373384, "learning_rate": 1.629236446210616e-05, "loss": 8.559171110391616e-05, "step": 295020 }, { "epoch": 83.74396820891286, "grad_norm": 0.003918017260730267, "learning_rate": 1.62895259721828e-05, "loss": 2.5422871112823485e-05, "step": 295030 }, { "epoch": 83.74680669883622, "grad_norm": 0.03489838168025017, "learning_rate": 1.6286687482259437e-05, "loss": 2.9717199504375456e-05, "step": 295040 }, { "epoch": 83.74964518875959, "grad_norm": 0.002277570776641369, "learning_rate": 1.628384899233608e-05, "loss": 0.00018581058830022812, "step": 295050 }, { "epoch": 83.75248367868294, "grad_norm": 0.004647917114198208, "learning_rate": 1.6281010502412717e-05, "loss": 2.694893628358841e-05, "step": 295060 }, { "epoch": 83.7553221686063, "grad_norm": 0.003644760465249419, "learning_rate": 1.6278172012489358e-05, "loss": 1.890435814857483e-05, "step": 295070 }, { "epoch": 83.75816065852966, "grad_norm": 0.0064095621928572655, "learning_rate": 1.6275333522565996e-05, "loss": 7.073339074850082e-05, "step": 295080 }, { "epoch": 83.76099914845302, "grad_norm": 0.05029977113008499, "learning_rate": 1.6272495032642634e-05, "loss": 0.00010596998035907745, "step": 295090 }, { "epoch": 83.76383763837639, "grad_norm": 0.0036463493015617132, "learning_rate": 1.6269656542719275e-05, "loss": 0.0015505896881222725, "step": 295100 }, { "epoch": 83.76667612829975, "grad_norm": 0.0963483676314354, "learning_rate": 1.6266818052795913e-05, "loss": 0.0004392948001623154, "step": 295110 }, { "epoch": 83.7695146182231, "grad_norm": 0.06782761216163635, "learning_rate": 1.626397956287255e-05, "loss": 5.8490410447120665e-05, "step": 295120 }, { "epoch": 83.77235310814646, "grad_norm": 0.014391086995601654, "learning_rate": 1.6261141072949193e-05, "loss": 7.957853376865387e-05, "step": 295130 }, { "epoch": 83.77519159806982, "grad_norm": 0.24507738649845123, "learning_rate": 1.625830258302583e-05, "loss": 0.00014476198703050613, "step": 295140 }, { "epoch": 83.77803008799319, "grad_norm": 0.0032644972670823336, "learning_rate": 1.6255464093102472e-05, "loss": 1.996830105781555e-05, "step": 295150 }, { "epoch": 83.78086857791655, "grad_norm": 0.0014660194283351302, "learning_rate": 1.625262560317911e-05, "loss": 2.7878768742084502e-05, "step": 295160 }, { "epoch": 83.78370706783991, "grad_norm": 0.0036074023228138685, "learning_rate": 1.6249787113255748e-05, "loss": 0.00020907931029796601, "step": 295170 }, { "epoch": 83.78654555776328, "grad_norm": 0.023796841502189636, "learning_rate": 1.624694862333239e-05, "loss": 2.104956656694412e-05, "step": 295180 }, { "epoch": 83.78938404768662, "grad_norm": 0.11776126176118851, "learning_rate": 1.6244110133409027e-05, "loss": 6.616152822971343e-05, "step": 295190 }, { "epoch": 83.79222253760999, "grad_norm": 0.005551531910896301, "learning_rate": 1.624127164348567e-05, "loss": 8.773133158683777e-05, "step": 295200 }, { "epoch": 83.79506102753335, "grad_norm": 0.011568027548491955, "learning_rate": 1.6238433153562303e-05, "loss": 1.5564635396003724e-05, "step": 295210 }, { "epoch": 83.79789951745671, "grad_norm": 0.008198557421565056, "learning_rate": 1.6235594663638945e-05, "loss": 1.7041712999343873e-05, "step": 295220 }, { "epoch": 83.80073800738008, "grad_norm": 0.000878060469403863, "learning_rate": 1.6232756173715586e-05, "loss": 1.6184337437152862e-05, "step": 295230 }, { "epoch": 83.80357649730344, "grad_norm": 0.003266157815232873, "learning_rate": 1.6229917683792224e-05, "loss": 3.26499342918396e-05, "step": 295240 }, { "epoch": 83.8064149872268, "grad_norm": 0.001997255254536867, "learning_rate": 1.6227079193868862e-05, "loss": 1.5225261449813843e-05, "step": 295250 }, { "epoch": 83.80925347715015, "grad_norm": 0.003549814922735095, "learning_rate": 1.62242407039455e-05, "loss": 1.1371262371540069e-05, "step": 295260 }, { "epoch": 83.81209196707351, "grad_norm": 0.00484327832236886, "learning_rate": 1.622140221402214e-05, "loss": 1.5680678188800812e-05, "step": 295270 }, { "epoch": 83.81493045699688, "grad_norm": 0.000434642075560987, "learning_rate": 1.6218563724098783e-05, "loss": 1.2318789958953858e-05, "step": 295280 }, { "epoch": 83.81776894692024, "grad_norm": 0.00011124116281280294, "learning_rate": 1.6215725234175417e-05, "loss": 2.1579675376415253e-05, "step": 295290 }, { "epoch": 83.8206074368436, "grad_norm": 0.005742691457271576, "learning_rate": 1.621288674425206e-05, "loss": 1.3101845979690552e-05, "step": 295300 }, { "epoch": 83.82344592676696, "grad_norm": 0.0150978434830904, "learning_rate": 1.6210048254328697e-05, "loss": 2.0123086869716644e-05, "step": 295310 }, { "epoch": 83.82628441669033, "grad_norm": 0.0002033067576121539, "learning_rate": 1.6207209764405338e-05, "loss": 4.779733717441559e-05, "step": 295320 }, { "epoch": 83.82912290661368, "grad_norm": 0.001628337660804391, "learning_rate": 1.6204371274481976e-05, "loss": 4.9501098692417146e-05, "step": 295330 }, { "epoch": 83.83196139653704, "grad_norm": 0.007909636944532394, "learning_rate": 1.6201532784558614e-05, "loss": 1.4373846352100372e-05, "step": 295340 }, { "epoch": 83.8347998864604, "grad_norm": 0.01209199521690607, "learning_rate": 1.6198694294635256e-05, "loss": 3.7590228021144864e-05, "step": 295350 }, { "epoch": 83.83763837638377, "grad_norm": 0.010922085493803024, "learning_rate": 1.6195855804711893e-05, "loss": 2.20663845539093e-05, "step": 295360 }, { "epoch": 83.84047686630713, "grad_norm": 0.021070560440421104, "learning_rate": 1.6193017314788535e-05, "loss": 2.6983022689819335e-05, "step": 295370 }, { "epoch": 83.84331535623049, "grad_norm": 0.0003455541154835373, "learning_rate": 1.6190178824865173e-05, "loss": 1.3957172632217407e-05, "step": 295380 }, { "epoch": 83.84615384615384, "grad_norm": 0.019594326615333557, "learning_rate": 1.618734033494181e-05, "loss": 8.346326649188995e-06, "step": 295390 }, { "epoch": 83.8489923360772, "grad_norm": 0.000599872728344053, "learning_rate": 1.6184501845018452e-05, "loss": 6.217136979103089e-06, "step": 295400 }, { "epoch": 83.85183082600057, "grad_norm": 0.003201971761882305, "learning_rate": 1.618166335509509e-05, "loss": 1.1987611651420594e-05, "step": 295410 }, { "epoch": 83.85466931592393, "grad_norm": 0.00027217218303121626, "learning_rate": 1.6178824865171728e-05, "loss": 2.9514357447624206e-05, "step": 295420 }, { "epoch": 83.85750780584729, "grad_norm": 0.0026599718257784843, "learning_rate": 1.617598637524837e-05, "loss": 1.4165416359901428e-05, "step": 295430 }, { "epoch": 83.86034629577065, "grad_norm": 0.0010144533589482307, "learning_rate": 1.6173147885325008e-05, "loss": 7.290393114089966e-06, "step": 295440 }, { "epoch": 83.86318478569402, "grad_norm": 0.05191967636346817, "learning_rate": 1.617030939540165e-05, "loss": 1.832451671361923e-05, "step": 295450 }, { "epoch": 83.86602327561737, "grad_norm": 0.0045969439670443535, "learning_rate": 1.6167470905478287e-05, "loss": 1.0342895984649659e-05, "step": 295460 }, { "epoch": 83.86886176554073, "grad_norm": 0.0004635247169062495, "learning_rate": 1.6164632415554925e-05, "loss": 6.0727819800376895e-06, "step": 295470 }, { "epoch": 83.87170025546409, "grad_norm": 0.0025760605931282043, "learning_rate": 1.6161793925631566e-05, "loss": 1.3250298798084259e-05, "step": 295480 }, { "epoch": 83.87453874538745, "grad_norm": 0.0012292992323637009, "learning_rate": 1.6158955435708204e-05, "loss": 1.0779500007629394e-05, "step": 295490 }, { "epoch": 83.87737723531082, "grad_norm": 0.0028021151665598154, "learning_rate": 1.6156116945784842e-05, "loss": 2.5559403002262116e-05, "step": 295500 }, { "epoch": 83.87737723531082, "eval_accuracy": 0.9875373561391237, "eval_loss": 0.05133650824427605, "eval_runtime": 35.6902, "eval_samples_per_second": 440.653, "eval_steps_per_second": 6.893, "step": 295500 }, { "epoch": 83.88021572523418, "grad_norm": 0.0005585010512731969, "learning_rate": 1.615327845586148e-05, "loss": 1.2020952999591827e-05, "step": 295510 }, { "epoch": 83.88305421515754, "grad_norm": 0.00581887923181057, "learning_rate": 1.615043996593812e-05, "loss": 1.4318525791168213e-05, "step": 295520 }, { "epoch": 83.88589270508089, "grad_norm": 0.002095407573506236, "learning_rate": 1.6147601476014763e-05, "loss": 3.3463723957538606e-05, "step": 295530 }, { "epoch": 83.88873119500425, "grad_norm": 0.0013865182409062982, "learning_rate": 1.61447629860914e-05, "loss": 1.0751746594905854e-05, "step": 295540 }, { "epoch": 83.89156968492762, "grad_norm": 0.00149140739813447, "learning_rate": 1.614192449616804e-05, "loss": 1.0655447840690613e-05, "step": 295550 }, { "epoch": 83.89440817485098, "grad_norm": 0.000968695676419884, "learning_rate": 1.6139086006244677e-05, "loss": 8.935853838920593e-06, "step": 295560 }, { "epoch": 83.89724666477434, "grad_norm": 0.0019378842553123832, "learning_rate": 1.613624751632132e-05, "loss": 1.9137933850288392e-05, "step": 295570 }, { "epoch": 83.9000851546977, "grad_norm": 0.003679807297885418, "learning_rate": 1.613340902639796e-05, "loss": 2.6128068566322326e-05, "step": 295580 }, { "epoch": 83.90292364462105, "grad_norm": 0.0012047773925587535, "learning_rate": 1.6130570536474594e-05, "loss": 1.2624636292457581e-05, "step": 295590 }, { "epoch": 83.90576213454442, "grad_norm": 0.003584636142477393, "learning_rate": 1.6127732046551236e-05, "loss": 1.0475702583789826e-05, "step": 295600 }, { "epoch": 83.90860062446778, "grad_norm": 0.00363164278678596, "learning_rate": 1.6124893556627874e-05, "loss": 2.161860466003418e-05, "step": 295610 }, { "epoch": 83.91143911439114, "grad_norm": 0.0009848333429545164, "learning_rate": 1.6122055066704515e-05, "loss": 9.283050894737244e-06, "step": 295620 }, { "epoch": 83.9142776043145, "grad_norm": 0.0013305842876434326, "learning_rate": 1.6119216576781153e-05, "loss": 9.650364518165588e-06, "step": 295630 }, { "epoch": 83.91711609423787, "grad_norm": 0.0019229206955060363, "learning_rate": 1.611637808685779e-05, "loss": 1.4919601380825042e-05, "step": 295640 }, { "epoch": 83.91995458416123, "grad_norm": 0.0008959723054431379, "learning_rate": 1.6113539596934432e-05, "loss": 1.3517774641513825e-05, "step": 295650 }, { "epoch": 83.92279307408458, "grad_norm": 0.008061718195676804, "learning_rate": 1.611070110701107e-05, "loss": 1.5999749302864073e-05, "step": 295660 }, { "epoch": 83.92563156400794, "grad_norm": 0.0007871253765188158, "learning_rate": 1.6107862617087712e-05, "loss": 6.511993706226349e-06, "step": 295670 }, { "epoch": 83.9284700539313, "grad_norm": 0.009678508155047894, "learning_rate": 1.610502412716435e-05, "loss": 1.3429298996925354e-05, "step": 295680 }, { "epoch": 83.93130854385467, "grad_norm": 0.007034068927168846, "learning_rate": 1.6102185637240988e-05, "loss": 1.668427139520645e-05, "step": 295690 }, { "epoch": 83.93414703377803, "grad_norm": 0.001518985372968018, "learning_rate": 1.609934714731763e-05, "loss": 3.222469240427017e-05, "step": 295700 }, { "epoch": 83.9369855237014, "grad_norm": 0.00659157894551754, "learning_rate": 1.6096508657394267e-05, "loss": 1.0162964463233947e-05, "step": 295710 }, { "epoch": 83.93982401362476, "grad_norm": 0.0008254894055426121, "learning_rate": 1.6093670167470905e-05, "loss": 2.1380558609962464e-05, "step": 295720 }, { "epoch": 83.9426625035481, "grad_norm": 0.005073063541203737, "learning_rate": 1.6090831677547546e-05, "loss": 1.2838095426559448e-05, "step": 295730 }, { "epoch": 83.94550099347147, "grad_norm": 0.0012707376154139638, "learning_rate": 1.6087993187624184e-05, "loss": 1.5445426106452943e-05, "step": 295740 }, { "epoch": 83.94833948339483, "grad_norm": 0.008892069570720196, "learning_rate": 1.6085154697700826e-05, "loss": 1.2024305760860443e-05, "step": 295750 }, { "epoch": 83.9511779733182, "grad_norm": 0.01464647427201271, "learning_rate": 1.608231620777746e-05, "loss": 2.633165568113327e-05, "step": 295760 }, { "epoch": 83.95401646324156, "grad_norm": 0.004781771916896105, "learning_rate": 1.6079477717854102e-05, "loss": 1.1289678514003754e-05, "step": 295770 }, { "epoch": 83.95685495316492, "grad_norm": 0.0005804034881293774, "learning_rate": 1.6076639227930743e-05, "loss": 8.110329508781433e-06, "step": 295780 }, { "epoch": 83.95969344308828, "grad_norm": 0.0026415097527205944, "learning_rate": 1.607380073800738e-05, "loss": 1.0209530591964721e-05, "step": 295790 }, { "epoch": 83.96253193301163, "grad_norm": 0.0018138433806598186, "learning_rate": 1.607096224808402e-05, "loss": 1.1703558266162873e-05, "step": 295800 }, { "epoch": 83.965370422935, "grad_norm": 0.00047110990271903574, "learning_rate": 1.6068123758160657e-05, "loss": 8.350983262062072e-06, "step": 295810 }, { "epoch": 83.96820891285836, "grad_norm": 0.00046205587568692863, "learning_rate": 1.60652852682373e-05, "loss": 1.3122707605361938e-05, "step": 295820 }, { "epoch": 83.97104740278172, "grad_norm": 0.010174068622291088, "learning_rate": 1.606244677831394e-05, "loss": 1.229308545589447e-05, "step": 295830 }, { "epoch": 83.97388589270508, "grad_norm": 0.04156505689024925, "learning_rate": 1.6059608288390578e-05, "loss": 1.4996901154518127e-05, "step": 295840 }, { "epoch": 83.97672438262845, "grad_norm": 0.00674574077129364, "learning_rate": 1.6056769798467216e-05, "loss": 1.5021301805973053e-05, "step": 295850 }, { "epoch": 83.9795628725518, "grad_norm": 0.002269954886287451, "learning_rate": 1.6053931308543854e-05, "loss": 9.308755397796632e-06, "step": 295860 }, { "epoch": 83.98240136247516, "grad_norm": 0.01808878779411316, "learning_rate": 1.6051092818620495e-05, "loss": 0.00011679381132125854, "step": 295870 }, { "epoch": 83.98523985239852, "grad_norm": 0.0041749123483896255, "learning_rate": 1.6048254328697133e-05, "loss": 0.0001392066478729248, "step": 295880 }, { "epoch": 83.98807834232188, "grad_norm": 0.005739323794841766, "learning_rate": 1.604541583877377e-05, "loss": 1.3688951730728149e-05, "step": 295890 }, { "epoch": 83.99091683224525, "grad_norm": 0.0010283216834068298, "learning_rate": 1.6042577348850413e-05, "loss": 1.3843551278114319e-05, "step": 295900 }, { "epoch": 83.99375532216861, "grad_norm": 0.016980323940515518, "learning_rate": 1.603973885892705e-05, "loss": 1.4782324433326721e-05, "step": 295910 }, { "epoch": 83.99659381209197, "grad_norm": 0.0009224251843988895, "learning_rate": 1.6036900369003692e-05, "loss": 2.0512938499450682e-05, "step": 295920 }, { "epoch": 83.99943230201532, "grad_norm": 0.002358950674533844, "learning_rate": 1.603406187908033e-05, "loss": 2.0523928105831148e-05, "step": 295930 }, { "epoch": 84.00227079193868, "grad_norm": 0.00086756277596578, "learning_rate": 1.6031223389156968e-05, "loss": 1.3202936679590494e-05, "step": 295940 }, { "epoch": 84.00510928186205, "grad_norm": 0.003577884053811431, "learning_rate": 1.602838489923361e-05, "loss": 8.697621524333954e-06, "step": 295950 }, { "epoch": 84.00794777178541, "grad_norm": 0.005048999562859535, "learning_rate": 1.6025546409310247e-05, "loss": 9.238719940185547e-06, "step": 295960 }, { "epoch": 84.01078626170877, "grad_norm": 0.0017065108986571431, "learning_rate": 1.6022707919386885e-05, "loss": 1.4930963516235352e-05, "step": 295970 }, { "epoch": 84.01362475163214, "grad_norm": 0.0014427306596189737, "learning_rate": 1.6019869429463527e-05, "loss": 2.1020136773586274e-05, "step": 295980 }, { "epoch": 84.0164632415555, "grad_norm": 0.0052632903680205345, "learning_rate": 1.6017030939540165e-05, "loss": 2.243686467409134e-05, "step": 295990 }, { "epoch": 84.01930173147885, "grad_norm": 0.0003910947125405073, "learning_rate": 1.6014192449616806e-05, "loss": 9.462051093578338e-06, "step": 296000 }, { "epoch": 84.01930173147885, "eval_accuracy": 0.9871558466331786, "eval_loss": 0.05004455894231796, "eval_runtime": 34.9559, "eval_samples_per_second": 449.91, "eval_steps_per_second": 7.037, "step": 296000 }, { "epoch": 84.02214022140221, "grad_norm": 0.0037257967051118612, "learning_rate": 1.6011353959693444e-05, "loss": 1.009330153465271e-05, "step": 296010 }, { "epoch": 84.02497871132557, "grad_norm": 0.0008485456346534193, "learning_rate": 1.6008515469770082e-05, "loss": 1.2051127851009368e-05, "step": 296020 }, { "epoch": 84.02781720124894, "grad_norm": 0.0012813480570912361, "learning_rate": 1.6005676979846723e-05, "loss": 7.765181362628936e-06, "step": 296030 }, { "epoch": 84.0306556911723, "grad_norm": 0.0026135160587728024, "learning_rate": 1.600283848992336e-05, "loss": 8.245557546615601e-06, "step": 296040 }, { "epoch": 84.03349418109566, "grad_norm": 0.0014848967548459768, "learning_rate": 1.6000000000000003e-05, "loss": 2.5272369384765625e-05, "step": 296050 }, { "epoch": 84.03633267101901, "grad_norm": 0.0007080142968334258, "learning_rate": 1.5997161510076637e-05, "loss": 1.4771334826946259e-05, "step": 296060 }, { "epoch": 84.03917116094237, "grad_norm": 0.00025961388018913567, "learning_rate": 1.599432302015328e-05, "loss": 6.834603846073151e-06, "step": 296070 }, { "epoch": 84.04200965086574, "grad_norm": 0.005869443062692881, "learning_rate": 1.599148453022992e-05, "loss": 1.8842704594135285e-05, "step": 296080 }, { "epoch": 84.0448481407891, "grad_norm": 0.0012370044132694602, "learning_rate": 1.5988646040306558e-05, "loss": 1.1484883725643157e-05, "step": 296090 }, { "epoch": 84.04768663071246, "grad_norm": 0.005657334811985493, "learning_rate": 1.5985807550383196e-05, "loss": 3.460198640823364e-05, "step": 296100 }, { "epoch": 84.05052512063583, "grad_norm": 0.0023927083238959312, "learning_rate": 1.5982969060459834e-05, "loss": 1.921132206916809e-05, "step": 296110 }, { "epoch": 84.05336361055919, "grad_norm": 0.0014397531049326062, "learning_rate": 1.5980130570536475e-05, "loss": 1.4980696141719818e-05, "step": 296120 }, { "epoch": 84.05620210048254, "grad_norm": 0.0009899428114295006, "learning_rate": 1.5977292080613117e-05, "loss": 9.14689153432846e-06, "step": 296130 }, { "epoch": 84.0590405904059, "grad_norm": 0.008247783407568932, "learning_rate": 1.597445359068975e-05, "loss": 1.1964328587055207e-05, "step": 296140 }, { "epoch": 84.06187908032926, "grad_norm": 0.0007364741759374738, "learning_rate": 1.5971615100766393e-05, "loss": 8.462555706501007e-06, "step": 296150 }, { "epoch": 84.06471757025263, "grad_norm": 0.0050046080723404884, "learning_rate": 1.596877661084303e-05, "loss": 1.079421490430832e-05, "step": 296160 }, { "epoch": 84.06755606017599, "grad_norm": 0.0017121686832979321, "learning_rate": 1.5965938120919672e-05, "loss": 6.363168358802795e-06, "step": 296170 }, { "epoch": 84.07039455009935, "grad_norm": 0.002567059826105833, "learning_rate": 1.596309963099631e-05, "loss": 1.10674649477005e-05, "step": 296180 }, { "epoch": 84.07323304002271, "grad_norm": 0.001966595184057951, "learning_rate": 1.5960261141072948e-05, "loss": 9.968690574169158e-06, "step": 296190 }, { "epoch": 84.07607152994606, "grad_norm": 0.0015325596323236823, "learning_rate": 1.595742265114959e-05, "loss": 1.0301172733306884e-05, "step": 296200 }, { "epoch": 84.07891001986943, "grad_norm": 0.002942604711279273, "learning_rate": 1.595458416122623e-05, "loss": 9.925477206707e-06, "step": 296210 }, { "epoch": 84.08174850979279, "grad_norm": 0.0014210729859769344, "learning_rate": 1.595174567130287e-05, "loss": 6.754696369171143e-06, "step": 296220 }, { "epoch": 84.08458699971615, "grad_norm": 0.0007527395500801504, "learning_rate": 1.5948907181379507e-05, "loss": 6.580539047718048e-06, "step": 296230 }, { "epoch": 84.08742548963951, "grad_norm": 0.0012039400171488523, "learning_rate": 1.5946068691456145e-05, "loss": 1.0176748037338258e-05, "step": 296240 }, { "epoch": 84.09026397956288, "grad_norm": 0.009505148977041245, "learning_rate": 1.5943230201532786e-05, "loss": 1.2633204460144042e-05, "step": 296250 }, { "epoch": 84.09310246948624, "grad_norm": 0.000892804644536227, "learning_rate": 1.5940391711609427e-05, "loss": 1.826118677854538e-05, "step": 296260 }, { "epoch": 84.09594095940959, "grad_norm": 0.0007290068315342069, "learning_rate": 1.5937553221686062e-05, "loss": 1.1403299868106843e-05, "step": 296270 }, { "epoch": 84.09877944933295, "grad_norm": 0.030483366921544075, "learning_rate": 1.5934714731762703e-05, "loss": 1.5887804329395296e-05, "step": 296280 }, { "epoch": 84.10161793925631, "grad_norm": 0.0013683510478585958, "learning_rate": 1.593187624183934e-05, "loss": 7.231533527374268e-06, "step": 296290 }, { "epoch": 84.10445642917968, "grad_norm": 0.00244316877797246, "learning_rate": 1.5929037751915983e-05, "loss": 1.3442523777484893e-05, "step": 296300 }, { "epoch": 84.10729491910304, "grad_norm": 0.0018808965105563402, "learning_rate": 1.592619926199262e-05, "loss": 9.416975080966949e-06, "step": 296310 }, { "epoch": 84.1101334090264, "grad_norm": 0.006269616540521383, "learning_rate": 1.592336077206926e-05, "loss": 1.0849907994270325e-05, "step": 296320 }, { "epoch": 84.11297189894975, "grad_norm": 0.0011233962140977383, "learning_rate": 1.59205222821459e-05, "loss": 1.3816170394420624e-05, "step": 296330 }, { "epoch": 84.11581038887311, "grad_norm": 0.004290325567126274, "learning_rate": 1.5917683792222538e-05, "loss": 1.052115112543106e-05, "step": 296340 }, { "epoch": 84.11864887879648, "grad_norm": 0.010223230347037315, "learning_rate": 1.5914845302299176e-05, "loss": 8.193030953407288e-06, "step": 296350 }, { "epoch": 84.12148736871984, "grad_norm": 0.0005408009164966643, "learning_rate": 1.5912006812375818e-05, "loss": 9.641796350479125e-06, "step": 296360 }, { "epoch": 84.1243258586432, "grad_norm": 0.001345616183243692, "learning_rate": 1.5909168322452456e-05, "loss": 8.322671055793762e-06, "step": 296370 }, { "epoch": 84.12716434856657, "grad_norm": 0.000493602070491761, "learning_rate": 1.5906329832529097e-05, "loss": 8.644349873065948e-06, "step": 296380 }, { "epoch": 84.13000283848993, "grad_norm": 0.002429139567539096, "learning_rate": 1.5903491342605735e-05, "loss": 5.203858017921448e-06, "step": 296390 }, { "epoch": 84.13284132841328, "grad_norm": 0.001976722851395607, "learning_rate": 1.5900652852682373e-05, "loss": 8.454732596874238e-06, "step": 296400 }, { "epoch": 84.13567981833664, "grad_norm": 0.000839187006931752, "learning_rate": 1.5897814362759014e-05, "loss": 1.1976994574069977e-05, "step": 296410 }, { "epoch": 84.13851830826, "grad_norm": 0.0008034450584091246, "learning_rate": 1.5894975872835652e-05, "loss": 1.68653205037117e-05, "step": 296420 }, { "epoch": 84.14135679818337, "grad_norm": 0.0030974613036960363, "learning_rate": 1.5892137382912294e-05, "loss": 1.0291114449501038e-05, "step": 296430 }, { "epoch": 84.14419528810673, "grad_norm": 0.0006779953255318105, "learning_rate": 1.5889298892988928e-05, "loss": 1.2709014117717744e-05, "step": 296440 }, { "epoch": 84.14703377803009, "grad_norm": 0.002478294540196657, "learning_rate": 1.588646040306557e-05, "loss": 9.828992187976836e-06, "step": 296450 }, { "epoch": 84.14987226795346, "grad_norm": 0.0014630263904109597, "learning_rate": 1.588362191314221e-05, "loss": 8.374266326427459e-06, "step": 296460 }, { "epoch": 84.1527107578768, "grad_norm": 0.002590075833722949, "learning_rate": 1.588078342321885e-05, "loss": 1.5893764793872833e-05, "step": 296470 }, { "epoch": 84.15554924780017, "grad_norm": 0.0007046394166536629, "learning_rate": 1.5877944933295487e-05, "loss": 8.32006335258484e-06, "step": 296480 }, { "epoch": 84.15838773772353, "grad_norm": 0.0020058141089975834, "learning_rate": 1.5875106443372125e-05, "loss": 1.7598271369934082e-05, "step": 296490 }, { "epoch": 84.16122622764689, "grad_norm": 0.0034892752300947905, "learning_rate": 1.5872267953448766e-05, "loss": 7.521919906139374e-06, "step": 296500 }, { "epoch": 84.16122622764689, "eval_accuracy": 0.9881096203980416, "eval_loss": 0.049745261669158936, "eval_runtime": 35.3927, "eval_samples_per_second": 444.357, "eval_steps_per_second": 6.951, "step": 296500 }, { "epoch": 84.16406471757026, "grad_norm": 0.0017053927294909954, "learning_rate": 1.5869429463525408e-05, "loss": 1.4794990420341492e-05, "step": 296510 }, { "epoch": 84.16690320749362, "grad_norm": 0.0027550829108804464, "learning_rate": 1.5866590973602046e-05, "loss": 9.96757298707962e-06, "step": 296520 }, { "epoch": 84.16974169741698, "grad_norm": 0.006168820429593325, "learning_rate": 1.5863752483678684e-05, "loss": 1.4038011431694031e-05, "step": 296530 }, { "epoch": 84.17258018734033, "grad_norm": 0.05492206662893295, "learning_rate": 1.586091399375532e-05, "loss": 1.5987642109394072e-05, "step": 296540 }, { "epoch": 84.17541867726369, "grad_norm": 0.0017129691550508142, "learning_rate": 1.5858075503831963e-05, "loss": 1.2430176138877868e-05, "step": 296550 }, { "epoch": 84.17825716718706, "grad_norm": 0.0014822520315647125, "learning_rate": 1.58552370139086e-05, "loss": 1.243986189365387e-05, "step": 296560 }, { "epoch": 84.18109565711042, "grad_norm": 0.001972437370568514, "learning_rate": 1.585239852398524e-05, "loss": 1.2022443115711212e-05, "step": 296570 }, { "epoch": 84.18393414703378, "grad_norm": 0.0009562775958329439, "learning_rate": 1.584956003406188e-05, "loss": 9.968318045139312e-06, "step": 296580 }, { "epoch": 84.18677263695714, "grad_norm": 0.006878669373691082, "learning_rate": 1.584672154413852e-05, "loss": 8.67992639541626e-06, "step": 296590 }, { "epoch": 84.1896111268805, "grad_norm": 0.000803906936198473, "learning_rate": 1.584388305421516e-05, "loss": 9.714066982269287e-06, "step": 296600 }, { "epoch": 84.19244961680386, "grad_norm": 0.0005015629576519132, "learning_rate": 1.5841044564291798e-05, "loss": 1.0659173130989075e-05, "step": 296610 }, { "epoch": 84.19528810672722, "grad_norm": 0.0037199335638433695, "learning_rate": 1.5838206074368436e-05, "loss": 1.7001666128635408e-05, "step": 296620 }, { "epoch": 84.19812659665058, "grad_norm": 0.0005614288966171443, "learning_rate": 1.5835367584445077e-05, "loss": 9.530596435070038e-06, "step": 296630 }, { "epoch": 84.20096508657394, "grad_norm": 0.0003016546252183616, "learning_rate": 1.5832529094521715e-05, "loss": 1.1356547474861145e-05, "step": 296640 }, { "epoch": 84.20380357649731, "grad_norm": 0.015299099497497082, "learning_rate": 1.5829690604598353e-05, "loss": 1.3593584299087524e-05, "step": 296650 }, { "epoch": 84.20664206642067, "grad_norm": 0.002663069637492299, "learning_rate": 1.5826852114674994e-05, "loss": 8.83936882019043e-06, "step": 296660 }, { "epoch": 84.20948055634402, "grad_norm": 0.0011399160139262676, "learning_rate": 1.5824013624751632e-05, "loss": 1.2670084834098816e-05, "step": 296670 }, { "epoch": 84.21231904626738, "grad_norm": 0.0029775784350931644, "learning_rate": 1.5821175134828274e-05, "loss": 1.3065896928310394e-05, "step": 296680 }, { "epoch": 84.21515753619074, "grad_norm": 0.005487257149070501, "learning_rate": 1.5818336644904912e-05, "loss": 8.596107363700867e-06, "step": 296690 }, { "epoch": 84.21799602611411, "grad_norm": 0.0007438442553393543, "learning_rate": 1.581549815498155e-05, "loss": 9.092874825000763e-06, "step": 296700 }, { "epoch": 84.22083451603747, "grad_norm": 0.004386795684695244, "learning_rate": 1.581265966505819e-05, "loss": 7.335096597671509e-06, "step": 296710 }, { "epoch": 84.22367300596083, "grad_norm": 0.0015789690660312772, "learning_rate": 1.580982117513483e-05, "loss": 6.256438791751862e-06, "step": 296720 }, { "epoch": 84.2265114958842, "grad_norm": 0.0004355823912192136, "learning_rate": 1.580698268521147e-05, "loss": 9.284541010856629e-06, "step": 296730 }, { "epoch": 84.22934998580754, "grad_norm": 0.002567610703408718, "learning_rate": 1.5804144195288105e-05, "loss": 9.843893349170684e-06, "step": 296740 }, { "epoch": 84.23218847573091, "grad_norm": 0.0006201996584422886, "learning_rate": 1.5801305705364746e-05, "loss": 1.0286085307598114e-05, "step": 296750 }, { "epoch": 84.23502696565427, "grad_norm": 0.0004989896551705897, "learning_rate": 1.5798467215441388e-05, "loss": 1.168716698884964e-05, "step": 296760 }, { "epoch": 84.23786545557763, "grad_norm": 0.008553258143365383, "learning_rate": 1.5795628725518026e-05, "loss": 1.3377144932746888e-05, "step": 296770 }, { "epoch": 84.240703945501, "grad_norm": 0.00030454303487204015, "learning_rate": 1.5792790235594664e-05, "loss": 5.603022873401642e-06, "step": 296780 }, { "epoch": 84.24354243542436, "grad_norm": 0.0005679805180989206, "learning_rate": 1.5789951745671302e-05, "loss": 1.1096708476543427e-05, "step": 296790 }, { "epoch": 84.24638092534771, "grad_norm": 0.020517760887742043, "learning_rate": 1.5787113255747943e-05, "loss": 1.146998256444931e-05, "step": 296800 }, { "epoch": 84.24921941527107, "grad_norm": 0.0009127371595241129, "learning_rate": 1.5784274765824585e-05, "loss": 1.0558217763900756e-05, "step": 296810 }, { "epoch": 84.25205790519443, "grad_norm": 0.00045216281432658434, "learning_rate": 1.578143627590122e-05, "loss": 1.009143888950348e-05, "step": 296820 }, { "epoch": 84.2548963951178, "grad_norm": 0.0009105314966291189, "learning_rate": 1.577859778597786e-05, "loss": 9.883008897304534e-06, "step": 296830 }, { "epoch": 84.25773488504116, "grad_norm": 0.0004973313771188259, "learning_rate": 1.57757592960545e-05, "loss": 7.28406012058258e-06, "step": 296840 }, { "epoch": 84.26057337496452, "grad_norm": 0.001603156328201294, "learning_rate": 1.577292080613114e-05, "loss": 7.602199912071228e-06, "step": 296850 }, { "epoch": 84.26341186488789, "grad_norm": 0.004022025968879461, "learning_rate": 1.5770082316207778e-05, "loss": 1.3838522136211395e-05, "step": 296860 }, { "epoch": 84.26625035481123, "grad_norm": 0.0026349378749728203, "learning_rate": 1.5767243826284416e-05, "loss": 2.48810276389122e-05, "step": 296870 }, { "epoch": 84.2690888447346, "grad_norm": 0.0018295982154086232, "learning_rate": 1.5764405336361057e-05, "loss": 9.968876838684082e-06, "step": 296880 }, { "epoch": 84.27192733465796, "grad_norm": 0.007807622198015451, "learning_rate": 1.5761566846437695e-05, "loss": 1.3989023864269257e-05, "step": 296890 }, { "epoch": 84.27476582458132, "grad_norm": 0.0035260228905826807, "learning_rate": 1.5758728356514337e-05, "loss": 6.816163659095764e-06, "step": 296900 }, { "epoch": 84.27760431450469, "grad_norm": 0.0017386563122272491, "learning_rate": 1.5755889866590975e-05, "loss": 7.4176117777824405e-06, "step": 296910 }, { "epoch": 84.28044280442805, "grad_norm": 0.016155533492565155, "learning_rate": 1.5753051376667613e-05, "loss": 6.732717156410217e-06, "step": 296920 }, { "epoch": 84.28328129435141, "grad_norm": 0.006044237874448299, "learning_rate": 1.5750212886744254e-05, "loss": 8.145719766616821e-06, "step": 296930 }, { "epoch": 84.28611978427476, "grad_norm": 0.0006628448027186096, "learning_rate": 1.5747374396820892e-05, "loss": 4.3616630136966705e-05, "step": 296940 }, { "epoch": 84.28895827419812, "grad_norm": 0.0014932436170056462, "learning_rate": 1.574453590689753e-05, "loss": 2.7446821331977844e-05, "step": 296950 }, { "epoch": 84.29179676412149, "grad_norm": 0.0019741938449442387, "learning_rate": 1.574169741697417e-05, "loss": 1.2878887355327607e-05, "step": 296960 }, { "epoch": 84.29463525404485, "grad_norm": 0.004573402460664511, "learning_rate": 1.573885892705081e-05, "loss": 1.1611543595790862e-05, "step": 296970 }, { "epoch": 84.29747374396821, "grad_norm": 0.01353870052844286, "learning_rate": 1.573602043712745e-05, "loss": 1.1184439063072205e-05, "step": 296980 }, { "epoch": 84.30031223389157, "grad_norm": 0.003808221546933055, "learning_rate": 1.573318194720409e-05, "loss": 1.046881079673767e-05, "step": 296990 }, { "epoch": 84.30315072381494, "grad_norm": 0.0035187192261219025, "learning_rate": 1.5730343457280727e-05, "loss": 1.131594181060791e-05, "step": 297000 }, { "epoch": 84.30315072381494, "eval_accuracy": 0.9881732053156991, "eval_loss": 0.04910951480269432, "eval_runtime": 47.125, "eval_samples_per_second": 333.729, "eval_steps_per_second": 5.22, "step": 297000 }, { "epoch": 84.30598921373829, "grad_norm": 0.005780006758868694, "learning_rate": 1.5727504967357368e-05, "loss": 1.9750185310840605e-05, "step": 297010 }, { "epoch": 84.30882770366165, "grad_norm": 0.0009589456021785736, "learning_rate": 1.5724666477434006e-05, "loss": 8.673965930938721e-06, "step": 297020 }, { "epoch": 84.31166619358501, "grad_norm": 0.001108446973375976, "learning_rate": 1.5721827987510644e-05, "loss": 7.002986967563629e-06, "step": 297030 }, { "epoch": 84.31450468350837, "grad_norm": 0.0013424726203083992, "learning_rate": 1.5718989497587282e-05, "loss": 1.2898258864879608e-05, "step": 297040 }, { "epoch": 84.31734317343174, "grad_norm": 0.003797184908762574, "learning_rate": 1.5716151007663923e-05, "loss": 9.721331298351289e-06, "step": 297050 }, { "epoch": 84.3201816633551, "grad_norm": 0.0016820725286379457, "learning_rate": 1.5713312517740565e-05, "loss": 9.883567690849304e-06, "step": 297060 }, { "epoch": 84.32302015327845, "grad_norm": 0.0010826032375916839, "learning_rate": 1.5710474027817203e-05, "loss": 7.249973714351654e-06, "step": 297070 }, { "epoch": 84.32585864320181, "grad_norm": 0.001013177796266973, "learning_rate": 1.570763553789384e-05, "loss": 1.3221800327301025e-05, "step": 297080 }, { "epoch": 84.32869713312517, "grad_norm": 0.0007694950327277184, "learning_rate": 1.570479704797048e-05, "loss": 7.983110845088958e-06, "step": 297090 }, { "epoch": 84.33153562304854, "grad_norm": 0.010831590741872787, "learning_rate": 1.570195855804712e-05, "loss": 8.199363946914673e-06, "step": 297100 }, { "epoch": 84.3343741129719, "grad_norm": 0.0018291310407221317, "learning_rate": 1.569912006812376e-05, "loss": 6.157346069812774e-06, "step": 297110 }, { "epoch": 84.33721260289526, "grad_norm": 0.01589248888194561, "learning_rate": 1.5696281578200396e-05, "loss": 8.42716544866562e-06, "step": 297120 }, { "epoch": 84.34005109281863, "grad_norm": 0.0015535833081230521, "learning_rate": 1.5693443088277037e-05, "loss": 1.0119006037712098e-05, "step": 297130 }, { "epoch": 84.34288958274198, "grad_norm": 0.0025389401707798243, "learning_rate": 1.5690604598353675e-05, "loss": 8.294731378555298e-06, "step": 297140 }, { "epoch": 84.34572807266534, "grad_norm": 0.0009230563882738352, "learning_rate": 1.5687766108430317e-05, "loss": 6.983242928981781e-06, "step": 297150 }, { "epoch": 84.3485665625887, "grad_norm": 0.0012159417383372784, "learning_rate": 1.5684927618506955e-05, "loss": 7.165595889091492e-06, "step": 297160 }, { "epoch": 84.35140505251206, "grad_norm": 0.000628713343758136, "learning_rate": 1.5682089128583593e-05, "loss": 9.031780064105987e-06, "step": 297170 }, { "epoch": 84.35424354243543, "grad_norm": 0.0002474590437486768, "learning_rate": 1.5679250638660234e-05, "loss": 6.0176476836204525e-06, "step": 297180 }, { "epoch": 84.35708203235879, "grad_norm": 0.0002829276490956545, "learning_rate": 1.5676412148736872e-05, "loss": 8.70339572429657e-06, "step": 297190 }, { "epoch": 84.35992052228215, "grad_norm": 0.0021856038365513086, "learning_rate": 1.5673573658813513e-05, "loss": 5.586817860603333e-06, "step": 297200 }, { "epoch": 84.3627590122055, "grad_norm": 0.0011458158260211349, "learning_rate": 1.567073516889015e-05, "loss": 4.8389658331871034e-06, "step": 297210 }, { "epoch": 84.36559750212886, "grad_norm": 0.00241671409457922, "learning_rate": 1.566789667896679e-05, "loss": 1.2771971523761749e-05, "step": 297220 }, { "epoch": 84.36843599205223, "grad_norm": 0.0011639115400612354, "learning_rate": 1.566505818904343e-05, "loss": 5.331076681613922e-06, "step": 297230 }, { "epoch": 84.37127448197559, "grad_norm": 0.005488951224833727, "learning_rate": 1.566221969912007e-05, "loss": 1.3043172657489777e-05, "step": 297240 }, { "epoch": 84.37411297189895, "grad_norm": 0.0009939768351614475, "learning_rate": 1.5659381209196707e-05, "loss": 8.459761738777161e-06, "step": 297250 }, { "epoch": 84.37695146182232, "grad_norm": 0.0007171184406615794, "learning_rate": 1.5656542719273348e-05, "loss": 1.0969676077365876e-05, "step": 297260 }, { "epoch": 84.37978995174566, "grad_norm": 0.000733205524738878, "learning_rate": 1.5653704229349986e-05, "loss": 4.7462061047554014e-06, "step": 297270 }, { "epoch": 84.38262844166903, "grad_norm": 0.003367386059835553, "learning_rate": 1.5650865739426627e-05, "loss": 7.845275104045867e-06, "step": 297280 }, { "epoch": 84.38546693159239, "grad_norm": 0.000718474795576185, "learning_rate": 1.5648027249503262e-05, "loss": 9.72263514995575e-06, "step": 297290 }, { "epoch": 84.38830542151575, "grad_norm": 0.00037122148205526173, "learning_rate": 1.5645188759579903e-05, "loss": 1.0404177010059356e-05, "step": 297300 }, { "epoch": 84.39114391143912, "grad_norm": 0.005395148880779743, "learning_rate": 1.5642350269656545e-05, "loss": 9.83644276857376e-06, "step": 297310 }, { "epoch": 84.39398240136248, "grad_norm": 0.010863850824534893, "learning_rate": 1.5639511779733183e-05, "loss": 1.2180954217910767e-05, "step": 297320 }, { "epoch": 84.39682089128584, "grad_norm": 0.0010317950509488583, "learning_rate": 1.563667328980982e-05, "loss": 6.60773366689682e-06, "step": 297330 }, { "epoch": 84.39965938120919, "grad_norm": 0.004426519386470318, "learning_rate": 1.563383479988646e-05, "loss": 4.968978464603424e-06, "step": 297340 }, { "epoch": 84.40249787113255, "grad_norm": 0.000944076688028872, "learning_rate": 1.56309963099631e-05, "loss": 1.0138191282749175e-05, "step": 297350 }, { "epoch": 84.40533636105592, "grad_norm": 0.0012951570097357035, "learning_rate": 1.562815782003974e-05, "loss": 9.47415828704834e-06, "step": 297360 }, { "epoch": 84.40817485097928, "grad_norm": 0.0012418925762176514, "learning_rate": 1.562531933011638e-05, "loss": 9.948387742042542e-06, "step": 297370 }, { "epoch": 84.41101334090264, "grad_norm": 0.0052511487156152725, "learning_rate": 1.5622480840193018e-05, "loss": 1.1335313320159913e-05, "step": 297380 }, { "epoch": 84.413851830826, "grad_norm": 0.000497135566547513, "learning_rate": 1.5619642350269655e-05, "loss": 1.4016963541507721e-05, "step": 297390 }, { "epoch": 84.41669032074937, "grad_norm": 0.0034904011990875006, "learning_rate": 1.5616803860346297e-05, "loss": 5.606189370155335e-06, "step": 297400 }, { "epoch": 84.41952881067272, "grad_norm": 0.005807061679661274, "learning_rate": 1.5613965370422938e-05, "loss": 6.6764652729034426e-06, "step": 297410 }, { "epoch": 84.42236730059608, "grad_norm": 0.007985814474523067, "learning_rate": 1.5611126880499573e-05, "loss": 1.4367513358592987e-05, "step": 297420 }, { "epoch": 84.42520579051944, "grad_norm": 0.004214612301439047, "learning_rate": 1.5608288390576214e-05, "loss": 1.4642812311649323e-05, "step": 297430 }, { "epoch": 84.4280442804428, "grad_norm": 0.005069981794804335, "learning_rate": 1.5605449900652852e-05, "loss": 1.0677054524421692e-05, "step": 297440 }, { "epoch": 84.43088277036617, "grad_norm": 0.0007170451572164893, "learning_rate": 1.5602611410729494e-05, "loss": 5.404464900493622e-06, "step": 297450 }, { "epoch": 84.43372126028953, "grad_norm": 0.0007700085989199579, "learning_rate": 1.559977292080613e-05, "loss": 7.64504075050354e-06, "step": 297460 }, { "epoch": 84.4365597502129, "grad_norm": 0.003801607759669423, "learning_rate": 1.559693443088277e-05, "loss": 6.3728541135787966e-06, "step": 297470 }, { "epoch": 84.43939824013624, "grad_norm": 0.0015063333557918668, "learning_rate": 1.559409594095941e-05, "loss": 1.162160187959671e-05, "step": 297480 }, { "epoch": 84.4422367300596, "grad_norm": 0.0013251648051664233, "learning_rate": 1.5591257451036052e-05, "loss": 1.0630860924720763e-05, "step": 297490 }, { "epoch": 84.44507521998297, "grad_norm": 0.0012374042998999357, "learning_rate": 1.5588418961112687e-05, "loss": 1.2156553566455841e-05, "step": 297500 }, { "epoch": 84.44507521998297, "eval_accuracy": 0.9879824505627265, "eval_loss": 0.04816132038831711, "eval_runtime": 35.4579, "eval_samples_per_second": 443.541, "eval_steps_per_second": 6.938, "step": 297500 }, { "epoch": 84.44791370990633, "grad_norm": 0.0006887152558192611, "learning_rate": 1.5585580471189328e-05, "loss": 4.633888602256775e-06, "step": 297510 }, { "epoch": 84.4507521998297, "grad_norm": 0.0009183816146105528, "learning_rate": 1.5582741981265966e-05, "loss": 5.524419248104096e-06, "step": 297520 }, { "epoch": 84.45359068975306, "grad_norm": 0.004656611010432243, "learning_rate": 1.5579903491342608e-05, "loss": 1.3697333633899688e-05, "step": 297530 }, { "epoch": 84.4564291796764, "grad_norm": 0.00487687811255455, "learning_rate": 1.5577065001419246e-05, "loss": 5.877576768398285e-06, "step": 297540 }, { "epoch": 84.45926766959977, "grad_norm": 0.0009597833268344402, "learning_rate": 1.5574226511495884e-05, "loss": 4.675425589084625e-06, "step": 297550 }, { "epoch": 84.46210615952313, "grad_norm": 0.0008544981828890741, "learning_rate": 1.5571388021572525e-05, "loss": 8.482486009597778e-06, "step": 297560 }, { "epoch": 84.4649446494465, "grad_norm": 0.0023554174695163965, "learning_rate": 1.5568549531649163e-05, "loss": 8.712522685527802e-06, "step": 297570 }, { "epoch": 84.46778313936986, "grad_norm": 0.0011247387155890465, "learning_rate": 1.5565711041725804e-05, "loss": 7.002800703048706e-06, "step": 297580 }, { "epoch": 84.47062162929322, "grad_norm": 0.00217454438097775, "learning_rate": 1.5562872551802442e-05, "loss": 7.00186938047409e-06, "step": 297590 }, { "epoch": 84.47346011921658, "grad_norm": 0.0005373961757868528, "learning_rate": 1.556003406187908e-05, "loss": 8.49161297082901e-06, "step": 297600 }, { "epoch": 84.47629860913993, "grad_norm": 0.0011169139761477709, "learning_rate": 1.5557195571955722e-05, "loss": 7.202103734016418e-06, "step": 297610 }, { "epoch": 84.4791370990633, "grad_norm": 0.0001478970079915598, "learning_rate": 1.555435708203236e-05, "loss": 7.415562868118286e-06, "step": 297620 }, { "epoch": 84.48197558898666, "grad_norm": 0.0027922866865992546, "learning_rate": 1.5551518592108998e-05, "loss": 5.611404776573181e-06, "step": 297630 }, { "epoch": 84.48481407891002, "grad_norm": 0.0015140717150643468, "learning_rate": 1.554868010218564e-05, "loss": 1.0425597429275512e-05, "step": 297640 }, { "epoch": 84.48765256883338, "grad_norm": 0.0010525648249313235, "learning_rate": 1.5545841612262277e-05, "loss": 8.920766413211823e-06, "step": 297650 }, { "epoch": 84.49049105875675, "grad_norm": 0.000499413930810988, "learning_rate": 1.554300312233892e-05, "loss": 8.892826735973358e-06, "step": 297660 }, { "epoch": 84.49332954868011, "grad_norm": 0.001290220650844276, "learning_rate": 1.5540164632415556e-05, "loss": 1.1480040848255158e-05, "step": 297670 }, { "epoch": 84.49616803860346, "grad_norm": 0.0008645877824164927, "learning_rate": 1.5537326142492194e-05, "loss": 6.400048732757568e-06, "step": 297680 }, { "epoch": 84.49900652852682, "grad_norm": 0.002892036223784089, "learning_rate": 1.5534487652568836e-05, "loss": 9.232573211193084e-06, "step": 297690 }, { "epoch": 84.50184501845018, "grad_norm": 0.0016008494421839714, "learning_rate": 1.5531649162645474e-05, "loss": 6.724335253238678e-06, "step": 297700 }, { "epoch": 84.50468350837355, "grad_norm": 0.011791354976594448, "learning_rate": 1.5528810672722112e-05, "loss": 1.1069886386394501e-05, "step": 297710 }, { "epoch": 84.50752199829691, "grad_norm": 7.605044083902612e-05, "learning_rate": 1.552597218279875e-05, "loss": 7.327087223529815e-06, "step": 297720 }, { "epoch": 84.51036048822027, "grad_norm": 0.0019471889827400446, "learning_rate": 1.552313369287539e-05, "loss": 1.820158213376999e-05, "step": 297730 }, { "epoch": 84.51319897814363, "grad_norm": 0.0026893080212175846, "learning_rate": 1.5520295202952032e-05, "loss": 9.540840983390808e-06, "step": 297740 }, { "epoch": 84.51603746806698, "grad_norm": 0.00348646123893559, "learning_rate": 1.551745671302867e-05, "loss": 9.005144238471986e-06, "step": 297750 }, { "epoch": 84.51887595799035, "grad_norm": 0.008661177940666676, "learning_rate": 1.551461822310531e-05, "loss": 9.304285049438476e-06, "step": 297760 }, { "epoch": 84.52171444791371, "grad_norm": 0.009201383218169212, "learning_rate": 1.5511779733181946e-05, "loss": 1.0964646935462952e-05, "step": 297770 }, { "epoch": 84.52455293783707, "grad_norm": 0.002922243205830455, "learning_rate": 1.5508941243258588e-05, "loss": 8.634105324745178e-06, "step": 297780 }, { "epoch": 84.52739142776043, "grad_norm": 0.027762632817029953, "learning_rate": 1.550610275333523e-05, "loss": 1.5569292008876802e-05, "step": 297790 }, { "epoch": 84.5302299176838, "grad_norm": 0.0020862012170255184, "learning_rate": 1.5503264263411864e-05, "loss": 2.3091770708560942e-05, "step": 297800 }, { "epoch": 84.53306840760715, "grad_norm": 0.001109417760744691, "learning_rate": 1.5500425773488505e-05, "loss": 8.03191214799881e-06, "step": 297810 }, { "epoch": 84.53590689753051, "grad_norm": 0.0009104397613555193, "learning_rate": 1.5497587283565143e-05, "loss": 6.788037717342377e-06, "step": 297820 }, { "epoch": 84.53874538745387, "grad_norm": 0.004115454852581024, "learning_rate": 1.5494748793641785e-05, "loss": 2.2226013243198395e-05, "step": 297830 }, { "epoch": 84.54158387737724, "grad_norm": 0.00508657144382596, "learning_rate": 1.5491910303718422e-05, "loss": 8.885562419891357e-06, "step": 297840 }, { "epoch": 84.5444223673006, "grad_norm": 0.0565277636051178, "learning_rate": 1.548907181379506e-05, "loss": 1.3136491179466248e-05, "step": 297850 }, { "epoch": 84.54726085722396, "grad_norm": 0.0021125376224517822, "learning_rate": 1.5486233323871702e-05, "loss": 7.664971053600312e-06, "step": 297860 }, { "epoch": 84.55009934714732, "grad_norm": 0.0025065173394978046, "learning_rate": 1.548339483394834e-05, "loss": 1.7922185361385345e-05, "step": 297870 }, { "epoch": 84.55293783707067, "grad_norm": 0.00155760592315346, "learning_rate": 1.548055634402498e-05, "loss": 8.430145680904388e-06, "step": 297880 }, { "epoch": 84.55577632699404, "grad_norm": 0.0008554014493711293, "learning_rate": 1.547771785410162e-05, "loss": 1.846347004175186e-05, "step": 297890 }, { "epoch": 84.5586148169174, "grad_norm": 0.0005571526708081365, "learning_rate": 1.5474879364178257e-05, "loss": 7.468834519386292e-06, "step": 297900 }, { "epoch": 84.56145330684076, "grad_norm": 0.0009069183142855763, "learning_rate": 1.54720408742549e-05, "loss": 1.4256685972213745e-05, "step": 297910 }, { "epoch": 84.56429179676412, "grad_norm": 0.0007286157342605293, "learning_rate": 1.5469202384331537e-05, "loss": 6.720423698425293e-06, "step": 297920 }, { "epoch": 84.56713028668749, "grad_norm": 0.0012742701219394803, "learning_rate": 1.5466363894408175e-05, "loss": 9.119510650634766e-06, "step": 297930 }, { "epoch": 84.56996877661085, "grad_norm": 0.0010922059882432222, "learning_rate": 1.5463525404484816e-05, "loss": 6.9551169872283936e-06, "step": 297940 }, { "epoch": 84.5728072665342, "grad_norm": 0.0032159260008484125, "learning_rate": 1.5460686914561454e-05, "loss": 6.5909698605537415e-06, "step": 297950 }, { "epoch": 84.57564575645756, "grad_norm": 0.0029215975664556026, "learning_rate": 1.5457848424638095e-05, "loss": 8.443929255008698e-06, "step": 297960 }, { "epoch": 84.57848424638092, "grad_norm": 0.0005938486428931355, "learning_rate": 1.545500993471473e-05, "loss": 1.1182576417922974e-05, "step": 297970 }, { "epoch": 84.58132273630429, "grad_norm": 0.0014075814979150891, "learning_rate": 1.545217144479137e-05, "loss": 1.1477060616016388e-05, "step": 297980 }, { "epoch": 84.58416122622765, "grad_norm": 0.0031074013095349073, "learning_rate": 1.5449332954868013e-05, "loss": 4.586949944496155e-06, "step": 297990 }, { "epoch": 84.58699971615101, "grad_norm": 0.0010903695365414023, "learning_rate": 1.544649446494465e-05, "loss": 7.3429197072982785e-06, "step": 298000 }, { "epoch": 84.58699971615101, "eval_accuracy": 0.9882367902333566, "eval_loss": 0.04954707995057106, "eval_runtime": 35.0978, "eval_samples_per_second": 448.091, "eval_steps_per_second": 7.009, "step": 298000 }, { "epoch": 84.58983820607436, "grad_norm": 0.005615451838821173, "learning_rate": 1.544365597502129e-05, "loss": 5.899369716644287e-06, "step": 298010 }, { "epoch": 84.59267669599772, "grad_norm": 0.002661788137629628, "learning_rate": 1.5440817485097927e-05, "loss": 8.428655564785004e-06, "step": 298020 }, { "epoch": 84.59551518592109, "grad_norm": 0.0005616086418740451, "learning_rate": 1.5437978995174568e-05, "loss": 1.7408840358257292e-05, "step": 298030 }, { "epoch": 84.59835367584445, "grad_norm": 0.06468115001916885, "learning_rate": 1.543514050525121e-05, "loss": 2.206563949584961e-05, "step": 298040 }, { "epoch": 84.60119216576781, "grad_norm": 0.007940451614558697, "learning_rate": 1.5432302015327847e-05, "loss": 3.999527543783188e-05, "step": 298050 }, { "epoch": 84.60403065569118, "grad_norm": 0.0029166501481086016, "learning_rate": 1.5429463525404485e-05, "loss": 1.637190580368042e-05, "step": 298060 }, { "epoch": 84.60686914561454, "grad_norm": 0.002208015648648143, "learning_rate": 1.5426625035481123e-05, "loss": 2.5387853384017944e-05, "step": 298070 }, { "epoch": 84.60970763553789, "grad_norm": 0.0025775900576263666, "learning_rate": 1.5423786545557765e-05, "loss": 6.643868982791901e-06, "step": 298080 }, { "epoch": 84.61254612546125, "grad_norm": 0.0006132035050541162, "learning_rate": 1.5420948055634406e-05, "loss": 1.2178532779216767e-05, "step": 298090 }, { "epoch": 84.61538461538461, "grad_norm": 0.007516355719417334, "learning_rate": 1.541810956571104e-05, "loss": 1.0653398931026459e-05, "step": 298100 }, { "epoch": 84.61822310530798, "grad_norm": 0.0013878386234864593, "learning_rate": 1.5415271075787682e-05, "loss": 6.352737545967102e-06, "step": 298110 }, { "epoch": 84.62106159523134, "grad_norm": 0.0022870649117976427, "learning_rate": 1.541243258586432e-05, "loss": 1.323707401752472e-05, "step": 298120 }, { "epoch": 84.6239000851547, "grad_norm": 0.01997707597911358, "learning_rate": 1.540959409594096e-05, "loss": 1.6941316425800322e-05, "step": 298130 }, { "epoch": 84.62673857507806, "grad_norm": 0.023243429139256477, "learning_rate": 1.54067556060176e-05, "loss": 1.3803690671920777e-05, "step": 298140 }, { "epoch": 84.62957706500141, "grad_norm": 0.0003950078971683979, "learning_rate": 1.5403917116094237e-05, "loss": 4.9585476517677305e-06, "step": 298150 }, { "epoch": 84.63241555492478, "grad_norm": 0.002660420723259449, "learning_rate": 1.540107862617088e-05, "loss": 6.533227860927582e-06, "step": 298160 }, { "epoch": 84.63525404484814, "grad_norm": 0.001153768040239811, "learning_rate": 1.5398240136247517e-05, "loss": 1.3490207493305206e-05, "step": 298170 }, { "epoch": 84.6380925347715, "grad_norm": 0.0034878321457654238, "learning_rate": 1.5395401646324155e-05, "loss": 5.611591041088104e-06, "step": 298180 }, { "epoch": 84.64093102469486, "grad_norm": 0.001369531499221921, "learning_rate": 1.5392563156400796e-05, "loss": 1.5880353748798372e-05, "step": 298190 }, { "epoch": 84.64376951461823, "grad_norm": 0.005434114020317793, "learning_rate": 1.5389724666477434e-05, "loss": 2.194065600633621e-05, "step": 298200 }, { "epoch": 84.64660800454159, "grad_norm": 0.0009742345428094268, "learning_rate": 1.5386886176554075e-05, "loss": 1.717396080493927e-05, "step": 298210 }, { "epoch": 84.64944649446494, "grad_norm": 0.00226336857303977, "learning_rate": 1.5384047686630713e-05, "loss": 1.0346435010433197e-05, "step": 298220 }, { "epoch": 84.6522849843883, "grad_norm": 0.0018310954328626394, "learning_rate": 1.538120919670735e-05, "loss": 1.6188807785511015e-05, "step": 298230 }, { "epoch": 84.65512347431167, "grad_norm": 0.0004459087795112282, "learning_rate": 1.5378370706783993e-05, "loss": 1.3288483023643493e-05, "step": 298240 }, { "epoch": 84.65796196423503, "grad_norm": 0.0003060325398109853, "learning_rate": 1.537553221686063e-05, "loss": 1.0620616376399993e-05, "step": 298250 }, { "epoch": 84.66080045415839, "grad_norm": 0.0006202297518029809, "learning_rate": 1.5372693726937272e-05, "loss": 6.362609565258026e-06, "step": 298260 }, { "epoch": 84.66363894408175, "grad_norm": 0.00951787643134594, "learning_rate": 1.5369855237013907e-05, "loss": 1.3073720037937164e-05, "step": 298270 }, { "epoch": 84.6664774340051, "grad_norm": 0.0012647812254726887, "learning_rate": 1.5367016747090548e-05, "loss": 7.5425952672958376e-06, "step": 298280 }, { "epoch": 84.66931592392847, "grad_norm": 0.0025935377925634384, "learning_rate": 1.536417825716719e-05, "loss": 1.1570937931537629e-05, "step": 298290 }, { "epoch": 84.67215441385183, "grad_norm": 0.09747909009456635, "learning_rate": 1.5361623616236163e-05, "loss": 0.004780717939138412, "step": 298300 }, { "epoch": 84.67499290377519, "grad_norm": 0.06174127757549286, "learning_rate": 1.535906897530514e-05, "loss": 0.01228630319237709, "step": 298310 }, { "epoch": 84.67783139369855, "grad_norm": 1.3738412857055664, "learning_rate": 1.5356230485381777e-05, "loss": 0.0030775683000683783, "step": 298320 }, { "epoch": 84.68066988362192, "grad_norm": 0.014125457964837551, "learning_rate": 1.5353391995458415e-05, "loss": 0.0005839986726641655, "step": 298330 }, { "epoch": 84.68350837354528, "grad_norm": 1.9986090660095215, "learning_rate": 1.5350553505535057e-05, "loss": 0.0024611072614789007, "step": 298340 }, { "epoch": 84.68634686346863, "grad_norm": 10.038759231567383, "learning_rate": 1.5347715015611698e-05, "loss": 0.015460164844989776, "step": 298350 }, { "epoch": 84.68918535339199, "grad_norm": 0.15466244518756866, "learning_rate": 1.5344876525688333e-05, "loss": 0.009831634908914566, "step": 298360 }, { "epoch": 84.69202384331535, "grad_norm": 0.10974597185850143, "learning_rate": 1.5342038035764974e-05, "loss": 0.0011588877066969871, "step": 298370 }, { "epoch": 84.69486233323872, "grad_norm": 0.052033085376024246, "learning_rate": 1.5339199545841612e-05, "loss": 0.0003016939386725426, "step": 298380 }, { "epoch": 84.69770082316208, "grad_norm": 11.390109062194824, "learning_rate": 1.5336361055918253e-05, "loss": 0.0018479768186807632, "step": 298390 }, { "epoch": 84.70053931308544, "grad_norm": 0.0023048855364322662, "learning_rate": 1.533352256599489e-05, "loss": 2.3250281810760497e-05, "step": 298400 }, { "epoch": 84.7033778030088, "grad_norm": 0.004723448306322098, "learning_rate": 1.533068407607153e-05, "loss": 0.0013062762096524238, "step": 298410 }, { "epoch": 84.70621629293215, "grad_norm": 0.0031594340689480305, "learning_rate": 1.532784558614817e-05, "loss": 3.229118883609772e-05, "step": 298420 }, { "epoch": 84.70905478285552, "grad_norm": 0.021683232858777046, "learning_rate": 1.532500709622481e-05, "loss": 2.3403391242027283e-05, "step": 298430 }, { "epoch": 84.71189327277888, "grad_norm": 0.11814727634191513, "learning_rate": 1.532216860630145e-05, "loss": 0.0002623690292239189, "step": 298440 }, { "epoch": 84.71473176270224, "grad_norm": 0.005866045597940683, "learning_rate": 1.5319330116378088e-05, "loss": 0.0018256619572639466, "step": 298450 }, { "epoch": 84.7175702526256, "grad_norm": 0.15345405042171478, "learning_rate": 1.5316491626454726e-05, "loss": 0.00014166682958602906, "step": 298460 }, { "epoch": 84.72040874254897, "grad_norm": 0.006324779707938433, "learning_rate": 1.5313653136531367e-05, "loss": 0.0016846818849444388, "step": 298470 }, { "epoch": 84.72324723247232, "grad_norm": 0.0022856774739921093, "learning_rate": 1.5310814646608005e-05, "loss": 0.00013142656534910203, "step": 298480 }, { "epoch": 84.72608572239568, "grad_norm": 0.0295255109667778, "learning_rate": 1.5307976156684643e-05, "loss": 0.0027690542861819266, "step": 298490 }, { "epoch": 84.72892421231904, "grad_norm": 0.00340871955268085, "learning_rate": 1.5305137666761285e-05, "loss": 0.0004908090457320213, "step": 298500 }, { "epoch": 84.72892421231904, "eval_accuracy": 0.9862020728683156, "eval_loss": 0.057803984731435776, "eval_runtime": 35.5277, "eval_samples_per_second": 442.669, "eval_steps_per_second": 6.924, "step": 298500 }, { "epoch": 84.7317627022424, "grad_norm": 0.013819664716720581, "learning_rate": 1.5302299176837923e-05, "loss": 2.2887438535690308e-05, "step": 298510 }, { "epoch": 84.73460119216577, "grad_norm": 0.007290676701813936, "learning_rate": 1.5299460686914564e-05, "loss": 4.566553980112076e-05, "step": 298520 }, { "epoch": 84.73743968208913, "grad_norm": 0.7931046485900879, "learning_rate": 1.5296622196991202e-05, "loss": 0.00013426616787910463, "step": 298530 }, { "epoch": 84.7402781720125, "grad_norm": 0.0008655238780193031, "learning_rate": 1.529378370706784e-05, "loss": 2.4191476404666902e-05, "step": 298540 }, { "epoch": 84.74311666193584, "grad_norm": 0.004514685831964016, "learning_rate": 1.529094521714448e-05, "loss": 4.7176145017147064e-05, "step": 298550 }, { "epoch": 84.7459551518592, "grad_norm": 0.036651257425546646, "learning_rate": 1.528810672722112e-05, "loss": 2.47882679104805e-05, "step": 298560 }, { "epoch": 84.74879364178257, "grad_norm": 0.0022181300446391106, "learning_rate": 1.5285268237297757e-05, "loss": 1.4065206050872802e-05, "step": 298570 }, { "epoch": 84.75163213170593, "grad_norm": 0.0039363764226436615, "learning_rate": 1.5282429747374395e-05, "loss": 5.167350172996521e-05, "step": 298580 }, { "epoch": 84.7544706216293, "grad_norm": 0.0070470236241817474, "learning_rate": 1.5279591257451037e-05, "loss": 1.6856752336025237e-05, "step": 298590 }, { "epoch": 84.75730911155266, "grad_norm": 0.007096347399055958, "learning_rate": 1.5276752767527678e-05, "loss": 1.878701150417328e-05, "step": 298600 }, { "epoch": 84.76014760147602, "grad_norm": 0.02107582800090313, "learning_rate": 1.5273914277604316e-05, "loss": 2.6450119912624358e-05, "step": 298610 }, { "epoch": 84.76298609139937, "grad_norm": 0.0027683423832058907, "learning_rate": 1.5271075787680954e-05, "loss": 0.0001578608527779579, "step": 298620 }, { "epoch": 84.76582458132273, "grad_norm": 0.00983440037816763, "learning_rate": 1.5268237297757592e-05, "loss": 1.6914121806621553e-05, "step": 298630 }, { "epoch": 84.7686630712461, "grad_norm": 0.0025784801691770554, "learning_rate": 1.5265398807834233e-05, "loss": 2.0756758749485017e-05, "step": 298640 }, { "epoch": 84.77150156116946, "grad_norm": 0.005300150718539953, "learning_rate": 1.5262560317910875e-05, "loss": 5.165301263332367e-05, "step": 298650 }, { "epoch": 84.77434005109282, "grad_norm": 0.00502567645162344, "learning_rate": 1.525972182798751e-05, "loss": 0.00012381616979837418, "step": 298660 }, { "epoch": 84.77717854101618, "grad_norm": 0.007952754385769367, "learning_rate": 1.525688333806415e-05, "loss": 1.3526715338230134e-05, "step": 298670 }, { "epoch": 84.78001703093955, "grad_norm": 0.0006153033464215696, "learning_rate": 1.5254044848140789e-05, "loss": 1.086946576833725e-05, "step": 298680 }, { "epoch": 84.7828555208629, "grad_norm": 0.0017238992732018232, "learning_rate": 1.5251206358217428e-05, "loss": 1.1301040649414062e-05, "step": 298690 }, { "epoch": 84.78569401078626, "grad_norm": 0.005842219572514296, "learning_rate": 1.524836786829407e-05, "loss": 1.7522834241390228e-05, "step": 298700 }, { "epoch": 84.78853250070962, "grad_norm": 0.07660634815692902, "learning_rate": 1.5245529378370708e-05, "loss": 2.1869130432605743e-05, "step": 298710 }, { "epoch": 84.79137099063298, "grad_norm": 0.0026035169139504433, "learning_rate": 1.5242690888447347e-05, "loss": 1.8144957721233368e-05, "step": 298720 }, { "epoch": 84.79420948055635, "grad_norm": 0.006540433503687382, "learning_rate": 1.5239852398523985e-05, "loss": 2.596583217382431e-05, "step": 298730 }, { "epoch": 84.79704797047971, "grad_norm": 0.00122826115693897, "learning_rate": 1.5237013908600625e-05, "loss": 3.367066383361816e-05, "step": 298740 }, { "epoch": 84.79988646040306, "grad_norm": 0.001971903722733259, "learning_rate": 1.5234175418677265e-05, "loss": 2.6895292103290557e-05, "step": 298750 }, { "epoch": 84.80272495032642, "grad_norm": 0.0035534370690584183, "learning_rate": 1.5231336928753903e-05, "loss": 1.1272542178630828e-05, "step": 298760 }, { "epoch": 84.80556344024978, "grad_norm": 0.0021511937957257032, "learning_rate": 1.5228498438830544e-05, "loss": 2.4106912314891814e-05, "step": 298770 }, { "epoch": 84.80840193017315, "grad_norm": 0.005382162053138018, "learning_rate": 1.522565994890718e-05, "loss": 2.779476344585419e-05, "step": 298780 }, { "epoch": 84.81124042009651, "grad_norm": 0.0015559201128780842, "learning_rate": 1.5222821458983822e-05, "loss": 1.1160597205162049e-05, "step": 298790 }, { "epoch": 84.81407891001987, "grad_norm": 0.01669933833181858, "learning_rate": 1.5219982969060462e-05, "loss": 2.3035705089569093e-05, "step": 298800 }, { "epoch": 84.81691739994324, "grad_norm": 0.005778059363365173, "learning_rate": 1.52171444791371e-05, "loss": 2.253018319606781e-05, "step": 298810 }, { "epoch": 84.81975588986658, "grad_norm": 0.000768528669141233, "learning_rate": 1.521430598921374e-05, "loss": 1.3735331594944001e-05, "step": 298820 }, { "epoch": 84.82259437978995, "grad_norm": 0.002242040354758501, "learning_rate": 1.5211467499290377e-05, "loss": 1.3947486877441406e-05, "step": 298830 }, { "epoch": 84.82543286971331, "grad_norm": 0.0008090545888990164, "learning_rate": 1.5208629009367017e-05, "loss": 2.2457726299762725e-05, "step": 298840 }, { "epoch": 84.82827135963667, "grad_norm": 0.007606651168316603, "learning_rate": 1.5205790519443658e-05, "loss": 1.1607632040977478e-05, "step": 298850 }, { "epoch": 84.83110984956004, "grad_norm": 0.00042197966831736267, "learning_rate": 1.5202952029520296e-05, "loss": 2.66319140791893e-05, "step": 298860 }, { "epoch": 84.8339483394834, "grad_norm": 0.0015539121814072132, "learning_rate": 1.5200113539596936e-05, "loss": 7.149204611778259e-06, "step": 298870 }, { "epoch": 84.83678682940676, "grad_norm": 0.01796121895313263, "learning_rate": 1.5197275049673574e-05, "loss": 2.1702051162719728e-05, "step": 298880 }, { "epoch": 84.83962531933011, "grad_norm": 0.0004423937061801553, "learning_rate": 1.5194436559750214e-05, "loss": 2.4349056184291838e-05, "step": 298890 }, { "epoch": 84.84246380925347, "grad_norm": 0.008687634952366352, "learning_rate": 1.5191598069826853e-05, "loss": 3.86090949177742e-05, "step": 298900 }, { "epoch": 84.84530229917684, "grad_norm": 0.00339972460642457, "learning_rate": 1.5188759579903491e-05, "loss": 1.1651217937469482e-05, "step": 298910 }, { "epoch": 84.8481407891002, "grad_norm": 0.00431984756141901, "learning_rate": 1.5185921089980133e-05, "loss": 2.4477019906044006e-05, "step": 298920 }, { "epoch": 84.85097927902356, "grad_norm": 0.002026093192398548, "learning_rate": 1.5183082600056769e-05, "loss": 8.360110223293305e-06, "step": 298930 }, { "epoch": 84.85381776894693, "grad_norm": 0.005361465271562338, "learning_rate": 1.518024411013341e-05, "loss": 1.4828704297542573e-05, "step": 298940 }, { "epoch": 84.85665625887029, "grad_norm": 0.0026341283228248358, "learning_rate": 1.517740562021005e-05, "loss": 2.6207789778709413e-05, "step": 298950 }, { "epoch": 84.85949474879364, "grad_norm": 0.0027539620641618967, "learning_rate": 1.5174567130286688e-05, "loss": 9.57529991865158e-06, "step": 298960 }, { "epoch": 84.862333238717, "grad_norm": 0.00129380589351058, "learning_rate": 1.5171728640363328e-05, "loss": 2.0567886531352995e-05, "step": 298970 }, { "epoch": 84.86517172864036, "grad_norm": 0.04992048814892769, "learning_rate": 1.5168890150439966e-05, "loss": 9.606517851352691e-05, "step": 298980 }, { "epoch": 84.86801021856373, "grad_norm": 0.0018856143578886986, "learning_rate": 1.5166051660516605e-05, "loss": 1.6255490481853486e-05, "step": 298990 }, { "epoch": 84.87084870848709, "grad_norm": 7.801924228668213, "learning_rate": 1.5163213170593247e-05, "loss": 0.0006799301132559777, "step": 299000 }, { "epoch": 84.87084870848709, "eval_accuracy": 0.9844216951739048, "eval_loss": 0.0668996274471283, "eval_runtime": 35.553, "eval_samples_per_second": 442.354, "eval_steps_per_second": 6.919, "step": 299000 }, { "epoch": 84.87368719841045, "grad_norm": 0.007364349439740181, "learning_rate": 1.5160374680669883e-05, "loss": 6.175804883241653e-05, "step": 299010 }, { "epoch": 84.8765256883338, "grad_norm": 0.0383792519569397, "learning_rate": 1.5157536190746524e-05, "loss": 0.001674778386950493, "step": 299020 }, { "epoch": 84.87936417825716, "grad_norm": 0.0023756942246109247, "learning_rate": 1.5154697700823162e-05, "loss": 2.4996884167194367e-05, "step": 299030 }, { "epoch": 84.88220266818053, "grad_norm": 0.02224608324468136, "learning_rate": 1.5151859210899802e-05, "loss": 3.7903152406215665e-05, "step": 299040 }, { "epoch": 84.88504115810389, "grad_norm": 0.0036684044171124697, "learning_rate": 1.5149020720976442e-05, "loss": 3.175772726535797e-05, "step": 299050 }, { "epoch": 84.88787964802725, "grad_norm": 0.0015472398372367024, "learning_rate": 1.514618223105308e-05, "loss": 0.0004180353134870529, "step": 299060 }, { "epoch": 84.89071813795061, "grad_norm": 0.000894127122592181, "learning_rate": 1.514334374112972e-05, "loss": 1.8193013966083525e-05, "step": 299070 }, { "epoch": 84.89355662787398, "grad_norm": 0.014794677495956421, "learning_rate": 1.5140505251206357e-05, "loss": 0.00013004299253225327, "step": 299080 }, { "epoch": 84.89639511779733, "grad_norm": 0.0885261744260788, "learning_rate": 1.5137666761282999e-05, "loss": 4.5934319496154784e-05, "step": 299090 }, { "epoch": 84.89923360772069, "grad_norm": 0.015267761424183846, "learning_rate": 1.5134828271359638e-05, "loss": 5.6712888181209564e-05, "step": 299100 }, { "epoch": 84.90207209764405, "grad_norm": 1.8656456470489502, "learning_rate": 1.5131989781436276e-05, "loss": 0.0003273526206612587, "step": 299110 }, { "epoch": 84.90491058756741, "grad_norm": 0.015365434810519218, "learning_rate": 1.5129151291512916e-05, "loss": 3.4798868000507355e-05, "step": 299120 }, { "epoch": 84.90774907749078, "grad_norm": 0.03872370719909668, "learning_rate": 1.5126312801589554e-05, "loss": 2.897493541240692e-05, "step": 299130 }, { "epoch": 84.91058756741414, "grad_norm": 2.9033596515655518, "learning_rate": 1.5123474311666194e-05, "loss": 0.0005995769053697586, "step": 299140 }, { "epoch": 84.9134260573375, "grad_norm": 0.027621405199170113, "learning_rate": 1.5120635821742835e-05, "loss": 0.000767781212925911, "step": 299150 }, { "epoch": 84.91626454726085, "grad_norm": 0.015958745032548904, "learning_rate": 1.5117797331819471e-05, "loss": 1.2736767530441284e-05, "step": 299160 }, { "epoch": 84.91910303718421, "grad_norm": 0.0007947884150780737, "learning_rate": 1.5114958841896113e-05, "loss": 4.5746751129627225e-05, "step": 299170 }, { "epoch": 84.92194152710758, "grad_norm": 0.01892351172864437, "learning_rate": 1.511212035197275e-05, "loss": 1.993030309677124e-05, "step": 299180 }, { "epoch": 84.92478001703094, "grad_norm": 0.04719897732138634, "learning_rate": 1.510928186204939e-05, "loss": 0.0003582587465643883, "step": 299190 }, { "epoch": 84.9276185069543, "grad_norm": 0.0017872668104246259, "learning_rate": 1.510644337212603e-05, "loss": 0.00019369609653949738, "step": 299200 }, { "epoch": 84.93045699687767, "grad_norm": 0.011379147879779339, "learning_rate": 1.5103604882202668e-05, "loss": 2.691708505153656e-05, "step": 299210 }, { "epoch": 84.93329548680101, "grad_norm": 0.000433048844570294, "learning_rate": 1.5100766392279308e-05, "loss": 4.967991262674332e-05, "step": 299220 }, { "epoch": 84.93613397672438, "grad_norm": 0.0023105612490326166, "learning_rate": 1.5097927902355946e-05, "loss": 2.9863789677619934e-05, "step": 299230 }, { "epoch": 84.93897246664774, "grad_norm": 0.010946430265903473, "learning_rate": 1.5095089412432587e-05, "loss": 2.763010561466217e-05, "step": 299240 }, { "epoch": 84.9418109565711, "grad_norm": 0.01235453225672245, "learning_rate": 1.5092250922509227e-05, "loss": 1.8925778567790986e-05, "step": 299250 }, { "epoch": 84.94464944649447, "grad_norm": 0.0050483690574765205, "learning_rate": 1.5089412432585865e-05, "loss": 3.3344700932502745e-05, "step": 299260 }, { "epoch": 84.94748793641783, "grad_norm": 0.004505231976509094, "learning_rate": 1.5086573942662505e-05, "loss": 7.28921964764595e-05, "step": 299270 }, { "epoch": 84.95032642634119, "grad_norm": 0.029846809804439545, "learning_rate": 1.5083735452739143e-05, "loss": 1.7566420137882232e-05, "step": 299280 }, { "epoch": 84.95316491626454, "grad_norm": 0.0016220586840063334, "learning_rate": 1.5080896962815782e-05, "loss": 0.0002291133627295494, "step": 299290 }, { "epoch": 84.9560034061879, "grad_norm": 0.001700792694464326, "learning_rate": 1.5078058472892424e-05, "loss": 0.0009065210819244385, "step": 299300 }, { "epoch": 84.95884189611127, "grad_norm": 0.007666515186429024, "learning_rate": 1.507521998296906e-05, "loss": 2.0329467952251433e-05, "step": 299310 }, { "epoch": 84.96168038603463, "grad_norm": 0.010727480053901672, "learning_rate": 1.5072381493045701e-05, "loss": 3.1988881528377536e-05, "step": 299320 }, { "epoch": 84.96451887595799, "grad_norm": 0.0034788998309522867, "learning_rate": 1.5069543003122338e-05, "loss": 3.752708435058594e-05, "step": 299330 }, { "epoch": 84.96735736588136, "grad_norm": 0.0008996770484372973, "learning_rate": 1.5066704513198979e-05, "loss": 0.0007191356271505356, "step": 299340 }, { "epoch": 84.97019585580472, "grad_norm": 0.0028221679385751486, "learning_rate": 1.5063866023275619e-05, "loss": 0.0004061140120029449, "step": 299350 }, { "epoch": 84.97303434572807, "grad_norm": 0.005184968933463097, "learning_rate": 1.5061027533352257e-05, "loss": 4.132948815822601e-05, "step": 299360 }, { "epoch": 84.97587283565143, "grad_norm": 0.006065585650503635, "learning_rate": 1.5058189043428896e-05, "loss": 2.7185864746570586e-05, "step": 299370 }, { "epoch": 84.97871132557479, "grad_norm": 0.21962665021419525, "learning_rate": 1.5055350553505534e-05, "loss": 5.3553469479084014e-05, "step": 299380 }, { "epoch": 84.98154981549816, "grad_norm": 0.016188163310289383, "learning_rate": 1.5052512063582176e-05, "loss": 3.2170861959457395e-05, "step": 299390 }, { "epoch": 84.98438830542152, "grad_norm": 0.0016640799585729837, "learning_rate": 1.5049673573658815e-05, "loss": 3.446862101554871e-05, "step": 299400 }, { "epoch": 84.98722679534488, "grad_norm": 0.00398508133366704, "learning_rate": 1.5046835083735453e-05, "loss": 0.00012576766312122345, "step": 299410 }, { "epoch": 84.99006528526824, "grad_norm": 0.009161974303424358, "learning_rate": 1.5043996593812093e-05, "loss": 0.00010252352803945542, "step": 299420 }, { "epoch": 84.99290377519159, "grad_norm": 0.0012492031091824174, "learning_rate": 1.5041158103888731e-05, "loss": 5.962550640106201e-05, "step": 299430 }, { "epoch": 84.99574226511496, "grad_norm": 0.0018534373957663774, "learning_rate": 1.503831961396537e-05, "loss": 2.7726590633392335e-05, "step": 299440 }, { "epoch": 84.99858075503832, "grad_norm": 0.0020007388666272163, "learning_rate": 1.5035481124042012e-05, "loss": 1.0395608842372895e-05, "step": 299450 }, { "epoch": 85.00141924496168, "grad_norm": 0.0003292145556770265, "learning_rate": 1.5032642634118648e-05, "loss": 2.4911965010687708e-05, "step": 299460 }, { "epoch": 85.00425773488504, "grad_norm": 0.023989010602235794, "learning_rate": 1.502980414419529e-05, "loss": 2.9748305678367615e-05, "step": 299470 }, { "epoch": 85.00709622480841, "grad_norm": 0.004437328316271305, "learning_rate": 1.5026965654271926e-05, "loss": 3.630947321653366e-05, "step": 299480 }, { "epoch": 85.00993471473176, "grad_norm": 0.0017402659868821502, "learning_rate": 1.5024127164348567e-05, "loss": 3.4396909177303316e-05, "step": 299490 }, { "epoch": 85.01277320465512, "grad_norm": 0.00837850384414196, "learning_rate": 1.5021288674425207e-05, "loss": 1.4759600162506104e-05, "step": 299500 }, { "epoch": 85.01277320465512, "eval_accuracy": 0.9875373561391237, "eval_loss": 0.05436692014336586, "eval_runtime": 34.7158, "eval_samples_per_second": 453.022, "eval_steps_per_second": 7.086, "step": 299500 }, { "epoch": 85.01561169457848, "grad_norm": 0.002210768638178706, "learning_rate": 1.5018450184501845e-05, "loss": 2.760719507932663e-05, "step": 299510 }, { "epoch": 85.01845018450184, "grad_norm": 0.0011787827825173736, "learning_rate": 1.5015611694578485e-05, "loss": 1.3490021228790283e-05, "step": 299520 }, { "epoch": 85.02128867442521, "grad_norm": 0.0034491540864109993, "learning_rate": 1.5012773204655123e-05, "loss": 1.0836496949195862e-05, "step": 299530 }, { "epoch": 85.02412716434857, "grad_norm": 0.0015752234030514956, "learning_rate": 1.5009934714731762e-05, "loss": 9.433552622795106e-06, "step": 299540 }, { "epoch": 85.02696565427193, "grad_norm": 0.0015570876421406865, "learning_rate": 1.5007096224808404e-05, "loss": 6.573330610990525e-05, "step": 299550 }, { "epoch": 85.02980414419528, "grad_norm": 0.0007304187165573239, "learning_rate": 1.5004257734885042e-05, "loss": 1.1277012526988983e-05, "step": 299560 }, { "epoch": 85.03264263411864, "grad_norm": 0.0011651889653876424, "learning_rate": 1.5001419244961681e-05, "loss": 3.1280703842639924e-05, "step": 299570 }, { "epoch": 85.03548112404201, "grad_norm": 0.0033058912958949804, "learning_rate": 1.4998580755038321e-05, "loss": 3.306958824396133e-05, "step": 299580 }, { "epoch": 85.03831961396537, "grad_norm": 0.0036554031539708376, "learning_rate": 1.4995742265114959e-05, "loss": 1.2850761413574218e-05, "step": 299590 }, { "epoch": 85.04115810388873, "grad_norm": 0.0026921352837234735, "learning_rate": 1.49929037751916e-05, "loss": 2.247728407382965e-05, "step": 299600 }, { "epoch": 85.0439965938121, "grad_norm": 0.004416611976921558, "learning_rate": 1.4990065285268237e-05, "loss": 1.107286661863327e-05, "step": 299610 }, { "epoch": 85.04683508373546, "grad_norm": 0.00925668515264988, "learning_rate": 1.4987226795344878e-05, "loss": 1.0279938578605652e-05, "step": 299620 }, { "epoch": 85.04967357365881, "grad_norm": 0.019521955400705338, "learning_rate": 1.4984388305421518e-05, "loss": 1.4549307525157929e-05, "step": 299630 }, { "epoch": 85.05251206358217, "grad_norm": 0.0007600717945024371, "learning_rate": 1.4981549815498156e-05, "loss": 1.7312169075012206e-05, "step": 299640 }, { "epoch": 85.05535055350553, "grad_norm": 0.005162395536899567, "learning_rate": 1.4978711325574795e-05, "loss": 1.4993175864219665e-05, "step": 299650 }, { "epoch": 85.0581890434289, "grad_norm": 0.00784965418279171, "learning_rate": 1.4975872835651433e-05, "loss": 1.937001943588257e-05, "step": 299660 }, { "epoch": 85.06102753335226, "grad_norm": 0.035395391285419464, "learning_rate": 1.4973034345728073e-05, "loss": 1.4669820666313172e-05, "step": 299670 }, { "epoch": 85.06386602327562, "grad_norm": 0.0073347329162061214, "learning_rate": 1.4970195855804714e-05, "loss": 8.37612897157669e-06, "step": 299680 }, { "epoch": 85.06670451319899, "grad_norm": 0.0024283495731651783, "learning_rate": 1.496735736588135e-05, "loss": 5.140155553817749e-06, "step": 299690 }, { "epoch": 85.06954300312233, "grad_norm": 0.03839540854096413, "learning_rate": 1.4964518875957992e-05, "loss": 2.4731270968914032e-05, "step": 299700 }, { "epoch": 85.0723814930457, "grad_norm": 0.24537308514118195, "learning_rate": 1.496168038603463e-05, "loss": 4.990175366401672e-05, "step": 299710 }, { "epoch": 85.07521998296906, "grad_norm": 0.006240055896341801, "learning_rate": 1.495884189611127e-05, "loss": 8.10232013463974e-06, "step": 299720 }, { "epoch": 85.07805847289242, "grad_norm": 0.0055122352205216885, "learning_rate": 1.495600340618791e-05, "loss": 1.8834322690963746e-05, "step": 299730 }, { "epoch": 85.08089696281579, "grad_norm": 0.0007708482444286346, "learning_rate": 1.4953164916264547e-05, "loss": 1.0142289102077485e-05, "step": 299740 }, { "epoch": 85.08373545273915, "grad_norm": 0.0025491018313914537, "learning_rate": 1.4950326426341187e-05, "loss": 1.3161450624465942e-05, "step": 299750 }, { "epoch": 85.0865739426625, "grad_norm": 0.0020815199241042137, "learning_rate": 1.4947487936417825e-05, "loss": 1.435261219739914e-05, "step": 299760 }, { "epoch": 85.08941243258586, "grad_norm": 0.06558181345462799, "learning_rate": 1.4944649446494467e-05, "loss": 2.5013647973537446e-05, "step": 299770 }, { "epoch": 85.09225092250922, "grad_norm": 0.00211769575253129, "learning_rate": 1.4941810956571106e-05, "loss": 1.372154802083969e-05, "step": 299780 }, { "epoch": 85.09508941243259, "grad_norm": 0.0050376481376588345, "learning_rate": 1.4938972466647744e-05, "loss": 1.046936959028244e-05, "step": 299790 }, { "epoch": 85.09792790235595, "grad_norm": 0.0005490163457579911, "learning_rate": 1.4936133976724384e-05, "loss": 1.7134472727775574e-05, "step": 299800 }, { "epoch": 85.10076639227931, "grad_norm": 0.003326868172734976, "learning_rate": 1.4933295486801022e-05, "loss": 1.2579560279846192e-05, "step": 299810 }, { "epoch": 85.10360488220267, "grad_norm": 3.374765157699585, "learning_rate": 1.4930456996877662e-05, "loss": 0.00028601083904504775, "step": 299820 }, { "epoch": 85.10644337212602, "grad_norm": 0.008267778903245926, "learning_rate": 1.4927618506954303e-05, "loss": 1.451876014471054e-05, "step": 299830 }, { "epoch": 85.10928186204939, "grad_norm": 0.022381268441677094, "learning_rate": 1.492478001703094e-05, "loss": 0.00010055508464574814, "step": 299840 }, { "epoch": 85.11212035197275, "grad_norm": 0.009516053833067417, "learning_rate": 1.492194152710758e-05, "loss": 9.207166731357574e-05, "step": 299850 }, { "epoch": 85.11495884189611, "grad_norm": 0.004624743480235338, "learning_rate": 1.4919103037184219e-05, "loss": 8.725374937057494e-06, "step": 299860 }, { "epoch": 85.11779733181947, "grad_norm": 0.0016849254025146365, "learning_rate": 1.4916264547260858e-05, "loss": 2.0492449402809142e-05, "step": 299870 }, { "epoch": 85.12063582174284, "grad_norm": 0.007309349719434977, "learning_rate": 1.4913426057337498e-05, "loss": 1.7521902918815613e-05, "step": 299880 }, { "epoch": 85.1234743116662, "grad_norm": 0.0006356919766403735, "learning_rate": 1.4910587567414136e-05, "loss": 1.210775226354599e-05, "step": 299890 }, { "epoch": 85.12631280158955, "grad_norm": 0.002924201777204871, "learning_rate": 1.4907749077490776e-05, "loss": 5.3826719522476196e-05, "step": 299900 }, { "epoch": 85.12915129151291, "grad_norm": 0.0013019309844821692, "learning_rate": 1.4904910587567414e-05, "loss": 1.206323504447937e-05, "step": 299910 }, { "epoch": 85.13198978143627, "grad_norm": 0.010742745362222195, "learning_rate": 1.4902072097644055e-05, "loss": 1.9592978060245513e-05, "step": 299920 }, { "epoch": 85.13482827135964, "grad_norm": 0.0011589952046051621, "learning_rate": 1.4899233607720695e-05, "loss": 2.2152438759803772e-05, "step": 299930 }, { "epoch": 85.137666761283, "grad_norm": 0.0005338394548743963, "learning_rate": 1.4896395117797333e-05, "loss": 6.989389657974243e-06, "step": 299940 }, { "epoch": 85.14050525120636, "grad_norm": 0.00114993576426059, "learning_rate": 1.4893556627873972e-05, "loss": 1.0695494711399079e-05, "step": 299950 }, { "epoch": 85.14334374112971, "grad_norm": 0.002012701006606221, "learning_rate": 1.489071813795061e-05, "loss": 5.3675100207328794e-05, "step": 299960 }, { "epoch": 85.14618223105307, "grad_norm": 0.000924545805901289, "learning_rate": 1.488787964802725e-05, "loss": 1.839790493249893e-05, "step": 299970 }, { "epoch": 85.14902072097644, "grad_norm": 0.0016449469840154052, "learning_rate": 1.4885041158103891e-05, "loss": 1.2276135385036468e-05, "step": 299980 }, { "epoch": 85.1518592108998, "grad_norm": 0.000783274881541729, "learning_rate": 1.4882202668180528e-05, "loss": 1.0875053703784943e-05, "step": 299990 }, { "epoch": 85.15469770082316, "grad_norm": 0.0045073069632053375, "learning_rate": 1.4879364178257169e-05, "loss": 1.856144517660141e-05, "step": 300000 }, { "epoch": 85.15469770082316, "eval_accuracy": 0.9881096203980416, "eval_loss": 0.05165728181600571, "eval_runtime": 35.6758, "eval_samples_per_second": 440.831, "eval_steps_per_second": 6.895, "step": 300000 }, { "epoch": 85.15753619074653, "grad_norm": 0.001428276882506907, "learning_rate": 1.4876525688333805e-05, "loss": 1.217219978570938e-05, "step": 300010 }, { "epoch": 85.16037468066989, "grad_norm": 0.005811686161905527, "learning_rate": 1.4873687198410447e-05, "loss": 1.8966943025588988e-05, "step": 300020 }, { "epoch": 85.16321317059324, "grad_norm": 0.006700577214360237, "learning_rate": 1.4870848708487086e-05, "loss": 2.5895051658153533e-05, "step": 300030 }, { "epoch": 85.1660516605166, "grad_norm": 0.6791988611221313, "learning_rate": 1.4868010218563724e-05, "loss": 6.71612098813057e-05, "step": 300040 }, { "epoch": 85.16889015043996, "grad_norm": 0.007356118410825729, "learning_rate": 1.4865171728640364e-05, "loss": 1.835636794567108e-05, "step": 300050 }, { "epoch": 85.17172864036333, "grad_norm": 0.0008623894536867738, "learning_rate": 1.4862333238717002e-05, "loss": 1.771543174982071e-05, "step": 300060 }, { "epoch": 85.17456713028669, "grad_norm": 0.002715854439884424, "learning_rate": 1.4859494748793643e-05, "loss": 4.569962620735168e-05, "step": 300070 }, { "epoch": 85.17740562021005, "grad_norm": 0.011267460882663727, "learning_rate": 1.4856656258870283e-05, "loss": 4.315841943025589e-05, "step": 300080 }, { "epoch": 85.18024411013342, "grad_norm": 0.002653700765222311, "learning_rate": 1.4853817768946921e-05, "loss": 2.2336095571517945e-05, "step": 300090 }, { "epoch": 85.18308260005676, "grad_norm": 0.004294405225664377, "learning_rate": 1.485097927902356e-05, "loss": 1.3775192201137543e-05, "step": 300100 }, { "epoch": 85.18592108998013, "grad_norm": 0.002863667905330658, "learning_rate": 1.4848140789100199e-05, "loss": 7.604248821735382e-06, "step": 300110 }, { "epoch": 85.18875957990349, "grad_norm": 0.0024527055211365223, "learning_rate": 1.4845302299176838e-05, "loss": 1.0309368371963501e-05, "step": 300120 }, { "epoch": 85.19159806982685, "grad_norm": 0.003278384916484356, "learning_rate": 1.484246380925348e-05, "loss": 1.6235001385211945e-05, "step": 300130 }, { "epoch": 85.19443655975022, "grad_norm": 0.0034741382114589214, "learning_rate": 1.4839625319330116e-05, "loss": 9.110011160373687e-06, "step": 300140 }, { "epoch": 85.19727504967358, "grad_norm": 0.003483100328594446, "learning_rate": 1.4836786829406757e-05, "loss": 1.190192997455597e-05, "step": 300150 }, { "epoch": 85.20011353959694, "grad_norm": 0.0009040862205438316, "learning_rate": 1.4833948339483394e-05, "loss": 1.431647688150406e-05, "step": 300160 }, { "epoch": 85.20295202952029, "grad_norm": 0.0031168966088443995, "learning_rate": 1.4831109849560035e-05, "loss": 8.90977680683136e-06, "step": 300170 }, { "epoch": 85.20579051944365, "grad_norm": 0.005030802451074123, "learning_rate": 1.4828271359636675e-05, "loss": 7.580779492855072e-06, "step": 300180 }, { "epoch": 85.20862900936702, "grad_norm": 0.0012462096055969596, "learning_rate": 1.4825432869713313e-05, "loss": 1.005474478006363e-05, "step": 300190 }, { "epoch": 85.21146749929038, "grad_norm": 0.0008022186812013388, "learning_rate": 1.4822594379789952e-05, "loss": 7.10468739271164e-06, "step": 300200 }, { "epoch": 85.21430598921374, "grad_norm": 0.0014382769586518407, "learning_rate": 1.481975588986659e-05, "loss": 7.552653551101685e-06, "step": 300210 }, { "epoch": 85.2171444791371, "grad_norm": 0.01166578195989132, "learning_rate": 1.481691739994323e-05, "loss": 9.138323366641999e-06, "step": 300220 }, { "epoch": 85.21998296906045, "grad_norm": 0.003284146310761571, "learning_rate": 1.4814078910019872e-05, "loss": 1.8988922238349915e-05, "step": 300230 }, { "epoch": 85.22282145898382, "grad_norm": 0.0010535359615460038, "learning_rate": 1.481124042009651e-05, "loss": 7.615797221660614e-06, "step": 300240 }, { "epoch": 85.22565994890718, "grad_norm": 0.0017053415067493916, "learning_rate": 1.480840193017315e-05, "loss": 6.991438567638397e-06, "step": 300250 }, { "epoch": 85.22849843883054, "grad_norm": 0.0010576575295999646, "learning_rate": 1.4805563440249787e-05, "loss": 2.7208961546421052e-05, "step": 300260 }, { "epoch": 85.2313369287539, "grad_norm": 0.0008823336102068424, "learning_rate": 1.4802724950326427e-05, "loss": 6.4192339777946476e-06, "step": 300270 }, { "epoch": 85.23417541867727, "grad_norm": 0.0018154809949919581, "learning_rate": 1.4799886460403067e-05, "loss": 7.191300392150879e-06, "step": 300280 }, { "epoch": 85.23701390860063, "grad_norm": 0.005877318326383829, "learning_rate": 1.4797047970479705e-05, "loss": 2.440381795167923e-05, "step": 300290 }, { "epoch": 85.23985239852398, "grad_norm": 0.0018909447826445103, "learning_rate": 1.4794209480556346e-05, "loss": 5.787611007690429e-06, "step": 300300 }, { "epoch": 85.24269088844734, "grad_norm": 0.0006115247961133718, "learning_rate": 1.4791370990632982e-05, "loss": 8.751451969146728e-06, "step": 300310 }, { "epoch": 85.2455293783707, "grad_norm": 0.0036145595367997885, "learning_rate": 1.4788532500709624e-05, "loss": 1.1659972369670867e-05, "step": 300320 }, { "epoch": 85.24836786829407, "grad_norm": 0.0012767446460202336, "learning_rate": 1.4785694010786263e-05, "loss": 6.533227860927582e-06, "step": 300330 }, { "epoch": 85.25120635821743, "grad_norm": 0.0022732510697096586, "learning_rate": 1.4782855520862901e-05, "loss": 1.0112859308719635e-05, "step": 300340 }, { "epoch": 85.2540448481408, "grad_norm": 0.0009400516282767057, "learning_rate": 1.4780017030939541e-05, "loss": 7.070228457450867e-06, "step": 300350 }, { "epoch": 85.25688333806416, "grad_norm": 0.00684670964255929, "learning_rate": 1.4777178541016179e-05, "loss": 4.064552485942841e-05, "step": 300360 }, { "epoch": 85.2597218279875, "grad_norm": 0.011453397572040558, "learning_rate": 1.4774340051092819e-05, "loss": 9.450502693653106e-06, "step": 300370 }, { "epoch": 85.26256031791087, "grad_norm": 0.001201568404212594, "learning_rate": 1.477150156116946e-05, "loss": 4.355981945991516e-06, "step": 300380 }, { "epoch": 85.26539880783423, "grad_norm": 0.0157795287668705, "learning_rate": 1.4768663071246098e-05, "loss": 7.142312824726105e-06, "step": 300390 }, { "epoch": 85.2682372977576, "grad_norm": 0.0015659825876355171, "learning_rate": 1.4765824581322738e-05, "loss": 7.635354995727539e-06, "step": 300400 }, { "epoch": 85.27107578768096, "grad_norm": 0.0010709555353969336, "learning_rate": 1.4762986091399376e-05, "loss": 8.395686745643615e-06, "step": 300410 }, { "epoch": 85.27391427760432, "grad_norm": 0.014406410045921803, "learning_rate": 1.4760147601476015e-05, "loss": 1.0014884173870086e-05, "step": 300420 }, { "epoch": 85.27675276752768, "grad_norm": 0.004218583926558495, "learning_rate": 1.4757309111552655e-05, "loss": 2.1078623831272125e-05, "step": 300430 }, { "epoch": 85.27959125745103, "grad_norm": 0.008059876039624214, "learning_rate": 1.4754470621629293e-05, "loss": 1.2291967868804931e-05, "step": 300440 }, { "epoch": 85.2824297473744, "grad_norm": 0.0005979337147437036, "learning_rate": 1.4751632131705934e-05, "loss": 1.0323897004127503e-05, "step": 300450 }, { "epoch": 85.28526823729776, "grad_norm": 0.0015405270969495177, "learning_rate": 1.474879364178257e-05, "loss": 7.805228233337402e-06, "step": 300460 }, { "epoch": 85.28810672722112, "grad_norm": 0.0014911702601239085, "learning_rate": 1.4745955151859212e-05, "loss": 1.523289829492569e-05, "step": 300470 }, { "epoch": 85.29094521714448, "grad_norm": 0.0006394147640094161, "learning_rate": 1.4743116661935852e-05, "loss": 1.2690573930740356e-05, "step": 300480 }, { "epoch": 85.29378370706785, "grad_norm": 0.0026046030689030886, "learning_rate": 1.474027817201249e-05, "loss": 1.6809254884719848e-05, "step": 300490 }, { "epoch": 85.2966221969912, "grad_norm": 0.001720411004498601, "learning_rate": 1.473743968208913e-05, "loss": 9.999424219131469e-06, "step": 300500 }, { "epoch": 85.2966221969912, "eval_accuracy": 0.9882367902333566, "eval_loss": 0.05133754760026932, "eval_runtime": 35.3906, "eval_samples_per_second": 444.384, "eval_steps_per_second": 6.951, "step": 300500 }, { "epoch": 85.29946068691456, "grad_norm": 0.002227372257038951, "learning_rate": 1.4734601192165767e-05, "loss": 6.624683737754822e-06, "step": 300510 }, { "epoch": 85.30229917683792, "grad_norm": 0.00235157273709774, "learning_rate": 1.4731762702242407e-05, "loss": 8.947961032390594e-06, "step": 300520 }, { "epoch": 85.30513766676128, "grad_norm": 0.0015154245775192976, "learning_rate": 1.4728924212319048e-05, "loss": 1.4089420437812805e-05, "step": 300530 }, { "epoch": 85.30797615668465, "grad_norm": 0.002044764580205083, "learning_rate": 1.4726085722395685e-05, "loss": 7.210113108158112e-06, "step": 300540 }, { "epoch": 85.31081464660801, "grad_norm": 0.0012708688154816628, "learning_rate": 1.4723247232472326e-05, "loss": 5.639158189296722e-06, "step": 300550 }, { "epoch": 85.31365313653137, "grad_norm": 0.002263941802084446, "learning_rate": 1.4720408742548964e-05, "loss": 8.341856300830841e-06, "step": 300560 }, { "epoch": 85.31649162645472, "grad_norm": 0.0014662106987088919, "learning_rate": 1.4717570252625604e-05, "loss": 8.997693657875062e-06, "step": 300570 }, { "epoch": 85.31933011637808, "grad_norm": 0.0007762525929138064, "learning_rate": 1.4714731762702243e-05, "loss": 1.456383615732193e-05, "step": 300580 }, { "epoch": 85.32216860630145, "grad_norm": 0.0013405310455709696, "learning_rate": 1.4711893272778881e-05, "loss": 7.406249642372131e-06, "step": 300590 }, { "epoch": 85.32500709622481, "grad_norm": 0.006965741980820894, "learning_rate": 1.4709054782855523e-05, "loss": 8.038058876991271e-06, "step": 300600 }, { "epoch": 85.32784558614817, "grad_norm": 0.0027458732947707176, "learning_rate": 1.4706216292932159e-05, "loss": 7.99279659986496e-06, "step": 300610 }, { "epoch": 85.33068407607153, "grad_norm": 0.009327731095254421, "learning_rate": 1.47033778030088e-05, "loss": 1.0940060019493102e-05, "step": 300620 }, { "epoch": 85.3335225659949, "grad_norm": 0.010491140186786652, "learning_rate": 1.470053931308544e-05, "loss": 6.995908915996552e-06, "step": 300630 }, { "epoch": 85.33636105591825, "grad_norm": 0.0038641472347080708, "learning_rate": 1.4697700823162078e-05, "loss": 1.5602074563503264e-05, "step": 300640 }, { "epoch": 85.33919954584161, "grad_norm": 0.00297727738507092, "learning_rate": 1.4694862333238718e-05, "loss": 8.56257975101471e-06, "step": 300650 }, { "epoch": 85.34203803576497, "grad_norm": 0.003977979067713022, "learning_rate": 1.4692023843315356e-05, "loss": 1.0719150304794312e-05, "step": 300660 }, { "epoch": 85.34487652568833, "grad_norm": 0.0013163848780095577, "learning_rate": 1.4689185353391995e-05, "loss": 1.3028830289840699e-05, "step": 300670 }, { "epoch": 85.3477150156117, "grad_norm": 0.0020257935393601656, "learning_rate": 1.4686346863468637e-05, "loss": 8.142367005348205e-06, "step": 300680 }, { "epoch": 85.35055350553506, "grad_norm": 0.01906432770192623, "learning_rate": 1.4683508373545273e-05, "loss": 2.4181045591831208e-05, "step": 300690 }, { "epoch": 85.35339199545841, "grad_norm": 0.0007825125940144062, "learning_rate": 1.4680669883621914e-05, "loss": 1.8324330449104308e-05, "step": 300700 }, { "epoch": 85.35623048538177, "grad_norm": 0.0014858689391985536, "learning_rate": 1.4677831393698552e-05, "loss": 5.05838543176651e-06, "step": 300710 }, { "epoch": 85.35906897530514, "grad_norm": 0.0010079041821882129, "learning_rate": 1.4674992903775192e-05, "loss": 7.677637040615081e-06, "step": 300720 }, { "epoch": 85.3619074652285, "grad_norm": 0.0002492892963346094, "learning_rate": 1.4672154413851832e-05, "loss": 7.790327072143554e-06, "step": 300730 }, { "epoch": 85.36474595515186, "grad_norm": 0.003017146373167634, "learning_rate": 1.466931592392847e-05, "loss": 1.6138888895511627e-05, "step": 300740 }, { "epoch": 85.36758444507522, "grad_norm": 0.0004822613555006683, "learning_rate": 1.466647743400511e-05, "loss": 6.9927424192428585e-06, "step": 300750 }, { "epoch": 85.37042293499859, "grad_norm": 0.002338485326617956, "learning_rate": 1.4663638944081747e-05, "loss": 1.2448057532310486e-05, "step": 300760 }, { "epoch": 85.37326142492194, "grad_norm": 0.0005871773464605212, "learning_rate": 1.4660800454158389e-05, "loss": 1.0125152766704559e-05, "step": 300770 }, { "epoch": 85.3760999148453, "grad_norm": 0.0010766779305413365, "learning_rate": 1.4657961964235029e-05, "loss": 9.028241038322449e-06, "step": 300780 }, { "epoch": 85.37893840476866, "grad_norm": 0.00796622782945633, "learning_rate": 1.4655123474311667e-05, "loss": 1.3023056089878082e-05, "step": 300790 }, { "epoch": 85.38177689469202, "grad_norm": 0.0005195208941586316, "learning_rate": 1.4652284984388306e-05, "loss": 1.9916519522666932e-05, "step": 300800 }, { "epoch": 85.38461538461539, "grad_norm": 0.0006116540171205997, "learning_rate": 1.4649446494464944e-05, "loss": 9.634718298912049e-06, "step": 300810 }, { "epoch": 85.38745387453875, "grad_norm": 0.012202420271933079, "learning_rate": 1.4646608004541584e-05, "loss": 1.3271346688270569e-05, "step": 300820 }, { "epoch": 85.39029236446211, "grad_norm": 0.0009331704350188375, "learning_rate": 1.4643769514618225e-05, "loss": 5.880929529666901e-06, "step": 300830 }, { "epoch": 85.39313085438546, "grad_norm": 0.0014211011584848166, "learning_rate": 1.4640931024694862e-05, "loss": 1.3082846999168396e-05, "step": 300840 }, { "epoch": 85.39596934430882, "grad_norm": 0.003949475008994341, "learning_rate": 1.4638092534771503e-05, "loss": 9.766221046447754e-06, "step": 300850 }, { "epoch": 85.39880783423219, "grad_norm": 0.03657536208629608, "learning_rate": 1.4635254044848141e-05, "loss": 1.240856945514679e-05, "step": 300860 }, { "epoch": 85.40164632415555, "grad_norm": 0.002453314606100321, "learning_rate": 1.463241555492478e-05, "loss": 1.6200914978981017e-05, "step": 300870 }, { "epoch": 85.40448481407891, "grad_norm": 0.0021427848841995, "learning_rate": 1.462957706500142e-05, "loss": 1.2071430683135987e-05, "step": 300880 }, { "epoch": 85.40732330400228, "grad_norm": 0.001408076612278819, "learning_rate": 1.4626738575078058e-05, "loss": 1.70048326253891e-05, "step": 300890 }, { "epoch": 85.41016179392564, "grad_norm": 0.0007514653843827546, "learning_rate": 1.4623900085154698e-05, "loss": 8.902512490749359e-06, "step": 300900 }, { "epoch": 85.41300028384899, "grad_norm": 0.006018111947923899, "learning_rate": 1.462106159523134e-05, "loss": 1.2035109102725983e-05, "step": 300910 }, { "epoch": 85.41583877377235, "grad_norm": 1.0209243297576904, "learning_rate": 1.4618223105307977e-05, "loss": 0.00011305306106805802, "step": 300920 }, { "epoch": 85.41867726369571, "grad_norm": 0.004213580396026373, "learning_rate": 1.4615384615384617e-05, "loss": 2.9974430799484252e-05, "step": 300930 }, { "epoch": 85.42151575361908, "grad_norm": 0.00426583644002676, "learning_rate": 1.4612546125461255e-05, "loss": 4.4248998165130615e-05, "step": 300940 }, { "epoch": 85.42435424354244, "grad_norm": 0.0020703955087810755, "learning_rate": 1.4609707635537895e-05, "loss": 0.003319678455591202, "step": 300950 }, { "epoch": 85.4271927334658, "grad_norm": 0.10874751955270767, "learning_rate": 1.4606869145614534e-05, "loss": 8.986685425043106e-05, "step": 300960 }, { "epoch": 85.43003122338915, "grad_norm": 0.0016983823152258992, "learning_rate": 1.4604030655691172e-05, "loss": 0.0006063750013709068, "step": 300970 }, { "epoch": 85.43286971331251, "grad_norm": 0.0018724820110946894, "learning_rate": 1.4601192165767814e-05, "loss": 1.2329407036304474e-05, "step": 300980 }, { "epoch": 85.43570820323588, "grad_norm": 0.006359487771987915, "learning_rate": 1.459835367584445e-05, "loss": 0.0004285426810383797, "step": 300990 }, { "epoch": 85.43854669315924, "grad_norm": 0.008454319089651108, "learning_rate": 1.4595515185921091e-05, "loss": 1.531057059764862e-05, "step": 301000 }, { "epoch": 85.43854669315924, "eval_accuracy": 0.9855026387740828, "eval_loss": 0.060856789350509644, "eval_runtime": 36.1575, "eval_samples_per_second": 434.959, "eval_steps_per_second": 6.804, "step": 301000 }, { "epoch": 85.4413851830826, "grad_norm": 0.002405802020803094, "learning_rate": 1.4592676695997731e-05, "loss": 0.00012213010340929032, "step": 301010 }, { "epoch": 85.44422367300596, "grad_norm": 0.008311198092997074, "learning_rate": 1.4589838206074369e-05, "loss": 4.340633749961853e-05, "step": 301020 }, { "epoch": 85.44706216292933, "grad_norm": 0.001578820520080626, "learning_rate": 1.4586999716151009e-05, "loss": 1.8580444157123564e-05, "step": 301030 }, { "epoch": 85.44990065285268, "grad_norm": 0.003521983278915286, "learning_rate": 1.4584161226227647e-05, "loss": 4.099160432815552e-05, "step": 301040 }, { "epoch": 85.45273914277604, "grad_norm": 0.0007825038046576083, "learning_rate": 1.4581322736304286e-05, "loss": 1.7552077770233156e-05, "step": 301050 }, { "epoch": 85.4555776326994, "grad_norm": 0.009488222189247608, "learning_rate": 1.4578484246380928e-05, "loss": 2.5314092636108398e-05, "step": 301060 }, { "epoch": 85.45841612262276, "grad_norm": 0.001717145903967321, "learning_rate": 1.4575645756457566e-05, "loss": 4.1901320219039914e-05, "step": 301070 }, { "epoch": 85.46125461254613, "grad_norm": 0.0005864887498319149, "learning_rate": 1.4572807266534205e-05, "loss": 9.502507746219634e-05, "step": 301080 }, { "epoch": 85.46409310246949, "grad_norm": 0.0011586962500587106, "learning_rate": 1.4569968776610843e-05, "loss": 1.588203012943268e-05, "step": 301090 }, { "epoch": 85.46693159239285, "grad_norm": 0.005858400836586952, "learning_rate": 1.4567130286687483e-05, "loss": 7.234066724777221e-05, "step": 301100 }, { "epoch": 85.4697700823162, "grad_norm": 0.006224671378731728, "learning_rate": 1.4564291796764123e-05, "loss": 2.645067870616913e-05, "step": 301110 }, { "epoch": 85.47260857223957, "grad_norm": 0.005848039872944355, "learning_rate": 1.456145330684076e-05, "loss": 5.281306803226471e-05, "step": 301120 }, { "epoch": 85.47544706216293, "grad_norm": 0.005089652258902788, "learning_rate": 1.4558614816917402e-05, "loss": 1.0437332093715668e-05, "step": 301130 }, { "epoch": 85.47828555208629, "grad_norm": 0.4090237617492676, "learning_rate": 1.4555776326994038e-05, "loss": 5.072653293609619e-05, "step": 301140 }, { "epoch": 85.48112404200965, "grad_norm": 0.0004571877943817526, "learning_rate": 1.455293783707068e-05, "loss": 1.3021938502788544e-05, "step": 301150 }, { "epoch": 85.48396253193302, "grad_norm": 0.002475753426551819, "learning_rate": 1.455009934714732e-05, "loss": 1.5262514352798462e-05, "step": 301160 }, { "epoch": 85.48680102185637, "grad_norm": 0.00941367819905281, "learning_rate": 1.4547260857223957e-05, "loss": 2.611875534057617e-05, "step": 301170 }, { "epoch": 85.48963951177973, "grad_norm": 0.0009711350430734456, "learning_rate": 1.4544422367300597e-05, "loss": 1.4081783592700959e-05, "step": 301180 }, { "epoch": 85.49247800170309, "grad_norm": 0.03907034918665886, "learning_rate": 1.4541583877377235e-05, "loss": 2.7283094823360443e-05, "step": 301190 }, { "epoch": 85.49531649162645, "grad_norm": 0.0015749706653878093, "learning_rate": 1.4538745387453875e-05, "loss": 5.6980177760124205e-06, "step": 301200 }, { "epoch": 85.49815498154982, "grad_norm": 0.0023516106884926558, "learning_rate": 1.4535906897530516e-05, "loss": 1.0905042290687562e-05, "step": 301210 }, { "epoch": 85.50099347147318, "grad_norm": 0.0010821169707924128, "learning_rate": 1.4533068407607152e-05, "loss": 7.301457226276398e-05, "step": 301220 }, { "epoch": 85.50383196139654, "grad_norm": 0.0025101604405790567, "learning_rate": 1.4530229917683794e-05, "loss": 1.28917396068573e-05, "step": 301230 }, { "epoch": 85.50667045131989, "grad_norm": 0.0036514217499643564, "learning_rate": 1.4527391427760432e-05, "loss": 9.199604392051697e-06, "step": 301240 }, { "epoch": 85.50950894124325, "grad_norm": 0.002615683013573289, "learning_rate": 1.4524552937837072e-05, "loss": 9.199045598506927e-06, "step": 301250 }, { "epoch": 85.51234743116662, "grad_norm": 0.001342225819826126, "learning_rate": 1.4521714447913711e-05, "loss": 2.4448148906230927e-05, "step": 301260 }, { "epoch": 85.51518592108998, "grad_norm": 0.0007987089920789003, "learning_rate": 1.451887595799035e-05, "loss": 9.70723107457161e-05, "step": 301270 }, { "epoch": 85.51802441101334, "grad_norm": 0.0022776867263019085, "learning_rate": 1.451603746806699e-05, "loss": 1.0303407907485963e-05, "step": 301280 }, { "epoch": 85.5208629009367, "grad_norm": 0.0020102637354284525, "learning_rate": 1.4513198978143627e-05, "loss": 3.410764038562775e-05, "step": 301290 }, { "epoch": 85.52370139086007, "grad_norm": 0.004298173822462559, "learning_rate": 1.4510360488220268e-05, "loss": 1.737847924232483e-05, "step": 301300 }, { "epoch": 85.52653988078342, "grad_norm": 0.00070151686668396, "learning_rate": 1.4507521998296908e-05, "loss": 2.1651759743690492e-05, "step": 301310 }, { "epoch": 85.52937837070678, "grad_norm": 0.0052306377328932285, "learning_rate": 1.4504683508373546e-05, "loss": 8.394569158554077e-06, "step": 301320 }, { "epoch": 85.53221686063014, "grad_norm": 0.003858003066852689, "learning_rate": 1.4501845018450186e-05, "loss": 1.0358169674873352e-05, "step": 301330 }, { "epoch": 85.5350553505535, "grad_norm": 0.0009201049688272178, "learning_rate": 1.4499006528526824e-05, "loss": 7.521919906139374e-06, "step": 301340 }, { "epoch": 85.53789384047687, "grad_norm": 0.0027065558824688196, "learning_rate": 1.4496168038603463e-05, "loss": 1.9963830709457397e-05, "step": 301350 }, { "epoch": 85.54073233040023, "grad_norm": 0.014030175283551216, "learning_rate": 1.4493329548680105e-05, "loss": 1.4180131256580352e-05, "step": 301360 }, { "epoch": 85.5435708203236, "grad_norm": 0.01937289722263813, "learning_rate": 1.4490491058756741e-05, "loss": 5.107596516609192e-05, "step": 301370 }, { "epoch": 85.54640931024694, "grad_norm": 0.006475089117884636, "learning_rate": 1.4487652568833382e-05, "loss": 1.6012415289878846e-05, "step": 301380 }, { "epoch": 85.5492478001703, "grad_norm": 0.0034294237848371267, "learning_rate": 1.448481407891002e-05, "loss": 1.62661075592041e-05, "step": 301390 }, { "epoch": 85.55208629009367, "grad_norm": 0.003232887014746666, "learning_rate": 1.448197558898666e-05, "loss": 3.0145607888698577e-05, "step": 301400 }, { "epoch": 85.55492478001703, "grad_norm": 0.003825736464932561, "learning_rate": 1.44791370990633e-05, "loss": 0.00011095721274614334, "step": 301410 }, { "epoch": 85.5577632699404, "grad_norm": 0.0002690799592528492, "learning_rate": 1.4476298609139938e-05, "loss": 5.4481998085975646e-05, "step": 301420 }, { "epoch": 85.56060175986376, "grad_norm": 0.002265417715534568, "learning_rate": 1.4473460119216577e-05, "loss": 1.4259293675422668e-05, "step": 301430 }, { "epoch": 85.5634402497871, "grad_norm": 0.018808778375387192, "learning_rate": 1.4470621629293215e-05, "loss": 1.3680197298526764e-05, "step": 301440 }, { "epoch": 85.56627873971047, "grad_norm": 0.007266089785844088, "learning_rate": 1.4467783139369857e-05, "loss": 1.1071935296058654e-05, "step": 301450 }, { "epoch": 85.56911722963383, "grad_norm": 0.0018221985083073378, "learning_rate": 1.4464944649446496e-05, "loss": 2.3717805743217468e-05, "step": 301460 }, { "epoch": 85.5719557195572, "grad_norm": 0.0016612879699096084, "learning_rate": 1.4462106159523134e-05, "loss": 1.5601329505443572e-05, "step": 301470 }, { "epoch": 85.57479420948056, "grad_norm": 0.015250258147716522, "learning_rate": 1.4459267669599774e-05, "loss": 5.711875855922699e-05, "step": 301480 }, { "epoch": 85.57763269940392, "grad_norm": 0.0014323710929602385, "learning_rate": 1.4456429179676412e-05, "loss": 4.08196821808815e-05, "step": 301490 }, { "epoch": 85.58047118932728, "grad_norm": 0.0015523511683568358, "learning_rate": 1.4453874538745389e-05, "loss": 0.0027891123667359353, "step": 301500 }, { "epoch": 85.58047118932728, "eval_accuracy": 0.9860749030330006, "eval_loss": 0.058997463434934616, "eval_runtime": 35.263, "eval_samples_per_second": 445.992, "eval_steps_per_second": 6.976, "step": 301500 }, { "epoch": 85.58330967925063, "grad_norm": 0.012525249272584915, "learning_rate": 1.4451036048822028e-05, "loss": 0.000754702091217041, "step": 301510 }, { "epoch": 85.586148169174, "grad_norm": 0.005246967077255249, "learning_rate": 1.4448197558898666e-05, "loss": 2.7463585138320922e-05, "step": 301520 }, { "epoch": 85.58898665909736, "grad_norm": 0.007083815988153219, "learning_rate": 1.4445359068975306e-05, "loss": 3.9326213300228116e-05, "step": 301530 }, { "epoch": 85.59182514902072, "grad_norm": 0.003308810293674469, "learning_rate": 1.4442520579051946e-05, "loss": 2.124495804309845e-05, "step": 301540 }, { "epoch": 85.59466363894408, "grad_norm": 0.24044087529182434, "learning_rate": 1.4439682089128584e-05, "loss": 4.909243434667587e-05, "step": 301550 }, { "epoch": 85.59750212886745, "grad_norm": 0.0017421517986804247, "learning_rate": 1.4436843599205225e-05, "loss": 7.827021181583405e-06, "step": 301560 }, { "epoch": 85.60034061879081, "grad_norm": 0.002103532897308469, "learning_rate": 1.4434005109281861e-05, "loss": 1.1774711310863495e-05, "step": 301570 }, { "epoch": 85.60317910871416, "grad_norm": 0.0031665493734180927, "learning_rate": 1.4431166619358503e-05, "loss": 7.44946300983429e-06, "step": 301580 }, { "epoch": 85.60601759863752, "grad_norm": 0.009599230252206326, "learning_rate": 1.4428328129435142e-05, "loss": 1.1684000492095948e-05, "step": 301590 }, { "epoch": 85.60885608856088, "grad_norm": 0.01892591454088688, "learning_rate": 1.442548963951178e-05, "loss": 1.69362872838974e-05, "step": 301600 }, { "epoch": 85.61169457848425, "grad_norm": 0.0034087621606886387, "learning_rate": 1.442265114958842e-05, "loss": 8.238106966018677e-06, "step": 301610 }, { "epoch": 85.61453306840761, "grad_norm": 0.0012243259698152542, "learning_rate": 1.4419812659665058e-05, "loss": 1.2216158211231232e-05, "step": 301620 }, { "epoch": 85.61737155833097, "grad_norm": 0.0023127447348088026, "learning_rate": 1.4416974169741698e-05, "loss": 1.5885382890701293e-05, "step": 301630 }, { "epoch": 85.62021004825434, "grad_norm": 0.002711297245696187, "learning_rate": 1.4414135679818339e-05, "loss": 4.61159273982048e-05, "step": 301640 }, { "epoch": 85.62304853817768, "grad_norm": 0.001763481181114912, "learning_rate": 1.4411297189894975e-05, "loss": 1.3777241110801697e-05, "step": 301650 }, { "epoch": 85.62588702810105, "grad_norm": Infinity, "learning_rate": 1.4408458699971617e-05, "loss": 0.005030107498168945, "step": 301660 }, { "epoch": 85.62872551802441, "grad_norm": 0.0015216615283861756, "learning_rate": 1.4405904059040593e-05, "loss": 1.0473839938640595e-05, "step": 301670 }, { "epoch": 85.63156400794777, "grad_norm": 0.0007870985427871346, "learning_rate": 1.440306556911723e-05, "loss": 1.2648291885852814e-05, "step": 301680 }, { "epoch": 85.63440249787114, "grad_norm": 0.00986355822533369, "learning_rate": 1.4400227079193871e-05, "loss": 1.990757882595062e-05, "step": 301690 }, { "epoch": 85.6372409877945, "grad_norm": 0.01361448410898447, "learning_rate": 1.4397388589270507e-05, "loss": 0.0007207686081528664, "step": 301700 }, { "epoch": 85.64007947771785, "grad_norm": 0.0021455837413668633, "learning_rate": 1.4394550099347149e-05, "loss": 2.0491890609264374e-05, "step": 301710 }, { "epoch": 85.64291796764121, "grad_norm": 0.011547419242560863, "learning_rate": 1.4391711609423788e-05, "loss": 0.00026826635003089904, "step": 301720 }, { "epoch": 85.64575645756457, "grad_norm": 0.004035414196550846, "learning_rate": 1.4388873119500426e-05, "loss": 1.846589148044586e-05, "step": 301730 }, { "epoch": 85.64859494748794, "grad_norm": 0.01136824395507574, "learning_rate": 1.4386034629577066e-05, "loss": 1.839585602283478e-05, "step": 301740 }, { "epoch": 85.6514334374113, "grad_norm": 0.006119383964687586, "learning_rate": 1.4383196139653704e-05, "loss": 2.7193687856197357e-05, "step": 301750 }, { "epoch": 85.65427192733466, "grad_norm": 0.011999246664345264, "learning_rate": 1.4380357649730344e-05, "loss": 1.567285507917404e-05, "step": 301760 }, { "epoch": 85.65711041725802, "grad_norm": 0.007798186969012022, "learning_rate": 1.4377519159806985e-05, "loss": 0.0048813939094543455, "step": 301770 }, { "epoch": 85.65994890718137, "grad_norm": 0.012441769242286682, "learning_rate": 1.4374680669883623e-05, "loss": 2.460237592458725e-05, "step": 301780 }, { "epoch": 85.66278739710474, "grad_norm": 0.6417554616928101, "learning_rate": 1.4371842179960263e-05, "loss": 0.00013846810907125473, "step": 301790 }, { "epoch": 85.6656258870281, "grad_norm": 0.033044394105672836, "learning_rate": 1.43690036900369e-05, "loss": 4.251599311828613e-05, "step": 301800 }, { "epoch": 85.66846437695146, "grad_norm": 0.002070560585707426, "learning_rate": 1.436616520011354e-05, "loss": 0.00041631609201431274, "step": 301810 }, { "epoch": 85.67130286687483, "grad_norm": 0.00045797522761859, "learning_rate": 1.4363326710190182e-05, "loss": 1.9788742065429688e-05, "step": 301820 }, { "epoch": 85.67414135679819, "grad_norm": 12.363292694091797, "learning_rate": 1.4360488220266818e-05, "loss": 0.0027462173253297805, "step": 301830 }, { "epoch": 85.67697984672155, "grad_norm": 0.01596464402973652, "learning_rate": 1.435764973034346e-05, "loss": 2.2260844707489015e-05, "step": 301840 }, { "epoch": 85.6798183366449, "grad_norm": 0.0001700870634522289, "learning_rate": 1.4354811240420096e-05, "loss": 9.740516543388366e-06, "step": 301850 }, { "epoch": 85.68265682656826, "grad_norm": 0.00035497601493261755, "learning_rate": 1.4351972750496737e-05, "loss": 7.50979408621788e-05, "step": 301860 }, { "epoch": 85.68549531649163, "grad_norm": 0.029674695804715157, "learning_rate": 1.4349134260573377e-05, "loss": 0.00039691496640443803, "step": 301870 }, { "epoch": 85.68833380641499, "grad_norm": 0.002988670254126191, "learning_rate": 1.4346295770650015e-05, "loss": 9.272415190935135e-05, "step": 301880 }, { "epoch": 85.69117229633835, "grad_norm": 0.0009193998412229121, "learning_rate": 1.4343457280726654e-05, "loss": 1.8271617591381073e-05, "step": 301890 }, { "epoch": 85.69401078626171, "grad_norm": 0.003825938096269965, "learning_rate": 1.4340618790803292e-05, "loss": 2.6077404618263246e-05, "step": 301900 }, { "epoch": 85.69684927618506, "grad_norm": 0.00011032774636987597, "learning_rate": 1.4337780300879932e-05, "loss": 1.6677938401699065e-05, "step": 301910 }, { "epoch": 85.69968776610843, "grad_norm": 0.0010133914183825254, "learning_rate": 1.4334941810956573e-05, "loss": 1.6908347606658936e-05, "step": 301920 }, { "epoch": 85.70252625603179, "grad_norm": 0.0023191613145172596, "learning_rate": 1.4332103321033211e-05, "loss": 1.0637566447257996e-05, "step": 301930 }, { "epoch": 85.70536474595515, "grad_norm": 0.001660153386183083, "learning_rate": 1.4329264831109851e-05, "loss": 9.777583181858063e-06, "step": 301940 }, { "epoch": 85.70820323587851, "grad_norm": 0.0010149255394935608, "learning_rate": 1.4326426341186489e-05, "loss": 1.604333519935608e-05, "step": 301950 }, { "epoch": 85.71104172580188, "grad_norm": 0.0023061621468514204, "learning_rate": 1.4323587851263129e-05, "loss": 1.6644783318042756e-05, "step": 301960 }, { "epoch": 85.71388021572524, "grad_norm": 0.01412764098495245, "learning_rate": 1.4320749361339768e-05, "loss": 1.6302242875099183e-05, "step": 301970 }, { "epoch": 85.71671870564859, "grad_norm": 0.0019734767265617847, "learning_rate": 1.4317910871416406e-05, "loss": 1.5197508037090302e-05, "step": 301980 }, { "epoch": 85.71955719557195, "grad_norm": 0.000694998016115278, "learning_rate": 1.4315072381493048e-05, "loss": 9.141117334365845e-06, "step": 301990 }, { "epoch": 85.72239568549531, "grad_norm": 0.008127001114189625, "learning_rate": 1.4312233891569684e-05, "loss": 2.0379945635795593e-05, "step": 302000 }, { "epoch": 85.72239568549531, "eval_accuracy": 0.9881732053156991, "eval_loss": 0.05173315107822418, "eval_runtime": 35.5566, "eval_samples_per_second": 442.309, "eval_steps_per_second": 6.919, "step": 302000 }, { "epoch": 85.72523417541868, "grad_norm": 0.0014744719956070185, "learning_rate": 1.4309395401646325e-05, "loss": 9.688735008239747e-06, "step": 302010 }, { "epoch": 85.72807266534204, "grad_norm": 0.0026361634954810143, "learning_rate": 1.4306556911722965e-05, "loss": 2.490188926458359e-05, "step": 302020 }, { "epoch": 85.7309111552654, "grad_norm": 0.002265949733555317, "learning_rate": 1.4303718421799603e-05, "loss": 2.132877707481384e-05, "step": 302030 }, { "epoch": 85.73374964518877, "grad_norm": 0.0011433812323957682, "learning_rate": 1.4300879931876243e-05, "loss": 1.2729130685329437e-05, "step": 302040 }, { "epoch": 85.73658813511211, "grad_norm": 0.012777136638760567, "learning_rate": 1.429804144195288e-05, "loss": 1.068655401468277e-05, "step": 302050 }, { "epoch": 85.73942662503548, "grad_norm": 0.000921986298635602, "learning_rate": 1.429520295202952e-05, "loss": 1.4681369066238403e-05, "step": 302060 }, { "epoch": 85.74226511495884, "grad_norm": 0.0065210429020226, "learning_rate": 1.4292364462106162e-05, "loss": 1.4455243945121766e-05, "step": 302070 }, { "epoch": 85.7451036048822, "grad_norm": 0.002845963928848505, "learning_rate": 1.42895259721828e-05, "loss": 1.2705475091934204e-05, "step": 302080 }, { "epoch": 85.74794209480557, "grad_norm": 0.007304128725081682, "learning_rate": 1.428668748225944e-05, "loss": 1.9569322466850282e-05, "step": 302090 }, { "epoch": 85.75078058472893, "grad_norm": 0.011326467618346214, "learning_rate": 1.4283848992336077e-05, "loss": 2.2378936409950257e-05, "step": 302100 }, { "epoch": 85.75361907465229, "grad_norm": 0.0017870365409180522, "learning_rate": 1.4281010502412717e-05, "loss": 2.168919891119003e-05, "step": 302110 }, { "epoch": 85.75645756457564, "grad_norm": 0.011442798189818859, "learning_rate": 1.4278172012489357e-05, "loss": 9.517744183540345e-06, "step": 302120 }, { "epoch": 85.759296054499, "grad_norm": 0.0031617225613445044, "learning_rate": 1.4275333522565995e-05, "loss": 1.0650046169757843e-05, "step": 302130 }, { "epoch": 85.76213454442237, "grad_norm": 0.0019377137068659067, "learning_rate": 1.4272495032642636e-05, "loss": 7.138773798942566e-06, "step": 302140 }, { "epoch": 85.76497303434573, "grad_norm": 0.001872067921794951, "learning_rate": 1.4269656542719272e-05, "loss": 9.504705667495728e-06, "step": 302150 }, { "epoch": 85.76781152426909, "grad_norm": 0.0020853381138294935, "learning_rate": 1.4266818052795914e-05, "loss": 8.910521864891053e-06, "step": 302160 }, { "epoch": 85.77065001419246, "grad_norm": 0.0021672039292752743, "learning_rate": 1.4263979562872554e-05, "loss": 8.223764598369599e-06, "step": 302170 }, { "epoch": 85.7734885041158, "grad_norm": 0.00041501715895719826, "learning_rate": 1.4261141072949192e-05, "loss": 6.442330777645111e-06, "step": 302180 }, { "epoch": 85.77632699403917, "grad_norm": 0.002264714566990733, "learning_rate": 1.4258302583025831e-05, "loss": 2.9931776225566864e-05, "step": 302190 }, { "epoch": 85.77916548396253, "grad_norm": 0.0007754432735964656, "learning_rate": 1.425546409310247e-05, "loss": 1.1431798338890075e-05, "step": 302200 }, { "epoch": 85.78200397388589, "grad_norm": 0.003446787130087614, "learning_rate": 1.4252625603179109e-05, "loss": 1.4858320355415344e-05, "step": 302210 }, { "epoch": 85.78484246380926, "grad_norm": 0.002341963117942214, "learning_rate": 1.424978711325575e-05, "loss": 1.4245137572288514e-05, "step": 302220 }, { "epoch": 85.78768095373262, "grad_norm": 0.019072307273745537, "learning_rate": 1.4246948623332387e-05, "loss": 1.794770359992981e-05, "step": 302230 }, { "epoch": 85.79051944365598, "grad_norm": 0.0545801967382431, "learning_rate": 1.4244110133409028e-05, "loss": 2.8013810515403746e-05, "step": 302240 }, { "epoch": 85.79335793357933, "grad_norm": 0.002927393652498722, "learning_rate": 1.4241271643485666e-05, "loss": 8.449889719486237e-06, "step": 302250 }, { "epoch": 85.79619642350269, "grad_norm": 0.012199437245726585, "learning_rate": 1.4238433153562306e-05, "loss": 1.4468282461166383e-05, "step": 302260 }, { "epoch": 85.79903491342606, "grad_norm": 0.00058695615734905, "learning_rate": 1.4235594663638945e-05, "loss": 7.656030356884003e-06, "step": 302270 }, { "epoch": 85.80187340334942, "grad_norm": 0.004309527575969696, "learning_rate": 1.4232756173715583e-05, "loss": 1.3306178152561187e-05, "step": 302280 }, { "epoch": 85.80471189327278, "grad_norm": 0.0013804661575704813, "learning_rate": 1.4229917683792225e-05, "loss": 2.108905464410782e-05, "step": 302290 }, { "epoch": 85.80755038319614, "grad_norm": 0.0012427723268046975, "learning_rate": 1.4227079193868861e-05, "loss": 6.373226642608642e-06, "step": 302300 }, { "epoch": 85.8103888731195, "grad_norm": 0.0013093937886878848, "learning_rate": 1.4224240703945502e-05, "loss": 2.8813257813453674e-05, "step": 302310 }, { "epoch": 85.81322736304286, "grad_norm": 0.0007120513473637402, "learning_rate": 1.4221402214022142e-05, "loss": 6.7425891757011415e-06, "step": 302320 }, { "epoch": 85.81606585296622, "grad_norm": 0.001145420130342245, "learning_rate": 1.421856372409878e-05, "loss": 8.618086576461792e-06, "step": 302330 }, { "epoch": 85.81890434288958, "grad_norm": 0.0006869529606774449, "learning_rate": 1.421572523417542e-05, "loss": 7.3876231908798214e-06, "step": 302340 }, { "epoch": 85.82174283281294, "grad_norm": 0.10724548995494843, "learning_rate": 1.4212886744252058e-05, "loss": 1.9283220171928406e-05, "step": 302350 }, { "epoch": 85.82458132273631, "grad_norm": 0.001330041908659041, "learning_rate": 1.4210048254328697e-05, "loss": 8.961930871009826e-06, "step": 302360 }, { "epoch": 85.82741981265967, "grad_norm": 0.0017937163356691599, "learning_rate": 1.4207209764405339e-05, "loss": 2.9147043824195863e-05, "step": 302370 }, { "epoch": 85.83025830258302, "grad_norm": 0.0014344691298902035, "learning_rate": 1.4204371274481975e-05, "loss": 1.5448033809661866e-05, "step": 302380 }, { "epoch": 85.83309679250638, "grad_norm": 0.004159136675298214, "learning_rate": 1.4201532784558616e-05, "loss": 9.978935122489929e-06, "step": 302390 }, { "epoch": 85.83593528242974, "grad_norm": 0.0015790495090186596, "learning_rate": 1.4198694294635254e-05, "loss": 5.243904888629914e-06, "step": 302400 }, { "epoch": 85.83877377235311, "grad_norm": 0.009106223471462727, "learning_rate": 1.4195855804711894e-05, "loss": 1.849886029958725e-05, "step": 302410 }, { "epoch": 85.84161226227647, "grad_norm": 0.0011753836879506707, "learning_rate": 1.4193017314788534e-05, "loss": 6.701983511447906e-06, "step": 302420 }, { "epoch": 85.84445075219983, "grad_norm": 0.000509060628246516, "learning_rate": 1.4190178824865172e-05, "loss": 5.103275179862976e-06, "step": 302430 }, { "epoch": 85.8472892421232, "grad_norm": 0.0035544196143746376, "learning_rate": 1.4187340334941811e-05, "loss": 6.3266605138778685e-06, "step": 302440 }, { "epoch": 85.85012773204654, "grad_norm": 0.0027859469410032034, "learning_rate": 1.418450184501845e-05, "loss": 1.3886205852031708e-05, "step": 302450 }, { "epoch": 85.85296622196991, "grad_norm": 0.001577318529598415, "learning_rate": 1.418166335509509e-05, "loss": 7.431395351886749e-06, "step": 302460 }, { "epoch": 85.85580471189327, "grad_norm": 0.0015719399088993669, "learning_rate": 1.417882486517173e-05, "loss": 8.224882185459137e-06, "step": 302470 }, { "epoch": 85.85864320181663, "grad_norm": 0.0030804709531366825, "learning_rate": 1.4175986375248368e-05, "loss": 1.0665878653526306e-05, "step": 302480 }, { "epoch": 85.86148169174, "grad_norm": 0.002675555180758238, "learning_rate": 1.4173147885325008e-05, "loss": 9.141303598880767e-06, "step": 302490 }, { "epoch": 85.86432018166336, "grad_norm": 0.0003588541003409773, "learning_rate": 1.4170309395401646e-05, "loss": 8.151866495609283e-06, "step": 302500 }, { "epoch": 85.86432018166336, "eval_accuracy": 0.9888090544922744, "eval_loss": 0.05025317892432213, "eval_runtime": 35.5801, "eval_samples_per_second": 442.017, "eval_steps_per_second": 6.914, "step": 302500 }, { "epoch": 85.86715867158672, "grad_norm": 0.0003453535318840295, "learning_rate": 1.4167470905478286e-05, "loss": 7.64988362789154e-06, "step": 302510 }, { "epoch": 85.86999716151007, "grad_norm": 0.00032297978759743273, "learning_rate": 1.4164632415554927e-05, "loss": 5.611032247543335e-06, "step": 302520 }, { "epoch": 85.87283565143343, "grad_norm": 0.00029883920797146857, "learning_rate": 1.4161793925631563e-05, "loss": 2.765040844678879e-05, "step": 302530 }, { "epoch": 85.8756741413568, "grad_norm": 0.0006238865316845477, "learning_rate": 1.4158955435708205e-05, "loss": 1.5464797616004945e-05, "step": 302540 }, { "epoch": 85.87851263128016, "grad_norm": 0.007389145437628031, "learning_rate": 1.4156116945784843e-05, "loss": 6.772205233573914e-06, "step": 302550 }, { "epoch": 85.88135112120352, "grad_norm": 0.0034208267461508512, "learning_rate": 1.4153278455861482e-05, "loss": 7.236190140247345e-06, "step": 302560 }, { "epoch": 85.88418961112689, "grad_norm": 0.009593065828084946, "learning_rate": 1.4150439965938122e-05, "loss": 1.2966617941856384e-05, "step": 302570 }, { "epoch": 85.88702810105025, "grad_norm": 0.03239523246884346, "learning_rate": 1.414760147601476e-05, "loss": 6.015412509441376e-05, "step": 302580 }, { "epoch": 85.8898665909736, "grad_norm": 0.0003253621980547905, "learning_rate": 1.41447629860914e-05, "loss": 8.142739534378052e-06, "step": 302590 }, { "epoch": 85.89270508089696, "grad_norm": 0.002844348317012191, "learning_rate": 1.4141924496168038e-05, "loss": 1.0975636541843414e-05, "step": 302600 }, { "epoch": 85.89554357082032, "grad_norm": 0.0010184801649302244, "learning_rate": 1.413908600624468e-05, "loss": 1.3477914035320281e-05, "step": 302610 }, { "epoch": 85.89838206074369, "grad_norm": 0.0028895926661789417, "learning_rate": 1.4136247516321319e-05, "loss": 2.114269882440567e-05, "step": 302620 }, { "epoch": 85.90122055066705, "grad_norm": 0.0045772758312523365, "learning_rate": 1.4133409026397957e-05, "loss": 1.566056162118912e-05, "step": 302630 }, { "epoch": 85.90405904059041, "grad_norm": 0.013819762505590916, "learning_rate": 1.4130570536474597e-05, "loss": 9.968318045139312e-06, "step": 302640 }, { "epoch": 85.90689753051376, "grad_norm": 0.0013315179385244846, "learning_rate": 1.4127732046551234e-05, "loss": 7.7003613114357e-06, "step": 302650 }, { "epoch": 85.90973602043712, "grad_norm": 0.0016257205279543996, "learning_rate": 1.4124893556627874e-05, "loss": 1.7772242426872253e-05, "step": 302660 }, { "epoch": 85.91257451036049, "grad_norm": 0.003404037794098258, "learning_rate": 1.4122055066704516e-05, "loss": 1.9065476953983308e-05, "step": 302670 }, { "epoch": 85.91541300028385, "grad_norm": 0.0012970013776794076, "learning_rate": 1.4119216576781152e-05, "loss": 1.1545605957508087e-05, "step": 302680 }, { "epoch": 85.91825149020721, "grad_norm": 0.0020340499468147755, "learning_rate": 1.4116378086857793e-05, "loss": 7.566623389720917e-06, "step": 302690 }, { "epoch": 85.92108998013057, "grad_norm": 0.002523487200960517, "learning_rate": 1.411353959693443e-05, "loss": 7.64746218919754e-06, "step": 302700 }, { "epoch": 85.92392847005394, "grad_norm": 0.010318014770746231, "learning_rate": 1.4110701107011071e-05, "loss": 1.1218897998332978e-05, "step": 302710 }, { "epoch": 85.92676695997729, "grad_norm": 0.0013868426904082298, "learning_rate": 1.410786261708771e-05, "loss": 1.3970211148262024e-05, "step": 302720 }, { "epoch": 85.92960544990065, "grad_norm": 0.0021583260968327522, "learning_rate": 1.4105024127164349e-05, "loss": 1.5730969607830048e-05, "step": 302730 }, { "epoch": 85.93244393982401, "grad_norm": 0.00207411777228117, "learning_rate": 1.4102185637240988e-05, "loss": 5.688145756721497e-06, "step": 302740 }, { "epoch": 85.93528242974737, "grad_norm": 0.0031087796669453382, "learning_rate": 1.4099347147317626e-05, "loss": 8.012540638446808e-06, "step": 302750 }, { "epoch": 85.93812091967074, "grad_norm": 0.0021629356779158115, "learning_rate": 1.4096508657394266e-05, "loss": 1.0611303150653838e-05, "step": 302760 }, { "epoch": 85.9409594095941, "grad_norm": 0.0010838222224265337, "learning_rate": 1.4093670167470907e-05, "loss": 6.151199340820313e-06, "step": 302770 }, { "epoch": 85.94379789951746, "grad_norm": 0.0003055390261579305, "learning_rate": 1.4090831677547545e-05, "loss": 5.56018203496933e-06, "step": 302780 }, { "epoch": 85.94663638944081, "grad_norm": 0.0014768186956644058, "learning_rate": 1.4087993187624185e-05, "loss": 1.1984072625637054e-05, "step": 302790 }, { "epoch": 85.94947487936417, "grad_norm": 0.0010761383455246687, "learning_rate": 1.4085154697700823e-05, "loss": 2.5628134608268738e-05, "step": 302800 }, { "epoch": 85.95231336928754, "grad_norm": 0.005247002933174372, "learning_rate": 1.4082316207777463e-05, "loss": 8.540414273738862e-06, "step": 302810 }, { "epoch": 85.9551518592109, "grad_norm": 0.0013088566483929753, "learning_rate": 1.4079477717854104e-05, "loss": 1.0094046592712402e-05, "step": 302820 }, { "epoch": 85.95799034913426, "grad_norm": 0.0010328888893127441, "learning_rate": 1.407663922793074e-05, "loss": 1.735072582960129e-05, "step": 302830 }, { "epoch": 85.96082883905763, "grad_norm": 0.0010305471951141953, "learning_rate": 1.4073800738007382e-05, "loss": 4.482455551624298e-06, "step": 302840 }, { "epoch": 85.96366732898099, "grad_norm": 0.0013724328018724918, "learning_rate": 1.4070962248084018e-05, "loss": 6.103143095970154e-06, "step": 302850 }, { "epoch": 85.96650581890434, "grad_norm": 0.0002420678356429562, "learning_rate": 1.406812375816066e-05, "loss": 9.23536717891693e-06, "step": 302860 }, { "epoch": 85.9693443088277, "grad_norm": 0.00046586734242737293, "learning_rate": 1.4065285268237299e-05, "loss": 6.522238254547119e-06, "step": 302870 }, { "epoch": 85.97218279875106, "grad_norm": 0.0008913048077374697, "learning_rate": 1.4062446778313937e-05, "loss": 1.853257417678833e-05, "step": 302880 }, { "epoch": 85.97502128867443, "grad_norm": 0.002810603938996792, "learning_rate": 1.4059608288390577e-05, "loss": 7.511675357818604e-06, "step": 302890 }, { "epoch": 85.97785977859779, "grad_norm": 0.0007352735265158117, "learning_rate": 1.4056769798467215e-05, "loss": 6.198883056640625e-06, "step": 302900 }, { "epoch": 85.98069826852115, "grad_norm": 0.000260386907029897, "learning_rate": 1.4053931308543854e-05, "loss": 6.7804008722305294e-06, "step": 302910 }, { "epoch": 85.9835367584445, "grad_norm": 0.0008406075066886842, "learning_rate": 1.4051092818620496e-05, "loss": 7.138028740882873e-06, "step": 302920 }, { "epoch": 85.98637524836786, "grad_norm": 0.00541441747918725, "learning_rate": 1.4048254328697134e-05, "loss": 7.50422477722168e-06, "step": 302930 }, { "epoch": 85.98921373829123, "grad_norm": 0.002777345245704055, "learning_rate": 1.4045415838773773e-05, "loss": 4.5248493552207944e-05, "step": 302940 }, { "epoch": 85.99205222821459, "grad_norm": 0.0009222063818015158, "learning_rate": 1.4042577348850411e-05, "loss": 6.206631660461426e-05, "step": 302950 }, { "epoch": 85.99489071813795, "grad_norm": 0.0015374477952718735, "learning_rate": 1.4039738858927051e-05, "loss": 1.582801342010498e-05, "step": 302960 }, { "epoch": 85.99772920806132, "grad_norm": 0.0056497519835829735, "learning_rate": 1.403690036900369e-05, "loss": 9.502470493316651e-06, "step": 302970 }, { "epoch": 86.00056769798468, "grad_norm": 0.0023207589983940125, "learning_rate": 1.4034061879080329e-05, "loss": 8.491866901749745e-06, "step": 302980 }, { "epoch": 86.00340618790803, "grad_norm": 0.017717137932777405, "learning_rate": 1.403122338915697e-05, "loss": 1.156441867351532e-05, "step": 302990 }, { "epoch": 86.00624467783139, "grad_norm": 0.0010099263163283467, "learning_rate": 1.402838489923361e-05, "loss": 5.467794835567474e-06, "step": 303000 }, { "epoch": 86.00624467783139, "eval_accuracy": 0.9886818846569594, "eval_loss": 0.0505259670317173, "eval_runtime": 35.1044, "eval_samples_per_second": 448.007, "eval_steps_per_second": 7.008, "step": 303000 }, { "epoch": 86.00908316775475, "grad_norm": 0.002547227079048753, "learning_rate": 1.4025546409310248e-05, "loss": 1.0691024363040924e-05, "step": 303010 }, { "epoch": 86.01192165767812, "grad_norm": 0.030098799616098404, "learning_rate": 1.4022707919386887e-05, "loss": 8.11442732810974e-06, "step": 303020 }, { "epoch": 86.01476014760148, "grad_norm": 0.0033774266485124826, "learning_rate": 1.4019869429463525e-05, "loss": 8.898042142391206e-06, "step": 303030 }, { "epoch": 86.01759863752484, "grad_norm": 0.0045858509838581085, "learning_rate": 1.4017030939540165e-05, "loss": 7.378309965133667e-06, "step": 303040 }, { "epoch": 86.0204371274482, "grad_norm": 0.0017627030611038208, "learning_rate": 1.4014192449616806e-05, "loss": 6.291456520557403e-06, "step": 303050 }, { "epoch": 86.02327561737155, "grad_norm": 0.0007537876372225583, "learning_rate": 1.4011353959693443e-05, "loss": 5.295127630233765e-06, "step": 303060 }, { "epoch": 86.02611410729492, "grad_norm": 0.0017739713657647371, "learning_rate": 1.4008515469770084e-05, "loss": 5.145370960235596e-06, "step": 303070 }, { "epoch": 86.02895259721828, "grad_norm": 0.0014824175741523504, "learning_rate": 1.4005676979846722e-05, "loss": 8.676387369632722e-06, "step": 303080 }, { "epoch": 86.03179108714164, "grad_norm": 0.008602323941886425, "learning_rate": 1.4002838489923362e-05, "loss": 9.42908227443695e-06, "step": 303090 }, { "epoch": 86.034629577065, "grad_norm": 0.004922698717564344, "learning_rate": 1.4000000000000001e-05, "loss": 9.015016257762908e-06, "step": 303100 }, { "epoch": 86.03746806698837, "grad_norm": 0.0007656702655367553, "learning_rate": 1.399716151007664e-05, "loss": 7.68713653087616e-06, "step": 303110 }, { "epoch": 86.04030655691172, "grad_norm": 0.0017431556480005383, "learning_rate": 1.3994323020153279e-05, "loss": 1.9306503236293793e-05, "step": 303120 }, { "epoch": 86.04314504683508, "grad_norm": 0.00032481533708050847, "learning_rate": 1.3991484530229917e-05, "loss": 4.112347960472107e-06, "step": 303130 }, { "epoch": 86.04598353675844, "grad_norm": 0.0012765894643962383, "learning_rate": 1.3988646040306559e-05, "loss": 9.2165544629097e-06, "step": 303140 }, { "epoch": 86.0488220266818, "grad_norm": 0.01430998183786869, "learning_rate": 1.3985807550383198e-05, "loss": 8.872337639331818e-06, "step": 303150 }, { "epoch": 86.05166051660517, "grad_norm": 0.0013598816003650427, "learning_rate": 1.3982969060459836e-05, "loss": 1.1052936315536499e-05, "step": 303160 }, { "epoch": 86.05449900652853, "grad_norm": 0.003309833351522684, "learning_rate": 1.3980130570536476e-05, "loss": 9.64626669883728e-06, "step": 303170 }, { "epoch": 86.0573374964519, "grad_norm": 0.009187857620418072, "learning_rate": 1.3977292080613114e-05, "loss": 8.543208241462708e-06, "step": 303180 }, { "epoch": 86.06017598637524, "grad_norm": 0.0005000638775527477, "learning_rate": 1.3974453590689754e-05, "loss": 1.9346922636032103e-05, "step": 303190 }, { "epoch": 86.0630144762986, "grad_norm": 0.0005021620308980346, "learning_rate": 1.3971615100766395e-05, "loss": 6.723962724208832e-06, "step": 303200 }, { "epoch": 86.06585296622197, "grad_norm": 0.0009365727892145514, "learning_rate": 1.3968776610843031e-05, "loss": 1.4180317521095276e-05, "step": 303210 }, { "epoch": 86.06869145614533, "grad_norm": 0.002887303475290537, "learning_rate": 1.3965938120919673e-05, "loss": 9.091384708881377e-06, "step": 303220 }, { "epoch": 86.0715299460687, "grad_norm": 0.004518670961260796, "learning_rate": 1.3963099630996309e-05, "loss": 9.036064147949218e-06, "step": 303230 }, { "epoch": 86.07436843599206, "grad_norm": 0.0009551118710078299, "learning_rate": 1.396026114107295e-05, "loss": 6.442330777645111e-06, "step": 303240 }, { "epoch": 86.07720692591542, "grad_norm": 0.010785624384880066, "learning_rate": 1.395742265114959e-05, "loss": 1.330692321062088e-05, "step": 303250 }, { "epoch": 86.08004541583877, "grad_norm": 0.001883447403088212, "learning_rate": 1.3954584161226228e-05, "loss": 6.641633808612823e-06, "step": 303260 }, { "epoch": 86.08288390576213, "grad_norm": 0.002129019470885396, "learning_rate": 1.3951745671302868e-05, "loss": 7.139518857002258e-06, "step": 303270 }, { "epoch": 86.0857223956855, "grad_norm": 0.0029935266356915236, "learning_rate": 1.3948907181379506e-05, "loss": 5.964934825897217e-06, "step": 303280 }, { "epoch": 86.08856088560886, "grad_norm": 0.00039093627128750086, "learning_rate": 1.3946068691456147e-05, "loss": 7.6159834861755374e-06, "step": 303290 }, { "epoch": 86.09139937553222, "grad_norm": 0.0005833820323459804, "learning_rate": 1.3943230201532787e-05, "loss": 9.043887257575988e-06, "step": 303300 }, { "epoch": 86.09423786545558, "grad_norm": 0.003161606378853321, "learning_rate": 1.3940391711609425e-05, "loss": 6.299279630184174e-06, "step": 303310 }, { "epoch": 86.09707635537895, "grad_norm": 0.0015029306523501873, "learning_rate": 1.3937553221686064e-05, "loss": 4.884786903858185e-06, "step": 303320 }, { "epoch": 86.0999148453023, "grad_norm": 0.0006765538710169494, "learning_rate": 1.3934714731762702e-05, "loss": 5.9621408581733705e-06, "step": 303330 }, { "epoch": 86.10275333522566, "grad_norm": 0.0010510656284168363, "learning_rate": 1.3931876241839342e-05, "loss": 8.8471919298172e-06, "step": 303340 }, { "epoch": 86.10559182514902, "grad_norm": 0.0007923958473838866, "learning_rate": 1.3929037751915983e-05, "loss": 8.540041744709014e-06, "step": 303350 }, { "epoch": 86.10843031507238, "grad_norm": 0.0005712692509405315, "learning_rate": 1.392619926199262e-05, "loss": 8.981488645076751e-06, "step": 303360 }, { "epoch": 86.11126880499575, "grad_norm": 0.0012018008856102824, "learning_rate": 1.3923360772069261e-05, "loss": 7.564947009086609e-06, "step": 303370 }, { "epoch": 86.11410729491911, "grad_norm": 0.0012080310843884945, "learning_rate": 1.3920522282145897e-05, "loss": 7.104501128196716e-06, "step": 303380 }, { "epoch": 86.11694578484246, "grad_norm": 0.0003592479624785483, "learning_rate": 1.3917683792222539e-05, "loss": 7.2177499532699585e-06, "step": 303390 }, { "epoch": 86.11978427476582, "grad_norm": 0.0009875368559733033, "learning_rate": 1.3914845302299178e-05, "loss": 1.283697783946991e-05, "step": 303400 }, { "epoch": 86.12262276468918, "grad_norm": 0.008859383873641491, "learning_rate": 1.3912006812375816e-05, "loss": 1.2042187154293061e-05, "step": 303410 }, { "epoch": 86.12546125461255, "grad_norm": 0.0018682569498196244, "learning_rate": 1.3909168322452456e-05, "loss": 8.278712630271912e-06, "step": 303420 }, { "epoch": 86.12829974453591, "grad_norm": 0.002233080333098769, "learning_rate": 1.3906329832529094e-05, "loss": 4.085339605808258e-06, "step": 303430 }, { "epoch": 86.13113823445927, "grad_norm": 0.0006056365673430264, "learning_rate": 1.3903491342605734e-05, "loss": 8.367374539375305e-06, "step": 303440 }, { "epoch": 86.13397672438263, "grad_norm": 0.0019230797188356519, "learning_rate": 1.3900652852682375e-05, "loss": 9.582564234733582e-06, "step": 303450 }, { "epoch": 86.13681521430598, "grad_norm": 0.0011974876979365945, "learning_rate": 1.3897814362759013e-05, "loss": 1.0358355939388276e-05, "step": 303460 }, { "epoch": 86.13965370422935, "grad_norm": 0.001766644068993628, "learning_rate": 1.3894975872835653e-05, "loss": 6.057322025299072e-06, "step": 303470 }, { "epoch": 86.14249219415271, "grad_norm": 0.0013086829567328095, "learning_rate": 1.389213738291229e-05, "loss": 5.1781535148620605e-06, "step": 303480 }, { "epoch": 86.14533068407607, "grad_norm": 0.00877348706126213, "learning_rate": 1.388929889298893e-05, "loss": 8.918344974517822e-06, "step": 303490 }, { "epoch": 86.14816917399943, "grad_norm": 0.0018286181148141623, "learning_rate": 1.3886460403065572e-05, "loss": 7.697194814682006e-06, "step": 303500 }, { "epoch": 86.14816917399943, "eval_accuracy": 0.9889362243275895, "eval_loss": 0.049131255596876144, "eval_runtime": 35.8164, "eval_samples_per_second": 439.1, "eval_steps_per_second": 6.868, "step": 303500 }, { "epoch": 86.1510076639228, "grad_norm": 0.0013737572589889169, "learning_rate": 1.3883621913142208e-05, "loss": 5.025975406169892e-06, "step": 303510 }, { "epoch": 86.15384615384616, "grad_norm": 0.0008787747356109321, "learning_rate": 1.388078342321885e-05, "loss": 1.047905534505844e-05, "step": 303520 }, { "epoch": 86.15668464376951, "grad_norm": 0.001911195577122271, "learning_rate": 1.3877944933295486e-05, "loss": 3.934837877750397e-06, "step": 303530 }, { "epoch": 86.15952313369287, "grad_norm": 0.0016731767682358623, "learning_rate": 1.3875106443372127e-05, "loss": 7.1158632636070255e-06, "step": 303540 }, { "epoch": 86.16236162361623, "grad_norm": 0.0023926591966301203, "learning_rate": 1.3872267953448767e-05, "loss": 9.506754577159881e-06, "step": 303550 }, { "epoch": 86.1652001135396, "grad_norm": 0.001310504274442792, "learning_rate": 1.3869429463525405e-05, "loss": 6.115250289440155e-06, "step": 303560 }, { "epoch": 86.16803860346296, "grad_norm": 0.004609816707670689, "learning_rate": 1.3866590973602044e-05, "loss": 6.080605089664459e-06, "step": 303570 }, { "epoch": 86.17087709338632, "grad_norm": 0.0012452502269297838, "learning_rate": 1.3863752483678682e-05, "loss": 5.9889629483222965e-06, "step": 303580 }, { "epoch": 86.17371558330967, "grad_norm": 0.03184361383318901, "learning_rate": 1.3860913993755322e-05, "loss": 8.938089013099671e-06, "step": 303590 }, { "epoch": 86.17655407323304, "grad_norm": 0.0020997195970267057, "learning_rate": 1.3858075503831963e-05, "loss": 6.853602826595306e-06, "step": 303600 }, { "epoch": 86.1793925631564, "grad_norm": 0.003705674083903432, "learning_rate": 1.3855237013908601e-05, "loss": 5.798973143100739e-06, "step": 303610 }, { "epoch": 86.18223105307976, "grad_norm": 0.002317059552296996, "learning_rate": 1.3852398523985241e-05, "loss": 8.346140384674073e-06, "step": 303620 }, { "epoch": 86.18506954300312, "grad_norm": 0.004815945401787758, "learning_rate": 1.3849560034061879e-05, "loss": 9.01389867067337e-06, "step": 303630 }, { "epoch": 86.18790803292649, "grad_norm": 0.0049704136326909065, "learning_rate": 1.3846721544138519e-05, "loss": 9.535439312458039e-06, "step": 303640 }, { "epoch": 86.19074652284985, "grad_norm": 0.002713866764679551, "learning_rate": 1.3843883054215159e-05, "loss": 6.1247497797012326e-06, "step": 303650 }, { "epoch": 86.1935850127732, "grad_norm": 0.0027111393865197897, "learning_rate": 1.3841044564291796e-05, "loss": 1.0765716433525086e-05, "step": 303660 }, { "epoch": 86.19642350269656, "grad_norm": 0.0019302328582853079, "learning_rate": 1.3838206074368438e-05, "loss": 6.157532334327698e-06, "step": 303670 }, { "epoch": 86.19926199261992, "grad_norm": 0.001814831979572773, "learning_rate": 1.3835367584445074e-05, "loss": 6.403215229511261e-06, "step": 303680 }, { "epoch": 86.20210048254329, "grad_norm": 0.0007814955315552652, "learning_rate": 1.3832529094521716e-05, "loss": 3.8098543882369997e-06, "step": 303690 }, { "epoch": 86.20493897246665, "grad_norm": 0.0033874856308102608, "learning_rate": 1.3829690604598355e-05, "loss": 7.191672921180725e-06, "step": 303700 }, { "epoch": 86.20777746239001, "grad_norm": 0.0023122753482311964, "learning_rate": 1.3826852114674993e-05, "loss": 6.189197301864624e-06, "step": 303710 }, { "epoch": 86.21061595231338, "grad_norm": 0.0006939530721865594, "learning_rate": 1.3824013624751633e-05, "loss": 6.075389683246612e-06, "step": 303720 }, { "epoch": 86.21345444223672, "grad_norm": 0.011555527336895466, "learning_rate": 1.3821175134828271e-05, "loss": 7.555820047855377e-06, "step": 303730 }, { "epoch": 86.21629293216009, "grad_norm": 0.002766135148704052, "learning_rate": 1.381833664490491e-05, "loss": 7.0909038186073305e-06, "step": 303740 }, { "epoch": 86.21913142208345, "grad_norm": 0.0020666359923779964, "learning_rate": 1.3815498154981552e-05, "loss": 8.973851799964905e-06, "step": 303750 }, { "epoch": 86.22196991200681, "grad_norm": 0.0008044902933761477, "learning_rate": 1.381265966505819e-05, "loss": 6.661005318164825e-06, "step": 303760 }, { "epoch": 86.22480840193018, "grad_norm": 0.0010164310224354267, "learning_rate": 1.380982117513483e-05, "loss": 3.7008896470069887e-06, "step": 303770 }, { "epoch": 86.22764689185354, "grad_norm": 0.0015438480768352747, "learning_rate": 1.3806982685211468e-05, "loss": 9.188242256641387e-06, "step": 303780 }, { "epoch": 86.2304853817769, "grad_norm": 0.0005285990191623569, "learning_rate": 1.3804144195288107e-05, "loss": 1.4250725507736206e-05, "step": 303790 }, { "epoch": 86.23332387170025, "grad_norm": 0.010385924950242043, "learning_rate": 1.3801305705364747e-05, "loss": 1.1066347360610962e-05, "step": 303800 }, { "epoch": 86.23616236162361, "grad_norm": 0.0007863845094107091, "learning_rate": 1.3798467215441385e-05, "loss": 6.555952131748199e-06, "step": 303810 }, { "epoch": 86.23900085154698, "grad_norm": 0.0007922782097011805, "learning_rate": 1.3795628725518026e-05, "loss": 6.405822932720184e-06, "step": 303820 }, { "epoch": 86.24183934147034, "grad_norm": 0.0007715547690168023, "learning_rate": 1.3792790235594663e-05, "loss": 3.1344592571258544e-06, "step": 303830 }, { "epoch": 86.2446778313937, "grad_norm": 0.011826158501207829, "learning_rate": 1.3789951745671304e-05, "loss": 7.082335650920868e-06, "step": 303840 }, { "epoch": 86.24751632131706, "grad_norm": 0.000540306675247848, "learning_rate": 1.3787113255747944e-05, "loss": 3.547966480255127e-06, "step": 303850 }, { "epoch": 86.25035481124041, "grad_norm": 0.001867293962277472, "learning_rate": 1.3784274765824582e-05, "loss": 8.652545511722565e-06, "step": 303860 }, { "epoch": 86.25319330116378, "grad_norm": 0.0014311035629361868, "learning_rate": 1.3781436275901221e-05, "loss": 4.627183079719543e-06, "step": 303870 }, { "epoch": 86.25603179108714, "grad_norm": 0.0007154273916967213, "learning_rate": 1.377859778597786e-05, "loss": 6.300769746303558e-06, "step": 303880 }, { "epoch": 86.2588702810105, "grad_norm": 0.0006786973099224269, "learning_rate": 1.3775759296054499e-05, "loss": 4.834681749343872e-06, "step": 303890 }, { "epoch": 86.26170877093386, "grad_norm": 0.00203166576102376, "learning_rate": 1.377292080613114e-05, "loss": 4.334934055805207e-06, "step": 303900 }, { "epoch": 86.26454726085723, "grad_norm": 0.0032645692117512226, "learning_rate": 1.3770082316207777e-05, "loss": 8.531846106052399e-06, "step": 303910 }, { "epoch": 86.26738575078059, "grad_norm": 0.010925335809588432, "learning_rate": 1.3767243826284418e-05, "loss": 9.795092046260833e-06, "step": 303920 }, { "epoch": 86.27022424070394, "grad_norm": 0.0022711302153766155, "learning_rate": 1.3764405336361056e-05, "loss": 9.372085332870484e-06, "step": 303930 }, { "epoch": 86.2730627306273, "grad_norm": 0.0015744512202218175, "learning_rate": 1.3761566846437696e-05, "loss": 1.0385178029537201e-05, "step": 303940 }, { "epoch": 86.27590122055067, "grad_norm": 0.0031999440398067236, "learning_rate": 1.3758728356514335e-05, "loss": 5.626305937767029e-06, "step": 303950 }, { "epoch": 86.27873971047403, "grad_norm": 0.0026588321197777987, "learning_rate": 1.3755889866590973e-05, "loss": 9.317323565483093e-06, "step": 303960 }, { "epoch": 86.28157820039739, "grad_norm": 0.0036507337354123592, "learning_rate": 1.3753051376667615e-05, "loss": 4.7247856855392454e-06, "step": 303970 }, { "epoch": 86.28441669032075, "grad_norm": 0.0023295998107641935, "learning_rate": 1.3750212886744251e-05, "loss": 5.824118852615357e-06, "step": 303980 }, { "epoch": 86.28725518024412, "grad_norm": 0.000570777163375169, "learning_rate": 1.3747374396820892e-05, "loss": 1.0204501450061798e-05, "step": 303990 }, { "epoch": 86.29009367016747, "grad_norm": 0.0011779536725953221, "learning_rate": 1.3744535906897532e-05, "loss": 8.0866739153862e-06, "step": 304000 }, { "epoch": 86.29009367016747, "eval_accuracy": 0.9889362243275895, "eval_loss": 0.04873745143413544, "eval_runtime": 35.0625, "eval_samples_per_second": 448.543, "eval_steps_per_second": 7.016, "step": 304000 }, { "epoch": 86.29293216009083, "grad_norm": 0.016916902735829353, "learning_rate": 1.374169741697417e-05, "loss": 5.522556602954865e-06, "step": 304010 }, { "epoch": 86.29577065001419, "grad_norm": 0.006939336657524109, "learning_rate": 1.373885892705081e-05, "loss": 7.763318717479705e-06, "step": 304020 }, { "epoch": 86.29860913993755, "grad_norm": 0.0012746196007356048, "learning_rate": 1.3736020437127448e-05, "loss": 7.510371506214142e-06, "step": 304030 }, { "epoch": 86.30144762986092, "grad_norm": 0.0004479030321817845, "learning_rate": 1.3733181947204087e-05, "loss": 6.533786654472351e-06, "step": 304040 }, { "epoch": 86.30428611978428, "grad_norm": 0.009600969962775707, "learning_rate": 1.3730343457280729e-05, "loss": 0.0002156313508749008, "step": 304050 }, { "epoch": 86.30712460970764, "grad_norm": 0.0012298761866986752, "learning_rate": 1.3727504967357365e-05, "loss": 0.0011167909950017929, "step": 304060 }, { "epoch": 86.30996309963099, "grad_norm": 0.004582318942993879, "learning_rate": 1.3724666477434006e-05, "loss": 3.845766186714172e-05, "step": 304070 }, { "epoch": 86.31280158955435, "grad_norm": 0.010267689824104309, "learning_rate": 1.3721827987510644e-05, "loss": 0.0022250674664974213, "step": 304080 }, { "epoch": 86.31564007947772, "grad_norm": 0.002600135514512658, "learning_rate": 1.3718989497587284e-05, "loss": 0.00018152389675378798, "step": 304090 }, { "epoch": 86.31847856940108, "grad_norm": 0.0026771831326186657, "learning_rate": 1.3716151007663924e-05, "loss": 0.0003871714696288109, "step": 304100 }, { "epoch": 86.32131705932444, "grad_norm": 0.00293828290887177, "learning_rate": 1.3713312517740562e-05, "loss": 0.002485005743801594, "step": 304110 }, { "epoch": 86.3241555492478, "grad_norm": 0.5251287817955017, "learning_rate": 1.3710474027817201e-05, "loss": 0.00014055147767066956, "step": 304120 }, { "epoch": 86.32699403917115, "grad_norm": 0.0056435721926391125, "learning_rate": 1.370763553789384e-05, "loss": 0.000163971446454525, "step": 304130 }, { "epoch": 86.32983252909452, "grad_norm": 0.002286753384396434, "learning_rate": 1.370479704797048e-05, "loss": 0.00035459771752357485, "step": 304140 }, { "epoch": 86.33267101901788, "grad_norm": 0.010007848031818867, "learning_rate": 1.370195855804712e-05, "loss": 7.669627666473389e-05, "step": 304150 }, { "epoch": 86.33550950894124, "grad_norm": 0.05266386643052101, "learning_rate": 1.3699120068123759e-05, "loss": 7.402654737234115e-05, "step": 304160 }, { "epoch": 86.3383479988646, "grad_norm": 0.011086699552834034, "learning_rate": 1.3696281578200398e-05, "loss": 7.504615932703018e-05, "step": 304170 }, { "epoch": 86.34118648878797, "grad_norm": 0.004221046343445778, "learning_rate": 1.3693443088277036e-05, "loss": 9.947121143341064e-05, "step": 304180 }, { "epoch": 86.34402497871133, "grad_norm": 0.6501717567443848, "learning_rate": 1.3690604598353676e-05, "loss": 9.356960654258729e-05, "step": 304190 }, { "epoch": 86.34686346863468, "grad_norm": 0.017023170366883278, "learning_rate": 1.3687766108430317e-05, "loss": 0.0004098625853657722, "step": 304200 }, { "epoch": 86.34970195855804, "grad_norm": 4.183040142059326, "learning_rate": 1.3684927618506954e-05, "loss": 0.0006202584132552147, "step": 304210 }, { "epoch": 86.3525404484814, "grad_norm": 0.6903095245361328, "learning_rate": 1.3682089128583595e-05, "loss": 0.00045913662761449815, "step": 304220 }, { "epoch": 86.35537893840477, "grad_norm": 0.018078459426760674, "learning_rate": 1.3679250638660233e-05, "loss": 0.004834090918302536, "step": 304230 }, { "epoch": 86.35821742832813, "grad_norm": 0.06543348729610443, "learning_rate": 1.3676412148736873e-05, "loss": 3.0197203159332275e-05, "step": 304240 }, { "epoch": 86.3610559182515, "grad_norm": 0.023567823693156242, "learning_rate": 1.3673573658813512e-05, "loss": 0.00021329186856746673, "step": 304250 }, { "epoch": 86.36389440817486, "grad_norm": 0.3554718792438507, "learning_rate": 1.367073516889015e-05, "loss": 5.863923579454422e-05, "step": 304260 }, { "epoch": 86.3667328980982, "grad_norm": 0.019504178315401077, "learning_rate": 1.366789667896679e-05, "loss": 0.0005781665444374084, "step": 304270 }, { "epoch": 86.36957138802157, "grad_norm": 0.0012392632197588682, "learning_rate": 1.3665058189043431e-05, "loss": 1.5233829617500306e-05, "step": 304280 }, { "epoch": 86.37240987794493, "grad_norm": 0.005032598040997982, "learning_rate": 1.366221969912007e-05, "loss": 6.0323439538478854e-05, "step": 304290 }, { "epoch": 86.3752483678683, "grad_norm": 0.012427417561411858, "learning_rate": 1.3659381209196709e-05, "loss": 6.563067436218261e-05, "step": 304300 }, { "epoch": 86.37808685779166, "grad_norm": 0.019686266779899597, "learning_rate": 1.3656542719273347e-05, "loss": 2.2701174020767213e-05, "step": 304310 }, { "epoch": 86.38092534771502, "grad_norm": 0.0004050050338264555, "learning_rate": 1.3653704229349987e-05, "loss": 6.721466779708862e-05, "step": 304320 }, { "epoch": 86.38376383763837, "grad_norm": 0.0017194027313962579, "learning_rate": 1.3650865739426626e-05, "loss": 2.561751753091812e-05, "step": 304330 }, { "epoch": 86.38660232756173, "grad_norm": 0.008747881278395653, "learning_rate": 1.3648027249503264e-05, "loss": 5.895942449569702e-05, "step": 304340 }, { "epoch": 86.3894408174851, "grad_norm": 0.001758531085215509, "learning_rate": 1.3645188759579906e-05, "loss": 2.2177770733833314e-05, "step": 304350 }, { "epoch": 86.39227930740846, "grad_norm": 0.029425732791423798, "learning_rate": 1.3642350269656542e-05, "loss": 1.6056187450885773e-05, "step": 304360 }, { "epoch": 86.39511779733182, "grad_norm": 0.0013982829404994845, "learning_rate": 1.3639511779733183e-05, "loss": 6.066262722015381e-06, "step": 304370 }, { "epoch": 86.39795628725518, "grad_norm": 0.0018504924373701215, "learning_rate": 1.3636673289809823e-05, "loss": 3.156177699565887e-05, "step": 304380 }, { "epoch": 86.40079477717855, "grad_norm": 0.0001695079408818856, "learning_rate": 1.3633834799886461e-05, "loss": 5.849506705999374e-05, "step": 304390 }, { "epoch": 86.4036332671019, "grad_norm": 0.003998348489403725, "learning_rate": 1.36309963099631e-05, "loss": 3.202594816684723e-05, "step": 304400 }, { "epoch": 86.40647175702526, "grad_norm": 0.0016179272206500173, "learning_rate": 1.3628157820039739e-05, "loss": 1.9183196127414703e-05, "step": 304410 }, { "epoch": 86.40931024694862, "grad_norm": 0.0024284040555357933, "learning_rate": 1.3625319330116378e-05, "loss": 1.1608563363552094e-05, "step": 304420 }, { "epoch": 86.41214873687198, "grad_norm": 0.002484785858541727, "learning_rate": 1.362248084019302e-05, "loss": 1.377742737531662e-05, "step": 304430 }, { "epoch": 86.41498722679535, "grad_norm": 0.01280968263745308, "learning_rate": 1.3619642350269656e-05, "loss": 1.1414848268032074e-05, "step": 304440 }, { "epoch": 86.41782571671871, "grad_norm": 0.003171416698023677, "learning_rate": 1.3616803860346297e-05, "loss": 1.5292130410671233e-05, "step": 304450 }, { "epoch": 86.42066420664207, "grad_norm": 0.0008865776471793652, "learning_rate": 1.3613965370422935e-05, "loss": 1.3669207692146302e-05, "step": 304460 }, { "epoch": 86.42350269656542, "grad_norm": 0.0001437761093256995, "learning_rate": 1.3611126880499575e-05, "loss": 7.575750350952148e-06, "step": 304470 }, { "epoch": 86.42634118648878, "grad_norm": 0.0007435408188030124, "learning_rate": 1.3608288390576215e-05, "loss": 1.0781176388263703e-05, "step": 304480 }, { "epoch": 86.42917967641215, "grad_norm": 0.0022271343041211367, "learning_rate": 1.3605449900652853e-05, "loss": 7.90400430560112e-05, "step": 304490 }, { "epoch": 86.43201816633551, "grad_norm": 0.0023637954145669937, "learning_rate": 1.3602611410729494e-05, "loss": 2.175401896238327e-05, "step": 304500 }, { "epoch": 86.43201816633551, "eval_accuracy": 0.987918865645069, "eval_loss": 0.05317056551575661, "eval_runtime": 35.8515, "eval_samples_per_second": 438.67, "eval_steps_per_second": 6.862, "step": 304500 }, { "epoch": 86.43485665625887, "grad_norm": 0.0013668478932231665, "learning_rate": 1.359977292080613e-05, "loss": 1.470521092414856e-05, "step": 304510 }, { "epoch": 86.43769514618224, "grad_norm": 0.0017877588979899883, "learning_rate": 1.3596934430882772e-05, "loss": 1.0441988706588745e-05, "step": 304520 }, { "epoch": 86.4405336361056, "grad_norm": 0.0030962140299379826, "learning_rate": 1.3594095940959411e-05, "loss": 0.00010686870664358138, "step": 304530 }, { "epoch": 86.44337212602895, "grad_norm": 0.005947619676589966, "learning_rate": 1.359125745103605e-05, "loss": 4.37738373875618e-05, "step": 304540 }, { "epoch": 86.44621061595231, "grad_norm": 0.0049042911268770695, "learning_rate": 1.3588418961112689e-05, "loss": 6.888583302497864e-05, "step": 304550 }, { "epoch": 86.44904910587567, "grad_norm": 0.0029820078052580357, "learning_rate": 1.3585580471189327e-05, "loss": 4.09858301281929e-05, "step": 304560 }, { "epoch": 86.45188759579904, "grad_norm": 0.0011123428121209145, "learning_rate": 1.3582741981265967e-05, "loss": 1.5511550009250642e-05, "step": 304570 }, { "epoch": 86.4547260857224, "grad_norm": 0.01629667542874813, "learning_rate": 1.3579903491342608e-05, "loss": 3.07643786072731e-05, "step": 304580 }, { "epoch": 86.45756457564576, "grad_norm": 0.0017998896073549986, "learning_rate": 1.3577065001419244e-05, "loss": 1.8322840332984924e-05, "step": 304590 }, { "epoch": 86.46040306556911, "grad_norm": 0.006459738593548536, "learning_rate": 1.3574226511495886e-05, "loss": 3.0163861811161042e-05, "step": 304600 }, { "epoch": 86.46324155549247, "grad_norm": 0.0009484119946137071, "learning_rate": 1.3571388021572524e-05, "loss": 1.4494918286800384e-05, "step": 304610 }, { "epoch": 86.46608004541584, "grad_norm": 0.02596045285463333, "learning_rate": 1.3568549531649163e-05, "loss": 1.463368535041809e-05, "step": 304620 }, { "epoch": 86.4689185353392, "grad_norm": 8.63558379933238e-05, "learning_rate": 1.3565711041725803e-05, "loss": 1.256503164768219e-05, "step": 304630 }, { "epoch": 86.47175702526256, "grad_norm": 0.001938337692990899, "learning_rate": 1.3562872551802441e-05, "loss": 1.5127286314964294e-05, "step": 304640 }, { "epoch": 86.47459551518592, "grad_norm": 0.0016902799252420664, "learning_rate": 1.356003406187908e-05, "loss": 9.400025010108948e-06, "step": 304650 }, { "epoch": 86.47743400510929, "grad_norm": 0.000917403434868902, "learning_rate": 1.3557195571955719e-05, "loss": 8.527562022209167e-06, "step": 304660 }, { "epoch": 86.48027249503264, "grad_norm": 0.00030945235630497336, "learning_rate": 1.355435708203236e-05, "loss": 1.1543743312358856e-05, "step": 304670 }, { "epoch": 86.483110984956, "grad_norm": 0.002806088188663125, "learning_rate": 1.3551518592109e-05, "loss": 3.343280404806137e-05, "step": 304680 }, { "epoch": 86.48594947487936, "grad_norm": 0.003101258771494031, "learning_rate": 1.3548680102185638e-05, "loss": 2.1651573479175568e-05, "step": 304690 }, { "epoch": 86.48878796480273, "grad_norm": 0.004030503798276186, "learning_rate": 1.3545841612262278e-05, "loss": 5.0030648708343506e-05, "step": 304700 }, { "epoch": 86.49162645472609, "grad_norm": 0.0022465481888502836, "learning_rate": 1.3543003122338916e-05, "loss": 1.7708539962768556e-05, "step": 304710 }, { "epoch": 86.49446494464945, "grad_norm": 0.0004288550990168005, "learning_rate": 1.3540164632415555e-05, "loss": 2.5039166212081908e-05, "step": 304720 }, { "epoch": 86.49730343457281, "grad_norm": 0.0021309431176632643, "learning_rate": 1.3537326142492197e-05, "loss": 1.406930387020111e-05, "step": 304730 }, { "epoch": 86.50014192449616, "grad_norm": 0.000864740286488086, "learning_rate": 1.3534487652568833e-05, "loss": 2.3065321147441865e-05, "step": 304740 }, { "epoch": 86.50298041441953, "grad_norm": 0.0042312731966376305, "learning_rate": 1.3531649162645474e-05, "loss": 6.721168756484985e-05, "step": 304750 }, { "epoch": 86.50581890434289, "grad_norm": 0.0012512056855484843, "learning_rate": 1.3528810672722112e-05, "loss": 2.3591890931129454e-05, "step": 304760 }, { "epoch": 86.50865739426625, "grad_norm": 0.00036407477455213666, "learning_rate": 1.3525972182798752e-05, "loss": 0.0026644179597496986, "step": 304770 }, { "epoch": 86.51149588418961, "grad_norm": 0.0013808811781927943, "learning_rate": 1.3523133692875392e-05, "loss": 8.169841021299362e-05, "step": 304780 }, { "epoch": 86.51433437411298, "grad_norm": 0.0007310586515814066, "learning_rate": 1.352029520295203e-05, "loss": 6.054900586605072e-06, "step": 304790 }, { "epoch": 86.51717286403633, "grad_norm": 0.00543652568012476, "learning_rate": 1.351745671302867e-05, "loss": 0.0001572364941239357, "step": 304800 }, { "epoch": 86.52001135395969, "grad_norm": 0.014873028732836246, "learning_rate": 1.3514618223105307e-05, "loss": 2.298150211572647e-05, "step": 304810 }, { "epoch": 86.52284984388305, "grad_norm": 0.0012679219944402575, "learning_rate": 1.3511779733181949e-05, "loss": 0.00013646502047777177, "step": 304820 }, { "epoch": 86.52568833380641, "grad_norm": 0.00830505508929491, "learning_rate": 1.3508941243258588e-05, "loss": 1.0389275848865509e-05, "step": 304830 }, { "epoch": 86.52852682372978, "grad_norm": 0.00808435957878828, "learning_rate": 1.3506102753335226e-05, "loss": 1.4853104948997497e-05, "step": 304840 }, { "epoch": 86.53136531365314, "grad_norm": 0.0009111741092056036, "learning_rate": 1.3503264263411866e-05, "loss": 2.0369887351989746e-05, "step": 304850 }, { "epoch": 86.5342038035765, "grad_norm": 0.016093693673610687, "learning_rate": 1.3500709622480843e-05, "loss": 0.007235416769981384, "step": 304860 }, { "epoch": 86.53704229349985, "grad_norm": 0.003073623636737466, "learning_rate": 1.349787113255748e-05, "loss": 0.00014258380979299546, "step": 304870 }, { "epoch": 86.53988078342321, "grad_norm": 0.0034070813562721014, "learning_rate": 1.349503264263412e-05, "loss": 1.422874629497528e-05, "step": 304880 }, { "epoch": 86.54271927334658, "grad_norm": 0.04764352738857269, "learning_rate": 1.3492194152710758e-05, "loss": 7.657594978809357e-05, "step": 304890 }, { "epoch": 86.54555776326994, "grad_norm": 0.0019665316212922335, "learning_rate": 1.3489355662787398e-05, "loss": 0.007669942826032639, "step": 304900 }, { "epoch": 86.5483962531933, "grad_norm": 0.004479939118027687, "learning_rate": 1.3486517172864038e-05, "loss": 0.0032763808965682985, "step": 304910 }, { "epoch": 86.55123474311667, "grad_norm": 0.025582492351531982, "learning_rate": 1.3483678682940676e-05, "loss": 1.8765032291412353e-05, "step": 304920 }, { "epoch": 86.55407323304003, "grad_norm": 0.009163694456219673, "learning_rate": 1.3480840193017317e-05, "loss": 1.625381410121918e-05, "step": 304930 }, { "epoch": 86.55691172296338, "grad_norm": 0.004019822925329208, "learning_rate": 1.3478001703093953e-05, "loss": 2.5312602519989014e-05, "step": 304940 }, { "epoch": 86.55975021288674, "grad_norm": 0.45516523718833923, "learning_rate": 1.3475163213170595e-05, "loss": 7.414501160383225e-05, "step": 304950 }, { "epoch": 86.5625887028101, "grad_norm": 0.09273114800453186, "learning_rate": 1.3472324723247234e-05, "loss": 2.75537371635437e-05, "step": 304960 }, { "epoch": 86.56542719273347, "grad_norm": 0.04879779368638992, "learning_rate": 1.3469486233323872e-05, "loss": 2.0587071776390076e-05, "step": 304970 }, { "epoch": 86.56826568265683, "grad_norm": 0.0015366518637165427, "learning_rate": 1.3466647743400512e-05, "loss": 5.878154188394546e-05, "step": 304980 }, { "epoch": 86.57110417258019, "grad_norm": 0.004640909843146801, "learning_rate": 1.346380925347715e-05, "loss": 6.82193785905838e-06, "step": 304990 }, { "epoch": 86.57394266250355, "grad_norm": 0.0018003074219450355, "learning_rate": 1.346097076355379e-05, "loss": 1.6207434237003325e-05, "step": 305000 }, { "epoch": 86.57394266250355, "eval_accuracy": 0.9876645259744389, "eval_loss": 0.05644866079092026, "eval_runtime": 39.1618, "eval_samples_per_second": 401.59, "eval_steps_per_second": 6.282, "step": 305000 }, { "epoch": 86.5767811524269, "grad_norm": 0.0035647840704768896, "learning_rate": 1.3458132273630431e-05, "loss": 1.4536269009113312e-05, "step": 305010 }, { "epoch": 86.57961964235027, "grad_norm": 0.0024189730174839497, "learning_rate": 1.3455293783707067e-05, "loss": 1.3611465692520142e-05, "step": 305020 }, { "epoch": 86.58245813227363, "grad_norm": 0.0033091390505433083, "learning_rate": 1.3452455293783709e-05, "loss": 9.795837104320527e-06, "step": 305030 }, { "epoch": 86.58529662219699, "grad_norm": 0.0013891522539779544, "learning_rate": 1.3449616803860347e-05, "loss": 5.335193127393722e-05, "step": 305040 }, { "epoch": 86.58813511212036, "grad_norm": 0.0006609429838135839, "learning_rate": 1.3446778313936986e-05, "loss": 1.2868456542491913e-05, "step": 305050 }, { "epoch": 86.59097360204372, "grad_norm": 0.0007204058929346502, "learning_rate": 1.3443939824013626e-05, "loss": 2.1767430007457732e-05, "step": 305060 }, { "epoch": 86.59381209196707, "grad_norm": 0.006798377726227045, "learning_rate": 1.3441101334090264e-05, "loss": 6.499048322439194e-05, "step": 305070 }, { "epoch": 86.59665058189043, "grad_norm": 0.004836432635784149, "learning_rate": 1.3438262844166905e-05, "loss": 1.8179230391979217e-05, "step": 305080 }, { "epoch": 86.59948907181379, "grad_norm": 0.0030653835274279118, "learning_rate": 1.3435424354243542e-05, "loss": 3.0343234539031982e-05, "step": 305090 }, { "epoch": 86.60232756173716, "grad_norm": 0.0022263515274971724, "learning_rate": 1.3432585864320183e-05, "loss": 2.4726986885070802e-05, "step": 305100 }, { "epoch": 86.60516605166052, "grad_norm": 0.0016524791717529297, "learning_rate": 1.3429747374396823e-05, "loss": 2.775769680738449e-05, "step": 305110 }, { "epoch": 86.60800454158388, "grad_norm": 0.003604060271754861, "learning_rate": 1.342690888447346e-05, "loss": 1.856200397014618e-05, "step": 305120 }, { "epoch": 86.61084303150724, "grad_norm": 0.013860652223229408, "learning_rate": 1.34240703945501e-05, "loss": 1.1369772255420685e-05, "step": 305130 }, { "epoch": 86.61368152143059, "grad_norm": 0.004659317899495363, "learning_rate": 1.3421231904626738e-05, "loss": 2.9389560222625732e-05, "step": 305140 }, { "epoch": 86.61652001135396, "grad_norm": 0.0008881777757778764, "learning_rate": 1.3418393414703378e-05, "loss": 2.2407807409763337e-05, "step": 305150 }, { "epoch": 86.61935850127732, "grad_norm": 0.0035718115977942944, "learning_rate": 1.341555492478002e-05, "loss": 4.170704632997513e-05, "step": 305160 }, { "epoch": 86.62219699120068, "grad_norm": 0.007432937156409025, "learning_rate": 1.3412716434856656e-05, "loss": 7.66124576330185e-06, "step": 305170 }, { "epoch": 86.62503548112404, "grad_norm": 0.003596727969124913, "learning_rate": 1.3409877944933297e-05, "loss": 1.2145936489105225e-05, "step": 305180 }, { "epoch": 86.6278739710474, "grad_norm": 0.009784066118299961, "learning_rate": 1.3407039455009935e-05, "loss": 1.3949722051620483e-05, "step": 305190 }, { "epoch": 86.63071246097077, "grad_norm": 0.00234496733173728, "learning_rate": 1.3404200965086575e-05, "loss": 2.4667568504810332e-05, "step": 305200 }, { "epoch": 86.63355095089412, "grad_norm": 0.0011028044391423464, "learning_rate": 1.3401362475163214e-05, "loss": 6.038323044776917e-06, "step": 305210 }, { "epoch": 86.63638944081748, "grad_norm": 0.003617051988840103, "learning_rate": 1.3398523985239852e-05, "loss": 1.1154450476169586e-05, "step": 305220 }, { "epoch": 86.63922793074084, "grad_norm": 0.004641680512577295, "learning_rate": 1.3395685495316492e-05, "loss": 9.394623339176178e-06, "step": 305230 }, { "epoch": 86.64206642066421, "grad_norm": 0.002973990747705102, "learning_rate": 1.339284700539313e-05, "loss": 6.6515058279037476e-06, "step": 305240 }, { "epoch": 86.64490491058757, "grad_norm": 0.0012817400274798274, "learning_rate": 1.3390008515469771e-05, "loss": 6.941519677639007e-06, "step": 305250 }, { "epoch": 86.64774340051093, "grad_norm": 0.006946212612092495, "learning_rate": 1.3387170025546411e-05, "loss": 1.2843310832977295e-05, "step": 305260 }, { "epoch": 86.6505818904343, "grad_norm": 0.0013054129667580128, "learning_rate": 1.3384331535623049e-05, "loss": 2.5299936532974242e-05, "step": 305270 }, { "epoch": 86.65342038035764, "grad_norm": 0.0007029494154267013, "learning_rate": 1.3381493045699689e-05, "loss": 9.341351687908173e-06, "step": 305280 }, { "epoch": 86.65625887028101, "grad_norm": 0.0006558809545822442, "learning_rate": 1.3378654555776327e-05, "loss": 6.851926445960998e-06, "step": 305290 }, { "epoch": 86.65909736020437, "grad_norm": 0.0010138851357623935, "learning_rate": 1.3375816065852966e-05, "loss": 1.5140138566493988e-05, "step": 305300 }, { "epoch": 86.66193585012773, "grad_norm": 0.001577709917910397, "learning_rate": 1.3372977575929608e-05, "loss": 2.102535218000412e-05, "step": 305310 }, { "epoch": 86.6647743400511, "grad_norm": 0.0014523558784276247, "learning_rate": 1.3370139086006244e-05, "loss": 1.4794617891311646e-05, "step": 305320 }, { "epoch": 86.66761282997446, "grad_norm": 0.0015226817922666669, "learning_rate": 1.3367300596082886e-05, "loss": 1.2945570051670075e-05, "step": 305330 }, { "epoch": 86.67045131989781, "grad_norm": 0.0034241885878145695, "learning_rate": 1.3364462106159523e-05, "loss": 6.504729390144348e-06, "step": 305340 }, { "epoch": 86.67328980982117, "grad_norm": 0.0022116440813988447, "learning_rate": 1.3361623616236163e-05, "loss": 9.74014401435852e-06, "step": 305350 }, { "epoch": 86.67612829974453, "grad_norm": 0.001449241884984076, "learning_rate": 1.3358785126312803e-05, "loss": 1.0845996439456939e-05, "step": 305360 }, { "epoch": 86.6789667896679, "grad_norm": 0.11806143075227737, "learning_rate": 1.335594663638944e-05, "loss": 2.036299556493759e-05, "step": 305370 }, { "epoch": 86.68180527959126, "grad_norm": 0.0016220408724620938, "learning_rate": 1.335310814646608e-05, "loss": 9.192340075969697e-06, "step": 305380 }, { "epoch": 86.68464376951462, "grad_norm": 0.004272916819900274, "learning_rate": 1.3350269656542719e-05, "loss": 7.357075810432434e-06, "step": 305390 }, { "epoch": 86.68748225943799, "grad_norm": 0.0012079511070623994, "learning_rate": 1.334743116661936e-05, "loss": 1.7128325998783113e-05, "step": 305400 }, { "epoch": 86.69032074936133, "grad_norm": 0.0021804391872137785, "learning_rate": 1.3344592676696e-05, "loss": 1.4450587332248688e-05, "step": 305410 }, { "epoch": 86.6931592392847, "grad_norm": 0.005773486569523811, "learning_rate": 1.3341754186772638e-05, "loss": 8.084625005722045e-06, "step": 305420 }, { "epoch": 86.69599772920806, "grad_norm": 0.001077880384400487, "learning_rate": 1.3338915696849277e-05, "loss": 1.0844506323337556e-05, "step": 305430 }, { "epoch": 86.69883621913142, "grad_norm": 0.011733070015907288, "learning_rate": 1.3336077206925915e-05, "loss": 1.7650611698627473e-05, "step": 305440 }, { "epoch": 86.70167470905479, "grad_norm": 0.0016525037353858352, "learning_rate": 1.3333238717002555e-05, "loss": 7.644854485988617e-06, "step": 305450 }, { "epoch": 86.70451319897815, "grad_norm": 0.0011923275887966156, "learning_rate": 1.3330400227079196e-05, "loss": 1.6993656754493713e-05, "step": 305460 }, { "epoch": 86.70735168890151, "grad_norm": 0.005846546031534672, "learning_rate": 1.3327561737155833e-05, "loss": 1.4570914208889007e-05, "step": 305470 }, { "epoch": 86.71019017882486, "grad_norm": 0.00046936789294704795, "learning_rate": 1.3324723247232474e-05, "loss": 4.595518112182617e-06, "step": 305480 }, { "epoch": 86.71302866874822, "grad_norm": 0.02723039872944355, "learning_rate": 1.332188475730911e-05, "loss": 1.9994564354419708e-05, "step": 305490 }, { "epoch": 86.71586715867159, "grad_norm": 0.01588890142738819, "learning_rate": 1.3319046267385752e-05, "loss": 1.10553577542305e-05, "step": 305500 }, { "epoch": 86.71586715867159, "eval_accuracy": 0.9883639600686717, "eval_loss": 0.04989974573254585, "eval_runtime": 35.8894, "eval_samples_per_second": 438.207, "eval_steps_per_second": 6.854, "step": 305500 }, { "epoch": 86.71870564859495, "grad_norm": 0.0010588986333459616, "learning_rate": 1.3316207777462391e-05, "loss": 7.020998746156692e-05, "step": 305510 }, { "epoch": 86.72154413851831, "grad_norm": 0.04127068445086479, "learning_rate": 1.331336928753903e-05, "loss": 1.294109970331192e-05, "step": 305520 }, { "epoch": 86.72438262844167, "grad_norm": 0.0008842475945129991, "learning_rate": 1.3310530797615669e-05, "loss": 1.0390765964984894e-05, "step": 305530 }, { "epoch": 86.72722111836502, "grad_norm": 0.018388871103525162, "learning_rate": 1.3307692307692307e-05, "loss": 2.363976091146469e-05, "step": 305540 }, { "epoch": 86.73005960828839, "grad_norm": 0.002215189393609762, "learning_rate": 1.3304853817768947e-05, "loss": 5.669146776199341e-06, "step": 305550 }, { "epoch": 86.73289809821175, "grad_norm": 0.022847991436719894, "learning_rate": 1.3302015327845588e-05, "loss": 3.378838300704956e-05, "step": 305560 }, { "epoch": 86.73573658813511, "grad_norm": 0.006152214948087931, "learning_rate": 1.3299176837922226e-05, "loss": 2.2667087614536284e-05, "step": 305570 }, { "epoch": 86.73857507805847, "grad_norm": 0.0025464477948844433, "learning_rate": 1.3296338347998866e-05, "loss": 1.0145828127861023e-05, "step": 305580 }, { "epoch": 86.74141356798184, "grad_norm": 0.0005854056216776371, "learning_rate": 1.3293499858075504e-05, "loss": 1.363363116979599e-05, "step": 305590 }, { "epoch": 86.7442520579052, "grad_norm": 0.01867544837296009, "learning_rate": 1.3290661368152143e-05, "loss": 1.999717205762863e-05, "step": 305600 }, { "epoch": 86.74709054782855, "grad_norm": 0.03672051802277565, "learning_rate": 1.3287822878228785e-05, "loss": 0.0007305109873414039, "step": 305610 }, { "epoch": 86.74992903775191, "grad_norm": 0.00041284519829787314, "learning_rate": 1.3284984388305421e-05, "loss": 0.0008212033659219742, "step": 305620 }, { "epoch": 86.75276752767527, "grad_norm": 0.026351606473326683, "learning_rate": 1.3282145898382062e-05, "loss": 1.527201384305954e-05, "step": 305630 }, { "epoch": 86.75560601759864, "grad_norm": 0.0029317596927285194, "learning_rate": 1.3279307408458699e-05, "loss": 0.00013553816825151443, "step": 305640 }, { "epoch": 86.758444507522, "grad_norm": 0.0014706432120874524, "learning_rate": 1.327646891853534e-05, "loss": 3.3882632851600644e-05, "step": 305650 }, { "epoch": 86.76128299744536, "grad_norm": 0.014056348241865635, "learning_rate": 1.327363042861198e-05, "loss": 0.0004464292898774147, "step": 305660 }, { "epoch": 86.76412148736873, "grad_norm": 0.0010821943869814277, "learning_rate": 1.3270791938688618e-05, "loss": 0.0009673185646533966, "step": 305670 }, { "epoch": 86.76695997729207, "grad_norm": 0.018844492733478546, "learning_rate": 1.3267953448765257e-05, "loss": 0.0006520699709653855, "step": 305680 }, { "epoch": 86.76979846721544, "grad_norm": 0.45111024379730225, "learning_rate": 1.3265114958841895e-05, "loss": 6.99985772371292e-05, "step": 305690 }, { "epoch": 86.7726369571388, "grad_norm": 0.0019105183891952038, "learning_rate": 1.3262276468918535e-05, "loss": 2.616513520479202e-05, "step": 305700 }, { "epoch": 86.77547544706216, "grad_norm": 0.0023651081137359142, "learning_rate": 1.3259437978995176e-05, "loss": 4.389192909002304e-05, "step": 305710 }, { "epoch": 86.77831393698553, "grad_norm": 0.03543654829263687, "learning_rate": 1.3256599489071814e-05, "loss": 1.3248622417449952e-05, "step": 305720 }, { "epoch": 86.78115242690889, "grad_norm": 0.0007586880819872022, "learning_rate": 1.3253760999148454e-05, "loss": 1.6663037240505218e-05, "step": 305730 }, { "epoch": 86.78399091683225, "grad_norm": 0.007275429088622332, "learning_rate": 1.3250922509225092e-05, "loss": 1.6607530415058137e-05, "step": 305740 }, { "epoch": 86.7868294067556, "grad_norm": 0.002407642314210534, "learning_rate": 1.3248084019301732e-05, "loss": 8.449703454971313e-06, "step": 305750 }, { "epoch": 86.78966789667896, "grad_norm": 0.0018363866256549954, "learning_rate": 1.3245245529378371e-05, "loss": 1.205597072839737e-05, "step": 305760 }, { "epoch": 86.79250638660233, "grad_norm": 0.0010697126854211092, "learning_rate": 1.324240703945501e-05, "loss": 2.670474350452423e-05, "step": 305770 }, { "epoch": 86.79534487652569, "grad_norm": 0.0010127588175237179, "learning_rate": 1.323956854953165e-05, "loss": 8.053891360759734e-06, "step": 305780 }, { "epoch": 86.79818336644905, "grad_norm": 0.011553979478776455, "learning_rate": 1.3236730059608287e-05, "loss": 1.3143569231033325e-05, "step": 305790 }, { "epoch": 86.80102185637242, "grad_norm": 0.004219514783471823, "learning_rate": 1.3233891569684928e-05, "loss": 1.1180713772773743e-05, "step": 305800 }, { "epoch": 86.80386034629576, "grad_norm": 0.028357259929180145, "learning_rate": 1.3231053079761568e-05, "loss": 1.1355988681316375e-05, "step": 305810 }, { "epoch": 86.80669883621913, "grad_norm": 0.000567245704587549, "learning_rate": 1.3228214589838206e-05, "loss": 2.4000369012355805e-05, "step": 305820 }, { "epoch": 86.80953732614249, "grad_norm": 0.00047092471504583955, "learning_rate": 1.3225376099914846e-05, "loss": 7.518939673900604e-06, "step": 305830 }, { "epoch": 86.81237581606585, "grad_norm": 0.007732781581580639, "learning_rate": 1.3222537609991484e-05, "loss": 1.472383737564087e-05, "step": 305840 }, { "epoch": 86.81521430598922, "grad_norm": 0.0002710889675654471, "learning_rate": 1.3219699120068123e-05, "loss": 7.896497845649719e-06, "step": 305850 }, { "epoch": 86.81805279591258, "grad_norm": 0.006128802429884672, "learning_rate": 1.3216860630144765e-05, "loss": 1.4327652752399445e-05, "step": 305860 }, { "epoch": 86.82089128583594, "grad_norm": 0.0014316787710413337, "learning_rate": 1.3214022140221403e-05, "loss": 7.973425090312957e-06, "step": 305870 }, { "epoch": 86.82372977575929, "grad_norm": 0.001863359590061009, "learning_rate": 1.3211183650298043e-05, "loss": 8.109211921691895e-06, "step": 305880 }, { "epoch": 86.82656826568265, "grad_norm": 0.004911002703011036, "learning_rate": 1.320834516037468e-05, "loss": 0.00010020602494478225, "step": 305890 }, { "epoch": 86.82940675560602, "grad_norm": 0.002439389703795314, "learning_rate": 1.320550667045132e-05, "loss": 1.4706701040267945e-05, "step": 305900 }, { "epoch": 86.83224524552938, "grad_norm": 0.0013625859282910824, "learning_rate": 1.320266818052796e-05, "loss": 4.184078425168991e-05, "step": 305910 }, { "epoch": 86.83508373545274, "grad_norm": 1.7175134420394897, "learning_rate": 1.3199829690604598e-05, "loss": 0.00019659828394651412, "step": 305920 }, { "epoch": 86.8379222253761, "grad_norm": 0.0019853604026138783, "learning_rate": 1.319699120068124e-05, "loss": 0.00021368041634559632, "step": 305930 }, { "epoch": 86.84076071529947, "grad_norm": 0.0007108348654583097, "learning_rate": 1.3194152710757876e-05, "loss": 1.619011163711548e-05, "step": 305940 }, { "epoch": 86.84359920522282, "grad_norm": 0.002464447170495987, "learning_rate": 1.3191314220834517e-05, "loss": 1.790262758731842e-05, "step": 305950 }, { "epoch": 86.84643769514618, "grad_norm": 0.002661200240254402, "learning_rate": 1.3188475730911157e-05, "loss": 1.763179898262024e-05, "step": 305960 }, { "epoch": 86.84927618506954, "grad_norm": 0.017093989998102188, "learning_rate": 1.3185637240987795e-05, "loss": 4.8749707639217375e-05, "step": 305970 }, { "epoch": 86.8521146749929, "grad_norm": 0.00230050808750093, "learning_rate": 1.3182798751064434e-05, "loss": 7.2266906499862674e-06, "step": 305980 }, { "epoch": 86.85495316491627, "grad_norm": 0.009004838764667511, "learning_rate": 1.3179960261141076e-05, "loss": 6.431527435779572e-06, "step": 305990 }, { "epoch": 86.85779165483963, "grad_norm": 0.00041874192538671196, "learning_rate": 1.3177121771217712e-05, "loss": 1.2203305959701539e-05, "step": 306000 }, { "epoch": 86.85779165483963, "eval_accuracy": 0.9882367902333566, "eval_loss": 0.052585769444704056, "eval_runtime": 35.7879, "eval_samples_per_second": 439.45, "eval_steps_per_second": 6.874, "step": 306000 }, { "epoch": 86.86063014476298, "grad_norm": 0.0013060914352536201, "learning_rate": 1.3174283281294353e-05, "loss": 1.4253333210945129e-05, "step": 306010 }, { "epoch": 86.86346863468634, "grad_norm": 0.0011591509683057666, "learning_rate": 1.317144479137099e-05, "loss": 7.265433669090271e-06, "step": 306020 }, { "epoch": 86.8663071246097, "grad_norm": 0.01562260277569294, "learning_rate": 1.3168606301447631e-05, "loss": 1.897364854812622e-05, "step": 306030 }, { "epoch": 86.86914561453307, "grad_norm": 0.0018710533622652292, "learning_rate": 1.316576781152427e-05, "loss": 2.0803697407245635e-05, "step": 306040 }, { "epoch": 86.87198410445643, "grad_norm": 0.0038719477597624063, "learning_rate": 1.3162929321600909e-05, "loss": 1.821853220462799e-05, "step": 306050 }, { "epoch": 86.8748225943798, "grad_norm": 0.003475028555840254, "learning_rate": 1.3160090831677548e-05, "loss": 6.748177111148834e-06, "step": 306060 }, { "epoch": 86.87766108430316, "grad_norm": 0.0006559739704243839, "learning_rate": 1.3157252341754186e-05, "loss": 1.009088009595871e-05, "step": 306070 }, { "epoch": 86.8804995742265, "grad_norm": 0.0006460619624704123, "learning_rate": 1.3154413851830828e-05, "loss": 6.539560854434967e-06, "step": 306080 }, { "epoch": 86.88333806414987, "grad_norm": 0.002198699861764908, "learning_rate": 1.3151575361907467e-05, "loss": 1.0051578283309936e-05, "step": 306090 }, { "epoch": 86.88617655407323, "grad_norm": 0.0029131737537682056, "learning_rate": 1.3148736871984105e-05, "loss": 1.5421956777572633e-05, "step": 306100 }, { "epoch": 86.8890150439966, "grad_norm": 0.0005392484599724412, "learning_rate": 1.3145898382060745e-05, "loss": 7.653608918190002e-06, "step": 306110 }, { "epoch": 86.89185353391996, "grad_norm": 0.001657374668866396, "learning_rate": 1.3143059892137383e-05, "loss": 6.3052400946617125e-06, "step": 306120 }, { "epoch": 86.89469202384332, "grad_norm": 0.0007118916837498546, "learning_rate": 1.3140221402214023e-05, "loss": 6.9409608840942385e-06, "step": 306130 }, { "epoch": 86.89753051376668, "grad_norm": 0.010781020857393742, "learning_rate": 1.3137382912290664e-05, "loss": 1.7947889864444734e-05, "step": 306140 }, { "epoch": 86.90036900369003, "grad_norm": 0.006080830469727516, "learning_rate": 1.31345444223673e-05, "loss": 1.5100836753845215e-05, "step": 306150 }, { "epoch": 86.9032074936134, "grad_norm": 0.11220073699951172, "learning_rate": 1.3131705932443942e-05, "loss": 1.9955821335315706e-05, "step": 306160 }, { "epoch": 86.90604598353676, "grad_norm": 0.0016679795226082206, "learning_rate": 1.3128867442520578e-05, "loss": 1.313779503107071e-05, "step": 306170 }, { "epoch": 86.90888447346012, "grad_norm": 0.003735774662345648, "learning_rate": 1.312602895259722e-05, "loss": 1.1188909411430359e-05, "step": 306180 }, { "epoch": 86.91172296338348, "grad_norm": 0.001365500851534307, "learning_rate": 1.3123190462673859e-05, "loss": 1.0018609464168548e-05, "step": 306190 }, { "epoch": 86.91456145330685, "grad_norm": 0.0010193204507231712, "learning_rate": 1.3120351972750497e-05, "loss": 1.0795705020427703e-05, "step": 306200 }, { "epoch": 86.91739994323021, "grad_norm": 0.008535962551832199, "learning_rate": 1.3117513482827137e-05, "loss": 1.6584061086177826e-05, "step": 306210 }, { "epoch": 86.92023843315356, "grad_norm": 0.080509252846241, "learning_rate": 1.3114674992903775e-05, "loss": 2.5958940386772156e-05, "step": 306220 }, { "epoch": 86.92307692307692, "grad_norm": 0.004203511867672205, "learning_rate": 1.3111836502980414e-05, "loss": 8.223950862884521e-06, "step": 306230 }, { "epoch": 86.92591541300028, "grad_norm": 0.0015198601176962256, "learning_rate": 1.3108998013057056e-05, "loss": 1.0724179446697235e-05, "step": 306240 }, { "epoch": 86.92875390292365, "grad_norm": 0.00044907411211170256, "learning_rate": 1.3106159523133694e-05, "loss": 1.1851266026496888e-05, "step": 306250 }, { "epoch": 86.93159239284701, "grad_norm": 0.0027421689592301846, "learning_rate": 1.3103321033210333e-05, "loss": 1.8150731921195985e-05, "step": 306260 }, { "epoch": 86.93443088277037, "grad_norm": 0.002071302616968751, "learning_rate": 1.3100482543286971e-05, "loss": 3.394763916730881e-05, "step": 306270 }, { "epoch": 86.93726937269372, "grad_norm": 0.004574410151690245, "learning_rate": 1.3097644053363611e-05, "loss": 1.074187457561493e-05, "step": 306280 }, { "epoch": 86.94010786261708, "grad_norm": 0.0055962130427360535, "learning_rate": 1.3094805563440253e-05, "loss": 7.266737520694733e-06, "step": 306290 }, { "epoch": 86.94294635254045, "grad_norm": 0.004674541763961315, "learning_rate": 1.3091967073516889e-05, "loss": 6.230548024177551e-06, "step": 306300 }, { "epoch": 86.94578484246381, "grad_norm": 0.0007131816819310188, "learning_rate": 1.308912858359353e-05, "loss": 1.0448135435581208e-05, "step": 306310 }, { "epoch": 86.94862333238717, "grad_norm": 0.0007093410240486264, "learning_rate": 1.3086290093670166e-05, "loss": 1.6831792891025543e-05, "step": 306320 }, { "epoch": 86.95146182231053, "grad_norm": 0.002157366368919611, "learning_rate": 1.3083451603746808e-05, "loss": 3.208741545677185e-05, "step": 306330 }, { "epoch": 86.9543003122339, "grad_norm": 0.0016012216219678521, "learning_rate": 1.3080613113823448e-05, "loss": 1.1545792222023011e-05, "step": 306340 }, { "epoch": 86.95713880215725, "grad_norm": 0.003836859716102481, "learning_rate": 1.3077774623900086e-05, "loss": 1.249704509973526e-05, "step": 306350 }, { "epoch": 86.95997729208061, "grad_norm": 0.008291603997349739, "learning_rate": 1.3074936133976725e-05, "loss": 9.874626994132995e-06, "step": 306360 }, { "epoch": 86.96281578200397, "grad_norm": 0.0037106219679117203, "learning_rate": 1.3072097644053363e-05, "loss": 7.19074159860611e-06, "step": 306370 }, { "epoch": 86.96565427192733, "grad_norm": 0.024429980665445328, "learning_rate": 1.3069259154130003e-05, "loss": 1.1828355491161347e-05, "step": 306380 }, { "epoch": 86.9684927618507, "grad_norm": 0.0011495612561702728, "learning_rate": 1.3066420664206644e-05, "loss": 7.657520473003388e-06, "step": 306390 }, { "epoch": 86.97133125177406, "grad_norm": 0.006155162118375301, "learning_rate": 1.3063582174283282e-05, "loss": 7.195211946964264e-06, "step": 306400 }, { "epoch": 86.97416974169742, "grad_norm": 0.001481553423218429, "learning_rate": 1.3060743684359922e-05, "loss": 8.1576406955719e-06, "step": 306410 }, { "epoch": 86.97700823162077, "grad_norm": 0.0029025468975305557, "learning_rate": 1.305790519443656e-05, "loss": 7.698684930801391e-06, "step": 306420 }, { "epoch": 86.97984672154413, "grad_norm": 0.001571036409586668, "learning_rate": 1.30550667045132e-05, "loss": 5.10290265083313e-06, "step": 306430 }, { "epoch": 86.9826852114675, "grad_norm": 0.004433729685842991, "learning_rate": 1.305222821458984e-05, "loss": 9.091570973396301e-06, "step": 306440 }, { "epoch": 86.98552370139086, "grad_norm": 0.0009502629982307553, "learning_rate": 1.3049389724666477e-05, "loss": 8.65347683429718e-06, "step": 306450 }, { "epoch": 86.98836219131422, "grad_norm": 0.0008942997665144503, "learning_rate": 1.3046551234743119e-05, "loss": 7.391907274723053e-06, "step": 306460 }, { "epoch": 86.99120068123759, "grad_norm": 0.0010858230525627732, "learning_rate": 1.3043712744819755e-05, "loss": 8.863583207130433e-06, "step": 306470 }, { "epoch": 86.99403917116095, "grad_norm": 0.00047810160322114825, "learning_rate": 1.3040874254896396e-05, "loss": 7.530860602855682e-06, "step": 306480 }, { "epoch": 86.9968776610843, "grad_norm": 0.0017703144112601876, "learning_rate": 1.3038035764973036e-05, "loss": 9.223073720932008e-06, "step": 306490 }, { "epoch": 86.99971615100766, "grad_norm": 0.0005536337848752737, "learning_rate": 1.3035197275049674e-05, "loss": 7.310137152671814e-06, "step": 306500 }, { "epoch": 86.99971615100766, "eval_accuracy": 0.988999809245247, "eval_loss": 0.049419648945331573, "eval_runtime": 35.6248, "eval_samples_per_second": 441.462, "eval_steps_per_second": 6.905, "step": 306500 }, { "epoch": 87.00255464093102, "grad_norm": 0.0009196125902235508, "learning_rate": 1.3032358785126314e-05, "loss": 7.668053149245679e-06, "step": 306510 }, { "epoch": 87.00539313085439, "grad_norm": 0.0025085555389523506, "learning_rate": 1.3029520295202952e-05, "loss": 1.0378472506999969e-05, "step": 306520 }, { "epoch": 87.00823162077775, "grad_norm": 0.0009508947841823101, "learning_rate": 1.3026681805279591e-05, "loss": 1.0051950812339783e-05, "step": 306530 }, { "epoch": 87.01107011070111, "grad_norm": 0.0074104624800384045, "learning_rate": 1.3023843315356233e-05, "loss": 1.1263415217399597e-05, "step": 306540 }, { "epoch": 87.01390860062446, "grad_norm": 0.0005318058538250625, "learning_rate": 1.302100482543287e-05, "loss": 1.9250065088272093e-05, "step": 306550 }, { "epoch": 87.01674709054782, "grad_norm": 0.0032406162936240435, "learning_rate": 1.301816633550951e-05, "loss": 9.964406490325928e-06, "step": 306560 }, { "epoch": 87.01958558047119, "grad_norm": 0.004970796871930361, "learning_rate": 1.3015327845586148e-05, "loss": 7.244758307933807e-06, "step": 306570 }, { "epoch": 87.02242407039455, "grad_norm": 0.05697399750351906, "learning_rate": 1.3012489355662788e-05, "loss": 1.3655796647071838e-05, "step": 306580 }, { "epoch": 87.02526256031791, "grad_norm": 0.0004667671164497733, "learning_rate": 1.3009650865739428e-05, "loss": 6.10053539276123e-06, "step": 306590 }, { "epoch": 87.02810105024128, "grad_norm": 0.0006784415454603732, "learning_rate": 1.3006812375816066e-05, "loss": 6.39408826828003e-06, "step": 306600 }, { "epoch": 87.03093954016464, "grad_norm": 0.0014491743640974164, "learning_rate": 1.3003973885892707e-05, "loss": 6.274692714214325e-06, "step": 306610 }, { "epoch": 87.03377803008799, "grad_norm": 0.004999324679374695, "learning_rate": 1.3001135395969343e-05, "loss": 9.445101022720337e-06, "step": 306620 }, { "epoch": 87.03661652001135, "grad_norm": 0.006007409654557705, "learning_rate": 1.2998296906045985e-05, "loss": 1.0798126459121704e-05, "step": 306630 }, { "epoch": 87.03945500993471, "grad_norm": 0.001526754000224173, "learning_rate": 1.2995458416122624e-05, "loss": 1.3769418001174926e-05, "step": 306640 }, { "epoch": 87.04229349985808, "grad_norm": 0.0008557632099837065, "learning_rate": 1.2992619926199262e-05, "loss": 6.292387843132019e-06, "step": 306650 }, { "epoch": 87.04513198978144, "grad_norm": 0.008920283988118172, "learning_rate": 1.2989781436275902e-05, "loss": 9.709596633911133e-06, "step": 306660 }, { "epoch": 87.0479704797048, "grad_norm": 0.0011990340426564217, "learning_rate": 1.298694294635254e-05, "loss": 7.768720388412476e-06, "step": 306670 }, { "epoch": 87.05080896962816, "grad_norm": 0.016032174229621887, "learning_rate": 1.298410445642918e-05, "loss": 1.1097639799118042e-05, "step": 306680 }, { "epoch": 87.05364745955151, "grad_norm": 0.0011338423937559128, "learning_rate": 1.2981265966505821e-05, "loss": 8.198060095310212e-06, "step": 306690 }, { "epoch": 87.05648594947488, "grad_norm": 0.0032465471886098385, "learning_rate": 1.2978427476582457e-05, "loss": 8.43629240989685e-06, "step": 306700 }, { "epoch": 87.05932443939824, "grad_norm": 0.001934858737513423, "learning_rate": 1.2975588986659099e-05, "loss": 7.538497447967529e-06, "step": 306710 }, { "epoch": 87.0621629293216, "grad_norm": 0.00019769753271248192, "learning_rate": 1.2972750496735737e-05, "loss": 9.122677147388458e-06, "step": 306720 }, { "epoch": 87.06500141924496, "grad_norm": 0.0010709513444453478, "learning_rate": 1.2969912006812376e-05, "loss": 7.041916251182556e-06, "step": 306730 }, { "epoch": 87.06783990916833, "grad_norm": 0.0016232193447649479, "learning_rate": 1.2967073516889016e-05, "loss": 8.039548993110656e-06, "step": 306740 }, { "epoch": 87.07067839909168, "grad_norm": 0.0009288092260248959, "learning_rate": 1.2964235026965654e-05, "loss": 7.495656609535217e-06, "step": 306750 }, { "epoch": 87.07351688901504, "grad_norm": 0.0015849951887503266, "learning_rate": 1.2961396537042294e-05, "loss": 9.203702211380005e-06, "step": 306760 }, { "epoch": 87.0763553789384, "grad_norm": 0.00044532728497870266, "learning_rate": 1.2958558047118932e-05, "loss": 3.2206997275352476e-06, "step": 306770 }, { "epoch": 87.07919386886176, "grad_norm": 0.0006797729292884469, "learning_rate": 1.2955719557195573e-05, "loss": 8.810870349407196e-06, "step": 306780 }, { "epoch": 87.08203235878513, "grad_norm": 0.0016061966307461262, "learning_rate": 1.2952881067272213e-05, "loss": 4.007667303085327e-06, "step": 306790 }, { "epoch": 87.08487084870849, "grad_norm": 0.0002175823028665036, "learning_rate": 1.295004257734885e-05, "loss": 8.624047040939331e-06, "step": 306800 }, { "epoch": 87.08770933863185, "grad_norm": 0.0012364083668217063, "learning_rate": 1.294720408742549e-05, "loss": 1.1805258691310882e-05, "step": 306810 }, { "epoch": 87.0905478285552, "grad_norm": 0.003514158306643367, "learning_rate": 1.2944365597502128e-05, "loss": 7.299520075321198e-06, "step": 306820 }, { "epoch": 87.09338631847857, "grad_norm": 0.005301823373883963, "learning_rate": 1.2941527107578768e-05, "loss": 5.757063627243042e-06, "step": 306830 }, { "epoch": 87.09622480840193, "grad_norm": 0.0012825748417526484, "learning_rate": 1.293868861765541e-05, "loss": 5.8688223361969e-06, "step": 306840 }, { "epoch": 87.09906329832529, "grad_norm": 0.008988670073449612, "learning_rate": 1.2935850127732046e-05, "loss": 9.088404476642609e-06, "step": 306850 }, { "epoch": 87.10190178824865, "grad_norm": 0.00394676998257637, "learning_rate": 1.2933011637808687e-05, "loss": 6.006099283695221e-06, "step": 306860 }, { "epoch": 87.10474027817202, "grad_norm": 0.005151314195245504, "learning_rate": 1.2930173147885325e-05, "loss": 1.2460723519325257e-05, "step": 306870 }, { "epoch": 87.10757876809538, "grad_norm": 0.007611339446157217, "learning_rate": 1.2927334657961965e-05, "loss": 1.2045353651046753e-05, "step": 306880 }, { "epoch": 87.11041725801873, "grad_norm": 0.0014149834169074893, "learning_rate": 1.2924496168038605e-05, "loss": 4.974324256181717e-05, "step": 306890 }, { "epoch": 87.11325574794209, "grad_norm": 0.002062814310193062, "learning_rate": 1.2921657678115243e-05, "loss": 1.321788877248764e-05, "step": 306900 }, { "epoch": 87.11609423786545, "grad_norm": 0.0014058412052690983, "learning_rate": 1.2918819188191882e-05, "loss": 2.5438703596591948e-05, "step": 306910 }, { "epoch": 87.11893272778882, "grad_norm": 0.0014897778164595366, "learning_rate": 1.291598069826852e-05, "loss": 1.2790225446224212e-05, "step": 306920 }, { "epoch": 87.12177121771218, "grad_norm": 0.0037113602738827467, "learning_rate": 1.2913142208345162e-05, "loss": 1.0908208787441254e-05, "step": 306930 }, { "epoch": 87.12460970763554, "grad_norm": 0.0014185330364853144, "learning_rate": 1.2910303718421801e-05, "loss": 1.2126564979553222e-05, "step": 306940 }, { "epoch": 87.1274481975589, "grad_norm": 0.00416771974414587, "learning_rate": 1.290746522849844e-05, "loss": 9.81111079454422e-06, "step": 306950 }, { "epoch": 87.13028668748225, "grad_norm": 0.0009816632373258471, "learning_rate": 1.2904626738575079e-05, "loss": 1.2697651982307434e-05, "step": 306960 }, { "epoch": 87.13312517740562, "grad_norm": 0.00022815527336206287, "learning_rate": 1.2901788248651717e-05, "loss": 1.1903233826160431e-05, "step": 306970 }, { "epoch": 87.13596366732898, "grad_norm": 0.0008538194233551621, "learning_rate": 1.2898949758728357e-05, "loss": 5.304627120494843e-06, "step": 306980 }, { "epoch": 87.13880215725234, "grad_norm": 0.0017230988014489412, "learning_rate": 1.2896111268804998e-05, "loss": 1.1052004992961884e-05, "step": 306990 }, { "epoch": 87.1416406471757, "grad_norm": 0.03773070499300957, "learning_rate": 1.2893272778881634e-05, "loss": 2.8679706156253813e-05, "step": 307000 }, { "epoch": 87.1416406471757, "eval_accuracy": 0.9885547148216443, "eval_loss": 0.0514603890478611, "eval_runtime": 35.6844, "eval_samples_per_second": 440.724, "eval_steps_per_second": 6.894, "step": 307000 }, { "epoch": 87.14447913709907, "grad_norm": 0.0012289630249142647, "learning_rate": 1.2890434288958276e-05, "loss": 1.0927766561508179e-05, "step": 307010 }, { "epoch": 87.14731762702242, "grad_norm": 0.015784716233611107, "learning_rate": 1.2887595799034912e-05, "loss": 9.9843367934227e-06, "step": 307020 }, { "epoch": 87.15015611694578, "grad_norm": 0.0024059806019067764, "learning_rate": 1.2884757309111553e-05, "loss": 6.8319961428642275e-06, "step": 307030 }, { "epoch": 87.15299460686914, "grad_norm": 0.0013013383140787482, "learning_rate": 1.2881918819188193e-05, "loss": 7.425062358379364e-06, "step": 307040 }, { "epoch": 87.1558330967925, "grad_norm": 0.0011415977496653795, "learning_rate": 1.2879080329264831e-05, "loss": 7.336027920246124e-06, "step": 307050 }, { "epoch": 87.15867158671587, "grad_norm": 0.004904731176793575, "learning_rate": 1.287624183934147e-05, "loss": 8.517690002918243e-06, "step": 307060 }, { "epoch": 87.16151007663923, "grad_norm": 0.005543828010559082, "learning_rate": 1.2873403349418109e-05, "loss": 5.309842526912689e-06, "step": 307070 }, { "epoch": 87.1643485665626, "grad_norm": 0.00630249734967947, "learning_rate": 1.287056485949475e-05, "loss": 1.3179890811443328e-05, "step": 307080 }, { "epoch": 87.16718705648594, "grad_norm": 0.0013778624124825, "learning_rate": 1.286772636957139e-05, "loss": 6.625615060329437e-06, "step": 307090 }, { "epoch": 87.1700255464093, "grad_norm": 0.002910919487476349, "learning_rate": 1.2864887879648028e-05, "loss": 1.2947805225849152e-05, "step": 307100 }, { "epoch": 87.17286403633267, "grad_norm": 0.0029861328657716513, "learning_rate": 1.2862049389724667e-05, "loss": 6.979517638683319e-06, "step": 307110 }, { "epoch": 87.17570252625603, "grad_norm": 0.0031452816911041737, "learning_rate": 1.2859210899801305e-05, "loss": 7.18291848897934e-06, "step": 307120 }, { "epoch": 87.1785410161794, "grad_norm": 0.010934264399111271, "learning_rate": 1.2856372409877945e-05, "loss": 7.5694173574447635e-06, "step": 307130 }, { "epoch": 87.18137950610276, "grad_norm": 0.0012335111387073994, "learning_rate": 1.2853533919954586e-05, "loss": 8.321553468704224e-06, "step": 307140 }, { "epoch": 87.18421799602612, "grad_norm": 0.0025168738793581724, "learning_rate": 1.2850695430031223e-05, "loss": 7.542222738265991e-06, "step": 307150 }, { "epoch": 87.18705648594947, "grad_norm": 0.0008019803208298981, "learning_rate": 1.2847856940107864e-05, "loss": 8.978322148323059e-06, "step": 307160 }, { "epoch": 87.18989497587283, "grad_norm": 0.0007002244237810373, "learning_rate": 1.28450184501845e-05, "loss": 5.1390379667282104e-06, "step": 307170 }, { "epoch": 87.1927334657962, "grad_norm": 0.0014019471127539873, "learning_rate": 1.2842179960261142e-05, "loss": 6.5710395574569706e-06, "step": 307180 }, { "epoch": 87.19557195571956, "grad_norm": 0.0020042099058628082, "learning_rate": 1.2839341470337781e-05, "loss": 7.053092122077942e-06, "step": 307190 }, { "epoch": 87.19841044564292, "grad_norm": 0.004616095218807459, "learning_rate": 1.283650298041442e-05, "loss": 7.1106478571891785e-06, "step": 307200 }, { "epoch": 87.20124893556628, "grad_norm": 0.0006548544624820352, "learning_rate": 1.2833664490491059e-05, "loss": 1.1163577437400817e-05, "step": 307210 }, { "epoch": 87.20408742548965, "grad_norm": 0.0047865137457847595, "learning_rate": 1.2830826000567697e-05, "loss": 5.893968045711518e-06, "step": 307220 }, { "epoch": 87.206925915413, "grad_norm": 0.0007420594920404255, "learning_rate": 1.2827987510644337e-05, "loss": 2.1595507860183716e-05, "step": 307230 }, { "epoch": 87.20976440533636, "grad_norm": 0.0025344854220747948, "learning_rate": 1.2825149020720978e-05, "loss": 9.543076157569885e-06, "step": 307240 }, { "epoch": 87.21260289525972, "grad_norm": 0.0028900490142405033, "learning_rate": 1.2822310530797616e-05, "loss": 4.936940968036652e-06, "step": 307250 }, { "epoch": 87.21544138518308, "grad_norm": 0.0038818358443677425, "learning_rate": 1.2819472040874256e-05, "loss": 1.0590068995952606e-05, "step": 307260 }, { "epoch": 87.21827987510645, "grad_norm": 0.008927828632295132, "learning_rate": 1.2816633550950894e-05, "loss": 9.073875844478607e-06, "step": 307270 }, { "epoch": 87.22111836502981, "grad_norm": 0.009587710723280907, "learning_rate": 1.2813795061027533e-05, "loss": 6.51385635137558e-06, "step": 307280 }, { "epoch": 87.22395685495316, "grad_norm": 0.0010025392984971404, "learning_rate": 1.2810956571104175e-05, "loss": 9.678676724433899e-06, "step": 307290 }, { "epoch": 87.22679534487652, "grad_norm": 0.0008985128370113671, "learning_rate": 1.2808118081180811e-05, "loss": 5.223415791988373e-06, "step": 307300 }, { "epoch": 87.22963383479988, "grad_norm": 0.0029897592030465603, "learning_rate": 1.2805279591257452e-05, "loss": 8.664466440677642e-06, "step": 307310 }, { "epoch": 87.23247232472325, "grad_norm": 0.003650480881333351, "learning_rate": 1.2802441101334092e-05, "loss": 8.310936391353607e-06, "step": 307320 }, { "epoch": 87.23531081464661, "grad_norm": 0.0002698994940146804, "learning_rate": 1.279960261141073e-05, "loss": 5.5033713579177855e-06, "step": 307330 }, { "epoch": 87.23814930456997, "grad_norm": 0.0008659060695208609, "learning_rate": 1.279676412148737e-05, "loss": 1.2255460023880006e-05, "step": 307340 }, { "epoch": 87.24098779449334, "grad_norm": 0.0013182451948523521, "learning_rate": 1.2793925631564008e-05, "loss": 8.655898272991181e-06, "step": 307350 }, { "epoch": 87.24382628441668, "grad_norm": 0.006714821793138981, "learning_rate": 1.2791087141640648e-05, "loss": 2.5232136249542236e-05, "step": 307360 }, { "epoch": 87.24666477434005, "grad_norm": 0.0019981644582003355, "learning_rate": 1.2788248651717289e-05, "loss": 1.0422058403491974e-05, "step": 307370 }, { "epoch": 87.24950326426341, "grad_norm": 0.00190918508451432, "learning_rate": 1.2785410161793925e-05, "loss": 8.92486423254013e-06, "step": 307380 }, { "epoch": 87.25234175418677, "grad_norm": 0.0016018440946936607, "learning_rate": 1.2782571671870567e-05, "loss": 1.2616813182830811e-05, "step": 307390 }, { "epoch": 87.25518024411014, "grad_norm": 0.004981392994523048, "learning_rate": 1.2779733181947205e-05, "loss": 1.0337121784687042e-05, "step": 307400 }, { "epoch": 87.2580187340335, "grad_norm": 0.0010649129981175065, "learning_rate": 1.2776894692023844e-05, "loss": 5.903653800487518e-06, "step": 307410 }, { "epoch": 87.26085722395686, "grad_norm": 0.0004988880245946348, "learning_rate": 1.2774056202100484e-05, "loss": 1.1185184121131897e-05, "step": 307420 }, { "epoch": 87.26369571388021, "grad_norm": 0.0008468502201139927, "learning_rate": 1.2771217712177122e-05, "loss": 7.309019565582275e-06, "step": 307430 }, { "epoch": 87.26653420380357, "grad_norm": 0.003893290413543582, "learning_rate": 1.2768379222253762e-05, "loss": 1.1483579874038696e-05, "step": 307440 }, { "epoch": 87.26937269372694, "grad_norm": 0.000353113078745082, "learning_rate": 1.27655407323304e-05, "loss": 4.39714640378952e-06, "step": 307450 }, { "epoch": 87.2722111836503, "grad_norm": 0.0013758959248661995, "learning_rate": 1.2762702242407041e-05, "loss": 8.006580173969269e-06, "step": 307460 }, { "epoch": 87.27504967357366, "grad_norm": 0.0007470412529073656, "learning_rate": 1.275986375248368e-05, "loss": 1.29014253616333e-05, "step": 307470 }, { "epoch": 87.27788816349702, "grad_norm": 0.0029297948349267244, "learning_rate": 1.2757025262560319e-05, "loss": 5.384534597396851e-06, "step": 307480 }, { "epoch": 87.28072665342037, "grad_norm": 0.0015622578794136643, "learning_rate": 1.2754186772636958e-05, "loss": 6.87427818775177e-06, "step": 307490 }, { "epoch": 87.28356514334374, "grad_norm": 0.0004661151033360511, "learning_rate": 1.2751348282713596e-05, "loss": 1.2121535837650299e-05, "step": 307500 }, { "epoch": 87.28356514334374, "eval_accuracy": 0.988872639409932, "eval_loss": 0.050165627151727676, "eval_runtime": 37.3682, "eval_samples_per_second": 420.866, "eval_steps_per_second": 6.583, "step": 307500 }, { "epoch": 87.2864036332671, "grad_norm": 0.0026312554255127907, "learning_rate": 1.2748509792790236e-05, "loss": 8.440949022769928e-06, "step": 307510 }, { "epoch": 87.28924212319046, "grad_norm": 0.0003257891221437603, "learning_rate": 1.2745671302866877e-05, "loss": 5.71180135011673e-06, "step": 307520 }, { "epoch": 87.29208061311382, "grad_norm": 0.0010593030601739883, "learning_rate": 1.2742832812943514e-05, "loss": 4.568509757518768e-06, "step": 307530 }, { "epoch": 87.29491910303719, "grad_norm": 0.005275383125990629, "learning_rate": 1.2739994323020155e-05, "loss": 5.204416811466217e-06, "step": 307540 }, { "epoch": 87.29775759296055, "grad_norm": 0.0027415184304118156, "learning_rate": 1.2737155833096793e-05, "loss": 4.1801482439041134e-06, "step": 307550 }, { "epoch": 87.3005960828839, "grad_norm": 0.007197884377092123, "learning_rate": 1.2734317343173433e-05, "loss": 1.2906454503536225e-05, "step": 307560 }, { "epoch": 87.30343457280726, "grad_norm": 0.001663312199525535, "learning_rate": 1.2731478853250072e-05, "loss": 1.3487786054611206e-05, "step": 307570 }, { "epoch": 87.30627306273063, "grad_norm": 0.00035957968793809414, "learning_rate": 1.272864036332671e-05, "loss": 1.025814563035965e-05, "step": 307580 }, { "epoch": 87.30911155265399, "grad_norm": 0.0026099153328686953, "learning_rate": 1.272580187340335e-05, "loss": 8.646398782730103e-06, "step": 307590 }, { "epoch": 87.31195004257735, "grad_norm": 0.004818896763026714, "learning_rate": 1.2722963383479988e-05, "loss": 5.4532662034034726e-06, "step": 307600 }, { "epoch": 87.31478853250071, "grad_norm": 0.0013679825933650136, "learning_rate": 1.272012489355663e-05, "loss": 6.926245987415314e-06, "step": 307610 }, { "epoch": 87.31762702242408, "grad_norm": 0.00030757905915379524, "learning_rate": 1.2717286403633269e-05, "loss": 7.719919085502625e-06, "step": 307620 }, { "epoch": 87.32046551234743, "grad_norm": 0.0018988515948876739, "learning_rate": 1.2714447913709907e-05, "loss": 1.0288134217262267e-05, "step": 307630 }, { "epoch": 87.32330400227079, "grad_norm": 0.0007082419469952583, "learning_rate": 1.2711609423786547e-05, "loss": 5.9174373745918276e-06, "step": 307640 }, { "epoch": 87.32614249219415, "grad_norm": 0.002966284519061446, "learning_rate": 1.2708770933863185e-05, "loss": 1.040492206811905e-05, "step": 307650 }, { "epoch": 87.32898098211751, "grad_norm": 0.0009125744691118598, "learning_rate": 1.2705932443939824e-05, "loss": 5.677714943885803e-06, "step": 307660 }, { "epoch": 87.33181947204088, "grad_norm": 0.00269458070397377, "learning_rate": 1.2703093954016466e-05, "loss": 8.293427526950836e-06, "step": 307670 }, { "epoch": 87.33465796196424, "grad_norm": 0.0019071419956162572, "learning_rate": 1.2700255464093102e-05, "loss": 7.335469126701355e-06, "step": 307680 }, { "epoch": 87.3374964518876, "grad_norm": 0.0010439010802656412, "learning_rate": 1.2697416974169743e-05, "loss": 1.8920190632343294e-05, "step": 307690 }, { "epoch": 87.34033494181095, "grad_norm": 0.0002450175234116614, "learning_rate": 1.269457848424638e-05, "loss": 5.415081977844238e-06, "step": 307700 }, { "epoch": 87.34317343173431, "grad_norm": 0.003297514049336314, "learning_rate": 1.2691739994323021e-05, "loss": 7.730349898338318e-06, "step": 307710 }, { "epoch": 87.34601192165768, "grad_norm": 0.0018544535851106048, "learning_rate": 1.268890150439966e-05, "loss": 6.611645221710205e-06, "step": 307720 }, { "epoch": 87.34885041158104, "grad_norm": 0.0025312488432973623, "learning_rate": 1.2686063014476299e-05, "loss": 7.629208266735077e-06, "step": 307730 }, { "epoch": 87.3516889015044, "grad_norm": 0.0006187902763485909, "learning_rate": 1.2683224524552938e-05, "loss": 6.656348705291748e-06, "step": 307740 }, { "epoch": 87.35452739142777, "grad_norm": 0.001149380230344832, "learning_rate": 1.2680386034629576e-05, "loss": 8.83638858795166e-06, "step": 307750 }, { "epoch": 87.35736588135111, "grad_norm": 0.0003355195512995124, "learning_rate": 1.2677547544706218e-05, "loss": 8.013658225536347e-06, "step": 307760 }, { "epoch": 87.36020437127448, "grad_norm": 0.0028787648770958185, "learning_rate": 1.2674709054782857e-05, "loss": 6.993301212787628e-06, "step": 307770 }, { "epoch": 87.36304286119784, "grad_norm": 0.0012249082792550325, "learning_rate": 1.2671870564859495e-05, "loss": 1.010168343782425e-05, "step": 307780 }, { "epoch": 87.3658813511212, "grad_norm": 0.0017458023503422737, "learning_rate": 1.2669032074936135e-05, "loss": 1.3623572885990143e-05, "step": 307790 }, { "epoch": 87.36871984104457, "grad_norm": 0.0018479401478543878, "learning_rate": 1.2666193585012773e-05, "loss": 9.464845061302184e-06, "step": 307800 }, { "epoch": 87.37155833096793, "grad_norm": 0.0008647791109979153, "learning_rate": 1.2663355095089413e-05, "loss": 6.271712481975555e-06, "step": 307810 }, { "epoch": 87.37439682089129, "grad_norm": 0.00064101442694664, "learning_rate": 1.2660516605166054e-05, "loss": 7.0840120315551754e-06, "step": 307820 }, { "epoch": 87.37723531081464, "grad_norm": 0.031661342829465866, "learning_rate": 1.265767811524269e-05, "loss": 1.189839094877243e-05, "step": 307830 }, { "epoch": 87.380073800738, "grad_norm": 0.0003190955030731857, "learning_rate": 1.2654839625319332e-05, "loss": 7.190369069576264e-06, "step": 307840 }, { "epoch": 87.38291229066137, "grad_norm": 0.0006691345479339361, "learning_rate": 1.2652001135395968e-05, "loss": 3.7390738725662233e-06, "step": 307850 }, { "epoch": 87.38575078058473, "grad_norm": 0.0005259140161797404, "learning_rate": 1.264916264547261e-05, "loss": 7.227808237075806e-06, "step": 307860 }, { "epoch": 87.38858927050809, "grad_norm": 0.0003702679241541773, "learning_rate": 1.264632415554925e-05, "loss": 4.909932613372803e-06, "step": 307870 }, { "epoch": 87.39142776043145, "grad_norm": 0.0034449175000190735, "learning_rate": 1.2643485665625887e-05, "loss": 6.552040576934814e-06, "step": 307880 }, { "epoch": 87.39426625035482, "grad_norm": 0.00024770948220975697, "learning_rate": 1.2640647175702527e-05, "loss": 5.177035927772522e-06, "step": 307890 }, { "epoch": 87.39710474027817, "grad_norm": 0.006667891517281532, "learning_rate": 1.2637808685779165e-05, "loss": 9.373761713504791e-06, "step": 307900 }, { "epoch": 87.39994323020153, "grad_norm": 0.0017142622964456677, "learning_rate": 1.2634970195855805e-05, "loss": 1.1482462286949158e-05, "step": 307910 }, { "epoch": 87.40278172012489, "grad_norm": 0.0002637745928950608, "learning_rate": 1.2632131705932446e-05, "loss": 4.4425949454307554e-06, "step": 307920 }, { "epoch": 87.40562021004826, "grad_norm": 0.0007941452204249799, "learning_rate": 1.2629293216009084e-05, "loss": 4.705972969532013e-06, "step": 307930 }, { "epoch": 87.40845869997162, "grad_norm": 0.002776963170617819, "learning_rate": 1.2626454726085724e-05, "loss": 5.193054676055908e-06, "step": 307940 }, { "epoch": 87.41129718989498, "grad_norm": 0.0029327180236577988, "learning_rate": 1.2623616236162362e-05, "loss": 7.0409849286079405e-06, "step": 307950 }, { "epoch": 87.41413567981834, "grad_norm": 0.0007532874005846679, "learning_rate": 1.2620777746239001e-05, "loss": 1.3813935220241547e-05, "step": 307960 }, { "epoch": 87.41697416974169, "grad_norm": 0.0010355106787756085, "learning_rate": 1.2617939256315643e-05, "loss": 4.1119754314422606e-06, "step": 307970 }, { "epoch": 87.41981265966506, "grad_norm": 0.0023411684669554234, "learning_rate": 1.2615100766392279e-05, "loss": 7.52285122871399e-06, "step": 307980 }, { "epoch": 87.42265114958842, "grad_norm": 0.0035396909806877375, "learning_rate": 1.261226227646892e-05, "loss": 1.2208707630634308e-05, "step": 307990 }, { "epoch": 87.42548963951178, "grad_norm": 0.004459182266145945, "learning_rate": 1.2609423786545557e-05, "loss": 9.803846478462219e-06, "step": 308000 }, { "epoch": 87.42548963951178, "eval_accuracy": 0.988745469574617, "eval_loss": 0.050355203449726105, "eval_runtime": 35.9688, "eval_samples_per_second": 437.24, "eval_steps_per_second": 6.839, "step": 308000 }, { "epoch": 87.42832812943514, "grad_norm": 0.001715399557724595, "learning_rate": 1.2606585296622198e-05, "loss": 9.969063103199006e-06, "step": 308010 }, { "epoch": 87.4311666193585, "grad_norm": 0.0072158342227339745, "learning_rate": 1.2603746806698838e-05, "loss": 1.3406015932559967e-05, "step": 308020 }, { "epoch": 87.43400510928186, "grad_norm": 0.006455295253545046, "learning_rate": 1.2600908316775476e-05, "loss": 6.409548223018646e-06, "step": 308030 }, { "epoch": 87.43684359920522, "grad_norm": 0.0007798505248501897, "learning_rate": 1.2598069826852115e-05, "loss": 6.723403930664062e-06, "step": 308040 }, { "epoch": 87.43968208912858, "grad_norm": 0.0034959798213094473, "learning_rate": 1.2595231336928753e-05, "loss": 7.509440183639526e-06, "step": 308050 }, { "epoch": 87.44252057905194, "grad_norm": 0.019235840067267418, "learning_rate": 1.2592392847005393e-05, "loss": 7.966905832290649e-06, "step": 308060 }, { "epoch": 87.4453590689753, "grad_norm": 0.0004145768180023879, "learning_rate": 1.2589554357082034e-05, "loss": 4.937686026096344e-06, "step": 308070 }, { "epoch": 87.44819755889867, "grad_norm": 0.0027008922770619392, "learning_rate": 1.2586715867158672e-05, "loss": 1.2675300240516663e-05, "step": 308080 }, { "epoch": 87.45103604882203, "grad_norm": 0.003005674807354808, "learning_rate": 1.2583877377235312e-05, "loss": 6.961822509765625e-06, "step": 308090 }, { "epoch": 87.45387453874538, "grad_norm": 0.0011317277094349265, "learning_rate": 1.258103888731195e-05, "loss": 4.9263238906860355e-06, "step": 308100 }, { "epoch": 87.45671302866874, "grad_norm": 0.0006022783927619457, "learning_rate": 1.257820039738859e-05, "loss": 5.975551903247833e-06, "step": 308110 }, { "epoch": 87.45955151859211, "grad_norm": 0.004480360541492701, "learning_rate": 1.257536190746523e-05, "loss": 6.6414475440979e-06, "step": 308120 }, { "epoch": 87.46239000851547, "grad_norm": 0.0010591885074973106, "learning_rate": 1.2572523417541867e-05, "loss": 2.078711986541748e-05, "step": 308130 }, { "epoch": 87.46522849843883, "grad_norm": 0.0010841629700735211, "learning_rate": 1.2569684927618509e-05, "loss": 7.491372525691986e-06, "step": 308140 }, { "epoch": 87.4680669883622, "grad_norm": 0.0022275312803685665, "learning_rate": 1.2566846437695145e-05, "loss": 5.240365862846375e-06, "step": 308150 }, { "epoch": 87.47090547828556, "grad_norm": 0.0014177096309140325, "learning_rate": 1.2564007947771786e-05, "loss": 7.865019142627716e-06, "step": 308160 }, { "epoch": 87.47374396820891, "grad_norm": 0.0026826842222362757, "learning_rate": 1.2561169457848426e-05, "loss": 7.984042167663574e-06, "step": 308170 }, { "epoch": 87.47658245813227, "grad_norm": 0.00021399540128186345, "learning_rate": 1.2558330967925064e-05, "loss": 7.327832281589508e-06, "step": 308180 }, { "epoch": 87.47942094805563, "grad_norm": 0.0029312956612557173, "learning_rate": 1.2555492478001704e-05, "loss": 1.205597072839737e-05, "step": 308190 }, { "epoch": 87.482259437979, "grad_norm": 0.0009160908521153033, "learning_rate": 1.2552653988078342e-05, "loss": 7.128715515136718e-06, "step": 308200 }, { "epoch": 87.48509792790236, "grad_norm": 0.0019283178262412548, "learning_rate": 1.2549815498154981e-05, "loss": 6.663426756858826e-06, "step": 308210 }, { "epoch": 87.48793641782572, "grad_norm": 0.0008118539117276669, "learning_rate": 1.2546977008231623e-05, "loss": 6.989762187004089e-06, "step": 308220 }, { "epoch": 87.49077490774907, "grad_norm": 0.004027572460472584, "learning_rate": 1.254413851830826e-05, "loss": 5.12339174747467e-06, "step": 308230 }, { "epoch": 87.49361339767243, "grad_norm": 0.0017185634933412075, "learning_rate": 1.25413000283849e-05, "loss": 4.51449304819107e-06, "step": 308240 }, { "epoch": 87.4964518875958, "grad_norm": 0.0012706636916846037, "learning_rate": 1.2538461538461538e-05, "loss": 3.938004374504089e-06, "step": 308250 }, { "epoch": 87.49929037751916, "grad_norm": 0.0009633236331865191, "learning_rate": 1.2535623048538178e-05, "loss": 5.802884697914124e-06, "step": 308260 }, { "epoch": 87.50212886744252, "grad_norm": 0.0015169010730460286, "learning_rate": 1.2532784558614818e-05, "loss": 9.63546335697174e-06, "step": 308270 }, { "epoch": 87.50496735736589, "grad_norm": 0.0027315730694681406, "learning_rate": 1.2529946068691456e-05, "loss": 1.5411712229251862e-05, "step": 308280 }, { "epoch": 87.50780584728925, "grad_norm": 0.005946922581642866, "learning_rate": 1.2527107578768097e-05, "loss": 1.1609308421611786e-05, "step": 308290 }, { "epoch": 87.5106443372126, "grad_norm": 0.0006460627191700041, "learning_rate": 1.2524269088844733e-05, "loss": 4.817917943000793e-06, "step": 308300 }, { "epoch": 87.51348282713596, "grad_norm": 0.003761362051591277, "learning_rate": 1.2521430598921375e-05, "loss": 6.309710443019867e-06, "step": 308310 }, { "epoch": 87.51632131705932, "grad_norm": 0.0002774643071461469, "learning_rate": 1.2518592108998015e-05, "loss": 3.952719271183014e-06, "step": 308320 }, { "epoch": 87.51915980698269, "grad_norm": 0.002194817177951336, "learning_rate": 1.2515753619074652e-05, "loss": 7.868185639381408e-06, "step": 308330 }, { "epoch": 87.52199829690605, "grad_norm": 0.026670614257454872, "learning_rate": 1.2512915129151292e-05, "loss": 1.887138932943344e-05, "step": 308340 }, { "epoch": 87.52483678682941, "grad_norm": 0.007027135696262121, "learning_rate": 1.251007663922793e-05, "loss": 6.946176290512085e-06, "step": 308350 }, { "epoch": 87.52767527675277, "grad_norm": 0.001569048035889864, "learning_rate": 1.250723814930457e-05, "loss": 5.183182656764984e-06, "step": 308360 }, { "epoch": 87.53051376667612, "grad_norm": 0.00028771290089935064, "learning_rate": 1.2504399659381211e-05, "loss": 6.261840462684632e-06, "step": 308370 }, { "epoch": 87.53335225659949, "grad_norm": 0.000965654500760138, "learning_rate": 1.2501561169457848e-05, "loss": 5.1228329539299015e-06, "step": 308380 }, { "epoch": 87.53619074652285, "grad_norm": 0.0011930488981306553, "learning_rate": 1.2498722679534489e-05, "loss": 3.244113177061081e-05, "step": 308390 }, { "epoch": 87.53902923644621, "grad_norm": 0.001671508071012795, "learning_rate": 1.2495884189611127e-05, "loss": 5.587935447692871e-06, "step": 308400 }, { "epoch": 87.54186772636957, "grad_norm": 0.0013567340793088078, "learning_rate": 1.2493045699687767e-05, "loss": 9.83644276857376e-06, "step": 308410 }, { "epoch": 87.54470621629294, "grad_norm": 0.001873056753538549, "learning_rate": 1.2490207209764406e-05, "loss": 7.265433669090271e-06, "step": 308420 }, { "epoch": 87.5475447062163, "grad_norm": 0.00021847130847163498, "learning_rate": 1.2487368719841046e-05, "loss": 8.769705891609192e-06, "step": 308430 }, { "epoch": 87.55038319613965, "grad_norm": 0.0038908792193979025, "learning_rate": 1.2484530229917684e-05, "loss": 4.8119574785232546e-06, "step": 308440 }, { "epoch": 87.55322168606301, "grad_norm": 0.05990573391318321, "learning_rate": 1.2481691739994324e-05, "loss": 1.8950924277305604e-05, "step": 308450 }, { "epoch": 87.55606017598637, "grad_norm": 0.002150116953998804, "learning_rate": 1.2478853250070963e-05, "loss": 9.627267718315125e-06, "step": 308460 }, { "epoch": 87.55889866590974, "grad_norm": 0.0011850415030494332, "learning_rate": 1.2476014760147601e-05, "loss": 1.019015908241272e-05, "step": 308470 }, { "epoch": 87.5617371558331, "grad_norm": 0.0007835754076950252, "learning_rate": 1.2473176270224243e-05, "loss": 6.622262299060822e-06, "step": 308480 }, { "epoch": 87.56457564575646, "grad_norm": 0.0006887389463372529, "learning_rate": 1.247033778030088e-05, "loss": 8.348003029823304e-06, "step": 308490 }, { "epoch": 87.56741413567981, "grad_norm": 0.0011173546081408858, "learning_rate": 1.246749929037752e-05, "loss": 4.892796277999878e-06, "step": 308500 }, { "epoch": 87.56741413567981, "eval_accuracy": 0.9893177338335347, "eval_loss": 0.04880739375948906, "eval_runtime": 36.1815, "eval_samples_per_second": 434.67, "eval_steps_per_second": 6.799, "step": 308500 }, { "epoch": 87.57025262560317, "grad_norm": 0.0008909863536246121, "learning_rate": 1.2464660800454158e-05, "loss": 4.558637738227844e-06, "step": 308510 }, { "epoch": 87.57309111552654, "grad_norm": 0.002114024944603443, "learning_rate": 1.2461822310530798e-05, "loss": 3.779120743274689e-06, "step": 308520 }, { "epoch": 87.5759296054499, "grad_norm": 0.002715144772082567, "learning_rate": 1.2458983820607438e-05, "loss": 5.1192939281463625e-06, "step": 308530 }, { "epoch": 87.57876809537326, "grad_norm": 0.0006784844445064664, "learning_rate": 1.2456145330684077e-05, "loss": 8.87848436832428e-06, "step": 308540 }, { "epoch": 87.58160658529663, "grad_norm": 0.005133163649588823, "learning_rate": 1.2453306840760715e-05, "loss": 8.247233927249908e-06, "step": 308550 }, { "epoch": 87.58444507521999, "grad_norm": 0.003193119540810585, "learning_rate": 1.2450468350837355e-05, "loss": 1.2586824595928192e-05, "step": 308560 }, { "epoch": 87.58728356514334, "grad_norm": 0.008368228562176228, "learning_rate": 1.2447629860913993e-05, "loss": 1.007765531539917e-05, "step": 308570 }, { "epoch": 87.5901220550667, "grad_norm": 0.00302872690372169, "learning_rate": 1.2444791370990634e-05, "loss": 7.616728544235229e-06, "step": 308580 }, { "epoch": 87.59296054499006, "grad_norm": 0.00900217704474926, "learning_rate": 1.2441952881067272e-05, "loss": 6.8301334977149965e-06, "step": 308590 }, { "epoch": 87.59579903491343, "grad_norm": 0.0033342824317514896, "learning_rate": 1.2439114391143912e-05, "loss": 1.9049085676670074e-05, "step": 308600 }, { "epoch": 87.59863752483679, "grad_norm": 0.004109544213861227, "learning_rate": 1.2436275901220552e-05, "loss": 1.4237500727176666e-05, "step": 308610 }, { "epoch": 87.60147601476015, "grad_norm": 0.0003967868979088962, "learning_rate": 1.243343741129719e-05, "loss": 5.4012984037399295e-06, "step": 308620 }, { "epoch": 87.60431450468351, "grad_norm": 0.0014102284330874681, "learning_rate": 1.2430598921373831e-05, "loss": 5.383230745792389e-06, "step": 308630 }, { "epoch": 87.60715299460686, "grad_norm": 0.004479388240724802, "learning_rate": 1.2427760431450469e-05, "loss": 9.462051093578338e-06, "step": 308640 }, { "epoch": 87.60999148453023, "grad_norm": 0.004378515295684338, "learning_rate": 1.2424921941527109e-05, "loss": 6.186030805110931e-06, "step": 308650 }, { "epoch": 87.61282997445359, "grad_norm": 0.0006605949602089822, "learning_rate": 1.2422083451603747e-05, "loss": 1.0414980351924896e-05, "step": 308660 }, { "epoch": 87.61566846437695, "grad_norm": 0.004984881263226271, "learning_rate": 1.2419244961680386e-05, "loss": 1.0150112211704254e-05, "step": 308670 }, { "epoch": 87.61850695430032, "grad_norm": 0.00021570405806414783, "learning_rate": 1.2416406471757026e-05, "loss": 1.1606141924858093e-05, "step": 308680 }, { "epoch": 87.62134544422368, "grad_norm": 0.0004682500148192048, "learning_rate": 1.2413567981833666e-05, "loss": 7.510744035243988e-06, "step": 308690 }, { "epoch": 87.62418393414703, "grad_norm": 0.0005424072151072323, "learning_rate": 1.2410729491910304e-05, "loss": 1.0332092642784119e-05, "step": 308700 }, { "epoch": 87.62702242407039, "grad_norm": 0.0024013409856706858, "learning_rate": 1.2407891001986943e-05, "loss": 3.879144787788391e-06, "step": 308710 }, { "epoch": 87.62986091399375, "grad_norm": 0.004951354581862688, "learning_rate": 1.2405052512063581e-05, "loss": 8.678622543811798e-06, "step": 308720 }, { "epoch": 87.63269940391712, "grad_norm": 0.0004413019632920623, "learning_rate": 1.2402214022140223e-05, "loss": 8.717738091945649e-06, "step": 308730 }, { "epoch": 87.63553789384048, "grad_norm": 0.01980511285364628, "learning_rate": 1.239937553221686e-05, "loss": 2.114996314048767e-05, "step": 308740 }, { "epoch": 87.63837638376384, "grad_norm": 0.00033857583184726536, "learning_rate": 1.23965370422935e-05, "loss": 6.706453859806061e-06, "step": 308750 }, { "epoch": 87.6412148736872, "grad_norm": 0.0013171483296900988, "learning_rate": 1.239369855237014e-05, "loss": 4.163011908531189e-06, "step": 308760 }, { "epoch": 87.64405336361055, "grad_norm": 0.007053052540868521, "learning_rate": 1.2390860062446778e-05, "loss": 7.664598524570464e-06, "step": 308770 }, { "epoch": 87.64689185353392, "grad_norm": 0.0006872756639495492, "learning_rate": 1.2388021572523418e-05, "loss": 3.608502447605133e-06, "step": 308780 }, { "epoch": 87.64973034345728, "grad_norm": 0.01556087750941515, "learning_rate": 1.2385183082600057e-05, "loss": 1.746583729982376e-05, "step": 308790 }, { "epoch": 87.65256883338064, "grad_norm": 0.0005541261052712798, "learning_rate": 1.2382344592676697e-05, "loss": 7.038190960884094e-06, "step": 308800 }, { "epoch": 87.655407323304, "grad_norm": 0.0007800163002684712, "learning_rate": 1.2379506102753335e-05, "loss": 5.306117236614227e-06, "step": 308810 }, { "epoch": 87.65824581322737, "grad_norm": 0.0010376345599070191, "learning_rate": 1.2376667612829975e-05, "loss": 6.076321005821228e-06, "step": 308820 }, { "epoch": 87.66108430315073, "grad_norm": 0.004935247357934713, "learning_rate": 1.2373829122906614e-05, "loss": 6.964989006519318e-06, "step": 308830 }, { "epoch": 87.66392279307408, "grad_norm": 0.002612850395962596, "learning_rate": 1.2370990632983254e-05, "loss": 5.012564361095428e-06, "step": 308840 }, { "epoch": 87.66676128299744, "grad_norm": 0.0006012855446897447, "learning_rate": 1.2368152143059892e-05, "loss": 5.924887955188751e-06, "step": 308850 }, { "epoch": 87.6695997729208, "grad_norm": 0.002220391994342208, "learning_rate": 1.2365313653136532e-05, "loss": 5.034171044826507e-06, "step": 308860 }, { "epoch": 87.67243826284417, "grad_norm": 0.000729719118680805, "learning_rate": 1.236247516321317e-05, "loss": 7.677078247070313e-06, "step": 308870 }, { "epoch": 87.67527675276753, "grad_norm": 0.0004308847419451922, "learning_rate": 1.2359636673289811e-05, "loss": 5.511194467544556e-06, "step": 308880 }, { "epoch": 87.6781152426909, "grad_norm": 0.0007814866839908063, "learning_rate": 1.235679818336645e-05, "loss": 6.808154284954071e-06, "step": 308890 }, { "epoch": 87.68095373261426, "grad_norm": 0.00046714916243217885, "learning_rate": 1.2353959693443089e-05, "loss": 5.433335900306702e-06, "step": 308900 }, { "epoch": 87.6837922225376, "grad_norm": 0.0011838347418233752, "learning_rate": 1.2351121203519727e-05, "loss": 9.951740503311158e-06, "step": 308910 }, { "epoch": 87.68663071246097, "grad_norm": 0.0018018538830801845, "learning_rate": 1.2348282713596368e-05, "loss": 6.318464875221252e-06, "step": 308920 }, { "epoch": 87.68946920238433, "grad_norm": 0.0017178368289023638, "learning_rate": 1.2345444223673006e-05, "loss": 6.429664790630341e-06, "step": 308930 }, { "epoch": 87.6923076923077, "grad_norm": 0.0015870946226641536, "learning_rate": 1.2342605733749646e-05, "loss": 5.688145756721497e-06, "step": 308940 }, { "epoch": 87.69514618223106, "grad_norm": 0.0006256027263589203, "learning_rate": 1.2339767243826286e-05, "loss": 5.9602782130241396e-06, "step": 308950 }, { "epoch": 87.69798467215442, "grad_norm": 0.002110430970788002, "learning_rate": 1.2336928753902924e-05, "loss": 8.819997310638427e-06, "step": 308960 }, { "epoch": 87.70082316207777, "grad_norm": 0.0015858495607972145, "learning_rate": 1.2334090263979565e-05, "loss": 5.625002086162567e-06, "step": 308970 }, { "epoch": 87.70366165200113, "grad_norm": 0.000502837763633579, "learning_rate": 1.2331251774056203e-05, "loss": 4.184618592262268e-06, "step": 308980 }, { "epoch": 87.7065001419245, "grad_norm": 0.0013271033531054854, "learning_rate": 1.2328413284132843e-05, "loss": 6.153993308544159e-06, "step": 308990 }, { "epoch": 87.70933863184786, "grad_norm": 0.00543211679905653, "learning_rate": 1.232557479420948e-05, "loss": 5.617178976535797e-06, "step": 309000 }, { "epoch": 87.70933863184786, "eval_accuracy": 0.9894449036688497, "eval_loss": 0.048487402498722076, "eval_runtime": 35.768, "eval_samples_per_second": 439.694, "eval_steps_per_second": 6.878, "step": 309000 }, { "epoch": 87.71217712177122, "grad_norm": 0.0020177995320409536, "learning_rate": 1.232273630428612e-05, "loss": 7.409974932670593e-06, "step": 309010 }, { "epoch": 87.71501561169458, "grad_norm": 0.0007777427672408521, "learning_rate": 1.231989781436276e-05, "loss": 6.16125762462616e-06, "step": 309020 }, { "epoch": 87.71785410161795, "grad_norm": 0.0031337596010416746, "learning_rate": 1.23170593244394e-05, "loss": 6.516091525554657e-06, "step": 309030 }, { "epoch": 87.7206925915413, "grad_norm": 0.0005999220884405077, "learning_rate": 1.2314220834516038e-05, "loss": 4.376843571662903e-06, "step": 309040 }, { "epoch": 87.72353108146466, "grad_norm": 0.0009033414535224438, "learning_rate": 1.2311382344592677e-05, "loss": 6.184913218021393e-06, "step": 309050 }, { "epoch": 87.72636957138802, "grad_norm": 0.002744391793385148, "learning_rate": 1.2308543854669315e-05, "loss": 3.972277045249939e-06, "step": 309060 }, { "epoch": 87.72920806131138, "grad_norm": 0.0005612745881080627, "learning_rate": 1.2305705364745957e-05, "loss": 2.734363079071045e-06, "step": 309070 }, { "epoch": 87.73204655123475, "grad_norm": 0.001612278982065618, "learning_rate": 1.2302866874822595e-05, "loss": 6.234459578990936e-06, "step": 309080 }, { "epoch": 87.73488504115811, "grad_norm": 0.002177383517846465, "learning_rate": 1.2300028384899234e-05, "loss": 7.347017526626587e-06, "step": 309090 }, { "epoch": 87.73772353108147, "grad_norm": 0.0011919359676539898, "learning_rate": 1.2297189894975874e-05, "loss": 4.76352870464325e-06, "step": 309100 }, { "epoch": 87.74056202100482, "grad_norm": 0.0026460224762558937, "learning_rate": 1.2294351405052512e-05, "loss": 7.80392438173294e-06, "step": 309110 }, { "epoch": 87.74340051092818, "grad_norm": 0.0008370268042199314, "learning_rate": 1.2291512915129152e-05, "loss": 3.7923455238342285e-06, "step": 309120 }, { "epoch": 87.74623900085155, "grad_norm": 0.002696077339351177, "learning_rate": 1.2288674425205791e-05, "loss": 4.8764050006866455e-06, "step": 309130 }, { "epoch": 87.74907749077491, "grad_norm": 0.0017699386226013303, "learning_rate": 1.2285835935282431e-05, "loss": 4.673376679420471e-06, "step": 309140 }, { "epoch": 87.75191598069827, "grad_norm": 0.00530373677611351, "learning_rate": 1.2282997445359069e-05, "loss": 1.1254847049713134e-05, "step": 309150 }, { "epoch": 87.75475447062163, "grad_norm": 0.002560925669968128, "learning_rate": 1.2280158955435709e-05, "loss": 4.403479397296905e-06, "step": 309160 }, { "epoch": 87.757592960545, "grad_norm": 0.0010628896998241544, "learning_rate": 1.2277320465512348e-05, "loss": 4.782155156135559e-06, "step": 309170 }, { "epoch": 87.76043145046835, "grad_norm": 0.00030828273156657815, "learning_rate": 1.2274481975588988e-05, "loss": 5.2265822887420654e-06, "step": 309180 }, { "epoch": 87.76326994039171, "grad_norm": 0.0004385364300105721, "learning_rate": 1.2271643485665626e-05, "loss": 7.690675556659698e-06, "step": 309190 }, { "epoch": 87.76610843031507, "grad_norm": 0.0004409562679938972, "learning_rate": 1.2268804995742266e-05, "loss": 3.876909613609314e-06, "step": 309200 }, { "epoch": 87.76894692023843, "grad_norm": 0.001494675874710083, "learning_rate": 1.2265966505818904e-05, "loss": 5.885772407054901e-06, "step": 309210 }, { "epoch": 87.7717854101618, "grad_norm": 0.01311644446104765, "learning_rate": 1.2263128015895545e-05, "loss": 6.941892206668854e-06, "step": 309220 }, { "epoch": 87.77462390008516, "grad_norm": 0.0010540471412241459, "learning_rate": 1.2260289525972183e-05, "loss": 8.129701018333436e-06, "step": 309230 }, { "epoch": 87.77746239000851, "grad_norm": 0.0009869443019852042, "learning_rate": 1.2257451036048823e-05, "loss": 9.57883894443512e-06, "step": 309240 }, { "epoch": 87.78030087993187, "grad_norm": 0.003168166149407625, "learning_rate": 1.225461254612546e-05, "loss": 5.8658421039581295e-06, "step": 309250 }, { "epoch": 87.78313936985523, "grad_norm": 0.10297969728708267, "learning_rate": 1.22517740562021e-05, "loss": 2.318285405635834e-05, "step": 309260 }, { "epoch": 87.7859778597786, "grad_norm": 0.004023500252515078, "learning_rate": 1.224893556627874e-05, "loss": 1.3392418622970581e-05, "step": 309270 }, { "epoch": 87.78881634970196, "grad_norm": 0.0056161037646234035, "learning_rate": 1.224609707635538e-05, "loss": 5.263090133666992e-06, "step": 309280 }, { "epoch": 87.79165483962532, "grad_norm": 0.0009161143098026514, "learning_rate": 1.224325858643202e-05, "loss": 5.504116415977478e-06, "step": 309290 }, { "epoch": 87.79449332954869, "grad_norm": 0.0010397436562925577, "learning_rate": 1.2240420096508657e-05, "loss": 5.791895091533661e-06, "step": 309300 }, { "epoch": 87.79733181947203, "grad_norm": 0.0007995782652869821, "learning_rate": 1.2237581606585297e-05, "loss": 6.232596933841705e-06, "step": 309310 }, { "epoch": 87.8001703093954, "grad_norm": 0.000291408970952034, "learning_rate": 1.2234743116661937e-05, "loss": 6.5229833126068115e-06, "step": 309320 }, { "epoch": 87.80300879931876, "grad_norm": 0.0010033503640443087, "learning_rate": 1.2231904626738577e-05, "loss": 3.896839916706085e-06, "step": 309330 }, { "epoch": 87.80584728924212, "grad_norm": 0.0037017720751464367, "learning_rate": 1.2229066136815214e-05, "loss": 1.4506839215755463e-05, "step": 309340 }, { "epoch": 87.80868577916549, "grad_norm": 0.000566719623748213, "learning_rate": 1.2226227646891854e-05, "loss": 6.998516619205475e-06, "step": 309350 }, { "epoch": 87.81152426908885, "grad_norm": 0.00042408800800330937, "learning_rate": 1.2223389156968492e-05, "loss": 5.271658301353454e-06, "step": 309360 }, { "epoch": 87.81436275901221, "grad_norm": 0.0032696041744202375, "learning_rate": 1.2220550667045134e-05, "loss": 4.191510379314423e-06, "step": 309370 }, { "epoch": 87.81720124893556, "grad_norm": 0.013501198962330818, "learning_rate": 1.2217712177121772e-05, "loss": 9.875372052192688e-06, "step": 309380 }, { "epoch": 87.82003973885892, "grad_norm": 0.0003397018008399755, "learning_rate": 1.2214873687198411e-05, "loss": 5.357898771762848e-06, "step": 309390 }, { "epoch": 87.82287822878229, "grad_norm": 0.0025815716944634914, "learning_rate": 1.221203519727505e-05, "loss": 6.576254963874817e-06, "step": 309400 }, { "epoch": 87.82571671870565, "grad_norm": 0.0013945966493338346, "learning_rate": 1.2209196707351689e-05, "loss": 5.193240940570831e-06, "step": 309410 }, { "epoch": 87.82855520862901, "grad_norm": 0.0003910214873030782, "learning_rate": 1.2206358217428329e-05, "loss": 6.0247257351875305e-06, "step": 309420 }, { "epoch": 87.83139369855238, "grad_norm": 0.00048512493958696723, "learning_rate": 1.2203519727504968e-05, "loss": 7.799640297889709e-06, "step": 309430 }, { "epoch": 87.83423218847572, "grad_norm": 0.001322106458246708, "learning_rate": 1.2200681237581608e-05, "loss": 8.557364344596862e-06, "step": 309440 }, { "epoch": 87.83707067839909, "grad_norm": 0.0018751017050817609, "learning_rate": 1.2197842747658246e-05, "loss": 5.257129669189453e-06, "step": 309450 }, { "epoch": 87.83990916832245, "grad_norm": 0.0006360372062772512, "learning_rate": 1.2195004257734886e-05, "loss": 7.325597107410431e-06, "step": 309460 }, { "epoch": 87.84274765824581, "grad_norm": 0.0002823819231707603, "learning_rate": 1.2192165767811525e-05, "loss": 7.068924605846405e-06, "step": 309470 }, { "epoch": 87.84558614816918, "grad_norm": 0.0015127341030165553, "learning_rate": 1.2189327277888165e-05, "loss": 8.80453735589981e-06, "step": 309480 }, { "epoch": 87.84842463809254, "grad_norm": 0.0016577339265495539, "learning_rate": 1.2186488787964803e-05, "loss": 5.156546831130982e-06, "step": 309490 }, { "epoch": 87.8512631280159, "grad_norm": 0.0010371318785473704, "learning_rate": 1.2183650298041443e-05, "loss": 7.577799260616303e-06, "step": 309500 }, { "epoch": 87.8512631280159, "eval_accuracy": 0.9889362243275895, "eval_loss": 0.04874683916568756, "eval_runtime": 36.3404, "eval_samples_per_second": 432.769, "eval_steps_per_second": 6.769, "step": 309500 }, { "epoch": 87.85410161793925, "grad_norm": 0.000919072306714952, "learning_rate": 1.218081180811808e-05, "loss": 5.476176738739014e-06, "step": 309510 }, { "epoch": 87.85694010786261, "grad_norm": 0.002063718158751726, "learning_rate": 1.2177973318194722e-05, "loss": 3.6908313632011414e-06, "step": 309520 }, { "epoch": 87.85977859778598, "grad_norm": 0.0012249965220689774, "learning_rate": 1.217513482827136e-05, "loss": 7.832050323486328e-06, "step": 309530 }, { "epoch": 87.86261708770934, "grad_norm": 0.0006915060221217573, "learning_rate": 1.2172296338348e-05, "loss": 8.862093091011048e-06, "step": 309540 }, { "epoch": 87.8654555776327, "grad_norm": 0.0019685663282871246, "learning_rate": 1.2169457848424638e-05, "loss": 1.3423524796962738e-05, "step": 309550 }, { "epoch": 87.86829406755606, "grad_norm": 0.0008831890299916267, "learning_rate": 1.2166619358501279e-05, "loss": 3.6168843507766723e-06, "step": 309560 }, { "epoch": 87.87113255747943, "grad_norm": 0.0047881463542580605, "learning_rate": 1.2163780868577917e-05, "loss": 7.195398211479187e-06, "step": 309570 }, { "epoch": 87.87397104740278, "grad_norm": 0.001571895438246429, "learning_rate": 1.2160942378654557e-05, "loss": 4.182197153568268e-06, "step": 309580 }, { "epoch": 87.87680953732614, "grad_norm": 0.0031667693983763456, "learning_rate": 1.2158103888731195e-05, "loss": 7.748417556285857e-06, "step": 309590 }, { "epoch": 87.8796480272495, "grad_norm": 0.0010187511797994375, "learning_rate": 1.2155265398807834e-05, "loss": 1.1505372822284698e-05, "step": 309600 }, { "epoch": 87.88248651717286, "grad_norm": 0.0007465626113116741, "learning_rate": 1.2152426908884474e-05, "loss": 3.703683614730835e-06, "step": 309610 }, { "epoch": 87.88532500709623, "grad_norm": 0.004054366610944271, "learning_rate": 1.2149588418961114e-05, "loss": 4.60110604763031e-06, "step": 309620 }, { "epoch": 87.88816349701959, "grad_norm": 0.001564034610055387, "learning_rate": 1.2146749929037753e-05, "loss": 7.873587310314179e-06, "step": 309630 }, { "epoch": 87.89100198694295, "grad_norm": 0.0015814483631402254, "learning_rate": 1.2143911439114391e-05, "loss": 1.0188296437263489e-05, "step": 309640 }, { "epoch": 87.8938404768663, "grad_norm": 0.001396847772412002, "learning_rate": 1.2141072949191031e-05, "loss": 5.186162889003754e-06, "step": 309650 }, { "epoch": 87.89667896678966, "grad_norm": 0.00041092088213190436, "learning_rate": 1.213823445926767e-05, "loss": 6.2607228755950924e-06, "step": 309660 }, { "epoch": 87.89951745671303, "grad_norm": 0.0034072131384164095, "learning_rate": 1.213539596934431e-05, "loss": 8.707866072654724e-06, "step": 309670 }, { "epoch": 87.90235594663639, "grad_norm": 0.00037083361530676484, "learning_rate": 1.2132557479420948e-05, "loss": 4.161335527896881e-06, "step": 309680 }, { "epoch": 87.90519443655975, "grad_norm": 0.0036818061489611864, "learning_rate": 1.2129718989497588e-05, "loss": 8.328817784786224e-06, "step": 309690 }, { "epoch": 87.90803292648312, "grad_norm": 0.005699770990759134, "learning_rate": 1.2126880499574226e-05, "loss": 3.8113445043563844e-06, "step": 309700 }, { "epoch": 87.91087141640647, "grad_norm": 0.00044932743185199797, "learning_rate": 1.2124042009650867e-05, "loss": 5.702674388885498e-06, "step": 309710 }, { "epoch": 87.91370990632983, "grad_norm": 0.003564999671652913, "learning_rate": 1.2121203519727505e-05, "loss": 4.1764229536056515e-06, "step": 309720 }, { "epoch": 87.91654839625319, "grad_norm": 0.0008918644161894917, "learning_rate": 1.2118365029804145e-05, "loss": 5.974061787128449e-06, "step": 309730 }, { "epoch": 87.91938688617655, "grad_norm": 0.0020162053406238556, "learning_rate": 1.2115526539880783e-05, "loss": 8.213892579078675e-06, "step": 309740 }, { "epoch": 87.92222537609992, "grad_norm": 0.0006392770446836948, "learning_rate": 1.2112688049957423e-05, "loss": 1.3940595090389252e-05, "step": 309750 }, { "epoch": 87.92506386602328, "grad_norm": 0.0027520686853677034, "learning_rate": 1.2109849560034062e-05, "loss": 5.398504436016083e-06, "step": 309760 }, { "epoch": 87.92790235594664, "grad_norm": 0.0031720467377454042, "learning_rate": 1.2107011070110702e-05, "loss": 7.289834320545196e-06, "step": 309770 }, { "epoch": 87.93074084586999, "grad_norm": 0.0016674366779625416, "learning_rate": 1.210417258018734e-05, "loss": 9.766779839992523e-06, "step": 309780 }, { "epoch": 87.93357933579335, "grad_norm": 0.0012842192081734538, "learning_rate": 1.210133409026398e-05, "loss": 7.662177085876466e-06, "step": 309790 }, { "epoch": 87.93641782571672, "grad_norm": 0.0005065588629804552, "learning_rate": 1.209849560034062e-05, "loss": 5.460157990455628e-06, "step": 309800 }, { "epoch": 87.93925631564008, "grad_norm": 0.0019238266395404935, "learning_rate": 1.209565711041726e-05, "loss": 6.254762411117554e-06, "step": 309810 }, { "epoch": 87.94209480556344, "grad_norm": 0.001194869284518063, "learning_rate": 1.2092818620493899e-05, "loss": 6.3139945268630985e-06, "step": 309820 }, { "epoch": 87.9449332954868, "grad_norm": 0.0010477205505594611, "learning_rate": 1.2089980130570537e-05, "loss": 7.350929081439972e-06, "step": 309830 }, { "epoch": 87.94777178541017, "grad_norm": 0.027070553973317146, "learning_rate": 1.2087141640647177e-05, "loss": 9.397603571414948e-06, "step": 309840 }, { "epoch": 87.95061027533352, "grad_norm": 0.001715984777547419, "learning_rate": 1.2084303150723814e-05, "loss": 1.0537542402744294e-05, "step": 309850 }, { "epoch": 87.95344876525688, "grad_norm": 0.002381306141614914, "learning_rate": 1.2081464660800456e-05, "loss": 2.114381641149521e-05, "step": 309860 }, { "epoch": 87.95628725518024, "grad_norm": 0.05219043791294098, "learning_rate": 1.2078626170877094e-05, "loss": 1.5533342957496642e-05, "step": 309870 }, { "epoch": 87.9591257451036, "grad_norm": 0.00010786082566482946, "learning_rate": 1.2075787680953734e-05, "loss": 1.117754727602005e-05, "step": 309880 }, { "epoch": 87.96196423502697, "grad_norm": 0.0002999699499923736, "learning_rate": 1.2072949191030372e-05, "loss": 1.053065061569214e-05, "step": 309890 }, { "epoch": 87.96480272495033, "grad_norm": 0.0014119880506768823, "learning_rate": 1.2070110701107011e-05, "loss": 4.8907473683357235e-06, "step": 309900 }, { "epoch": 87.96764121487368, "grad_norm": 0.006325399968773127, "learning_rate": 1.2067272211183651e-05, "loss": 6.8409368395805355e-06, "step": 309910 }, { "epoch": 87.97047970479704, "grad_norm": 0.0011233427794650197, "learning_rate": 1.206443372126029e-05, "loss": 1.2340210378170014e-05, "step": 309920 }, { "epoch": 87.9733181947204, "grad_norm": 0.0006449701031669974, "learning_rate": 1.2061595231336929e-05, "loss": 5.113333463668823e-06, "step": 309930 }, { "epoch": 87.97615668464377, "grad_norm": 0.004161448683589697, "learning_rate": 1.2058756741413568e-05, "loss": 6.068684160709381e-06, "step": 309940 }, { "epoch": 87.97899517456713, "grad_norm": 0.0015126664657145739, "learning_rate": 1.2055918251490208e-05, "loss": 8.688494563102723e-06, "step": 309950 }, { "epoch": 87.9818336644905, "grad_norm": 0.0010768447536975145, "learning_rate": 1.2053079761566848e-05, "loss": 4.1801482439041134e-06, "step": 309960 }, { "epoch": 87.98467215441386, "grad_norm": 0.0012709297006949782, "learning_rate": 1.2050241271643487e-05, "loss": 3.6956742405891417e-06, "step": 309970 }, { "epoch": 87.9875106443372, "grad_norm": 0.002390065463259816, "learning_rate": 1.2047402781720125e-05, "loss": 4.573911428451538e-06, "step": 309980 }, { "epoch": 87.99034913426057, "grad_norm": 0.001089269993826747, "learning_rate": 1.2044564291796765e-05, "loss": 6.189011037349701e-06, "step": 309990 }, { "epoch": 87.99318762418393, "grad_norm": 0.0032919885125011206, "learning_rate": 1.2041725801873403e-05, "loss": 6.854534149169922e-06, "step": 310000 }, { "epoch": 87.99318762418393, "eval_accuracy": 0.988872639409932, "eval_loss": 0.04979044571518898, "eval_runtime": 35.8768, "eval_samples_per_second": 438.361, "eval_steps_per_second": 6.857, "step": 310000 }, { "epoch": 87.9960261141073, "grad_norm": 0.0004756319976877421, "learning_rate": 1.2038887311950044e-05, "loss": 4.416704177856445e-06, "step": 310010 }, { "epoch": 87.99886460403066, "grad_norm": 0.0020457087084650993, "learning_rate": 1.2036048822026682e-05, "loss": 1.0318867862224578e-05, "step": 310020 }, { "epoch": 88.00170309395402, "grad_norm": 0.0014463047264143825, "learning_rate": 1.2033210332103322e-05, "loss": 4.085881664650515e-06, "step": 310030 }, { "epoch": 88.00454158387738, "grad_norm": 0.0018812716007232666, "learning_rate": 1.203037184217996e-05, "loss": 6.3071027398109434e-06, "step": 310040 }, { "epoch": 88.00738007380073, "grad_norm": 0.00085917190881446, "learning_rate": 1.20275333522566e-05, "loss": 4.54094260931015e-06, "step": 310050 }, { "epoch": 88.0102185637241, "grad_norm": 0.001328235142864287, "learning_rate": 1.202469486233324e-05, "loss": 1.3185478746891021e-05, "step": 310060 }, { "epoch": 88.01305705364746, "grad_norm": 0.009494055993855, "learning_rate": 1.2021856372409879e-05, "loss": 5.182623863220215e-06, "step": 310070 }, { "epoch": 88.01589554357082, "grad_norm": 0.0010647921590134501, "learning_rate": 1.2019017882486517e-05, "loss": 8.69259238243103e-06, "step": 310080 }, { "epoch": 88.01873403349418, "grad_norm": 0.0017227292992174625, "learning_rate": 1.2016179392563157e-05, "loss": 8.43685120344162e-06, "step": 310090 }, { "epoch": 88.02157252341755, "grad_norm": 0.0011137896217405796, "learning_rate": 1.2013340902639796e-05, "loss": 4.5889988541603085e-06, "step": 310100 }, { "epoch": 88.02441101334091, "grad_norm": 0.002266068011522293, "learning_rate": 1.2010502412716436e-05, "loss": 7.510371506214142e-06, "step": 310110 }, { "epoch": 88.02724950326426, "grad_norm": 0.0021680649369955063, "learning_rate": 1.2007663922793074e-05, "loss": 7.193349301815033e-06, "step": 310120 }, { "epoch": 88.03008799318762, "grad_norm": 0.010424858890473843, "learning_rate": 1.2004825432869714e-05, "loss": 5.3388997912406925e-06, "step": 310130 }, { "epoch": 88.03292648311098, "grad_norm": 0.012115629389882088, "learning_rate": 1.2001986942946353e-05, "loss": 6.98678195476532e-06, "step": 310140 }, { "epoch": 88.03576497303435, "grad_norm": 0.0021407115273177624, "learning_rate": 1.1999148453022991e-05, "loss": 5.605816841125488e-06, "step": 310150 }, { "epoch": 88.03860346295771, "grad_norm": 0.0015737077919766307, "learning_rate": 1.1996309963099633e-05, "loss": 2.069380134344101e-05, "step": 310160 }, { "epoch": 88.04144195288107, "grad_norm": 0.002912272699177265, "learning_rate": 1.199347147317627e-05, "loss": 0.00015151258558034896, "step": 310170 }, { "epoch": 88.04428044280442, "grad_norm": 0.001502651022747159, "learning_rate": 1.199063298325291e-05, "loss": 0.0015826968476176262, "step": 310180 }, { "epoch": 88.04711893272778, "grad_norm": 0.021610328927636147, "learning_rate": 1.1987794493329548e-05, "loss": 1.2576393783092498e-05, "step": 310190 }, { "epoch": 88.04995742265115, "grad_norm": 0.004200757946819067, "learning_rate": 1.1984956003406188e-05, "loss": 0.002633134461939335, "step": 310200 }, { "epoch": 88.05279591257451, "grad_norm": 0.0060583557933568954, "learning_rate": 1.1982117513482828e-05, "loss": 0.0001824731007218361, "step": 310210 }, { "epoch": 88.05563440249787, "grad_norm": 0.0018391861813142896, "learning_rate": 1.1979279023559467e-05, "loss": 5.826856940984726e-05, "step": 310220 }, { "epoch": 88.05847289242124, "grad_norm": 0.0014166315086185932, "learning_rate": 1.1976440533636105e-05, "loss": 0.0003297792747616768, "step": 310230 }, { "epoch": 88.0613113823446, "grad_norm": 0.03199819475412369, "learning_rate": 1.1973602043712745e-05, "loss": 0.00011912882328033448, "step": 310240 }, { "epoch": 88.06414987226795, "grad_norm": 0.24509969353675842, "learning_rate": 1.1971047402781722e-05, "loss": 0.010212814062833786, "step": 310250 }, { "epoch": 88.06698836219131, "grad_norm": 0.0013864842476323247, "learning_rate": 1.196820891285836e-05, "loss": 2.451762557029724e-05, "step": 310260 }, { "epoch": 88.06982685211467, "grad_norm": 0.0027416462544351816, "learning_rate": 1.1965370422935e-05, "loss": 4.4541247189044955e-05, "step": 310270 }, { "epoch": 88.07266534203804, "grad_norm": 0.0061348541639745235, "learning_rate": 1.1962531933011637e-05, "loss": 0.0047793429344892505, "step": 310280 }, { "epoch": 88.0755038319614, "grad_norm": 0.0020149867050349712, "learning_rate": 1.1959693443088279e-05, "loss": 4.649907350540161e-05, "step": 310290 }, { "epoch": 88.07834232188476, "grad_norm": 0.001799452700652182, "learning_rate": 1.1956854953164917e-05, "loss": 0.00014550555497407913, "step": 310300 }, { "epoch": 88.08118081180812, "grad_norm": 0.005927516147494316, "learning_rate": 1.1954016463241556e-05, "loss": 0.00011232756078243256, "step": 310310 }, { "epoch": 88.08401930173147, "grad_norm": 0.007535212207585573, "learning_rate": 1.1951177973318194e-05, "loss": 0.0001734299585223198, "step": 310320 }, { "epoch": 88.08685779165484, "grad_norm": 0.0066594709642231464, "learning_rate": 1.1948339483394834e-05, "loss": 0.000165533646941185, "step": 310330 }, { "epoch": 88.0896962815782, "grad_norm": 0.003913396038115025, "learning_rate": 1.1945500993471474e-05, "loss": 3.577042371034622e-05, "step": 310340 }, { "epoch": 88.09253477150156, "grad_norm": 0.014220607466995716, "learning_rate": 1.1942662503548113e-05, "loss": 2.6472099125385285e-05, "step": 310350 }, { "epoch": 88.09537326142492, "grad_norm": 0.041141267865896225, "learning_rate": 1.1939824013624751e-05, "loss": 3.488883376121521e-05, "step": 310360 }, { "epoch": 88.09821175134829, "grad_norm": 0.002242441289126873, "learning_rate": 1.1936985523701391e-05, "loss": 2.3177079856395722e-05, "step": 310370 }, { "epoch": 88.10105024127165, "grad_norm": 0.0033038316760212183, "learning_rate": 1.193414703377803e-05, "loss": 3.805011510848999e-05, "step": 310380 }, { "epoch": 88.103888731195, "grad_norm": 0.004501772113144398, "learning_rate": 1.193130854385467e-05, "loss": 0.00026791300624608996, "step": 310390 }, { "epoch": 88.10672722111836, "grad_norm": 0.0009065655758604407, "learning_rate": 1.1928753902923645e-05, "loss": 0.001828249730169773, "step": 310400 }, { "epoch": 88.10956571104172, "grad_norm": 0.014328764751553535, "learning_rate": 1.1925915413000283e-05, "loss": 9.939167648553848e-05, "step": 310410 }, { "epoch": 88.11240420096509, "grad_norm": 0.004209865815937519, "learning_rate": 1.1923076923076925e-05, "loss": 3.271382302045822e-05, "step": 310420 }, { "epoch": 88.11524269088845, "grad_norm": 0.007344994693994522, "learning_rate": 1.1920238433153563e-05, "loss": 4.9034133553504944e-05, "step": 310430 }, { "epoch": 88.11808118081181, "grad_norm": 0.003128504380583763, "learning_rate": 1.1917399943230202e-05, "loss": 2.2129155695438385e-05, "step": 310440 }, { "epoch": 88.12091967073516, "grad_norm": 0.0003539593890309334, "learning_rate": 1.1914561453306842e-05, "loss": 5.885716527700424e-05, "step": 310450 }, { "epoch": 88.12375816065853, "grad_norm": 0.001884007710032165, "learning_rate": 1.191172296338348e-05, "loss": 9.078159928321839e-06, "step": 310460 }, { "epoch": 88.12659665058189, "grad_norm": 0.003865548875182867, "learning_rate": 1.190888447346012e-05, "loss": 4.705172032117844e-05, "step": 310470 }, { "epoch": 88.12943514050525, "grad_norm": 0.02977333590388298, "learning_rate": 1.190604598353676e-05, "loss": 1.9811280071735383e-05, "step": 310480 }, { "epoch": 88.13227363042861, "grad_norm": 0.001322967349551618, "learning_rate": 1.1903207493613399e-05, "loss": 1.5492923557758333e-05, "step": 310490 }, { "epoch": 88.13511212035198, "grad_norm": 0.04046309366822243, "learning_rate": 1.1900369003690037e-05, "loss": 2.7168914675712587e-05, "step": 310500 }, { "epoch": 88.13511212035198, "eval_accuracy": 0.9863292427036306, "eval_loss": 0.05742533132433891, "eval_runtime": 35.6482, "eval_samples_per_second": 441.172, "eval_steps_per_second": 6.901, "step": 310500 }, { "epoch": 88.13795061027534, "grad_norm": 0.002434880705550313, "learning_rate": 1.1897530513766677e-05, "loss": 8.98987054824829e-06, "step": 310510 }, { "epoch": 88.14078910019869, "grad_norm": 0.0037490171380341053, "learning_rate": 1.1894692023843316e-05, "loss": 8.200481534004212e-06, "step": 310520 }, { "epoch": 88.14362759012205, "grad_norm": 0.0028425005730241537, "learning_rate": 1.1891853533919956e-05, "loss": 7.182937115430832e-05, "step": 310530 }, { "epoch": 88.14646608004541, "grad_norm": 0.10817206650972366, "learning_rate": 1.1889015043996594e-05, "loss": 0.00010503008961677551, "step": 310540 }, { "epoch": 88.14930456996878, "grad_norm": 0.0059608411975204945, "learning_rate": 1.1886176554073234e-05, "loss": 5.344096571207047e-05, "step": 310550 }, { "epoch": 88.15214305989214, "grad_norm": 0.0019105605315417051, "learning_rate": 1.1883338064149872e-05, "loss": 1.4472566545009612e-05, "step": 310560 }, { "epoch": 88.1549815498155, "grad_norm": 0.004694353323429823, "learning_rate": 1.1880499574226513e-05, "loss": 0.0001859772950410843, "step": 310570 }, { "epoch": 88.15782003973887, "grad_norm": 0.02691166289150715, "learning_rate": 1.1877661084303151e-05, "loss": 1.635141670703888e-05, "step": 310580 }, { "epoch": 88.16065852966221, "grad_norm": 0.03221261501312256, "learning_rate": 1.187482259437979e-05, "loss": 0.002140796184539795, "step": 310590 }, { "epoch": 88.16349701958558, "grad_norm": 0.0013944882666692138, "learning_rate": 1.1871984104456429e-05, "loss": 2.202950417995453e-05, "step": 310600 }, { "epoch": 88.16633550950894, "grad_norm": 0.0037827230989933014, "learning_rate": 1.1869145614533068e-05, "loss": 4.908088594675064e-05, "step": 310610 }, { "epoch": 88.1691739994323, "grad_norm": 0.0030986550264060497, "learning_rate": 1.1866307124609708e-05, "loss": 2.0736083388328553e-05, "step": 310620 }, { "epoch": 88.17201248935567, "grad_norm": 0.0009601112687960267, "learning_rate": 1.1863468634686348e-05, "loss": 1.2530200183391572e-05, "step": 310630 }, { "epoch": 88.17485097927903, "grad_norm": 1.1384577751159668, "learning_rate": 1.1860630144762987e-05, "loss": 0.00012012459337711334, "step": 310640 }, { "epoch": 88.17768946920238, "grad_norm": 0.0035086062271147966, "learning_rate": 1.1857791654839625e-05, "loss": 4.901587963104248e-05, "step": 310650 }, { "epoch": 88.18052795912574, "grad_norm": 0.0004843443457502872, "learning_rate": 1.1854953164916265e-05, "loss": 1.7120316624641418e-05, "step": 310660 }, { "epoch": 88.1833664490491, "grad_norm": 0.007365719880908728, "learning_rate": 1.1852114674992905e-05, "loss": 6.09174370765686e-05, "step": 310670 }, { "epoch": 88.18620493897247, "grad_norm": 0.0016174106858670712, "learning_rate": 1.1849276185069544e-05, "loss": 1.4815665781497956e-05, "step": 310680 }, { "epoch": 88.18904342889583, "grad_norm": 0.0037819126155227423, "learning_rate": 1.1846437695146182e-05, "loss": 2.0690634846687318e-05, "step": 310690 }, { "epoch": 88.19188191881919, "grad_norm": 0.008490076288580894, "learning_rate": 1.1843599205222822e-05, "loss": 0.0002818293869495392, "step": 310700 }, { "epoch": 88.19472040874255, "grad_norm": 0.000781192269641906, "learning_rate": 1.184076071529946e-05, "loss": 0.003537912294268608, "step": 310710 }, { "epoch": 88.1975588986659, "grad_norm": 0.0010019725887104869, "learning_rate": 1.1837922225376102e-05, "loss": 1.3034231960773468e-05, "step": 310720 }, { "epoch": 88.20039738858927, "grad_norm": 0.003496067365631461, "learning_rate": 1.183508373545274e-05, "loss": 1.8342584371566773e-05, "step": 310730 }, { "epoch": 88.20323587851263, "grad_norm": 0.0011953793000429869, "learning_rate": 1.183224524552938e-05, "loss": 0.0003564838320016861, "step": 310740 }, { "epoch": 88.20607436843599, "grad_norm": 0.0016742221778258681, "learning_rate": 1.1829406755606017e-05, "loss": 1.2149848043918609e-05, "step": 310750 }, { "epoch": 88.20891285835935, "grad_norm": 0.014035950414836407, "learning_rate": 1.1826568265682657e-05, "loss": 2.1938420832157136e-05, "step": 310760 }, { "epoch": 88.21175134828272, "grad_norm": 0.01130589284002781, "learning_rate": 1.1823729775759297e-05, "loss": 0.0018322139978408814, "step": 310770 }, { "epoch": 88.21458983820608, "grad_norm": 0.02055533602833748, "learning_rate": 1.1820891285835936e-05, "loss": 1.1850520968437194e-05, "step": 310780 }, { "epoch": 88.21742832812943, "grad_norm": 0.0013065977254882455, "learning_rate": 1.1818052795912574e-05, "loss": 0.0008957959711551666, "step": 310790 }, { "epoch": 88.22026681805279, "grad_norm": 0.3313116431236267, "learning_rate": 1.1815214305989214e-05, "loss": 3.654658794403076e-05, "step": 310800 }, { "epoch": 88.22310530797616, "grad_norm": 0.013681859709322453, "learning_rate": 1.1812375816065854e-05, "loss": 0.0007160441949963569, "step": 310810 }, { "epoch": 88.22594379789952, "grad_norm": 0.005500276573002338, "learning_rate": 1.1809537326142493e-05, "loss": 7.409807294607162e-05, "step": 310820 }, { "epoch": 88.22878228782288, "grad_norm": 0.004041893873363733, "learning_rate": 1.1806698836219133e-05, "loss": 3.2920204102993014e-05, "step": 310830 }, { "epoch": 88.23162077774624, "grad_norm": 0.08582139760255814, "learning_rate": 1.1803860346295771e-05, "loss": 2.9042176902294158e-05, "step": 310840 }, { "epoch": 88.2344592676696, "grad_norm": 0.001446784008294344, "learning_rate": 1.180102185637241e-05, "loss": 1.8933601677417754e-05, "step": 310850 }, { "epoch": 88.23729775759296, "grad_norm": 0.0017446837155148387, "learning_rate": 1.1798183366449049e-05, "loss": 7.083248347043991e-05, "step": 310860 }, { "epoch": 88.24013624751632, "grad_norm": 0.15353526175022125, "learning_rate": 1.179534487652569e-05, "loss": 2.3481063544750212e-05, "step": 310870 }, { "epoch": 88.24297473743968, "grad_norm": 0.006685703992843628, "learning_rate": 1.1792506386602328e-05, "loss": 1.4565140008926392e-05, "step": 310880 }, { "epoch": 88.24581322736304, "grad_norm": 0.004270540084689856, "learning_rate": 1.1789667896678968e-05, "loss": 1.0947883129119874e-05, "step": 310890 }, { "epoch": 88.2486517172864, "grad_norm": 0.015592792071402073, "learning_rate": 1.1786829406755606e-05, "loss": 1.3823993504047393e-05, "step": 310900 }, { "epoch": 88.25149020720977, "grad_norm": 0.003057905938476324, "learning_rate": 1.1783990916832245e-05, "loss": 1.232735812664032e-05, "step": 310910 }, { "epoch": 88.25432869713312, "grad_norm": 0.005672561936080456, "learning_rate": 1.1781152426908885e-05, "loss": 1.4466419816017152e-05, "step": 310920 }, { "epoch": 88.25716718705648, "grad_norm": 0.0014109876938164234, "learning_rate": 1.1778313936985525e-05, "loss": 8.020997047424317e-05, "step": 310930 }, { "epoch": 88.26000567697984, "grad_norm": 0.001968038035556674, "learning_rate": 1.1775475447062163e-05, "loss": 1.4812685549259186e-05, "step": 310940 }, { "epoch": 88.26284416690321, "grad_norm": 0.033050909638404846, "learning_rate": 1.1772636957138802e-05, "loss": 7.373411208391189e-05, "step": 310950 }, { "epoch": 88.26568265682657, "grad_norm": 0.0004598122905008495, "learning_rate": 1.1769798467215442e-05, "loss": 1.6555935144424438e-05, "step": 310960 }, { "epoch": 88.26852114674993, "grad_norm": 0.0008883204427547753, "learning_rate": 1.1766959977292082e-05, "loss": 1.1138245463371277e-05, "step": 310970 }, { "epoch": 88.2713596366733, "grad_norm": 0.00568748451769352, "learning_rate": 1.1764121487368721e-05, "loss": 2.0251236855983733e-05, "step": 310980 }, { "epoch": 88.27419812659664, "grad_norm": 0.0018845899030566216, "learning_rate": 1.176128299744536e-05, "loss": 1.1065974831581116e-05, "step": 310990 }, { "epoch": 88.27703661652001, "grad_norm": 0.027574844658374786, "learning_rate": 1.1758444507521999e-05, "loss": 1.3081170618534087e-05, "step": 311000 }, { "epoch": 88.27703661652001, "eval_accuracy": 0.9873466013861512, "eval_loss": 0.05676136910915375, "eval_runtime": 35.8624, "eval_samples_per_second": 438.537, "eval_steps_per_second": 6.86, "step": 311000 }, { "epoch": 88.27987510644337, "grad_norm": 0.00650312565267086, "learning_rate": 1.1755606017598639e-05, "loss": 1.1405907571315765e-05, "step": 311010 }, { "epoch": 88.28271359636673, "grad_norm": 0.0015209114644676447, "learning_rate": 1.1752767527675278e-05, "loss": 1.4515221118927002e-05, "step": 311020 }, { "epoch": 88.2855520862901, "grad_norm": 0.0038594186771661043, "learning_rate": 1.1749929037751916e-05, "loss": 8.688680827617645e-06, "step": 311030 }, { "epoch": 88.28839057621346, "grad_norm": 0.0016693603247404099, "learning_rate": 1.1747090547828556e-05, "loss": 6.6677108407020565e-06, "step": 311040 }, { "epoch": 88.29122906613682, "grad_norm": 0.034242432564496994, "learning_rate": 1.1744252057905194e-05, "loss": 5.658380687236786e-05, "step": 311050 }, { "epoch": 88.29406755606017, "grad_norm": 0.0012225464452058077, "learning_rate": 1.1741413567981835e-05, "loss": 8.728355169296265e-06, "step": 311060 }, { "epoch": 88.29690604598353, "grad_norm": 0.00231204298324883, "learning_rate": 1.1738575078058473e-05, "loss": 3.0518509447574615e-05, "step": 311070 }, { "epoch": 88.2997445359069, "grad_norm": 0.0019648035522550344, "learning_rate": 1.1735736588135113e-05, "loss": 1.0660849511623383e-05, "step": 311080 }, { "epoch": 88.30258302583026, "grad_norm": 0.0009073036490008235, "learning_rate": 1.1732898098211751e-05, "loss": 1.6289204359054567e-05, "step": 311090 }, { "epoch": 88.30542151575362, "grad_norm": 0.0015294513432309031, "learning_rate": 1.173005960828839e-05, "loss": 7.208436727523804e-06, "step": 311100 }, { "epoch": 88.30826000567698, "grad_norm": 0.00040093230200000107, "learning_rate": 1.172722111836503e-05, "loss": 1.6200914978981017e-05, "step": 311110 }, { "epoch": 88.31109849560033, "grad_norm": 0.0019216666696593165, "learning_rate": 1.172438262844167e-05, "loss": 1.7281807959079743e-05, "step": 311120 }, { "epoch": 88.3139369855237, "grad_norm": 0.004031251650303602, "learning_rate": 1.1721544138518308e-05, "loss": 8.904002606868744e-06, "step": 311130 }, { "epoch": 88.31677547544706, "grad_norm": 0.0025498229078948498, "learning_rate": 1.1718705648594948e-05, "loss": 7.87489116191864e-06, "step": 311140 }, { "epoch": 88.31961396537042, "grad_norm": 0.013624900951981544, "learning_rate": 1.1715867158671587e-05, "loss": 1.710541546344757e-05, "step": 311150 }, { "epoch": 88.32245245529379, "grad_norm": 0.0012755153002217412, "learning_rate": 1.1713028668748227e-05, "loss": 1.467689871788025e-05, "step": 311160 }, { "epoch": 88.32529094521715, "grad_norm": 0.0028416570276021957, "learning_rate": 1.1710190178824867e-05, "loss": 7.656589150428773e-06, "step": 311170 }, { "epoch": 88.32812943514051, "grad_norm": 0.0010621987748891115, "learning_rate": 1.1707351688901505e-05, "loss": 1.0298378765583038e-05, "step": 311180 }, { "epoch": 88.33096792506386, "grad_norm": 0.0005467051523737609, "learning_rate": 1.1704513198978144e-05, "loss": 2.6158802211284637e-05, "step": 311190 }, { "epoch": 88.33380641498722, "grad_norm": 0.0010594911873340607, "learning_rate": 1.1701674709054782e-05, "loss": 8.006207644939423e-06, "step": 311200 }, { "epoch": 88.33664490491059, "grad_norm": 0.0028799385763704777, "learning_rate": 1.1698836219131424e-05, "loss": 6.526894867420196e-06, "step": 311210 }, { "epoch": 88.33948339483395, "grad_norm": 0.001851809793151915, "learning_rate": 1.1695997729208062e-05, "loss": 5.5335462093353275e-06, "step": 311220 }, { "epoch": 88.34232188475731, "grad_norm": 0.002280697925016284, "learning_rate": 1.1693159239284702e-05, "loss": 8.375756442546844e-06, "step": 311230 }, { "epoch": 88.34516037468067, "grad_norm": 0.0029286558274179697, "learning_rate": 1.169032074936134e-05, "loss": 7.646158337593079e-06, "step": 311240 }, { "epoch": 88.34799886460404, "grad_norm": 0.0018923439783975482, "learning_rate": 1.168748225943798e-05, "loss": 7.270276546478271e-06, "step": 311250 }, { "epoch": 88.35083735452739, "grad_norm": 0.0007556392811238766, "learning_rate": 1.1684643769514619e-05, "loss": 5.7656317949295046e-06, "step": 311260 }, { "epoch": 88.35367584445075, "grad_norm": 0.006973192095756531, "learning_rate": 1.1681805279591259e-05, "loss": 3.697909414768219e-05, "step": 311270 }, { "epoch": 88.35651433437411, "grad_norm": 0.0003802731225732714, "learning_rate": 1.1678966789667897e-05, "loss": 6.625056266784668e-06, "step": 311280 }, { "epoch": 88.35935282429747, "grad_norm": 0.0008548288606107235, "learning_rate": 1.1676128299744536e-05, "loss": 7.568113505840301e-06, "step": 311290 }, { "epoch": 88.36219131422084, "grad_norm": 0.0013265418820083141, "learning_rate": 1.1673289809821176e-05, "loss": 8.109025657176971e-06, "step": 311300 }, { "epoch": 88.3650298041442, "grad_norm": 0.0008801744552329183, "learning_rate": 1.1670451319897816e-05, "loss": 2.0489655435085295e-05, "step": 311310 }, { "epoch": 88.36786829406756, "grad_norm": 0.047530923038721085, "learning_rate": 1.1667612829974455e-05, "loss": 1.3295933604240417e-05, "step": 311320 }, { "epoch": 88.37070678399091, "grad_norm": 0.022769922390580177, "learning_rate": 1.1664774340051093e-05, "loss": 1.1317245662212371e-05, "step": 311330 }, { "epoch": 88.37354527391427, "grad_norm": 0.004264147486537695, "learning_rate": 1.1661935850127733e-05, "loss": 5.427747964859009e-06, "step": 311340 }, { "epoch": 88.37638376383764, "grad_norm": 0.006986344698816538, "learning_rate": 1.1659097360204371e-05, "loss": 7.815100252628326e-06, "step": 311350 }, { "epoch": 88.379222253761, "grad_norm": 0.004799274727702141, "learning_rate": 1.1656258870281012e-05, "loss": 5.468912422657013e-06, "step": 311360 }, { "epoch": 88.38206074368436, "grad_norm": 0.0005428754375316203, "learning_rate": 1.165342038035765e-05, "loss": 6.686896085739136e-06, "step": 311370 }, { "epoch": 88.38489923360773, "grad_norm": 0.00639538886025548, "learning_rate": 1.165058189043429e-05, "loss": 1.1379644274711608e-05, "step": 311380 }, { "epoch": 88.38773772353107, "grad_norm": 0.014282144606113434, "learning_rate": 1.1647743400510928e-05, "loss": 5.996599793434143e-06, "step": 311390 }, { "epoch": 88.39057621345444, "grad_norm": 0.0008824744145385921, "learning_rate": 1.1644904910587568e-05, "loss": 8.124299347400665e-06, "step": 311400 }, { "epoch": 88.3934147033778, "grad_norm": 0.0018933978863060474, "learning_rate": 1.1642066420664207e-05, "loss": 7.265619933605194e-06, "step": 311410 }, { "epoch": 88.39625319330116, "grad_norm": 0.0005549456691369414, "learning_rate": 1.1639227930740847e-05, "loss": 9.997934103012086e-06, "step": 311420 }, { "epoch": 88.39909168322453, "grad_norm": 0.0007150114397518337, "learning_rate": 1.1636389440817485e-05, "loss": 1.08230859041214e-05, "step": 311430 }, { "epoch": 88.40193017314789, "grad_norm": 0.0004778352740686387, "learning_rate": 1.1633550950894125e-05, "loss": 4.577264189720154e-06, "step": 311440 }, { "epoch": 88.40476866307125, "grad_norm": 0.0014035578351467848, "learning_rate": 1.1630712460970764e-05, "loss": 1.0850466787815094e-05, "step": 311450 }, { "epoch": 88.4076071529946, "grad_norm": 0.0005975113017484546, "learning_rate": 1.1627873971047404e-05, "loss": 4.882365465164184e-06, "step": 311460 }, { "epoch": 88.41044564291796, "grad_norm": 0.001953571103513241, "learning_rate": 1.1625035481124042e-05, "loss": 4.807859659194946e-06, "step": 311470 }, { "epoch": 88.41328413284133, "grad_norm": 0.0018073517130687833, "learning_rate": 1.1622196991200682e-05, "loss": 6.806477904319763e-06, "step": 311480 }, { "epoch": 88.41612262276469, "grad_norm": 0.0007423461647704244, "learning_rate": 1.1619358501277321e-05, "loss": 7.71600753068924e-06, "step": 311490 }, { "epoch": 88.41896111268805, "grad_norm": 0.00031597327324561775, "learning_rate": 1.161652001135396e-05, "loss": 3.1594187021255494e-06, "step": 311500 }, { "epoch": 88.41896111268805, "eval_accuracy": 0.9889362243275895, "eval_loss": 0.05224116891622543, "eval_runtime": 35.7762, "eval_samples_per_second": 439.594, "eval_steps_per_second": 6.876, "step": 311500 }, { "epoch": 88.42179960261142, "grad_norm": 0.007292232941836119, "learning_rate": 1.16136815214306e-05, "loss": 8.685886859893798e-06, "step": 311510 }, { "epoch": 88.42463809253478, "grad_norm": 0.00017204732284881175, "learning_rate": 1.1610843031507239e-05, "loss": 1.3054721057415008e-05, "step": 311520 }, { "epoch": 88.42747658245813, "grad_norm": 0.0007652104250155389, "learning_rate": 1.1608004541583878e-05, "loss": 6.431341171264649e-06, "step": 311530 }, { "epoch": 88.43031507238149, "grad_norm": 0.001925979508087039, "learning_rate": 1.1605166051660516e-05, "loss": 2.3706443607807158e-05, "step": 311540 }, { "epoch": 88.43315356230485, "grad_norm": 0.009593462571501732, "learning_rate": 1.1602327561737156e-05, "loss": 8.535012602806091e-06, "step": 311550 }, { "epoch": 88.43599205222822, "grad_norm": 0.0033839677926152945, "learning_rate": 1.1599489071813796e-05, "loss": 1.5335530042648317e-05, "step": 311560 }, { "epoch": 88.43883054215158, "grad_norm": 0.0011122809955850244, "learning_rate": 1.1596650581890435e-05, "loss": 4.9281865358352665e-06, "step": 311570 }, { "epoch": 88.44166903207494, "grad_norm": 0.00506626395508647, "learning_rate": 1.1593812091967073e-05, "loss": 8.900836110115052e-06, "step": 311580 }, { "epoch": 88.4445075219983, "grad_norm": 0.003249753499403596, "learning_rate": 1.1590973602043713e-05, "loss": 2.377089112997055e-05, "step": 311590 }, { "epoch": 88.44734601192165, "grad_norm": 0.01620853692293167, "learning_rate": 1.1588135112120351e-05, "loss": 7.932260632514954e-06, "step": 311600 }, { "epoch": 88.45018450184502, "grad_norm": 0.0011676851427182555, "learning_rate": 1.1585296622196992e-05, "loss": 4.963390529155731e-06, "step": 311610 }, { "epoch": 88.45302299176838, "grad_norm": 0.0010049054399132729, "learning_rate": 1.158245813227363e-05, "loss": 2.1394528448581696e-05, "step": 311620 }, { "epoch": 88.45586148169174, "grad_norm": 0.00034039211459457874, "learning_rate": 1.157961964235027e-05, "loss": 6.058253347873688e-06, "step": 311630 }, { "epoch": 88.4586999716151, "grad_norm": 0.004489822778850794, "learning_rate": 1.157678115242691e-05, "loss": 2.455972135066986e-05, "step": 311640 }, { "epoch": 88.46153846153847, "grad_norm": 0.0007300571305677295, "learning_rate": 1.157394266250355e-05, "loss": 6.580539047718048e-06, "step": 311650 }, { "epoch": 88.46437695146182, "grad_norm": 0.0008065724396146834, "learning_rate": 1.1571104172580189e-05, "loss": 6.4838677644729614e-06, "step": 311660 }, { "epoch": 88.46721544138518, "grad_norm": 0.01599934697151184, "learning_rate": 1.1568265682656827e-05, "loss": 6.134063005447387e-06, "step": 311670 }, { "epoch": 88.47005393130854, "grad_norm": 0.0014066450530663133, "learning_rate": 1.1565427192733467e-05, "loss": 8.36886465549469e-06, "step": 311680 }, { "epoch": 88.4728924212319, "grad_norm": 0.0009950747480615973, "learning_rate": 1.1562588702810105e-05, "loss": 8.006580173969269e-06, "step": 311690 }, { "epoch": 88.47573091115527, "grad_norm": 0.0004388986271806061, "learning_rate": 1.1559750212886746e-05, "loss": 9.17389988899231e-06, "step": 311700 }, { "epoch": 88.47856940107863, "grad_norm": 0.0021558774169534445, "learning_rate": 1.1556911722963384e-05, "loss": 8.676573634147644e-06, "step": 311710 }, { "epoch": 88.481407891002, "grad_norm": 0.0018857074901461601, "learning_rate": 1.1554073233040024e-05, "loss": 9.318068623542786e-06, "step": 311720 }, { "epoch": 88.48424638092534, "grad_norm": 0.001460029510781169, "learning_rate": 1.1551234743116662e-05, "loss": 9.964406490325928e-06, "step": 311730 }, { "epoch": 88.4870848708487, "grad_norm": 7.593802001792938e-05, "learning_rate": 1.1548396253193302e-05, "loss": 3.7390738725662233e-06, "step": 311740 }, { "epoch": 88.48992336077207, "grad_norm": 0.0023479864466935396, "learning_rate": 1.1545557763269941e-05, "loss": 1.1608190834522248e-05, "step": 311750 }, { "epoch": 88.49276185069543, "grad_norm": 0.006876468658447266, "learning_rate": 1.1542719273346581e-05, "loss": 1.0733120143413544e-05, "step": 311760 }, { "epoch": 88.4956003406188, "grad_norm": 0.0002819756919052452, "learning_rate": 1.1539880783423219e-05, "loss": 6.132759153842926e-06, "step": 311770 }, { "epoch": 88.49843883054216, "grad_norm": 0.0005411819438450038, "learning_rate": 1.1537042293499859e-05, "loss": 5.6799501180648805e-06, "step": 311780 }, { "epoch": 88.50127732046552, "grad_norm": 0.002286961767822504, "learning_rate": 1.1534203803576498e-05, "loss": 6.598420441150665e-06, "step": 311790 }, { "epoch": 88.50411581038887, "grad_norm": 0.0007626906153745949, "learning_rate": 1.1531365313653138e-05, "loss": 5.743838846683502e-06, "step": 311800 }, { "epoch": 88.50695430031223, "grad_norm": 0.0008946068701334298, "learning_rate": 1.1528526823729776e-05, "loss": 5.071982741355896e-06, "step": 311810 }, { "epoch": 88.5097927902356, "grad_norm": 0.0011231532553210855, "learning_rate": 1.1525688333806416e-05, "loss": 7.644109427928924e-06, "step": 311820 }, { "epoch": 88.51263128015896, "grad_norm": 0.004432579502463341, "learning_rate": 1.1522849843883055e-05, "loss": 9.250827133655548e-06, "step": 311830 }, { "epoch": 88.51546977008232, "grad_norm": 0.0026412918232381344, "learning_rate": 1.1520011353959693e-05, "loss": 6.065331399440766e-06, "step": 311840 }, { "epoch": 88.51830826000568, "grad_norm": 0.0001678154367255047, "learning_rate": 1.1517172864036335e-05, "loss": 3.092922270298004e-06, "step": 311850 }, { "epoch": 88.52114674992903, "grad_norm": 0.0010337381390854716, "learning_rate": 1.1514334374112973e-05, "loss": 2.1832995116710663e-05, "step": 311860 }, { "epoch": 88.5239852398524, "grad_norm": 0.0036066530738025904, "learning_rate": 1.1511495884189612e-05, "loss": 9.632669389247894e-06, "step": 311870 }, { "epoch": 88.52682372977576, "grad_norm": 0.0015284972032532096, "learning_rate": 1.150865739426625e-05, "loss": 6.877258419990539e-06, "step": 311880 }, { "epoch": 88.52966221969912, "grad_norm": 0.0011755188461393118, "learning_rate": 1.150581890434289e-05, "loss": 4.309043288230896e-06, "step": 311890 }, { "epoch": 88.53250070962248, "grad_norm": 0.003203969681635499, "learning_rate": 1.150298041441953e-05, "loss": 4.652906209230423e-05, "step": 311900 }, { "epoch": 88.53533919954585, "grad_norm": 0.0004825422365684062, "learning_rate": 1.150014192449617e-05, "loss": 6.4318999648094176e-06, "step": 311910 }, { "epoch": 88.53817768946921, "grad_norm": 0.0028934550937265158, "learning_rate": 1.1497303434572807e-05, "loss": 8.616596460342407e-06, "step": 311920 }, { "epoch": 88.54101617939256, "grad_norm": 0.00558083551004529, "learning_rate": 1.1494464944649447e-05, "loss": 6.783194839954376e-06, "step": 311930 }, { "epoch": 88.54385466931592, "grad_norm": 0.040793526917696, "learning_rate": 1.1491626454726085e-05, "loss": 1.9180215895175932e-05, "step": 311940 }, { "epoch": 88.54669315923928, "grad_norm": 0.0006082222680561244, "learning_rate": 1.1488787964802726e-05, "loss": 7.907487452030182e-06, "step": 311950 }, { "epoch": 88.54953164916265, "grad_norm": 0.005914020352065563, "learning_rate": 1.1485949474879364e-05, "loss": 8.5342675447464e-06, "step": 311960 }, { "epoch": 88.55237013908601, "grad_norm": 0.003978665918111801, "learning_rate": 1.1483110984956004e-05, "loss": 7.494352757930756e-06, "step": 311970 }, { "epoch": 88.55520862900937, "grad_norm": 0.00800298247486353, "learning_rate": 1.1480272495032644e-05, "loss": 1.1436827480793e-05, "step": 311980 }, { "epoch": 88.55804711893273, "grad_norm": 0.00027464982122182846, "learning_rate": 1.1477434005109282e-05, "loss": 8.566491305828095e-06, "step": 311990 }, { "epoch": 88.56088560885608, "grad_norm": 0.002521168440580368, "learning_rate": 1.1474595515185921e-05, "loss": 7.333606481552124e-06, "step": 312000 }, { "epoch": 88.56088560885608, "eval_accuracy": 0.9891269790805621, "eval_loss": 0.0515391044318676, "eval_runtime": 37.9631, "eval_samples_per_second": 414.271, "eval_steps_per_second": 6.48, "step": 312000 }, { "epoch": 88.56372409877945, "grad_norm": 0.010774864815175533, "learning_rate": 1.1471757025262561e-05, "loss": 6.508082151412964e-06, "step": 312010 }, { "epoch": 88.56656258870281, "grad_norm": 0.003812062554061413, "learning_rate": 1.14689185353392e-05, "loss": 4.153698682785034e-06, "step": 312020 }, { "epoch": 88.56940107862617, "grad_norm": 0.004162893630564213, "learning_rate": 1.1466080045415839e-05, "loss": 5.164928734302521e-06, "step": 312030 }, { "epoch": 88.57223956854953, "grad_norm": 0.0022408533841371536, "learning_rate": 1.1463241555492478e-05, "loss": 1.1722929775714874e-05, "step": 312040 }, { "epoch": 88.5750780584729, "grad_norm": 0.002110961126163602, "learning_rate": 1.1460403065569118e-05, "loss": 1.7845258116722108e-05, "step": 312050 }, { "epoch": 88.57791654839626, "grad_norm": 0.0013264328008517623, "learning_rate": 1.1457564575645758e-05, "loss": 1.3753399252891541e-05, "step": 312060 }, { "epoch": 88.58075503831961, "grad_norm": 0.0007977240020409226, "learning_rate": 1.1454726085722396e-05, "loss": 7.971376180648804e-06, "step": 312070 }, { "epoch": 88.58359352824297, "grad_norm": 0.0006055505364201963, "learning_rate": 1.1451887595799035e-05, "loss": 1.4478527009487153e-05, "step": 312080 }, { "epoch": 88.58643201816633, "grad_norm": 0.001542158075608313, "learning_rate": 1.1449049105875673e-05, "loss": 6.0299411416053775e-06, "step": 312090 }, { "epoch": 88.5892705080897, "grad_norm": 0.0026807228568941355, "learning_rate": 1.1446210615952315e-05, "loss": 6.720609962940216e-06, "step": 312100 }, { "epoch": 88.59210899801306, "grad_norm": 0.0007622645935043693, "learning_rate": 1.1443372126028953e-05, "loss": 8.218362927436828e-06, "step": 312110 }, { "epoch": 88.59494748793642, "grad_norm": 0.010979689657688141, "learning_rate": 1.1440533636105592e-05, "loss": 1.346878707408905e-05, "step": 312120 }, { "epoch": 88.59778597785977, "grad_norm": 0.0007804855122230947, "learning_rate": 1.143769514618223e-05, "loss": 6.599351763725281e-06, "step": 312130 }, { "epoch": 88.60062446778313, "grad_norm": 0.0006996962474659085, "learning_rate": 1.143485665625887e-05, "loss": 1.9111111760139467e-05, "step": 312140 }, { "epoch": 88.6034629577065, "grad_norm": 0.004430435597896576, "learning_rate": 1.143201816633551e-05, "loss": 7.055886089801788e-06, "step": 312150 }, { "epoch": 88.60630144762986, "grad_norm": 0.003636434441432357, "learning_rate": 1.142917967641215e-05, "loss": 6.72508031129837e-06, "step": 312160 }, { "epoch": 88.60913993755322, "grad_norm": 0.0010719550773501396, "learning_rate": 1.1426341186488789e-05, "loss": 4.147179424762726e-06, "step": 312170 }, { "epoch": 88.61197842747659, "grad_norm": 0.0007759982254356146, "learning_rate": 1.1423502696565427e-05, "loss": 5.844235420227051e-06, "step": 312180 }, { "epoch": 88.61481691739995, "grad_norm": 0.0002003602567128837, "learning_rate": 1.1420664206642067e-05, "loss": 8.324719965457917e-06, "step": 312190 }, { "epoch": 88.6176554073233, "grad_norm": 0.001476271077990532, "learning_rate": 1.1417825716718706e-05, "loss": 7.409974932670593e-06, "step": 312200 }, { "epoch": 88.62049389724666, "grad_norm": 0.0011344067752361298, "learning_rate": 1.1414987226795346e-05, "loss": 7.675029337406158e-06, "step": 312210 }, { "epoch": 88.62333238717002, "grad_norm": 0.010186497122049332, "learning_rate": 1.1412148736871984e-05, "loss": 1.038443297147751e-05, "step": 312220 }, { "epoch": 88.62617087709339, "grad_norm": 0.0004428979300428182, "learning_rate": 1.1409310246948624e-05, "loss": 8.946843445301056e-06, "step": 312230 }, { "epoch": 88.62900936701675, "grad_norm": 0.007760889828205109, "learning_rate": 1.1406471757025262e-05, "loss": 1.0447762906551362e-05, "step": 312240 }, { "epoch": 88.63184785694011, "grad_norm": 0.0016649505123496056, "learning_rate": 1.1403633267101903e-05, "loss": 8.72928649187088e-06, "step": 312250 }, { "epoch": 88.63468634686348, "grad_norm": 0.0015347294975072145, "learning_rate": 1.1400794777178541e-05, "loss": 7.996894419193268e-06, "step": 312260 }, { "epoch": 88.63752483678682, "grad_norm": 0.0054189907386898994, "learning_rate": 1.1397956287255181e-05, "loss": 9.92715358734131e-06, "step": 312270 }, { "epoch": 88.64036332671019, "grad_norm": 0.013461926952004433, "learning_rate": 1.1395117797331819e-05, "loss": 9.579211473464966e-06, "step": 312280 }, { "epoch": 88.64320181663355, "grad_norm": 0.0004952788585796952, "learning_rate": 1.1392279307408459e-05, "loss": 8.84607434272766e-06, "step": 312290 }, { "epoch": 88.64604030655691, "grad_norm": 0.002491958439350128, "learning_rate": 1.1389440817485098e-05, "loss": 7.916055619716644e-06, "step": 312300 }, { "epoch": 88.64887879648028, "grad_norm": 0.019502071663737297, "learning_rate": 1.1386602327561738e-05, "loss": 9.331293404102325e-06, "step": 312310 }, { "epoch": 88.65171728640364, "grad_norm": 0.0034076389856636524, "learning_rate": 1.1383763837638378e-05, "loss": 8.928775787353516e-06, "step": 312320 }, { "epoch": 88.65455577632699, "grad_norm": 0.015876134857535362, "learning_rate": 1.1380925347715016e-05, "loss": 1.0599195957183837e-05, "step": 312330 }, { "epoch": 88.65739426625035, "grad_norm": 0.0011368226259946823, "learning_rate": 1.1378086857791655e-05, "loss": 5.563907325267792e-06, "step": 312340 }, { "epoch": 88.66023275617371, "grad_norm": 0.003694939659908414, "learning_rate": 1.1375248367868295e-05, "loss": 1.929719001054764e-05, "step": 312350 }, { "epoch": 88.66307124609708, "grad_norm": 0.0006171297281980515, "learning_rate": 1.1372409877944935e-05, "loss": 8.848868310451507e-06, "step": 312360 }, { "epoch": 88.66590973602044, "grad_norm": 0.0009962909389287233, "learning_rate": 1.1369571388021573e-05, "loss": 8.15708190202713e-06, "step": 312370 }, { "epoch": 88.6687482259438, "grad_norm": 0.004805095959454775, "learning_rate": 1.1366732898098212e-05, "loss": 1.4690682291984558e-05, "step": 312380 }, { "epoch": 88.67158671586716, "grad_norm": 0.0009179218905046582, "learning_rate": 1.1363894408174852e-05, "loss": 1.3461336493492126e-05, "step": 312390 }, { "epoch": 88.67442520579051, "grad_norm": 0.009835953824222088, "learning_rate": 1.1361055918251492e-05, "loss": 5.476735532283783e-06, "step": 312400 }, { "epoch": 88.67726369571388, "grad_norm": 0.01189193781465292, "learning_rate": 1.135821742832813e-05, "loss": 5.106627941131592e-06, "step": 312410 }, { "epoch": 88.68010218563724, "grad_norm": 0.0011508659226819873, "learning_rate": 1.135537893840477e-05, "loss": 4.709698259830475e-06, "step": 312420 }, { "epoch": 88.6829406755606, "grad_norm": 0.0016671493649482727, "learning_rate": 1.1352540448481407e-05, "loss": 5.82858920097351e-06, "step": 312430 }, { "epoch": 88.68577916548396, "grad_norm": 0.0004840067122131586, "learning_rate": 1.1349701958558049e-05, "loss": 7.15889036655426e-06, "step": 312440 }, { "epoch": 88.68861765540733, "grad_norm": 0.00131550757214427, "learning_rate": 1.1346863468634687e-05, "loss": 8.158758282661438e-06, "step": 312450 }, { "epoch": 88.69145614533069, "grad_norm": 0.0005384926334954798, "learning_rate": 1.1344024978711326e-05, "loss": 4.773028194904327e-06, "step": 312460 }, { "epoch": 88.69429463525404, "grad_norm": 0.0004420307232066989, "learning_rate": 1.1341186488787964e-05, "loss": 4.764460027217865e-06, "step": 312470 }, { "epoch": 88.6971331251774, "grad_norm": 0.029252734035253525, "learning_rate": 1.1338347998864604e-05, "loss": 1.2730620801448822e-05, "step": 312480 }, { "epoch": 88.69997161510076, "grad_norm": 0.0035589272156357765, "learning_rate": 1.1335509508941244e-05, "loss": 6.614997982978821e-06, "step": 312490 }, { "epoch": 88.70281010502413, "grad_norm": 0.0027878996916115284, "learning_rate": 1.1332671019017883e-05, "loss": 7.125921547412872e-06, "step": 312500 }, { "epoch": 88.70281010502413, "eval_accuracy": 0.9893813187511922, "eval_loss": 0.05107565596699715, "eval_runtime": 35.8604, "eval_samples_per_second": 438.561, "eval_steps_per_second": 6.86, "step": 312500 }, { "epoch": 88.70564859494749, "grad_norm": 0.0013293595984578133, "learning_rate": 1.1329832529094523e-05, "loss": 6.860122084617615e-06, "step": 312510 }, { "epoch": 88.70848708487085, "grad_norm": 0.0010299477726221085, "learning_rate": 1.1326994039171161e-05, "loss": 7.931888103485107e-06, "step": 312520 }, { "epoch": 88.71132557479422, "grad_norm": 0.0014572180807590485, "learning_rate": 1.13241555492478e-05, "loss": 4.9818307161331175e-06, "step": 312530 }, { "epoch": 88.71416406471756, "grad_norm": 0.0005215576966293156, "learning_rate": 1.132131705932444e-05, "loss": 6.145797669887543e-06, "step": 312540 }, { "epoch": 88.71700255464093, "grad_norm": 0.0007291425135917962, "learning_rate": 1.131847856940108e-05, "loss": 8.963793516159057e-06, "step": 312550 }, { "epoch": 88.71984104456429, "grad_norm": 0.00043008936336264014, "learning_rate": 1.1315640079477718e-05, "loss": 3.900378942489624e-06, "step": 312560 }, { "epoch": 88.72267953448765, "grad_norm": 0.0023711479734629393, "learning_rate": 1.1312801589554358e-05, "loss": 4.937872290611267e-06, "step": 312570 }, { "epoch": 88.72551802441102, "grad_norm": 0.0024311868473887444, "learning_rate": 1.1309963099630996e-05, "loss": 1.0643340647220612e-05, "step": 312580 }, { "epoch": 88.72835651433438, "grad_norm": 0.00026934241759590805, "learning_rate": 1.1307124609707637e-05, "loss": 6.377138197422027e-06, "step": 312590 }, { "epoch": 88.73119500425773, "grad_norm": 0.004756256006658077, "learning_rate": 1.1304286119784275e-05, "loss": 5.8924779295921326e-06, "step": 312600 }, { "epoch": 88.73403349418109, "grad_norm": 0.0007163287955336273, "learning_rate": 1.1301447629860915e-05, "loss": 5.816482007503509e-06, "step": 312610 }, { "epoch": 88.73687198410445, "grad_norm": 0.0035975624341517687, "learning_rate": 1.1298609139937553e-05, "loss": 1.101810485124588e-05, "step": 312620 }, { "epoch": 88.73971047402782, "grad_norm": 0.004264136776328087, "learning_rate": 1.1295770650014192e-05, "loss": 7.926858961582183e-06, "step": 312630 }, { "epoch": 88.74254896395118, "grad_norm": 0.0012975850841030478, "learning_rate": 1.1292932160090832e-05, "loss": 6.88452273607254e-06, "step": 312640 }, { "epoch": 88.74538745387454, "grad_norm": 0.0009080314193852246, "learning_rate": 1.1290093670167472e-05, "loss": 5.234405398368835e-06, "step": 312650 }, { "epoch": 88.7482259437979, "grad_norm": 0.0006932418327778578, "learning_rate": 1.1287255180244111e-05, "loss": 7.451325654983521e-06, "step": 312660 }, { "epoch": 88.75106443372125, "grad_norm": 0.000678224372677505, "learning_rate": 1.128441669032075e-05, "loss": 5.917996168136596e-06, "step": 312670 }, { "epoch": 88.75390292364462, "grad_norm": 0.0017158153932541609, "learning_rate": 1.1281578200397389e-05, "loss": 1.8555298447608946e-05, "step": 312680 }, { "epoch": 88.75674141356798, "grad_norm": 0.0013819151790812612, "learning_rate": 1.1278739710474029e-05, "loss": 4.403293132781982e-06, "step": 312690 }, { "epoch": 88.75957990349134, "grad_norm": 0.0012094766134396195, "learning_rate": 1.1275901220550668e-05, "loss": 8.90549272298813e-06, "step": 312700 }, { "epoch": 88.7624183934147, "grad_norm": 0.0018720160005614161, "learning_rate": 1.1273062730627306e-05, "loss": 7.64857977628708e-06, "step": 312710 }, { "epoch": 88.76525688333807, "grad_norm": 0.001263815094716847, "learning_rate": 1.1270224240703946e-05, "loss": 7.162056863307953e-06, "step": 312720 }, { "epoch": 88.76809537326143, "grad_norm": 0.002735971473157406, "learning_rate": 1.1267385750780584e-05, "loss": 7.96206295490265e-06, "step": 312730 }, { "epoch": 88.77093386318478, "grad_norm": 0.0006430061184801161, "learning_rate": 1.1264547260857226e-05, "loss": 8.353590965270996e-06, "step": 312740 }, { "epoch": 88.77377235310814, "grad_norm": 0.0002223434712504968, "learning_rate": 1.1261708770933864e-05, "loss": 8.114613592624664e-06, "step": 312750 }, { "epoch": 88.7766108430315, "grad_norm": 0.0006257740897126496, "learning_rate": 1.1258870281010503e-05, "loss": 6.983242928981781e-06, "step": 312760 }, { "epoch": 88.77944933295487, "grad_norm": 0.002756800502538681, "learning_rate": 1.1256031791087141e-05, "loss": 9.081140160560607e-06, "step": 312770 }, { "epoch": 88.78228782287823, "grad_norm": 0.0006730863242410123, "learning_rate": 1.1253193301163781e-05, "loss": 9.20817255973816e-06, "step": 312780 }, { "epoch": 88.7851263128016, "grad_norm": 0.0008896589279174805, "learning_rate": 1.125035481124042e-05, "loss": 5.14984130859375e-06, "step": 312790 }, { "epoch": 88.78796480272496, "grad_norm": 0.0005093553336337209, "learning_rate": 1.124751632131706e-05, "loss": 7.017515599727631e-06, "step": 312800 }, { "epoch": 88.7908032926483, "grad_norm": 0.00033492359216324985, "learning_rate": 1.1244677831393698e-05, "loss": 7.033348083496094e-06, "step": 312810 }, { "epoch": 88.79364178257167, "grad_norm": 0.001728316769003868, "learning_rate": 1.1241839341470338e-05, "loss": 7.020682096481323e-06, "step": 312820 }, { "epoch": 88.79648027249503, "grad_norm": 0.02330593392252922, "learning_rate": 1.1239000851546978e-05, "loss": 1.4423392713069916e-05, "step": 312830 }, { "epoch": 88.7993187624184, "grad_norm": 0.003210341092199087, "learning_rate": 1.1236162361623617e-05, "loss": 7.5338408350944516e-06, "step": 312840 }, { "epoch": 88.80215725234176, "grad_norm": 0.00025159199140034616, "learning_rate": 1.1233323871700257e-05, "loss": 4.3833628296852115e-06, "step": 312850 }, { "epoch": 88.80499574226512, "grad_norm": 0.006160734687000513, "learning_rate": 1.1230485381776895e-05, "loss": 7.578916847705841e-06, "step": 312860 }, { "epoch": 88.80783423218847, "grad_norm": 0.0013426847290247679, "learning_rate": 1.1227646891853535e-05, "loss": 9.812414646148682e-06, "step": 312870 }, { "epoch": 88.81067272211183, "grad_norm": 0.0016754076350480318, "learning_rate": 1.1224808401930173e-05, "loss": 9.65520739555359e-06, "step": 312880 }, { "epoch": 88.8135112120352, "grad_norm": 0.021006787195801735, "learning_rate": 1.1221969912006814e-05, "loss": 1.030806452035904e-05, "step": 312890 }, { "epoch": 88.81634970195856, "grad_norm": 0.0014334191801026464, "learning_rate": 1.1219131422083452e-05, "loss": 3.7888064980506895e-06, "step": 312900 }, { "epoch": 88.81918819188192, "grad_norm": 0.0007647262536920607, "learning_rate": 1.1216292932160092e-05, "loss": 4.449300467967987e-06, "step": 312910 }, { "epoch": 88.82202668180528, "grad_norm": 0.003317193128168583, "learning_rate": 1.121345444223673e-05, "loss": 6.08246773481369e-06, "step": 312920 }, { "epoch": 88.82486517172865, "grad_norm": 0.0018564923666417599, "learning_rate": 1.121061595231337e-05, "loss": 6.320886313915253e-06, "step": 312930 }, { "epoch": 88.827703661652, "grad_norm": 0.0033806206192821264, "learning_rate": 1.1207777462390009e-05, "loss": 4.827231168746948e-06, "step": 312940 }, { "epoch": 88.83054215157536, "grad_norm": 0.0006473198300227523, "learning_rate": 1.1204938972466649e-05, "loss": 5.713850259780884e-06, "step": 312950 }, { "epoch": 88.83338064149872, "grad_norm": 0.0010942973894998431, "learning_rate": 1.1202100482543287e-05, "loss": 5.057454109191894e-06, "step": 312960 }, { "epoch": 88.83621913142208, "grad_norm": 0.0005856050993315876, "learning_rate": 1.1199261992619926e-05, "loss": 4.306435585021973e-06, "step": 312970 }, { "epoch": 88.83905762134545, "grad_norm": 0.0007114221225492656, "learning_rate": 1.1196423502696566e-05, "loss": 6.136111915111542e-06, "step": 312980 }, { "epoch": 88.84189611126881, "grad_norm": 0.0008716967422515154, "learning_rate": 1.1193585012773206e-05, "loss": 4.1618943214416506e-06, "step": 312990 }, { "epoch": 88.84473460119217, "grad_norm": 0.0007329106447286904, "learning_rate": 1.1190746522849845e-05, "loss": 6.7196786403656e-06, "step": 313000 }, { "epoch": 88.84473460119217, "eval_accuracy": 0.9892541489158772, "eval_loss": 0.04938751086592674, "eval_runtime": 37.0417, "eval_samples_per_second": 424.576, "eval_steps_per_second": 6.641, "step": 313000 }, { "epoch": 88.84757309111552, "grad_norm": 0.001228439505212009, "learning_rate": 1.1187908032926483e-05, "loss": 5.108863115310669e-06, "step": 313010 }, { "epoch": 88.85041158103888, "grad_norm": 0.000636377721093595, "learning_rate": 1.1185069543003123e-05, "loss": 4.353933036327362e-06, "step": 313020 }, { "epoch": 88.85325007096225, "grad_norm": 0.0004549663281068206, "learning_rate": 1.1182231053079763e-05, "loss": 1.143086701631546e-05, "step": 313030 }, { "epoch": 88.85608856088561, "grad_norm": 0.0005592607194557786, "learning_rate": 1.1179392563156402e-05, "loss": 5.978159606456757e-06, "step": 313040 }, { "epoch": 88.85892705080897, "grad_norm": 0.0010928427800536156, "learning_rate": 1.117655407323304e-05, "loss": 6.075203418731689e-06, "step": 313050 }, { "epoch": 88.86176554073234, "grad_norm": 0.0007094187894836068, "learning_rate": 1.117371558330968e-05, "loss": 7.581152021884918e-06, "step": 313060 }, { "epoch": 88.86460403065568, "grad_norm": 0.001175867160782218, "learning_rate": 1.1170877093386318e-05, "loss": 5.171447992324829e-06, "step": 313070 }, { "epoch": 88.86744252057905, "grad_norm": 0.0015657254261896014, "learning_rate": 1.116803860346296e-05, "loss": 7.366575300693512e-06, "step": 313080 }, { "epoch": 88.87028101050241, "grad_norm": 0.0028273360803723335, "learning_rate": 1.1165200113539597e-05, "loss": 7.651560008525848e-06, "step": 313090 }, { "epoch": 88.87311950042577, "grad_norm": 0.001114992774091661, "learning_rate": 1.1162361623616237e-05, "loss": 4.977360367774963e-06, "step": 313100 }, { "epoch": 88.87595799034914, "grad_norm": 0.0011096418602392077, "learning_rate": 1.1159523133692875e-05, "loss": 6.284750998020172e-06, "step": 313110 }, { "epoch": 88.8787964802725, "grad_norm": 0.006987065076828003, "learning_rate": 1.1156684643769515e-05, "loss": 9.04761254787445e-06, "step": 313120 }, { "epoch": 88.88163497019586, "grad_norm": 0.0023779026232659817, "learning_rate": 1.1153846153846154e-05, "loss": 4.261545836925507e-06, "step": 313130 }, { "epoch": 88.88447346011921, "grad_norm": 0.0002722233475651592, "learning_rate": 1.1151007663922794e-05, "loss": 5.340389907360077e-06, "step": 313140 }, { "epoch": 88.88731195004257, "grad_norm": 0.016734736040234566, "learning_rate": 1.1148169173999432e-05, "loss": 6.168149411678314e-06, "step": 313150 }, { "epoch": 88.89015043996594, "grad_norm": 0.0020425752736628056, "learning_rate": 1.1145330684076072e-05, "loss": 5.204044282436371e-06, "step": 313160 }, { "epoch": 88.8929889298893, "grad_norm": 0.001796620897948742, "learning_rate": 1.1142492194152711e-05, "loss": 6.236322224140167e-06, "step": 313170 }, { "epoch": 88.89582741981266, "grad_norm": 0.0007913808221928775, "learning_rate": 1.1139653704229351e-05, "loss": 4.00543212890625e-06, "step": 313180 }, { "epoch": 88.89866590973602, "grad_norm": 0.0007817150908522308, "learning_rate": 1.113681521430599e-05, "loss": 6.488710641860962e-06, "step": 313190 }, { "epoch": 88.90150439965939, "grad_norm": 0.002594227436929941, "learning_rate": 1.1133976724382629e-05, "loss": 1.3495981693267823e-05, "step": 313200 }, { "epoch": 88.90434288958274, "grad_norm": 0.0008955959347076714, "learning_rate": 1.1131138234459268e-05, "loss": 5.0229951739311215e-06, "step": 313210 }, { "epoch": 88.9071813795061, "grad_norm": 0.0011463873088359833, "learning_rate": 1.1128299744535906e-05, "loss": 7.949210703372955e-06, "step": 313220 }, { "epoch": 88.91001986942946, "grad_norm": 0.00024380294780712575, "learning_rate": 1.1125461254612548e-05, "loss": 6.9139525294303896e-06, "step": 313230 }, { "epoch": 88.91285835935282, "grad_norm": 0.003078558249399066, "learning_rate": 1.1122622764689186e-05, "loss": 2.0022504031658173e-05, "step": 313240 }, { "epoch": 88.91569684927619, "grad_norm": 0.0006505500059574842, "learning_rate": 1.1119784274765826e-05, "loss": 9.792111814022065e-06, "step": 313250 }, { "epoch": 88.91853533919955, "grad_norm": 0.0010397204896435142, "learning_rate": 1.1116945784842464e-05, "loss": 6.593950092792511e-06, "step": 313260 }, { "epoch": 88.92137382912291, "grad_norm": 0.0005466447910293937, "learning_rate": 1.1114107294919103e-05, "loss": 7.1587041020393375e-06, "step": 313270 }, { "epoch": 88.92421231904626, "grad_norm": 0.00697236368432641, "learning_rate": 1.1111268804995743e-05, "loss": 7.1695074439048765e-06, "step": 313280 }, { "epoch": 88.92705080896962, "grad_norm": 0.0011001460952684283, "learning_rate": 1.1108430315072383e-05, "loss": 5.115196108818054e-06, "step": 313290 }, { "epoch": 88.92988929889299, "grad_norm": 0.002807177137583494, "learning_rate": 1.110559182514902e-05, "loss": 6.079114973545074e-06, "step": 313300 }, { "epoch": 88.93272778881635, "grad_norm": 0.0007610629545524716, "learning_rate": 1.110275333522566e-05, "loss": 6.118416786193848e-06, "step": 313310 }, { "epoch": 88.93556627873971, "grad_norm": 0.022954415529966354, "learning_rate": 1.10999148453023e-05, "loss": 8.964166045188904e-06, "step": 313320 }, { "epoch": 88.93840476866308, "grad_norm": 0.0008059840183705091, "learning_rate": 1.109707635537894e-05, "loss": 9.170360863208771e-06, "step": 313330 }, { "epoch": 88.94124325858643, "grad_norm": 0.0016573257744312286, "learning_rate": 1.109423786545558e-05, "loss": 6.2372535467147825e-06, "step": 313340 }, { "epoch": 88.94408174850979, "grad_norm": 0.0026229533832520247, "learning_rate": 1.1091399375532217e-05, "loss": 5.20758330821991e-06, "step": 313350 }, { "epoch": 88.94692023843315, "grad_norm": 0.0005775410681962967, "learning_rate": 1.1088560885608857e-05, "loss": 5.727075040340423e-06, "step": 313360 }, { "epoch": 88.94975872835651, "grad_norm": 0.0011670095846056938, "learning_rate": 1.1085722395685495e-05, "loss": 4.061870276927948e-06, "step": 313370 }, { "epoch": 88.95259721827988, "grad_norm": 0.0026446522679179907, "learning_rate": 1.1082883905762136e-05, "loss": 4.692748188972473e-06, "step": 313380 }, { "epoch": 88.95543570820324, "grad_norm": 0.0032530860044062138, "learning_rate": 1.1080045415838774e-05, "loss": 6.4462423324584964e-06, "step": 313390 }, { "epoch": 88.9582741981266, "grad_norm": 0.0024538657162338495, "learning_rate": 1.1077206925915414e-05, "loss": 8.866563439369201e-06, "step": 313400 }, { "epoch": 88.96111268804995, "grad_norm": 0.004966530948877335, "learning_rate": 1.1074368435992052e-05, "loss": 9.360909461975098e-06, "step": 313410 }, { "epoch": 88.96395117797331, "grad_norm": 0.0047714849933981895, "learning_rate": 1.1071529946068692e-05, "loss": 6.0660764575004574e-06, "step": 313420 }, { "epoch": 88.96678966789668, "grad_norm": 0.001187683898024261, "learning_rate": 1.1068691456145331e-05, "loss": 7.19018280506134e-06, "step": 313430 }, { "epoch": 88.96962815782004, "grad_norm": 0.00029865908436477184, "learning_rate": 1.1065852966221971e-05, "loss": 1.7039105296134947e-05, "step": 313440 }, { "epoch": 88.9724666477434, "grad_norm": 0.0011010669404640794, "learning_rate": 1.1063014476298609e-05, "loss": 1.3323873281478881e-05, "step": 313450 }, { "epoch": 88.97530513766677, "grad_norm": 0.0006116903387010098, "learning_rate": 1.1060175986375249e-05, "loss": 5.678832530975342e-06, "step": 313460 }, { "epoch": 88.97814362759013, "grad_norm": 0.0014225798659026623, "learning_rate": 1.1057337496451888e-05, "loss": 3.7642195820808412e-06, "step": 313470 }, { "epoch": 88.98098211751348, "grad_norm": 0.0020148330368101597, "learning_rate": 1.1054499006528528e-05, "loss": 6.064772605895996e-06, "step": 313480 }, { "epoch": 88.98382060743684, "grad_norm": 0.0010315235704183578, "learning_rate": 1.1051660516605166e-05, "loss": 1.2487545609474182e-05, "step": 313490 }, { "epoch": 88.9866590973602, "grad_norm": 0.0022755106911063194, "learning_rate": 1.1048822026681806e-05, "loss": 7.104314863681793e-06, "step": 313500 }, { "epoch": 88.9866590973602, "eval_accuracy": 0.988745469574617, "eval_loss": 0.04977617412805557, "eval_runtime": 36.788, "eval_samples_per_second": 427.503, "eval_steps_per_second": 6.687, "step": 313500 }, { "epoch": 88.98949758728357, "grad_norm": 0.002300382824614644, "learning_rate": 1.1045983536758445e-05, "loss": 6.907805800437927e-06, "step": 313510 }, { "epoch": 88.99233607720693, "grad_norm": 0.001013488508760929, "learning_rate": 1.1043145046835083e-05, "loss": 1.1167861521244049e-05, "step": 313520 }, { "epoch": 88.99517456713029, "grad_norm": 0.0013019161997362971, "learning_rate": 1.1040306556911725e-05, "loss": 1.1942721903324127e-05, "step": 313530 }, { "epoch": 88.99801305705365, "grad_norm": 0.0007914427551440895, "learning_rate": 1.1037468066988363e-05, "loss": 4.811584949493408e-06, "step": 313540 }, { "epoch": 89.000851546977, "grad_norm": 0.00044358070590533316, "learning_rate": 1.1034629577065002e-05, "loss": 1.0869025572901592e-05, "step": 313550 }, { "epoch": 89.00369003690037, "grad_norm": 0.0036986966151744127, "learning_rate": 1.103179108714164e-05, "loss": 3.0515901744365693e-05, "step": 313560 }, { "epoch": 89.00652852682373, "grad_norm": 0.00119122548494488, "learning_rate": 1.102895259721828e-05, "loss": 8.071213960647583e-06, "step": 313570 }, { "epoch": 89.00936701674709, "grad_norm": 0.0012443620944395661, "learning_rate": 1.102611410729492e-05, "loss": 7.79721885919571e-06, "step": 313580 }, { "epoch": 89.01220550667045, "grad_norm": 0.0004505193210206926, "learning_rate": 1.102327561737156e-05, "loss": 4.079565405845642e-06, "step": 313590 }, { "epoch": 89.01504399659382, "grad_norm": 0.05620870739221573, "learning_rate": 1.1020437127448197e-05, "loss": 2.2994913160800932e-05, "step": 313600 }, { "epoch": 89.01788248651717, "grad_norm": 0.0028209583833813667, "learning_rate": 1.1017598637524837e-05, "loss": 7.703714072704315e-06, "step": 313610 }, { "epoch": 89.02072097644053, "grad_norm": 0.0027511934749782085, "learning_rate": 1.1014760147601477e-05, "loss": 6.119906902313232e-06, "step": 313620 }, { "epoch": 89.02355946636389, "grad_norm": 0.00042387013672851026, "learning_rate": 1.1011921657678116e-05, "loss": 6.411224603652954e-06, "step": 313630 }, { "epoch": 89.02639795628725, "grad_norm": 0.00046858249697834253, "learning_rate": 1.1009083167754754e-05, "loss": 4.4191256165504456e-06, "step": 313640 }, { "epoch": 89.02923644621062, "grad_norm": 0.001246871193870902, "learning_rate": 1.1006244677831394e-05, "loss": 5.524419248104096e-06, "step": 313650 }, { "epoch": 89.03207493613398, "grad_norm": 0.0005006597493775189, "learning_rate": 1.1003406187908034e-05, "loss": 4.64804470539093e-06, "step": 313660 }, { "epoch": 89.03491342605734, "grad_norm": 0.0008769261185079813, "learning_rate": 1.1000567697984673e-05, "loss": 7.168389856815338e-06, "step": 313670 }, { "epoch": 89.03775191598069, "grad_norm": 0.0006585881346836686, "learning_rate": 1.0997729208061311e-05, "loss": 4.15407121181488e-06, "step": 313680 }, { "epoch": 89.04059040590406, "grad_norm": 0.0013757299166172743, "learning_rate": 1.0994890718137951e-05, "loss": 4.313327372074127e-06, "step": 313690 }, { "epoch": 89.04342889582742, "grad_norm": 0.00041965488344430923, "learning_rate": 1.099205222821459e-05, "loss": 3.369711339473724e-06, "step": 313700 }, { "epoch": 89.04626738575078, "grad_norm": 0.0003824277373496443, "learning_rate": 1.0989213738291229e-05, "loss": 5.881674587726593e-06, "step": 313710 }, { "epoch": 89.04910587567414, "grad_norm": 0.0020634373649954796, "learning_rate": 1.098637524836787e-05, "loss": 4.626624286174774e-06, "step": 313720 }, { "epoch": 89.0519443655975, "grad_norm": 0.0005554610979743302, "learning_rate": 1.0983536758444508e-05, "loss": 3.1730160117149353e-06, "step": 313730 }, { "epoch": 89.05478285552087, "grad_norm": 0.002886723494157195, "learning_rate": 1.0980698268521148e-05, "loss": 6.372295320034027e-06, "step": 313740 }, { "epoch": 89.05762134544422, "grad_norm": 0.0007299785502254963, "learning_rate": 1.0977859778597786e-05, "loss": 6.053037941455841e-06, "step": 313750 }, { "epoch": 89.06045983536758, "grad_norm": 0.0024019277188926935, "learning_rate": 1.0975021288674426e-05, "loss": 3.635883331298828e-06, "step": 313760 }, { "epoch": 89.06329832529094, "grad_norm": 0.0010159459197893739, "learning_rate": 1.0972182798751065e-05, "loss": 6.357207894325256e-06, "step": 313770 }, { "epoch": 89.0661368152143, "grad_norm": 0.00216102646663785, "learning_rate": 1.0969344308827705e-05, "loss": 9.537488222122192e-06, "step": 313780 }, { "epoch": 89.06897530513767, "grad_norm": 0.0008723904611542821, "learning_rate": 1.0966505818904343e-05, "loss": 5.463138222694397e-06, "step": 313790 }, { "epoch": 89.07181379506103, "grad_norm": 0.001229477347806096, "learning_rate": 1.0963667328980983e-05, "loss": 4.463642835617066e-06, "step": 313800 }, { "epoch": 89.07465228498438, "grad_norm": 0.0012727908324450254, "learning_rate": 1.096082883905762e-05, "loss": 5.375035107135772e-06, "step": 313810 }, { "epoch": 89.07749077490774, "grad_norm": 0.0005215817363932729, "learning_rate": 1.0957990349134262e-05, "loss": 5.3104013204574585e-06, "step": 313820 }, { "epoch": 89.08032926483111, "grad_norm": 0.00036158645525574684, "learning_rate": 1.09551518592109e-05, "loss": 3.077276051044464e-06, "step": 313830 }, { "epoch": 89.08316775475447, "grad_norm": 0.0004673480580095202, "learning_rate": 1.095231336928754e-05, "loss": 7.918663322925568e-06, "step": 313840 }, { "epoch": 89.08600624467783, "grad_norm": 0.0010464438237249851, "learning_rate": 1.094947487936418e-05, "loss": 5.402229726314545e-06, "step": 313850 }, { "epoch": 89.0888447346012, "grad_norm": 0.0017569524934515357, "learning_rate": 1.0946636389440817e-05, "loss": 7.515214383602142e-06, "step": 313860 }, { "epoch": 89.09168322452456, "grad_norm": 0.0009180688648484647, "learning_rate": 1.0943797899517459e-05, "loss": 4.819035530090332e-06, "step": 313870 }, { "epoch": 89.09452171444791, "grad_norm": 0.0006822148570790887, "learning_rate": 1.0940959409594097e-05, "loss": 5.580112338066101e-06, "step": 313880 }, { "epoch": 89.09736020437127, "grad_norm": 0.001179953571408987, "learning_rate": 1.0938120919670736e-05, "loss": 9.144283831119538e-06, "step": 313890 }, { "epoch": 89.10019869429463, "grad_norm": 0.0014874860644340515, "learning_rate": 1.0935282429747374e-05, "loss": 9.430013597011566e-06, "step": 313900 }, { "epoch": 89.103037184218, "grad_norm": 0.0012732736067846417, "learning_rate": 1.0932443939824014e-05, "loss": 3.7692487239837645e-06, "step": 313910 }, { "epoch": 89.10587567414136, "grad_norm": 0.0003514138516038656, "learning_rate": 1.0929605449900654e-05, "loss": 3.6831945180892946e-06, "step": 313920 }, { "epoch": 89.10871416406472, "grad_norm": 0.0006539272144436836, "learning_rate": 1.0926766959977293e-05, "loss": 4.936382174491882e-06, "step": 313930 }, { "epoch": 89.11155265398808, "grad_norm": 0.00122172012925148, "learning_rate": 1.0923928470053931e-05, "loss": 4.233606159687042e-06, "step": 313940 }, { "epoch": 89.11439114391143, "grad_norm": 0.00041463293018750846, "learning_rate": 1.0921089980130571e-05, "loss": 5.1407143473625185e-06, "step": 313950 }, { "epoch": 89.1172296338348, "grad_norm": 0.009502585977315903, "learning_rate": 1.0918251490207209e-05, "loss": 8.713267743587493e-06, "step": 313960 }, { "epoch": 89.12006812375816, "grad_norm": 0.005577779840677977, "learning_rate": 1.091541300028385e-05, "loss": 6.859563291072845e-06, "step": 313970 }, { "epoch": 89.12290661368152, "grad_norm": 0.002890970790758729, "learning_rate": 1.0912574510360488e-05, "loss": 4.782341420650482e-06, "step": 313980 }, { "epoch": 89.12574510360488, "grad_norm": 0.005330396816134453, "learning_rate": 1.0909736020437128e-05, "loss": 7.708929479122162e-06, "step": 313990 }, { "epoch": 89.12858359352825, "grad_norm": 0.0011398119386285543, "learning_rate": 1.0906897530513768e-05, "loss": 6.228126585483551e-06, "step": 314000 }, { "epoch": 89.12858359352825, "eval_accuracy": 0.9890633941629046, "eval_loss": 0.04951871186494827, "eval_runtime": 36.2548, "eval_samples_per_second": 433.791, "eval_steps_per_second": 6.785, "step": 314000 }, { "epoch": 89.13142208345161, "grad_norm": 0.00018228408589493483, "learning_rate": 1.0904059040590406e-05, "loss": 4.555657505989075e-06, "step": 314010 }, { "epoch": 89.13426057337496, "grad_norm": 0.0013058447511866689, "learning_rate": 1.0901220550667045e-05, "loss": 6.5693631768226625e-06, "step": 314020 }, { "epoch": 89.13709906329832, "grad_norm": 0.00113234156742692, "learning_rate": 1.0898382060743685e-05, "loss": 6.1640515923500064e-06, "step": 314030 }, { "epoch": 89.13993755322169, "grad_norm": 0.005229811649769545, "learning_rate": 1.0895543570820325e-05, "loss": 6.3069164752960205e-06, "step": 314040 }, { "epoch": 89.14277604314505, "grad_norm": 0.0035322424955666065, "learning_rate": 1.0892705080896963e-05, "loss": 4.684925079345703e-06, "step": 314050 }, { "epoch": 89.14561453306841, "grad_norm": 0.0021233020815998316, "learning_rate": 1.0889866590973602e-05, "loss": 5.359947681427002e-06, "step": 314060 }, { "epoch": 89.14845302299177, "grad_norm": 0.0007145645213313401, "learning_rate": 1.0887028101050242e-05, "loss": 3.0158087611198424e-06, "step": 314070 }, { "epoch": 89.15129151291512, "grad_norm": 0.0008616098202764988, "learning_rate": 1.0884189611126882e-05, "loss": 6.152503192424774e-06, "step": 314080 }, { "epoch": 89.15413000283849, "grad_norm": 0.0002855699276551604, "learning_rate": 1.088135112120352e-05, "loss": 3.0191801488399506e-05, "step": 314090 }, { "epoch": 89.15696849276185, "grad_norm": 0.0010659839026629925, "learning_rate": 1.087851263128016e-05, "loss": 6.230920553207397e-06, "step": 314100 }, { "epoch": 89.15980698268521, "grad_norm": 0.0014787671389058232, "learning_rate": 1.0875674141356797e-05, "loss": 0.00034367404878139497, "step": 314110 }, { "epoch": 89.16264547260857, "grad_norm": 2.970228672027588, "learning_rate": 1.0872835651433439e-05, "loss": 0.0006194006651639939, "step": 314120 }, { "epoch": 89.16548396253194, "grad_norm": 0.013681192882359028, "learning_rate": 1.0869997161510077e-05, "loss": 0.00013897214084863664, "step": 314130 }, { "epoch": 89.1683224524553, "grad_norm": 0.0026777111925184727, "learning_rate": 1.0867158671586716e-05, "loss": 2.0899809896945953e-05, "step": 314140 }, { "epoch": 89.17116094237865, "grad_norm": 0.0020628971979022026, "learning_rate": 1.0864320181663354e-05, "loss": 0.006363475322723388, "step": 314150 }, { "epoch": 89.17399943230201, "grad_norm": 0.3935810923576355, "learning_rate": 1.0861481691739994e-05, "loss": 6.815474480390549e-05, "step": 314160 }, { "epoch": 89.17683792222537, "grad_norm": 0.002971724374219775, "learning_rate": 1.0858643201816634e-05, "loss": 2.5613605976104738e-05, "step": 314170 }, { "epoch": 89.17967641214874, "grad_norm": 0.0008831479353830218, "learning_rate": 1.0855804711893273e-05, "loss": 9.219907224178314e-06, "step": 314180 }, { "epoch": 89.1825149020721, "grad_norm": 0.022339116781949997, "learning_rate": 1.0852966221969913e-05, "loss": 6.6086649894714355e-06, "step": 314190 }, { "epoch": 89.18535339199546, "grad_norm": 0.009360200725495815, "learning_rate": 1.0850127732046551e-05, "loss": 1.4995597302913665e-05, "step": 314200 }, { "epoch": 89.18819188191883, "grad_norm": 0.005316707771271467, "learning_rate": 1.084728924212319e-05, "loss": 1.2622959911823272e-05, "step": 314210 }, { "epoch": 89.19103037184217, "grad_norm": 0.004263971000909805, "learning_rate": 1.084445075219983e-05, "loss": 1.4272332191467284e-05, "step": 314220 }, { "epoch": 89.19386886176554, "grad_norm": 0.0013255005469545722, "learning_rate": 1.084161226227647e-05, "loss": 8.97236168384552e-06, "step": 314230 }, { "epoch": 89.1967073516889, "grad_norm": 0.018663765862584114, "learning_rate": 1.0838773772353108e-05, "loss": 1.742243766784668e-05, "step": 314240 }, { "epoch": 89.19954584161226, "grad_norm": 0.0017997960094362497, "learning_rate": 1.0835935282429748e-05, "loss": 1.4355778694152832e-05, "step": 314250 }, { "epoch": 89.20238433153563, "grad_norm": 0.0023678685538470745, "learning_rate": 1.0833096792506388e-05, "loss": 1.7819367349147798e-05, "step": 314260 }, { "epoch": 89.20522282145899, "grad_norm": 0.0030172013211995363, "learning_rate": 1.0830258302583027e-05, "loss": 1.9142404198646545e-05, "step": 314270 }, { "epoch": 89.20806131138234, "grad_norm": 0.017775883898139, "learning_rate": 1.0827419812659665e-05, "loss": 2.495311200618744e-05, "step": 314280 }, { "epoch": 89.2108998013057, "grad_norm": 0.0013186458963900805, "learning_rate": 1.0824581322736305e-05, "loss": 6.867013871669769e-06, "step": 314290 }, { "epoch": 89.21373829122906, "grad_norm": 0.000940209545660764, "learning_rate": 1.0821742832812943e-05, "loss": 7.758475840091706e-06, "step": 314300 }, { "epoch": 89.21657678115243, "grad_norm": 0.050048962235450745, "learning_rate": 1.0818904342889584e-05, "loss": 1.938231289386749e-05, "step": 314310 }, { "epoch": 89.21941527107579, "grad_norm": 0.0014602778246626258, "learning_rate": 1.0816065852966222e-05, "loss": 7.175095379352569e-06, "step": 314320 }, { "epoch": 89.22225376099915, "grad_norm": 0.0006320046377368271, "learning_rate": 1.0813227363042862e-05, "loss": 6.684288382530212e-06, "step": 314330 }, { "epoch": 89.22509225092251, "grad_norm": 0.028971949592232704, "learning_rate": 1.0810388873119502e-05, "loss": 1.3342872262001037e-05, "step": 314340 }, { "epoch": 89.22793074084586, "grad_norm": 0.00057447818107903, "learning_rate": 1.080755038319614e-05, "loss": 4.427693784236908e-06, "step": 314350 }, { "epoch": 89.23076923076923, "grad_norm": 0.0013739678543061018, "learning_rate": 1.080471189327278e-05, "loss": 7.970817387104035e-06, "step": 314360 }, { "epoch": 89.23360772069259, "grad_norm": 0.001109570381231606, "learning_rate": 1.0801873403349419e-05, "loss": 9.556673467159271e-06, "step": 314370 }, { "epoch": 89.23644621061595, "grad_norm": 0.0360664501786232, "learning_rate": 1.0799034913426059e-05, "loss": 1.3303197920322419e-05, "step": 314380 }, { "epoch": 89.23928470053932, "grad_norm": 0.003018277930095792, "learning_rate": 1.0796196423502697e-05, "loss": 5.286931991577149e-06, "step": 314390 }, { "epoch": 89.24212319046268, "grad_norm": 0.014271850697696209, "learning_rate": 1.0793357933579336e-05, "loss": 9.812600910663605e-06, "step": 314400 }, { "epoch": 89.24496168038604, "grad_norm": 0.008819540031254292, "learning_rate": 1.0790519443655976e-05, "loss": 8.429400622844696e-06, "step": 314410 }, { "epoch": 89.24780017030939, "grad_norm": 0.002320845378562808, "learning_rate": 1.0787680953732616e-05, "loss": 7.414445281028747e-06, "step": 314420 }, { "epoch": 89.25063866023275, "grad_norm": 0.0013640419347211719, "learning_rate": 1.0784842463809254e-05, "loss": 4.933774471282959e-06, "step": 314430 }, { "epoch": 89.25347715015612, "grad_norm": 0.0011638259747996926, "learning_rate": 1.0782003973885893e-05, "loss": 2.605803310871124e-05, "step": 314440 }, { "epoch": 89.25631564007948, "grad_norm": 0.000568474002648145, "learning_rate": 1.0779165483962531e-05, "loss": 7.66739249229431e-06, "step": 314450 }, { "epoch": 89.25915413000284, "grad_norm": 0.002668290166184306, "learning_rate": 1.0776326994039173e-05, "loss": 6.909854710102081e-06, "step": 314460 }, { "epoch": 89.2619926199262, "grad_norm": 0.003470886265859008, "learning_rate": 1.077348850411581e-05, "loss": 8.989498019218444e-06, "step": 314470 }, { "epoch": 89.26483110984957, "grad_norm": 0.001622501527890563, "learning_rate": 1.077065001419245e-05, "loss": 7.436051964759826e-06, "step": 314480 }, { "epoch": 89.26766959977292, "grad_norm": 0.001319992239587009, "learning_rate": 1.0767811524269088e-05, "loss": 2.1321140229701997e-05, "step": 314490 }, { "epoch": 89.27050808969628, "grad_norm": 0.001008004997856915, "learning_rate": 1.0764973034345728e-05, "loss": 2.1549127995967866e-05, "step": 314500 }, { "epoch": 89.27050808969628, "eval_accuracy": 0.987918865645069, "eval_loss": 0.050484754145145416, "eval_runtime": 35.8767, "eval_samples_per_second": 438.363, "eval_steps_per_second": 6.857, "step": 314500 }, { "epoch": 89.27334657961964, "grad_norm": 0.0005455968785099685, "learning_rate": 1.0762134544422368e-05, "loss": 4.555657505989075e-06, "step": 314510 }, { "epoch": 89.276185069543, "grad_norm": 0.003259822726249695, "learning_rate": 1.0759296054499007e-05, "loss": 1.4957226812839509e-05, "step": 314520 }, { "epoch": 89.27902355946637, "grad_norm": 0.0010429469402879477, "learning_rate": 1.0756457564575647e-05, "loss": 5.641765892505646e-06, "step": 314530 }, { "epoch": 89.28186204938973, "grad_norm": 0.0016027451492846012, "learning_rate": 1.0753619074652285e-05, "loss": 5.802325904369354e-06, "step": 314540 }, { "epoch": 89.28470053931308, "grad_norm": 0.002545741619542241, "learning_rate": 1.0750780584728925e-05, "loss": 6.290338933467865e-06, "step": 314550 }, { "epoch": 89.28753902923644, "grad_norm": 0.0033578157890588045, "learning_rate": 1.0747942094805564e-05, "loss": 4.3263658881187436e-06, "step": 314560 }, { "epoch": 89.2903775191598, "grad_norm": 0.003971319645643234, "learning_rate": 1.0745103604882204e-05, "loss": 6.021745502948761e-06, "step": 314570 }, { "epoch": 89.29321600908317, "grad_norm": 0.0012800091644749045, "learning_rate": 1.0742265114958842e-05, "loss": 7.752701640129089e-06, "step": 314580 }, { "epoch": 89.29605449900653, "grad_norm": 7.877766620367765e-05, "learning_rate": 1.0739426625035482e-05, "loss": 4.761852324008942e-06, "step": 314590 }, { "epoch": 89.2988929889299, "grad_norm": 0.0017652669921517372, "learning_rate": 1.073658813511212e-05, "loss": 6.533786654472351e-06, "step": 314600 }, { "epoch": 89.30173147885326, "grad_norm": 0.0008669914095662534, "learning_rate": 1.0733749645188761e-05, "loss": 5.2746385335922245e-06, "step": 314610 }, { "epoch": 89.3045699687766, "grad_norm": 0.0017639861907809973, "learning_rate": 1.0730911155265399e-05, "loss": 7.606297731399536e-06, "step": 314620 }, { "epoch": 89.30740845869997, "grad_norm": 0.003704998642206192, "learning_rate": 1.0728072665342039e-05, "loss": 9.383819997310638e-06, "step": 314630 }, { "epoch": 89.31024694862333, "grad_norm": 0.005624376703053713, "learning_rate": 1.0725234175418677e-05, "loss": 1.5994347631931304e-05, "step": 314640 }, { "epoch": 89.3130854385467, "grad_norm": 0.00048571027582511306, "learning_rate": 1.0722395685495316e-05, "loss": 4.571862518787384e-06, "step": 314650 }, { "epoch": 89.31592392847006, "grad_norm": 0.0016622263938188553, "learning_rate": 1.0719557195571956e-05, "loss": 7.787998765707016e-05, "step": 314660 }, { "epoch": 89.31876241839342, "grad_norm": 0.003148117568343878, "learning_rate": 1.0716718705648596e-05, "loss": 3.572255373001099e-05, "step": 314670 }, { "epoch": 89.32160090831678, "grad_norm": 0.001349173253402114, "learning_rate": 1.0713880215725235e-05, "loss": 5.644224584102631e-05, "step": 314680 }, { "epoch": 89.32443939824013, "grad_norm": 0.0025485572405159473, "learning_rate": 1.0711041725801873e-05, "loss": 1.0522082448005676e-05, "step": 314690 }, { "epoch": 89.3272778881635, "grad_norm": 0.0019466601079329848, "learning_rate": 1.0708203235878513e-05, "loss": 4.1419081389904024e-05, "step": 314700 }, { "epoch": 89.33011637808686, "grad_norm": 0.013011430390179157, "learning_rate": 1.0705364745955153e-05, "loss": 1.7903558909893035e-05, "step": 314710 }, { "epoch": 89.33295486801022, "grad_norm": 0.019912317395210266, "learning_rate": 1.0702526256031793e-05, "loss": 2.3004226386547087e-05, "step": 314720 }, { "epoch": 89.33579335793358, "grad_norm": 0.030029794201254845, "learning_rate": 1.069968776610843e-05, "loss": 1.2438930571079255e-05, "step": 314730 }, { "epoch": 89.33863184785695, "grad_norm": 0.0010052027646452188, "learning_rate": 1.069684927618507e-05, "loss": 1.5901215374469756e-05, "step": 314740 }, { "epoch": 89.34147033778031, "grad_norm": 0.00019619801605585963, "learning_rate": 1.0694010786261708e-05, "loss": 7.176212966442108e-06, "step": 314750 }, { "epoch": 89.34430882770366, "grad_norm": 0.0006577041931450367, "learning_rate": 1.069117229633835e-05, "loss": 9.67457890510559e-06, "step": 314760 }, { "epoch": 89.34714731762702, "grad_norm": 0.00452945102006197, "learning_rate": 1.0688333806414988e-05, "loss": 5.121715366840362e-06, "step": 314770 }, { "epoch": 89.34998580755038, "grad_norm": 0.001753583550453186, "learning_rate": 1.0685495316491627e-05, "loss": 6.886199116706848e-06, "step": 314780 }, { "epoch": 89.35282429747375, "grad_norm": 0.01064320933073759, "learning_rate": 1.0682656826568265e-05, "loss": 3.4988299012184144e-05, "step": 314790 }, { "epoch": 89.35566278739711, "grad_norm": 0.0016546954866498709, "learning_rate": 1.0679818336644905e-05, "loss": 3.803148865699768e-06, "step": 314800 }, { "epoch": 89.35850127732047, "grad_norm": 0.0008049793541431427, "learning_rate": 1.0676979846721545e-05, "loss": 7.059052586555481e-06, "step": 314810 }, { "epoch": 89.36133976724382, "grad_norm": 0.001538500189781189, "learning_rate": 1.0674141356798184e-05, "loss": 6.403960287570953e-06, "step": 314820 }, { "epoch": 89.36417825716718, "grad_norm": 0.0024299139622598886, "learning_rate": 1.0671302866874822e-05, "loss": 8.797086775302886e-06, "step": 314830 }, { "epoch": 89.36701674709055, "grad_norm": 0.007317302282899618, "learning_rate": 1.0668464376951462e-05, "loss": 1.0071136057376861e-05, "step": 314840 }, { "epoch": 89.36985523701391, "grad_norm": 0.0027825713623315096, "learning_rate": 1.0665625887028102e-05, "loss": 8.674897253513337e-06, "step": 314850 }, { "epoch": 89.37269372693727, "grad_norm": 0.002618617843836546, "learning_rate": 1.0662787397104741e-05, "loss": 5.069002509117127e-06, "step": 314860 }, { "epoch": 89.37553221686063, "grad_norm": 0.0007513569435104728, "learning_rate": 1.0659948907181381e-05, "loss": 5.557388067245483e-06, "step": 314870 }, { "epoch": 89.378370706784, "grad_norm": 0.004141601733863354, "learning_rate": 1.0657110417258019e-05, "loss": 6.892345845699311e-06, "step": 314880 }, { "epoch": 89.38120919670735, "grad_norm": 0.0007838468300178647, "learning_rate": 1.0654271927334659e-05, "loss": 6.061233580112457e-06, "step": 314890 }, { "epoch": 89.38404768663071, "grad_norm": 0.0012261881493031979, "learning_rate": 1.0651433437411297e-05, "loss": 3.275591880083084e-05, "step": 314900 }, { "epoch": 89.38688617655407, "grad_norm": 0.0019505335949361324, "learning_rate": 1.0648594947487938e-05, "loss": 1.0486319661140442e-05, "step": 314910 }, { "epoch": 89.38972466647743, "grad_norm": 0.00015043417806737125, "learning_rate": 1.0645756457564576e-05, "loss": 9.775348007678986e-06, "step": 314920 }, { "epoch": 89.3925631564008, "grad_norm": 0.0027113051619380713, "learning_rate": 1.0642917967641216e-05, "loss": 6.905756890773773e-06, "step": 314930 }, { "epoch": 89.39540164632416, "grad_norm": 0.00709297601133585, "learning_rate": 1.0640079477717854e-05, "loss": 7.350556552410126e-06, "step": 314940 }, { "epoch": 89.39824013624752, "grad_norm": 0.001273884903639555, "learning_rate": 1.0637240987794495e-05, "loss": 3.4850090742111206e-06, "step": 314950 }, { "epoch": 89.40107862617087, "grad_norm": 0.0007564474362879992, "learning_rate": 1.0634402497871133e-05, "loss": 4.565343260765076e-06, "step": 314960 }, { "epoch": 89.40391711609423, "grad_norm": 0.0005366142140701413, "learning_rate": 1.0631564007947773e-05, "loss": 5.979649722576142e-06, "step": 314970 }, { "epoch": 89.4067556060176, "grad_norm": 0.0003219483478460461, "learning_rate": 1.062872551802441e-05, "loss": 1.4070607721805573e-05, "step": 314980 }, { "epoch": 89.40959409594096, "grad_norm": 0.0007449517142958939, "learning_rate": 1.062588702810105e-05, "loss": 6.358325481414795e-06, "step": 314990 }, { "epoch": 89.41243258586432, "grad_norm": 0.0012719244696199894, "learning_rate": 1.062304853817769e-05, "loss": 4.039518535137176e-06, "step": 315000 }, { "epoch": 89.41243258586432, "eval_accuracy": 0.9877281108920964, "eval_loss": 0.049935482442379, "eval_runtime": 36.1995, "eval_samples_per_second": 434.453, "eval_steps_per_second": 6.796, "step": 315000 }, { "epoch": 89.41527107578769, "grad_norm": 0.0024041282013058662, "learning_rate": 1.062021004825433e-05, "loss": 5.638599395751953e-06, "step": 315010 }, { "epoch": 89.41810956571103, "grad_norm": 0.000880477367900312, "learning_rate": 1.0617371558330968e-05, "loss": 5.90793788433075e-06, "step": 315020 }, { "epoch": 89.4209480556344, "grad_norm": 0.0018525177147239447, "learning_rate": 1.0614533068407607e-05, "loss": 7.345713675022125e-06, "step": 315030 }, { "epoch": 89.42378654555776, "grad_norm": 0.003740411251783371, "learning_rate": 1.0611694578484247e-05, "loss": 1.0242499411106109e-05, "step": 315040 }, { "epoch": 89.42662503548112, "grad_norm": 0.01949721947312355, "learning_rate": 1.0608856088560887e-05, "loss": 8.509494364261627e-06, "step": 315050 }, { "epoch": 89.42946352540449, "grad_norm": 0.004700010642409325, "learning_rate": 1.0606017598637526e-05, "loss": 5.963630974292755e-06, "step": 315060 }, { "epoch": 89.43230201532785, "grad_norm": 0.00039159023435786366, "learning_rate": 1.0603179108714164e-05, "loss": 7.98013061285019e-06, "step": 315070 }, { "epoch": 89.43514050525121, "grad_norm": 0.016385089606046677, "learning_rate": 1.0600340618790804e-05, "loss": 1.6792118549346925e-05, "step": 315080 }, { "epoch": 89.43797899517456, "grad_norm": 0.004737253300845623, "learning_rate": 1.0597502128867442e-05, "loss": 8.255988359451293e-06, "step": 315090 }, { "epoch": 89.44081748509792, "grad_norm": 0.004855560604482889, "learning_rate": 1.0594663638944083e-05, "loss": 1.164861023426056e-05, "step": 315100 }, { "epoch": 89.44365597502129, "grad_norm": 0.0009509598603472114, "learning_rate": 1.0591825149020721e-05, "loss": 4.8924237489700316e-06, "step": 315110 }, { "epoch": 89.44649446494465, "grad_norm": 0.0007284189923666418, "learning_rate": 1.0588986659097361e-05, "loss": 7.115118205547333e-06, "step": 315120 }, { "epoch": 89.44933295486801, "grad_norm": 0.0006392525974661112, "learning_rate": 1.0586148169173999e-05, "loss": 6.05415552854538e-06, "step": 315130 }, { "epoch": 89.45217144479138, "grad_norm": 0.0004208532045595348, "learning_rate": 1.0583309679250639e-05, "loss": 5.750730633735657e-06, "step": 315140 }, { "epoch": 89.45500993471474, "grad_norm": 0.0009313393384218216, "learning_rate": 1.0580471189327278e-05, "loss": 3.900434821844101e-05, "step": 315150 }, { "epoch": 89.45784842463809, "grad_norm": 0.0031036746222525835, "learning_rate": 1.0577632699403918e-05, "loss": 8.367188274860383e-06, "step": 315160 }, { "epoch": 89.46068691456145, "grad_norm": 0.0009475885890424252, "learning_rate": 1.0574794209480556e-05, "loss": 9.794160723686218e-06, "step": 315170 }, { "epoch": 89.46352540448481, "grad_norm": 0.003724542912095785, "learning_rate": 1.0571955719557196e-05, "loss": 4.478134214878082e-05, "step": 315180 }, { "epoch": 89.46636389440818, "grad_norm": 0.0011322040809318423, "learning_rate": 1.0569117229633835e-05, "loss": 1.7769820988178254e-05, "step": 315190 }, { "epoch": 89.46920238433154, "grad_norm": 0.0015854703960940242, "learning_rate": 1.0566278739710475e-05, "loss": 6.7802146077156065e-06, "step": 315200 }, { "epoch": 89.4720408742549, "grad_norm": 0.011325839906930923, "learning_rate": 1.0563440249787115e-05, "loss": 1.0552071034908295e-05, "step": 315210 }, { "epoch": 89.47487936417826, "grad_norm": 0.0005612328532151878, "learning_rate": 1.0560601759863753e-05, "loss": 2.8573162853717805e-05, "step": 315220 }, { "epoch": 89.47771785410161, "grad_norm": 0.00046349703916348517, "learning_rate": 1.0557763269940393e-05, "loss": 1.0357797145843506e-05, "step": 315230 }, { "epoch": 89.48055634402498, "grad_norm": 0.00037242064718157053, "learning_rate": 1.055492478001703e-05, "loss": 6.335042417049408e-06, "step": 315240 }, { "epoch": 89.48339483394834, "grad_norm": 0.0013867584057152271, "learning_rate": 1.0552086290093672e-05, "loss": 8.29119235277176e-06, "step": 315250 }, { "epoch": 89.4862333238717, "grad_norm": 0.0011619124561548233, "learning_rate": 1.054924780017031e-05, "loss": 1.1774711310863495e-05, "step": 315260 }, { "epoch": 89.48907181379506, "grad_norm": 0.0010719751007854939, "learning_rate": 1.054640931024695e-05, "loss": 9.971857070922852e-06, "step": 315270 }, { "epoch": 89.49191030371843, "grad_norm": 0.0026108298916369677, "learning_rate": 1.0543570820323588e-05, "loss": 7.248483598232269e-06, "step": 315280 }, { "epoch": 89.49474879364178, "grad_norm": 0.01577739417552948, "learning_rate": 1.0540732330400227e-05, "loss": 9.32551920413971e-06, "step": 315290 }, { "epoch": 89.49758728356514, "grad_norm": 0.0012746049324050546, "learning_rate": 1.0537893840476867e-05, "loss": 8.633174002170562e-06, "step": 315300 }, { "epoch": 89.5004257734885, "grad_norm": 0.0036793553736060858, "learning_rate": 1.0535055350553507e-05, "loss": 6.242096424102783e-06, "step": 315310 }, { "epoch": 89.50326426341186, "grad_norm": 0.00047718797577545047, "learning_rate": 1.0532216860630145e-05, "loss": 9.212829172611237e-06, "step": 315320 }, { "epoch": 89.50610275333523, "grad_norm": 0.0033936069812625647, "learning_rate": 1.0529378370706784e-05, "loss": 6.230734288692474e-06, "step": 315330 }, { "epoch": 89.50894124325859, "grad_norm": 0.0018608253449201584, "learning_rate": 1.0526539880783424e-05, "loss": 1.3569928705692292e-05, "step": 315340 }, { "epoch": 89.51177973318195, "grad_norm": 0.002938290825113654, "learning_rate": 1.0523701390860064e-05, "loss": 8.926168084144592e-06, "step": 315350 }, { "epoch": 89.5146182231053, "grad_norm": 0.0005008968291804194, "learning_rate": 1.0520862900936702e-05, "loss": 1.0229647159576416e-05, "step": 315360 }, { "epoch": 89.51745671302866, "grad_norm": 0.0029713313560932875, "learning_rate": 1.0518024411013341e-05, "loss": 9.65464860200882e-06, "step": 315370 }, { "epoch": 89.52029520295203, "grad_norm": 0.0014749354450032115, "learning_rate": 1.0515185921089981e-05, "loss": 4.945695400238037e-06, "step": 315380 }, { "epoch": 89.52313369287539, "grad_norm": 0.0009091197280213237, "learning_rate": 1.0512347431166619e-05, "loss": 8.387491106987e-06, "step": 315390 }, { "epoch": 89.52597218279875, "grad_norm": 0.0006078956066630781, "learning_rate": 1.050950894124326e-05, "loss": 6.19497150182724e-06, "step": 315400 }, { "epoch": 89.52881067272212, "grad_norm": 0.004153990186750889, "learning_rate": 1.0506670451319898e-05, "loss": 6.44586980342865e-06, "step": 315410 }, { "epoch": 89.53164916264548, "grad_norm": 0.0007204838329926133, "learning_rate": 1.0503831961396538e-05, "loss": 7.105804979801178e-06, "step": 315420 }, { "epoch": 89.53448765256883, "grad_norm": 0.02458355203270912, "learning_rate": 1.0500993471473176e-05, "loss": 9.892880916595459e-06, "step": 315430 }, { "epoch": 89.53732614249219, "grad_norm": 0.00422975467517972, "learning_rate": 1.0498154981549816e-05, "loss": 6.135925650596618e-06, "step": 315440 }, { "epoch": 89.54016463241555, "grad_norm": 0.0011336440220475197, "learning_rate": 1.0495316491626455e-05, "loss": 5.241110920906067e-06, "step": 315450 }, { "epoch": 89.54300312233892, "grad_norm": 0.0005147307529114187, "learning_rate": 1.0492478001703095e-05, "loss": 5.908124148845673e-06, "step": 315460 }, { "epoch": 89.54584161226228, "grad_norm": 0.0009596490417607129, "learning_rate": 1.0489639511779733e-05, "loss": 5.295872688293457e-06, "step": 315470 }, { "epoch": 89.54868010218564, "grad_norm": 0.0015860131243243814, "learning_rate": 1.0486801021856373e-05, "loss": 4.867091774940491e-06, "step": 315480 }, { "epoch": 89.551518592109, "grad_norm": 0.007393307518213987, "learning_rate": 1.048396253193301e-05, "loss": 4.150904715061188e-06, "step": 315490 }, { "epoch": 89.55435708203235, "grad_norm": 0.002137587871402502, "learning_rate": 1.0481124042009652e-05, "loss": 5.276501178741455e-06, "step": 315500 }, { "epoch": 89.55435708203235, "eval_accuracy": 0.9884911299039868, "eval_loss": 0.048966605216264725, "eval_runtime": 38.4105, "eval_samples_per_second": 409.445, "eval_steps_per_second": 6.405, "step": 315500 }, { "epoch": 89.55719557195572, "grad_norm": 0.004436835180968046, "learning_rate": 1.047828555208629e-05, "loss": 1.1040270328521728e-05, "step": 315510 }, { "epoch": 89.56003406187908, "grad_norm": 0.0018799304962158203, "learning_rate": 1.047544706216293e-05, "loss": 6.514973938465118e-06, "step": 315520 }, { "epoch": 89.56287255180244, "grad_norm": 0.003143415553495288, "learning_rate": 1.047260857223957e-05, "loss": 5.760230123996735e-06, "step": 315530 }, { "epoch": 89.5657110417258, "grad_norm": 0.0006698346114717424, "learning_rate": 1.0469770082316207e-05, "loss": 5.0120055675506595e-06, "step": 315540 }, { "epoch": 89.56854953164917, "grad_norm": 0.004877758212387562, "learning_rate": 1.0466931592392849e-05, "loss": 6.138347089290619e-06, "step": 315550 }, { "epoch": 89.57138802157252, "grad_norm": 0.0013831094838678837, "learning_rate": 1.0464093102469487e-05, "loss": 4.537217319011688e-06, "step": 315560 }, { "epoch": 89.57422651149588, "grad_norm": 0.000599354098085314, "learning_rate": 1.0461254612546126e-05, "loss": 1.163184642791748e-05, "step": 315570 }, { "epoch": 89.57706500141924, "grad_norm": 0.0011985772289335728, "learning_rate": 1.0458416122622764e-05, "loss": 5.4391101002693175e-06, "step": 315580 }, { "epoch": 89.5799034913426, "grad_norm": 0.002264156471937895, "learning_rate": 1.0455577632699406e-05, "loss": 5.452893674373627e-06, "step": 315590 }, { "epoch": 89.58274198126597, "grad_norm": 0.0034145440440624952, "learning_rate": 1.0452739142776044e-05, "loss": 4.777312278747559e-06, "step": 315600 }, { "epoch": 89.58558047118933, "grad_norm": 0.000756729394197464, "learning_rate": 1.0449900652852683e-05, "loss": 8.84365290403366e-06, "step": 315610 }, { "epoch": 89.5884189611127, "grad_norm": 0.0010958637576550245, "learning_rate": 1.0447062162929321e-05, "loss": 9.614229202270508e-06, "step": 315620 }, { "epoch": 89.59125745103604, "grad_norm": 0.0018103144830092788, "learning_rate": 1.0444223673005961e-05, "loss": 7.631070911884307e-06, "step": 315630 }, { "epoch": 89.5940959409594, "grad_norm": 0.004291492514312267, "learning_rate": 1.04413851830826e-05, "loss": 6.053037941455841e-06, "step": 315640 }, { "epoch": 89.59693443088277, "grad_norm": 0.007194437552243471, "learning_rate": 1.043854669315924e-05, "loss": 6.7051500082015995e-06, "step": 315650 }, { "epoch": 89.59977292080613, "grad_norm": 0.0024307244457304478, "learning_rate": 1.0435708203235878e-05, "loss": 5.9656798839569095e-06, "step": 315660 }, { "epoch": 89.6026114107295, "grad_norm": 0.003939447924494743, "learning_rate": 1.0432869713312518e-05, "loss": 8.358433842658996e-06, "step": 315670 }, { "epoch": 89.60544990065286, "grad_norm": 0.00031993052107281983, "learning_rate": 1.0430031223389158e-05, "loss": 3.902614116668701e-06, "step": 315680 }, { "epoch": 89.60828839057622, "grad_norm": 0.0003709465963765979, "learning_rate": 1.0427192733465797e-05, "loss": 5.50299882888794e-06, "step": 315690 }, { "epoch": 89.61112688049957, "grad_norm": 0.0006341809057630599, "learning_rate": 1.0424354243542435e-05, "loss": 5.956366658210755e-06, "step": 315700 }, { "epoch": 89.61396537042293, "grad_norm": 0.0009695493499748409, "learning_rate": 1.0421515753619075e-05, "loss": 7.044896483421325e-06, "step": 315710 }, { "epoch": 89.6168038603463, "grad_norm": 0.0005906761507503688, "learning_rate": 1.0418677263695715e-05, "loss": 4.044361412525177e-06, "step": 315720 }, { "epoch": 89.61964235026966, "grad_norm": 0.0010254548396915197, "learning_rate": 1.0415838773772353e-05, "loss": 5.4802745580673214e-06, "step": 315730 }, { "epoch": 89.62248084019302, "grad_norm": 0.0011314258445054293, "learning_rate": 1.0413000283848994e-05, "loss": 8.607655763626098e-06, "step": 315740 }, { "epoch": 89.62531933011638, "grad_norm": 0.0011557582765817642, "learning_rate": 1.0410161793925632e-05, "loss": 5.499087274074555e-06, "step": 315750 }, { "epoch": 89.62815782003973, "grad_norm": 0.000994421890936792, "learning_rate": 1.0407323304002272e-05, "loss": 3.746896982192993e-06, "step": 315760 }, { "epoch": 89.6309963099631, "grad_norm": 0.0010161476675421, "learning_rate": 1.040448481407891e-05, "loss": 6.254762411117554e-06, "step": 315770 }, { "epoch": 89.63383479988646, "grad_norm": 0.0020191704388707876, "learning_rate": 1.040164632415555e-05, "loss": 8.655525743961334e-06, "step": 315780 }, { "epoch": 89.63667328980982, "grad_norm": 0.0002074202784569934, "learning_rate": 1.039880783423219e-05, "loss": 3.06498259305954e-06, "step": 315790 }, { "epoch": 89.63951177973318, "grad_norm": 0.0011614508694037795, "learning_rate": 1.0395969344308829e-05, "loss": 5.898624658584595e-06, "step": 315800 }, { "epoch": 89.64235026965655, "grad_norm": 0.0014636297710239887, "learning_rate": 1.0393130854385467e-05, "loss": 5.9871003031730655e-06, "step": 315810 }, { "epoch": 89.64518875957991, "grad_norm": 0.001939490088261664, "learning_rate": 1.0390292364462107e-05, "loss": 8.28821212053299e-06, "step": 315820 }, { "epoch": 89.64802724950326, "grad_norm": 0.0007009318214841187, "learning_rate": 1.0387453874538745e-05, "loss": 3.6641955375671388e-06, "step": 315830 }, { "epoch": 89.65086573942662, "grad_norm": 0.00047499535139650106, "learning_rate": 1.0384615384615386e-05, "loss": 4.225969314575196e-06, "step": 315840 }, { "epoch": 89.65370422934998, "grad_norm": 0.0008008222794160247, "learning_rate": 1.0381776894692024e-05, "loss": 3.271549940109253e-06, "step": 315850 }, { "epoch": 89.65654271927335, "grad_norm": 0.0001912749430630356, "learning_rate": 1.0378938404768664e-05, "loss": 5.639344453811645e-06, "step": 315860 }, { "epoch": 89.65938120919671, "grad_norm": 0.00097389001166448, "learning_rate": 1.0376099914845303e-05, "loss": 3.818795084953308e-06, "step": 315870 }, { "epoch": 89.66221969912007, "grad_norm": 0.0010643582791090012, "learning_rate": 1.0373261424921941e-05, "loss": 6.8623572587966916e-06, "step": 315880 }, { "epoch": 89.66505818904344, "grad_norm": 0.0005545910680666566, "learning_rate": 1.0370422934998583e-05, "loss": 4.3783336877822874e-06, "step": 315890 }, { "epoch": 89.66789667896678, "grad_norm": 0.0033612423576414585, "learning_rate": 1.036758444507522e-05, "loss": 5.725957453250885e-06, "step": 315900 }, { "epoch": 89.67073516889015, "grad_norm": 0.0012833732180297375, "learning_rate": 1.036474595515186e-05, "loss": 5.51808625459671e-06, "step": 315910 }, { "epoch": 89.67357365881351, "grad_norm": 0.0022407937794923782, "learning_rate": 1.0361907465228498e-05, "loss": 4.305504262447357e-06, "step": 315920 }, { "epoch": 89.67641214873687, "grad_norm": 0.0019968515262007713, "learning_rate": 1.0359068975305138e-05, "loss": 4.602223634719849e-06, "step": 315930 }, { "epoch": 89.67925063866024, "grad_norm": 0.009858720004558563, "learning_rate": 1.0356230485381778e-05, "loss": 1.0550394654273986e-05, "step": 315940 }, { "epoch": 89.6820891285836, "grad_norm": 0.0005897270166315138, "learning_rate": 1.0353391995458417e-05, "loss": 2.864934504032135e-06, "step": 315950 }, { "epoch": 89.68492761850696, "grad_norm": 0.000676908006425947, "learning_rate": 1.0350553505535055e-05, "loss": 3.0376017093658446e-06, "step": 315960 }, { "epoch": 89.68776610843031, "grad_norm": 0.00113801215775311, "learning_rate": 1.0347715015611695e-05, "loss": 5.7836994528770445e-06, "step": 315970 }, { "epoch": 89.69060459835367, "grad_norm": 0.0005581791629083455, "learning_rate": 1.0344876525688333e-05, "loss": 3.0005723237991334e-05, "step": 315980 }, { "epoch": 89.69344308827704, "grad_norm": 0.0003251895832363516, "learning_rate": 1.0342038035764974e-05, "loss": 5.7477504014968875e-06, "step": 315990 }, { "epoch": 89.6962815782004, "grad_norm": 0.007869147695600986, "learning_rate": 1.0339199545841612e-05, "loss": 4.201922565698624e-05, "step": 316000 }, { "epoch": 89.6962815782004, "eval_accuracy": 0.9874101863038087, "eval_loss": 0.054121728986501694, "eval_runtime": 37.6398, "eval_samples_per_second": 417.829, "eval_steps_per_second": 6.536, "step": 316000 }, { "epoch": 89.69912006812376, "grad_norm": 0.00016956220497377217, "learning_rate": 1.0336361055918252e-05, "loss": 7.010623812675476e-06, "step": 316010 }, { "epoch": 89.70195855804712, "grad_norm": 0.0012811239575967193, "learning_rate": 1.0333522565994892e-05, "loss": 6.537139415740967e-06, "step": 316020 }, { "epoch": 89.70479704797047, "grad_norm": 0.0002558377746026963, "learning_rate": 1.033068407607153e-05, "loss": 6.508082151412964e-06, "step": 316030 }, { "epoch": 89.70763553789384, "grad_norm": 0.0025467609521001577, "learning_rate": 1.032784558614817e-05, "loss": 7.157586514949798e-06, "step": 316040 }, { "epoch": 89.7104740278172, "grad_norm": 0.0018386597512289882, "learning_rate": 1.0325007096224809e-05, "loss": 9.965524077415466e-06, "step": 316050 }, { "epoch": 89.71331251774056, "grad_norm": 0.0005003417609259486, "learning_rate": 1.0322168606301449e-05, "loss": 4.936568439006806e-06, "step": 316060 }, { "epoch": 89.71615100766392, "grad_norm": 0.0012975800782442093, "learning_rate": 1.0319330116378087e-05, "loss": 7.40736722946167e-06, "step": 316070 }, { "epoch": 89.71898949758729, "grad_norm": 0.0020896357018500566, "learning_rate": 1.0316491626454726e-05, "loss": 5.871988832950592e-06, "step": 316080 }, { "epoch": 89.72182798751065, "grad_norm": 0.0006574426079168916, "learning_rate": 1.0313653136531366e-05, "loss": 5.1997601985931395e-06, "step": 316090 }, { "epoch": 89.724666477434, "grad_norm": 0.00054593023378402, "learning_rate": 1.0310814646608006e-05, "loss": 3.913231194019318e-06, "step": 316100 }, { "epoch": 89.72750496735736, "grad_norm": 0.0012071427190676332, "learning_rate": 1.0307976156684644e-05, "loss": 1.1678412556648255e-05, "step": 316110 }, { "epoch": 89.73034345728072, "grad_norm": 0.00016613787738606334, "learning_rate": 1.0305137666761283e-05, "loss": 5.28339296579361e-06, "step": 316120 }, { "epoch": 89.73318194720409, "grad_norm": 0.000913674826733768, "learning_rate": 1.0302299176837921e-05, "loss": 4.910118877887726e-06, "step": 316130 }, { "epoch": 89.73602043712745, "grad_norm": 0.0045096976682543755, "learning_rate": 1.0299460686914563e-05, "loss": 4.874356091022492e-06, "step": 316140 }, { "epoch": 89.73885892705081, "grad_norm": 0.0009163649519905448, "learning_rate": 1.02966221969912e-05, "loss": 3.2887235283851625e-05, "step": 316150 }, { "epoch": 89.74169741697418, "grad_norm": 0.003949954640120268, "learning_rate": 1.029378370706784e-05, "loss": 9.990483522415162e-06, "step": 316160 }, { "epoch": 89.74453590689753, "grad_norm": 0.0006717248470522463, "learning_rate": 1.0290945217144478e-05, "loss": 7.982738316059112e-06, "step": 316170 }, { "epoch": 89.74737439682089, "grad_norm": 0.0035055214539170265, "learning_rate": 1.0288106727221118e-05, "loss": 1.9627064466476442e-05, "step": 316180 }, { "epoch": 89.75021288674425, "grad_norm": 0.0003487409558147192, "learning_rate": 1.0285268237297758e-05, "loss": 9.87127423286438e-06, "step": 316190 }, { "epoch": 89.75305137666761, "grad_norm": 0.0010963886743411422, "learning_rate": 1.0282429747374397e-05, "loss": 1.535080373287201e-05, "step": 316200 }, { "epoch": 89.75588986659098, "grad_norm": 0.0021085699554532766, "learning_rate": 1.0279591257451037e-05, "loss": 2.117399126291275e-05, "step": 316210 }, { "epoch": 89.75872835651434, "grad_norm": 0.0031653563492000103, "learning_rate": 1.0276752767527675e-05, "loss": 8.026137948036194e-06, "step": 316220 }, { "epoch": 89.76156684643769, "grad_norm": 0.0006397956167347729, "learning_rate": 1.0273914277604317e-05, "loss": 6.597302854061127e-06, "step": 316230 }, { "epoch": 89.76440533636105, "grad_norm": 0.002245602197945118, "learning_rate": 1.0271075787680955e-05, "loss": 2.1369196474552155e-05, "step": 316240 }, { "epoch": 89.76724382628441, "grad_norm": 0.005761255044490099, "learning_rate": 1.0268237297757594e-05, "loss": 6.836093962192535e-06, "step": 316250 }, { "epoch": 89.77008231620778, "grad_norm": 0.0007775508565828204, "learning_rate": 1.0265398807834232e-05, "loss": 5.949102342128754e-06, "step": 316260 }, { "epoch": 89.77292080613114, "grad_norm": 0.0007428537937812507, "learning_rate": 1.0262560317910872e-05, "loss": 7.268227636814117e-06, "step": 316270 }, { "epoch": 89.7757592960545, "grad_norm": 0.0026168308686465025, "learning_rate": 1.0259721827987512e-05, "loss": 9.003467857837677e-06, "step": 316280 }, { "epoch": 89.77859778597787, "grad_norm": 0.0007438208558596671, "learning_rate": 1.0256883338064151e-05, "loss": 2.0598247647285462e-05, "step": 316290 }, { "epoch": 89.78143627590121, "grad_norm": 0.001658960827626288, "learning_rate": 1.025404484814079e-05, "loss": 5.002133548259735e-06, "step": 316300 }, { "epoch": 89.78427476582458, "grad_norm": 0.0021437271498143673, "learning_rate": 1.0251206358217429e-05, "loss": 1.1441856622695923e-05, "step": 316310 }, { "epoch": 89.78711325574794, "grad_norm": 0.002508105942979455, "learning_rate": 1.0248367868294067e-05, "loss": 9.531714022159577e-06, "step": 316320 }, { "epoch": 89.7899517456713, "grad_norm": 0.0009079690789803863, "learning_rate": 1.0245529378370708e-05, "loss": 1.0405667126178741e-05, "step": 316330 }, { "epoch": 89.79279023559467, "grad_norm": 0.00041679077548906207, "learning_rate": 1.0242690888447346e-05, "loss": 2.5606900453567506e-05, "step": 316340 }, { "epoch": 89.79562872551803, "grad_norm": 0.004003516864031553, "learning_rate": 1.0239852398523986e-05, "loss": 9.013153612613677e-06, "step": 316350 }, { "epoch": 89.79846721544139, "grad_norm": 0.002551178215071559, "learning_rate": 1.0237013908600626e-05, "loss": 8.816830813884735e-06, "step": 316360 }, { "epoch": 89.80130570536474, "grad_norm": 0.0019860854372382164, "learning_rate": 1.0234175418677264e-05, "loss": 8.163414895534515e-06, "step": 316370 }, { "epoch": 89.8041441952881, "grad_norm": 0.0017320968909189105, "learning_rate": 1.0231336928753903e-05, "loss": 9.118206799030304e-06, "step": 316380 }, { "epoch": 89.80698268521147, "grad_norm": 0.00027605879586189985, "learning_rate": 1.0228498438830543e-05, "loss": 9.085237979888917e-06, "step": 316390 }, { "epoch": 89.80982117513483, "grad_norm": 0.006984326988458633, "learning_rate": 1.0225659948907183e-05, "loss": 8.384883403778077e-06, "step": 316400 }, { "epoch": 89.81265966505819, "grad_norm": 0.004568762611597776, "learning_rate": 1.022282145898382e-05, "loss": 6.708502769470215e-06, "step": 316410 }, { "epoch": 89.81549815498155, "grad_norm": 0.0019773601088672876, "learning_rate": 1.021998296906046e-05, "loss": 5.625560879707336e-06, "step": 316420 }, { "epoch": 89.81833664490492, "grad_norm": 0.018401343375444412, "learning_rate": 1.02171444791371e-05, "loss": 1.003071665763855e-05, "step": 316430 }, { "epoch": 89.82117513482827, "grad_norm": 0.005345964338630438, "learning_rate": 1.021430598921374e-05, "loss": 3.610923886299133e-06, "step": 316440 }, { "epoch": 89.82401362475163, "grad_norm": 0.0018893074011430144, "learning_rate": 1.0211467499290378e-05, "loss": 5.64921647310257e-06, "step": 316450 }, { "epoch": 89.82685211467499, "grad_norm": 0.0031754770316183567, "learning_rate": 1.0208629009367017e-05, "loss": 6.795302033424377e-06, "step": 316460 }, { "epoch": 89.82969060459835, "grad_norm": 0.0006964972708374262, "learning_rate": 1.0205790519443655e-05, "loss": 6.121024489402771e-06, "step": 316470 }, { "epoch": 89.83252909452172, "grad_norm": 0.0006677312194369733, "learning_rate": 1.0202952029520297e-05, "loss": 5.538389086723328e-06, "step": 316480 }, { "epoch": 89.83536758444508, "grad_norm": 0.0002307557879248634, "learning_rate": 1.0200113539596935e-05, "loss": 2.988092601299286e-05, "step": 316490 }, { "epoch": 89.83820607436843, "grad_norm": 0.02647748403251171, "learning_rate": 1.0197275049673574e-05, "loss": 1.617409288883209e-05, "step": 316500 }, { "epoch": 89.83820607436843, "eval_accuracy": 0.986965091880206, "eval_loss": 0.05803213641047478, "eval_runtime": 36.433, "eval_samples_per_second": 431.669, "eval_steps_per_second": 6.752, "step": 316500 }, { "epoch": 89.84104456429179, "grad_norm": 0.14277991652488708, "learning_rate": 1.0194436559750212e-05, "loss": 3.2314658164978026e-05, "step": 316510 }, { "epoch": 89.84388305421515, "grad_norm": 0.002214586827903986, "learning_rate": 1.0191598069826852e-05, "loss": 9.725429117679596e-06, "step": 316520 }, { "epoch": 89.84672154413852, "grad_norm": 0.006299261469393969, "learning_rate": 1.0188759579903492e-05, "loss": 7.23470002412796e-06, "step": 316530 }, { "epoch": 89.84956003406188, "grad_norm": 0.0013511691940948367, "learning_rate": 1.0185921089980131e-05, "loss": 2.115592360496521e-05, "step": 316540 }, { "epoch": 89.85239852398524, "grad_norm": 0.0009599041077308357, "learning_rate": 1.0183082600056771e-05, "loss": 5.83939254283905e-06, "step": 316550 }, { "epoch": 89.8552370139086, "grad_norm": 0.018765976652503014, "learning_rate": 1.0180244110133409e-05, "loss": 1.0225921869277954e-05, "step": 316560 }, { "epoch": 89.85807550383196, "grad_norm": 0.0004939456703141332, "learning_rate": 1.0177405620210049e-05, "loss": 6.714649498462677e-06, "step": 316570 }, { "epoch": 89.86091399375532, "grad_norm": 0.0022469074465334415, "learning_rate": 1.0174567130286688e-05, "loss": 8.275546133518218e-06, "step": 316580 }, { "epoch": 89.86375248367868, "grad_norm": 0.00045632474939338863, "learning_rate": 1.0171728640363328e-05, "loss": 7.209181785583496e-06, "step": 316590 }, { "epoch": 89.86659097360204, "grad_norm": 0.04230130463838577, "learning_rate": 1.0169173999432301e-05, "loss": 0.001789901964366436, "step": 316600 }, { "epoch": 89.8694294635254, "grad_norm": 0.0019069898407906294, "learning_rate": 1.0166335509508943e-05, "loss": 0.003477056324481964, "step": 316610 }, { "epoch": 89.87226795344877, "grad_norm": 0.008316314779222012, "learning_rate": 1.016349701958558e-05, "loss": 1.1284090578556061e-05, "step": 316620 }, { "epoch": 89.87510644337213, "grad_norm": 0.008120080456137657, "learning_rate": 1.016065852966222e-05, "loss": 3.8699619472026826e-05, "step": 316630 }, { "epoch": 89.87794493329548, "grad_norm": 0.0010526742553338408, "learning_rate": 1.015782003973886e-05, "loss": 3.763418644666672e-05, "step": 316640 }, { "epoch": 89.88078342321884, "grad_norm": 0.001894557848572731, "learning_rate": 1.0154981549815498e-05, "loss": 6.523355841636657e-06, "step": 316650 }, { "epoch": 89.8836219131422, "grad_norm": 0.007334281224757433, "learning_rate": 1.015214305989214e-05, "loss": 1.996830105781555e-05, "step": 316660 }, { "epoch": 89.88646040306557, "grad_norm": 0.001092902966775, "learning_rate": 1.0149304569968777e-05, "loss": 2.0881369709968568e-05, "step": 316670 }, { "epoch": 89.88929889298893, "grad_norm": 0.0006947075016796589, "learning_rate": 1.0146466080045417e-05, "loss": 7.0141628384590146e-06, "step": 316680 }, { "epoch": 89.8921373829123, "grad_norm": 0.0011453087208792567, "learning_rate": 1.0143627590122055e-05, "loss": 0.0011601263657212257, "step": 316690 }, { "epoch": 89.89497587283566, "grad_norm": 0.14438658952713013, "learning_rate": 1.0140789100198695e-05, "loss": 3.2590515911579135e-05, "step": 316700 }, { "epoch": 89.89781436275901, "grad_norm": 0.0004774034896399826, "learning_rate": 1.0137950610275334e-05, "loss": 0.0012434924021363259, "step": 316710 }, { "epoch": 89.90065285268237, "grad_norm": 0.0023297888692468405, "learning_rate": 1.0135112120351974e-05, "loss": 1.301988959312439e-05, "step": 316720 }, { "epoch": 89.90349134260573, "grad_norm": 0.015519569627940655, "learning_rate": 1.0132273630428612e-05, "loss": 0.0003892991691827774, "step": 316730 }, { "epoch": 89.9063298325291, "grad_norm": 0.0002574405225459486, "learning_rate": 1.0129435140505252e-05, "loss": 1.1184811592102051e-05, "step": 316740 }, { "epoch": 89.90916832245246, "grad_norm": 0.018472276628017426, "learning_rate": 1.012659665058189e-05, "loss": 0.0002175627276301384, "step": 316750 }, { "epoch": 89.91200681237582, "grad_norm": 0.0030197910964488983, "learning_rate": 1.0123758160658531e-05, "loss": 6.118975579738617e-06, "step": 316760 }, { "epoch": 89.91484530229917, "grad_norm": 0.0025436589494347572, "learning_rate": 1.0120919670735169e-05, "loss": 0.001558958739042282, "step": 316770 }, { "epoch": 89.91768379222253, "grad_norm": 0.010112296789884567, "learning_rate": 1.0118081180811809e-05, "loss": 1.0598450899124146e-05, "step": 316780 }, { "epoch": 89.9205222821459, "grad_norm": 0.0051878285594284534, "learning_rate": 1.0115242690888448e-05, "loss": 1.1607632040977478e-05, "step": 316790 }, { "epoch": 89.92336077206926, "grad_norm": 0.0015374922659248114, "learning_rate": 1.0112404200965086e-05, "loss": 7.053092122077942e-06, "step": 316800 }, { "epoch": 89.92619926199262, "grad_norm": 0.003118011401966214, "learning_rate": 1.0109565711041726e-05, "loss": 2.0064227283000945e-05, "step": 316810 }, { "epoch": 89.92903775191598, "grad_norm": 0.0005212448886595666, "learning_rate": 1.0106727221118366e-05, "loss": 1.2356787919998169e-05, "step": 316820 }, { "epoch": 89.93187624183935, "grad_norm": 0.0014080761466175318, "learning_rate": 1.0103888731195005e-05, "loss": 2.1694600582122804e-05, "step": 316830 }, { "epoch": 89.9347147317627, "grad_norm": 0.00361373508349061, "learning_rate": 1.0101050241271643e-05, "loss": 4.850700497627258e-05, "step": 316840 }, { "epoch": 89.93755322168606, "grad_norm": 0.001039988361299038, "learning_rate": 1.0098211751348283e-05, "loss": 6.778910756111145e-06, "step": 316850 }, { "epoch": 89.94039171160942, "grad_norm": 0.0007672904757782817, "learning_rate": 1.0095373261424923e-05, "loss": 1.033693552017212e-05, "step": 316860 }, { "epoch": 89.94323020153278, "grad_norm": 0.0004457767936401069, "learning_rate": 1.0092534771501562e-05, "loss": 1.2590549886226654e-05, "step": 316870 }, { "epoch": 89.94606869145615, "grad_norm": 0.04704252630472183, "learning_rate": 1.00896962815782e-05, "loss": 1.637618988752365e-05, "step": 316880 }, { "epoch": 89.94890718137951, "grad_norm": 0.002708091866225004, "learning_rate": 1.008685779165484e-05, "loss": 1.8664635717868805e-05, "step": 316890 }, { "epoch": 89.95174567130287, "grad_norm": 0.0002321796491742134, "learning_rate": 1.0084019301731478e-05, "loss": 6.177462637424469e-06, "step": 316900 }, { "epoch": 89.95458416122622, "grad_norm": 0.0015712527092546225, "learning_rate": 1.008118081180812e-05, "loss": 4.802830517292023e-06, "step": 316910 }, { "epoch": 89.95742265114959, "grad_norm": 0.00046257622307166457, "learning_rate": 1.0078342321884757e-05, "loss": 6.592832505702972e-06, "step": 316920 }, { "epoch": 89.96026114107295, "grad_norm": 0.0027231131680309772, "learning_rate": 1.0075503831961397e-05, "loss": 8.245185017585755e-06, "step": 316930 }, { "epoch": 89.96309963099631, "grad_norm": 0.000386973813874647, "learning_rate": 1.0072665342038035e-05, "loss": 7.399357855319977e-06, "step": 316940 }, { "epoch": 89.96593812091967, "grad_norm": 7.706484757363796e-05, "learning_rate": 1.0069826852114675e-05, "loss": 5.9802085161209105e-06, "step": 316950 }, { "epoch": 89.96877661084304, "grad_norm": 0.0006088872905820608, "learning_rate": 1.0066988362191315e-05, "loss": 5.275197327136993e-06, "step": 316960 }, { "epoch": 89.97161510076639, "grad_norm": 0.0009032774833030999, "learning_rate": 1.0064149872267954e-05, "loss": 5.594640970230102e-06, "step": 316970 }, { "epoch": 89.97445359068975, "grad_norm": 0.0006443801103159785, "learning_rate": 1.0061311382344594e-05, "loss": 5.635805428028106e-06, "step": 316980 }, { "epoch": 89.97729208061311, "grad_norm": 0.0016163536347448826, "learning_rate": 1.0058472892421232e-05, "loss": 9.06083732843399e-06, "step": 316990 }, { "epoch": 89.98013057053647, "grad_norm": 0.0021609021350741386, "learning_rate": 1.0055634402497872e-05, "loss": 5.077943205833435e-06, "step": 317000 }, { "epoch": 89.98013057053647, "eval_accuracy": 0.9885547148216443, "eval_loss": 0.05179942771792412, "eval_runtime": 35.8236, "eval_samples_per_second": 439.012, "eval_steps_per_second": 6.867, "step": 317000 }, { "epoch": 89.98296906045984, "grad_norm": 0.0014324733056128025, "learning_rate": 1.0052795912574511e-05, "loss": 3.6031007766723635e-06, "step": 317010 }, { "epoch": 89.9858075503832, "grad_norm": 0.003034986322745681, "learning_rate": 1.0049957422651151e-05, "loss": 1.1111795902252198e-05, "step": 317020 }, { "epoch": 89.98864604030656, "grad_norm": 0.029313933104276657, "learning_rate": 1.0047118932727789e-05, "loss": 9.49036329984665e-06, "step": 317030 }, { "epoch": 89.99148453022991, "grad_norm": 0.0003919119480997324, "learning_rate": 1.0044280442804429e-05, "loss": 5.9470534324646e-06, "step": 317040 }, { "epoch": 89.99432302015327, "grad_norm": 0.0014906107680872083, "learning_rate": 1.0041441952881067e-05, "loss": 6.384029984474182e-06, "step": 317050 }, { "epoch": 89.99716151007664, "grad_norm": 0.0018789760069921613, "learning_rate": 1.0038603462957708e-05, "loss": 5.029700696468354e-06, "step": 317060 }, { "epoch": 90.0, "grad_norm": 0.0012238766066730022, "learning_rate": 1.0035764973034346e-05, "loss": 6.262196257011965e-06, "step": 317070 }, { "epoch": 90.00283848992336, "grad_norm": 0.00017130734340753406, "learning_rate": 1.0032926483110986e-05, "loss": 4.395470023155213e-06, "step": 317080 }, { "epoch": 90.00567697984673, "grad_norm": 0.00038295326521620154, "learning_rate": 1.0030087993187624e-05, "loss": 5.661696195602417e-06, "step": 317090 }, { "epoch": 90.00851546977009, "grad_norm": 0.006254907697439194, "learning_rate": 1.0027249503264263e-05, "loss": 5.877763032913208e-06, "step": 317100 }, { "epoch": 90.01135395969344, "grad_norm": 0.0005087698809802532, "learning_rate": 1.0024411013340903e-05, "loss": 5.277246236801147e-06, "step": 317110 }, { "epoch": 90.0141924496168, "grad_norm": 0.0023602847941219807, "learning_rate": 1.0021572523417543e-05, "loss": 5.673617124557495e-06, "step": 317120 }, { "epoch": 90.01703093954016, "grad_norm": 0.0006398884579539299, "learning_rate": 1.0018734033494182e-05, "loss": 5.188211798667908e-06, "step": 317130 }, { "epoch": 90.01986942946353, "grad_norm": 0.0006565103540197015, "learning_rate": 1.001589554357082e-05, "loss": 4.609487950801849e-06, "step": 317140 }, { "epoch": 90.02270791938689, "grad_norm": 0.00019531276484485716, "learning_rate": 1.001305705364746e-05, "loss": 5.07403165102005e-06, "step": 317150 }, { "epoch": 90.02554640931025, "grad_norm": 0.000501849630381912, "learning_rate": 1.00102185637241e-05, "loss": 7.246062159538269e-06, "step": 317160 }, { "epoch": 90.02838489923361, "grad_norm": 0.0043426258489489555, "learning_rate": 1.000738007380074e-05, "loss": 8.449703454971313e-06, "step": 317170 }, { "epoch": 90.03122338915696, "grad_norm": 0.005145457107573748, "learning_rate": 1.0004541583877377e-05, "loss": 9.203515946865082e-06, "step": 317180 }, { "epoch": 90.03406187908033, "grad_norm": 0.006920960266143084, "learning_rate": 1.0001703093954017e-05, "loss": 5.83399087190628e-06, "step": 317190 }, { "epoch": 90.03690036900369, "grad_norm": 0.00023637594131287187, "learning_rate": 9.998864604030655e-06, "loss": 5.593337118625641e-06, "step": 317200 }, { "epoch": 90.03973885892705, "grad_norm": 0.0032997566740959883, "learning_rate": 9.996026114107296e-06, "loss": 8.274614810943603e-06, "step": 317210 }, { "epoch": 90.04257734885041, "grad_norm": 0.0026032382156699896, "learning_rate": 9.993187624183934e-06, "loss": 5.759298801422119e-06, "step": 317220 }, { "epoch": 90.04541583877378, "grad_norm": 0.0018241199431940913, "learning_rate": 9.990349134260574e-06, "loss": 4.909560084342957e-06, "step": 317230 }, { "epoch": 90.04825432869713, "grad_norm": 0.0020795788150280714, "learning_rate": 9.987510644337212e-06, "loss": 3.2922253012657166e-06, "step": 317240 }, { "epoch": 90.05109281862049, "grad_norm": 0.0006686457782052457, "learning_rate": 9.984672154413852e-06, "loss": 3.2616779208183287e-06, "step": 317250 }, { "epoch": 90.05393130854385, "grad_norm": 0.0012242684606462717, "learning_rate": 9.981833664490491e-06, "loss": 6.608106195926666e-06, "step": 317260 }, { "epoch": 90.05676979846722, "grad_norm": 0.0009551711264066398, "learning_rate": 9.978995174567131e-06, "loss": 4.889816045761108e-06, "step": 317270 }, { "epoch": 90.05960828839058, "grad_norm": 0.0008589118951931596, "learning_rate": 9.976156684643769e-06, "loss": 5.833245813846588e-06, "step": 317280 }, { "epoch": 90.06244677831394, "grad_norm": 0.0007071839645504951, "learning_rate": 9.973318194720409e-06, "loss": 4.990957677364349e-06, "step": 317290 }, { "epoch": 90.0652852682373, "grad_norm": 0.007171932607889175, "learning_rate": 9.970479704797048e-06, "loss": 7.758848369121552e-06, "step": 317300 }, { "epoch": 90.06812375816065, "grad_norm": 0.0003194283926859498, "learning_rate": 9.967641214873688e-06, "loss": 3.9460137486457825e-06, "step": 317310 }, { "epoch": 90.07096224808402, "grad_norm": 0.003425814677029848, "learning_rate": 9.964802724950328e-06, "loss": 5.294568836688995e-06, "step": 317320 }, { "epoch": 90.07380073800738, "grad_norm": 0.009937174618244171, "learning_rate": 9.961964235026966e-06, "loss": 9.557418525218964e-06, "step": 317330 }, { "epoch": 90.07663922793074, "grad_norm": 0.001518457313068211, "learning_rate": 9.959125745103605e-06, "loss": 9.433180093765258e-06, "step": 317340 }, { "epoch": 90.0794777178541, "grad_norm": 0.00042365730041638017, "learning_rate": 9.956287255180245e-06, "loss": 3.1033530831336973e-06, "step": 317350 }, { "epoch": 90.08231620777747, "grad_norm": 0.0019603746477514505, "learning_rate": 9.953448765256885e-06, "loss": 5.8284029364585875e-06, "step": 317360 }, { "epoch": 90.08515469770083, "grad_norm": 0.008129304274916649, "learning_rate": 9.950610275333523e-06, "loss": 7.046759128570556e-06, "step": 317370 }, { "epoch": 90.08799318762418, "grad_norm": 0.00023855542531237006, "learning_rate": 9.947771785410162e-06, "loss": 5.201995372772217e-06, "step": 317380 }, { "epoch": 90.09083167754754, "grad_norm": 0.00033038388937711716, "learning_rate": 9.9449332954868e-06, "loss": 3.6865472793579102e-06, "step": 317390 }, { "epoch": 90.0936701674709, "grad_norm": 0.0006430922658182681, "learning_rate": 9.942094805563442e-06, "loss": 5.9962272644042965e-06, "step": 317400 }, { "epoch": 90.09650865739427, "grad_norm": 0.002227455610409379, "learning_rate": 9.93925631564008e-06, "loss": 4.683993756771087e-06, "step": 317410 }, { "epoch": 90.09934714731763, "grad_norm": 0.0007805890054441988, "learning_rate": 9.93641782571672e-06, "loss": 7.059052586555481e-06, "step": 317420 }, { "epoch": 90.10218563724099, "grad_norm": 0.0007619140669703484, "learning_rate": 9.933579335793357e-06, "loss": 1.1044181883335114e-05, "step": 317430 }, { "epoch": 90.10502412716434, "grad_norm": 0.004206147976219654, "learning_rate": 9.930740845869997e-06, "loss": 6.6390261054039005e-06, "step": 317440 }, { "epoch": 90.1078626170877, "grad_norm": 0.0006379755795933306, "learning_rate": 9.927902355946637e-06, "loss": 3.810599446296692e-06, "step": 317450 }, { "epoch": 90.11070110701107, "grad_norm": 0.0013309830101206899, "learning_rate": 9.925063866023277e-06, "loss": 4.418939352035523e-06, "step": 317460 }, { "epoch": 90.11353959693443, "grad_norm": 0.0014682529726997018, "learning_rate": 9.922225376099916e-06, "loss": 5.5674463510513306e-06, "step": 317470 }, { "epoch": 90.1163780868578, "grad_norm": 0.001759328879415989, "learning_rate": 9.919386886176554e-06, "loss": 5.7889148592948915e-06, "step": 317480 }, { "epoch": 90.11921657678116, "grad_norm": 0.000961254583671689, "learning_rate": 9.916548396253194e-06, "loss": 4.895776510238648e-06, "step": 317490 }, { "epoch": 90.12205506670452, "grad_norm": 0.003497903235256672, "learning_rate": 9.913709906329834e-06, "loss": 5.145743489265442e-06, "step": 317500 }, { "epoch": 90.12205506670452, "eval_accuracy": 0.9888090544922744, "eval_loss": 0.05063338577747345, "eval_runtime": 36.4212, "eval_samples_per_second": 431.809, "eval_steps_per_second": 6.754, "step": 317500 }, { "epoch": 90.12489355662787, "grad_norm": 0.005107417237013578, "learning_rate": 9.910871416406473e-06, "loss": 5.639903247356415e-06, "step": 317510 }, { "epoch": 90.12773204655123, "grad_norm": 0.0009403466247022152, "learning_rate": 9.908032926483111e-06, "loss": 3.664940595626831e-06, "step": 317520 }, { "epoch": 90.1305705364746, "grad_norm": 0.0010709329508244991, "learning_rate": 9.905194436559751e-06, "loss": 7.2481110692024234e-06, "step": 317530 }, { "epoch": 90.13340902639796, "grad_norm": 0.0007312021916732192, "learning_rate": 9.902355946636389e-06, "loss": 2.4745240807533264e-06, "step": 317540 }, { "epoch": 90.13624751632132, "grad_norm": 0.002947509055957198, "learning_rate": 9.89951745671303e-06, "loss": 9.240210056304932e-06, "step": 317550 }, { "epoch": 90.13908600624468, "grad_norm": 0.0049298726953566074, "learning_rate": 9.896678966789668e-06, "loss": 5.728006362915039e-06, "step": 317560 }, { "epoch": 90.14192449616804, "grad_norm": 0.0020828477572649717, "learning_rate": 9.893840476866308e-06, "loss": 4.236213862895966e-06, "step": 317570 }, { "epoch": 90.1447629860914, "grad_norm": 0.0004503944655880332, "learning_rate": 9.891001986942946e-06, "loss": 3.6517158150672913e-06, "step": 317580 }, { "epoch": 90.14760147601476, "grad_norm": 0.001318106078542769, "learning_rate": 9.888163497019586e-06, "loss": 3.856979310512542e-06, "step": 317590 }, { "epoch": 90.15043996593812, "grad_norm": 0.0005976250395178795, "learning_rate": 9.885325007096225e-06, "loss": 2.7187168598175047e-06, "step": 317600 }, { "epoch": 90.15327845586148, "grad_norm": 0.016238337382674217, "learning_rate": 9.882486517172865e-06, "loss": 1.0446831583976746e-05, "step": 317610 }, { "epoch": 90.15611694578485, "grad_norm": 0.004473899025470018, "learning_rate": 9.879648027249503e-06, "loss": 6.107427179813385e-06, "step": 317620 }, { "epoch": 90.15895543570821, "grad_norm": 0.0030376927461475134, "learning_rate": 9.876809537326143e-06, "loss": 4.067830741405487e-06, "step": 317630 }, { "epoch": 90.16179392563157, "grad_norm": 0.001548929139971733, "learning_rate": 9.873971047402782e-06, "loss": 7.021799683570862e-06, "step": 317640 }, { "epoch": 90.16463241555492, "grad_norm": 0.0008333692676387727, "learning_rate": 9.871132557479422e-06, "loss": 3.613904118537903e-06, "step": 317650 }, { "epoch": 90.16747090547828, "grad_norm": 0.00844622403383255, "learning_rate": 9.868294067556062e-06, "loss": 1.8628127872943877e-05, "step": 317660 }, { "epoch": 90.17030939540165, "grad_norm": 0.00689340615645051, "learning_rate": 9.8654555776327e-06, "loss": 5.844235420227051e-06, "step": 317670 }, { "epoch": 90.17314788532501, "grad_norm": 0.023276248946785927, "learning_rate": 9.86261708770934e-06, "loss": 9.324029088020325e-06, "step": 317680 }, { "epoch": 90.17598637524837, "grad_norm": 0.2972589433193207, "learning_rate": 9.859778597785977e-06, "loss": 2.4365819990634917e-05, "step": 317690 }, { "epoch": 90.17882486517173, "grad_norm": 0.002113988157361746, "learning_rate": 9.856940107862619e-06, "loss": 1.4271773397922517e-05, "step": 317700 }, { "epoch": 90.18166335509508, "grad_norm": 0.0025544508825987577, "learning_rate": 9.854101617939257e-06, "loss": 6.41215592622757e-06, "step": 317710 }, { "epoch": 90.18450184501845, "grad_norm": 0.0020305397920310497, "learning_rate": 9.851263128015896e-06, "loss": 5.688872188329697e-05, "step": 317720 }, { "epoch": 90.18734033494181, "grad_norm": 0.0011829098220914602, "learning_rate": 9.848424638092534e-06, "loss": 1.805778592824936e-05, "step": 317730 }, { "epoch": 90.19017882486517, "grad_norm": 0.0018139160238206387, "learning_rate": 9.845586148169174e-06, "loss": 2.4041905999183654e-05, "step": 317740 }, { "epoch": 90.19301731478853, "grad_norm": 0.0055122519843280315, "learning_rate": 9.842747658245814e-06, "loss": 2.9619969427585602e-05, "step": 317750 }, { "epoch": 90.1958558047119, "grad_norm": 0.000834847625810653, "learning_rate": 9.839909168322453e-06, "loss": 6.814952939748764e-05, "step": 317760 }, { "epoch": 90.19869429463526, "grad_norm": 0.0037903895135968924, "learning_rate": 9.837070678399091e-06, "loss": 0.00010969564318656922, "step": 317770 }, { "epoch": 90.20153278455861, "grad_norm": 0.012711027637124062, "learning_rate": 9.834232188475731e-06, "loss": 1.2871809303760529e-05, "step": 317780 }, { "epoch": 90.20437127448197, "grad_norm": 0.004235380329191685, "learning_rate": 9.83139369855237e-06, "loss": 0.002339698001742363, "step": 317790 }, { "epoch": 90.20720976440533, "grad_norm": 0.0011025829007849097, "learning_rate": 9.82855520862901e-06, "loss": 4.56932932138443e-05, "step": 317800 }, { "epoch": 90.2100482543287, "grad_norm": 0.0009604496881365776, "learning_rate": 9.825716718705648e-06, "loss": 5.639195442199707e-05, "step": 317810 }, { "epoch": 90.21288674425206, "grad_norm": 0.0011572461808100343, "learning_rate": 9.822878228782288e-06, "loss": 1.922696828842163e-05, "step": 317820 }, { "epoch": 90.21572523417542, "grad_norm": 0.002697278745472431, "learning_rate": 9.820039738858928e-06, "loss": 0.0037706442177295684, "step": 317830 }, { "epoch": 90.21856372409879, "grad_norm": 0.007722967304289341, "learning_rate": 9.817201248935566e-06, "loss": 8.490122854709626e-06, "step": 317840 }, { "epoch": 90.22140221402213, "grad_norm": 0.0009589997353032231, "learning_rate": 9.814362759012207e-06, "loss": 6.208568811416626e-06, "step": 317850 }, { "epoch": 90.2242407039455, "grad_norm": 0.06432191282510757, "learning_rate": 9.811524269088845e-06, "loss": 2.087913453578949e-05, "step": 317860 }, { "epoch": 90.22707919386886, "grad_norm": 0.0006662162486463785, "learning_rate": 9.808685779165485e-06, "loss": 9.909272193908691e-06, "step": 317870 }, { "epoch": 90.22991768379222, "grad_norm": 0.0005309783737175167, "learning_rate": 9.805847289242123e-06, "loss": 8.785072714090347e-05, "step": 317880 }, { "epoch": 90.23275617371559, "grad_norm": 0.0018382256384938955, "learning_rate": 9.803008799318762e-06, "loss": 1.369919627904892e-05, "step": 317890 }, { "epoch": 90.23559466363895, "grad_norm": 0.026057330891489983, "learning_rate": 9.800170309395402e-06, "loss": 4.545021802186966e-05, "step": 317900 }, { "epoch": 90.23843315356231, "grad_norm": 0.004534495063126087, "learning_rate": 9.797331819472042e-06, "loss": 6.528757512569427e-06, "step": 317910 }, { "epoch": 90.24127164348566, "grad_norm": 0.008228492923080921, "learning_rate": 9.79449332954868e-06, "loss": 7.791630923748017e-06, "step": 317920 }, { "epoch": 90.24411013340902, "grad_norm": 0.003733876394107938, "learning_rate": 9.79165483962532e-06, "loss": 4.308857023715973e-06, "step": 317930 }, { "epoch": 90.24694862333239, "grad_norm": 0.012895461171865463, "learning_rate": 9.78881634970196e-06, "loss": 9.579956531524658e-06, "step": 317940 }, { "epoch": 90.24978711325575, "grad_norm": 0.001576345064677298, "learning_rate": 9.785977859778599e-06, "loss": 7.16671347618103e-06, "step": 317950 }, { "epoch": 90.25262560317911, "grad_norm": 0.001940127112902701, "learning_rate": 9.783139369855237e-06, "loss": 7.4997544288635256e-06, "step": 317960 }, { "epoch": 90.25546409310247, "grad_norm": 0.003112766658887267, "learning_rate": 9.780300879931877e-06, "loss": 6.9785863161087034e-06, "step": 317970 }, { "epoch": 90.25830258302582, "grad_norm": 0.0012606334639713168, "learning_rate": 9.777462390008516e-06, "loss": 5.4441392421722415e-06, "step": 317980 }, { "epoch": 90.26114107294919, "grad_norm": 0.0008631361997686327, "learning_rate": 9.774623900085156e-06, "loss": 6.88508152961731e-06, "step": 317990 }, { "epoch": 90.26397956287255, "grad_norm": 0.0012878183042630553, "learning_rate": 9.771785410161796e-06, "loss": 1.2184306979179382e-05, "step": 318000 }, { "epoch": 90.26397956287255, "eval_accuracy": 0.9883003751510142, "eval_loss": 0.0521424300968647, "eval_runtime": 40.3133, "eval_samples_per_second": 390.12, "eval_steps_per_second": 6.102, "step": 318000 }, { "epoch": 90.26681805279591, "grad_norm": 0.001337990164756775, "learning_rate": 9.768946920238434e-06, "loss": 7.96820968389511e-06, "step": 318010 }, { "epoch": 90.26965654271928, "grad_norm": 0.001282706274650991, "learning_rate": 9.766108430315073e-06, "loss": 8.368119597434998e-06, "step": 318020 }, { "epoch": 90.27249503264264, "grad_norm": 0.002840746659785509, "learning_rate": 9.763269940391711e-06, "loss": 4.936195909976959e-06, "step": 318030 }, { "epoch": 90.275333522566, "grad_norm": 0.0013808290241286159, "learning_rate": 9.760431450468353e-06, "loss": 7.7148899435997e-06, "step": 318040 }, { "epoch": 90.27817201248935, "grad_norm": 0.0007499393541365862, "learning_rate": 9.75759296054499e-06, "loss": 6.720423698425293e-06, "step": 318050 }, { "epoch": 90.28101050241271, "grad_norm": 0.0004587840521708131, "learning_rate": 9.75475447062163e-06, "loss": 7.616728544235229e-06, "step": 318060 }, { "epoch": 90.28384899233608, "grad_norm": 0.0014875781489536166, "learning_rate": 9.751915980698268e-06, "loss": 3.628060221672058e-06, "step": 318070 }, { "epoch": 90.28668748225944, "grad_norm": 0.08871176838874817, "learning_rate": 9.749077490774908e-06, "loss": 1.780446618795395e-05, "step": 318080 }, { "epoch": 90.2895259721828, "grad_norm": 0.008630898781120777, "learning_rate": 9.746239000851548e-06, "loss": 6.989948451519012e-06, "step": 318090 }, { "epoch": 90.29236446210616, "grad_norm": 0.003241139929741621, "learning_rate": 9.743400510928187e-06, "loss": 5.819089710712433e-06, "step": 318100 }, { "epoch": 90.29520295202953, "grad_norm": 0.0005604368634521961, "learning_rate": 9.740562021004825e-06, "loss": 6.533600389957428e-06, "step": 318110 }, { "epoch": 90.29804144195288, "grad_norm": 0.012654255144298077, "learning_rate": 9.737723531081465e-06, "loss": 5.62816858291626e-06, "step": 318120 }, { "epoch": 90.30087993187624, "grad_norm": 0.010882693342864513, "learning_rate": 9.734885041158105e-06, "loss": 8.127838373184205e-06, "step": 318130 }, { "epoch": 90.3037184217996, "grad_norm": 0.002469307277351618, "learning_rate": 9.732046551234744e-06, "loss": 1.2464821338653564e-05, "step": 318140 }, { "epoch": 90.30655691172296, "grad_norm": 0.005304149817675352, "learning_rate": 9.729208061311382e-06, "loss": 6.038881838321686e-06, "step": 318150 }, { "epoch": 90.30939540164633, "grad_norm": 0.0014243319164961576, "learning_rate": 9.726369571388022e-06, "loss": 4.952959716320038e-06, "step": 318160 }, { "epoch": 90.31223389156969, "grad_norm": 0.001723341178148985, "learning_rate": 9.723531081464662e-06, "loss": 1.3642385601997375e-05, "step": 318170 }, { "epoch": 90.31507238149304, "grad_norm": 0.004414220806211233, "learning_rate": 9.7206925915413e-06, "loss": 4.929117858409882e-06, "step": 318180 }, { "epoch": 90.3179108714164, "grad_norm": 0.001948392135091126, "learning_rate": 9.717854101617941e-06, "loss": 8.394569158554077e-06, "step": 318190 }, { "epoch": 90.32074936133976, "grad_norm": 0.004213250242173672, "learning_rate": 9.715015611694579e-06, "loss": 6.259232759475708e-06, "step": 318200 }, { "epoch": 90.32358785126313, "grad_norm": 0.0020006343256682158, "learning_rate": 9.712177121771219e-06, "loss": 4.128366708755493e-06, "step": 318210 }, { "epoch": 90.32642634118649, "grad_norm": 0.00025029786047525704, "learning_rate": 9.709338631847857e-06, "loss": 3.8513913750648495e-06, "step": 318220 }, { "epoch": 90.32926483110985, "grad_norm": 0.005411060526967049, "learning_rate": 9.706500141924496e-06, "loss": 1.3530626893043518e-05, "step": 318230 }, { "epoch": 90.33210332103322, "grad_norm": 0.0003069291415158659, "learning_rate": 9.703661652001136e-06, "loss": 8.531659841537475e-06, "step": 318240 }, { "epoch": 90.33494181095656, "grad_norm": 0.0008541252464056015, "learning_rate": 9.700823162077776e-06, "loss": 7.246434688568115e-06, "step": 318250 }, { "epoch": 90.33778030087993, "grad_norm": 0.0036274485755711794, "learning_rate": 9.697984672154414e-06, "loss": 6.278976798057556e-06, "step": 318260 }, { "epoch": 90.34061879080329, "grad_norm": 0.0038127279840409756, "learning_rate": 9.695146182231053e-06, "loss": 4.542805254459381e-06, "step": 318270 }, { "epoch": 90.34345728072665, "grad_norm": 0.0014859589282423258, "learning_rate": 9.692307692307691e-06, "loss": 4.1317194700241085e-06, "step": 318280 }, { "epoch": 90.34629577065002, "grad_norm": 0.003734937636181712, "learning_rate": 9.689469202384333e-06, "loss": 5.278550088405609e-06, "step": 318290 }, { "epoch": 90.34913426057338, "grad_norm": 0.0006338879466056824, "learning_rate": 9.68663071246097e-06, "loss": 5.13959676027298e-06, "step": 318300 }, { "epoch": 90.35197275049674, "grad_norm": 0.007479473482817411, "learning_rate": 9.68379222253761e-06, "loss": 6.204843521118164e-06, "step": 318310 }, { "epoch": 90.35481124042009, "grad_norm": 0.001212926348671317, "learning_rate": 9.68095373261425e-06, "loss": 8.954480290412902e-06, "step": 318320 }, { "epoch": 90.35764973034345, "grad_norm": 0.011732873506844044, "learning_rate": 9.678115242690888e-06, "loss": 6.09029084444046e-06, "step": 318330 }, { "epoch": 90.36048822026682, "grad_norm": 0.00035533608752302825, "learning_rate": 9.67527675276753e-06, "loss": 3.946572542190552e-06, "step": 318340 }, { "epoch": 90.36332671019018, "grad_norm": 0.0006855076644569635, "learning_rate": 9.672438262844167e-06, "loss": 4.5228749513626095e-06, "step": 318350 }, { "epoch": 90.36616520011354, "grad_norm": 0.0004119628865737468, "learning_rate": 9.669599772920807e-06, "loss": 2.825632691383362e-06, "step": 318360 }, { "epoch": 90.3690036900369, "grad_norm": 0.00022431004617828876, "learning_rate": 9.666761282997445e-06, "loss": 5.577504634857178e-06, "step": 318370 }, { "epoch": 90.37184217996027, "grad_norm": 0.0024055512621998787, "learning_rate": 9.663922793074085e-06, "loss": 7.535889744758606e-06, "step": 318380 }, { "epoch": 90.37468066988362, "grad_norm": 0.0010680562118068337, "learning_rate": 9.661084303150724e-06, "loss": 3.938376903533936e-06, "step": 318390 }, { "epoch": 90.37751915980698, "grad_norm": 0.0005147714982740581, "learning_rate": 9.658245813227364e-06, "loss": 8.361786603927612e-06, "step": 318400 }, { "epoch": 90.38035764973034, "grad_norm": 0.0017658056458458304, "learning_rate": 9.655407323304002e-06, "loss": 4.834681749343872e-06, "step": 318410 }, { "epoch": 90.3831961396537, "grad_norm": 0.000822636007796973, "learning_rate": 9.652568833380642e-06, "loss": 8.94051045179367e-06, "step": 318420 }, { "epoch": 90.38603462957707, "grad_norm": 0.0009568327805027366, "learning_rate": 9.64973034345728e-06, "loss": 5.428306758403778e-06, "step": 318430 }, { "epoch": 90.38887311950043, "grad_norm": 0.0004942422383464873, "learning_rate": 9.646891853533921e-06, "loss": 7.616542279720306e-06, "step": 318440 }, { "epoch": 90.39171160942378, "grad_norm": 0.005097091197967529, "learning_rate": 9.64405336361056e-06, "loss": 4.997104406356812e-06, "step": 318450 }, { "epoch": 90.39455009934714, "grad_norm": 0.004939120262861252, "learning_rate": 9.641214873687199e-06, "loss": 6.0230493545532225e-06, "step": 318460 }, { "epoch": 90.3973885892705, "grad_norm": 0.09364645183086395, "learning_rate": 9.638376383763839e-06, "loss": 1.2451224029064178e-05, "step": 318470 }, { "epoch": 90.40022707919387, "grad_norm": 0.0006125846994109452, "learning_rate": 9.635537893840477e-06, "loss": 5.81592321395874e-06, "step": 318480 }, { "epoch": 90.40306556911723, "grad_norm": 0.0012409535702317953, "learning_rate": 9.632699403917116e-06, "loss": 8.445046842098235e-06, "step": 318490 }, { "epoch": 90.4059040590406, "grad_norm": 0.0007015252485871315, "learning_rate": 9.629860913993756e-06, "loss": 2.9725953936576845e-06, "step": 318500 }, { "epoch": 90.4059040590406, "eval_accuracy": 0.9889362243275895, "eval_loss": 0.05051140859723091, "eval_runtime": 35.492, "eval_samples_per_second": 443.114, "eval_steps_per_second": 6.931, "step": 318500 }, { "epoch": 90.40874254896396, "grad_norm": 0.042747095227241516, "learning_rate": 9.627022424070396e-06, "loss": 8.69855284690857e-06, "step": 318510 }, { "epoch": 90.4115810388873, "grad_norm": 0.00046903465408831835, "learning_rate": 9.624183934147034e-06, "loss": 4.946254193782806e-06, "step": 318520 }, { "epoch": 90.41441952881067, "grad_norm": 0.0006458568386733532, "learning_rate": 9.621345444223673e-06, "loss": 1.0581314563751221e-05, "step": 318530 }, { "epoch": 90.41725801873403, "grad_norm": 0.0015603514621034265, "learning_rate": 9.618506954300313e-06, "loss": 5.893968045711518e-06, "step": 318540 }, { "epoch": 90.4200965086574, "grad_norm": 0.0017314550932496786, "learning_rate": 9.615668464376953e-06, "loss": 4.626810550689697e-06, "step": 318550 }, { "epoch": 90.42293499858076, "grad_norm": 0.0004246596363373101, "learning_rate": 9.61282997445359e-06, "loss": 6.395764648914337e-06, "step": 318560 }, { "epoch": 90.42577348850412, "grad_norm": 0.0010112780146300793, "learning_rate": 9.60999148453023e-06, "loss": 3.823824226856232e-06, "step": 318570 }, { "epoch": 90.42861197842748, "grad_norm": 0.005853999871760607, "learning_rate": 9.60715299460687e-06, "loss": 9.817257523536683e-06, "step": 318580 }, { "epoch": 90.43145046835083, "grad_norm": 0.00035860284697264433, "learning_rate": 9.60431450468351e-06, "loss": 6.921961903572082e-06, "step": 318590 }, { "epoch": 90.4342889582742, "grad_norm": 0.0015474142273887992, "learning_rate": 9.601476014760148e-06, "loss": 1.017153263092041e-05, "step": 318600 }, { "epoch": 90.43712744819756, "grad_norm": 0.020040860399603844, "learning_rate": 9.598637524836787e-06, "loss": 8.008070290088654e-06, "step": 318610 }, { "epoch": 90.43996593812092, "grad_norm": 0.001794368145056069, "learning_rate": 9.595799034913425e-06, "loss": 5.65890222787857e-06, "step": 318620 }, { "epoch": 90.44280442804428, "grad_norm": 0.0018833428621292114, "learning_rate": 9.592960544990067e-06, "loss": 1.7301365733146668e-05, "step": 318630 }, { "epoch": 90.44564291796765, "grad_norm": 0.00039112602826207876, "learning_rate": 9.590122055066705e-06, "loss": 5.459226667881012e-06, "step": 318640 }, { "epoch": 90.448481407891, "grad_norm": 0.0015409851912409067, "learning_rate": 9.587283565143344e-06, "loss": 1.0733678936958314e-05, "step": 318650 }, { "epoch": 90.45131989781436, "grad_norm": 0.000932433526031673, "learning_rate": 9.584445075219984e-06, "loss": 2.2452138364315032e-05, "step": 318660 }, { "epoch": 90.45415838773772, "grad_norm": 0.006681604776531458, "learning_rate": 9.581606585296622e-06, "loss": 8.243881165981292e-06, "step": 318670 }, { "epoch": 90.45699687766108, "grad_norm": 0.0022453961428254843, "learning_rate": 9.578768095373263e-06, "loss": 7.493793964385986e-06, "step": 318680 }, { "epoch": 90.45983536758445, "grad_norm": 0.000602632062509656, "learning_rate": 9.575929605449901e-06, "loss": 6.36465847492218e-06, "step": 318690 }, { "epoch": 90.46267385750781, "grad_norm": 0.0027504917234182358, "learning_rate": 9.573091115526541e-06, "loss": 7.115304470062256e-06, "step": 318700 }, { "epoch": 90.46551234743117, "grad_norm": 0.0007821351755410433, "learning_rate": 9.570252625603179e-06, "loss": 4.578568041324615e-06, "step": 318710 }, { "epoch": 90.46835083735452, "grad_norm": 0.0030016626697033644, "learning_rate": 9.567414135679819e-06, "loss": 2.0129792392253876e-05, "step": 318720 }, { "epoch": 90.47118932727788, "grad_norm": 0.00014886562712490559, "learning_rate": 9.564575645756458e-06, "loss": 5.948916077613831e-06, "step": 318730 }, { "epoch": 90.47402781720125, "grad_norm": 0.00307990494184196, "learning_rate": 9.561737155833098e-06, "loss": 8.421391248703003e-06, "step": 318740 }, { "epoch": 90.47686630712461, "grad_norm": 0.00024724812828935683, "learning_rate": 9.558898665909736e-06, "loss": 4.650279879570007e-06, "step": 318750 }, { "epoch": 90.47970479704797, "grad_norm": 0.0029185169842094183, "learning_rate": 9.556060175986376e-06, "loss": 5.98616898059845e-06, "step": 318760 }, { "epoch": 90.48254328697134, "grad_norm": 0.0007416486623696983, "learning_rate": 9.553221686063014e-06, "loss": 4.645064473152161e-06, "step": 318770 }, { "epoch": 90.4853817768947, "grad_norm": 0.0009091224637813866, "learning_rate": 9.550383196139655e-06, "loss": 3.6174431443214415e-06, "step": 318780 }, { "epoch": 90.48822026681805, "grad_norm": 0.019107218831777573, "learning_rate": 9.547544706216293e-06, "loss": 7.006712257862091e-06, "step": 318790 }, { "epoch": 90.49105875674141, "grad_norm": 0.002045863540843129, "learning_rate": 9.544706216292933e-06, "loss": 4.728883504867554e-06, "step": 318800 }, { "epoch": 90.49389724666477, "grad_norm": 0.0007864145445637405, "learning_rate": 9.541867726369572e-06, "loss": 3.5664066672325135e-06, "step": 318810 }, { "epoch": 90.49673573658814, "grad_norm": 0.0025044246576726437, "learning_rate": 9.53902923644621e-06, "loss": 1.894347369670868e-05, "step": 318820 }, { "epoch": 90.4995742265115, "grad_norm": 0.0004142374382354319, "learning_rate": 9.53619074652285e-06, "loss": 1.5029497444629669e-05, "step": 318830 }, { "epoch": 90.50241271643486, "grad_norm": 0.0008678470621816814, "learning_rate": 9.53335225659949e-06, "loss": 9.919703006744385e-06, "step": 318840 }, { "epoch": 90.50525120635822, "grad_norm": 0.005337024573236704, "learning_rate": 9.53051376667613e-06, "loss": 6.6764652729034426e-06, "step": 318850 }, { "epoch": 90.50808969628157, "grad_norm": 0.0011867244029417634, "learning_rate": 9.527675276752767e-06, "loss": 5.008094012737274e-06, "step": 318860 }, { "epoch": 90.51092818620494, "grad_norm": 0.0016742174047976732, "learning_rate": 9.524836786829407e-06, "loss": 3.85921448469162e-06, "step": 318870 }, { "epoch": 90.5137666761283, "grad_norm": 0.008164183236658573, "learning_rate": 9.521998296906047e-06, "loss": 4.147924482822418e-06, "step": 318880 }, { "epoch": 90.51660516605166, "grad_norm": 0.0007912483415566385, "learning_rate": 9.519159806982686e-06, "loss": 5.031190812587738e-06, "step": 318890 }, { "epoch": 90.51944365597502, "grad_norm": 0.0010396450525149703, "learning_rate": 9.516321317059324e-06, "loss": 8.661486208438874e-06, "step": 318900 }, { "epoch": 90.52228214589839, "grad_norm": 0.0005829329020343721, "learning_rate": 9.513482827135964e-06, "loss": 4.304759204387665e-06, "step": 318910 }, { "epoch": 90.52512063582174, "grad_norm": 0.0003006323822773993, "learning_rate": 9.510644337212602e-06, "loss": 5.559064447879791e-06, "step": 318920 }, { "epoch": 90.5279591257451, "grad_norm": 0.002193249762058258, "learning_rate": 9.507805847289244e-06, "loss": 7.438100874423981e-06, "step": 318930 }, { "epoch": 90.53079761566846, "grad_norm": 0.00041169486939907074, "learning_rate": 9.504967357365882e-06, "loss": 1.0582059621810913e-05, "step": 318940 }, { "epoch": 90.53363610559182, "grad_norm": 0.0014408479910343885, "learning_rate": 9.502128867442521e-06, "loss": 3.5338103771209718e-06, "step": 318950 }, { "epoch": 90.53647459551519, "grad_norm": 0.0005051092593930662, "learning_rate": 9.49929037751916e-06, "loss": 9.72263514995575e-06, "step": 318960 }, { "epoch": 90.53931308543855, "grad_norm": 0.0004854606813751161, "learning_rate": 9.496451887595799e-06, "loss": 4.421919584274292e-06, "step": 318970 }, { "epoch": 90.54215157536191, "grad_norm": 0.0022345008328557014, "learning_rate": 9.493613397672439e-06, "loss": 8.914805948734284e-06, "step": 318980 }, { "epoch": 90.54499006528526, "grad_norm": 0.03031230717897415, "learning_rate": 9.490774907749078e-06, "loss": 1.2926198542118072e-05, "step": 318990 }, { "epoch": 90.54782855520862, "grad_norm": 0.0013549791183322668, "learning_rate": 9.487936417825718e-06, "loss": 6.016716361045837e-06, "step": 319000 }, { "epoch": 90.54782855520862, "eval_accuracy": 0.9889362243275895, "eval_loss": 0.049708012491464615, "eval_runtime": 35.7228, "eval_samples_per_second": 440.251, "eval_steps_per_second": 6.886, "step": 319000 }, { "epoch": 90.55066704513199, "grad_norm": 0.00032813817961141467, "learning_rate": 9.485097927902356e-06, "loss": 3.800541162490845e-06, "step": 319010 }, { "epoch": 90.55350553505535, "grad_norm": 0.0005288123502396047, "learning_rate": 9.482259437978996e-06, "loss": 5.7300552725791935e-06, "step": 319020 }, { "epoch": 90.55634402497871, "grad_norm": 0.0003199858474545181, "learning_rate": 9.479420948055635e-06, "loss": 7.013604044914246e-06, "step": 319030 }, { "epoch": 90.55918251490208, "grad_norm": 0.0012678095372393727, "learning_rate": 9.476582458132275e-06, "loss": 3.618001937866211e-06, "step": 319040 }, { "epoch": 90.56202100482544, "grad_norm": 0.0069732824340462685, "learning_rate": 9.473743968208913e-06, "loss": 5.910173058509827e-06, "step": 319050 }, { "epoch": 90.56485949474879, "grad_norm": 0.0004087402194272727, "learning_rate": 9.470905478285553e-06, "loss": 5.242042243480683e-06, "step": 319060 }, { "epoch": 90.56769798467215, "grad_norm": 0.0021525274496525526, "learning_rate": 9.46806698836219e-06, "loss": 5.399063229560852e-06, "step": 319070 }, { "epoch": 90.57053647459551, "grad_norm": 0.00028686740552075207, "learning_rate": 9.465228498438832e-06, "loss": 6.110034883022309e-06, "step": 319080 }, { "epoch": 90.57337496451888, "grad_norm": 0.0033334922045469284, "learning_rate": 9.46239000851547e-06, "loss": 5.578063428401947e-06, "step": 319090 }, { "epoch": 90.57621345444224, "grad_norm": 0.003711601486429572, "learning_rate": 9.45955151859211e-06, "loss": 6.507895886898041e-06, "step": 319100 }, { "epoch": 90.5790519443656, "grad_norm": 0.0006209599669091403, "learning_rate": 9.456713028668748e-06, "loss": 7.573515176773071e-06, "step": 319110 }, { "epoch": 90.58189043428897, "grad_norm": 0.0004486391553655267, "learning_rate": 9.453874538745387e-06, "loss": 6.523542106151581e-06, "step": 319120 }, { "epoch": 90.58472892421231, "grad_norm": 0.0006757582887075841, "learning_rate": 9.451036048822027e-06, "loss": 5.724094808101654e-06, "step": 319130 }, { "epoch": 90.58756741413568, "grad_norm": 0.0010280284332111478, "learning_rate": 9.448197558898667e-06, "loss": 4.024989902973175e-06, "step": 319140 }, { "epoch": 90.59040590405904, "grad_norm": 0.00034488795790821314, "learning_rate": 9.445359068975305e-06, "loss": 6.0264021158218385e-06, "step": 319150 }, { "epoch": 90.5932443939824, "grad_norm": 0.0012173205614089966, "learning_rate": 9.442520579051944e-06, "loss": 4.524365067481995e-06, "step": 319160 }, { "epoch": 90.59608288390577, "grad_norm": 0.004741427954286337, "learning_rate": 9.439682089128584e-06, "loss": 3.6630779504776e-06, "step": 319170 }, { "epoch": 90.59892137382913, "grad_norm": 0.0006341140251606703, "learning_rate": 9.436843599205224e-06, "loss": 6.833299994468689e-06, "step": 319180 }, { "epoch": 90.60175986375248, "grad_norm": 0.0007455778541043401, "learning_rate": 9.434005109281863e-06, "loss": 5.347281694412232e-06, "step": 319190 }, { "epoch": 90.60459835367584, "grad_norm": 0.026769623160362244, "learning_rate": 9.431166619358501e-06, "loss": 6.340444087982178e-06, "step": 319200 }, { "epoch": 90.6074368435992, "grad_norm": 0.00037210588925518095, "learning_rate": 9.428328129435141e-06, "loss": 4.469789564609527e-06, "step": 319210 }, { "epoch": 90.61027533352257, "grad_norm": 0.0016838315641507506, "learning_rate": 9.42548963951178e-06, "loss": 4.951469600200653e-06, "step": 319220 }, { "epoch": 90.61311382344593, "grad_norm": 0.0020428074058145285, "learning_rate": 9.42265114958842e-06, "loss": 4.073791205883026e-06, "step": 319230 }, { "epoch": 90.61595231336929, "grad_norm": 0.0023900731466710567, "learning_rate": 9.419812659665058e-06, "loss": 5.716085433959961e-06, "step": 319240 }, { "epoch": 90.61879080329265, "grad_norm": 0.001379223307594657, "learning_rate": 9.416974169741698e-06, "loss": 4.946812987327576e-06, "step": 319250 }, { "epoch": 90.621629293216, "grad_norm": 0.0012966175563633442, "learning_rate": 9.414135679818336e-06, "loss": 3.5706907510757446e-06, "step": 319260 }, { "epoch": 90.62446778313937, "grad_norm": 0.0002398059150436893, "learning_rate": 9.411297189894977e-06, "loss": 4.809536039829254e-06, "step": 319270 }, { "epoch": 90.62730627306273, "grad_norm": 0.0006555065629072487, "learning_rate": 9.408458699971615e-06, "loss": 5.750730633735657e-06, "step": 319280 }, { "epoch": 90.63014476298609, "grad_norm": 0.0029431942384690046, "learning_rate": 9.405620210048255e-06, "loss": 8.589588105678558e-06, "step": 319290 }, { "epoch": 90.63298325290945, "grad_norm": 0.003921828232705593, "learning_rate": 9.402781720124893e-06, "loss": 9.665638208389282e-06, "step": 319300 }, { "epoch": 90.63582174283282, "grad_norm": 0.0004483428783714771, "learning_rate": 9.399943230201533e-06, "loss": 7.17993825674057e-06, "step": 319310 }, { "epoch": 90.63866023275618, "grad_norm": 0.0010869529796764255, "learning_rate": 9.397104740278172e-06, "loss": 2.3910403251647948e-05, "step": 319320 }, { "epoch": 90.64149872267953, "grad_norm": 0.004087910521775484, "learning_rate": 9.394266250354812e-06, "loss": 9.109079837799072e-06, "step": 319330 }, { "epoch": 90.64433721260289, "grad_norm": 0.0009226797265000641, "learning_rate": 9.391427760431452e-06, "loss": 4.501640796661377e-06, "step": 319340 }, { "epoch": 90.64717570252625, "grad_norm": 0.00413259444758296, "learning_rate": 9.38858927050809e-06, "loss": 4.941225051879883e-06, "step": 319350 }, { "epoch": 90.65001419244962, "grad_norm": 0.007451266050338745, "learning_rate": 9.38575078058473e-06, "loss": 5.381181836128235e-06, "step": 319360 }, { "epoch": 90.65285268237298, "grad_norm": 0.0007958784117363393, "learning_rate": 9.382912290661369e-06, "loss": 6.909854710102081e-06, "step": 319370 }, { "epoch": 90.65569117229634, "grad_norm": 0.0022720317356288433, "learning_rate": 9.380073800738009e-06, "loss": 5.611218512058258e-06, "step": 319380 }, { "epoch": 90.65852966221969, "grad_norm": 0.000826647796202451, "learning_rate": 9.377235310814647e-06, "loss": 5.9586018323898315e-06, "step": 319390 }, { "epoch": 90.66136815214305, "grad_norm": 0.0010461013298481703, "learning_rate": 9.374396820891286e-06, "loss": 7.36825168132782e-06, "step": 319400 }, { "epoch": 90.66420664206642, "grad_norm": 0.001577617833390832, "learning_rate": 9.371558330967924e-06, "loss": 7.897987961769104e-06, "step": 319410 }, { "epoch": 90.66704513198978, "grad_norm": 0.004681139253079891, "learning_rate": 9.368719841044566e-06, "loss": 9.57474112510681e-06, "step": 319420 }, { "epoch": 90.66988362191314, "grad_norm": 0.0027643723879009485, "learning_rate": 9.365881351121204e-06, "loss": 4.702433943748474e-06, "step": 319430 }, { "epoch": 90.6727221118365, "grad_norm": 0.00315206334926188, "learning_rate": 9.363042861197844e-06, "loss": 7.832422852516174e-06, "step": 319440 }, { "epoch": 90.67556060175987, "grad_norm": 0.004401044920086861, "learning_rate": 9.360204371274482e-06, "loss": 4.165060818195343e-06, "step": 319450 }, { "epoch": 90.67839909168322, "grad_norm": 0.001243213890120387, "learning_rate": 9.357365881351121e-06, "loss": 4.47537750005722e-06, "step": 319460 }, { "epoch": 90.68123758160658, "grad_norm": 0.0012476606061682105, "learning_rate": 9.354527391427761e-06, "loss": 4.327297210693359e-06, "step": 319470 }, { "epoch": 90.68407607152994, "grad_norm": 0.0009075477137230337, "learning_rate": 9.3516889015044e-06, "loss": 4.6422705054283145e-06, "step": 319480 }, { "epoch": 90.6869145614533, "grad_norm": 0.0014868986327201128, "learning_rate": 9.348850411581039e-06, "loss": 5.296990275382996e-06, "step": 319490 }, { "epoch": 90.68975305137667, "grad_norm": 0.0017609854694455862, "learning_rate": 9.346011921657678e-06, "loss": 8.675083518028259e-06, "step": 319500 }, { "epoch": 90.68975305137667, "eval_accuracy": 0.9891269790805621, "eval_loss": 0.04895860701799393, "eval_runtime": 36.2199, "eval_samples_per_second": 434.209, "eval_steps_per_second": 6.792, "step": 319500 }, { "epoch": 90.69259154130003, "grad_norm": 0.0004667155153583735, "learning_rate": 9.343173431734318e-06, "loss": 4.604831337928772e-06, "step": 319510 }, { "epoch": 90.6954300312234, "grad_norm": 0.0010724901221692562, "learning_rate": 9.340334941810958e-06, "loss": 5.909614264965057e-06, "step": 319520 }, { "epoch": 90.69826852114674, "grad_norm": 0.0021696470212191343, "learning_rate": 9.337496451887597e-06, "loss": 4.926510155200958e-06, "step": 319530 }, { "epoch": 90.7011070110701, "grad_norm": 0.0007531318115070462, "learning_rate": 9.334657961964235e-06, "loss": 3.22684645652771e-06, "step": 319540 }, { "epoch": 90.70394550099347, "grad_norm": 0.0026793687138706446, "learning_rate": 9.331819472040875e-06, "loss": 6.6928565502166745e-06, "step": 319550 }, { "epoch": 90.70678399091683, "grad_norm": 0.0016875551082193851, "learning_rate": 9.328980982117513e-06, "loss": 8.20942223072052e-06, "step": 319560 }, { "epoch": 90.7096224808402, "grad_norm": 0.0057641868479549885, "learning_rate": 9.326142492194154e-06, "loss": 2.831779420375824e-06, "step": 319570 }, { "epoch": 90.71246097076356, "grad_norm": 0.0007851409609429538, "learning_rate": 9.323304002270792e-06, "loss": 4.54094260931015e-06, "step": 319580 }, { "epoch": 90.71529946068692, "grad_norm": 0.0012806809972971678, "learning_rate": 9.320465512347432e-06, "loss": 5.262903869152069e-06, "step": 319590 }, { "epoch": 90.71813795061027, "grad_norm": 0.000779672700446099, "learning_rate": 9.31762702242407e-06, "loss": 3.5909935832023622e-06, "step": 319600 }, { "epoch": 90.72097644053363, "grad_norm": 0.0015075489645823836, "learning_rate": 9.31478853250071e-06, "loss": 4.3870881199836734e-06, "step": 319610 }, { "epoch": 90.723814930457, "grad_norm": 0.0002399305085418746, "learning_rate": 9.31195004257735e-06, "loss": 3.520213067531586e-06, "step": 319620 }, { "epoch": 90.72665342038036, "grad_norm": 0.0003595634188968688, "learning_rate": 9.309111552653989e-06, "loss": 4.4817104935646055e-06, "step": 319630 }, { "epoch": 90.72949191030372, "grad_norm": 0.0022480899933725595, "learning_rate": 9.306273062730627e-06, "loss": 4.09204512834549e-06, "step": 319640 }, { "epoch": 90.73233040022708, "grad_norm": 0.0018852089997380972, "learning_rate": 9.303434572807267e-06, "loss": 7.252767682075501e-06, "step": 319650 }, { "epoch": 90.73516889015043, "grad_norm": 0.006377308629453182, "learning_rate": 9.300596082883906e-06, "loss": 5.952082574367523e-06, "step": 319660 }, { "epoch": 90.7380073800738, "grad_norm": 0.008496018126606941, "learning_rate": 9.297757592960546e-06, "loss": 4.93861734867096e-06, "step": 319670 }, { "epoch": 90.74084586999716, "grad_norm": 0.00018978959997184575, "learning_rate": 9.294919103037186e-06, "loss": 3.5766512155532838e-06, "step": 319680 }, { "epoch": 90.74368435992052, "grad_norm": 0.012862449511885643, "learning_rate": 9.292080613113824e-06, "loss": 5.306303501129151e-06, "step": 319690 }, { "epoch": 90.74652284984388, "grad_norm": 0.001816120813600719, "learning_rate": 9.289242123190463e-06, "loss": 6.317161023616791e-06, "step": 319700 }, { "epoch": 90.74936133976725, "grad_norm": 0.0015019765123724937, "learning_rate": 9.286403633267101e-06, "loss": 1.660473644733429e-05, "step": 319710 }, { "epoch": 90.75219982969061, "grad_norm": 0.0012253251625224948, "learning_rate": 9.283565143343743e-06, "loss": 4.668906331062317e-06, "step": 319720 }, { "epoch": 90.75503831961396, "grad_norm": 0.00011007692955899984, "learning_rate": 9.28072665342038e-06, "loss": 3.319364041090012e-05, "step": 319730 }, { "epoch": 90.75787680953732, "grad_norm": 0.003130599856376648, "learning_rate": 9.27788816349702e-06, "loss": 8.767098188400269e-06, "step": 319740 }, { "epoch": 90.76071529946068, "grad_norm": 0.007600302342325449, "learning_rate": 9.275049673573658e-06, "loss": 6.715022027492523e-06, "step": 319750 }, { "epoch": 90.76355378938405, "grad_norm": 0.08316408842802048, "learning_rate": 9.272211183650298e-06, "loss": 2.733338624238968e-05, "step": 319760 }, { "epoch": 90.76639227930741, "grad_norm": 0.0013741828734055161, "learning_rate": 9.269372693726938e-06, "loss": 4.99337911605835e-06, "step": 319770 }, { "epoch": 90.76923076923077, "grad_norm": 0.00025633155019022524, "learning_rate": 9.266534203803577e-06, "loss": 7.306784391403199e-06, "step": 319780 }, { "epoch": 90.77206925915414, "grad_norm": 0.0003734456258825958, "learning_rate": 9.263695713880215e-06, "loss": 6.7390501499176025e-06, "step": 319790 }, { "epoch": 90.77490774907749, "grad_norm": 0.00047414834261871874, "learning_rate": 9.260857223956855e-06, "loss": 1.4636479318141937e-05, "step": 319800 }, { "epoch": 90.77774623900085, "grad_norm": 0.0012950599193572998, "learning_rate": 9.258018734033495e-06, "loss": 6.909668445587158e-06, "step": 319810 }, { "epoch": 90.78058472892421, "grad_norm": 0.0007233095238916576, "learning_rate": 9.255180244110134e-06, "loss": 5.582906305789948e-06, "step": 319820 }, { "epoch": 90.78342321884757, "grad_norm": 9.028738713823259e-05, "learning_rate": 9.252341754186772e-06, "loss": 5.6535005569458004e-06, "step": 319830 }, { "epoch": 90.78626170877094, "grad_norm": 0.0022693525534123182, "learning_rate": 9.249503264263412e-06, "loss": 5.116127431392669e-06, "step": 319840 }, { "epoch": 90.7891001986943, "grad_norm": 0.0008992493385449052, "learning_rate": 9.246664774340052e-06, "loss": 4.472583532333374e-06, "step": 319850 }, { "epoch": 90.79193868861765, "grad_norm": 0.0002923636056948453, "learning_rate": 9.24382628441669e-06, "loss": 6.146356463432312e-06, "step": 319860 }, { "epoch": 90.79477717854101, "grad_norm": 0.005091371946036816, "learning_rate": 9.240987794493331e-06, "loss": 8.368678390979766e-06, "step": 319870 }, { "epoch": 90.79761566846437, "grad_norm": 0.0015903337625786662, "learning_rate": 9.238149304569969e-06, "loss": 4.496052861213684e-06, "step": 319880 }, { "epoch": 90.80045415838774, "grad_norm": 0.000531826342921704, "learning_rate": 9.235310814646609e-06, "loss": 7.568672299385071e-06, "step": 319890 }, { "epoch": 90.8032926483111, "grad_norm": 0.0012699563521891832, "learning_rate": 9.232472324723247e-06, "loss": 4.021823406219482e-06, "step": 319900 }, { "epoch": 90.80613113823446, "grad_norm": 0.0007411862025037408, "learning_rate": 9.229633834799888e-06, "loss": 7.676146924495698e-06, "step": 319910 }, { "epoch": 90.80896962815783, "grad_norm": 0.007607660256326199, "learning_rate": 9.226795344876526e-06, "loss": 7.505901157855987e-06, "step": 319920 }, { "epoch": 90.81180811808117, "grad_norm": 0.002156661357730627, "learning_rate": 9.223956854953166e-06, "loss": 7.625669240951538e-06, "step": 319930 }, { "epoch": 90.81464660800454, "grad_norm": 0.0005876894574612379, "learning_rate": 9.221118365029804e-06, "loss": 4.726089537143708e-06, "step": 319940 }, { "epoch": 90.8174850979279, "grad_norm": 0.0004960348596796393, "learning_rate": 9.218279875106444e-06, "loss": 3.898143768310547e-06, "step": 319950 }, { "epoch": 90.82032358785126, "grad_norm": 0.004051062744110823, "learning_rate": 9.215441385183083e-06, "loss": 6.60829246044159e-06, "step": 319960 }, { "epoch": 90.82316207777463, "grad_norm": 0.0009347064769826829, "learning_rate": 9.212602895259723e-06, "loss": 7.0907175540924076e-06, "step": 319970 }, { "epoch": 90.82600056769799, "grad_norm": 0.003054648404940963, "learning_rate": 9.209764405336361e-06, "loss": 5.538016557693481e-06, "step": 319980 }, { "epoch": 90.82883905762135, "grad_norm": 0.006642343010753393, "learning_rate": 9.206925915413e-06, "loss": 3.897584974765778e-06, "step": 319990 }, { "epoch": 90.8316775475447, "grad_norm": 0.0005063122371211648, "learning_rate": 9.20408742548964e-06, "loss": 4.460103809833527e-06, "step": 320000 }, { "epoch": 90.8316775475447, "eval_accuracy": 0.9891269790805621, "eval_loss": 0.04947369918227196, "eval_runtime": 35.9372, "eval_samples_per_second": 437.625, "eval_steps_per_second": 6.845, "step": 320000 }, { "epoch": 90.83451603746806, "grad_norm": 0.0043680714443326, "learning_rate": 9.20124893556628e-06, "loss": 6.27562403678894e-06, "step": 320010 }, { "epoch": 90.83735452739143, "grad_norm": 0.010191643610596657, "learning_rate": 9.19841044564292e-06, "loss": 7.703155279159547e-06, "step": 320020 }, { "epoch": 90.84019301731479, "grad_norm": 0.004786066245287657, "learning_rate": 9.195571955719558e-06, "loss": 4.613585770130158e-06, "step": 320030 }, { "epoch": 90.84303150723815, "grad_norm": 0.0004319227591622621, "learning_rate": 9.192733465796197e-06, "loss": 4.55956906080246e-06, "step": 320040 }, { "epoch": 90.84586999716151, "grad_norm": 0.020459283143281937, "learning_rate": 9.189894975872835e-06, "loss": 6.396509706974029e-06, "step": 320050 }, { "epoch": 90.84870848708488, "grad_norm": 0.004488603677600622, "learning_rate": 9.187056485949477e-06, "loss": 6.374344229698181e-06, "step": 320060 }, { "epoch": 90.85154697700823, "grad_norm": 0.00035064606345258653, "learning_rate": 9.184217996026115e-06, "loss": 3.2238662242889403e-06, "step": 320070 }, { "epoch": 90.85438546693159, "grad_norm": 0.0005314349546097219, "learning_rate": 9.181379506102754e-06, "loss": 5.174241960048676e-06, "step": 320080 }, { "epoch": 90.85722395685495, "grad_norm": 0.0008042226545512676, "learning_rate": 9.178541016179392e-06, "loss": 3.3931806683540345e-06, "step": 320090 }, { "epoch": 90.86006244677831, "grad_norm": 0.0025334213860332966, "learning_rate": 9.175702526256032e-06, "loss": 9.386427700519562e-06, "step": 320100 }, { "epoch": 90.86290093670168, "grad_norm": 0.0003456811828073114, "learning_rate": 9.172864036332672e-06, "loss": 2.765469253063202e-06, "step": 320110 }, { "epoch": 90.86573942662504, "grad_norm": 0.0008694902062416077, "learning_rate": 9.170025546409311e-06, "loss": 4.848837852478027e-06, "step": 320120 }, { "epoch": 90.86857791654839, "grad_norm": 0.0013094128808006644, "learning_rate": 9.16718705648595e-06, "loss": 3.6811456084251403e-06, "step": 320130 }, { "epoch": 90.87141640647175, "grad_norm": 0.0026790776755660772, "learning_rate": 9.164348566562589e-06, "loss": 3.7079676985740662e-06, "step": 320140 }, { "epoch": 90.87425489639512, "grad_norm": 0.06473205983638763, "learning_rate": 9.161510076639229e-06, "loss": 1.6726553440093994e-05, "step": 320150 }, { "epoch": 90.87709338631848, "grad_norm": 0.001268070307560265, "learning_rate": 9.158671586715868e-06, "loss": 3.6600977182388306e-06, "step": 320160 }, { "epoch": 90.87993187624184, "grad_norm": 0.018601777032017708, "learning_rate": 9.155833096792506e-06, "loss": 7.2658061981201175e-06, "step": 320170 }, { "epoch": 90.8827703661652, "grad_norm": 0.0030709425918757915, "learning_rate": 9.152994606869146e-06, "loss": 6.949529051780701e-06, "step": 320180 }, { "epoch": 90.88560885608857, "grad_norm": 0.0005287099629640579, "learning_rate": 9.150156116945786e-06, "loss": 3.2702460885047913e-06, "step": 320190 }, { "epoch": 90.88844734601192, "grad_norm": 0.0016764224274083972, "learning_rate": 9.147317627022424e-06, "loss": 5.9569254517555235e-06, "step": 320200 }, { "epoch": 90.89128583593528, "grad_norm": 0.0012207872932776809, "learning_rate": 9.144479137099065e-06, "loss": 4.022009670734406e-06, "step": 320210 }, { "epoch": 90.89412432585864, "grad_norm": 0.001122268382459879, "learning_rate": 9.141640647175703e-06, "loss": 4.636123776435852e-06, "step": 320220 }, { "epoch": 90.896962815782, "grad_norm": 0.0004897493054158986, "learning_rate": 9.138802157252343e-06, "loss": 3.3698976039886475e-06, "step": 320230 }, { "epoch": 90.89980130570537, "grad_norm": 0.0069510373286902905, "learning_rate": 9.13596366732898e-06, "loss": 6.447359919548035e-06, "step": 320240 }, { "epoch": 90.90263979562873, "grad_norm": 0.0041700997389853, "learning_rate": 9.13312517740562e-06, "loss": 4.497729241847992e-06, "step": 320250 }, { "epoch": 90.90547828555209, "grad_norm": 0.002544223330914974, "learning_rate": 9.13028668748226e-06, "loss": 4.2015686631202694e-06, "step": 320260 }, { "epoch": 90.90831677547544, "grad_norm": 0.0001189161921502091, "learning_rate": 9.1274481975589e-06, "loss": 4.832260310649872e-06, "step": 320270 }, { "epoch": 90.9111552653988, "grad_norm": 0.007275553420186043, "learning_rate": 9.124609707635538e-06, "loss": 6.157904863357544e-06, "step": 320280 }, { "epoch": 90.91399375532217, "grad_norm": 0.0008453530608676374, "learning_rate": 9.121771217712177e-06, "loss": 1.3944879174232484e-05, "step": 320290 }, { "epoch": 90.91683224524553, "grad_norm": 0.003666355274617672, "learning_rate": 9.118932727788815e-06, "loss": 6.200186908245086e-06, "step": 320300 }, { "epoch": 90.91967073516889, "grad_norm": 0.0011882811086252332, "learning_rate": 9.116094237865457e-06, "loss": 4.773028194904327e-06, "step": 320310 }, { "epoch": 90.92250922509226, "grad_norm": 0.001128196599893272, "learning_rate": 9.113255747942095e-06, "loss": 5.239807069301605e-06, "step": 320320 }, { "epoch": 90.92534771501562, "grad_norm": 0.006533155217766762, "learning_rate": 9.110417258018734e-06, "loss": 6.570667028427124e-06, "step": 320330 }, { "epoch": 90.92818620493897, "grad_norm": 0.0021783937700092793, "learning_rate": 9.107578768095374e-06, "loss": 3.640018403530121e-05, "step": 320340 }, { "epoch": 90.93102469486233, "grad_norm": 0.006566681899130344, "learning_rate": 9.104740278172012e-06, "loss": 3.344360738992691e-05, "step": 320350 }, { "epoch": 90.9338631847857, "grad_norm": 0.008518017828464508, "learning_rate": 9.101901788248653e-06, "loss": 6.993114948272705e-06, "step": 320360 }, { "epoch": 90.93670167470906, "grad_norm": 0.008535880595445633, "learning_rate": 9.099063298325291e-06, "loss": 2.4600885808467865e-05, "step": 320370 }, { "epoch": 90.93954016463242, "grad_norm": 0.001329826540313661, "learning_rate": 9.096224808401931e-06, "loss": 1.2953579425811767e-05, "step": 320380 }, { "epoch": 90.94237865455578, "grad_norm": 0.003766178386285901, "learning_rate": 9.093386318478569e-06, "loss": 9.963475167751313e-06, "step": 320390 }, { "epoch": 90.94521714447913, "grad_norm": 0.012188106775283813, "learning_rate": 9.090547828555209e-06, "loss": 7.374212145805359e-06, "step": 320400 }, { "epoch": 90.9480556344025, "grad_norm": 0.004825073294341564, "learning_rate": 9.087709338631848e-06, "loss": 4.667975008487701e-06, "step": 320410 }, { "epoch": 90.95089412432586, "grad_norm": 0.0002035627403529361, "learning_rate": 9.084870848708488e-06, "loss": 7.317587733268738e-06, "step": 320420 }, { "epoch": 90.95373261424922, "grad_norm": 0.0014697761507704854, "learning_rate": 9.082032358785126e-06, "loss": 4.032999277114868e-06, "step": 320430 }, { "epoch": 90.95657110417258, "grad_norm": 0.00104274891782552, "learning_rate": 9.079193868861766e-06, "loss": 6.625615060329437e-06, "step": 320440 }, { "epoch": 90.95940959409594, "grad_norm": 0.0010348609648644924, "learning_rate": 9.076355378938404e-06, "loss": 6.774254143238068e-06, "step": 320450 }, { "epoch": 90.96224808401931, "grad_norm": 0.0003693052858579904, "learning_rate": 9.073516889015045e-06, "loss": 5.969032645225525e-06, "step": 320460 }, { "epoch": 90.96508657394266, "grad_norm": 0.0011828788556158543, "learning_rate": 9.070678399091683e-06, "loss": 5.435757339000702e-06, "step": 320470 }, { "epoch": 90.96792506386602, "grad_norm": 0.0030705940444022417, "learning_rate": 9.067839909168323e-06, "loss": 7.3660165071487425e-06, "step": 320480 }, { "epoch": 90.97076355378938, "grad_norm": 0.004584712907671928, "learning_rate": 9.065001419244963e-06, "loss": 0.0008455989882349968, "step": 320490 }, { "epoch": 90.97360204371275, "grad_norm": 0.13769303262233734, "learning_rate": 9.062446778313937e-06, "loss": 0.0012281576171517372, "step": 320500 }, { "epoch": 90.97360204371275, "eval_accuracy": 0.9823869778088637, "eval_loss": 0.08974659442901611, "eval_runtime": 36.3121, "eval_samples_per_second": 433.106, "eval_steps_per_second": 6.775, "step": 320500 }, { "epoch": 90.97644053363611, "grad_norm": 0.0012784067075699568, "learning_rate": 9.059892137382912e-06, "loss": 0.013317729532718658, "step": 320510 }, { "epoch": 90.97927902355947, "grad_norm": 0.08983325213193893, "learning_rate": 9.057053647459552e-06, "loss": 2.6288628578186036e-05, "step": 320520 }, { "epoch": 90.98211751348283, "grad_norm": 0.0007261669379658997, "learning_rate": 9.054215157536192e-06, "loss": 0.0022673336789011955, "step": 320530 }, { "epoch": 90.98495600340618, "grad_norm": 0.007171903736889362, "learning_rate": 9.051376667612831e-06, "loss": 1.8740259110927582e-05, "step": 320540 }, { "epoch": 90.98779449332955, "grad_norm": 0.0027768979780375957, "learning_rate": 9.04853817768947e-06, "loss": 0.001483019068837166, "step": 320550 }, { "epoch": 90.99063298325291, "grad_norm": 0.0007395141874440014, "learning_rate": 9.045699687766109e-06, "loss": 2.3564137518405914e-05, "step": 320560 }, { "epoch": 90.99347147317627, "grad_norm": 0.0024578250013291836, "learning_rate": 9.042861197842747e-06, "loss": 4.091169685125351e-05, "step": 320570 }, { "epoch": 90.99630996309963, "grad_norm": 0.01064610667526722, "learning_rate": 9.040022707919388e-06, "loss": 5.061421543359756e-05, "step": 320580 }, { "epoch": 90.999148453023, "grad_norm": 0.0003635003522504121, "learning_rate": 9.037184217996026e-06, "loss": 1.3417378067970275e-05, "step": 320590 }, { "epoch": 91.00198694294635, "grad_norm": 0.0013751396909356117, "learning_rate": 9.034345728072666e-06, "loss": 8.164871542248874e-06, "step": 320600 }, { "epoch": 91.00482543286971, "grad_norm": 0.0022095420863479376, "learning_rate": 9.031507238149304e-06, "loss": 2.346392720937729e-05, "step": 320610 }, { "epoch": 91.00766392279307, "grad_norm": 0.0004884665831923485, "learning_rate": 9.028668748225944e-06, "loss": 3.6191195249557495e-06, "step": 320620 }, { "epoch": 91.01050241271643, "grad_norm": 0.0011174074606969953, "learning_rate": 9.025830258302583e-06, "loss": 8.216500282287597e-06, "step": 320630 }, { "epoch": 91.0133409026398, "grad_norm": 0.001122451270930469, "learning_rate": 9.022991768379223e-06, "loss": 1.4514289796352387e-05, "step": 320640 }, { "epoch": 91.01617939256316, "grad_norm": 0.0007625747821293771, "learning_rate": 9.020153278455861e-06, "loss": 9.63546335697174e-06, "step": 320650 }, { "epoch": 91.01901788248652, "grad_norm": 0.0004196568625047803, "learning_rate": 9.0173147885325e-06, "loss": 9.122677147388458e-06, "step": 320660 }, { "epoch": 91.02185637240987, "grad_norm": 0.0004667830653488636, "learning_rate": 9.01447629860914e-06, "loss": 5.872547626495362e-06, "step": 320670 }, { "epoch": 91.02469486233323, "grad_norm": 0.0024622599594295025, "learning_rate": 9.01163780868578e-06, "loss": 6.407313048839569e-06, "step": 320680 }, { "epoch": 91.0275333522566, "grad_norm": 0.0005911489715799689, "learning_rate": 9.00879931876242e-06, "loss": 9.940173476934433e-05, "step": 320690 }, { "epoch": 91.03037184217996, "grad_norm": 0.01849624700844288, "learning_rate": 9.005960828839058e-06, "loss": 0.0008855808526277542, "step": 320700 }, { "epoch": 91.03321033210332, "grad_norm": 0.0005324551602825522, "learning_rate": 9.003122338915697e-06, "loss": 6.520748138427735e-06, "step": 320710 }, { "epoch": 91.03604882202669, "grad_norm": 0.0003760850813705474, "learning_rate": 9.000283848992337e-06, "loss": 4.925765097141266e-06, "step": 320720 }, { "epoch": 91.03888731195005, "grad_norm": 0.0004415800212882459, "learning_rate": 8.997445359068977e-06, "loss": 7.489323616027832e-06, "step": 320730 }, { "epoch": 91.0417258018734, "grad_norm": 0.004504286218434572, "learning_rate": 8.994606869145615e-06, "loss": 8.578971028327942e-06, "step": 320740 }, { "epoch": 91.04456429179676, "grad_norm": 0.0026588859036564827, "learning_rate": 8.991768379222254e-06, "loss": 3.7824735045433045e-06, "step": 320750 }, { "epoch": 91.04740278172012, "grad_norm": 0.002842533402144909, "learning_rate": 8.988929889298892e-06, "loss": 1.3680756092071534e-05, "step": 320760 }, { "epoch": 91.05024127164349, "grad_norm": 0.0004281063156668097, "learning_rate": 8.986091399375534e-06, "loss": 3.318674862384796e-05, "step": 320770 }, { "epoch": 91.05307976156685, "grad_norm": 0.0014728367095813155, "learning_rate": 8.983252909452172e-06, "loss": 5.825608968734741e-06, "step": 320780 }, { "epoch": 91.05591825149021, "grad_norm": 0.00948195531964302, "learning_rate": 8.980414419528811e-06, "loss": 8.466467261314391e-06, "step": 320790 }, { "epoch": 91.05875674141357, "grad_norm": 0.0006807458121329546, "learning_rate": 8.97757592960545e-06, "loss": 5.617178976535797e-06, "step": 320800 }, { "epoch": 91.06159523133692, "grad_norm": 0.0014820534270256758, "learning_rate": 8.974737439682089e-06, "loss": 5.959905683994293e-06, "step": 320810 }, { "epoch": 91.06443372126029, "grad_norm": 0.001229007262736559, "learning_rate": 8.971898949758729e-06, "loss": 1.0436959564685822e-05, "step": 320820 }, { "epoch": 91.06727221118365, "grad_norm": 0.005335635039955378, "learning_rate": 8.969060459835369e-06, "loss": 7.111765444278717e-06, "step": 320830 }, { "epoch": 91.07011070110701, "grad_norm": 0.000993691966868937, "learning_rate": 8.966221969912006e-06, "loss": 6.696954369544983e-06, "step": 320840 }, { "epoch": 91.07294919103038, "grad_norm": 0.0004727965279016644, "learning_rate": 8.963383479988646e-06, "loss": 3.696605563163757e-06, "step": 320850 }, { "epoch": 91.07578768095374, "grad_norm": 0.0013441090704873204, "learning_rate": 8.960544990065286e-06, "loss": 4.368647933006287e-06, "step": 320860 }, { "epoch": 91.07862617087709, "grad_norm": 0.0007101708906702697, "learning_rate": 8.957706500141926e-06, "loss": 5.037896335124969e-06, "step": 320870 }, { "epoch": 91.08146466080045, "grad_norm": 0.0014130247291177511, "learning_rate": 8.954868010218565e-06, "loss": 8.670613169670105e-06, "step": 320880 }, { "epoch": 91.08430315072381, "grad_norm": 0.0014903857372701168, "learning_rate": 8.952029520295203e-06, "loss": 9.301118552684784e-06, "step": 320890 }, { "epoch": 91.08714164064718, "grad_norm": 0.0009677480557002127, "learning_rate": 8.949191030371843e-06, "loss": 6.046518683433532e-06, "step": 320900 }, { "epoch": 91.08998013057054, "grad_norm": 0.001173372264020145, "learning_rate": 8.946352540448481e-06, "loss": 6.167590618133545e-06, "step": 320910 }, { "epoch": 91.0928186204939, "grad_norm": 0.0007209109026007354, "learning_rate": 8.943514050525122e-06, "loss": 1.2013502418994903e-05, "step": 320920 }, { "epoch": 91.09565711041726, "grad_norm": 0.0014819060452282429, "learning_rate": 8.94067556060176e-06, "loss": 7.406063377857208e-06, "step": 320930 }, { "epoch": 91.09849560034061, "grad_norm": 0.000896514393389225, "learning_rate": 8.9378370706784e-06, "loss": 5.457550287246704e-06, "step": 320940 }, { "epoch": 91.10133409026398, "grad_norm": 0.00045484406291507185, "learning_rate": 8.934998580755038e-06, "loss": 4.237890243530273e-06, "step": 320950 }, { "epoch": 91.10417258018734, "grad_norm": 0.00021105678752064705, "learning_rate": 8.932160090831678e-06, "loss": 2.1423958241939544e-05, "step": 320960 }, { "epoch": 91.1070110701107, "grad_norm": 0.0010511936852708459, "learning_rate": 8.929321600908317e-06, "loss": 6.641633808612823e-06, "step": 320970 }, { "epoch": 91.10984956003406, "grad_norm": 0.0018346892902627587, "learning_rate": 8.926483110984957e-06, "loss": 1.336168497800827e-05, "step": 320980 }, { "epoch": 91.11268804995743, "grad_norm": 0.0005139787099324167, "learning_rate": 8.923644621061595e-06, "loss": 6.828084588050843e-06, "step": 320990 }, { "epoch": 91.11552653988079, "grad_norm": 0.004181351978331804, "learning_rate": 8.920806131138235e-06, "loss": 7.0156529545784e-06, "step": 321000 }, { "epoch": 91.11552653988079, "eval_accuracy": 0.9886818846569594, "eval_loss": 0.04910823702812195, "eval_runtime": 36.9889, "eval_samples_per_second": 425.181, "eval_steps_per_second": 6.651, "step": 321000 }, { "epoch": 91.11836502980414, "grad_norm": 0.0007767081842757761, "learning_rate": 8.917967641214874e-06, "loss": 3.803707659244537e-06, "step": 321010 }, { "epoch": 91.1212035197275, "grad_norm": 0.003462463151663542, "learning_rate": 8.915129151291514e-06, "loss": 5.961582064628601e-06, "step": 321020 }, { "epoch": 91.12404200965086, "grad_norm": 0.001082119531929493, "learning_rate": 8.912290661368154e-06, "loss": 4.966557025909424e-06, "step": 321030 }, { "epoch": 91.12688049957423, "grad_norm": 0.0010697016259655356, "learning_rate": 8.909452171444792e-06, "loss": 4.148297011852264e-06, "step": 321040 }, { "epoch": 91.12971898949759, "grad_norm": 0.0003235284239053726, "learning_rate": 8.906613681521431e-06, "loss": 4.5247375965118405e-06, "step": 321050 }, { "epoch": 91.13255747942095, "grad_norm": 0.0005609767395071685, "learning_rate": 8.90377519159807e-06, "loss": 5.4391101002693175e-06, "step": 321060 }, { "epoch": 91.13539596934432, "grad_norm": 0.000624361855443567, "learning_rate": 8.90093670167471e-06, "loss": 6.1744824051857e-06, "step": 321070 }, { "epoch": 91.13823445926766, "grad_norm": 0.0005273892893455923, "learning_rate": 8.898098211751349e-06, "loss": 9.586289525032044e-06, "step": 321080 }, { "epoch": 91.14107294919103, "grad_norm": 0.008357610553503036, "learning_rate": 8.895259721827988e-06, "loss": 4.279986023902893e-06, "step": 321090 }, { "epoch": 91.14391143911439, "grad_norm": 0.0012882242444902658, "learning_rate": 8.892421231904626e-06, "loss": 6.165914237499237e-06, "step": 321100 }, { "epoch": 91.14674992903775, "grad_norm": 0.0015845729503780603, "learning_rate": 8.889582741981266e-06, "loss": 4.502758383750916e-06, "step": 321110 }, { "epoch": 91.14958841896112, "grad_norm": 0.001295876340009272, "learning_rate": 8.886744252057906e-06, "loss": 4.760921001434326e-06, "step": 321120 }, { "epoch": 91.15242690888448, "grad_norm": 0.0009649799903854728, "learning_rate": 8.883905762134545e-06, "loss": 1.0557658970355988e-05, "step": 321130 }, { "epoch": 91.15526539880783, "grad_norm": 0.00040385249303653836, "learning_rate": 8.881067272211183e-06, "loss": 1.9067712128162383e-05, "step": 321140 }, { "epoch": 91.15810388873119, "grad_norm": 0.0017410841537639499, "learning_rate": 8.878228782287823e-06, "loss": 5.5514276027679445e-06, "step": 321150 }, { "epoch": 91.16094237865455, "grad_norm": 0.0010849529644474387, "learning_rate": 8.875390292364463e-06, "loss": 8.925795555114746e-06, "step": 321160 }, { "epoch": 91.16378086857792, "grad_norm": 0.0005798225174658, "learning_rate": 8.872551802441102e-06, "loss": 4.697497934103012e-05, "step": 321170 }, { "epoch": 91.16661935850128, "grad_norm": 0.0011762278154492378, "learning_rate": 8.86971331251774e-06, "loss": 6.523914635181427e-06, "step": 321180 }, { "epoch": 91.16945784842464, "grad_norm": 0.00037682027323171496, "learning_rate": 8.86687482259438e-06, "loss": 5.8103352785110475e-06, "step": 321190 }, { "epoch": 91.172296338348, "grad_norm": 0.0014770722482353449, "learning_rate": 8.86403633267102e-06, "loss": 1.6617774963378907e-05, "step": 321200 }, { "epoch": 91.17513482827135, "grad_norm": 0.00034781082649715245, "learning_rate": 8.861197842747658e-06, "loss": 1.1560693383216859e-05, "step": 321210 }, { "epoch": 91.17797331819472, "grad_norm": 0.0005461248219944537, "learning_rate": 8.858359352824299e-06, "loss": 4.30028885602951e-06, "step": 321220 }, { "epoch": 91.18081180811808, "grad_norm": 0.0014091191114857793, "learning_rate": 8.855520862900937e-06, "loss": 5.4353848099708555e-06, "step": 321230 }, { "epoch": 91.18365029804144, "grad_norm": 0.0009637982584536076, "learning_rate": 8.852682372977577e-06, "loss": 5.652196705341339e-06, "step": 321240 }, { "epoch": 91.1864887879648, "grad_norm": 0.0003257761418353766, "learning_rate": 8.849843883054215e-06, "loss": 3.4538842737674716e-05, "step": 321250 }, { "epoch": 91.18932727788817, "grad_norm": 0.0009073839755728841, "learning_rate": 8.847005393130854e-06, "loss": 4.107505083084106e-06, "step": 321260 }, { "epoch": 91.19216576781153, "grad_norm": 0.0009824383305385709, "learning_rate": 8.844166903207494e-06, "loss": 6.3261017203331e-06, "step": 321270 }, { "epoch": 91.19500425773488, "grad_norm": 0.0005166733171790838, "learning_rate": 8.841328413284134e-06, "loss": 5.992315709590912e-06, "step": 321280 }, { "epoch": 91.19784274765824, "grad_norm": 0.000997477094642818, "learning_rate": 8.838489923360772e-06, "loss": 3.475695848464966e-06, "step": 321290 }, { "epoch": 91.2006812375816, "grad_norm": 0.0006370740011334419, "learning_rate": 8.835651433437411e-06, "loss": 4.555657505989075e-06, "step": 321300 }, { "epoch": 91.20351972750497, "grad_norm": 0.00024551694514229894, "learning_rate": 8.832812943514051e-06, "loss": 5.897320806980133e-06, "step": 321310 }, { "epoch": 91.20635821742833, "grad_norm": 0.0006373130599968135, "learning_rate": 8.82997445359069e-06, "loss": 3.931112587451935e-06, "step": 321320 }, { "epoch": 91.2091967073517, "grad_norm": 0.0020955177024006844, "learning_rate": 8.827135963667329e-06, "loss": 1.81000679731369e-05, "step": 321330 }, { "epoch": 91.21203519727504, "grad_norm": 0.0005391423474065959, "learning_rate": 8.824297473743969e-06, "loss": 6.709247827529907e-06, "step": 321340 }, { "epoch": 91.2148736871984, "grad_norm": 0.001148305251263082, "learning_rate": 8.821458983820608e-06, "loss": 4.756450653076172e-06, "step": 321350 }, { "epoch": 91.21771217712177, "grad_norm": 0.0008699496975168586, "learning_rate": 8.818620493897248e-06, "loss": 8.468329906463622e-06, "step": 321360 }, { "epoch": 91.22055066704513, "grad_norm": 0.0006494115223176777, "learning_rate": 8.815782003973888e-06, "loss": 5.928240716457367e-06, "step": 321370 }, { "epoch": 91.2233891569685, "grad_norm": 0.0005013931659050286, "learning_rate": 8.812943514050526e-06, "loss": 2.7626752853393556e-06, "step": 321380 }, { "epoch": 91.22622764689186, "grad_norm": 0.0004235372762195766, "learning_rate": 8.810105024127165e-06, "loss": 4.8924237489700316e-06, "step": 321390 }, { "epoch": 91.22906613681522, "grad_norm": 0.0002668067754711956, "learning_rate": 8.807266534203803e-06, "loss": 3.036297857761383e-06, "step": 321400 }, { "epoch": 91.23190462673857, "grad_norm": 0.0008450369350612164, "learning_rate": 8.804428044280445e-06, "loss": 3.353133797645569e-06, "step": 321410 }, { "epoch": 91.23474311666193, "grad_norm": 0.002128488617017865, "learning_rate": 8.801589554357083e-06, "loss": 4.0069222450256344e-06, "step": 321420 }, { "epoch": 91.2375816065853, "grad_norm": 0.005500203929841518, "learning_rate": 8.798751064433722e-06, "loss": 7.983483374118806e-06, "step": 321430 }, { "epoch": 91.24042009650866, "grad_norm": 0.001240285113453865, "learning_rate": 8.79591257451036e-06, "loss": 5.675293505191803e-06, "step": 321440 }, { "epoch": 91.24325858643202, "grad_norm": 0.00019583696848712862, "learning_rate": 8.793074084587e-06, "loss": 4.3960288166999815e-06, "step": 321450 }, { "epoch": 91.24609707635538, "grad_norm": 0.001408106298185885, "learning_rate": 8.79023559466364e-06, "loss": 7.845647633075715e-06, "step": 321460 }, { "epoch": 91.24893556627875, "grad_norm": 0.0010390589013695717, "learning_rate": 8.78739710474028e-06, "loss": 6.467103958129883e-06, "step": 321470 }, { "epoch": 91.2517740562021, "grad_norm": 0.0019335208926349878, "learning_rate": 8.784558614816917e-06, "loss": 5.350261926651001e-06, "step": 321480 }, { "epoch": 91.25461254612546, "grad_norm": 0.0006134691066108644, "learning_rate": 8.781720124893557e-06, "loss": 4.0959566831588745e-06, "step": 321490 }, { "epoch": 91.25745103604882, "grad_norm": 0.0011305139632895589, "learning_rate": 8.778881634970197e-06, "loss": 3.596581518650055e-06, "step": 321500 }, { "epoch": 91.25745103604882, "eval_accuracy": 0.9889362243275895, "eval_loss": 0.04816732183098793, "eval_runtime": 35.8959, "eval_samples_per_second": 438.128, "eval_steps_per_second": 6.853, "step": 321500 }, { "epoch": 91.26028952597218, "grad_norm": 0.0005641428870148957, "learning_rate": 8.776043145046836e-06, "loss": 3.39113175868988e-06, "step": 321510 }, { "epoch": 91.26312801589555, "grad_norm": 0.0020334834698587656, "learning_rate": 8.773204655123474e-06, "loss": 8.944980800151825e-06, "step": 321520 }, { "epoch": 91.26596650581891, "grad_norm": 0.00035044466494582593, "learning_rate": 8.770366165200114e-06, "loss": 4.725158214569092e-06, "step": 321530 }, { "epoch": 91.26880499574227, "grad_norm": 0.0014995341189205647, "learning_rate": 8.767527675276754e-06, "loss": 6.335042417049408e-06, "step": 321540 }, { "epoch": 91.27164348566562, "grad_norm": 0.000761816743761301, "learning_rate": 8.764689185353392e-06, "loss": 8.441880345344543e-06, "step": 321550 }, { "epoch": 91.27448197558898, "grad_norm": 0.00024331118038389832, "learning_rate": 8.761850695430033e-06, "loss": 2.7302652597427367e-06, "step": 321560 }, { "epoch": 91.27732046551235, "grad_norm": 0.0004674404044635594, "learning_rate": 8.759012205506671e-06, "loss": 3.6483630537986756e-06, "step": 321570 }, { "epoch": 91.28015895543571, "grad_norm": 0.0007931857835501432, "learning_rate": 8.75617371558331e-06, "loss": 4.192814230918884e-06, "step": 321580 }, { "epoch": 91.28299744535907, "grad_norm": 0.0019443635828793049, "learning_rate": 8.753335225659949e-06, "loss": 9.387172758579254e-06, "step": 321590 }, { "epoch": 91.28583593528244, "grad_norm": 0.004334188997745514, "learning_rate": 8.750496735736588e-06, "loss": 9.344704449176788e-06, "step": 321600 }, { "epoch": 91.28867442520578, "grad_norm": 0.0011069854954257607, "learning_rate": 8.747658245813228e-06, "loss": 7.867813110351562e-06, "step": 321610 }, { "epoch": 91.29151291512915, "grad_norm": 0.009839259088039398, "learning_rate": 8.744819755889868e-06, "loss": 1.7402507364749908e-05, "step": 321620 }, { "epoch": 91.29435140505251, "grad_norm": 0.00033945858012884855, "learning_rate": 8.741981265966506e-06, "loss": 4.7283247113227844e-06, "step": 321630 }, { "epoch": 91.29718989497587, "grad_norm": 0.0005404246621765196, "learning_rate": 8.739142776043145e-06, "loss": 3.528594970703125e-06, "step": 321640 }, { "epoch": 91.30002838489924, "grad_norm": 0.0014593458035960793, "learning_rate": 8.736304286119783e-06, "loss": 7.112696766853332e-06, "step": 321650 }, { "epoch": 91.3028668748226, "grad_norm": 0.00427362322807312, "learning_rate": 8.733465796196425e-06, "loss": 3.6980956792831423e-06, "step": 321660 }, { "epoch": 91.30570536474596, "grad_norm": 0.00023398801567964256, "learning_rate": 8.730627306273063e-06, "loss": 7.075443863868713e-06, "step": 321670 }, { "epoch": 91.30854385466931, "grad_norm": 0.0064650289714336395, "learning_rate": 8.727788816349702e-06, "loss": 5.564652383327484e-06, "step": 321680 }, { "epoch": 91.31138234459267, "grad_norm": 0.0031379542779177427, "learning_rate": 8.724950326426342e-06, "loss": 4.0531158447265625e-06, "step": 321690 }, { "epoch": 91.31422083451604, "grad_norm": 0.003680621972307563, "learning_rate": 8.72211183650298e-06, "loss": 5.175359547138214e-06, "step": 321700 }, { "epoch": 91.3170593244394, "grad_norm": 0.0002503220457583666, "learning_rate": 8.71927334657962e-06, "loss": 4.149042069911957e-06, "step": 321710 }, { "epoch": 91.31989781436276, "grad_norm": 0.00029632775112986565, "learning_rate": 8.71643485665626e-06, "loss": 5.515478551387787e-06, "step": 321720 }, { "epoch": 91.32273630428612, "grad_norm": 0.0005851022433489561, "learning_rate": 8.713596366732899e-06, "loss": 7.52285122871399e-06, "step": 321730 }, { "epoch": 91.32557479420949, "grad_norm": 0.0012401733547449112, "learning_rate": 8.710757876809537e-06, "loss": 4.303641617298126e-06, "step": 321740 }, { "epoch": 91.32841328413284, "grad_norm": 0.006820217240601778, "learning_rate": 8.707919386886177e-06, "loss": 5.191192030906677e-06, "step": 321750 }, { "epoch": 91.3312517740562, "grad_norm": 0.0036365282721817493, "learning_rate": 8.705080896962816e-06, "loss": 3.3095479011535644e-06, "step": 321760 }, { "epoch": 91.33409026397956, "grad_norm": 0.0010543542448431253, "learning_rate": 8.702242407039456e-06, "loss": 4.5103952288627625e-06, "step": 321770 }, { "epoch": 91.33692875390292, "grad_norm": 0.0003119587490800768, "learning_rate": 8.699403917116094e-06, "loss": 4.445388913154602e-06, "step": 321780 }, { "epoch": 91.33976724382629, "grad_norm": 0.0006887170020490885, "learning_rate": 8.696565427192734e-06, "loss": 3.954768180847168e-06, "step": 321790 }, { "epoch": 91.34260573374965, "grad_norm": 0.00010471702262293547, "learning_rate": 8.693726937269372e-06, "loss": 4.773028194904327e-06, "step": 321800 }, { "epoch": 91.34544422367301, "grad_norm": 0.002457423834130168, "learning_rate": 8.690888447346013e-06, "loss": 4.480965435504913e-06, "step": 321810 }, { "epoch": 91.34828271359636, "grad_norm": 0.0014372454024851322, "learning_rate": 8.688049957422651e-06, "loss": 5.470030009746551e-06, "step": 321820 }, { "epoch": 91.35112120351972, "grad_norm": 0.0005923726712353528, "learning_rate": 8.68521146749929e-06, "loss": 3.0703842639923097e-06, "step": 321830 }, { "epoch": 91.35395969344309, "grad_norm": 0.003941499162465334, "learning_rate": 8.682372977575929e-06, "loss": 6.881356239318848e-06, "step": 321840 }, { "epoch": 91.35679818336645, "grad_norm": 0.0004732822999358177, "learning_rate": 8.679534487652569e-06, "loss": 3.995746374130249e-06, "step": 321850 }, { "epoch": 91.35963667328981, "grad_norm": 0.0018581682816147804, "learning_rate": 8.676695997729208e-06, "loss": 5.585886538028717e-06, "step": 321860 }, { "epoch": 91.36247516321318, "grad_norm": 0.0019182501127943397, "learning_rate": 8.673857507805848e-06, "loss": 6.3499435782432554e-06, "step": 321870 }, { "epoch": 91.36531365313652, "grad_norm": 0.001901275129057467, "learning_rate": 8.671019017882488e-06, "loss": 3.5805627703666686e-06, "step": 321880 }, { "epoch": 91.36815214305989, "grad_norm": 0.0007831915863789618, "learning_rate": 8.668180527959126e-06, "loss": 4.2708590626716615e-06, "step": 321890 }, { "epoch": 91.37099063298325, "grad_norm": 0.000670874782372266, "learning_rate": 8.665342038035765e-06, "loss": 4.037283360958099e-06, "step": 321900 }, { "epoch": 91.37382912290661, "grad_norm": 0.01522192731499672, "learning_rate": 8.662503548112405e-06, "loss": 5.87906688451767e-06, "step": 321910 }, { "epoch": 91.37666761282998, "grad_norm": 0.0018685709219425917, "learning_rate": 8.659665058189045e-06, "loss": 4.415363073348999e-05, "step": 321920 }, { "epoch": 91.37950610275334, "grad_norm": 0.0005105888121761382, "learning_rate": 8.656826568265683e-06, "loss": 3.923662006855011e-06, "step": 321930 }, { "epoch": 91.3823445926767, "grad_norm": 0.001611256506294012, "learning_rate": 8.653988078342322e-06, "loss": 4.21963632106781e-06, "step": 321940 }, { "epoch": 91.38518308260005, "grad_norm": 0.0012254253961145878, "learning_rate": 8.65114958841896e-06, "loss": 0.000202057883143425, "step": 321950 }, { "epoch": 91.38802157252341, "grad_norm": 0.003133252263069153, "learning_rate": 8.648311098495602e-06, "loss": 2.1649524569511414e-05, "step": 321960 }, { "epoch": 91.39086006244678, "grad_norm": 0.0006442510639317334, "learning_rate": 8.64547260857224e-06, "loss": 0.0006333822384476662, "step": 321970 }, { "epoch": 91.39369855237014, "grad_norm": 0.0019216288346797228, "learning_rate": 8.64263411864888e-06, "loss": 7.5571238994598385e-06, "step": 321980 }, { "epoch": 91.3965370422935, "grad_norm": 0.00048227468505501747, "learning_rate": 8.639795628725517e-06, "loss": 1.333281397819519e-05, "step": 321990 }, { "epoch": 91.39937553221687, "grad_norm": 0.004995061084628105, "learning_rate": 8.636957138802159e-06, "loss": 6.355345249176025e-06, "step": 322000 }, { "epoch": 91.39937553221687, "eval_accuracy": 0.9878552807274115, "eval_loss": 0.05044759809970856, "eval_runtime": 35.6847, "eval_samples_per_second": 440.721, "eval_steps_per_second": 6.894, "step": 322000 }, { "epoch": 91.40221402214023, "grad_norm": 0.008128768764436245, "learning_rate": 8.634118648878797e-06, "loss": 7.873214781284332e-06, "step": 322010 }, { "epoch": 91.40505251206358, "grad_norm": 0.00303576304577291, "learning_rate": 8.631280158955436e-06, "loss": 1.4053285121917725e-05, "step": 322020 }, { "epoch": 91.40789100198694, "grad_norm": 0.0003613258886616677, "learning_rate": 8.628441669032076e-06, "loss": 1.6075931489467622e-05, "step": 322030 }, { "epoch": 91.4107294919103, "grad_norm": 0.0006657799240201712, "learning_rate": 8.625603179108714e-06, "loss": 5.300901830196381e-06, "step": 322040 }, { "epoch": 91.41356798183367, "grad_norm": 0.00044147338485345244, "learning_rate": 8.622764689185354e-06, "loss": 5.31710684299469e-06, "step": 322050 }, { "epoch": 91.41640647175703, "grad_norm": 0.001733236713334918, "learning_rate": 8.619926199261993e-06, "loss": 1.2599490582942963e-05, "step": 322060 }, { "epoch": 91.41924496168039, "grad_norm": 0.0014505516737699509, "learning_rate": 8.617087709338633e-06, "loss": 9.161978960037232e-06, "step": 322070 }, { "epoch": 91.42208345160374, "grad_norm": 0.000977586372755468, "learning_rate": 8.614249219415271e-06, "loss": 7.524527609348297e-06, "step": 322080 }, { "epoch": 91.4249219415271, "grad_norm": 0.008951100520789623, "learning_rate": 8.61141072949191e-06, "loss": 8.90679657459259e-06, "step": 322090 }, { "epoch": 91.42776043145047, "grad_norm": 0.005104293581098318, "learning_rate": 8.60857223956855e-06, "loss": 6.107427179813385e-06, "step": 322100 }, { "epoch": 91.43059892137383, "grad_norm": 0.0022189710289239883, "learning_rate": 8.60573374964519e-06, "loss": 7.705576717853545e-06, "step": 322110 }, { "epoch": 91.43343741129719, "grad_norm": 0.000807846081443131, "learning_rate": 8.602895259721828e-06, "loss": 5.4176896810531615e-06, "step": 322120 }, { "epoch": 91.43627590122055, "grad_norm": 0.001499396050348878, "learning_rate": 8.600056769798468e-06, "loss": 7.115118205547333e-06, "step": 322130 }, { "epoch": 91.43911439114392, "grad_norm": 0.00044698224519379437, "learning_rate": 8.597218279875106e-06, "loss": 9.717047214508057e-06, "step": 322140 }, { "epoch": 91.44195288106727, "grad_norm": 0.3547096848487854, "learning_rate": 8.594379789951747e-06, "loss": 4.184041172266007e-05, "step": 322150 }, { "epoch": 91.44479137099063, "grad_norm": 0.0021938574500381947, "learning_rate": 8.591541300028385e-06, "loss": 8.644163608551026e-06, "step": 322160 }, { "epoch": 91.44762986091399, "grad_norm": 0.00036279449705034494, "learning_rate": 8.588702810105025e-06, "loss": 5.196947604417801e-05, "step": 322170 }, { "epoch": 91.45046835083735, "grad_norm": 0.0044080098159611225, "learning_rate": 8.585864320181663e-06, "loss": 4.6569854021072384e-06, "step": 322180 }, { "epoch": 91.45330684076072, "grad_norm": 0.0009649339481256902, "learning_rate": 8.583025830258302e-06, "loss": 6.6997483372688295e-06, "step": 322190 }, { "epoch": 91.45614533068408, "grad_norm": 0.0029037524946033955, "learning_rate": 8.580187340334942e-06, "loss": 6.029196083545685e-06, "step": 322200 }, { "epoch": 91.45898382060744, "grad_norm": 0.0011065322905778885, "learning_rate": 8.577348850411582e-06, "loss": 6.209313869476318e-06, "step": 322210 }, { "epoch": 91.46182231053079, "grad_norm": 0.0005116595420986414, "learning_rate": 8.574510360488221e-06, "loss": 3.7373974919319153e-06, "step": 322220 }, { "epoch": 91.46466080045415, "grad_norm": 0.0006699746591039002, "learning_rate": 8.57167187056486e-06, "loss": 3.7128105759620666e-06, "step": 322230 }, { "epoch": 91.46749929037752, "grad_norm": 0.0014547689352184534, "learning_rate": 8.568833380641499e-06, "loss": 5.464442074298858e-06, "step": 322240 }, { "epoch": 91.47033778030088, "grad_norm": 0.000385303923394531, "learning_rate": 8.565994890718139e-06, "loss": 4.696473479270935e-06, "step": 322250 }, { "epoch": 91.47317627022424, "grad_norm": 0.0006305139977484941, "learning_rate": 8.563156400794778e-06, "loss": 4.4388696551322935e-06, "step": 322260 }, { "epoch": 91.4760147601476, "grad_norm": 0.0008377672056667507, "learning_rate": 8.560317910871416e-06, "loss": 4.7175213694572445e-06, "step": 322270 }, { "epoch": 91.47885325007097, "grad_norm": 0.0027377631049603224, "learning_rate": 8.557479420948056e-06, "loss": 4.649162292480469e-06, "step": 322280 }, { "epoch": 91.48169173999432, "grad_norm": 0.0008290958940051496, "learning_rate": 8.554640931024694e-06, "loss": 4.293210804462433e-06, "step": 322290 }, { "epoch": 91.48453022991768, "grad_norm": 0.002065168460831046, "learning_rate": 8.551802441101336e-06, "loss": 5.980394780635833e-06, "step": 322300 }, { "epoch": 91.48736871984104, "grad_norm": 0.00021665336680598557, "learning_rate": 8.548963951177973e-06, "loss": 1.006443053483963e-05, "step": 322310 }, { "epoch": 91.4902072097644, "grad_norm": 0.005367807578295469, "learning_rate": 8.546125461254613e-06, "loss": 6.678141653537751e-06, "step": 322320 }, { "epoch": 91.49304569968777, "grad_norm": 0.0013965243706479669, "learning_rate": 8.543286971331251e-06, "loss": 5.630403757095337e-06, "step": 322330 }, { "epoch": 91.49588418961113, "grad_norm": 0.0009012984810397029, "learning_rate": 8.54044848140789e-06, "loss": 7.2142109274864195e-06, "step": 322340 }, { "epoch": 91.49872267953448, "grad_norm": 0.008588344790041447, "learning_rate": 8.53760999148453e-06, "loss": 7.453002035617829e-06, "step": 322350 }, { "epoch": 91.50156116945784, "grad_norm": 0.004399767145514488, "learning_rate": 8.53477150156117e-06, "loss": 2.8388574719429014e-06, "step": 322360 }, { "epoch": 91.5043996593812, "grad_norm": 0.0004669646732509136, "learning_rate": 8.53193301163781e-06, "loss": 2.787075936794281e-06, "step": 322370 }, { "epoch": 91.50723814930457, "grad_norm": 0.0006287303986027837, "learning_rate": 8.529094521714448e-06, "loss": 5.5890530347824095e-06, "step": 322380 }, { "epoch": 91.51007663922793, "grad_norm": 0.0013275534147396684, "learning_rate": 8.526256031791088e-06, "loss": 4.464201629161834e-06, "step": 322390 }, { "epoch": 91.5129151291513, "grad_norm": 0.0005060627008788288, "learning_rate": 8.523417541867727e-06, "loss": 6.097555160522461e-06, "step": 322400 }, { "epoch": 91.51575361907466, "grad_norm": 0.023480162024497986, "learning_rate": 8.520579051944367e-06, "loss": 1.0695867240428925e-05, "step": 322410 }, { "epoch": 91.518592108998, "grad_norm": 0.0013003154890611768, "learning_rate": 8.517740562021005e-06, "loss": 3.207288682460785e-06, "step": 322420 }, { "epoch": 91.52143059892137, "grad_norm": 0.0013439463218674064, "learning_rate": 8.514902072097645e-06, "loss": 6.817840039730072e-06, "step": 322430 }, { "epoch": 91.52426908884473, "grad_norm": 0.0008770574349910021, "learning_rate": 8.512063582174283e-06, "loss": 4.557333886623382e-06, "step": 322440 }, { "epoch": 91.5271075787681, "grad_norm": 0.0017174823442474008, "learning_rate": 8.509225092250924e-06, "loss": 5.080923438072205e-06, "step": 322450 }, { "epoch": 91.52994606869146, "grad_norm": 0.0006517866277135909, "learning_rate": 8.506386602327562e-06, "loss": 4.802457988262177e-06, "step": 322460 }, { "epoch": 91.53278455861482, "grad_norm": 0.0015350586036220193, "learning_rate": 8.503548112404202e-06, "loss": 5.357526242733002e-06, "step": 322470 }, { "epoch": 91.53562304853818, "grad_norm": 0.000758522015530616, "learning_rate": 8.50070962248084e-06, "loss": 2.897903323173523e-06, "step": 322480 }, { "epoch": 91.53846153846153, "grad_norm": 0.001051872270181775, "learning_rate": 8.49787113255748e-06, "loss": 9.071454405784607e-06, "step": 322490 }, { "epoch": 91.5413000283849, "grad_norm": 0.0031551532447338104, "learning_rate": 8.495032642634119e-06, "loss": 5.5532902479171755e-06, "step": 322500 }, { "epoch": 91.5413000283849, "eval_accuracy": 0.9893177338335347, "eval_loss": 0.04840192571282387, "eval_runtime": 36.8172, "eval_samples_per_second": 427.165, "eval_steps_per_second": 6.682, "step": 322500 }, { "epoch": 91.54413851830826, "grad_norm": 0.001680072513408959, "learning_rate": 8.492194152710759e-06, "loss": 4.548393189907074e-06, "step": 322510 }, { "epoch": 91.54697700823162, "grad_norm": 0.001271096640266478, "learning_rate": 8.489355662787397e-06, "loss": 8.926913142204285e-06, "step": 322520 }, { "epoch": 91.54981549815498, "grad_norm": 0.0029871519654989243, "learning_rate": 8.486517172864036e-06, "loss": 5.632638931274414e-06, "step": 322530 }, { "epoch": 91.55265398807835, "grad_norm": 0.0005182424793019891, "learning_rate": 8.483678682940676e-06, "loss": 4.316680133342743e-06, "step": 322540 }, { "epoch": 91.5554924780017, "grad_norm": 0.0005524177104234695, "learning_rate": 8.480840193017316e-06, "loss": 4.317052662372589e-06, "step": 322550 }, { "epoch": 91.55833096792506, "grad_norm": 0.0027758891228586435, "learning_rate": 8.478001703093955e-06, "loss": 4.431977868080139e-06, "step": 322560 }, { "epoch": 91.56116945784842, "grad_norm": 0.0007138740620575845, "learning_rate": 8.475163213170593e-06, "loss": 7.482245564460754e-06, "step": 322570 }, { "epoch": 91.56400794777178, "grad_norm": 0.001524779130704701, "learning_rate": 8.472324723247233e-06, "loss": 4.325248301029205e-06, "step": 322580 }, { "epoch": 91.56684643769515, "grad_norm": 0.003137505380436778, "learning_rate": 8.469486233323871e-06, "loss": 6.2337145209312436e-06, "step": 322590 }, { "epoch": 91.56968492761851, "grad_norm": 0.0017370856367051601, "learning_rate": 8.466647743400512e-06, "loss": 4.835054278373719e-06, "step": 322600 }, { "epoch": 91.57252341754187, "grad_norm": 0.00038142938865348697, "learning_rate": 8.46380925347715e-06, "loss": 2.279505133628845e-06, "step": 322610 }, { "epoch": 91.57536190746522, "grad_norm": 0.0024326525162905455, "learning_rate": 8.46097076355379e-06, "loss": 5.917064845561981e-06, "step": 322620 }, { "epoch": 91.57820039738858, "grad_norm": 0.0011192505480721593, "learning_rate": 8.458132273630428e-06, "loss": 3.36524099111557e-06, "step": 322630 }, { "epoch": 91.58103888731195, "grad_norm": 0.012258004397153854, "learning_rate": 8.45529378370707e-06, "loss": 7.931515574455261e-06, "step": 322640 }, { "epoch": 91.58387737723531, "grad_norm": 0.0017151631182059646, "learning_rate": 8.452455293783707e-06, "loss": 5.711056292057037e-06, "step": 322650 }, { "epoch": 91.58671586715867, "grad_norm": 0.0006927556823939085, "learning_rate": 8.449616803860347e-06, "loss": 6.6122040152549745e-06, "step": 322660 }, { "epoch": 91.58955435708204, "grad_norm": 0.003016927745193243, "learning_rate": 8.446778313936985e-06, "loss": 4.282407462596893e-06, "step": 322670 }, { "epoch": 91.5923928470054, "grad_norm": 0.002962068421766162, "learning_rate": 8.443939824013625e-06, "loss": 4.103779792785644e-06, "step": 322680 }, { "epoch": 91.59523133692875, "grad_norm": 0.0005270742694847286, "learning_rate": 8.441101334090264e-06, "loss": 3.721751272678375e-06, "step": 322690 }, { "epoch": 91.59806982685211, "grad_norm": 0.00040682274266146123, "learning_rate": 8.438262844166904e-06, "loss": 4.856102168560028e-06, "step": 322700 }, { "epoch": 91.60090831677547, "grad_norm": 0.00040118079050444067, "learning_rate": 8.435424354243544e-06, "loss": 4.108063876628876e-06, "step": 322710 }, { "epoch": 91.60374680669884, "grad_norm": 0.0032695126719772816, "learning_rate": 8.432585864320182e-06, "loss": 5.0658360123634335e-06, "step": 322720 }, { "epoch": 91.6065852966222, "grad_norm": 0.0002702035126276314, "learning_rate": 8.429747374396821e-06, "loss": 5.196966230869293e-06, "step": 322730 }, { "epoch": 91.60942378654556, "grad_norm": 0.0011988428886979818, "learning_rate": 8.426908884473461e-06, "loss": 6.820447742938995e-06, "step": 322740 }, { "epoch": 91.61226227646893, "grad_norm": 0.0009857607074081898, "learning_rate": 8.4240703945501e-06, "loss": 4.526786506175995e-06, "step": 322750 }, { "epoch": 91.61510076639227, "grad_norm": 0.0029213400557637215, "learning_rate": 8.421231904626739e-06, "loss": 5.37186861038208e-06, "step": 322760 }, { "epoch": 91.61793925631564, "grad_norm": 0.0006322587141767144, "learning_rate": 8.418393414703378e-06, "loss": 4.262477159500122e-06, "step": 322770 }, { "epoch": 91.620777746239, "grad_norm": 0.0003634264867287129, "learning_rate": 8.415554924780016e-06, "loss": 3.3278018236160277e-06, "step": 322780 }, { "epoch": 91.62361623616236, "grad_norm": 0.004795524757355452, "learning_rate": 8.412716434856658e-06, "loss": 5.255080759525299e-06, "step": 322790 }, { "epoch": 91.62645472608573, "grad_norm": 0.0007276138640008867, "learning_rate": 8.409877944933296e-06, "loss": 3.2491981983184816e-06, "step": 322800 }, { "epoch": 91.62929321600909, "grad_norm": 0.0006761461845599115, "learning_rate": 8.407039455009935e-06, "loss": 8.188374340534211e-06, "step": 322810 }, { "epoch": 91.63213170593244, "grad_norm": 0.0019187767757102847, "learning_rate": 8.404200965086573e-06, "loss": 4.259496927261353e-06, "step": 322820 }, { "epoch": 91.6349701958558, "grad_norm": 0.0008218351867981255, "learning_rate": 8.401362475163213e-06, "loss": 4.382617771625519e-06, "step": 322830 }, { "epoch": 91.63780868577916, "grad_norm": 0.0028092176653444767, "learning_rate": 8.398523985239853e-06, "loss": 5.319900810718536e-06, "step": 322840 }, { "epoch": 91.64064717570253, "grad_norm": 0.00026512020849622786, "learning_rate": 8.395685495316493e-06, "loss": 3.926828503608703e-06, "step": 322850 }, { "epoch": 91.64348566562589, "grad_norm": 0.0006556620355695486, "learning_rate": 8.39284700539313e-06, "loss": 3.54032963514328e-06, "step": 322860 }, { "epoch": 91.64632415554925, "grad_norm": 0.00030468293698504567, "learning_rate": 8.39000851546977e-06, "loss": 6.969273090362549e-06, "step": 322870 }, { "epoch": 91.64916264547261, "grad_norm": 0.0006275150226429105, "learning_rate": 8.38717002554641e-06, "loss": 6.152130663394928e-06, "step": 322880 }, { "epoch": 91.65200113539596, "grad_norm": 0.0005091407801955938, "learning_rate": 8.38433153562305e-06, "loss": 2.9684975743293763e-06, "step": 322890 }, { "epoch": 91.65483962531933, "grad_norm": 0.0032918776851147413, "learning_rate": 8.38149304569969e-06, "loss": 5.20460307598114e-06, "step": 322900 }, { "epoch": 91.65767811524269, "grad_norm": 0.0006789661711081862, "learning_rate": 8.378654555776327e-06, "loss": 3.0213966965675352e-06, "step": 322910 }, { "epoch": 91.66051660516605, "grad_norm": 0.0010299495188519359, "learning_rate": 8.375816065852967e-06, "loss": 4.5817345380783084e-06, "step": 322920 }, { "epoch": 91.66335509508941, "grad_norm": 0.0007574167684651911, "learning_rate": 8.372977575929605e-06, "loss": 4.029273986816406e-06, "step": 322930 }, { "epoch": 91.66619358501278, "grad_norm": 0.0005458550294861197, "learning_rate": 8.370139086006246e-06, "loss": 4.482641816139221e-06, "step": 322940 }, { "epoch": 91.66903207493614, "grad_norm": 0.0011317208409309387, "learning_rate": 8.367300596082884e-06, "loss": 4.511699080467224e-06, "step": 322950 }, { "epoch": 91.67187056485949, "grad_norm": 0.000536129402462393, "learning_rate": 8.364462106159524e-06, "loss": 4.129670560359955e-06, "step": 322960 }, { "epoch": 91.67470905478285, "grad_norm": 0.005289086140692234, "learning_rate": 8.361623616236162e-06, "loss": 3.256089985370636e-06, "step": 322970 }, { "epoch": 91.67754754470621, "grad_norm": 0.002123807556927204, "learning_rate": 8.358785126312802e-06, "loss": 3.4900382161140443e-06, "step": 322980 }, { "epoch": 91.68038603462958, "grad_norm": 0.00041195013909600675, "learning_rate": 8.355946636389441e-06, "loss": 7.761642336845398e-06, "step": 322990 }, { "epoch": 91.68322452455294, "grad_norm": 0.0006869264761917293, "learning_rate": 8.353108146466081e-06, "loss": 4.366040229797363e-06, "step": 323000 }, { "epoch": 91.68322452455294, "eval_accuracy": 0.9891269790805621, "eval_loss": 0.04784113168716431, "eval_runtime": 35.5735, "eval_samples_per_second": 442.098, "eval_steps_per_second": 6.915, "step": 323000 }, { "epoch": 91.6860630144763, "grad_norm": 0.0002144078171113506, "learning_rate": 8.350269656542719e-06, "loss": 2.9109418392181396e-06, "step": 323010 }, { "epoch": 91.68890150439967, "grad_norm": 0.0005870701279491186, "learning_rate": 8.347431166619359e-06, "loss": 2.8118491172790526e-06, "step": 323020 }, { "epoch": 91.69173999432302, "grad_norm": 0.0008898602100089192, "learning_rate": 8.344592676695998e-06, "loss": 6.900914013385773e-06, "step": 323030 }, { "epoch": 91.69457848424638, "grad_norm": 0.00021022892906330526, "learning_rate": 8.341754186772638e-06, "loss": 3.3704563975334167e-06, "step": 323040 }, { "epoch": 91.69741697416974, "grad_norm": 0.0346357487142086, "learning_rate": 8.338915696849276e-06, "loss": 9.309686720371247e-06, "step": 323050 }, { "epoch": 91.7002554640931, "grad_norm": 0.0002684177306946367, "learning_rate": 8.336077206925916e-06, "loss": 3.931298851966858e-06, "step": 323060 }, { "epoch": 91.70309395401647, "grad_norm": 0.0005506329471245408, "learning_rate": 8.333238717002555e-06, "loss": 2.2079795598983766e-06, "step": 323070 }, { "epoch": 91.70593244393983, "grad_norm": 0.010335751809179783, "learning_rate": 8.330400227079193e-06, "loss": 6.438419222831726e-06, "step": 323080 }, { "epoch": 91.70877093386318, "grad_norm": 0.0005447251023724675, "learning_rate": 8.327561737155835e-06, "loss": 4.540756344795227e-06, "step": 323090 }, { "epoch": 91.71160942378654, "grad_norm": 0.00030126943602226675, "learning_rate": 8.324723247232473e-06, "loss": 4.971586167812347e-06, "step": 323100 }, { "epoch": 91.7144479137099, "grad_norm": 0.13463184237480164, "learning_rate": 8.321884757309112e-06, "loss": 1.891776919364929e-05, "step": 323110 }, { "epoch": 91.71728640363327, "grad_norm": 0.00033973687095567584, "learning_rate": 8.31904626738575e-06, "loss": 7.855892181396485e-06, "step": 323120 }, { "epoch": 91.72012489355663, "grad_norm": 0.0022330586798489094, "learning_rate": 8.31620777746239e-06, "loss": 3.316625952720642e-06, "step": 323130 }, { "epoch": 91.72296338347999, "grad_norm": 0.000616095494478941, "learning_rate": 8.31336928753903e-06, "loss": 3.255903720855713e-06, "step": 323140 }, { "epoch": 91.72580187340336, "grad_norm": 0.0017380060162395239, "learning_rate": 8.31053079761567e-06, "loss": 5.003809928894043e-06, "step": 323150 }, { "epoch": 91.7286403633267, "grad_norm": 0.0011288102250546217, "learning_rate": 8.307692307692307e-06, "loss": 2.839788794517517e-06, "step": 323160 }, { "epoch": 91.73147885325007, "grad_norm": 0.001056898501701653, "learning_rate": 8.304853817768947e-06, "loss": 6.445683538913727e-06, "step": 323170 }, { "epoch": 91.73431734317343, "grad_norm": 0.001183783169835806, "learning_rate": 8.302015327845585e-06, "loss": 4.566274583339691e-06, "step": 323180 }, { "epoch": 91.73715583309679, "grad_norm": 0.0021591305267065763, "learning_rate": 8.299176837922226e-06, "loss": 3.96091490983963e-06, "step": 323190 }, { "epoch": 91.73999432302016, "grad_norm": 0.003068596823140979, "learning_rate": 8.296338347998864e-06, "loss": 4.139542579650879e-06, "step": 323200 }, { "epoch": 91.74283281294352, "grad_norm": 0.0017356113530695438, "learning_rate": 8.293499858075504e-06, "loss": 4.470720887184143e-06, "step": 323210 }, { "epoch": 91.74567130286688, "grad_norm": 0.001271622721105814, "learning_rate": 8.290661368152144e-06, "loss": 7.623620331287384e-06, "step": 323220 }, { "epoch": 91.74850979279023, "grad_norm": 0.0010179589735344052, "learning_rate": 8.287822878228782e-06, "loss": 6.3462182879447935e-06, "step": 323230 }, { "epoch": 91.7513482827136, "grad_norm": 0.0027883665170520544, "learning_rate": 8.284984388305423e-06, "loss": 3.645196557044983e-06, "step": 323240 }, { "epoch": 91.75418677263696, "grad_norm": 0.0002919680846389383, "learning_rate": 8.282145898382061e-06, "loss": 9.028613567352295e-06, "step": 323250 }, { "epoch": 91.75702526256032, "grad_norm": 0.0019681283738464117, "learning_rate": 8.2793074084587e-06, "loss": 8.836016058921814e-06, "step": 323260 }, { "epoch": 91.75986375248368, "grad_norm": 0.010911029763519764, "learning_rate": 8.276468918535339e-06, "loss": 5.166418850421905e-06, "step": 323270 }, { "epoch": 91.76270224240704, "grad_norm": 0.00046434911200776696, "learning_rate": 8.27363042861198e-06, "loss": 5.169026553630829e-06, "step": 323280 }, { "epoch": 91.7655407323304, "grad_norm": 0.0006867364281788468, "learning_rate": 8.270791938688618e-06, "loss": 2.7785077691078187e-06, "step": 323290 }, { "epoch": 91.76837922225376, "grad_norm": 0.0007554516196250916, "learning_rate": 8.267953448765258e-06, "loss": 4.289485514163971e-06, "step": 323300 }, { "epoch": 91.77121771217712, "grad_norm": 0.007783023174852133, "learning_rate": 8.265114958841896e-06, "loss": 4.522502422332764e-06, "step": 323310 }, { "epoch": 91.77405620210048, "grad_norm": 0.00028650296735577285, "learning_rate": 8.262276468918535e-06, "loss": 1.8129125237464904e-06, "step": 323320 }, { "epoch": 91.77689469202384, "grad_norm": 0.0013581927632912993, "learning_rate": 8.259437978995175e-06, "loss": 2.65464186668396e-06, "step": 323330 }, { "epoch": 91.77973318194721, "grad_norm": 0.00034818879794329405, "learning_rate": 8.256599489071815e-06, "loss": 5.268305540084839e-06, "step": 323340 }, { "epoch": 91.78257167187057, "grad_norm": 0.0005650415550917387, "learning_rate": 8.253760999148453e-06, "loss": 5.473010241985321e-06, "step": 323350 }, { "epoch": 91.78541016179392, "grad_norm": 0.00040133396396413445, "learning_rate": 8.250922509225093e-06, "loss": 7.0625916123390194e-06, "step": 323360 }, { "epoch": 91.78824865171728, "grad_norm": 0.001246501924470067, "learning_rate": 8.248084019301732e-06, "loss": 4.864297807216644e-06, "step": 323370 }, { "epoch": 91.79108714164065, "grad_norm": 0.0005189632647670805, "learning_rate": 8.245245529378372e-06, "loss": 4.011206328868866e-06, "step": 323380 }, { "epoch": 91.79392563156401, "grad_norm": 0.0020307956729084253, "learning_rate": 8.24240703945501e-06, "loss": 3.2439827919006346e-06, "step": 323390 }, { "epoch": 91.79676412148737, "grad_norm": 0.00020844562095589936, "learning_rate": 8.23956854953165e-06, "loss": 8.224323391914367e-06, "step": 323400 }, { "epoch": 91.79960261141073, "grad_norm": 0.0010266273748129606, "learning_rate": 8.23673005960829e-06, "loss": 6.51627779006958e-06, "step": 323410 }, { "epoch": 91.8024411013341, "grad_norm": 0.0010870812693610787, "learning_rate": 8.233891569684927e-06, "loss": 4.914216697216034e-06, "step": 323420 }, { "epoch": 91.80527959125745, "grad_norm": 0.00046127758105285466, "learning_rate": 8.231053079761569e-06, "loss": 2.96272337436676e-06, "step": 323430 }, { "epoch": 91.80811808118081, "grad_norm": 0.00041358283488079906, "learning_rate": 8.228214589838207e-06, "loss": 4.765391349792481e-06, "step": 323440 }, { "epoch": 91.81095657110417, "grad_norm": 0.007078428752720356, "learning_rate": 8.225376099914846e-06, "loss": 4.401803016662597e-06, "step": 323450 }, { "epoch": 91.81379506102753, "grad_norm": 0.002012974116951227, "learning_rate": 8.222537609991484e-06, "loss": 6.5498054027557375e-06, "step": 323460 }, { "epoch": 91.8166335509509, "grad_norm": 0.004176821559667587, "learning_rate": 8.219699120068124e-06, "loss": 3.306753933429718e-06, "step": 323470 }, { "epoch": 91.81947204087426, "grad_norm": 0.0008693566196598113, "learning_rate": 8.216860630144764e-06, "loss": 5.260668694972992e-06, "step": 323480 }, { "epoch": 91.82231053079762, "grad_norm": 0.0002487883029971272, "learning_rate": 8.214022140221403e-06, "loss": 2.662837505340576e-06, "step": 323490 }, { "epoch": 91.82514902072097, "grad_norm": 0.00022008207452017814, "learning_rate": 8.211183650298041e-06, "loss": 5.522556602954865e-06, "step": 323500 }, { "epoch": 91.82514902072097, "eval_accuracy": 0.988872639409932, "eval_loss": 0.04842745140194893, "eval_runtime": 35.6507, "eval_samples_per_second": 441.142, "eval_steps_per_second": 6.9, "step": 323500 }, { "epoch": 91.82798751064433, "grad_norm": 0.0005454912898130715, "learning_rate": 8.208345160374681e-06, "loss": 4.004128277301788e-06, "step": 323510 }, { "epoch": 91.8308260005677, "grad_norm": 0.0008594243554398417, "learning_rate": 8.205506670451319e-06, "loss": 5.350448191165924e-06, "step": 323520 }, { "epoch": 91.83366449049106, "grad_norm": 0.003726622788235545, "learning_rate": 8.20266818052796e-06, "loss": 3.768317401409149e-06, "step": 323530 }, { "epoch": 91.83650298041442, "grad_norm": 0.004971690010279417, "learning_rate": 8.199829690604598e-06, "loss": 5.011260509490967e-06, "step": 323540 }, { "epoch": 91.83934147033779, "grad_norm": 0.0016998969949781895, "learning_rate": 8.196991200681238e-06, "loss": 4.459172487258911e-06, "step": 323550 }, { "epoch": 91.84217996026113, "grad_norm": 0.002881287829950452, "learning_rate": 8.194152710757878e-06, "loss": 4.743784666061402e-06, "step": 323560 }, { "epoch": 91.8450184501845, "grad_norm": 0.0009933634428307414, "learning_rate": 8.191314220834516e-06, "loss": 4.080496728420257e-06, "step": 323570 }, { "epoch": 91.84785694010786, "grad_norm": 0.0008663471671752632, "learning_rate": 8.188475730911157e-06, "loss": 3.700330853462219e-06, "step": 323580 }, { "epoch": 91.85069543003122, "grad_norm": 0.001479733968153596, "learning_rate": 8.185637240987795e-06, "loss": 3.3669173717498778e-06, "step": 323590 }, { "epoch": 91.85353391995459, "grad_norm": 0.0007033550646156073, "learning_rate": 8.182798751064435e-06, "loss": 2.5643035769462586e-06, "step": 323600 }, { "epoch": 91.85637240987795, "grad_norm": 0.00023243982286658138, "learning_rate": 8.179960261141073e-06, "loss": 4.106760025024414e-06, "step": 323610 }, { "epoch": 91.85921089980131, "grad_norm": 0.00078374776057899, "learning_rate": 8.177121771217712e-06, "loss": 3.505498170852661e-06, "step": 323620 }, { "epoch": 91.86204938972466, "grad_norm": 0.00048333840095438063, "learning_rate": 8.174283281294352e-06, "loss": 3.427453339099884e-06, "step": 323630 }, { "epoch": 91.86488787964802, "grad_norm": 0.0005404921830631793, "learning_rate": 8.171444791370992e-06, "loss": 2.8485432267189025e-06, "step": 323640 }, { "epoch": 91.86772636957139, "grad_norm": 0.005422003101557493, "learning_rate": 8.16860630144763e-06, "loss": 4.1369348764419556e-06, "step": 323650 }, { "epoch": 91.87056485949475, "grad_norm": 0.001667010597884655, "learning_rate": 8.16576781152427e-06, "loss": 4.0227547287940976e-06, "step": 323660 }, { "epoch": 91.87340334941811, "grad_norm": 0.004686563741415739, "learning_rate": 8.162929321600907e-06, "loss": 8.425302803516388e-06, "step": 323670 }, { "epoch": 91.87624183934147, "grad_norm": 0.001246918342076242, "learning_rate": 8.160090831677549e-06, "loss": 7.15404748916626e-06, "step": 323680 }, { "epoch": 91.87908032926484, "grad_norm": 0.011037256568670273, "learning_rate": 8.157252341754187e-06, "loss": 4.035420715808868e-06, "step": 323690 }, { "epoch": 91.88191881918819, "grad_norm": 0.0010686073219403625, "learning_rate": 8.154413851830826e-06, "loss": 6.3121318817138675e-06, "step": 323700 }, { "epoch": 91.88475730911155, "grad_norm": 0.0010507968254387379, "learning_rate": 8.151575361907466e-06, "loss": 3.886036574840546e-06, "step": 323710 }, { "epoch": 91.88759579903491, "grad_norm": 0.001751412870362401, "learning_rate": 8.148736871984104e-06, "loss": 5.257129669189453e-06, "step": 323720 }, { "epoch": 91.89043428895828, "grad_norm": 0.0011168993078172207, "learning_rate": 8.145898382060744e-06, "loss": 3.703869879245758e-06, "step": 323730 }, { "epoch": 91.89327277888164, "grad_norm": 0.0008743792423047125, "learning_rate": 8.143059892137383e-06, "loss": 4.162453114986419e-06, "step": 323740 }, { "epoch": 91.896111268805, "grad_norm": 0.0011868129950016737, "learning_rate": 8.140221402214023e-06, "loss": 6.8336725234985355e-06, "step": 323750 }, { "epoch": 91.89894975872835, "grad_norm": 0.01627756468951702, "learning_rate": 8.137382912290661e-06, "loss": 7.707811892032624e-06, "step": 323760 }, { "epoch": 91.90178824865171, "grad_norm": 0.0004652758361771703, "learning_rate": 8.1345444223673e-06, "loss": 4.695355892181397e-06, "step": 323770 }, { "epoch": 91.90462673857508, "grad_norm": 0.0005793950986117125, "learning_rate": 8.13170593244394e-06, "loss": 3.5455450415611267e-06, "step": 323780 }, { "epoch": 91.90746522849844, "grad_norm": 0.0013794294791296124, "learning_rate": 8.12886744252058e-06, "loss": 3.2337382435798643e-06, "step": 323790 }, { "epoch": 91.9103037184218, "grad_norm": 0.00045127180055715144, "learning_rate": 8.126028952597218e-06, "loss": 4.078447818756103e-06, "step": 323800 }, { "epoch": 91.91314220834516, "grad_norm": 0.0007599867531098425, "learning_rate": 8.123190462673858e-06, "loss": 4.38714399933815e-05, "step": 323810 }, { "epoch": 91.91598069826853, "grad_norm": 0.000393830647226423, "learning_rate": 8.120351972750496e-06, "loss": 1.2464448809623719e-05, "step": 323820 }, { "epoch": 91.91881918819188, "grad_norm": 0.00016627594595775008, "learning_rate": 8.117513482827137e-06, "loss": 4.064291715621948e-06, "step": 323830 }, { "epoch": 91.92165767811524, "grad_norm": 0.0005546921165660024, "learning_rate": 8.114674992903775e-06, "loss": 5.902908742427826e-06, "step": 323840 }, { "epoch": 91.9244961680386, "grad_norm": 0.003785632085055113, "learning_rate": 8.111836502980415e-06, "loss": 7.160939276218414e-06, "step": 323850 }, { "epoch": 91.92733465796196, "grad_norm": 0.000523126742336899, "learning_rate": 8.108998013057053e-06, "loss": 3.5017728805541992e-06, "step": 323860 }, { "epoch": 91.93017314788533, "grad_norm": 0.0002882322296500206, "learning_rate": 8.106159523133693e-06, "loss": 5.597621202468872e-06, "step": 323870 }, { "epoch": 91.93301163780869, "grad_norm": 0.003524706233292818, "learning_rate": 8.103321033210332e-06, "loss": 4.247203469276428e-06, "step": 323880 }, { "epoch": 91.93585012773205, "grad_norm": 0.0018182250205427408, "learning_rate": 8.100482543286972e-06, "loss": 5.690194666385651e-06, "step": 323890 }, { "epoch": 91.9386886176554, "grad_norm": 0.0011658540461212397, "learning_rate": 8.097644053363612e-06, "loss": 4.39472496509552e-06, "step": 323900 }, { "epoch": 91.94152710757876, "grad_norm": 0.0002208934020018205, "learning_rate": 8.09480556344025e-06, "loss": 4.803389310836792e-06, "step": 323910 }, { "epoch": 91.94436559750213, "grad_norm": 0.00045781690278090537, "learning_rate": 8.091967073516891e-06, "loss": 4.895776510238648e-06, "step": 323920 }, { "epoch": 91.94720408742549, "grad_norm": 0.021237807348370552, "learning_rate": 8.089128583593529e-06, "loss": 9.262748062610626e-06, "step": 323930 }, { "epoch": 91.95004257734885, "grad_norm": 0.0014264453202486038, "learning_rate": 8.086290093670169e-06, "loss": 6.927363574504852e-06, "step": 323940 }, { "epoch": 91.95288106727222, "grad_norm": 0.00032183577422983944, "learning_rate": 8.083451603746807e-06, "loss": 5.89657574892044e-06, "step": 323950 }, { "epoch": 91.95571955719558, "grad_norm": 0.0007133952458389103, "learning_rate": 8.080613113823446e-06, "loss": 6.218254566192627e-06, "step": 323960 }, { "epoch": 91.95855804711893, "grad_norm": 0.006505612283945084, "learning_rate": 8.077774623900086e-06, "loss": 5.065277218818665e-06, "step": 323970 }, { "epoch": 91.96139653704229, "grad_norm": 0.0013686437159776688, "learning_rate": 8.074936133976726e-06, "loss": 5.462765693664551e-06, "step": 323980 }, { "epoch": 91.96423502696565, "grad_norm": 0.0009923152392730117, "learning_rate": 8.072097644053364e-06, "loss": 3.998167812824249e-06, "step": 323990 }, { "epoch": 91.96707351688902, "grad_norm": 0.001292902510613203, "learning_rate": 8.069259154130003e-06, "loss": 6.885454058647156e-06, "step": 324000 }, { "epoch": 91.96707351688902, "eval_accuracy": 0.9889362243275895, "eval_loss": 0.048782747238874435, "eval_runtime": 35.6034, "eval_samples_per_second": 441.728, "eval_steps_per_second": 6.909, "step": 324000 }, { "epoch": 91.96991200681238, "grad_norm": 0.0004980129306204617, "learning_rate": 8.066420664206641e-06, "loss": 2.4633482098579407e-06, "step": 324010 }, { "epoch": 91.97275049673574, "grad_norm": 0.000509098987095058, "learning_rate": 8.063582174283283e-06, "loss": 4.813075065612793e-06, "step": 324020 }, { "epoch": 91.97558898665909, "grad_norm": 0.0030844947323203087, "learning_rate": 8.06074368435992e-06, "loss": 6.621330976486206e-06, "step": 324030 }, { "epoch": 91.97842747658245, "grad_norm": 0.0006429146742448211, "learning_rate": 8.05790519443656e-06, "loss": 3.1000003218650817e-06, "step": 324040 }, { "epoch": 91.98126596650582, "grad_norm": 0.002722187666222453, "learning_rate": 8.0550667045132e-06, "loss": 6.73588365316391e-06, "step": 324050 }, { "epoch": 91.98410445642918, "grad_norm": 0.0005006708670407534, "learning_rate": 8.052228214589838e-06, "loss": 4.1978433728218075e-06, "step": 324060 }, { "epoch": 91.98694294635254, "grad_norm": 0.0006744798156432807, "learning_rate": 8.049389724666478e-06, "loss": 3.95607203245163e-06, "step": 324070 }, { "epoch": 91.9897814362759, "grad_norm": 0.0009412849321961403, "learning_rate": 8.046551234743117e-06, "loss": 2.5935471057891847e-06, "step": 324080 }, { "epoch": 91.99261992619927, "grad_norm": 0.0012075303820893168, "learning_rate": 8.043712744819757e-06, "loss": 3.622099757194519e-06, "step": 324090 }, { "epoch": 91.99545841612262, "grad_norm": 0.00011908258602488786, "learning_rate": 8.040874254896395e-06, "loss": 3.887712955474853e-06, "step": 324100 }, { "epoch": 91.99829690604598, "grad_norm": 0.00029891711892560124, "learning_rate": 8.038035764973035e-06, "loss": 3.661029040813446e-06, "step": 324110 }, { "epoch": 92.00113539596934, "grad_norm": 0.0008768909610807896, "learning_rate": 8.035197275049674e-06, "loss": 9.642067016102374e-06, "step": 324120 }, { "epoch": 92.0039738858927, "grad_norm": 0.0014843903481960297, "learning_rate": 8.032358785126314e-06, "loss": 4.084780812263489e-06, "step": 324130 }, { "epoch": 92.00681237581607, "grad_norm": 0.0002120232820743695, "learning_rate": 8.029520295202952e-06, "loss": 3.335811197757721e-06, "step": 324140 }, { "epoch": 92.00965086573943, "grad_norm": 0.001070306170731783, "learning_rate": 8.026681805279592e-06, "loss": 5.402788519859314e-06, "step": 324150 }, { "epoch": 92.0124893556628, "grad_norm": 0.0005707835080102086, "learning_rate": 8.02384331535623e-06, "loss": 3.5312026739120483e-06, "step": 324160 }, { "epoch": 92.01532784558614, "grad_norm": 0.0006168134277686477, "learning_rate": 8.021004825432871e-06, "loss": 3.917701542377472e-06, "step": 324170 }, { "epoch": 92.0181663355095, "grad_norm": 0.0003981132176704705, "learning_rate": 8.018166335509509e-06, "loss": 3.014504909515381e-06, "step": 324180 }, { "epoch": 92.02100482543287, "grad_norm": 0.0014697378501296043, "learning_rate": 8.015327845586149e-06, "loss": 6.5729022026062015e-06, "step": 324190 }, { "epoch": 92.02384331535623, "grad_norm": 0.0006855658139102161, "learning_rate": 8.012489355662787e-06, "loss": 3.1406059861183165e-06, "step": 324200 }, { "epoch": 92.0266818052796, "grad_norm": 0.002092949813231826, "learning_rate": 8.009650865739426e-06, "loss": 6.468035280704498e-06, "step": 324210 }, { "epoch": 92.02952029520296, "grad_norm": 0.0014238582225516438, "learning_rate": 8.006812375816066e-06, "loss": 3.927014768123627e-06, "step": 324220 }, { "epoch": 92.03235878512632, "grad_norm": 0.0007072364678606391, "learning_rate": 8.003973885892706e-06, "loss": 3.56379896402359e-06, "step": 324230 }, { "epoch": 92.03519727504967, "grad_norm": 0.0008170066284947097, "learning_rate": 8.001135395969345e-06, "loss": 6.135739386081695e-06, "step": 324240 }, { "epoch": 92.03803576497303, "grad_norm": 0.0006411867216229439, "learning_rate": 7.998296906045983e-06, "loss": 3.872253000736237e-06, "step": 324250 }, { "epoch": 92.0408742548964, "grad_norm": 0.0015263293171301484, "learning_rate": 7.995458416122623e-06, "loss": 3.6723911762237547e-06, "step": 324260 }, { "epoch": 92.04371274481976, "grad_norm": 0.00037362397415563464, "learning_rate": 7.992619926199263e-06, "loss": 3.4889206290245054e-06, "step": 324270 }, { "epoch": 92.04655123474312, "grad_norm": 0.0017867255955934525, "learning_rate": 7.989781436275902e-06, "loss": 4.6765431761741635e-06, "step": 324280 }, { "epoch": 92.04938972466648, "grad_norm": 0.0011431697057560086, "learning_rate": 7.98694294635254e-06, "loss": 4.086457192897797e-06, "step": 324290 }, { "epoch": 92.05222821458983, "grad_norm": 0.0004877959145233035, "learning_rate": 7.98410445642918e-06, "loss": 8.666329085826873e-06, "step": 324300 }, { "epoch": 92.0550667045132, "grad_norm": 0.0005481953267008066, "learning_rate": 7.981265966505818e-06, "loss": 3.6263838410377504e-06, "step": 324310 }, { "epoch": 92.05790519443656, "grad_norm": 0.003834257135167718, "learning_rate": 7.97842747658246e-06, "loss": 2.9861927032470704e-06, "step": 324320 }, { "epoch": 92.06074368435992, "grad_norm": 0.0008107325411401689, "learning_rate": 7.975588986659098e-06, "loss": 3.6925077438354494e-06, "step": 324330 }, { "epoch": 92.06358217428328, "grad_norm": 0.00044216550304554403, "learning_rate": 7.972750496735737e-06, "loss": 5.056150257587433e-06, "step": 324340 }, { "epoch": 92.06642066420665, "grad_norm": 0.0018233471782878041, "learning_rate": 7.969912006812375e-06, "loss": 6.175227463245392e-06, "step": 324350 }, { "epoch": 92.06925915413001, "grad_norm": 0.0005602333694696426, "learning_rate": 7.967073516889015e-06, "loss": 4.542805254459381e-06, "step": 324360 }, { "epoch": 92.07209764405336, "grad_norm": 0.0010564638068899512, "learning_rate": 7.964235026965655e-06, "loss": 3.0975788831710815e-06, "step": 324370 }, { "epoch": 92.07493613397672, "grad_norm": 0.000620286155026406, "learning_rate": 7.961396537042294e-06, "loss": 3.557652235031128e-06, "step": 324380 }, { "epoch": 92.07777462390008, "grad_norm": 0.0001012262946460396, "learning_rate": 7.958558047118934e-06, "loss": 5.000270903110504e-06, "step": 324390 }, { "epoch": 92.08061311382345, "grad_norm": 0.00015928130596876144, "learning_rate": 7.955719557195572e-06, "loss": 2.1694228053092957e-06, "step": 324400 }, { "epoch": 92.08345160374681, "grad_norm": 0.004654296673834324, "learning_rate": 7.952881067272212e-06, "loss": 4.322640597820282e-06, "step": 324410 }, { "epoch": 92.08629009367017, "grad_norm": 0.0008767166291363537, "learning_rate": 7.950042577348851e-06, "loss": 2.9342249035835265e-06, "step": 324420 }, { "epoch": 92.08912858359353, "grad_norm": 0.0013275641249492764, "learning_rate": 7.947204087425491e-06, "loss": 4.869699478149414e-06, "step": 324430 }, { "epoch": 92.09196707351688, "grad_norm": 0.0011472069891169667, "learning_rate": 7.944365597502129e-06, "loss": 5.107931792736054e-06, "step": 324440 }, { "epoch": 92.09480556344025, "grad_norm": 0.0035590853076428175, "learning_rate": 7.941527107578769e-06, "loss": 4.3798238039016725e-06, "step": 324450 }, { "epoch": 92.09764405336361, "grad_norm": 0.000522573827765882, "learning_rate": 7.938688617655407e-06, "loss": 5.763396620750427e-06, "step": 324460 }, { "epoch": 92.10048254328697, "grad_norm": 0.0006552730337716639, "learning_rate": 7.935850127732048e-06, "loss": 4.175864160060883e-06, "step": 324470 }, { "epoch": 92.10332103321034, "grad_norm": 0.0008723033242858946, "learning_rate": 7.933011637808686e-06, "loss": 3.234297037124634e-06, "step": 324480 }, { "epoch": 92.1061595231337, "grad_norm": 0.0008274954743683338, "learning_rate": 7.930173147885326e-06, "loss": 9.611062705516815e-06, "step": 324490 }, { "epoch": 92.10899801305705, "grad_norm": 0.0022728529293090105, "learning_rate": 7.927334657961964e-06, "loss": 6.2551349401473996e-06, "step": 324500 }, { "epoch": 92.10899801305705, "eval_accuracy": 0.988745469574617, "eval_loss": 0.050021182745695114, "eval_runtime": 35.4686, "eval_samples_per_second": 443.406, "eval_steps_per_second": 6.936, "step": 324500 }, { "epoch": 92.11183650298041, "grad_norm": 0.0003207511326763779, "learning_rate": 7.924496168038603e-06, "loss": 3.333576023578644e-06, "step": 324510 }, { "epoch": 92.11467499290377, "grad_norm": 0.0002590073272585869, "learning_rate": 7.921657678115243e-06, "loss": 3.1368806958198546e-06, "step": 324520 }, { "epoch": 92.11751348282714, "grad_norm": 0.0005960162379778922, "learning_rate": 7.918819188191883e-06, "loss": 8.02837312221527e-06, "step": 324530 }, { "epoch": 92.1203519727505, "grad_norm": 0.006125758867710829, "learning_rate": 7.91598069826852e-06, "loss": 4.557520151138306e-06, "step": 324540 }, { "epoch": 92.12319046267386, "grad_norm": 0.0015209122793748975, "learning_rate": 7.91314220834516e-06, "loss": 3.5818666219711304e-06, "step": 324550 }, { "epoch": 92.12602895259722, "grad_norm": 0.00020320323528721929, "learning_rate": 7.9103037184218e-06, "loss": 3.485940396785736e-06, "step": 324560 }, { "epoch": 92.12886744252057, "grad_norm": 0.000418935640482232, "learning_rate": 7.90746522849844e-06, "loss": 2.829916775226593e-06, "step": 324570 }, { "epoch": 92.13170593244394, "grad_norm": 0.0008202799945138395, "learning_rate": 7.90462673857508e-06, "loss": 3.5274773836135863e-06, "step": 324580 }, { "epoch": 92.1345444223673, "grad_norm": 0.0006000357680022717, "learning_rate": 7.901788248651717e-06, "loss": 3.363192081451416e-06, "step": 324590 }, { "epoch": 92.13738291229066, "grad_norm": 0.0034282044507563114, "learning_rate": 7.898949758728357e-06, "loss": 5.02225011587143e-06, "step": 324600 }, { "epoch": 92.14022140221402, "grad_norm": 0.0006244583637453616, "learning_rate": 7.896111268804997e-06, "loss": 3.0411407351493836e-06, "step": 324610 }, { "epoch": 92.14305989213739, "grad_norm": 0.0001738445571390912, "learning_rate": 7.893272778881636e-06, "loss": 3.909505903720856e-06, "step": 324620 }, { "epoch": 92.14589838206075, "grad_norm": 0.0002772035077214241, "learning_rate": 7.890434288958274e-06, "loss": 3.1141564249992372e-06, "step": 324630 }, { "epoch": 92.1487368719841, "grad_norm": 0.0009642314980737865, "learning_rate": 7.887595799034914e-06, "loss": 2.4743378162384035e-06, "step": 324640 }, { "epoch": 92.15157536190746, "grad_norm": 0.0004075063916388899, "learning_rate": 7.884757309111552e-06, "loss": 2.8582289814949036e-06, "step": 324650 }, { "epoch": 92.15441385183082, "grad_norm": 0.000373692688299343, "learning_rate": 7.881918819188193e-06, "loss": 3.536231815814972e-06, "step": 324660 }, { "epoch": 92.15725234175419, "grad_norm": 0.00042299521737731993, "learning_rate": 7.879080329264831e-06, "loss": 2.5061890482902527e-06, "step": 324670 }, { "epoch": 92.16009083167755, "grad_norm": 0.0006865213508717716, "learning_rate": 7.876241839341471e-06, "loss": 4.263222217559815e-06, "step": 324680 }, { "epoch": 92.16292932160091, "grad_norm": 0.0002328104746993631, "learning_rate": 7.873403349418109e-06, "loss": 3.6971643567085268e-06, "step": 324690 }, { "epoch": 92.16576781152428, "grad_norm": 0.0012992563424631953, "learning_rate": 7.870564859494749e-06, "loss": 2.7954578399658202e-06, "step": 324700 }, { "epoch": 92.16860630144762, "grad_norm": 0.012510690838098526, "learning_rate": 7.867726369571388e-06, "loss": 6.449967622756958e-06, "step": 324710 }, { "epoch": 92.17144479137099, "grad_norm": 0.00036079331766813993, "learning_rate": 7.864887879648028e-06, "loss": 5.7032331824302675e-06, "step": 324720 }, { "epoch": 92.17428328129435, "grad_norm": 0.0010411761468276381, "learning_rate": 7.862049389724666e-06, "loss": 4.0799379348754885e-06, "step": 324730 }, { "epoch": 92.17712177121771, "grad_norm": 0.0010284243617206812, "learning_rate": 7.859210899801306e-06, "loss": 3.5980716347694398e-06, "step": 324740 }, { "epoch": 92.17996026114108, "grad_norm": 0.0026600954588502645, "learning_rate": 7.856372409877945e-06, "loss": 7.027573883533478e-06, "step": 324750 }, { "epoch": 92.18279875106444, "grad_norm": 0.0006814827211201191, "learning_rate": 7.853533919954585e-06, "loss": 5.054287612438202e-06, "step": 324760 }, { "epoch": 92.18563724098779, "grad_norm": 0.00040183396777138114, "learning_rate": 7.850695430031225e-06, "loss": 2.559460699558258e-06, "step": 324770 }, { "epoch": 92.18847573091115, "grad_norm": 0.0009611390996724367, "learning_rate": 7.847856940107863e-06, "loss": 4.117004573345185e-06, "step": 324780 }, { "epoch": 92.19131422083451, "grad_norm": 0.00017503078561276197, "learning_rate": 7.845018450184502e-06, "loss": 1.1498108506202698e-05, "step": 324790 }, { "epoch": 92.19415271075788, "grad_norm": 0.0004239525296725333, "learning_rate": 7.84217996026114e-06, "loss": 3.3017247915267943e-06, "step": 324800 }, { "epoch": 92.19699120068124, "grad_norm": 0.0015556549187749624, "learning_rate": 7.839341470337782e-06, "loss": 3.8079917430877687e-06, "step": 324810 }, { "epoch": 92.1998296906046, "grad_norm": 0.0007745657931081951, "learning_rate": 7.83650298041442e-06, "loss": 4.045665264129639e-06, "step": 324820 }, { "epoch": 92.20266818052797, "grad_norm": 0.0005113408551551402, "learning_rate": 7.83366449049106e-06, "loss": 5.186907947063446e-06, "step": 324830 }, { "epoch": 92.20550667045131, "grad_norm": 0.0003825454623438418, "learning_rate": 7.830826000567698e-06, "loss": 3.948062658309936e-06, "step": 324840 }, { "epoch": 92.20834516037468, "grad_norm": 0.0010853884741663933, "learning_rate": 7.827987510644337e-06, "loss": 3.9279460906982425e-06, "step": 324850 }, { "epoch": 92.21118365029804, "grad_norm": 0.0006870453944429755, "learning_rate": 7.825149020720977e-06, "loss": 6.340257823467255e-06, "step": 324860 }, { "epoch": 92.2140221402214, "grad_norm": 0.0005506141460500658, "learning_rate": 7.822310530797617e-06, "loss": 3.351829946041107e-06, "step": 324870 }, { "epoch": 92.21686063014477, "grad_norm": 0.0006197824259288609, "learning_rate": 7.819472040874255e-06, "loss": 6.742030382156372e-06, "step": 324880 }, { "epoch": 92.21969912006813, "grad_norm": 0.000883868255186826, "learning_rate": 7.816633550950894e-06, "loss": 3.1813979148864746e-06, "step": 324890 }, { "epoch": 92.22253760999149, "grad_norm": 0.002396220341324806, "learning_rate": 7.813795061027534e-06, "loss": 4.31165099143982e-06, "step": 324900 }, { "epoch": 92.22537609991484, "grad_norm": 0.00010042813664767891, "learning_rate": 7.810956571104174e-06, "loss": 3.5462900996208193e-06, "step": 324910 }, { "epoch": 92.2282145898382, "grad_norm": 0.0004867525422014296, "learning_rate": 7.808118081180813e-06, "loss": 4.9121677875518795e-06, "step": 324920 }, { "epoch": 92.23105307976157, "grad_norm": 0.0008312897989526391, "learning_rate": 7.805279591257451e-06, "loss": 2.724863588809967e-06, "step": 324930 }, { "epoch": 92.23389156968493, "grad_norm": 0.00040872948011383414, "learning_rate": 7.802441101334091e-06, "loss": 3.2553449273109437e-06, "step": 324940 }, { "epoch": 92.23673005960829, "grad_norm": 0.007090521045029163, "learning_rate": 7.799602611410729e-06, "loss": 4.663504660129547e-06, "step": 324950 }, { "epoch": 92.23956854953165, "grad_norm": 0.00020996220700908452, "learning_rate": 7.79676412148737e-06, "loss": 2.5179237127304076e-06, "step": 324960 }, { "epoch": 92.242407039455, "grad_norm": 0.0012310497695580125, "learning_rate": 7.793925631564008e-06, "loss": 3.0197203159332276e-06, "step": 324970 }, { "epoch": 92.24524552937837, "grad_norm": 0.0006959029706194997, "learning_rate": 7.791087141640648e-06, "loss": 3.150478005409241e-06, "step": 324980 }, { "epoch": 92.24808401930173, "grad_norm": 0.0006318659870885313, "learning_rate": 7.788248651717286e-06, "loss": 2.349168062210083e-06, "step": 324990 }, { "epoch": 92.25092250922509, "grad_norm": 0.006376994773745537, "learning_rate": 7.785410161793926e-06, "loss": 5.340203642845154e-06, "step": 325000 }, { "epoch": 92.25092250922509, "eval_accuracy": 0.9891269790805621, "eval_loss": 0.04883307218551636, "eval_runtime": 37.1405, "eval_samples_per_second": 423.446, "eval_steps_per_second": 6.624, "step": 325000 }, { "epoch": 92.25376099914845, "grad_norm": 0.0004480214847717434, "learning_rate": 7.782571671870565e-06, "loss": 4.086457192897797e-06, "step": 325010 }, { "epoch": 92.25659948907182, "grad_norm": 0.0015302429674193263, "learning_rate": 7.779733181947205e-06, "loss": 6.218254566192627e-06, "step": 325020 }, { "epoch": 92.25943797899518, "grad_norm": 0.0016199384117498994, "learning_rate": 7.776894692023843e-06, "loss": 3.5488978028297423e-06, "step": 325030 }, { "epoch": 92.26227646891853, "grad_norm": 0.0350252166390419, "learning_rate": 7.774056202100483e-06, "loss": 8.867867290973663e-06, "step": 325040 }, { "epoch": 92.26511495884189, "grad_norm": 0.00016786584455985576, "learning_rate": 7.771217712177122e-06, "loss": 4.609301686286926e-06, "step": 325050 }, { "epoch": 92.26795344876525, "grad_norm": 0.0018220614874735475, "learning_rate": 7.768379222253762e-06, "loss": 6.421469151973724e-06, "step": 325060 }, { "epoch": 92.27079193868862, "grad_norm": 0.0033944109454751015, "learning_rate": 7.7655407323304e-06, "loss": 4.14922833442688e-06, "step": 325070 }, { "epoch": 92.27363042861198, "grad_norm": 0.001005823607556522, "learning_rate": 7.76270224240704e-06, "loss": 5.461648106575012e-06, "step": 325080 }, { "epoch": 92.27646891853534, "grad_norm": 0.0003054535773117095, "learning_rate": 7.75986375248368e-06, "loss": 3.0444934964179993e-06, "step": 325090 }, { "epoch": 92.2793074084587, "grad_norm": 0.00018944188195746392, "learning_rate": 7.757025262560317e-06, "loss": 4.347041249275208e-06, "step": 325100 }, { "epoch": 92.28214589838205, "grad_norm": 0.00023084440908860415, "learning_rate": 7.754186772636959e-06, "loss": 2.5918707251548767e-06, "step": 325110 }, { "epoch": 92.28498438830542, "grad_norm": 0.0009279469959437847, "learning_rate": 7.751348282713597e-06, "loss": 3.973580896854401e-06, "step": 325120 }, { "epoch": 92.28782287822878, "grad_norm": 0.0004507310513872653, "learning_rate": 7.748509792790236e-06, "loss": 5.962513387203216e-06, "step": 325130 }, { "epoch": 92.29066136815214, "grad_norm": 0.0006927153444848955, "learning_rate": 7.745671302866874e-06, "loss": 4.91514801979065e-06, "step": 325140 }, { "epoch": 92.2934998580755, "grad_norm": 0.0021702179219573736, "learning_rate": 7.742832812943514e-06, "loss": 7.106736302375794e-06, "step": 325150 }, { "epoch": 92.29633834799887, "grad_norm": 0.0009792196797206998, "learning_rate": 7.739994323020154e-06, "loss": 4.320405423641205e-06, "step": 325160 }, { "epoch": 92.29917683792223, "grad_norm": 0.0002557082043495029, "learning_rate": 7.737155833096793e-06, "loss": 2.997368574142456e-06, "step": 325170 }, { "epoch": 92.30201532784558, "grad_norm": 5.8447531046113e-05, "learning_rate": 7.734317343173431e-06, "loss": 6.83758407831192e-06, "step": 325180 }, { "epoch": 92.30485381776894, "grad_norm": 0.000563454523216933, "learning_rate": 7.731478853250071e-06, "loss": 3.684498369693756e-06, "step": 325190 }, { "epoch": 92.3076923076923, "grad_norm": 0.00035046780249103904, "learning_rate": 7.728640363326709e-06, "loss": 6.51087611913681e-06, "step": 325200 }, { "epoch": 92.31053079761567, "grad_norm": 0.00043795310193672776, "learning_rate": 7.72580187340335e-06, "loss": 4.016794264316559e-06, "step": 325210 }, { "epoch": 92.31336928753903, "grad_norm": 0.0021422572899609804, "learning_rate": 7.722963383479988e-06, "loss": 3.4594908356666564e-06, "step": 325220 }, { "epoch": 92.3162077774624, "grad_norm": 0.0012470680521801114, "learning_rate": 7.720124893556628e-06, "loss": 3.660470247268677e-06, "step": 325230 }, { "epoch": 92.31904626738574, "grad_norm": 0.00027171135297976434, "learning_rate": 7.717286403633268e-06, "loss": 5.6486576795578e-06, "step": 325240 }, { "epoch": 92.3218847573091, "grad_norm": 0.0005359523347578943, "learning_rate": 7.714447913709907e-06, "loss": 1.955777406692505e-06, "step": 325250 }, { "epoch": 92.32472324723247, "grad_norm": 0.0003862667945213616, "learning_rate": 7.711609423786547e-06, "loss": 3.0800700187683104e-06, "step": 325260 }, { "epoch": 92.32756173715583, "grad_norm": 0.0008734880830161273, "learning_rate": 7.708770933863185e-06, "loss": 3.363005816936493e-06, "step": 325270 }, { "epoch": 92.3304002270792, "grad_norm": 0.0014157273108139634, "learning_rate": 7.705932443939825e-06, "loss": 8.168071508407592e-06, "step": 325280 }, { "epoch": 92.33323871700256, "grad_norm": 0.0022896307054907084, "learning_rate": 7.703093954016463e-06, "loss": 4.093535244464874e-06, "step": 325290 }, { "epoch": 92.33607720692592, "grad_norm": 0.0028923312202095985, "learning_rate": 7.700255464093104e-06, "loss": 5.0066038966178896e-06, "step": 325300 }, { "epoch": 92.33891569684927, "grad_norm": 0.0007199349929578602, "learning_rate": 7.697416974169742e-06, "loss": 3.993883728981018e-06, "step": 325310 }, { "epoch": 92.34175418677263, "grad_norm": 0.0007735484978184104, "learning_rate": 7.694578484246382e-06, "loss": 4.693865776062012e-06, "step": 325320 }, { "epoch": 92.344592676696, "grad_norm": 0.0005501931300386786, "learning_rate": 7.69173999432302e-06, "loss": 4.777684807777405e-06, "step": 325330 }, { "epoch": 92.34743116661936, "grad_norm": 0.00019068609981331974, "learning_rate": 7.68890150439966e-06, "loss": 3.1441450119018555e-06, "step": 325340 }, { "epoch": 92.35026965654272, "grad_norm": 0.002211238257586956, "learning_rate": 7.6860630144763e-06, "loss": 5.2228569984436035e-06, "step": 325350 }, { "epoch": 92.35310814646608, "grad_norm": 0.0007874540751799941, "learning_rate": 7.683224524552939e-06, "loss": 3.1564384698867796e-06, "step": 325360 }, { "epoch": 92.35594663638945, "grad_norm": 0.0006200427305884659, "learning_rate": 7.680386034629577e-06, "loss": 4.060007631778717e-06, "step": 325370 }, { "epoch": 92.3587851263128, "grad_norm": 0.0006583190988749266, "learning_rate": 7.677547544706217e-06, "loss": 3.6694109439849854e-06, "step": 325380 }, { "epoch": 92.36162361623616, "grad_norm": 0.0008602732559666038, "learning_rate": 7.674709054782856e-06, "loss": 6.002746522426605e-06, "step": 325390 }, { "epoch": 92.36446210615952, "grad_norm": 0.00025629717856645584, "learning_rate": 7.671870564859496e-06, "loss": 3.8532540202140805e-06, "step": 325400 }, { "epoch": 92.36730059608288, "grad_norm": 0.004634337965399027, "learning_rate": 7.669032074936134e-06, "loss": 3.5015866160392763e-06, "step": 325410 }, { "epoch": 92.37013908600625, "grad_norm": 0.00046594085870310664, "learning_rate": 7.666193585012774e-06, "loss": 2.8479844331741333e-06, "step": 325420 }, { "epoch": 92.37297757592961, "grad_norm": 0.0007905500242486596, "learning_rate": 7.663355095089413e-06, "loss": 2.170167863368988e-06, "step": 325430 }, { "epoch": 92.37581606585297, "grad_norm": 0.0003480382729321718, "learning_rate": 7.660516605166051e-06, "loss": 4.123337566852569e-06, "step": 325440 }, { "epoch": 92.37865455577632, "grad_norm": 0.0030086582992225885, "learning_rate": 7.657678115242693e-06, "loss": 3.108195960521698e-06, "step": 325450 }, { "epoch": 92.38149304569968, "grad_norm": 0.004972447641193867, "learning_rate": 7.65483962531933e-06, "loss": 4.9406662583351135e-06, "step": 325460 }, { "epoch": 92.38433153562305, "grad_norm": 0.006641657091677189, "learning_rate": 7.65200113539597e-06, "loss": 7.83279538154602e-06, "step": 325470 }, { "epoch": 92.38717002554641, "grad_norm": 0.0007253661169670522, "learning_rate": 7.649162645472608e-06, "loss": 5.6158751249313354e-06, "step": 325480 }, { "epoch": 92.39000851546977, "grad_norm": 0.0003197613114025444, "learning_rate": 7.646324155549248e-06, "loss": 3.967620432376861e-06, "step": 325490 }, { "epoch": 92.39284700539314, "grad_norm": 0.0003356664383318275, "learning_rate": 7.643485665625888e-06, "loss": 2.7192756533622743e-06, "step": 325500 }, { "epoch": 92.39284700539314, "eval_accuracy": 0.988872639409932, "eval_loss": 0.04822723567485809, "eval_runtime": 35.817, "eval_samples_per_second": 439.093, "eval_steps_per_second": 6.868, "step": 325500 }, { "epoch": 92.39568549531649, "grad_norm": 0.0011308512184768915, "learning_rate": 7.640647175702527e-06, "loss": 8.084066212177277e-06, "step": 325510 }, { "epoch": 92.39852398523985, "grad_norm": 0.003616195870563388, "learning_rate": 7.637808685779165e-06, "loss": 3.197602927684784e-06, "step": 325520 }, { "epoch": 92.40136247516321, "grad_norm": 0.00034760552807711065, "learning_rate": 7.634970195855805e-06, "loss": 5.911663174629212e-06, "step": 325530 }, { "epoch": 92.40420096508657, "grad_norm": 0.0006835647509433329, "learning_rate": 7.632131705932443e-06, "loss": 4.456937313079834e-06, "step": 325540 }, { "epoch": 92.40703945500994, "grad_norm": 0.0023675851989537477, "learning_rate": 7.629293216009084e-06, "loss": 4.959851503372193e-06, "step": 325550 }, { "epoch": 92.4098779449333, "grad_norm": 0.0009684219839982688, "learning_rate": 7.626454726085723e-06, "loss": 4.043243825435638e-06, "step": 325560 }, { "epoch": 92.41271643485666, "grad_norm": 0.002579600317403674, "learning_rate": 7.623616236162362e-06, "loss": 8.564256131649017e-06, "step": 325570 }, { "epoch": 92.41555492478001, "grad_norm": 0.0003105259092990309, "learning_rate": 7.620777746239001e-06, "loss": 4.198774695396423e-06, "step": 325580 }, { "epoch": 92.41839341470337, "grad_norm": 0.004174713511019945, "learning_rate": 7.61793925631564e-06, "loss": 5.28339296579361e-06, "step": 325590 }, { "epoch": 92.42123190462674, "grad_norm": 0.0002973835216835141, "learning_rate": 7.61510076639228e-06, "loss": 3.6817044019699095e-06, "step": 325600 }, { "epoch": 92.4240703945501, "grad_norm": 0.0005691437982022762, "learning_rate": 7.612262276468919e-06, "loss": 3.4341588616371156e-06, "step": 325610 }, { "epoch": 92.42690888447346, "grad_norm": 0.0007546168635599315, "learning_rate": 7.609423786545558e-06, "loss": 3.3231452107429503e-06, "step": 325620 }, { "epoch": 92.42974737439683, "grad_norm": 0.0005304287187755108, "learning_rate": 7.606585296622197e-06, "loss": 3.4462660551071168e-06, "step": 325630 }, { "epoch": 92.43258586432019, "grad_norm": 0.000200504349777475, "learning_rate": 7.603746806698836e-06, "loss": 2.652779221534729e-06, "step": 325640 }, { "epoch": 92.43542435424354, "grad_norm": 0.0005984199233353138, "learning_rate": 7.600908316775476e-06, "loss": 2.42721289396286e-06, "step": 325650 }, { "epoch": 92.4382628441669, "grad_norm": 0.0022832327522337437, "learning_rate": 7.598069826852115e-06, "loss": 6.336718797683716e-06, "step": 325660 }, { "epoch": 92.44110133409026, "grad_norm": 0.0005349685088731349, "learning_rate": 7.5952313369287546e-06, "loss": 5.114264786243438e-06, "step": 325670 }, { "epoch": 92.44393982401363, "grad_norm": 0.00026106240693479776, "learning_rate": 7.592392847005393e-06, "loss": 6.4998865127563475e-06, "step": 325680 }, { "epoch": 92.44677831393699, "grad_norm": 0.0018067271448671818, "learning_rate": 7.589554357082032e-06, "loss": 6.813555955886841e-06, "step": 325690 }, { "epoch": 92.44961680386035, "grad_norm": 0.0018263317178934813, "learning_rate": 7.586715867158673e-06, "loss": 4.55360859632492e-06, "step": 325700 }, { "epoch": 92.4524552937837, "grad_norm": 0.00034692377084866166, "learning_rate": 7.583877377235312e-06, "loss": 2.783164381980896e-06, "step": 325710 }, { "epoch": 92.45529378370706, "grad_norm": 0.0015708960127085447, "learning_rate": 7.5810388873119504e-06, "loss": 6.698630750179291e-06, "step": 325720 }, { "epoch": 92.45813227363043, "grad_norm": 0.0009612305439077318, "learning_rate": 7.578200397388589e-06, "loss": 3.821961581707001e-06, "step": 325730 }, { "epoch": 92.46097076355379, "grad_norm": 0.0001655697269598022, "learning_rate": 7.575361907465228e-06, "loss": 4.653632640838623e-06, "step": 325740 }, { "epoch": 92.46380925347715, "grad_norm": 0.0005424663540907204, "learning_rate": 7.572523417541869e-06, "loss": 5.130469799041748e-06, "step": 325750 }, { "epoch": 92.46664774340051, "grad_norm": 0.0013559767976403236, "learning_rate": 7.5696849276185075e-06, "loss": 4.1905790567398075e-06, "step": 325760 }, { "epoch": 92.46948623332388, "grad_norm": 0.0008325795060954988, "learning_rate": 7.566846437695146e-06, "loss": 3.36524099111557e-06, "step": 325770 }, { "epoch": 92.47232472324723, "grad_norm": 0.0005924568395130336, "learning_rate": 7.564007947771785e-06, "loss": 3.1348317861557007e-06, "step": 325780 }, { "epoch": 92.47516321317059, "grad_norm": 0.0007061014184728265, "learning_rate": 7.561169457848424e-06, "loss": 4.516169428825378e-06, "step": 325790 }, { "epoch": 92.47800170309395, "grad_norm": 0.0013046854874119163, "learning_rate": 7.5583309679250645e-06, "loss": 2.9454007744789123e-06, "step": 325800 }, { "epoch": 92.48084019301731, "grad_norm": 0.0005745934904552996, "learning_rate": 7.555492478001703e-06, "loss": 3.158673644065857e-06, "step": 325810 }, { "epoch": 92.48367868294068, "grad_norm": 0.00023683143081143498, "learning_rate": 7.552653988078342e-06, "loss": 5.073286592960357e-06, "step": 325820 }, { "epoch": 92.48651717286404, "grad_norm": 0.00013665140431839973, "learning_rate": 7.549815498154982e-06, "loss": 2.298504114151001e-06, "step": 325830 }, { "epoch": 92.4893556627874, "grad_norm": 0.0004969117580913007, "learning_rate": 7.546977008231621e-06, "loss": 3.6282464861869814e-06, "step": 325840 }, { "epoch": 92.49219415271075, "grad_norm": 0.0006564201903529465, "learning_rate": 7.544138518308261e-06, "loss": 9.267404675483703e-06, "step": 325850 }, { "epoch": 92.49503264263411, "grad_norm": 0.00042287088581360877, "learning_rate": 7.5413000283849e-06, "loss": 2.715364098548889e-06, "step": 325860 }, { "epoch": 92.49787113255748, "grad_norm": 0.008580234833061695, "learning_rate": 7.538461538461539e-06, "loss": 5.170144140720367e-06, "step": 325870 }, { "epoch": 92.50070962248084, "grad_norm": 0.0005547943874262273, "learning_rate": 7.535623048538178e-06, "loss": 6.993860006332398e-06, "step": 325880 }, { "epoch": 92.5035481124042, "grad_norm": 0.000964031380135566, "learning_rate": 7.532784558614818e-06, "loss": 4.354491829872132e-06, "step": 325890 }, { "epoch": 92.50638660232757, "grad_norm": 0.007654332555830479, "learning_rate": 7.529946068691457e-06, "loss": 4.931539297103882e-06, "step": 325900 }, { "epoch": 92.50922509225093, "grad_norm": 0.0005839161458425224, "learning_rate": 7.527107578768096e-06, "loss": 2.5387853384017944e-06, "step": 325910 }, { "epoch": 92.51206358217428, "grad_norm": 0.002503079129382968, "learning_rate": 7.524269088844735e-06, "loss": 4.972703754901886e-06, "step": 325920 }, { "epoch": 92.51490207209764, "grad_norm": 0.0004512862942647189, "learning_rate": 7.5214305989213736e-06, "loss": 2.5840476155281066e-06, "step": 325930 }, { "epoch": 92.517740562021, "grad_norm": 0.0018589745741337538, "learning_rate": 7.518592108998014e-06, "loss": 3.949739038944244e-06, "step": 325940 }, { "epoch": 92.52057905194437, "grad_norm": 0.002100280486047268, "learning_rate": 7.515753619074653e-06, "loss": 4.127435386180878e-06, "step": 325950 }, { "epoch": 92.52341754186773, "grad_norm": 0.00047564131091348827, "learning_rate": 7.512915129151292e-06, "loss": 4.706345498561859e-06, "step": 325960 }, { "epoch": 92.52625603179109, "grad_norm": 0.0016324337339028716, "learning_rate": 7.510076639227931e-06, "loss": 4.525110125541687e-06, "step": 325970 }, { "epoch": 92.52909452171444, "grad_norm": 0.001943649840541184, "learning_rate": 7.50723814930457e-06, "loss": 4.841573536396027e-06, "step": 325980 }, { "epoch": 92.5319330116378, "grad_norm": 0.0005805761902593076, "learning_rate": 7.50439965938121e-06, "loss": 3.180094063282013e-06, "step": 325990 }, { "epoch": 92.53477150156117, "grad_norm": 0.0006328141316771507, "learning_rate": 7.501561169457849e-06, "loss": 4.069693386554718e-06, "step": 326000 }, { "epoch": 92.53477150156117, "eval_accuracy": 0.9889362243275895, "eval_loss": 0.04853489249944687, "eval_runtime": 40.7867, "eval_samples_per_second": 385.591, "eval_steps_per_second": 6.031, "step": 326000 }, { "epoch": 92.53760999148453, "grad_norm": 0.000752240710426122, "learning_rate": 7.4987226795344885e-06, "loss": 3.3235177397727965e-06, "step": 326010 }, { "epoch": 92.54044848140789, "grad_norm": 0.002538904082030058, "learning_rate": 7.495884189611127e-06, "loss": 5.433149635791779e-06, "step": 326020 }, { "epoch": 92.54328697133126, "grad_norm": 8.8940178102348e-05, "learning_rate": 7.493045699687766e-06, "loss": 5.9142708778381344e-06, "step": 326030 }, { "epoch": 92.54612546125462, "grad_norm": 0.0007794866105541587, "learning_rate": 7.490207209764407e-06, "loss": 4.212558269500732e-06, "step": 326040 }, { "epoch": 92.54896395117797, "grad_norm": 0.0017210396472364664, "learning_rate": 7.4873687198410455e-06, "loss": 4.834495484828949e-06, "step": 326050 }, { "epoch": 92.55180244110133, "grad_norm": 0.000868257659021765, "learning_rate": 7.484530229917684e-06, "loss": 5.2746385335922245e-06, "step": 326060 }, { "epoch": 92.5546409310247, "grad_norm": 0.0016403425252065063, "learning_rate": 7.481691739994323e-06, "loss": 3.855302929878235e-06, "step": 326070 }, { "epoch": 92.55747942094806, "grad_norm": 0.01239804644137621, "learning_rate": 7.478853250070962e-06, "loss": 5.993247032165527e-06, "step": 326080 }, { "epoch": 92.56031791087142, "grad_norm": 0.0015111194225028157, "learning_rate": 7.4760147601476025e-06, "loss": 3.7647783756256104e-06, "step": 326090 }, { "epoch": 92.56315640079478, "grad_norm": 0.0008169658831320703, "learning_rate": 7.473176270224241e-06, "loss": 2.806633710861206e-06, "step": 326100 }, { "epoch": 92.56599489071814, "grad_norm": 0.0004530665755737573, "learning_rate": 7.47033778030088e-06, "loss": 6.05657696723938e-06, "step": 326110 }, { "epoch": 92.5688333806415, "grad_norm": 0.0011769579723477364, "learning_rate": 7.467499290377519e-06, "loss": 5.049258470535279e-06, "step": 326120 }, { "epoch": 92.57167187056486, "grad_norm": 0.0016476558521389961, "learning_rate": 7.464660800454158e-06, "loss": 3.341957926750183e-06, "step": 326130 }, { "epoch": 92.57451036048822, "grad_norm": 0.00019302102737128735, "learning_rate": 7.461822310530798e-06, "loss": 3.105774521827698e-06, "step": 326140 }, { "epoch": 92.57734885041158, "grad_norm": 0.00014531424676533788, "learning_rate": 7.458983820607437e-06, "loss": 3.201514482498169e-06, "step": 326150 }, { "epoch": 92.58018734033494, "grad_norm": 0.0027867495082318783, "learning_rate": 7.456145330684076e-06, "loss": 3.7983059883117677e-06, "step": 326160 }, { "epoch": 92.58302583025831, "grad_norm": 7.54006759962067e-05, "learning_rate": 7.453306840760716e-06, "loss": 3.328733146190643e-06, "step": 326170 }, { "epoch": 92.58586432018166, "grad_norm": 0.0012076575076207519, "learning_rate": 7.4504683508373546e-06, "loss": 4.313513636589051e-06, "step": 326180 }, { "epoch": 92.58870281010502, "grad_norm": 0.0008090232731774449, "learning_rate": 7.447629860913994e-06, "loss": 3.5163015127182006e-06, "step": 326190 }, { "epoch": 92.59154130002838, "grad_norm": 0.0005740104243159294, "learning_rate": 7.444791370990634e-06, "loss": 2.9494985938072205e-06, "step": 326200 }, { "epoch": 92.59437978995174, "grad_norm": 0.001096365856938064, "learning_rate": 7.441952881067273e-06, "loss": 3.0837953090667723e-06, "step": 326210 }, { "epoch": 92.59721827987511, "grad_norm": 0.001442846842110157, "learning_rate": 7.439114391143912e-06, "loss": 4.213117063045502e-06, "step": 326220 }, { "epoch": 92.60005676979847, "grad_norm": 0.0010705855675041676, "learning_rate": 7.4362759012205504e-06, "loss": 3.678351640701294e-06, "step": 326230 }, { "epoch": 92.60289525972183, "grad_norm": 0.0029015878681093454, "learning_rate": 7.433437411297191e-06, "loss": 3.5742297768592836e-06, "step": 326240 }, { "epoch": 92.60573374964518, "grad_norm": 0.00086127471877262, "learning_rate": 7.43059892137383e-06, "loss": 5.004741251468659e-06, "step": 326250 }, { "epoch": 92.60857223956855, "grad_norm": 0.0006389493937604129, "learning_rate": 7.427760431450469e-06, "loss": 4.094839096069336e-06, "step": 326260 }, { "epoch": 92.61141072949191, "grad_norm": 0.0028444037307053804, "learning_rate": 7.4249219415271075e-06, "loss": 4.124268889427185e-06, "step": 326270 }, { "epoch": 92.61424921941527, "grad_norm": 0.007276387419551611, "learning_rate": 7.422083451603746e-06, "loss": 4.553794860839844e-06, "step": 326280 }, { "epoch": 92.61708770933863, "grad_norm": 0.0034092990681529045, "learning_rate": 7.419244961680387e-06, "loss": 4.385411739349365e-06, "step": 326290 }, { "epoch": 92.619926199262, "grad_norm": 0.0004595777718350291, "learning_rate": 7.416406471757026e-06, "loss": 6.700493395328522e-06, "step": 326300 }, { "epoch": 92.62276468918536, "grad_norm": 0.0018662711372599006, "learning_rate": 7.4135679818336645e-06, "loss": 8.871033787727357e-06, "step": 326310 }, { "epoch": 92.62560317910871, "grad_norm": 0.0009576238808222115, "learning_rate": 7.410729491910303e-06, "loss": 3.5103410482406615e-06, "step": 326320 }, { "epoch": 92.62844166903207, "grad_norm": 0.0014838179340586066, "learning_rate": 7.407891001986943e-06, "loss": 4.128739237785339e-06, "step": 326330 }, { "epoch": 92.63128015895543, "grad_norm": 0.0004890100099146366, "learning_rate": 7.405052512063583e-06, "loss": 3.353878855705261e-06, "step": 326340 }, { "epoch": 92.6341186488788, "grad_norm": 0.0006986029329709709, "learning_rate": 7.402214022140222e-06, "loss": 7.178820669651031e-06, "step": 326350 }, { "epoch": 92.63695713880216, "grad_norm": 0.009119748137891293, "learning_rate": 7.399375532216861e-06, "loss": 5.347467958927155e-06, "step": 326360 }, { "epoch": 92.63979562872552, "grad_norm": 0.0019595827907323837, "learning_rate": 7.3965370422935e-06, "loss": 4.080124199390411e-06, "step": 326370 }, { "epoch": 92.64263411864889, "grad_norm": 0.0002254994324175641, "learning_rate": 7.393698552370139e-06, "loss": 3.944709897041321e-06, "step": 326380 }, { "epoch": 92.64547260857223, "grad_norm": 0.00015033491945359856, "learning_rate": 7.390860062446779e-06, "loss": 4.502944648265839e-06, "step": 326390 }, { "epoch": 92.6483110984956, "grad_norm": 0.0012498329160735011, "learning_rate": 7.388021572523418e-06, "loss": 5.064532160758972e-06, "step": 326400 }, { "epoch": 92.65114958841896, "grad_norm": 0.0023469976149499416, "learning_rate": 7.385183082600057e-06, "loss": 4.4995918869972226e-06, "step": 326410 }, { "epoch": 92.65398807834232, "grad_norm": 0.0004018719191662967, "learning_rate": 7.382344592676696e-06, "loss": 3.5267323255538942e-06, "step": 326420 }, { "epoch": 92.65682656826569, "grad_norm": 0.002761700190603733, "learning_rate": 7.379506102753335e-06, "loss": 5.843117833137512e-06, "step": 326430 }, { "epoch": 92.65966505818905, "grad_norm": 0.00016927112301345915, "learning_rate": 7.376667612829975e-06, "loss": 5.824118852615357e-06, "step": 326440 }, { "epoch": 92.6625035481124, "grad_norm": 0.002450367668643594, "learning_rate": 7.373829122906614e-06, "loss": 3.6407262086868284e-06, "step": 326450 }, { "epoch": 92.66534203803576, "grad_norm": 0.0007714630337432027, "learning_rate": 7.370990632983253e-06, "loss": 3.1797215342521666e-06, "step": 326460 }, { "epoch": 92.66818052795912, "grad_norm": 0.0125052435323596, "learning_rate": 7.368152143059892e-06, "loss": 5.652196705341339e-06, "step": 326470 }, { "epoch": 92.67101901788249, "grad_norm": 0.0005443257396109402, "learning_rate": 7.3653136531365314e-06, "loss": 2.5311484932899477e-06, "step": 326480 }, { "epoch": 92.67385750780585, "grad_norm": 0.000740674848202616, "learning_rate": 7.362475163213171e-06, "loss": 4.633329808712006e-06, "step": 326490 }, { "epoch": 92.67669599772921, "grad_norm": 0.0010648444294929504, "learning_rate": 7.35963667328981e-06, "loss": 3.3367425203323366e-06, "step": 326500 }, { "epoch": 92.67669599772921, "eval_accuracy": 0.9892541489158772, "eval_loss": 0.048041533678770065, "eval_runtime": 39.3076, "eval_samples_per_second": 400.101, "eval_steps_per_second": 6.258, "step": 326500 }, { "epoch": 92.67953448765257, "grad_norm": 0.005241051781922579, "learning_rate": 7.35679818336645e-06, "loss": 5.161762237548828e-06, "step": 326510 }, { "epoch": 92.68237297757592, "grad_norm": 0.004507491830736399, "learning_rate": 7.3539596934430885e-06, "loss": 6.744079291820526e-06, "step": 326520 }, { "epoch": 92.68521146749929, "grad_norm": 0.0012184755178168416, "learning_rate": 7.351121203519728e-06, "loss": 3.3404678106307985e-06, "step": 326530 }, { "epoch": 92.68804995742265, "grad_norm": 0.002314477227628231, "learning_rate": 7.348282713596368e-06, "loss": 4.26117330789566e-06, "step": 326540 }, { "epoch": 92.69088844734601, "grad_norm": 0.0001699354179436341, "learning_rate": 7.345444223673007e-06, "loss": 3.5198405385017396e-06, "step": 326550 }, { "epoch": 92.69372693726937, "grad_norm": 0.002966145519167185, "learning_rate": 7.3426057337496455e-06, "loss": 3.9745122194290165e-06, "step": 326560 }, { "epoch": 92.69656542719274, "grad_norm": 0.0012191524729132652, "learning_rate": 7.339767243826284e-06, "loss": 4.564784467220306e-06, "step": 326570 }, { "epoch": 92.6994039171161, "grad_norm": 0.0016554601024836302, "learning_rate": 7.336928753902925e-06, "loss": 4.14382666349411e-06, "step": 326580 }, { "epoch": 92.70224240703945, "grad_norm": 0.0002648691588547081, "learning_rate": 7.334090263979564e-06, "loss": 2.5754794478416444e-06, "step": 326590 }, { "epoch": 92.70508089696281, "grad_norm": 0.00014839918003417552, "learning_rate": 7.3312517740562025e-06, "loss": 2.590566873550415e-06, "step": 326600 }, { "epoch": 92.70791938688618, "grad_norm": 0.0021282352972775698, "learning_rate": 7.328413284132841e-06, "loss": 2.9401853680610657e-06, "step": 326610 }, { "epoch": 92.71075787680954, "grad_norm": 0.0005564700113609433, "learning_rate": 7.32557479420948e-06, "loss": 3.290921449661255e-06, "step": 326620 }, { "epoch": 92.7135963667329, "grad_norm": 0.00046878066495992243, "learning_rate": 7.322736304286121e-06, "loss": 4.534609615802765e-06, "step": 326630 }, { "epoch": 92.71643485665626, "grad_norm": 0.0004890374839305878, "learning_rate": 7.3198978143627596e-06, "loss": 2.5276094675064087e-06, "step": 326640 }, { "epoch": 92.71927334657963, "grad_norm": 0.002092501148581505, "learning_rate": 7.317059324439398e-06, "loss": 6.5069645643234255e-06, "step": 326650 }, { "epoch": 92.72211183650298, "grad_norm": 0.0003223152307327837, "learning_rate": 7.314220834516037e-06, "loss": 4.391372203826904e-06, "step": 326660 }, { "epoch": 92.72495032642634, "grad_norm": 0.0016927493270486593, "learning_rate": 7.311382344592677e-06, "loss": 4.0512531995773315e-06, "step": 326670 }, { "epoch": 92.7277888163497, "grad_norm": 0.0005361161311157048, "learning_rate": 7.308543854669317e-06, "loss": 3.6338344216346742e-06, "step": 326680 }, { "epoch": 92.73062730627306, "grad_norm": 0.0029837568290531635, "learning_rate": 7.305705364745956e-06, "loss": 2.9703602194786073e-06, "step": 326690 }, { "epoch": 92.73346579619643, "grad_norm": 0.0010090004652738571, "learning_rate": 7.302866874822595e-06, "loss": 3.993697464466095e-06, "step": 326700 }, { "epoch": 92.73630428611979, "grad_norm": 0.0003237126802559942, "learning_rate": 7.300028384899234e-06, "loss": 2.346746623516083e-06, "step": 326710 }, { "epoch": 92.73914277604314, "grad_norm": 0.0002533789083827287, "learning_rate": 7.297189894975873e-06, "loss": 4.356354475021363e-06, "step": 326720 }, { "epoch": 92.7419812659665, "grad_norm": 0.00033705829991959035, "learning_rate": 7.294351405052513e-06, "loss": 4.487484693527221e-06, "step": 326730 }, { "epoch": 92.74481975588986, "grad_norm": 0.0007006701780483127, "learning_rate": 7.291512915129152e-06, "loss": 6.0284510254859924e-06, "step": 326740 }, { "epoch": 92.74765824581323, "grad_norm": 0.003996529150754213, "learning_rate": 7.288674425205791e-06, "loss": 3.392435610294342e-06, "step": 326750 }, { "epoch": 92.75049673573659, "grad_norm": 0.000667845131829381, "learning_rate": 7.28583593528243e-06, "loss": 3.5276636481285097e-06, "step": 326760 }, { "epoch": 92.75333522565995, "grad_norm": 0.00038486492121592164, "learning_rate": 7.282997445359069e-06, "loss": 4.220753908157349e-06, "step": 326770 }, { "epoch": 92.75617371558332, "grad_norm": 0.00014906776777934283, "learning_rate": 7.280158955435709e-06, "loss": 2.6673078536987305e-06, "step": 326780 }, { "epoch": 92.75901220550666, "grad_norm": 0.00040621383232064545, "learning_rate": 7.277320465512348e-06, "loss": 2.353079617023468e-06, "step": 326790 }, { "epoch": 92.76185069543003, "grad_norm": 0.0008664041524752975, "learning_rate": 7.274481975588987e-06, "loss": 4.463642835617066e-06, "step": 326800 }, { "epoch": 92.76468918535339, "grad_norm": 0.0002571087097749114, "learning_rate": 7.271643485665626e-06, "loss": 3.886036574840546e-06, "step": 326810 }, { "epoch": 92.76752767527675, "grad_norm": 0.0009218360064551234, "learning_rate": 7.268804995742265e-06, "loss": 4.2296946048736576e-06, "step": 326820 }, { "epoch": 92.77036616520012, "grad_norm": 0.0002857391373254359, "learning_rate": 7.265966505818905e-06, "loss": 5.47952950000763e-06, "step": 326830 }, { "epoch": 92.77320465512348, "grad_norm": 0.0026685709599405527, "learning_rate": 7.263128015895544e-06, "loss": 3.1458213925361635e-06, "step": 326840 }, { "epoch": 92.77604314504684, "grad_norm": 0.005820579826831818, "learning_rate": 7.2602895259721835e-06, "loss": 5.068257451057434e-06, "step": 326850 }, { "epoch": 92.77888163497019, "grad_norm": 0.0036681981291621923, "learning_rate": 7.257451036048822e-06, "loss": 4.171952605247498e-06, "step": 326860 }, { "epoch": 92.78172012489355, "grad_norm": 0.0007345177582465112, "learning_rate": 7.254612546125461e-06, "loss": 3.196485340595245e-06, "step": 326870 }, { "epoch": 92.78455861481692, "grad_norm": 0.0012261583469808102, "learning_rate": 7.251774056202102e-06, "loss": 6.234832108020783e-06, "step": 326880 }, { "epoch": 92.78739710474028, "grad_norm": 0.0009167710668407381, "learning_rate": 7.2489355662787406e-06, "loss": 4.3762847781181335e-06, "step": 326890 }, { "epoch": 92.79023559466364, "grad_norm": 0.006795964203774929, "learning_rate": 7.246097076355379e-06, "loss": 4.875846207141876e-06, "step": 326900 }, { "epoch": 92.793074084587, "grad_norm": 0.0003148676478303969, "learning_rate": 7.243258586432018e-06, "loss": 3.639049828052521e-06, "step": 326910 }, { "epoch": 92.79591257451035, "grad_norm": 0.001042208052240312, "learning_rate": 7.240420096508657e-06, "loss": 3.3039599657058715e-06, "step": 326920 }, { "epoch": 92.79875106443372, "grad_norm": 0.0007223199354484677, "learning_rate": 7.237581606585298e-06, "loss": 2.4462118744850158e-06, "step": 326930 }, { "epoch": 92.80158955435708, "grad_norm": 0.004307951778173447, "learning_rate": 7.234743116661936e-06, "loss": 3.211013972759247e-06, "step": 326940 }, { "epoch": 92.80442804428044, "grad_norm": 0.0003785625740420073, "learning_rate": 7.231904626738575e-06, "loss": 3.6207959055900575e-06, "step": 326950 }, { "epoch": 92.8072665342038, "grad_norm": 0.0047395434230566025, "learning_rate": 7.229066136815214e-06, "loss": 6.424635648727417e-06, "step": 326960 }, { "epoch": 92.81010502412717, "grad_norm": 0.014927647076547146, "learning_rate": 7.226227646891853e-06, "loss": 6.025657057762146e-06, "step": 326970 }, { "epoch": 92.81294351405053, "grad_norm": 0.0003203915257472545, "learning_rate": 7.2233891569684934e-06, "loss": 2.0239502191543577e-06, "step": 326980 }, { "epoch": 92.81578200397388, "grad_norm": 0.01041935384273529, "learning_rate": 7.220550667045132e-06, "loss": 5.4588541388511654e-06, "step": 326990 }, { "epoch": 92.81862049389724, "grad_norm": 0.0008374548051506281, "learning_rate": 7.217712177121771e-06, "loss": 2.259761095046997e-06, "step": 327000 }, { "epoch": 92.81862049389724, "eval_accuracy": 0.9893177338335347, "eval_loss": 0.04780187830328941, "eval_runtime": 44.7682, "eval_samples_per_second": 351.298, "eval_steps_per_second": 5.495, "step": 327000 }, { "epoch": 92.8214589838206, "grad_norm": 0.00043464795453473926, "learning_rate": 7.214873687198411e-06, "loss": 3.6796554923057556e-06, "step": 327010 }, { "epoch": 92.82429747374397, "grad_norm": 0.00017415379988960922, "learning_rate": 7.21203519727505e-06, "loss": 2.621673047542572e-06, "step": 327020 }, { "epoch": 92.82713596366733, "grad_norm": 0.0038812123239040375, "learning_rate": 7.209196707351689e-06, "loss": 4.608742892742157e-06, "step": 327030 }, { "epoch": 92.8299744535907, "grad_norm": 0.0006057637510821223, "learning_rate": 7.206358217428329e-06, "loss": 3.2801181077957153e-06, "step": 327040 }, { "epoch": 92.83281294351406, "grad_norm": 0.0025910791009664536, "learning_rate": 7.203519727504968e-06, "loss": 4.108250141143799e-06, "step": 327050 }, { "epoch": 92.8356514334374, "grad_norm": 0.0017634505638852715, "learning_rate": 7.200681237581607e-06, "loss": 7.004104554653168e-06, "step": 327060 }, { "epoch": 92.83848992336077, "grad_norm": 0.0002931270864792168, "learning_rate": 7.1978427476582455e-06, "loss": 3.4362077713012694e-06, "step": 327070 }, { "epoch": 92.84132841328413, "grad_norm": 0.0006758795352652669, "learning_rate": 7.195004257734886e-06, "loss": 2.575293183326721e-06, "step": 327080 }, { "epoch": 92.8441669032075, "grad_norm": 0.0019086074316874146, "learning_rate": 7.192165767811525e-06, "loss": 4.8175454139709474e-06, "step": 327090 }, { "epoch": 92.84700539313086, "grad_norm": 0.0002667001390364021, "learning_rate": 7.189327277888164e-06, "loss": 3.0329450964927672e-06, "step": 327100 }, { "epoch": 92.84984388305422, "grad_norm": 0.0014582881703972816, "learning_rate": 7.1864887879648025e-06, "loss": 3.4632161259651183e-06, "step": 327110 }, { "epoch": 92.85268237297758, "grad_norm": 0.0005874598282389343, "learning_rate": 7.183650298041441e-06, "loss": 3.464147448539734e-06, "step": 327120 }, { "epoch": 92.85552086290093, "grad_norm": 0.00044338812585920095, "learning_rate": 7.180811808118082e-06, "loss": 3.1650066375732423e-06, "step": 327130 }, { "epoch": 92.8583593528243, "grad_norm": 0.000850368058308959, "learning_rate": 7.177973318194721e-06, "loss": 3.0545517802238466e-06, "step": 327140 }, { "epoch": 92.86119784274766, "grad_norm": 0.0022210502065718174, "learning_rate": 7.1751348282713595e-06, "loss": 4.15947288274765e-06, "step": 327150 }, { "epoch": 92.86403633267102, "grad_norm": 0.001381305162794888, "learning_rate": 7.172296338347998e-06, "loss": 6.611831486225128e-06, "step": 327160 }, { "epoch": 92.86687482259438, "grad_norm": 0.0008425613050349057, "learning_rate": 7.169457848424638e-06, "loss": 3.6500394344329832e-06, "step": 327170 }, { "epoch": 92.86971331251775, "grad_norm": 0.0006461196462623775, "learning_rate": 7.166619358501278e-06, "loss": 3.1873583793640138e-06, "step": 327180 }, { "epoch": 92.8725518024411, "grad_norm": 0.0019270192133262753, "learning_rate": 7.1637808685779174e-06, "loss": 7.770583033561707e-06, "step": 327190 }, { "epoch": 92.87539029236446, "grad_norm": 0.0004770659143105149, "learning_rate": 7.160942378654556e-06, "loss": 2.0753592252731324e-06, "step": 327200 }, { "epoch": 92.87822878228782, "grad_norm": 0.00020870465959887952, "learning_rate": 7.158103888731195e-06, "loss": 3.3674761652946474e-06, "step": 327210 }, { "epoch": 92.88106727221118, "grad_norm": 0.002708067884668708, "learning_rate": 7.155265398807836e-06, "loss": 5.014054477214813e-06, "step": 327220 }, { "epoch": 92.88390576213455, "grad_norm": 0.0013535897014662623, "learning_rate": 7.1524269088844745e-06, "loss": 3.3259391784667967e-06, "step": 327230 }, { "epoch": 92.88674425205791, "grad_norm": 0.0004835499858018011, "learning_rate": 7.149588418961113e-06, "loss": 4.473514854907989e-06, "step": 327240 }, { "epoch": 92.88958274198127, "grad_norm": 0.0015794637147337198, "learning_rate": 7.146749929037752e-06, "loss": 3.9046630263328556e-06, "step": 327250 }, { "epoch": 92.89242123190462, "grad_norm": 0.0007875429000705481, "learning_rate": 7.143911439114391e-06, "loss": 3.042258322238922e-06, "step": 327260 }, { "epoch": 92.89525972182798, "grad_norm": 0.0006942276377230883, "learning_rate": 7.1410729491910315e-06, "loss": 5.471520125865936e-06, "step": 327270 }, { "epoch": 92.89809821175135, "grad_norm": 0.0007965308031998575, "learning_rate": 7.13823445926767e-06, "loss": 3.3278018236160277e-06, "step": 327280 }, { "epoch": 92.90093670167471, "grad_norm": 0.0013282356085255742, "learning_rate": 7.135395969344309e-06, "loss": 2.4667009711265563e-06, "step": 327290 }, { "epoch": 92.90377519159807, "grad_norm": 0.001911665196530521, "learning_rate": 7.132557479420948e-06, "loss": 6.091222167015075e-06, "step": 327300 }, { "epoch": 92.90661368152143, "grad_norm": 0.0007496246253140271, "learning_rate": 7.129718989497587e-06, "loss": 3.528781235218048e-06, "step": 327310 }, { "epoch": 92.9094521714448, "grad_norm": 0.0010737186530604959, "learning_rate": 7.126880499574227e-06, "loss": 4.788674414157868e-06, "step": 327320 }, { "epoch": 92.91229066136815, "grad_norm": 0.0005010883905924857, "learning_rate": 7.124042009650866e-06, "loss": 4.363991320133209e-06, "step": 327330 }, { "epoch": 92.91512915129151, "grad_norm": 0.0003994440194219351, "learning_rate": 7.121203519727505e-06, "loss": 3.160908818244934e-06, "step": 327340 }, { "epoch": 92.91796764121487, "grad_norm": 0.006430960260331631, "learning_rate": 7.118365029804145e-06, "loss": 4.504434764385224e-06, "step": 327350 }, { "epoch": 92.92080613113824, "grad_norm": 0.00018609678954817355, "learning_rate": 7.1155265398807835e-06, "loss": 5.534850060939789e-06, "step": 327360 }, { "epoch": 92.9236446210616, "grad_norm": 0.0013136561028659344, "learning_rate": 7.112688049957423e-06, "loss": 4.044920206069946e-06, "step": 327370 }, { "epoch": 92.92648311098496, "grad_norm": 0.0023780011106282473, "learning_rate": 7.109849560034063e-06, "loss": 4.341825842857361e-06, "step": 327380 }, { "epoch": 92.92932160090831, "grad_norm": 0.0013300045393407345, "learning_rate": 7.107011070110702e-06, "loss": 3.1746923923492433e-06, "step": 327390 }, { "epoch": 92.93216009083167, "grad_norm": 0.0006974432035349309, "learning_rate": 7.1041725801873406e-06, "loss": 3.868713974952698e-06, "step": 327400 }, { "epoch": 92.93499858075504, "grad_norm": 0.0017653882969170809, "learning_rate": 7.101334090263979e-06, "loss": 3.520213067531586e-06, "step": 327410 }, { "epoch": 92.9378370706784, "grad_norm": 0.00031097111059352756, "learning_rate": 7.09849560034062e-06, "loss": 2.8248876333236693e-06, "step": 327420 }, { "epoch": 92.94067556060176, "grad_norm": 0.0012478608405217528, "learning_rate": 7.095657110417259e-06, "loss": 2.6311725378036497e-06, "step": 327430 }, { "epoch": 92.94351405052512, "grad_norm": 0.0007187033770605922, "learning_rate": 7.092818620493898e-06, "loss": 6.586872041225434e-06, "step": 327440 }, { "epoch": 92.94635254044849, "grad_norm": 0.0031338436529040337, "learning_rate": 7.089980130570536e-06, "loss": 3.0981376767158507e-06, "step": 327450 }, { "epoch": 92.94919103037184, "grad_norm": 0.0003531796974129975, "learning_rate": 7.087141640647175e-06, "loss": 2.787448465824127e-06, "step": 327460 }, { "epoch": 92.9520295202952, "grad_norm": 0.0010272276122123003, "learning_rate": 7.084303150723816e-06, "loss": 5.7263299822807315e-06, "step": 327470 }, { "epoch": 92.95486801021856, "grad_norm": 0.0002955025411210954, "learning_rate": 7.081464660800455e-06, "loss": 2.0930543541908265e-06, "step": 327480 }, { "epoch": 92.95770650014192, "grad_norm": 0.0008806932019069791, "learning_rate": 7.0786261708770934e-06, "loss": 5.421414971351623e-06, "step": 327490 }, { "epoch": 92.96054499006529, "grad_norm": 0.0010671811178326607, "learning_rate": 7.075787680953732e-06, "loss": 2.90554016828537e-06, "step": 327500 }, { "epoch": 92.96054499006529, "eval_accuracy": 0.9886182997393018, "eval_loss": 0.048231903463602066, "eval_runtime": 55.9356, "eval_samples_per_second": 281.163, "eval_steps_per_second": 4.398, "step": 327500 }, { "epoch": 92.96338347998865, "grad_norm": 0.00048223871272057295, "learning_rate": 7.072949191030372e-06, "loss": 2.99941748380661e-06, "step": 327510 }, { "epoch": 92.96622196991201, "grad_norm": 0.0005033304914832115, "learning_rate": 7.070110701107012e-06, "loss": 3.2840296626091e-06, "step": 327520 }, { "epoch": 92.96906045983536, "grad_norm": 0.0006649920251220465, "learning_rate": 7.067272211183651e-06, "loss": 8.02651047706604e-06, "step": 327530 }, { "epoch": 92.97189894975872, "grad_norm": 0.0006239337963052094, "learning_rate": 7.06443372126029e-06, "loss": 6.379932165145874e-06, "step": 327540 }, { "epoch": 92.97473743968209, "grad_norm": 0.000509640492964536, "learning_rate": 7.061595231336929e-06, "loss": 7.437914609909057e-06, "step": 327550 }, { "epoch": 92.97757592960545, "grad_norm": 0.0008077551610767841, "learning_rate": 7.058756741413568e-06, "loss": 5.31710684299469e-06, "step": 327560 }, { "epoch": 92.98041441952881, "grad_norm": 0.0013775692787021399, "learning_rate": 7.055918251490208e-06, "loss": 5.106441676616669e-06, "step": 327570 }, { "epoch": 92.98325290945218, "grad_norm": 0.002900544786825776, "learning_rate": 7.053079761566847e-06, "loss": 5.8908015489578245e-06, "step": 327580 }, { "epoch": 92.98609139937554, "grad_norm": 0.0009174084989354014, "learning_rate": 7.050241271643486e-06, "loss": 5.512870848178863e-06, "step": 327590 }, { "epoch": 92.98892988929889, "grad_norm": 0.0019011656986549497, "learning_rate": 7.047402781720125e-06, "loss": 4.881061613559723e-06, "step": 327600 }, { "epoch": 92.99176837922225, "grad_norm": 0.0014890192542225122, "learning_rate": 7.044564291796764e-06, "loss": 3.1070783734321593e-06, "step": 327610 }, { "epoch": 92.99460686914561, "grad_norm": 0.00036175118293613195, "learning_rate": 7.041725801873404e-06, "loss": 3.361143171787262e-06, "step": 327620 }, { "epoch": 92.99744535906898, "grad_norm": 0.0008364058448933065, "learning_rate": 7.038887311950043e-06, "loss": 3.5025179386138918e-06, "step": 327630 }, { "epoch": 93.00028384899234, "grad_norm": 0.0037377672269940376, "learning_rate": 7.036048822026682e-06, "loss": 4.222735151415691e-06, "step": 327640 }, { "epoch": 93.0031223389157, "grad_norm": 0.0017226210329681635, "learning_rate": 7.033210332103321e-06, "loss": 2.942979335784912e-06, "step": 327650 }, { "epoch": 93.00596082883905, "grad_norm": 0.023038653656840324, "learning_rate": 7.03037184217996e-06, "loss": 6.58966600894928e-06, "step": 327660 }, { "epoch": 93.00879931876241, "grad_norm": 0.0012270634761080146, "learning_rate": 7.0275333522566e-06, "loss": 7.126294076442719e-06, "step": 327670 }, { "epoch": 93.01163780868578, "grad_norm": 0.0010389360832050443, "learning_rate": 7.024694862333239e-06, "loss": 3.4159049391746523e-06, "step": 327680 }, { "epoch": 93.01447629860914, "grad_norm": 0.00037375243846327066, "learning_rate": 7.021856372409879e-06, "loss": 3.927573561668396e-06, "step": 327690 }, { "epoch": 93.0173147885325, "grad_norm": 0.0013928934931755066, "learning_rate": 7.019017882486517e-06, "loss": 4.0531158447265625e-06, "step": 327700 }, { "epoch": 93.02015327845587, "grad_norm": 0.0005515867378562689, "learning_rate": 7.016179392563156e-06, "loss": 3.5241246223449707e-06, "step": 327710 }, { "epoch": 93.02299176837923, "grad_norm": 0.00043633676250465214, "learning_rate": 7.013340902639797e-06, "loss": 3.4596771001815798e-06, "step": 327720 }, { "epoch": 93.02583025830258, "grad_norm": 0.0007076971232891083, "learning_rate": 7.010502412716436e-06, "loss": 3.631040453910828e-06, "step": 327730 }, { "epoch": 93.02866874822594, "grad_norm": 0.00025000248569995165, "learning_rate": 7.0076639227930745e-06, "loss": 3.721006214618683e-06, "step": 327740 }, { "epoch": 93.0315072381493, "grad_norm": 0.0006107945810072124, "learning_rate": 7.004825432869713e-06, "loss": 2.3243948817253114e-06, "step": 327750 }, { "epoch": 93.03434572807267, "grad_norm": 0.0004085309919901192, "learning_rate": 7.001986942946352e-06, "loss": 2.576969563961029e-06, "step": 327760 }, { "epoch": 93.03718421799603, "grad_norm": 0.00017199850117322057, "learning_rate": 6.999148453022993e-06, "loss": 3.0662864446640015e-06, "step": 327770 }, { "epoch": 93.04002270791939, "grad_norm": 0.0005071719642728567, "learning_rate": 6.9963099630996315e-06, "loss": 3.4088268876075743e-06, "step": 327780 }, { "epoch": 93.04286119784275, "grad_norm": 0.001578050316311419, "learning_rate": 6.99347147317627e-06, "loss": 3.1989067792892458e-06, "step": 327790 }, { "epoch": 93.0456996877661, "grad_norm": 0.00107134401332587, "learning_rate": 6.990632983252909e-06, "loss": 2.081133425235748e-06, "step": 327800 }, { "epoch": 93.04853817768947, "grad_norm": 0.0010546805569902062, "learning_rate": 6.987794493329548e-06, "loss": 3.904476761817932e-06, "step": 327810 }, { "epoch": 93.05137666761283, "grad_norm": 0.0014576626708731055, "learning_rate": 6.9849560034061885e-06, "loss": 3.982894122600556e-06, "step": 327820 }, { "epoch": 93.05421515753619, "grad_norm": 0.0005433037295006216, "learning_rate": 6.982117513482827e-06, "loss": 3.1635165214538576e-06, "step": 327830 }, { "epoch": 93.05705364745955, "grad_norm": 0.0007921935175545514, "learning_rate": 6.979279023559466e-06, "loss": 2.9819086194038393e-06, "step": 327840 }, { "epoch": 93.05989213738292, "grad_norm": 0.000696416711434722, "learning_rate": 6.976440533636106e-06, "loss": 3.505311906337738e-06, "step": 327850 }, { "epoch": 93.06273062730628, "grad_norm": 0.000963925092946738, "learning_rate": 6.9736020437127455e-06, "loss": 3.909692168235779e-06, "step": 327860 }, { "epoch": 93.06556911722963, "grad_norm": 0.002036911202594638, "learning_rate": 6.970763553789384e-06, "loss": 3.6155804991722105e-06, "step": 327870 }, { "epoch": 93.06840760715299, "grad_norm": 0.0007377586443908513, "learning_rate": 6.967925063866024e-06, "loss": 2.8314068913459776e-06, "step": 327880 }, { "epoch": 93.07124609707635, "grad_norm": 0.0008964776643551886, "learning_rate": 6.965086573942663e-06, "loss": 2.8073787689208985e-06, "step": 327890 }, { "epoch": 93.07408458699972, "grad_norm": 0.0004412498965393752, "learning_rate": 6.962248084019302e-06, "loss": 3.734417259693146e-06, "step": 327900 }, { "epoch": 93.07692307692308, "grad_norm": 0.0005362254451029003, "learning_rate": 6.959409594095942e-06, "loss": 2.93925404548645e-06, "step": 327910 }, { "epoch": 93.07976156684644, "grad_norm": 0.0012963935732841492, "learning_rate": 6.956571104172581e-06, "loss": 2.5020912289619445e-06, "step": 327920 }, { "epoch": 93.08260005676979, "grad_norm": 0.00047227393952198327, "learning_rate": 6.95373261424922e-06, "loss": 3.9387494325637816e-06, "step": 327930 }, { "epoch": 93.08543854669315, "grad_norm": 0.001753460499458015, "learning_rate": 6.950894124325859e-06, "loss": 5.76656311750412e-06, "step": 327940 }, { "epoch": 93.08827703661652, "grad_norm": 0.0012312050675973296, "learning_rate": 6.948055634402498e-06, "loss": 6.189942359924316e-06, "step": 327950 }, { "epoch": 93.09111552653988, "grad_norm": 0.0003861241275444627, "learning_rate": 6.945217144479138e-06, "loss": 5.184486508369446e-06, "step": 327960 }, { "epoch": 93.09395401646324, "grad_norm": 0.0038779540918767452, "learning_rate": 6.942378654555777e-06, "loss": 5.09507954120636e-06, "step": 327970 }, { "epoch": 93.0967925063866, "grad_norm": 0.001020713709294796, "learning_rate": 6.939540164632416e-06, "loss": 3.180280327796936e-06, "step": 327980 }, { "epoch": 93.09963099630997, "grad_norm": 0.0004058323975186795, "learning_rate": 6.936701674709055e-06, "loss": 4.083104431629181e-06, "step": 327990 }, { "epoch": 93.10246948623332, "grad_norm": 0.00047549090231768787, "learning_rate": 6.9338631847856934e-06, "loss": 4.973635077476501e-06, "step": 328000 }, { "epoch": 93.10246948623332, "eval_accuracy": 0.9888090544922744, "eval_loss": 0.04986546188592911, "eval_runtime": 49.3096, "eval_samples_per_second": 318.944, "eval_steps_per_second": 4.989, "step": 328000 }, { "epoch": 93.10530797615668, "grad_norm": 0.000675639312248677, "learning_rate": 6.931024694862334e-06, "loss": 3.8744881749153136e-06, "step": 328010 }, { "epoch": 93.10814646608004, "grad_norm": 0.00017954451323021203, "learning_rate": 6.928186204938973e-06, "loss": 2.9403716325759886e-06, "step": 328020 }, { "epoch": 93.1109849560034, "grad_norm": 0.0006864253664389253, "learning_rate": 6.9253477150156125e-06, "loss": 4.136748611927033e-06, "step": 328030 }, { "epoch": 93.11382344592677, "grad_norm": 0.0014528045430779457, "learning_rate": 6.922509225092251e-06, "loss": 2.2882595658302307e-06, "step": 328040 }, { "epoch": 93.11666193585013, "grad_norm": 0.00039626198122277856, "learning_rate": 6.91967073516889e-06, "loss": 2.5987625122070313e-06, "step": 328050 }, { "epoch": 93.1195004257735, "grad_norm": 0.0003089593374170363, "learning_rate": 6.916832245245531e-06, "loss": 3.4479424357414244e-06, "step": 328060 }, { "epoch": 93.12233891569684, "grad_norm": 0.0015169577673077583, "learning_rate": 6.9139937553221695e-06, "loss": 3.4205615520477297e-06, "step": 328070 }, { "epoch": 93.1251774056202, "grad_norm": 0.0005686066579073668, "learning_rate": 6.911155265398808e-06, "loss": 3.983080387115479e-06, "step": 328080 }, { "epoch": 93.12801589554357, "grad_norm": 0.0012788473395630717, "learning_rate": 6.908316775475447e-06, "loss": 4.260614514350891e-06, "step": 328090 }, { "epoch": 93.13085438546693, "grad_norm": 0.0009680109214968979, "learning_rate": 6.905478285552086e-06, "loss": 4.759803414344788e-06, "step": 328100 }, { "epoch": 93.1336928753903, "grad_norm": 0.0008120644488371909, "learning_rate": 6.9026397956287265e-06, "loss": 5.106255412101746e-06, "step": 328110 }, { "epoch": 93.13653136531366, "grad_norm": 0.0033970982767641544, "learning_rate": 6.899801305705365e-06, "loss": 2.9826536774635314e-06, "step": 328120 }, { "epoch": 93.139369855237, "grad_norm": 0.0014279271708801389, "learning_rate": 6.896962815782004e-06, "loss": 3.33394855260849e-06, "step": 328130 }, { "epoch": 93.14220834516037, "grad_norm": 0.00032978077069856226, "learning_rate": 6.894124325858643e-06, "loss": 4.129298031330109e-06, "step": 328140 }, { "epoch": 93.14504683508373, "grad_norm": 0.00034429930383339524, "learning_rate": 6.891285835935282e-06, "loss": 2.518109977245331e-06, "step": 328150 }, { "epoch": 93.1478853250071, "grad_norm": 0.003605373203754425, "learning_rate": 6.888447346011922e-06, "loss": 3.8582831621170046e-06, "step": 328160 }, { "epoch": 93.15072381493046, "grad_norm": 0.004630844574421644, "learning_rate": 6.885608856088561e-06, "loss": 4.321523010730743e-06, "step": 328170 }, { "epoch": 93.15356230485382, "grad_norm": 0.001107127289287746, "learning_rate": 6.8827703661652e-06, "loss": 5.231238901615143e-06, "step": 328180 }, { "epoch": 93.15640079477718, "grad_norm": 0.0020724255591630936, "learning_rate": 6.87993187624184e-06, "loss": 4.704296588897705e-06, "step": 328190 }, { "epoch": 93.15923928470053, "grad_norm": 0.0005016078357584774, "learning_rate": 6.877093386318479e-06, "loss": 2.662837505340576e-06, "step": 328200 }, { "epoch": 93.1620777746239, "grad_norm": 0.001025272416882217, "learning_rate": 6.874254896395118e-06, "loss": 2.7010217308998106e-06, "step": 328210 }, { "epoch": 93.16491626454726, "grad_norm": 0.0004066381079610437, "learning_rate": 6.871416406471758e-06, "loss": 2.8792768716812133e-06, "step": 328220 }, { "epoch": 93.16775475447062, "grad_norm": 0.0006115020951256156, "learning_rate": 6.868577916548397e-06, "loss": 2.730078995227814e-06, "step": 328230 }, { "epoch": 93.17059324439398, "grad_norm": 0.0005082456627860665, "learning_rate": 6.865739426625036e-06, "loss": 3.0957162380218506e-06, "step": 328240 }, { "epoch": 93.17343173431735, "grad_norm": 0.0014106143498793244, "learning_rate": 6.8629009367016744e-06, "loss": 6.240233778953552e-06, "step": 328250 }, { "epoch": 93.17627022424071, "grad_norm": 0.0002036545192822814, "learning_rate": 6.860062446778315e-06, "loss": 3.5565346479415895e-06, "step": 328260 }, { "epoch": 93.17910871416406, "grad_norm": 0.0016892346320673823, "learning_rate": 6.857223956854954e-06, "loss": 4.458241164684296e-06, "step": 328270 }, { "epoch": 93.18194720408742, "grad_norm": 0.0006448462954722345, "learning_rate": 6.854385466931593e-06, "loss": 6.413646042346955e-06, "step": 328280 }, { "epoch": 93.18478569401078, "grad_norm": 0.00018640854978002608, "learning_rate": 6.8515469770082315e-06, "loss": 5.018897354602814e-06, "step": 328290 }, { "epoch": 93.18762418393415, "grad_norm": 0.002407960593700409, "learning_rate": 6.84870848708487e-06, "loss": 4.4513493776321415e-06, "step": 328300 }, { "epoch": 93.19046267385751, "grad_norm": 0.00039317720802500844, "learning_rate": 6.845869997161511e-06, "loss": 4.14140522480011e-06, "step": 328310 }, { "epoch": 93.19330116378087, "grad_norm": 0.008652509190142155, "learning_rate": 6.84303150723815e-06, "loss": 4.476122558116913e-06, "step": 328320 }, { "epoch": 93.19613965370424, "grad_norm": 0.0005572527297772467, "learning_rate": 6.8401930173147885e-06, "loss": 3.0297785997390745e-06, "step": 328330 }, { "epoch": 93.19897814362758, "grad_norm": 0.004635712131857872, "learning_rate": 6.837354527391427e-06, "loss": 4.168599843978882e-06, "step": 328340 }, { "epoch": 93.20181663355095, "grad_norm": 0.0006842160364612937, "learning_rate": 6.834516037468067e-06, "loss": 2.46930867433548e-06, "step": 328350 }, { "epoch": 93.20465512347431, "grad_norm": 0.001615966553799808, "learning_rate": 6.831677547544707e-06, "loss": 3.440678119659424e-06, "step": 328360 }, { "epoch": 93.20749361339767, "grad_norm": 0.000615704630035907, "learning_rate": 6.828839057621346e-06, "loss": 4.05777245759964e-06, "step": 328370 }, { "epoch": 93.21033210332104, "grad_norm": 0.0003440928994677961, "learning_rate": 6.826000567697985e-06, "loss": 2.958253026008606e-06, "step": 328380 }, { "epoch": 93.2131705932444, "grad_norm": 0.0011718282476067543, "learning_rate": 6.823162077774624e-06, "loss": 3.592856228351593e-06, "step": 328390 }, { "epoch": 93.21600908316775, "grad_norm": 0.00015685697144363075, "learning_rate": 6.820323587851263e-06, "loss": 1.4942139387130738e-06, "step": 328400 }, { "epoch": 93.21884757309111, "grad_norm": 0.0003111027763225138, "learning_rate": 6.817485097927903e-06, "loss": 2.96570360660553e-06, "step": 328410 }, { "epoch": 93.22168606301447, "grad_norm": 0.0010819248855113983, "learning_rate": 6.814646608004542e-06, "loss": 3.612041473388672e-06, "step": 328420 }, { "epoch": 93.22452455293784, "grad_norm": 0.0003572455025278032, "learning_rate": 6.811808118081181e-06, "loss": 3.0094757676124574e-06, "step": 328430 }, { "epoch": 93.2273630428612, "grad_norm": 0.0009769749594852328, "learning_rate": 6.80896962815782e-06, "loss": 8.123740553855895e-06, "step": 328440 }, { "epoch": 93.23020153278456, "grad_norm": 0.0006767694721929729, "learning_rate": 6.806131138234459e-06, "loss": 2.2014603018760683e-06, "step": 328450 }, { "epoch": 93.23304002270793, "grad_norm": 0.0034328103065490723, "learning_rate": 6.803292648311099e-06, "loss": 4.037283360958099e-06, "step": 328460 }, { "epoch": 93.23587851263127, "grad_norm": 0.002856222214177251, "learning_rate": 6.800454158387738e-06, "loss": 4.492700099945068e-06, "step": 328470 }, { "epoch": 93.23871700255464, "grad_norm": 0.0005475121433846653, "learning_rate": 6.797615668464377e-06, "loss": 3.5624951124191282e-06, "step": 328480 }, { "epoch": 93.241555492478, "grad_norm": 0.0007649580365978181, "learning_rate": 6.794777178541016e-06, "loss": 7.679685950279236e-06, "step": 328490 }, { "epoch": 93.24439398240136, "grad_norm": 0.00035909112193621695, "learning_rate": 6.791938688617656e-06, "loss": 2.802349627017975e-06, "step": 328500 }, { "epoch": 93.24439398240136, "eval_accuracy": 0.9888090544922744, "eval_loss": 0.049370963126420975, "eval_runtime": 60.1286, "eval_samples_per_second": 261.556, "eval_steps_per_second": 4.091, "step": 328500 }, { "epoch": 93.24723247232473, "grad_norm": 0.0032608499750494957, "learning_rate": 6.789100198694295e-06, "loss": 2.5460496544837953e-06, "step": 328510 }, { "epoch": 93.25007096224809, "grad_norm": 0.0003657694614958018, "learning_rate": 6.786261708770934e-06, "loss": 3.100372850894928e-06, "step": 328520 }, { "epoch": 93.25290945217145, "grad_norm": 0.0028897623997181654, "learning_rate": 6.783423218847574e-06, "loss": 8.13361257314682e-06, "step": 328530 }, { "epoch": 93.2557479420948, "grad_norm": 0.0014987733447924256, "learning_rate": 6.7805847289242125e-06, "loss": 3.5854056477546694e-06, "step": 328540 }, { "epoch": 93.25858643201816, "grad_norm": 0.00099267961923033, "learning_rate": 6.777746239000852e-06, "loss": 7.142871618270874e-06, "step": 328550 }, { "epoch": 93.26142492194153, "grad_norm": 0.0010936688631772995, "learning_rate": 6.774907749077492e-06, "loss": 2.6587396860122682e-06, "step": 328560 }, { "epoch": 93.26426341186489, "grad_norm": 0.0006440022261813283, "learning_rate": 6.772069259154131e-06, "loss": 3.822892904281616e-06, "step": 328570 }, { "epoch": 93.26710190178825, "grad_norm": 0.0012123828055337071, "learning_rate": 6.7692307692307695e-06, "loss": 7.329881191253662e-06, "step": 328580 }, { "epoch": 93.26994039171161, "grad_norm": 0.0006141833146102726, "learning_rate": 6.766392279307408e-06, "loss": 3.7740916013717652e-06, "step": 328590 }, { "epoch": 93.27277888163498, "grad_norm": 0.0006176726310513914, "learning_rate": 6.763553789384049e-06, "loss": 7.11437314748764e-06, "step": 328600 }, { "epoch": 93.27561737155833, "grad_norm": 0.0007403792114928365, "learning_rate": 6.760715299460688e-06, "loss": 2.8427690267562867e-06, "step": 328610 }, { "epoch": 93.27845586148169, "grad_norm": 0.001866902457550168, "learning_rate": 6.7578768095373265e-06, "loss": 2.598389983177185e-06, "step": 328620 }, { "epoch": 93.28129435140505, "grad_norm": 0.0007124132243916392, "learning_rate": 6.755038319613965e-06, "loss": 4.749372601509095e-06, "step": 328630 }, { "epoch": 93.28413284132841, "grad_norm": 0.00082963309250772, "learning_rate": 6.752199829690604e-06, "loss": 5.705840885639191e-06, "step": 328640 }, { "epoch": 93.28697133125178, "grad_norm": 0.001100996625609696, "learning_rate": 6.749361339767245e-06, "loss": 4.082731902599335e-06, "step": 328650 }, { "epoch": 93.28980982117514, "grad_norm": 0.0009034231188707054, "learning_rate": 6.7465228498438836e-06, "loss": 2.6239082217216493e-06, "step": 328660 }, { "epoch": 93.29264831109849, "grad_norm": 0.0008260340546257794, "learning_rate": 6.743684359920522e-06, "loss": 7.5034797191619875e-06, "step": 328670 }, { "epoch": 93.29548680102185, "grad_norm": 0.001014698063954711, "learning_rate": 6.740845869997161e-06, "loss": 4.611164331436157e-06, "step": 328680 }, { "epoch": 93.29832529094521, "grad_norm": 0.0006388859474100173, "learning_rate": 6.738007380073801e-06, "loss": 7.381103932857513e-06, "step": 328690 }, { "epoch": 93.30116378086858, "grad_norm": 0.0019375164993107319, "learning_rate": 6.735168890150441e-06, "loss": 3.6364421248435973e-06, "step": 328700 }, { "epoch": 93.30400227079194, "grad_norm": 0.001114366576075554, "learning_rate": 6.7323304002270794e-06, "loss": 5.104765295982361e-06, "step": 328710 }, { "epoch": 93.3068407607153, "grad_norm": 0.0010884334333240986, "learning_rate": 6.729491910303719e-06, "loss": 2.9016286134719848e-06, "step": 328720 }, { "epoch": 93.30967925063867, "grad_norm": 7.83413925091736e-05, "learning_rate": 6.726653420380358e-06, "loss": 5.293823778629303e-06, "step": 328730 }, { "epoch": 93.31251774056201, "grad_norm": 0.014478572644293308, "learning_rate": 6.723814930456997e-06, "loss": 6.526336073875428e-06, "step": 328740 }, { "epoch": 93.31535623048538, "grad_norm": 0.0032397222239524126, "learning_rate": 6.720976440533637e-06, "loss": 5.532428622245788e-06, "step": 328750 }, { "epoch": 93.31819472040874, "grad_norm": 0.0038990345783531666, "learning_rate": 6.718137950610276e-06, "loss": 4.5761466026306156e-06, "step": 328760 }, { "epoch": 93.3210332103321, "grad_norm": 0.0004182792908977717, "learning_rate": 6.715299460686915e-06, "loss": 3.176368772983551e-06, "step": 328770 }, { "epoch": 93.32387170025547, "grad_norm": 0.0009614927694201469, "learning_rate": 6.712460970763554e-06, "loss": 3.340281546115875e-06, "step": 328780 }, { "epoch": 93.32671019017883, "grad_norm": 0.0006482757744379342, "learning_rate": 6.709622480840193e-06, "loss": 2.71722674369812e-06, "step": 328790 }, { "epoch": 93.32954868010219, "grad_norm": 0.001023423857986927, "learning_rate": 6.706783990916833e-06, "loss": 2.866797149181366e-06, "step": 328800 }, { "epoch": 93.33238717002554, "grad_norm": 0.0019617655780166388, "learning_rate": 6.703945500993472e-06, "loss": 5.1802024245262144e-06, "step": 328810 }, { "epoch": 93.3352256599489, "grad_norm": 0.000714375579264015, "learning_rate": 6.701107011070111e-06, "loss": 4.086270928382874e-06, "step": 328820 }, { "epoch": 93.33806414987227, "grad_norm": 0.0002871489559765905, "learning_rate": 6.69826852114675e-06, "loss": 3.366544842720032e-06, "step": 328830 }, { "epoch": 93.34090263979563, "grad_norm": 0.0009715078631415963, "learning_rate": 6.6954300312233885e-06, "loss": 2.1586194634437563e-06, "step": 328840 }, { "epoch": 93.34374112971899, "grad_norm": 0.0011091359192505479, "learning_rate": 6.692591541300029e-06, "loss": 3.139488399028778e-06, "step": 328850 }, { "epoch": 93.34657961964236, "grad_norm": 0.00036925438325852156, "learning_rate": 6.689753051376668e-06, "loss": 4.133023321628571e-06, "step": 328860 }, { "epoch": 93.3494181095657, "grad_norm": 0.0033473626244813204, "learning_rate": 6.6869145614533075e-06, "loss": 3.598816692829132e-06, "step": 328870 }, { "epoch": 93.35225659948907, "grad_norm": 0.00037544171209447086, "learning_rate": 6.684076071529946e-06, "loss": 3.084354102611542e-06, "step": 328880 }, { "epoch": 93.35509508941243, "grad_norm": 0.0009146254160441458, "learning_rate": 6.681237581606585e-06, "loss": 3.522820770740509e-06, "step": 328890 }, { "epoch": 93.35793357933579, "grad_norm": 0.0010985458502545953, "learning_rate": 6.678399091683226e-06, "loss": 2.286769449710846e-06, "step": 328900 }, { "epoch": 93.36077206925916, "grad_norm": 0.0012023898307234049, "learning_rate": 6.6755606017598646e-06, "loss": 4.516914486885071e-06, "step": 328910 }, { "epoch": 93.36361055918252, "grad_norm": 0.0003034777764696628, "learning_rate": 6.672722111836503e-06, "loss": 3.0240043997764587e-06, "step": 328920 }, { "epoch": 93.36644904910588, "grad_norm": 0.00042960248538292944, "learning_rate": 6.669883621913142e-06, "loss": 3.36281955242157e-06, "step": 328930 }, { "epoch": 93.36928753902923, "grad_norm": 0.0003158985637128353, "learning_rate": 6.667045131989781e-06, "loss": 3.7088990211486817e-06, "step": 328940 }, { "epoch": 93.3721260289526, "grad_norm": 0.0009630609420128167, "learning_rate": 6.664206642066422e-06, "loss": 3.4892931580543517e-06, "step": 328950 }, { "epoch": 93.37496451887596, "grad_norm": 0.0004699759592767805, "learning_rate": 6.6613681521430604e-06, "loss": 2.6145949959754945e-06, "step": 328960 }, { "epoch": 93.37780300879932, "grad_norm": 0.0006353971548378468, "learning_rate": 6.658529662219699e-06, "loss": 2.002343535423279e-06, "step": 328970 }, { "epoch": 93.38064149872268, "grad_norm": 0.0006273447652347386, "learning_rate": 6.655691172296338e-06, "loss": 5.0371512770652774e-06, "step": 328980 }, { "epoch": 93.38347998864604, "grad_norm": 0.0022074326407164335, "learning_rate": 6.652852682372977e-06, "loss": 2.7723610401153566e-06, "step": 328990 }, { "epoch": 93.38631847856941, "grad_norm": 0.0011062955018132925, "learning_rate": 6.6500141924496175e-06, "loss": 3.3253803849220275e-06, "step": 329000 }, { "epoch": 93.38631847856941, "eval_accuracy": 0.9886818846569594, "eval_loss": 0.04873061180114746, "eval_runtime": 57.4934, "eval_samples_per_second": 273.545, "eval_steps_per_second": 4.279, "step": 329000 }, { "epoch": 93.38915696849276, "grad_norm": 0.0010382113978266716, "learning_rate": 6.647175702526256e-06, "loss": 4.634819924831391e-06, "step": 329010 }, { "epoch": 93.39199545841612, "grad_norm": 0.001202216139063239, "learning_rate": 6.644337212602895e-06, "loss": 3.1061470508575438e-06, "step": 329020 }, { "epoch": 93.39483394833948, "grad_norm": 0.0016264535952359438, "learning_rate": 6.641498722679535e-06, "loss": 2.1776184439659117e-06, "step": 329030 }, { "epoch": 93.39767243826284, "grad_norm": 0.0011787442490458488, "learning_rate": 6.638660232756174e-06, "loss": 3.7478283047676085e-06, "step": 329040 }, { "epoch": 93.40051092818621, "grad_norm": 0.0006827147444710135, "learning_rate": 6.635821742832813e-06, "loss": 3.606639802455902e-06, "step": 329050 }, { "epoch": 93.40334941810957, "grad_norm": 0.0012036386178806424, "learning_rate": 6.632983252909453e-06, "loss": 7.188692688941956e-06, "step": 329060 }, { "epoch": 93.40618790803293, "grad_norm": 0.0002761094074230641, "learning_rate": 6.630144762986092e-06, "loss": 4.349090158939362e-06, "step": 329070 }, { "epoch": 93.40902639795628, "grad_norm": 0.0008038554806262255, "learning_rate": 6.627306273062731e-06, "loss": 3.561191260814667e-06, "step": 329080 }, { "epoch": 93.41186488787964, "grad_norm": 0.0003445112088229507, "learning_rate": 6.6244677831393695e-06, "loss": 2.0015984773635863e-06, "step": 329090 }, { "epoch": 93.41470337780301, "grad_norm": 0.005221027880907059, "learning_rate": 6.62162929321601e-06, "loss": 3.5412609577178956e-06, "step": 329100 }, { "epoch": 93.41754186772637, "grad_norm": 0.002568913623690605, "learning_rate": 6.618790803292649e-06, "loss": 3.5505741834640504e-06, "step": 329110 }, { "epoch": 93.42038035764973, "grad_norm": 0.0001849109394242987, "learning_rate": 6.615952313369288e-06, "loss": 7.084384560585022e-06, "step": 329120 }, { "epoch": 93.4232188475731, "grad_norm": 0.0007822581683285534, "learning_rate": 6.6131138234459265e-06, "loss": 2.2953376173973083e-06, "step": 329130 }, { "epoch": 93.42605733749645, "grad_norm": 0.0013638216769322753, "learning_rate": 6.610275333522567e-06, "loss": 6.078183650970459e-06, "step": 329140 }, { "epoch": 93.42889582741981, "grad_norm": 0.0003838296979665756, "learning_rate": 6.607436843599206e-06, "loss": 6.337463855743408e-06, "step": 329150 }, { "epoch": 93.43173431734317, "grad_norm": 0.0008334445301443338, "learning_rate": 6.604598353675845e-06, "loss": 2.728588879108429e-06, "step": 329160 }, { "epoch": 93.43457280726653, "grad_norm": 0.0002552266523707658, "learning_rate": 6.6017598637524836e-06, "loss": 2.8034672141075133e-06, "step": 329170 }, { "epoch": 93.4374112971899, "grad_norm": 0.0005543727893382311, "learning_rate": 6.598921373829122e-06, "loss": 2.693384885787964e-06, "step": 329180 }, { "epoch": 93.44024978711326, "grad_norm": 0.0008411371381953359, "learning_rate": 6.596082883905763e-06, "loss": 3.577768802642822e-06, "step": 329190 }, { "epoch": 93.44308827703662, "grad_norm": 0.005076341796666384, "learning_rate": 6.593244393982402e-06, "loss": 2.5853514671325684e-06, "step": 329200 }, { "epoch": 93.44592676695997, "grad_norm": 0.0005493483622558415, "learning_rate": 6.590405904059041e-06, "loss": 2.526864409446716e-06, "step": 329210 }, { "epoch": 93.44876525688333, "grad_norm": 9.817384125199169e-05, "learning_rate": 6.58756741413568e-06, "loss": 7.1672722697258e-06, "step": 329220 }, { "epoch": 93.4516037468067, "grad_norm": 0.0046637472696602345, "learning_rate": 6.584728924212319e-06, "loss": 3.6887824535369875e-06, "step": 329230 }, { "epoch": 93.45444223673006, "grad_norm": 5.378999776439741e-05, "learning_rate": 6.58189043428896e-06, "loss": 2.251751720905304e-06, "step": 329240 }, { "epoch": 93.45728072665342, "grad_norm": 0.0001693690865067765, "learning_rate": 6.5790519443655985e-06, "loss": 2.5479122996330263e-06, "step": 329250 }, { "epoch": 93.46011921657679, "grad_norm": 0.0017792104044929147, "learning_rate": 6.576213454442237e-06, "loss": 3.2534822821617127e-06, "step": 329260 }, { "epoch": 93.46295770650015, "grad_norm": 7.114189065760002e-05, "learning_rate": 6.573374964518876e-06, "loss": 4.264339804649353e-06, "step": 329270 }, { "epoch": 93.4657961964235, "grad_norm": 0.001219732454046607, "learning_rate": 6.570536474595515e-06, "loss": 2.8219074010849e-06, "step": 329280 }, { "epoch": 93.46863468634686, "grad_norm": 0.0005536662065424025, "learning_rate": 6.5676979846721555e-06, "loss": 3.266148269176483e-06, "step": 329290 }, { "epoch": 93.47147317627022, "grad_norm": 0.00036488441401161253, "learning_rate": 6.564859494748794e-06, "loss": 2.7088448405265807e-06, "step": 329300 }, { "epoch": 93.47431166619359, "grad_norm": 0.0002557251718826592, "learning_rate": 6.562021004825433e-06, "loss": 2.835690975189209e-06, "step": 329310 }, { "epoch": 93.47715015611695, "grad_norm": 0.0031181499361991882, "learning_rate": 6.559182514902072e-06, "loss": 3.2525509595870973e-06, "step": 329320 }, { "epoch": 93.47998864604031, "grad_norm": 0.0034636303316801786, "learning_rate": 6.556344024978711e-06, "loss": 3.60831618309021e-06, "step": 329330 }, { "epoch": 93.48282713596367, "grad_norm": 0.000377848744392395, "learning_rate": 6.553505535055351e-06, "loss": 2.2405758500099184e-06, "step": 329340 }, { "epoch": 93.48566562588702, "grad_norm": 0.0006756093353033066, "learning_rate": 6.55066704513199e-06, "loss": 3.7947669625282287e-06, "step": 329350 }, { "epoch": 93.48850411581039, "grad_norm": 0.0014746455708518624, "learning_rate": 6.547828555208629e-06, "loss": 2.8576701879501344e-06, "step": 329360 }, { "epoch": 93.49134260573375, "grad_norm": 0.002977619180455804, "learning_rate": 6.544990065285269e-06, "loss": 3.32072377204895e-06, "step": 329370 }, { "epoch": 93.49418109565711, "grad_norm": 0.0003228353161830455, "learning_rate": 6.5421515753619075e-06, "loss": 2.9668211936950683e-06, "step": 329380 }, { "epoch": 93.49701958558047, "grad_norm": 0.0003686540585476905, "learning_rate": 6.539313085438547e-06, "loss": 4.090368747711182e-06, "step": 329390 }, { "epoch": 93.49985807550384, "grad_norm": 0.0007538383360952139, "learning_rate": 6.536474595515187e-06, "loss": 4.117190837860108e-06, "step": 329400 }, { "epoch": 93.50269656542719, "grad_norm": 0.0009850781643763185, "learning_rate": 6.533636105591826e-06, "loss": 3.2885000109672546e-06, "step": 329410 }, { "epoch": 93.50553505535055, "grad_norm": 0.0004327161004766822, "learning_rate": 6.5307976156684646e-06, "loss": 3.055669367313385e-06, "step": 329420 }, { "epoch": 93.50837354527391, "grad_norm": 0.00044756021816283464, "learning_rate": 6.527959125745103e-06, "loss": 2.6758760213851927e-06, "step": 329430 }, { "epoch": 93.51121203519727, "grad_norm": 0.008723925799131393, "learning_rate": 6.525120635821744e-06, "loss": 4.220753908157349e-06, "step": 329440 }, { "epoch": 93.51405052512064, "grad_norm": 0.0007828538655303419, "learning_rate": 6.522282145898383e-06, "loss": 3.1355768442153932e-06, "step": 329450 }, { "epoch": 93.516889015044, "grad_norm": 0.0008547328761778772, "learning_rate": 6.519443655975022e-06, "loss": 2.0889565348625183e-06, "step": 329460 }, { "epoch": 93.51972750496736, "grad_norm": 0.0020686527714133263, "learning_rate": 6.5166051660516604e-06, "loss": 3.421492874622345e-06, "step": 329470 }, { "epoch": 93.52256599489071, "grad_norm": 0.0013877300079911947, "learning_rate": 6.513766676128299e-06, "loss": 2.1997839212417603e-06, "step": 329480 }, { "epoch": 93.52540448481408, "grad_norm": 0.0008326633833348751, "learning_rate": 6.51092818620494e-06, "loss": 2.3538246750831605e-06, "step": 329490 }, { "epoch": 93.52824297473744, "grad_norm": 0.0018888877239078283, "learning_rate": 6.508089696281579e-06, "loss": 3.0316412448883055e-06, "step": 329500 }, { "epoch": 93.52824297473744, "eval_accuracy": 0.988745469574617, "eval_loss": 0.04849613457918167, "eval_runtime": 62.593, "eval_samples_per_second": 251.258, "eval_steps_per_second": 3.93, "step": 329500 }, { "epoch": 93.5310814646608, "grad_norm": 0.0005560361896641552, "learning_rate": 6.5052512063582175e-06, "loss": 2.0954757928848267e-06, "step": 329510 }, { "epoch": 93.53391995458416, "grad_norm": 0.0007112592575140297, "learning_rate": 6.502412716434856e-06, "loss": 3.402121365070343e-06, "step": 329520 }, { "epoch": 93.53675844450753, "grad_norm": 0.000519530032761395, "learning_rate": 6.499574226511496e-06, "loss": 2.84537672996521e-06, "step": 329530 }, { "epoch": 93.53959693443089, "grad_norm": 0.0002521189453545958, "learning_rate": 6.496735736588136e-06, "loss": 2.6693567633628844e-06, "step": 329540 }, { "epoch": 93.54243542435424, "grad_norm": 0.00016812917601782829, "learning_rate": 6.4938972466647745e-06, "loss": 1.948326826095581e-06, "step": 329550 }, { "epoch": 93.5452739142776, "grad_norm": 0.014905002899467945, "learning_rate": 6.491058756741414e-06, "loss": 5.109421908855438e-06, "step": 329560 }, { "epoch": 93.54811240420096, "grad_norm": 0.0003722039400599897, "learning_rate": 6.488220266818053e-06, "loss": 3.194063901901245e-06, "step": 329570 }, { "epoch": 93.55095089412433, "grad_norm": 0.0009253200842067599, "learning_rate": 6.485381776894692e-06, "loss": 4.420056939125061e-06, "step": 329580 }, { "epoch": 93.55378938404769, "grad_norm": 0.0018247661646455526, "learning_rate": 6.482543286971332e-06, "loss": 3.1037256121635436e-06, "step": 329590 }, { "epoch": 93.55662787397105, "grad_norm": 0.00400437181815505, "learning_rate": 6.479704797047971e-06, "loss": 3.336183726787567e-06, "step": 329600 }, { "epoch": 93.5594663638944, "grad_norm": 0.0009410504717379808, "learning_rate": 6.47686630712461e-06, "loss": 3.7340447306632996e-06, "step": 329610 }, { "epoch": 93.56230485381776, "grad_norm": 0.00016302218136843294, "learning_rate": 6.474027817201249e-06, "loss": 2.47955322265625e-06, "step": 329620 }, { "epoch": 93.56514334374113, "grad_norm": 0.00339507800526917, "learning_rate": 6.471189327277888e-06, "loss": 4.453770816326141e-06, "step": 329630 }, { "epoch": 93.56798183366449, "grad_norm": 0.011861980892717838, "learning_rate": 6.468350837354528e-06, "loss": 5.713477730751038e-06, "step": 329640 }, { "epoch": 93.57082032358785, "grad_norm": 0.0002578787098173052, "learning_rate": 6.465512347431167e-06, "loss": 3.3479183912277223e-06, "step": 329650 }, { "epoch": 93.57365881351122, "grad_norm": 0.001262553152628243, "learning_rate": 6.462673857507806e-06, "loss": 2.354942262172699e-06, "step": 329660 }, { "epoch": 93.57649730343458, "grad_norm": 0.0011530328774824739, "learning_rate": 6.459835367584445e-06, "loss": 2.7779489755630495e-06, "step": 329670 }, { "epoch": 93.57933579335793, "grad_norm": 0.0024190417025238276, "learning_rate": 6.4569968776610836e-06, "loss": 3.5954639315605163e-06, "step": 329680 }, { "epoch": 93.58217428328129, "grad_norm": 0.0003237866039853543, "learning_rate": 6.454158387737724e-06, "loss": 3.580935299396515e-06, "step": 329690 }, { "epoch": 93.58501277320465, "grad_norm": 0.004439288284629583, "learning_rate": 6.451319897814363e-06, "loss": 4.656054079532623e-06, "step": 329700 }, { "epoch": 93.58785126312802, "grad_norm": 0.004164679907262325, "learning_rate": 6.448481407891003e-06, "loss": 4.051066935062409e-06, "step": 329710 }, { "epoch": 93.59068975305138, "grad_norm": 0.0015859451377764344, "learning_rate": 6.4456429179676414e-06, "loss": 2.3655593395233154e-06, "step": 329720 }, { "epoch": 93.59352824297474, "grad_norm": 0.0029413180891424417, "learning_rate": 6.44280442804428e-06, "loss": 3.164820373058319e-06, "step": 329730 }, { "epoch": 93.5963667328981, "grad_norm": 0.0011481298133730888, "learning_rate": 6.439965938120921e-06, "loss": 3.0120834708213804e-06, "step": 329740 }, { "epoch": 93.59920522282145, "grad_norm": 0.00039429840398952365, "learning_rate": 6.43712744819756e-06, "loss": 3.160536289215088e-06, "step": 329750 }, { "epoch": 93.60204371274482, "grad_norm": 0.0005572406225837767, "learning_rate": 6.4342889582741985e-06, "loss": 3.0139461159706114e-06, "step": 329760 }, { "epoch": 93.60488220266818, "grad_norm": 5.775357203674503e-05, "learning_rate": 6.431450468350837e-06, "loss": 2.405047416687012e-06, "step": 329770 }, { "epoch": 93.60772069259154, "grad_norm": 0.0019212737679481506, "learning_rate": 6.428611978427476e-06, "loss": 2.2767111659049987e-06, "step": 329780 }, { "epoch": 93.6105591825149, "grad_norm": 0.0016307949554175138, "learning_rate": 6.425773488504117e-06, "loss": 2.979673445224762e-06, "step": 329790 }, { "epoch": 93.61339767243827, "grad_norm": 0.003186993533745408, "learning_rate": 6.4229349985807555e-06, "loss": 3.7536025047302247e-06, "step": 329800 }, { "epoch": 93.61623616236163, "grad_norm": 0.0008780217031016946, "learning_rate": 6.420096508657394e-06, "loss": 3.375299274921417e-06, "step": 329810 }, { "epoch": 93.61907465228498, "grad_norm": 0.000837557774502784, "learning_rate": 6.417258018734033e-06, "loss": 2.29422003030777e-06, "step": 329820 }, { "epoch": 93.62191314220834, "grad_norm": 0.00010710945934988558, "learning_rate": 6.414419528810674e-06, "loss": 5.268678069114685e-06, "step": 329830 }, { "epoch": 93.6247516321317, "grad_norm": 0.0006813201471231878, "learning_rate": 6.4115810388873125e-06, "loss": 6.356649100780487e-06, "step": 329840 }, { "epoch": 93.62759012205507, "grad_norm": 0.0005228400696069002, "learning_rate": 6.408742548963951e-06, "loss": 3.4455209970474242e-06, "step": 329850 }, { "epoch": 93.63042861197843, "grad_norm": 0.0014326699310913682, "learning_rate": 6.40590405904059e-06, "loss": 3.6776065826416017e-06, "step": 329860 }, { "epoch": 93.6332671019018, "grad_norm": 0.005242202430963516, "learning_rate": 6.40306556911723e-06, "loss": 4.627741873264313e-06, "step": 329870 }, { "epoch": 93.63610559182514, "grad_norm": 0.0002514127700123936, "learning_rate": 6.4002270791938696e-06, "loss": 2.310611307621002e-06, "step": 329880 }, { "epoch": 93.6389440817485, "grad_norm": 0.0008728006505407393, "learning_rate": 6.397388589270508e-06, "loss": 3.9137899875640866e-06, "step": 329890 }, { "epoch": 93.64178257167187, "grad_norm": 0.004982552025467157, "learning_rate": 6.394550099347148e-06, "loss": 3.8132071495056153e-06, "step": 329900 }, { "epoch": 93.64462106159523, "grad_norm": 0.0013427746016532183, "learning_rate": 6.391711609423787e-06, "loss": 2.5309622287750243e-06, "step": 329910 }, { "epoch": 93.6474595515186, "grad_norm": 0.0003037809510715306, "learning_rate": 6.388873119500426e-06, "loss": 2.822466194629669e-06, "step": 329920 }, { "epoch": 93.65029804144196, "grad_norm": 0.0006433816743083298, "learning_rate": 6.386034629577066e-06, "loss": 3.350153565406799e-06, "step": 329930 }, { "epoch": 93.65313653136532, "grad_norm": 0.0005399012588895857, "learning_rate": 6.383196139653705e-06, "loss": 3.817863762378692e-06, "step": 329940 }, { "epoch": 93.65597502128867, "grad_norm": 0.0009552970295771956, "learning_rate": 6.380357649730344e-06, "loss": 5.5674463510513306e-06, "step": 329950 }, { "epoch": 93.65881351121203, "grad_norm": 0.0038954317569732666, "learning_rate": 6.377519159806983e-06, "loss": 2.8347596526145937e-06, "step": 329960 }, { "epoch": 93.6616520011354, "grad_norm": 0.00010653331992216408, "learning_rate": 6.374680669883622e-06, "loss": 4.101544618606567e-06, "step": 329970 }, { "epoch": 93.66449049105876, "grad_norm": 0.00031791493529453874, "learning_rate": 6.371842179960262e-06, "loss": 9.139254689216614e-06, "step": 329980 }, { "epoch": 93.66732898098212, "grad_norm": 0.0008148200577124953, "learning_rate": 6.369003690036901e-06, "loss": 2.4775043129920958e-06, "step": 329990 }, { "epoch": 93.67016747090548, "grad_norm": 0.00018064900359604508, "learning_rate": 6.36616520011354e-06, "loss": 4.254467785358429e-06, "step": 330000 }, { "epoch": 93.67016747090548, "eval_accuracy": 0.988999809245247, "eval_loss": 0.0491148978471756, "eval_runtime": 68.8125, "eval_samples_per_second": 228.548, "eval_steps_per_second": 3.575, "step": 330000 }, { "epoch": 93.67300596082885, "grad_norm": 0.0002566221810411662, "learning_rate": 6.363326710190179e-06, "loss": 2.207234501838684e-06, "step": 330010 }, { "epoch": 93.6758444507522, "grad_norm": 0.0013605981366708875, "learning_rate": 6.3604882202668175e-06, "loss": 3.789365291595459e-06, "step": 330020 }, { "epoch": 93.67868294067556, "grad_norm": 0.000282737280940637, "learning_rate": 6.357649730343458e-06, "loss": 2.639554440975189e-06, "step": 330030 }, { "epoch": 93.68152143059892, "grad_norm": 0.0007150093442760408, "learning_rate": 6.354811240420097e-06, "loss": 2.7546659111976625e-06, "step": 330040 }, { "epoch": 93.68435992052228, "grad_norm": 0.00038877042243257165, "learning_rate": 6.351972750496736e-06, "loss": 3.0353665351867674e-06, "step": 330050 }, { "epoch": 93.68719841044565, "grad_norm": 0.0009785046568140388, "learning_rate": 6.349134260573375e-06, "loss": 4.113093018531799e-06, "step": 330060 }, { "epoch": 93.69003690036901, "grad_norm": 0.00047049205750226974, "learning_rate": 6.346295770650014e-06, "loss": 2.453848719596863e-06, "step": 330070 }, { "epoch": 93.69287539029236, "grad_norm": 0.003200191305950284, "learning_rate": 6.343457280726655e-06, "loss": 1.0381266474723815e-05, "step": 330080 }, { "epoch": 93.69571388021572, "grad_norm": 0.0007950230501592159, "learning_rate": 6.3406187908032935e-06, "loss": 3.936327993869782e-06, "step": 330090 }, { "epoch": 93.69855237013908, "grad_norm": 0.00019236734078731388, "learning_rate": 6.337780300879932e-06, "loss": 9.085424244403839e-06, "step": 330100 }, { "epoch": 93.70139086006245, "grad_norm": 0.000458090245956555, "learning_rate": 6.334941810956571e-06, "loss": 3.394298255443573e-06, "step": 330110 }, { "epoch": 93.70422934998581, "grad_norm": 0.0012349020689725876, "learning_rate": 6.33210332103321e-06, "loss": 6.125308573246002e-06, "step": 330120 }, { "epoch": 93.70706783990917, "grad_norm": 0.0007569901645183563, "learning_rate": 6.3292648311098506e-06, "loss": 2.8619542717933655e-06, "step": 330130 }, { "epoch": 93.70990632983253, "grad_norm": 0.0003885373589582741, "learning_rate": 6.326426341186489e-06, "loss": 3.734603524208069e-06, "step": 330140 }, { "epoch": 93.71274481975588, "grad_norm": 0.0047321077436208725, "learning_rate": 6.323587851263128e-06, "loss": 3.319792449474335e-05, "step": 330150 }, { "epoch": 93.71558330967925, "grad_norm": 0.00033463642466813326, "learning_rate": 6.320749361339767e-06, "loss": 5.06669282913208e-05, "step": 330160 }, { "epoch": 93.71842179960261, "grad_norm": 0.0038973542395979166, "learning_rate": 6.317910871416406e-06, "loss": 0.0001036718487739563, "step": 330170 }, { "epoch": 93.72126028952597, "grad_norm": 0.006553724408149719, "learning_rate": 6.315072381493046e-06, "loss": 6.713718175888062e-06, "step": 330180 }, { "epoch": 93.72409877944933, "grad_norm": 0.22750267386436462, "learning_rate": 6.312233891569685e-06, "loss": 0.0001818697899580002, "step": 330190 }, { "epoch": 93.7269372693727, "grad_norm": 0.012444064021110535, "learning_rate": 6.309395401646324e-06, "loss": 2.529434859752655e-05, "step": 330200 }, { "epoch": 93.72977575929606, "grad_norm": 0.001955963671207428, "learning_rate": 6.306556911722964e-06, "loss": 3.493186086416245e-05, "step": 330210 }, { "epoch": 93.73261424921941, "grad_norm": 0.26335594058036804, "learning_rate": 6.303718421799603e-06, "loss": 6.586592644453049e-05, "step": 330220 }, { "epoch": 93.73545273914277, "grad_norm": 0.0030392881017178297, "learning_rate": 6.300879931876242e-06, "loss": 2.442076802253723e-05, "step": 330230 }, { "epoch": 93.73829122906614, "grad_norm": 0.003954834304749966, "learning_rate": 6.298041441952882e-06, "loss": 2.2802874445915224e-05, "step": 330240 }, { "epoch": 93.7411297189895, "grad_norm": 0.0012994399294257164, "learning_rate": 6.295202952029521e-06, "loss": 9.279884397983552e-06, "step": 330250 }, { "epoch": 93.74396820891286, "grad_norm": 0.006320811342447996, "learning_rate": 6.29236446210616e-06, "loss": 1.1928938329219818e-05, "step": 330260 }, { "epoch": 93.74680669883622, "grad_norm": 0.013018044643104076, "learning_rate": 6.2895259721827985e-06, "loss": 3.76235693693161e-05, "step": 330270 }, { "epoch": 93.74964518875959, "grad_norm": 0.009477309882640839, "learning_rate": 6.286687482259439e-06, "loss": 5.6497752666473385e-06, "step": 330280 }, { "epoch": 93.75248367868294, "grad_norm": 0.0014531777705997229, "learning_rate": 6.283848992336078e-06, "loss": 4.43682074546814e-06, "step": 330290 }, { "epoch": 93.7553221686063, "grad_norm": 0.005033554043620825, "learning_rate": 6.281010502412717e-06, "loss": 3.2616779208183287e-06, "step": 330300 }, { "epoch": 93.75816065852966, "grad_norm": 0.0002818504290189594, "learning_rate": 6.2781720124893555e-06, "loss": 6.784312427043915e-06, "step": 330310 }, { "epoch": 93.76099914845302, "grad_norm": 0.000689071835950017, "learning_rate": 6.275333522565994e-06, "loss": 1.2401677668094636e-05, "step": 330320 }, { "epoch": 93.76383763837639, "grad_norm": 0.0006114938296377659, "learning_rate": 6.272495032642635e-06, "loss": 5.380995571613312e-06, "step": 330330 }, { "epoch": 93.76667612829975, "grad_norm": 0.003411961253732443, "learning_rate": 6.269656542719274e-06, "loss": 5.4175034165382385e-06, "step": 330340 }, { "epoch": 93.7695146182231, "grad_norm": 0.0001294791727559641, "learning_rate": 6.2668180527959125e-06, "loss": 4.850700497627258e-06, "step": 330350 }, { "epoch": 93.77235310814646, "grad_norm": 0.002675255760550499, "learning_rate": 6.263979562872551e-06, "loss": 9.39108431339264e-06, "step": 330360 }, { "epoch": 93.77519159806982, "grad_norm": 0.0022411213722079992, "learning_rate": 6.261141072949191e-06, "loss": 4.739686846733093e-06, "step": 330370 }, { "epoch": 93.77803008799319, "grad_norm": 0.012453523464500904, "learning_rate": 6.258302583025831e-06, "loss": 3.257524222135544e-05, "step": 330380 }, { "epoch": 93.78086857791655, "grad_norm": 0.0022908910177648067, "learning_rate": 6.2554640931024695e-06, "loss": 4.1799619793891905e-06, "step": 330390 }, { "epoch": 93.78370706783991, "grad_norm": 0.010120967403054237, "learning_rate": 6.252625603179109e-06, "loss": 5.387887358665466e-06, "step": 330400 }, { "epoch": 93.78654555776328, "grad_norm": 0.001983809284865856, "learning_rate": 6.249787113255749e-06, "loss": 3.857165575027466e-06, "step": 330410 }, { "epoch": 93.78938404768662, "grad_norm": 0.0002692469279281795, "learning_rate": 6.246948623332388e-06, "loss": 2.9632821679115297e-06, "step": 330420 }, { "epoch": 93.79222253760999, "grad_norm": 0.0011847869027405977, "learning_rate": 6.244110133409027e-06, "loss": 4.760175943374634e-06, "step": 330430 }, { "epoch": 93.79506102753335, "grad_norm": 0.001970479264855385, "learning_rate": 6.241271643485666e-06, "loss": 5.403906106948852e-06, "step": 330440 }, { "epoch": 93.79789951745671, "grad_norm": 0.001741541433148086, "learning_rate": 6.238433153562305e-06, "loss": 3.196485340595245e-06, "step": 330450 }, { "epoch": 93.80073800738008, "grad_norm": 0.0009742019465193152, "learning_rate": 6.235594663638945e-06, "loss": 6.257183849811554e-06, "step": 330460 }, { "epoch": 93.80357649730344, "grad_norm": 0.0004173124034423381, "learning_rate": 6.232756173715584e-06, "loss": 3.217160701751709e-06, "step": 330470 }, { "epoch": 93.8064149872268, "grad_norm": 0.0004176761722192168, "learning_rate": 6.2299176837922224e-06, "loss": 2.9390677809715273e-06, "step": 330480 }, { "epoch": 93.80925347715015, "grad_norm": 0.001237538643181324, "learning_rate": 6.227079193868862e-06, "loss": 3.1149014830589294e-06, "step": 330490 }, { "epoch": 93.81209196707351, "grad_norm": 0.0009566331282258034, "learning_rate": 6.224240703945501e-06, "loss": 8.856505155563355e-06, "step": 330500 }, { "epoch": 93.81209196707351, "eval_accuracy": 0.9886182997393018, "eval_loss": 0.04953980818390846, "eval_runtime": 60.239, "eval_samples_per_second": 261.077, "eval_steps_per_second": 4.084, "step": 330500 }, { "epoch": 93.81493045699688, "grad_norm": 0.0025808438658714294, "learning_rate": 6.221402214022141e-06, "loss": 6.459839642047882e-06, "step": 330510 }, { "epoch": 93.81776894692024, "grad_norm": 0.0004589349846355617, "learning_rate": 6.2185637240987795e-06, "loss": 2.5460496544837953e-06, "step": 330520 }, { "epoch": 93.8206074368436, "grad_norm": 0.0013512939913198352, "learning_rate": 6.215725234175418e-06, "loss": 4.341080784797668e-06, "step": 330530 }, { "epoch": 93.82344592676696, "grad_norm": 0.0005285319057293236, "learning_rate": 6.212886744252058e-06, "loss": 7.231533527374268e-06, "step": 330540 }, { "epoch": 93.82628441669033, "grad_norm": 0.001402711495757103, "learning_rate": 6.210048254328698e-06, "loss": 3.569386899471283e-06, "step": 330550 }, { "epoch": 93.82912290661368, "grad_norm": 0.0005574376555159688, "learning_rate": 6.2072097644053365e-06, "loss": 3.2745301723480225e-06, "step": 330560 }, { "epoch": 93.83196139653704, "grad_norm": 0.0016882121562957764, "learning_rate": 6.204371274481976e-06, "loss": 2.4901703000068666e-06, "step": 330570 }, { "epoch": 93.8347998864604, "grad_norm": 0.003381802700459957, "learning_rate": 6.201532784558615e-06, "loss": 4.766322672367096e-06, "step": 330580 }, { "epoch": 93.83763837638377, "grad_norm": 0.006513058207929134, "learning_rate": 6.198694294635255e-06, "loss": 4.614703357219696e-06, "step": 330590 }, { "epoch": 93.84047686630713, "grad_norm": 0.0003392123617231846, "learning_rate": 6.1958558047118935e-06, "loss": 3.6362558603286744e-06, "step": 330600 }, { "epoch": 93.84331535623049, "grad_norm": 0.0031717633828520775, "learning_rate": 6.193017314788533e-06, "loss": 3.2244250178337095e-06, "step": 330610 }, { "epoch": 93.84615384615384, "grad_norm": 0.00016254083311650902, "learning_rate": 6.190178824865172e-06, "loss": 4.222244024276734e-06, "step": 330620 }, { "epoch": 93.8489923360772, "grad_norm": 0.0007547555142082274, "learning_rate": 6.187340334941812e-06, "loss": 4.681013524532318e-06, "step": 330630 }, { "epoch": 93.85183082600057, "grad_norm": 0.0006283516995608807, "learning_rate": 6.1845018450184506e-06, "loss": 3.35201621055603e-06, "step": 330640 }, { "epoch": 93.85466931592393, "grad_norm": 0.0005496126250363886, "learning_rate": 6.181663355095089e-06, "loss": 5.537085235118866e-06, "step": 330650 }, { "epoch": 93.85750780584729, "grad_norm": 0.003019561292603612, "learning_rate": 6.178824865171729e-06, "loss": 2.703815698623657e-06, "step": 330660 }, { "epoch": 93.86034629577065, "grad_norm": 0.00017019534425344318, "learning_rate": 6.175986375248368e-06, "loss": 2.991221845149994e-06, "step": 330670 }, { "epoch": 93.86318478569402, "grad_norm": 0.00023558505927212536, "learning_rate": 6.173147885325008e-06, "loss": 4.255957901477814e-06, "step": 330680 }, { "epoch": 93.86602327561737, "grad_norm": 0.0005181024316698313, "learning_rate": 6.170309395401646e-06, "loss": 2.828054130077362e-06, "step": 330690 }, { "epoch": 93.86886176554073, "grad_norm": 0.0011917552910745144, "learning_rate": 6.167470905478285e-06, "loss": 3.0739232897758483e-06, "step": 330700 }, { "epoch": 93.87170025546409, "grad_norm": 0.00043650990119203925, "learning_rate": 6.164632415554925e-06, "loss": 2.6071444153785706e-06, "step": 330710 }, { "epoch": 93.87453874538745, "grad_norm": 0.0003256034979131073, "learning_rate": 6.161793925631564e-06, "loss": 4.418380558490753e-06, "step": 330720 }, { "epoch": 93.87737723531082, "grad_norm": 0.00011371198343113065, "learning_rate": 6.1589554357082034e-06, "loss": 3.789179027080536e-06, "step": 330730 }, { "epoch": 93.88021572523418, "grad_norm": 0.0004957847995683551, "learning_rate": 6.156116945784843e-06, "loss": 3.91099601984024e-06, "step": 330740 }, { "epoch": 93.88305421515754, "grad_norm": 0.0013238084502518177, "learning_rate": 6.153278455861482e-06, "loss": 4.011206328868866e-06, "step": 330750 }, { "epoch": 93.88589270508089, "grad_norm": 0.0005155338440090418, "learning_rate": 6.150439965938122e-06, "loss": 6.353110074996949e-06, "step": 330760 }, { "epoch": 93.88873119500425, "grad_norm": 0.00027732044691219926, "learning_rate": 6.1476014760147605e-06, "loss": 4.5156106352806095e-06, "step": 330770 }, { "epoch": 93.89156968492762, "grad_norm": 0.0018745330162346363, "learning_rate": 6.1447629860914e-06, "loss": 5.413591861724854e-06, "step": 330780 }, { "epoch": 93.89440817485098, "grad_norm": 0.0011368448613211513, "learning_rate": 6.141924496168039e-06, "loss": 5.8088451623916624e-06, "step": 330790 }, { "epoch": 93.89724666477434, "grad_norm": 0.0024460838176310062, "learning_rate": 6.139086006244678e-06, "loss": 4.642084240913391e-06, "step": 330800 }, { "epoch": 93.9000851546977, "grad_norm": 0.0008395717013627291, "learning_rate": 6.1362475163213175e-06, "loss": 4.187971353530884e-06, "step": 330810 }, { "epoch": 93.90292364462105, "grad_norm": 0.0008075431687757373, "learning_rate": 6.133409026397956e-06, "loss": 4.625879228115082e-06, "step": 330820 }, { "epoch": 93.90576213454442, "grad_norm": 0.0006062232423573732, "learning_rate": 6.130570536474596e-06, "loss": 3.318116068840027e-06, "step": 330830 }, { "epoch": 93.90860062446778, "grad_norm": 0.000343048624927178, "learning_rate": 6.127732046551235e-06, "loss": 2.5836750864982603e-06, "step": 330840 }, { "epoch": 93.91143911439114, "grad_norm": 0.0006852671504020691, "learning_rate": 6.124893556627874e-06, "loss": 3.5848468542098997e-06, "step": 330850 }, { "epoch": 93.9142776043145, "grad_norm": 0.004392504692077637, "learning_rate": 6.122055066704513e-06, "loss": 3.6302953958511353e-06, "step": 330860 }, { "epoch": 93.91711609423787, "grad_norm": 0.013428050093352795, "learning_rate": 6.119216576781152e-06, "loss": 7.525831460952759e-06, "step": 330870 }, { "epoch": 93.91995458416123, "grad_norm": 0.0006426869658753276, "learning_rate": 6.116378086857792e-06, "loss": 6.72265887260437e-06, "step": 330880 }, { "epoch": 93.92279307408458, "grad_norm": 0.00011854543845402077, "learning_rate": 6.113539596934431e-06, "loss": 3.0780211091041565e-06, "step": 330890 }, { "epoch": 93.92563156400794, "grad_norm": 0.000318637554300949, "learning_rate": 6.11070110701107e-06, "loss": 4.455074667930603e-06, "step": 330900 }, { "epoch": 93.9284700539313, "grad_norm": 0.0025795679539442062, "learning_rate": 6.10786261708771e-06, "loss": 5.007162690162658e-06, "step": 330910 }, { "epoch": 93.93130854385467, "grad_norm": 0.005667920224368572, "learning_rate": 6.105024127164349e-06, "loss": 2.736225724220276e-06, "step": 330920 }, { "epoch": 93.93414703377803, "grad_norm": 0.0003309439343865961, "learning_rate": 6.102185637240989e-06, "loss": 8.530169725418091e-06, "step": 330930 }, { "epoch": 93.9369855237014, "grad_norm": 0.0004931892035529017, "learning_rate": 6.099347147317627e-06, "loss": 3.4904107451438906e-06, "step": 330940 }, { "epoch": 93.93982401362476, "grad_norm": 0.003426877548918128, "learning_rate": 6.096508657394267e-06, "loss": 2.6922672986984255e-06, "step": 330950 }, { "epoch": 93.9426625035481, "grad_norm": 0.00047304967301897705, "learning_rate": 6.093670167470906e-06, "loss": 3.005377948284149e-06, "step": 330960 }, { "epoch": 93.94550099347147, "grad_norm": 0.0003145227092318237, "learning_rate": 6.090831677547545e-06, "loss": 4.735402762889862e-06, "step": 330970 }, { "epoch": 93.94833948339483, "grad_norm": 0.00024746143026277423, "learning_rate": 6.0879931876241845e-06, "loss": 2.9670074582099916e-06, "step": 330980 }, { "epoch": 93.9511779733182, "grad_norm": 0.0002382935053901747, "learning_rate": 6.085154697700823e-06, "loss": 3.0677765607833862e-06, "step": 330990 }, { "epoch": 93.95401646324156, "grad_norm": 0.001439470099285245, "learning_rate": 6.082316207777463e-06, "loss": 3.8191676139831545e-06, "step": 331000 }, { "epoch": 93.95401646324156, "eval_accuracy": 0.988745469574617, "eval_loss": 0.0490986630320549, "eval_runtime": 52.5236, "eval_samples_per_second": 299.427, "eval_steps_per_second": 4.684, "step": 331000 }, { "epoch": 93.95685495316492, "grad_norm": 0.0002291962009621784, "learning_rate": 6.079477717854102e-06, "loss": 4.304386675357819e-06, "step": 331010 }, { "epoch": 93.95969344308828, "grad_norm": 0.0006628796691074967, "learning_rate": 6.076639227930741e-06, "loss": 3.06740403175354e-06, "step": 331020 }, { "epoch": 93.96253193301163, "grad_norm": 0.00035574010689742863, "learning_rate": 6.07380073800738e-06, "loss": 5.845166742801666e-06, "step": 331030 }, { "epoch": 93.965370422935, "grad_norm": 0.001537300180643797, "learning_rate": 6.070962248084019e-06, "loss": 6.318092346191406e-06, "step": 331040 }, { "epoch": 93.96820891285836, "grad_norm": 0.0016152157913893461, "learning_rate": 6.068123758160659e-06, "loss": 3.016926348209381e-06, "step": 331050 }, { "epoch": 93.97104740278172, "grad_norm": 0.0006022774614393711, "learning_rate": 6.065285268237298e-06, "loss": 4.130043089389801e-06, "step": 331060 }, { "epoch": 93.97388589270508, "grad_norm": 0.0007115964544937015, "learning_rate": 6.062446778313937e-06, "loss": 3.899820148944854e-06, "step": 331070 }, { "epoch": 93.97672438262845, "grad_norm": 0.00039899349212646484, "learning_rate": 6.059608288390577e-06, "loss": 5.564466118812561e-06, "step": 331080 }, { "epoch": 93.9795628725518, "grad_norm": 0.003466981230303645, "learning_rate": 6.056769798467216e-06, "loss": 5.554035305976868e-06, "step": 331090 }, { "epoch": 93.98240136247516, "grad_norm": 0.0008228106889873743, "learning_rate": 6.0539313085438555e-06, "loss": 4.829280078411102e-06, "step": 331100 }, { "epoch": 93.98523985239852, "grad_norm": 0.0018056400585919619, "learning_rate": 6.051092818620494e-06, "loss": 3.6889687180519104e-06, "step": 331110 }, { "epoch": 93.98807834232188, "grad_norm": 0.0004939676146022975, "learning_rate": 6.048254328697133e-06, "loss": 2.5346875190734862e-06, "step": 331120 }, { "epoch": 93.99091683224525, "grad_norm": 0.001037855981849134, "learning_rate": 6.045415838773773e-06, "loss": 2.9832124710083006e-06, "step": 331130 }, { "epoch": 93.99375532216861, "grad_norm": 0.00035914560430683196, "learning_rate": 6.042577348850412e-06, "loss": 3.762543201446533e-06, "step": 331140 }, { "epoch": 93.99659381209197, "grad_norm": 0.0010553941829130054, "learning_rate": 6.039738858927051e-06, "loss": 2.8891488909721373e-06, "step": 331150 }, { "epoch": 93.99943230201532, "grad_norm": 0.0003329115570522845, "learning_rate": 6.03690036900369e-06, "loss": 3.241188824176788e-06, "step": 331160 }, { "epoch": 94.00227079193868, "grad_norm": 0.0032313165720552206, "learning_rate": 6.034061879080329e-06, "loss": 3.299201853224076e-06, "step": 331170 }, { "epoch": 94.00510928186205, "grad_norm": 0.0005821308004669845, "learning_rate": 6.031223389156969e-06, "loss": 2.0308420062065124e-06, "step": 331180 }, { "epoch": 94.00794777178541, "grad_norm": 0.0009159977780655026, "learning_rate": 6.028384899233608e-06, "loss": 2.6507303118705748e-06, "step": 331190 }, { "epoch": 94.01078626170877, "grad_norm": 0.00039594617555849254, "learning_rate": 6.025546409310247e-06, "loss": 3.238581120967865e-06, "step": 331200 }, { "epoch": 94.01362475163214, "grad_norm": 0.0003957665176130831, "learning_rate": 6.022707919386886e-06, "loss": 3.0975788831710815e-06, "step": 331210 }, { "epoch": 94.0164632415555, "grad_norm": 0.003179678227752447, "learning_rate": 6.019869429463526e-06, "loss": 3.463961184024811e-06, "step": 331220 }, { "epoch": 94.01930173147885, "grad_norm": 0.00044023882946930826, "learning_rate": 6.017030939540165e-06, "loss": 2.9569491744041443e-06, "step": 331230 }, { "epoch": 94.02214022140221, "grad_norm": 0.00014884094707667828, "learning_rate": 6.014192449616804e-06, "loss": 3.129243850708008e-06, "step": 331240 }, { "epoch": 94.02497871132557, "grad_norm": 0.0032779513858258724, "learning_rate": 6.011353959693444e-06, "loss": 3.639794886112213e-06, "step": 331250 }, { "epoch": 94.02781720124894, "grad_norm": 0.0003144628135487437, "learning_rate": 6.008515469770083e-06, "loss": 2.094171941280365e-06, "step": 331260 }, { "epoch": 94.0306556911723, "grad_norm": 0.000499121320899576, "learning_rate": 6.0056769798467225e-06, "loss": 3.2203271985054018e-06, "step": 331270 }, { "epoch": 94.03349418109566, "grad_norm": 0.0009485810878686607, "learning_rate": 6.002838489923361e-06, "loss": 2.6542693376541138e-06, "step": 331280 }, { "epoch": 94.03633267101901, "grad_norm": 0.003998367115855217, "learning_rate": 6e-06, "loss": 5.415081977844238e-06, "step": 331290 }, { "epoch": 94.03917116094237, "grad_norm": 0.00036623573396354914, "learning_rate": 5.99716151007664e-06, "loss": 3.219209611415863e-06, "step": 331300 }, { "epoch": 94.04200965086574, "grad_norm": 0.0012324325507506728, "learning_rate": 5.994323020153279e-06, "loss": 3.4151598811149597e-06, "step": 331310 }, { "epoch": 94.0448481407891, "grad_norm": 0.0005542665021494031, "learning_rate": 5.991484530229918e-06, "loss": 3.298558294773102e-06, "step": 331320 }, { "epoch": 94.04768663071246, "grad_norm": 0.0002633575059007853, "learning_rate": 5.988646040306557e-06, "loss": 2.456270158290863e-06, "step": 331330 }, { "epoch": 94.05052512063583, "grad_norm": 0.0014699065359309316, "learning_rate": 5.985807550383196e-06, "loss": 3.7470832467079163e-06, "step": 331340 }, { "epoch": 94.05336361055919, "grad_norm": 0.001396912382915616, "learning_rate": 5.982969060459836e-06, "loss": 3.0547380447387695e-06, "step": 331350 }, { "epoch": 94.05620210048254, "grad_norm": 0.00045882168342359364, "learning_rate": 5.9801305705364745e-06, "loss": 3.515370190143585e-06, "step": 331360 }, { "epoch": 94.0590405904059, "grad_norm": 0.002088765613734722, "learning_rate": 5.977292080613114e-06, "loss": 3.663450479507446e-06, "step": 331370 }, { "epoch": 94.06187908032926, "grad_norm": 0.0014722839696332812, "learning_rate": 5.974453590689753e-06, "loss": 3.604032099246979e-06, "step": 331380 }, { "epoch": 94.06471757025263, "grad_norm": 0.0006483860197477043, "learning_rate": 5.971615100766393e-06, "loss": 2.3854896426200868e-06, "step": 331390 }, { "epoch": 94.06755606017599, "grad_norm": 0.0005463319248519838, "learning_rate": 5.9687766108430316e-06, "loss": 2.527981996536255e-06, "step": 331400 }, { "epoch": 94.07039455009935, "grad_norm": 0.0002446394646540284, "learning_rate": 5.965938120919671e-06, "loss": 1.929327845573425e-06, "step": 331410 }, { "epoch": 94.07323304002271, "grad_norm": 0.00037188263377174735, "learning_rate": 5.963099630996311e-06, "loss": 6.409734487533569e-06, "step": 331420 }, { "epoch": 94.07607152994606, "grad_norm": 0.0012655489845201373, "learning_rate": 5.96026114107295e-06, "loss": 4.022940993309021e-06, "step": 331430 }, { "epoch": 94.07891001986943, "grad_norm": 0.000679386721458286, "learning_rate": 5.957422651149589e-06, "loss": 4.257634282112122e-06, "step": 331440 }, { "epoch": 94.08174850979279, "grad_norm": 0.0025642772670835257, "learning_rate": 5.954584161226228e-06, "loss": 3.3080577850341797e-06, "step": 331450 }, { "epoch": 94.08458699971615, "grad_norm": 0.001270157634280622, "learning_rate": 5.951745671302867e-06, "loss": 2.580881118774414e-06, "step": 331460 }, { "epoch": 94.08742548963951, "grad_norm": 0.00033876969246193767, "learning_rate": 5.948907181379507e-06, "loss": 3.0767172574996947e-06, "step": 331470 }, { "epoch": 94.09026397956288, "grad_norm": 0.00047297851415351033, "learning_rate": 5.946068691456146e-06, "loss": 3.6327168345451354e-06, "step": 331480 }, { "epoch": 94.09310246948624, "grad_norm": 0.001040806295350194, "learning_rate": 5.9432302015327844e-06, "loss": 2.1032989025115968e-06, "step": 331490 }, { "epoch": 94.09594095940959, "grad_norm": 0.0007619446259923279, "learning_rate": 5.940391711609424e-06, "loss": 3.1439587473869325e-06, "step": 331500 }, { "epoch": 94.09594095940959, "eval_accuracy": 0.9891905639982196, "eval_loss": 0.04850461333990097, "eval_runtime": 46.9154, "eval_samples_per_second": 335.22, "eval_steps_per_second": 5.243, "step": 331500 }, { "epoch": 94.09877944933295, "grad_norm": 0.0007193380733951926, "learning_rate": 5.937553221686063e-06, "loss": 2.104043960571289e-06, "step": 331510 }, { "epoch": 94.10161793925631, "grad_norm": 0.0006576990708708763, "learning_rate": 5.934714731762703e-06, "loss": 3.4205615520477297e-06, "step": 331520 }, { "epoch": 94.10445642917968, "grad_norm": 0.0005008496809750795, "learning_rate": 5.9318762418393415e-06, "loss": 2.1025538444519042e-06, "step": 331530 }, { "epoch": 94.10729491910304, "grad_norm": 0.00022364653705153614, "learning_rate": 5.92903775191598e-06, "loss": 3.0333176255226135e-06, "step": 331540 }, { "epoch": 94.1101334090264, "grad_norm": 0.0002688914246391505, "learning_rate": 5.92619926199262e-06, "loss": 2.946518361568451e-06, "step": 331550 }, { "epoch": 94.11297189894975, "grad_norm": 0.00016662236885167658, "learning_rate": 5.923360772069259e-06, "loss": 7.66552984714508e-06, "step": 331560 }, { "epoch": 94.11581038887311, "grad_norm": 0.0005192667595110834, "learning_rate": 5.9205222821458985e-06, "loss": 2.7487054467201234e-06, "step": 331570 }, { "epoch": 94.11864887879648, "grad_norm": 0.0007824936765246093, "learning_rate": 5.917683792222538e-06, "loss": 3.3792108297348023e-06, "step": 331580 }, { "epoch": 94.12148736871984, "grad_norm": 0.000307172245811671, "learning_rate": 5.914845302299178e-06, "loss": 2.2247433662414552e-06, "step": 331590 }, { "epoch": 94.1243258586432, "grad_norm": 0.009306671097874641, "learning_rate": 5.912006812375817e-06, "loss": 3.5017728805541992e-06, "step": 331600 }, { "epoch": 94.12716434856657, "grad_norm": 0.0005354212480597198, "learning_rate": 5.9091683224524555e-06, "loss": 1.3696029782295226e-06, "step": 331610 }, { "epoch": 94.13000283848993, "grad_norm": 0.0005536291864700615, "learning_rate": 5.906329832529095e-06, "loss": 2.523697912693024e-06, "step": 331620 }, { "epoch": 94.13284132841328, "grad_norm": 0.0008376752957701683, "learning_rate": 5.903491342605734e-06, "loss": 3.7372112274169924e-06, "step": 331630 }, { "epoch": 94.13567981833664, "grad_norm": 0.0006578732863999903, "learning_rate": 5.900652852682374e-06, "loss": 3.147311508655548e-06, "step": 331640 }, { "epoch": 94.13851830826, "grad_norm": 0.001664540497586131, "learning_rate": 5.8978143627590126e-06, "loss": 2.1569430828094482e-06, "step": 331650 }, { "epoch": 94.14135679818337, "grad_norm": 0.0008231571409851313, "learning_rate": 5.894975872835651e-06, "loss": 2.1534040570259093e-06, "step": 331660 }, { "epoch": 94.14419528810673, "grad_norm": 0.0001787103246897459, "learning_rate": 5.892137382912291e-06, "loss": 3.3859163522720336e-06, "step": 331670 }, { "epoch": 94.14703377803009, "grad_norm": 0.00013812340330332518, "learning_rate": 5.88929889298893e-06, "loss": 1.5463680028915406e-06, "step": 331680 }, { "epoch": 94.14987226795346, "grad_norm": 0.00044016665196977556, "learning_rate": 5.88646040306557e-06, "loss": 2.4553388357162476e-06, "step": 331690 }, { "epoch": 94.1527107578768, "grad_norm": 0.0004052761069033295, "learning_rate": 5.8836219131422084e-06, "loss": 2.4629756808280944e-06, "step": 331700 }, { "epoch": 94.15554924780017, "grad_norm": 0.0021724370308220387, "learning_rate": 5.880783423218847e-06, "loss": 2.7235597372055055e-06, "step": 331710 }, { "epoch": 94.15838773772353, "grad_norm": 0.0004741231387015432, "learning_rate": 5.877944933295487e-06, "loss": 1.915358006954193e-06, "step": 331720 }, { "epoch": 94.16122622764689, "grad_norm": 0.006136711221188307, "learning_rate": 5.875106443372126e-06, "loss": 3.771856427192688e-06, "step": 331730 }, { "epoch": 94.16406471757026, "grad_norm": 0.00020431348821148276, "learning_rate": 5.8722679534487655e-06, "loss": 2.6293098926544188e-06, "step": 331740 }, { "epoch": 94.16690320749362, "grad_norm": 0.0003449808282312006, "learning_rate": 5.869429463525405e-06, "loss": 3.4613534808158874e-06, "step": 331750 }, { "epoch": 94.16974169741698, "grad_norm": 0.0003046577621717006, "learning_rate": 5.866590973602044e-06, "loss": 2.08783894777298e-06, "step": 331760 }, { "epoch": 94.17258018734033, "grad_norm": 0.0007445731898769736, "learning_rate": 5.863752483678684e-06, "loss": 3.818050026893616e-06, "step": 331770 }, { "epoch": 94.17541867726369, "grad_norm": 0.001466060639359057, "learning_rate": 5.8609139937553225e-06, "loss": 2.958253026008606e-06, "step": 331780 }, { "epoch": 94.17825716718706, "grad_norm": 0.005584118887782097, "learning_rate": 5.858075503831962e-06, "loss": 3.3069401979446413e-06, "step": 331790 }, { "epoch": 94.18109565711042, "grad_norm": 0.00033600302413105965, "learning_rate": 5.855237013908601e-06, "loss": 2.5002285838127135e-06, "step": 331800 }, { "epoch": 94.18393414703378, "grad_norm": 0.0003083429182879627, "learning_rate": 5.85239852398524e-06, "loss": 2.878345549106598e-06, "step": 331810 }, { "epoch": 94.18677263695714, "grad_norm": 0.0012880753492936492, "learning_rate": 5.8495600340618795e-06, "loss": 4.669465124607086e-06, "step": 331820 }, { "epoch": 94.1896111268805, "grad_norm": 0.010992358438670635, "learning_rate": 5.846721544138518e-06, "loss": 1.1107325553894043e-05, "step": 331830 }, { "epoch": 94.19244961680386, "grad_norm": 0.0005719659966416657, "learning_rate": 5.843883054215158e-06, "loss": 1.9650906324386597e-06, "step": 331840 }, { "epoch": 94.19528810672722, "grad_norm": 0.0013093601446598768, "learning_rate": 5.841044564291797e-06, "loss": 2.1247193217277528e-06, "step": 331850 }, { "epoch": 94.19812659665058, "grad_norm": 0.001913486048579216, "learning_rate": 5.838206074368436e-06, "loss": 2.7179718017578126e-06, "step": 331860 }, { "epoch": 94.20096508657394, "grad_norm": 0.0005475276266224682, "learning_rate": 5.835367584445075e-06, "loss": 2.0470470190048218e-06, "step": 331870 }, { "epoch": 94.20380357649731, "grad_norm": 0.00045969695202074945, "learning_rate": 5.832529094521714e-06, "loss": 2.8423964977264404e-06, "step": 331880 }, { "epoch": 94.20664206642067, "grad_norm": 0.00034595062606967986, "learning_rate": 5.829690604598354e-06, "loss": 3.2279640436172485e-06, "step": 331890 }, { "epoch": 94.20948055634402, "grad_norm": 0.0004649996990337968, "learning_rate": 5.826852114674993e-06, "loss": 3.2702460885047913e-06, "step": 331900 }, { "epoch": 94.21231904626738, "grad_norm": 0.002362282481044531, "learning_rate": 5.824013624751632e-06, "loss": 2.6263296604156494e-06, "step": 331910 }, { "epoch": 94.21515753619074, "grad_norm": 0.0009603291400708258, "learning_rate": 5.821175134828272e-06, "loss": 3.6774203181266784e-06, "step": 331920 }, { "epoch": 94.21799602611411, "grad_norm": 0.0010598975932225585, "learning_rate": 5.818336644904911e-06, "loss": 4.183128476142884e-06, "step": 331930 }, { "epoch": 94.22083451603747, "grad_norm": 0.0013386800419539213, "learning_rate": 5.815498154981551e-06, "loss": 2.5581568479537966e-06, "step": 331940 }, { "epoch": 94.22367300596083, "grad_norm": 0.0005370189319364727, "learning_rate": 5.8126596650581894e-06, "loss": 2.995692193508148e-06, "step": 331950 }, { "epoch": 94.2265114958842, "grad_norm": 0.0004873821453657001, "learning_rate": 5.809821175134829e-06, "loss": 2.795085310935974e-06, "step": 331960 }, { "epoch": 94.22934998580754, "grad_norm": 0.00010262576688546687, "learning_rate": 5.806982685211468e-06, "loss": 2.2176653146743773e-06, "step": 331970 }, { "epoch": 94.23218847573091, "grad_norm": 0.0005396406631916761, "learning_rate": 5.804144195288107e-06, "loss": 4.017911851406097e-06, "step": 331980 }, { "epoch": 94.23502696565427, "grad_norm": 0.0010764030739665031, "learning_rate": 5.8013057053647465e-06, "loss": 2.62334942817688e-06, "step": 331990 }, { "epoch": 94.23786545557763, "grad_norm": 0.0026756676379591227, "learning_rate": 5.798467215441385e-06, "loss": 4.641152918338775e-06, "step": 332000 }, { "epoch": 94.23786545557763, "eval_accuracy": 0.9894449036688497, "eval_loss": 0.04766452684998512, "eval_runtime": 82.056, "eval_samples_per_second": 191.662, "eval_steps_per_second": 2.998, "step": 332000 }, { "epoch": 94.240703945501, "grad_norm": 0.0013878901954740286, "learning_rate": 5.795628725518025e-06, "loss": 3.153644502162933e-06, "step": 332010 }, { "epoch": 94.24354243542436, "grad_norm": 0.0006870316574349999, "learning_rate": 5.792790235594664e-06, "loss": 4.460848867893219e-06, "step": 332020 }, { "epoch": 94.24638092534771, "grad_norm": 0.001173606957308948, "learning_rate": 5.789951745671303e-06, "loss": 2.5851652026176454e-06, "step": 332030 }, { "epoch": 94.24921941527107, "grad_norm": 0.0006561822956427932, "learning_rate": 5.787113255747942e-06, "loss": 3.32072377204895e-06, "step": 332040 }, { "epoch": 94.25205790519443, "grad_norm": 0.00027531490195542574, "learning_rate": 5.784274765824581e-06, "loss": 3.850832581520081e-06, "step": 332050 }, { "epoch": 94.2548963951178, "grad_norm": 0.0009950619423761964, "learning_rate": 5.781436275901221e-06, "loss": 2.5486573576927184e-06, "step": 332060 }, { "epoch": 94.25773488504116, "grad_norm": 0.001839949982240796, "learning_rate": 5.77859778597786e-06, "loss": 1.9283965229988097e-06, "step": 332070 }, { "epoch": 94.26057337496452, "grad_norm": 0.0002762165095191449, "learning_rate": 5.775759296054499e-06, "loss": 2.1087005734443663e-06, "step": 332080 }, { "epoch": 94.26341186488789, "grad_norm": 0.0004135897324886173, "learning_rate": 5.772920806131139e-06, "loss": 3.866665065288544e-06, "step": 332090 }, { "epoch": 94.26625035481123, "grad_norm": 0.0010235406225547194, "learning_rate": 5.770082316207778e-06, "loss": 3.3082440495491026e-06, "step": 332100 }, { "epoch": 94.2690888447346, "grad_norm": 0.0018628446850925684, "learning_rate": 5.7672438262844175e-06, "loss": 2.5209039449691774e-06, "step": 332110 }, { "epoch": 94.27192733465796, "grad_norm": 0.0009699576185084879, "learning_rate": 5.764405336361056e-06, "loss": 2.561323344707489e-06, "step": 332120 }, { "epoch": 94.27476582458132, "grad_norm": 0.0011821326334029436, "learning_rate": 5.761566846437695e-06, "loss": 2.93143093585968e-06, "step": 332130 }, { "epoch": 94.27760431450469, "grad_norm": 0.0007139635854400694, "learning_rate": 5.758728356514335e-06, "loss": 3.0677765607833862e-06, "step": 332140 }, { "epoch": 94.28044280442805, "grad_norm": 0.0005155866383574903, "learning_rate": 5.755889866590974e-06, "loss": 3.062933683395386e-06, "step": 332150 }, { "epoch": 94.28328129435141, "grad_norm": 0.0009095654240809381, "learning_rate": 5.753051376667613e-06, "loss": 3.575347363948822e-06, "step": 332160 }, { "epoch": 94.28611978427476, "grad_norm": 0.0007045763777568936, "learning_rate": 5.750212886744252e-06, "loss": 3.2838433980941772e-06, "step": 332170 }, { "epoch": 94.28895827419812, "grad_norm": 0.0016211640322580934, "learning_rate": 5.747374396820891e-06, "loss": 2.5920569896697996e-06, "step": 332180 }, { "epoch": 94.29179676412149, "grad_norm": 0.0001123249385273084, "learning_rate": 5.744535906897531e-06, "loss": 5.151703953742981e-06, "step": 332190 }, { "epoch": 94.29463525404485, "grad_norm": 0.0012793670175597072, "learning_rate": 5.74169741697417e-06, "loss": 2.5976449251174925e-06, "step": 332200 }, { "epoch": 94.29747374396821, "grad_norm": 0.0029652987141162157, "learning_rate": 5.738858927050809e-06, "loss": 3.7280842661857605e-06, "step": 332210 }, { "epoch": 94.30031223389157, "grad_norm": 0.00012396706733852625, "learning_rate": 5.736020437127448e-06, "loss": 2.446770668029785e-06, "step": 332220 }, { "epoch": 94.30315072381494, "grad_norm": 0.00197654333896935, "learning_rate": 5.733181947204087e-06, "loss": 3.6522746086120605e-06, "step": 332230 }, { "epoch": 94.30598921373829, "grad_norm": 0.0007655953522771597, "learning_rate": 5.730343457280727e-06, "loss": 2.6514753699302673e-06, "step": 332240 }, { "epoch": 94.30882770366165, "grad_norm": 0.0006649072747677565, "learning_rate": 5.727504967357366e-06, "loss": 2.7609989047050475e-06, "step": 332250 }, { "epoch": 94.31166619358501, "grad_norm": 0.00039160114829428494, "learning_rate": 5.724666477434006e-06, "loss": 2.8405338525772095e-06, "step": 332260 }, { "epoch": 94.31450468350837, "grad_norm": 0.0005011186003684998, "learning_rate": 5.721827987510645e-06, "loss": 2.166442573070526e-06, "step": 332270 }, { "epoch": 94.31734317343174, "grad_norm": 0.0006334976060315967, "learning_rate": 5.7189894975872845e-06, "loss": 3.6133453249931337e-06, "step": 332280 }, { "epoch": 94.3201816633551, "grad_norm": 0.00021294345788192004, "learning_rate": 5.716151007663923e-06, "loss": 2.90796160697937e-06, "step": 332290 }, { "epoch": 94.32302015327845, "grad_norm": 0.0006080727907828987, "learning_rate": 5.713312517740562e-06, "loss": 3.0135735869407656e-06, "step": 332300 }, { "epoch": 94.32585864320181, "grad_norm": 0.0007535762269981205, "learning_rate": 5.710474027817202e-06, "loss": 2.7749687433242797e-06, "step": 332310 }, { "epoch": 94.32869713312517, "grad_norm": 0.0006470032385550439, "learning_rate": 5.707635537893841e-06, "loss": 4.659220576286316e-06, "step": 332320 }, { "epoch": 94.33153562304854, "grad_norm": 0.00016367892385460436, "learning_rate": 5.70479704797048e-06, "loss": 2.2551044821739198e-06, "step": 332330 }, { "epoch": 94.3343741129719, "grad_norm": 0.0003899283765349537, "learning_rate": 5.701958558047119e-06, "loss": 2.289190888404846e-06, "step": 332340 }, { "epoch": 94.33721260289526, "grad_norm": 0.0004549513687379658, "learning_rate": 5.699120068123758e-06, "loss": 3.6368146538734436e-06, "step": 332350 }, { "epoch": 94.34005109281863, "grad_norm": 0.0007618023664690554, "learning_rate": 5.696281578200398e-06, "loss": 3.830902278423309e-06, "step": 332360 }, { "epoch": 94.34288958274198, "grad_norm": 0.0004707992193289101, "learning_rate": 5.6934430882770365e-06, "loss": 2.658367156982422e-06, "step": 332370 }, { "epoch": 94.34572807266534, "grad_norm": 0.0007628814782947302, "learning_rate": 5.690604598353676e-06, "loss": 3.1406059861183165e-06, "step": 332380 }, { "epoch": 94.3485665625887, "grad_norm": 0.0007206738227978349, "learning_rate": 5.687766108430315e-06, "loss": 5.712546408176422e-06, "step": 332390 }, { "epoch": 94.35140505251206, "grad_norm": 0.001058765104971826, "learning_rate": 5.684927618506954e-06, "loss": 2.5851652026176454e-06, "step": 332400 }, { "epoch": 94.35424354243543, "grad_norm": 0.00013983601820655167, "learning_rate": 5.6820891285835936e-06, "loss": 5.250610411167145e-06, "step": 332410 }, { "epoch": 94.35708203235879, "grad_norm": 0.0017375331372022629, "learning_rate": 5.679250638660233e-06, "loss": 6.243027746677398e-06, "step": 332420 }, { "epoch": 94.35992052228215, "grad_norm": 0.002547661541029811, "learning_rate": 5.676412148736873e-06, "loss": 3.897026181221008e-06, "step": 332430 }, { "epoch": 94.3627590122055, "grad_norm": 0.00040985760278999805, "learning_rate": 5.673573658813512e-06, "loss": 3.3939257264137266e-06, "step": 332440 }, { "epoch": 94.36559750212886, "grad_norm": 0.0008928484749048948, "learning_rate": 5.670735168890151e-06, "loss": 3.758445382118225e-06, "step": 332450 }, { "epoch": 94.36843599205223, "grad_norm": 0.001606899662874639, "learning_rate": 5.66789667896679e-06, "loss": 2.778880298137665e-06, "step": 332460 }, { "epoch": 94.37127448197559, "grad_norm": 0.00039787491550669074, "learning_rate": 5.665058189043429e-06, "loss": 5.548819899559021e-06, "step": 332470 }, { "epoch": 94.37411297189895, "grad_norm": 0.0005988385528326035, "learning_rate": 5.662219699120069e-06, "loss": 1.9434839487075807e-06, "step": 332480 }, { "epoch": 94.37695146182232, "grad_norm": 0.0015910794027149677, "learning_rate": 5.659381209196708e-06, "loss": 3.464706242084503e-06, "step": 332490 }, { "epoch": 94.37978995174566, "grad_norm": 0.00031149174901656806, "learning_rate": 5.6565427192733465e-06, "loss": 3.792718052864075e-06, "step": 332500 }, { "epoch": 94.37978995174566, "eval_accuracy": 0.9892541489158772, "eval_loss": 0.0482749305665493, "eval_runtime": 100.2132, "eval_samples_per_second": 156.935, "eval_steps_per_second": 2.455, "step": 332500 }, { "epoch": 94.38262844166903, "grad_norm": 0.000564372050575912, "learning_rate": 5.653704229349986e-06, "loss": 1.9406899809837343e-06, "step": 332510 }, { "epoch": 94.38546693159239, "grad_norm": 0.0006361289415508509, "learning_rate": 5.650865739426625e-06, "loss": 4.7871842980384825e-06, "step": 332520 }, { "epoch": 94.38830542151575, "grad_norm": 0.0003325608849991113, "learning_rate": 5.648027249503265e-06, "loss": 2.660974860191345e-06, "step": 332530 }, { "epoch": 94.39114391143912, "grad_norm": 0.0007348385406658053, "learning_rate": 5.6451887595799035e-06, "loss": 2.724863588809967e-06, "step": 332540 }, { "epoch": 94.39398240136248, "grad_norm": 0.002137394156306982, "learning_rate": 5.642350269656542e-06, "loss": 2.487190067768097e-06, "step": 332550 }, { "epoch": 94.39682089128584, "grad_norm": 0.0003288687439635396, "learning_rate": 5.639511779733182e-06, "loss": 3.0431896448135375e-06, "step": 332560 }, { "epoch": 94.39965938120919, "grad_norm": 0.0008765699458308518, "learning_rate": 5.636673289809821e-06, "loss": 2.9632821679115297e-06, "step": 332570 }, { "epoch": 94.40249787113255, "grad_norm": 0.0006847015465609729, "learning_rate": 5.6338347998864605e-06, "loss": 3.5842880606651305e-06, "step": 332580 }, { "epoch": 94.40533636105592, "grad_norm": 0.0006938816513866186, "learning_rate": 5.6309963099631e-06, "loss": 1.7365440726280212e-06, "step": 332590 }, { "epoch": 94.40817485097928, "grad_norm": 0.0012063377071172, "learning_rate": 5.62815782003974e-06, "loss": 2.9200688004493714e-06, "step": 332600 }, { "epoch": 94.41101334090264, "grad_norm": 0.001275295508094132, "learning_rate": 5.625319330116379e-06, "loss": 2.3867934942245485e-06, "step": 332610 }, { "epoch": 94.413851830826, "grad_norm": 0.0007934515597298741, "learning_rate": 5.6224808401930175e-06, "loss": 3.444775938987732e-06, "step": 332620 }, { "epoch": 94.41669032074937, "grad_norm": 0.00016073441656772047, "learning_rate": 5.619642350269657e-06, "loss": 2.648867666721344e-06, "step": 332630 }, { "epoch": 94.41952881067272, "grad_norm": 0.0009085386991500854, "learning_rate": 5.616803860346296e-06, "loss": 4.349835216999054e-06, "step": 332640 }, { "epoch": 94.42236730059608, "grad_norm": 0.00038609918556176126, "learning_rate": 5.613965370422936e-06, "loss": 3.5453587770462038e-06, "step": 332650 }, { "epoch": 94.42520579051944, "grad_norm": 0.0018049088539555669, "learning_rate": 5.6111268804995746e-06, "loss": 3.1655654311180115e-06, "step": 332660 }, { "epoch": 94.4280442804428, "grad_norm": 0.0008385113324038684, "learning_rate": 5.608288390576213e-06, "loss": 3.5274773836135863e-06, "step": 332670 }, { "epoch": 94.43088277036617, "grad_norm": 0.00025758903939276934, "learning_rate": 5.605449900652853e-06, "loss": 1.85258686542511e-06, "step": 332680 }, { "epoch": 94.43372126028953, "grad_norm": 0.0005443801055662334, "learning_rate": 5.602611410729492e-06, "loss": 3.1637027859687805e-06, "step": 332690 }, { "epoch": 94.4365597502129, "grad_norm": 0.0006708323489874601, "learning_rate": 5.599772920806132e-06, "loss": 2.9202550649642943e-06, "step": 332700 }, { "epoch": 94.43939824013624, "grad_norm": 0.0001548833679407835, "learning_rate": 5.5969344308827704e-06, "loss": 3.6975368857383726e-06, "step": 332710 }, { "epoch": 94.4422367300596, "grad_norm": 0.00021481316071003675, "learning_rate": 5.594095940959409e-06, "loss": 4.502758383750916e-06, "step": 332720 }, { "epoch": 94.44507521998297, "grad_norm": 0.00046340946573764086, "learning_rate": 5.591257451036049e-06, "loss": 2.826005220413208e-06, "step": 332730 }, { "epoch": 94.44791370990633, "grad_norm": 0.0006544950301758945, "learning_rate": 5.588418961112688e-06, "loss": 2.7371570467948914e-06, "step": 332740 }, { "epoch": 94.4507521998297, "grad_norm": 0.008240997791290283, "learning_rate": 5.5855804711893275e-06, "loss": 5.048699676990509e-06, "step": 332750 }, { "epoch": 94.45359068975306, "grad_norm": 0.0004974895855411887, "learning_rate": 5.582741981265967e-06, "loss": 1.944601535797119e-06, "step": 332760 }, { "epoch": 94.4564291796764, "grad_norm": 0.0005602266173809767, "learning_rate": 5.579903491342606e-06, "loss": 3.2648444175720214e-06, "step": 332770 }, { "epoch": 94.45926766959977, "grad_norm": 0.0021234964951872826, "learning_rate": 5.577065001419246e-06, "loss": 4.798360168933868e-06, "step": 332780 }, { "epoch": 94.46210615952313, "grad_norm": 0.004659817088395357, "learning_rate": 5.5742265114958845e-06, "loss": 3.7334859371185304e-06, "step": 332790 }, { "epoch": 94.4649446494465, "grad_norm": 0.0008043620036914945, "learning_rate": 5.571388021572524e-06, "loss": 2.4817883968353273e-06, "step": 332800 }, { "epoch": 94.46778313936986, "grad_norm": 0.005852298811078072, "learning_rate": 5.568549531649163e-06, "loss": 3.5902485251426697e-06, "step": 332810 }, { "epoch": 94.47062162929322, "grad_norm": 0.0022565589752048254, "learning_rate": 5.565711041725802e-06, "loss": 2.7002766728401185e-06, "step": 332820 }, { "epoch": 94.47346011921658, "grad_norm": 0.00033312838058918715, "learning_rate": 5.5628725518024415e-06, "loss": 4.065781831741333e-06, "step": 332830 }, { "epoch": 94.47629860913993, "grad_norm": 0.0007762472378090024, "learning_rate": 5.56003406187908e-06, "loss": 3.7338584661483763e-06, "step": 332840 }, { "epoch": 94.4791370990633, "grad_norm": 0.0003696275525726378, "learning_rate": 5.55719557195572e-06, "loss": 3.139674663543701e-06, "step": 332850 }, { "epoch": 94.48197558898666, "grad_norm": 0.00044500312651507556, "learning_rate": 5.554357082032359e-06, "loss": 3.206916153430939e-06, "step": 332860 }, { "epoch": 94.48481407891002, "grad_norm": 0.00035688653588294983, "learning_rate": 5.551518592108998e-06, "loss": 1.6329810023307801e-06, "step": 332870 }, { "epoch": 94.48765256883338, "grad_norm": 0.00020332775602582842, "learning_rate": 5.548680102185637e-06, "loss": 4.25390899181366e-06, "step": 332880 }, { "epoch": 94.49049105875675, "grad_norm": 0.0005389424040913582, "learning_rate": 5.545841612262276e-06, "loss": 2.7721747756004333e-06, "step": 332890 }, { "epoch": 94.49332954868011, "grad_norm": 0.0005625677877105772, "learning_rate": 5.543003122338916e-06, "loss": 2.997368574142456e-06, "step": 332900 }, { "epoch": 94.49616803860346, "grad_norm": 0.00047744472976773977, "learning_rate": 5.540164632415555e-06, "loss": 2.117268741130829e-06, "step": 332910 }, { "epoch": 94.49900652852682, "grad_norm": 0.00019264912407379597, "learning_rate": 5.537326142492194e-06, "loss": 4.688091576099396e-06, "step": 332920 }, { "epoch": 94.50184501845018, "grad_norm": 0.0005931042251177132, "learning_rate": 5.534487652568834e-06, "loss": 2.707168459892273e-06, "step": 332930 }, { "epoch": 94.50468350837355, "grad_norm": 0.00035003479570150375, "learning_rate": 5.531649162645473e-06, "loss": 6.382167339324951e-06, "step": 332940 }, { "epoch": 94.50752199829691, "grad_norm": 0.00044277208507992327, "learning_rate": 5.528810672722113e-06, "loss": 2.5441870093345644e-06, "step": 332950 }, { "epoch": 94.51036048822027, "grad_norm": 0.0010361417662352324, "learning_rate": 5.5259721827987514e-06, "loss": 3.296881914138794e-06, "step": 332960 }, { "epoch": 94.51319897814363, "grad_norm": 0.00044524279655888677, "learning_rate": 5.523133692875391e-06, "loss": 3.390014171600342e-06, "step": 332970 }, { "epoch": 94.51603746806698, "grad_norm": 0.0018430643249303102, "learning_rate": 5.52029520295203e-06, "loss": 2.866983413696289e-06, "step": 332980 }, { "epoch": 94.51887595799035, "grad_norm": 9.85258593573235e-05, "learning_rate": 5.517456713028669e-06, "loss": 2.0150095224380492e-06, "step": 332990 }, { "epoch": 94.52171444791371, "grad_norm": 0.000947780383285135, "learning_rate": 5.5146182231053085e-06, "loss": 2.3620203137397764e-06, "step": 333000 }, { "epoch": 94.52171444791371, "eval_accuracy": 0.9889362243275895, "eval_loss": 0.04903610050678253, "eval_runtime": 58.0117, "eval_samples_per_second": 271.101, "eval_steps_per_second": 4.241, "step": 333000 }, { "epoch": 94.52455293783707, "grad_norm": 0.00023801467614248395, "learning_rate": 5.511779733181947e-06, "loss": 2.7719885110855103e-06, "step": 333010 }, { "epoch": 94.52739142776043, "grad_norm": 0.0007880865014158189, "learning_rate": 5.508941243258587e-06, "loss": 2.2564083337783815e-06, "step": 333020 }, { "epoch": 94.5302299176838, "grad_norm": 0.0018914209213107824, "learning_rate": 5.506102753335226e-06, "loss": 5.011074244976044e-06, "step": 333030 }, { "epoch": 94.53306840760715, "grad_norm": 0.0020047707948833704, "learning_rate": 5.503264263411865e-06, "loss": 3.4959986805915834e-06, "step": 333040 }, { "epoch": 94.53590689753051, "grad_norm": 0.0006552347913384438, "learning_rate": 5.500425773488504e-06, "loss": 2.6695430278778077e-06, "step": 333050 }, { "epoch": 94.53874538745387, "grad_norm": 0.0005317038740031421, "learning_rate": 5.497587283565143e-06, "loss": 1.8090009689331056e-06, "step": 333060 }, { "epoch": 94.54158387737724, "grad_norm": 0.004660284612327814, "learning_rate": 5.494748793641783e-06, "loss": 5.217269062995911e-06, "step": 333070 }, { "epoch": 94.5444223673006, "grad_norm": 0.0014667266514152288, "learning_rate": 5.491910303718422e-06, "loss": 3.2747164368629454e-06, "step": 333080 }, { "epoch": 94.54726085722396, "grad_norm": 0.0003555942384991795, "learning_rate": 5.489071813795061e-06, "loss": 2.512149512767792e-06, "step": 333090 }, { "epoch": 94.55009934714732, "grad_norm": 0.0008356199250556529, "learning_rate": 5.486233323871701e-06, "loss": 1.9123777747154237e-06, "step": 333100 }, { "epoch": 94.55293783707067, "grad_norm": 6.8800181907136e-05, "learning_rate": 5.48339483394834e-06, "loss": 4.383176565170288e-06, "step": 333110 }, { "epoch": 94.55577632699404, "grad_norm": 0.0003637125773821026, "learning_rate": 5.4805563440249796e-06, "loss": 3.42782586812973e-06, "step": 333120 }, { "epoch": 94.5586148169174, "grad_norm": 0.0003249000001233071, "learning_rate": 5.477717854101618e-06, "loss": 2.932734787464142e-06, "step": 333130 }, { "epoch": 94.56145330684076, "grad_norm": 0.0003769622417166829, "learning_rate": 5.474879364178257e-06, "loss": 2.7313828468322756e-06, "step": 333140 }, { "epoch": 94.56429179676412, "grad_norm": 0.0001994647755054757, "learning_rate": 5.472040874254897e-06, "loss": 1.7637386918067932e-06, "step": 333150 }, { "epoch": 94.56713028668749, "grad_norm": 0.0004423348873388022, "learning_rate": 5.469202384331536e-06, "loss": 2.32011079788208e-06, "step": 333160 }, { "epoch": 94.56996877661085, "grad_norm": 0.0002230344107374549, "learning_rate": 5.466363894408175e-06, "loss": 2.411007881164551e-06, "step": 333170 }, { "epoch": 94.5728072665342, "grad_norm": 0.002146851969882846, "learning_rate": 5.463525404484814e-06, "loss": 3.0124559998512267e-06, "step": 333180 }, { "epoch": 94.57564575645756, "grad_norm": 0.0004943582462146878, "learning_rate": 5.460686914561453e-06, "loss": 1.7739832401275634e-06, "step": 333190 }, { "epoch": 94.57848424638092, "grad_norm": 0.00028767238836735487, "learning_rate": 5.457848424638093e-06, "loss": 3.9614737033844e-06, "step": 333200 }, { "epoch": 94.58132273630429, "grad_norm": 0.0008484016871079803, "learning_rate": 5.455009934714732e-06, "loss": 5.303695797920227e-06, "step": 333210 }, { "epoch": 94.58416122622765, "grad_norm": 0.0011126287281513214, "learning_rate": 5.452171444791371e-06, "loss": 2.701953053474426e-06, "step": 333220 }, { "epoch": 94.58699971615101, "grad_norm": 0.001055376254953444, "learning_rate": 5.44933295486801e-06, "loss": 4.1352584958076475e-06, "step": 333230 }, { "epoch": 94.58983820607436, "grad_norm": 9.241051157005131e-05, "learning_rate": 5.44649446494465e-06, "loss": 2.8988346457481383e-06, "step": 333240 }, { "epoch": 94.59267669599772, "grad_norm": 0.0001821670593926683, "learning_rate": 5.443655975021289e-06, "loss": 2.8043985366821287e-06, "step": 333250 }, { "epoch": 94.59551518592109, "grad_norm": 0.0009950943058356643, "learning_rate": 5.440817485097928e-06, "loss": 3.341957926750183e-06, "step": 333260 }, { "epoch": 94.59835367584445, "grad_norm": 0.0004632948839571327, "learning_rate": 5.437978995174568e-06, "loss": 3.5634264349937437e-06, "step": 333270 }, { "epoch": 94.60119216576781, "grad_norm": 0.0017006617272272706, "learning_rate": 5.435140505251207e-06, "loss": 4.54336404800415e-06, "step": 333280 }, { "epoch": 94.60403065569118, "grad_norm": 0.00138337560929358, "learning_rate": 5.4323020153278465e-06, "loss": 2.08485871553421e-06, "step": 333290 }, { "epoch": 94.60686914561454, "grad_norm": 0.0009921473683789372, "learning_rate": 5.429463525404485e-06, "loss": 3.914348781108856e-06, "step": 333300 }, { "epoch": 94.60970763553789, "grad_norm": 0.000480753107694909, "learning_rate": 5.426625035481124e-06, "loss": 2.8992071747779846e-06, "step": 333310 }, { "epoch": 94.61254612546125, "grad_norm": 0.00038672529626637697, "learning_rate": 5.423786545557764e-06, "loss": 3.231316804885864e-06, "step": 333320 }, { "epoch": 94.61538461538461, "grad_norm": 0.0011505689471960068, "learning_rate": 5.420948055634403e-06, "loss": 2.302415668964386e-06, "step": 333330 }, { "epoch": 94.61822310530798, "grad_norm": 0.00018565684149507433, "learning_rate": 5.418109565711042e-06, "loss": 3.4227967262268065e-06, "step": 333340 }, { "epoch": 94.62106159523134, "grad_norm": 0.0014275501016527414, "learning_rate": 5.415271075787681e-06, "loss": 2.2334977984428404e-06, "step": 333350 }, { "epoch": 94.6239000851547, "grad_norm": 0.0017346582608297467, "learning_rate": 5.41243258586432e-06, "loss": 2.5156885385513304e-06, "step": 333360 }, { "epoch": 94.62673857507806, "grad_norm": 0.0006218449561856687, "learning_rate": 5.40959409594096e-06, "loss": 2.3446977138519286e-06, "step": 333370 }, { "epoch": 94.62957706500141, "grad_norm": 0.0008740407065488398, "learning_rate": 5.4067556060175985e-06, "loss": 2.3826956748962403e-06, "step": 333380 }, { "epoch": 94.63241555492478, "grad_norm": 0.0004129495064262301, "learning_rate": 5.403917116094238e-06, "loss": 4.014000296592712e-06, "step": 333390 }, { "epoch": 94.63525404484814, "grad_norm": 0.0003646464610937983, "learning_rate": 5.401078626170877e-06, "loss": 3.0975788831710815e-06, "step": 333400 }, { "epoch": 94.6380925347715, "grad_norm": 0.0005692982813343406, "learning_rate": 5.398240136247516e-06, "loss": 2.855062484741211e-06, "step": 333410 }, { "epoch": 94.64093102469486, "grad_norm": 0.0006695199990645051, "learning_rate": 5.3954016463241556e-06, "loss": 2.6598572731018066e-06, "step": 333420 }, { "epoch": 94.64376951461823, "grad_norm": 0.0007041069911792874, "learning_rate": 5.392563156400795e-06, "loss": 2.417340874671936e-06, "step": 333430 }, { "epoch": 94.64660800454159, "grad_norm": 0.00106321822386235, "learning_rate": 5.389724666477435e-06, "loss": 3.6662444472312926e-06, "step": 333440 }, { "epoch": 94.64944649446494, "grad_norm": 0.00016051165584940463, "learning_rate": 5.386886176554074e-06, "loss": 2.3635104298591615e-06, "step": 333450 }, { "epoch": 94.6522849843883, "grad_norm": 0.0004988168948329985, "learning_rate": 5.384047686630713e-06, "loss": 2.5626271963119506e-06, "step": 333460 }, { "epoch": 94.65512347431167, "grad_norm": 0.00041277147829532623, "learning_rate": 5.381209196707352e-06, "loss": 1.999177038669586e-06, "step": 333470 }, { "epoch": 94.65796196423503, "grad_norm": 0.0006476428243331611, "learning_rate": 5.378370706783991e-06, "loss": 4.703924059867859e-06, "step": 333480 }, { "epoch": 94.66080045415839, "grad_norm": 0.0007315647671930492, "learning_rate": 5.375532216860631e-06, "loss": 2.7904286980628966e-06, "step": 333490 }, { "epoch": 94.66363894408175, "grad_norm": 0.0005975035019218922, "learning_rate": 5.37269372693727e-06, "loss": 1.8171966075897217e-06, "step": 333500 }, { "epoch": 94.66363894408175, "eval_accuracy": 0.9891269790805621, "eval_loss": 0.04755445569753647, "eval_runtime": 63.1718, "eval_samples_per_second": 248.956, "eval_steps_per_second": 3.894, "step": 333500 }, { "epoch": 94.6664774340051, "grad_norm": 0.00042937524267472327, "learning_rate": 5.3698552370139085e-06, "loss": 2.0736828446388244e-06, "step": 333510 }, { "epoch": 94.66931592392847, "grad_norm": 0.0004761710006278008, "learning_rate": 5.367016747090548e-06, "loss": 4.246830940246582e-06, "step": 333520 }, { "epoch": 94.67215441385183, "grad_norm": 0.0012021161383017898, "learning_rate": 5.364178257167187e-06, "loss": 2.936273813247681e-06, "step": 333530 }, { "epoch": 94.67499290377519, "grad_norm": 0.0011541059939190745, "learning_rate": 5.361339767243827e-06, "loss": 4.408322274684906e-06, "step": 333540 }, { "epoch": 94.67783139369855, "grad_norm": 0.0003363839059602469, "learning_rate": 5.3585012773204655e-06, "loss": 3.2041221857070924e-06, "step": 333550 }, { "epoch": 94.68066988362192, "grad_norm": 0.0010818422306329012, "learning_rate": 5.355662787397105e-06, "loss": 2.923235297203064e-06, "step": 333560 }, { "epoch": 94.68350837354528, "grad_norm": 0.004177269525825977, "learning_rate": 5.352824297473744e-06, "loss": 2.276897430419922e-06, "step": 333570 }, { "epoch": 94.68634686346863, "grad_norm": 0.0008883182890713215, "learning_rate": 5.349985807550383e-06, "loss": 2.911128103733063e-06, "step": 333580 }, { "epoch": 94.68918535339199, "grad_norm": 0.0007422122871503234, "learning_rate": 5.3471473176270225e-06, "loss": 3.1538307666778565e-06, "step": 333590 }, { "epoch": 94.69202384331535, "grad_norm": 0.0003850949869956821, "learning_rate": 5.344308827703662e-06, "loss": 2.261810004711151e-06, "step": 333600 }, { "epoch": 94.69486233323872, "grad_norm": 0.0009675122564658523, "learning_rate": 5.341470337780301e-06, "loss": 3.3082440495491026e-06, "step": 333610 }, { "epoch": 94.69770082316208, "grad_norm": 0.00040932372212409973, "learning_rate": 5.338631847856941e-06, "loss": 1.9958242774009705e-06, "step": 333620 }, { "epoch": 94.70053931308544, "grad_norm": 0.0014135012170299888, "learning_rate": 5.3357933579335795e-06, "loss": 2.4450942873954774e-06, "step": 333630 }, { "epoch": 94.7033778030088, "grad_norm": 0.0009526950889267027, "learning_rate": 5.332954868010219e-06, "loss": 3.4086406230926514e-06, "step": 333640 }, { "epoch": 94.70621629293215, "grad_norm": 0.0004818133602384478, "learning_rate": 5.330116378086858e-06, "loss": 2.9629096388816834e-06, "step": 333650 }, { "epoch": 94.70905478285552, "grad_norm": 0.004227477591484785, "learning_rate": 5.327277888163498e-06, "loss": 2.8209760785102844e-06, "step": 333660 }, { "epoch": 94.71189327277888, "grad_norm": 0.00024097401183098555, "learning_rate": 5.324439398240137e-06, "loss": 1.3960525393486023e-06, "step": 333670 }, { "epoch": 94.71473176270224, "grad_norm": 0.00048422024701721966, "learning_rate": 5.321600908316775e-06, "loss": 2.7762725949287415e-06, "step": 333680 }, { "epoch": 94.7175702526256, "grad_norm": 0.0007253312505781651, "learning_rate": 5.318762418393415e-06, "loss": 3.267638385295868e-06, "step": 333690 }, { "epoch": 94.72040874254897, "grad_norm": 0.00021569924138020724, "learning_rate": 5.315923928470054e-06, "loss": 2.2847205400466917e-06, "step": 333700 }, { "epoch": 94.72324723247232, "grad_norm": 0.0004813247942365706, "learning_rate": 5.313085438546694e-06, "loss": 2.5367364287376406e-06, "step": 333710 }, { "epoch": 94.72608572239568, "grad_norm": 0.0007123823743313551, "learning_rate": 5.3102469486233324e-06, "loss": 2.33575701713562e-06, "step": 333720 }, { "epoch": 94.72892421231904, "grad_norm": 0.0006628584233112633, "learning_rate": 5.307408458699971e-06, "loss": 2.382323145866394e-06, "step": 333730 }, { "epoch": 94.7317627022424, "grad_norm": 0.0001277358824154362, "learning_rate": 5.304569968776611e-06, "loss": 2.3189932107925415e-06, "step": 333740 }, { "epoch": 94.73460119216577, "grad_norm": 0.0015417308313772082, "learning_rate": 5.30173147885325e-06, "loss": 2.322345972061157e-06, "step": 333750 }, { "epoch": 94.73743968208913, "grad_norm": 0.0008311820565722883, "learning_rate": 5.2988929889298895e-06, "loss": 2.9759481549263e-06, "step": 333760 }, { "epoch": 94.7402781720125, "grad_norm": 0.0005570195498876274, "learning_rate": 5.296054499006529e-06, "loss": 2.775155007839203e-06, "step": 333770 }, { "epoch": 94.74311666193584, "grad_norm": 0.0001725051406538114, "learning_rate": 5.293216009083168e-06, "loss": 5.1461160182952884e-06, "step": 333780 }, { "epoch": 94.7459551518592, "grad_norm": 0.0005556164542213082, "learning_rate": 5.290377519159808e-06, "loss": 3.968365490436554e-06, "step": 333790 }, { "epoch": 94.74879364178257, "grad_norm": 0.002043973421677947, "learning_rate": 5.2875390292364465e-06, "loss": 3.1597912311553957e-06, "step": 333800 }, { "epoch": 94.75163213170593, "grad_norm": 0.00012038113345624879, "learning_rate": 5.284700539313086e-06, "loss": 2.8546899557113646e-06, "step": 333810 }, { "epoch": 94.7544706216293, "grad_norm": 0.014215744100511074, "learning_rate": 5.281862049389725e-06, "loss": 5.515106022357941e-06, "step": 333820 }, { "epoch": 94.75730911155266, "grad_norm": 0.002044080290943384, "learning_rate": 5.279023559466364e-06, "loss": 3.2942742109298704e-06, "step": 333830 }, { "epoch": 94.76014760147602, "grad_norm": 0.0006022977759130299, "learning_rate": 5.2761850695430035e-06, "loss": 3.1072646379470826e-06, "step": 333840 }, { "epoch": 94.76298609139937, "grad_norm": 0.000409813248552382, "learning_rate": 5.273346579619642e-06, "loss": 1.6862526535987853e-06, "step": 333850 }, { "epoch": 94.76582458132273, "grad_norm": 0.0016540242359042168, "learning_rate": 5.270508089696282e-06, "loss": 3.2803043723106383e-06, "step": 333860 }, { "epoch": 94.7686630712461, "grad_norm": 0.0005402958486229181, "learning_rate": 5.267669599772921e-06, "loss": 2.096593379974365e-06, "step": 333870 }, { "epoch": 94.77150156116946, "grad_norm": 0.000325165456160903, "learning_rate": 5.2648311098495606e-06, "loss": 2.9865652322769167e-06, "step": 333880 }, { "epoch": 94.77434005109282, "grad_norm": 7.83046634751372e-05, "learning_rate": 5.261992619926199e-06, "loss": 3.1232833862304687e-06, "step": 333890 }, { "epoch": 94.77717854101618, "grad_norm": 0.0001239622215507552, "learning_rate": 5.259154130002838e-06, "loss": 3.334134817123413e-06, "step": 333900 }, { "epoch": 94.78001703093955, "grad_norm": 0.0026934565976262093, "learning_rate": 5.256315640079478e-06, "loss": 2.86884605884552e-06, "step": 333910 }, { "epoch": 94.7828555208629, "grad_norm": 0.00494846748188138, "learning_rate": 5.253477150156117e-06, "loss": 3.837421536445617e-06, "step": 333920 }, { "epoch": 94.78569401078626, "grad_norm": 0.00038854856393299997, "learning_rate": 5.250638660232756e-06, "loss": 2.730824053287506e-06, "step": 333930 }, { "epoch": 94.78853250070962, "grad_norm": 0.00028078214381821454, "learning_rate": 5.247800170309396e-06, "loss": 2.7045607566833496e-06, "step": 333940 }, { "epoch": 94.79137099063298, "grad_norm": 0.0011705372016876936, "learning_rate": 5.244961680386035e-06, "loss": 1.9095838069915772e-06, "step": 333950 }, { "epoch": 94.79420948055635, "grad_norm": 0.000576846010517329, "learning_rate": 5.242123190462675e-06, "loss": 1.8263235688209534e-06, "step": 333960 }, { "epoch": 94.79704797047971, "grad_norm": 0.0008162839221768081, "learning_rate": 5.2392847005393134e-06, "loss": 2.890266478061676e-06, "step": 333970 }, { "epoch": 94.79988646040306, "grad_norm": 0.00015172816347330809, "learning_rate": 5.236446210615953e-06, "loss": 3.0659139156341553e-06, "step": 333980 }, { "epoch": 94.80272495032642, "grad_norm": 0.0013266726164147258, "learning_rate": 5.233607720692592e-06, "loss": 1.8959864974021911e-06, "step": 333990 }, { "epoch": 94.80556344024978, "grad_norm": 0.000850232201628387, "learning_rate": 5.230769230769231e-06, "loss": 2.4765729904174803e-06, "step": 334000 }, { "epoch": 94.80556344024978, "eval_accuracy": 0.9890633941629046, "eval_loss": 0.04739978536963463, "eval_runtime": 49.0917, "eval_samples_per_second": 320.359, "eval_steps_per_second": 5.011, "step": 334000 }, { "epoch": 94.80840193017315, "grad_norm": 0.0008545201853848994, "learning_rate": 5.2279307408458705e-06, "loss": 3.7139281630516054e-06, "step": 334010 }, { "epoch": 94.81124042009651, "grad_norm": 0.0006179798510856926, "learning_rate": 5.225092250922509e-06, "loss": 4.2280182242393495e-06, "step": 334020 }, { "epoch": 94.81407891001987, "grad_norm": 0.00031165964901447296, "learning_rate": 5.222253760999149e-06, "loss": 3.373436629772186e-06, "step": 334030 }, { "epoch": 94.81691739994324, "grad_norm": 0.0010198205709457397, "learning_rate": 5.219415271075788e-06, "loss": 3.658793866634369e-06, "step": 334040 }, { "epoch": 94.81975588986658, "grad_norm": 0.0008241732139140368, "learning_rate": 5.216576781152427e-06, "loss": 2.619810402393341e-06, "step": 334050 }, { "epoch": 94.82259437978995, "grad_norm": 0.003074492560699582, "learning_rate": 5.213738291229066e-06, "loss": 3.3680349588394166e-06, "step": 334060 }, { "epoch": 94.82543286971331, "grad_norm": 0.0018455147510394454, "learning_rate": 5.210899801305705e-06, "loss": 2.1751970052719115e-06, "step": 334070 }, { "epoch": 94.82827135963667, "grad_norm": 0.00018560627358965576, "learning_rate": 5.208061311382345e-06, "loss": 2.3679807782173156e-06, "step": 334080 }, { "epoch": 94.83110984956004, "grad_norm": 0.0005642867763526738, "learning_rate": 5.205222821458984e-06, "loss": 2.0975247025489805e-06, "step": 334090 }, { "epoch": 94.8339483394834, "grad_norm": 0.0017286546062678099, "learning_rate": 5.202384331535623e-06, "loss": 2.1811574697494507e-06, "step": 334100 }, { "epoch": 94.83678682940676, "grad_norm": 0.0005977409309707582, "learning_rate": 5.199545841612263e-06, "loss": 2.1744519472122194e-06, "step": 334110 }, { "epoch": 94.83962531933011, "grad_norm": 0.0004004229558631778, "learning_rate": 5.196707351688902e-06, "loss": 2.771057188510895e-06, "step": 334120 }, { "epoch": 94.84246380925347, "grad_norm": 0.0002117009280482307, "learning_rate": 5.1938688617655416e-06, "loss": 4.382431507110596e-06, "step": 334130 }, { "epoch": 94.84530229917684, "grad_norm": 0.0004891863791272044, "learning_rate": 5.19103037184218e-06, "loss": 2.806633710861206e-06, "step": 334140 }, { "epoch": 94.8481407891002, "grad_norm": 0.0030396038200706244, "learning_rate": 5.188191881918819e-06, "loss": 2.789497375488281e-06, "step": 334150 }, { "epoch": 94.85097927902356, "grad_norm": 0.00038748589577153325, "learning_rate": 5.185353391995459e-06, "loss": 1.9181519746780395e-06, "step": 334160 }, { "epoch": 94.85381776894693, "grad_norm": 0.001162524102255702, "learning_rate": 5.182514902072098e-06, "loss": 3.4337863326072693e-06, "step": 334170 }, { "epoch": 94.85665625887029, "grad_norm": 0.00036549396463669837, "learning_rate": 5.179676412148737e-06, "loss": 3.1206756830215456e-06, "step": 334180 }, { "epoch": 94.85949474879364, "grad_norm": 0.00034272336051799357, "learning_rate": 5.176837922225376e-06, "loss": 1.5826895833015443e-06, "step": 334190 }, { "epoch": 94.862333238717, "grad_norm": 0.0007348359213210642, "learning_rate": 5.173999432302016e-06, "loss": 2.5050714612007143e-06, "step": 334200 }, { "epoch": 94.86517172864036, "grad_norm": 0.0006873270613141358, "learning_rate": 5.171160942378655e-06, "loss": 2.9109418392181396e-06, "step": 334210 }, { "epoch": 94.86801021856373, "grad_norm": 0.0007481980719603598, "learning_rate": 5.168322452455294e-06, "loss": 3.4030526876449585e-06, "step": 334220 }, { "epoch": 94.87084870848709, "grad_norm": 0.00018363930576015264, "learning_rate": 5.165483962531933e-06, "loss": 2.5436282157897948e-06, "step": 334230 }, { "epoch": 94.87368719841045, "grad_norm": 0.000262539746472612, "learning_rate": 5.162645472608572e-06, "loss": 2.6052817702293397e-06, "step": 334240 }, { "epoch": 94.8765256883338, "grad_norm": 0.00029906502459198236, "learning_rate": 5.159806982685212e-06, "loss": 1.3651326298713684e-06, "step": 334250 }, { "epoch": 94.87936417825716, "grad_norm": 0.000613408803474158, "learning_rate": 5.156968492761851e-06, "loss": 2.6475638151168825e-06, "step": 334260 }, { "epoch": 94.88220266818053, "grad_norm": 0.0015517097199335694, "learning_rate": 5.15413000283849e-06, "loss": 2.979673445224762e-06, "step": 334270 }, { "epoch": 94.88504115810389, "grad_norm": 0.0007457205792888999, "learning_rate": 5.151291512915129e-06, "loss": 2.796947956085205e-06, "step": 334280 }, { "epoch": 94.88787964802725, "grad_norm": 0.0011502784909680486, "learning_rate": 5.148453022991769e-06, "loss": 2.864934504032135e-06, "step": 334290 }, { "epoch": 94.89071813795061, "grad_norm": 0.00024404969008173794, "learning_rate": 5.1456145330684085e-06, "loss": 2.239830791950226e-06, "step": 334300 }, { "epoch": 94.89355662787398, "grad_norm": 0.00015594641445204616, "learning_rate": 5.142776043145047e-06, "loss": 1.991167664527893e-06, "step": 334310 }, { "epoch": 94.89639511779733, "grad_norm": 0.0003517707227729261, "learning_rate": 5.139937553221686e-06, "loss": 2.7596950531005858e-06, "step": 334320 }, { "epoch": 94.89923360772069, "grad_norm": 0.0016183851985260844, "learning_rate": 5.137099063298326e-06, "loss": 1.5383586287498474e-06, "step": 334330 }, { "epoch": 94.90207209764405, "grad_norm": 0.0005139827844686806, "learning_rate": 5.134260573374965e-06, "loss": 1.9185245037078857e-06, "step": 334340 }, { "epoch": 94.90491058756741, "grad_norm": 0.0004936896148137748, "learning_rate": 5.131422083451604e-06, "loss": 2.9532238841056824e-06, "step": 334350 }, { "epoch": 94.90774907749078, "grad_norm": 0.00026859427453018725, "learning_rate": 5.128583593528243e-06, "loss": 2.0228326320648193e-06, "step": 334360 }, { "epoch": 94.91058756741414, "grad_norm": 0.0008003762341104448, "learning_rate": 5.125745103604882e-06, "loss": 2.204999327659607e-06, "step": 334370 }, { "epoch": 94.9134260573375, "grad_norm": 0.000273908517556265, "learning_rate": 5.122906613681522e-06, "loss": 1.580454409122467e-06, "step": 334380 }, { "epoch": 94.91626454726085, "grad_norm": 0.0006812116480432451, "learning_rate": 5.1200681237581606e-06, "loss": 2.2644177079200745e-06, "step": 334390 }, { "epoch": 94.91910303718421, "grad_norm": 0.0002514721709303558, "learning_rate": 5.1172296338348e-06, "loss": 2.310052514076233e-06, "step": 334400 }, { "epoch": 94.92194152710758, "grad_norm": 0.0010603985283523798, "learning_rate": 5.114391143911439e-06, "loss": 1.8566846847534179e-06, "step": 334410 }, { "epoch": 94.92478001703094, "grad_norm": 0.0008682560874149203, "learning_rate": 5.111552653988078e-06, "loss": 2.317503094673157e-06, "step": 334420 }, { "epoch": 94.9276185069543, "grad_norm": 0.0001862388162408024, "learning_rate": 5.108714164064718e-06, "loss": 2.5484710931777955e-06, "step": 334430 }, { "epoch": 94.93045699687767, "grad_norm": 0.007132719270884991, "learning_rate": 5.105875674141357e-06, "loss": 2.9167160391807558e-06, "step": 334440 }, { "epoch": 94.93329548680101, "grad_norm": 0.00041720998706296086, "learning_rate": 5.103037184217996e-06, "loss": 4.119984805583954e-06, "step": 334450 }, { "epoch": 94.93613397672438, "grad_norm": 0.0018437599064782262, "learning_rate": 5.100198694294636e-06, "loss": 2.660974860191345e-06, "step": 334460 }, { "epoch": 94.93897246664774, "grad_norm": 0.00029464310500770807, "learning_rate": 5.097360204371275e-06, "loss": 1.9239261746406557e-06, "step": 334470 }, { "epoch": 94.9418109565711, "grad_norm": 0.000736942165531218, "learning_rate": 5.094521714447914e-06, "loss": 1.969747245311737e-06, "step": 334480 }, { "epoch": 94.94464944649447, "grad_norm": 0.0007911415887065232, "learning_rate": 5.091683224524553e-06, "loss": 2.4812296032905577e-06, "step": 334490 }, { "epoch": 94.94748793641783, "grad_norm": 0.0005904965801164508, "learning_rate": 5.088844734601193e-06, "loss": 4.1997060179710385e-06, "step": 334500 }, { "epoch": 94.94748793641783, "eval_accuracy": 0.9890633941629046, "eval_loss": 0.04763644188642502, "eval_runtime": 42.9943, "eval_samples_per_second": 365.793, "eval_steps_per_second": 5.722, "step": 334500 }, { "epoch": 94.95032642634119, "grad_norm": 0.00013447980745695531, "learning_rate": 5.086290093670168e-06, "loss": 1.8000602722167968e-06, "step": 334510 }, { "epoch": 94.95316491626454, "grad_norm": 0.0018985834904015064, "learning_rate": 5.0834516037468065e-06, "loss": 2.1867454051971436e-06, "step": 334520 }, { "epoch": 94.9560034061879, "grad_norm": 0.00036009354516863823, "learning_rate": 5.080613113823446e-06, "loss": 4.5584514737129215e-06, "step": 334530 }, { "epoch": 94.95884189611127, "grad_norm": 0.00020231342932675034, "learning_rate": 5.077774623900086e-06, "loss": 3.254227340221405e-06, "step": 334540 }, { "epoch": 94.96168038603463, "grad_norm": 0.0005013840273022652, "learning_rate": 5.074936133976725e-06, "loss": 2.376176416873932e-06, "step": 334550 }, { "epoch": 94.96451887595799, "grad_norm": 0.001907423371449113, "learning_rate": 5.072097644053364e-06, "loss": 4.339031875133514e-06, "step": 334560 }, { "epoch": 94.96735736588136, "grad_norm": 0.0036083811428397894, "learning_rate": 5.069259154130003e-06, "loss": 2.9690563678741455e-06, "step": 334570 }, { "epoch": 94.97019585580472, "grad_norm": 0.0014915443025529385, "learning_rate": 5.066420664206642e-06, "loss": 2.5685876607894897e-06, "step": 334580 }, { "epoch": 94.97303434572807, "grad_norm": 0.0011179561261087656, "learning_rate": 5.063582174283282e-06, "loss": 3.1266361474990843e-06, "step": 334590 }, { "epoch": 94.97587283565143, "grad_norm": 0.00035775627475231886, "learning_rate": 5.0607436843599206e-06, "loss": 3.974325954914093e-06, "step": 334600 }, { "epoch": 94.97871132557479, "grad_norm": 0.0004125434788875282, "learning_rate": 5.05790519443656e-06, "loss": 2.7632340788841248e-06, "step": 334610 }, { "epoch": 94.98154981549816, "grad_norm": 0.0008819615468382835, "learning_rate": 5.055066704513199e-06, "loss": 2.8248876333236693e-06, "step": 334620 }, { "epoch": 94.98438830542152, "grad_norm": 0.001396785955876112, "learning_rate": 5.052228214589839e-06, "loss": 4.220008850097656e-06, "step": 334630 }, { "epoch": 94.98722679534488, "grad_norm": 0.00021897765691392124, "learning_rate": 5.049389724666478e-06, "loss": 2.380460500717163e-06, "step": 334640 }, { "epoch": 94.99006528526824, "grad_norm": 0.0005048375460319221, "learning_rate": 5.046551234743116e-06, "loss": 2.1861866116523744e-06, "step": 334650 }, { "epoch": 94.99290377519159, "grad_norm": 0.0008472750196233392, "learning_rate": 5.043712744819756e-06, "loss": 2.2759661078453066e-06, "step": 334660 }, { "epoch": 94.99574226511496, "grad_norm": 0.00106124859303236, "learning_rate": 5.040874254896395e-06, "loss": 2.1725893020629885e-06, "step": 334670 }, { "epoch": 94.99858075503832, "grad_norm": 0.0003840856079477817, "learning_rate": 5.038035764973035e-06, "loss": 2.543255686759949e-06, "step": 334680 }, { "epoch": 95.00141924496168, "grad_norm": 0.0010003555798903108, "learning_rate": 5.03548112404201e-06, "loss": 2.501498602214269e-06, "step": 334690 }, { "epoch": 95.00425773488504, "grad_norm": 0.004899166990071535, "learning_rate": 5.032642634118649e-06, "loss": 3.0312687158584596e-06, "step": 334700 }, { "epoch": 95.00709622480841, "grad_norm": 0.0003421692526899278, "learning_rate": 5.029804144195288e-06, "loss": 2.440996468067169e-06, "step": 334710 }, { "epoch": 95.00993471473176, "grad_norm": 0.0006761820404790342, "learning_rate": 5.026965654271928e-06, "loss": 3.310292959213257e-06, "step": 334720 }, { "epoch": 95.01277320465512, "grad_norm": 0.00010342275345465168, "learning_rate": 5.0241271643485665e-06, "loss": 3.3034011721611023e-06, "step": 334730 }, { "epoch": 95.01561169457848, "grad_norm": 0.00014561365242116153, "learning_rate": 5.021288674425206e-06, "loss": 1.987069845199585e-06, "step": 334740 }, { "epoch": 95.01845018450184, "grad_norm": 0.0007972394814714789, "learning_rate": 5.018450184501845e-06, "loss": 2.50842422246933e-06, "step": 334750 }, { "epoch": 95.02128867442521, "grad_norm": 0.00014248109073378146, "learning_rate": 5.015611694578484e-06, "loss": 2.0569190382957458e-06, "step": 334760 }, { "epoch": 95.02412716434857, "grad_norm": 0.00025098296464420855, "learning_rate": 5.0127732046551235e-06, "loss": 2.3368746042251585e-06, "step": 334770 }, { "epoch": 95.02696565427193, "grad_norm": 0.003106113523244858, "learning_rate": 5.009934714731763e-06, "loss": 2.6442110538482664e-06, "step": 334780 }, { "epoch": 95.02980414419528, "grad_norm": 0.0006440537981688976, "learning_rate": 5.007096224808402e-06, "loss": 3.664940595626831e-06, "step": 334790 }, { "epoch": 95.03264263411864, "grad_norm": 0.00023681396851316094, "learning_rate": 5.004257734885042e-06, "loss": 2.541951835155487e-06, "step": 334800 }, { "epoch": 95.03548112404201, "grad_norm": 0.002413359237834811, "learning_rate": 5.0014192449616806e-06, "loss": 2.152658998966217e-06, "step": 334810 }, { "epoch": 95.03831961396537, "grad_norm": 0.0002982945879921317, "learning_rate": 4.99858075503832e-06, "loss": 3.295019268989563e-06, "step": 334820 }, { "epoch": 95.04115810388873, "grad_norm": 0.00034713090281002223, "learning_rate": 4.995742265114959e-06, "loss": 2.917274832725525e-06, "step": 334830 }, { "epoch": 95.0439965938121, "grad_norm": 0.001082549337297678, "learning_rate": 4.992903775191599e-06, "loss": 1.9719824194908143e-06, "step": 334840 }, { "epoch": 95.04683508373546, "grad_norm": 0.0013204270508140326, "learning_rate": 4.990065285268238e-06, "loss": 3.3423304557800295e-06, "step": 334850 }, { "epoch": 95.04967357365881, "grad_norm": 0.0016112589510157704, "learning_rate": 4.9872267953448764e-06, "loss": 2.265535295009613e-06, "step": 334860 }, { "epoch": 95.05251206358217, "grad_norm": 0.0003524828644003719, "learning_rate": 4.984388305421516e-06, "loss": 2.251379191875458e-06, "step": 334870 }, { "epoch": 95.05535055350553, "grad_norm": 0.0007686346070840955, "learning_rate": 4.981549815498155e-06, "loss": 2.3577362298965453e-06, "step": 334880 }, { "epoch": 95.0581890434289, "grad_norm": 0.0002497140958439559, "learning_rate": 4.978711325574795e-06, "loss": 1.492723822593689e-06, "step": 334890 }, { "epoch": 95.06102753335226, "grad_norm": 0.0008957693935371935, "learning_rate": 4.9758728356514335e-06, "loss": 2.8846785426139832e-06, "step": 334900 }, { "epoch": 95.06386602327562, "grad_norm": 0.0014683973276987672, "learning_rate": 4.973034345728072e-06, "loss": 2.608075737953186e-06, "step": 334910 }, { "epoch": 95.06670451319899, "grad_norm": 0.0011528416071087122, "learning_rate": 4.970195855804712e-06, "loss": 2.9316172003746034e-06, "step": 334920 }, { "epoch": 95.06954300312233, "grad_norm": 0.0005042626871727407, "learning_rate": 4.967357365881351e-06, "loss": 3.259629011154175e-06, "step": 334930 }, { "epoch": 95.0723814930457, "grad_norm": 0.00030922339647077024, "learning_rate": 4.9645188759579905e-06, "loss": 2.3202970623970033e-06, "step": 334940 }, { "epoch": 95.07521998296906, "grad_norm": 0.0010583362309262156, "learning_rate": 4.96168038603463e-06, "loss": 1.7151236534118651e-06, "step": 334950 }, { "epoch": 95.07805847289242, "grad_norm": 0.00022033088316675276, "learning_rate": 4.958841896111269e-06, "loss": 2.731196582317352e-06, "step": 334960 }, { "epoch": 95.08089696281579, "grad_norm": 0.0006158899050205946, "learning_rate": 4.956003406187909e-06, "loss": 2.8192996978759764e-06, "step": 334970 }, { "epoch": 95.08373545273915, "grad_norm": 0.00018790352623909712, "learning_rate": 4.9531649162645475e-06, "loss": 3.186240792274475e-06, "step": 334980 }, { "epoch": 95.0865739426625, "grad_norm": 9.16036733542569e-05, "learning_rate": 4.950326426341187e-06, "loss": 2.253986895084381e-06, "step": 334990 }, { "epoch": 95.08941243258586, "grad_norm": 0.000687357212882489, "learning_rate": 4.947487936417826e-06, "loss": 2.6989728212356567e-06, "step": 335000 }, { "epoch": 95.08941243258586, "eval_accuracy": 0.988999809245247, "eval_loss": 0.047486938536167145, "eval_runtime": 54.1931, "eval_samples_per_second": 290.203, "eval_steps_per_second": 4.539, "step": 335000 }, { "epoch": 95.09225092250922, "grad_norm": 0.0006789412000216544, "learning_rate": 4.944649446494466e-06, "loss": 2.456456422805786e-06, "step": 335010 }, { "epoch": 95.09508941243259, "grad_norm": 0.001185632892884314, "learning_rate": 4.9418109565711045e-06, "loss": 2.2981315851211547e-06, "step": 335020 }, { "epoch": 95.09792790235595, "grad_norm": 0.0006236390327103436, "learning_rate": 4.938972466647743e-06, "loss": 2.424977719783783e-06, "step": 335030 }, { "epoch": 95.10076639227931, "grad_norm": 0.0005480576073750854, "learning_rate": 4.936133976724383e-06, "loss": 1.6024336218833922e-06, "step": 335040 }, { "epoch": 95.10360488220267, "grad_norm": 0.0002815722837112844, "learning_rate": 4.933295486801022e-06, "loss": 1.697428524494171e-06, "step": 335050 }, { "epoch": 95.10644337212602, "grad_norm": 0.0004261539434082806, "learning_rate": 4.9304569968776616e-06, "loss": 2.4788081645965575e-06, "step": 335060 }, { "epoch": 95.10928186204939, "grad_norm": 0.0005326915998011827, "learning_rate": 4.9276185069543e-06, "loss": 3.909319639205933e-06, "step": 335070 }, { "epoch": 95.11212035197275, "grad_norm": 0.003280100878328085, "learning_rate": 4.924780017030939e-06, "loss": 3.1989067792892458e-06, "step": 335080 }, { "epoch": 95.11495884189611, "grad_norm": 0.0005076536908745766, "learning_rate": 4.921941527107579e-06, "loss": 1.990608870983124e-06, "step": 335090 }, { "epoch": 95.11779733181947, "grad_norm": 0.011338084004819393, "learning_rate": 4.919103037184218e-06, "loss": 3.886595368385315e-06, "step": 335100 }, { "epoch": 95.12063582174284, "grad_norm": 0.00023037292703520507, "learning_rate": 4.9162645472608574e-06, "loss": 1.7838552594184876e-06, "step": 335110 }, { "epoch": 95.1234743116662, "grad_norm": 0.00010739846766227856, "learning_rate": 4.913426057337497e-06, "loss": 2.9496848583221434e-06, "step": 335120 }, { "epoch": 95.12631280158955, "grad_norm": 0.006859784945845604, "learning_rate": 4.910587567414136e-06, "loss": 5.22620975971222e-06, "step": 335130 }, { "epoch": 95.12915129151291, "grad_norm": 0.0007907882099971175, "learning_rate": 4.907749077490776e-06, "loss": 3.321841359138489e-06, "step": 335140 }, { "epoch": 95.13198978143627, "grad_norm": 0.0022809095680713654, "learning_rate": 4.9049105875674145e-06, "loss": 3.0312687158584596e-06, "step": 335150 }, { "epoch": 95.13482827135964, "grad_norm": 0.00012923730537295341, "learning_rate": 4.902072097644054e-06, "loss": 1.2733042240142823e-06, "step": 335160 }, { "epoch": 95.137666761283, "grad_norm": 0.0035666394978761673, "learning_rate": 4.899233607720693e-06, "loss": 4.0728598833084105e-06, "step": 335170 }, { "epoch": 95.14050525120636, "grad_norm": 0.0008110642665997148, "learning_rate": 4.896395117797332e-06, "loss": 1.8231570720672607e-06, "step": 335180 }, { "epoch": 95.14334374112971, "grad_norm": 0.00023060725652612746, "learning_rate": 4.8935566278739715e-06, "loss": 2.465769648551941e-06, "step": 335190 }, { "epoch": 95.14618223105307, "grad_norm": 0.0005505813751369715, "learning_rate": 4.89071813795061e-06, "loss": 2.60230153799057e-06, "step": 335200 }, { "epoch": 95.14902072097644, "grad_norm": 0.0010674772784113884, "learning_rate": 4.88787964802725e-06, "loss": 2.1170824766159056e-06, "step": 335210 }, { "epoch": 95.1518592108998, "grad_norm": 0.00030899388366378844, "learning_rate": 4.885041158103889e-06, "loss": 2.1964311599731446e-06, "step": 335220 }, { "epoch": 95.15469770082316, "grad_norm": 0.0016910400008782744, "learning_rate": 4.882202668180528e-06, "loss": 2.6360154151916505e-06, "step": 335230 }, { "epoch": 95.15753619074653, "grad_norm": 0.00017812340229284018, "learning_rate": 4.879364178257167e-06, "loss": 1.7022714018821716e-06, "step": 335240 }, { "epoch": 95.16037468066989, "grad_norm": 0.00022866814106237143, "learning_rate": 4.876525688333806e-06, "loss": 1.8630176782608033e-06, "step": 335250 }, { "epoch": 95.16321317059324, "grad_norm": 0.0008808138663880527, "learning_rate": 4.873687198410446e-06, "loss": 2.5952234864234923e-06, "step": 335260 }, { "epoch": 95.1660516605166, "grad_norm": 0.0008105540182441473, "learning_rate": 4.870848708487085e-06, "loss": 1.8883496522903442e-06, "step": 335270 }, { "epoch": 95.16889015043996, "grad_norm": 0.0009726332500576973, "learning_rate": 4.868010218563724e-06, "loss": 2.0015984773635863e-06, "step": 335280 }, { "epoch": 95.17172864036333, "grad_norm": 0.0010440570767968893, "learning_rate": 4.865171728640363e-06, "loss": 2.5276094675064087e-06, "step": 335290 }, { "epoch": 95.17456713028669, "grad_norm": 0.0007796487770974636, "learning_rate": 4.862333238717003e-06, "loss": 2.4611130356788634e-06, "step": 335300 }, { "epoch": 95.17740562021005, "grad_norm": 0.0006602565990760922, "learning_rate": 4.859494748793643e-06, "loss": 2.261251211166382e-06, "step": 335310 }, { "epoch": 95.18024411013342, "grad_norm": 0.0004438113246578723, "learning_rate": 4.856656258870281e-06, "loss": 3.330036997795105e-06, "step": 335320 }, { "epoch": 95.18308260005676, "grad_norm": 0.00042712577851489186, "learning_rate": 4.853817768946921e-06, "loss": 2.028606832027435e-06, "step": 335330 }, { "epoch": 95.18592108998013, "grad_norm": 0.0010175384813919663, "learning_rate": 4.85097927902356e-06, "loss": 1.8214806914329528e-06, "step": 335340 }, { "epoch": 95.18875957990349, "grad_norm": 0.00014219502918422222, "learning_rate": 4.848140789100199e-06, "loss": 2.747774124145508e-06, "step": 335350 }, { "epoch": 95.19159806982685, "grad_norm": 0.00016968307318165898, "learning_rate": 4.8453022991768384e-06, "loss": 1.816079020500183e-06, "step": 335360 }, { "epoch": 95.19443655975022, "grad_norm": 0.0029047660063952208, "learning_rate": 4.842463809253477e-06, "loss": 3.367289900779724e-06, "step": 335370 }, { "epoch": 95.19727504967358, "grad_norm": 0.0005009692977182567, "learning_rate": 4.839625319330117e-06, "loss": 2.505257725715637e-06, "step": 335380 }, { "epoch": 95.20011353959694, "grad_norm": 0.0005234211566857994, "learning_rate": 4.836786829406756e-06, "loss": 2.6518478989601136e-06, "step": 335390 }, { "epoch": 95.20295202952029, "grad_norm": 0.0028544149827212095, "learning_rate": 4.833948339483395e-06, "loss": 2.893432974815369e-06, "step": 335400 }, { "epoch": 95.20579051944365, "grad_norm": 0.0007219829130917788, "learning_rate": 4.831109849560034e-06, "loss": 2.6656314730644224e-06, "step": 335410 }, { "epoch": 95.20862900936702, "grad_norm": 0.006225578486919403, "learning_rate": 4.828271359636673e-06, "loss": 3.0234456062316895e-06, "step": 335420 }, { "epoch": 95.21146749929038, "grad_norm": 0.0006587347015738487, "learning_rate": 4.825432869713313e-06, "loss": 1.926906406879425e-06, "step": 335430 }, { "epoch": 95.21430598921374, "grad_norm": 0.0008354635210707784, "learning_rate": 4.822594379789952e-06, "loss": 3.77018004655838e-06, "step": 335440 }, { "epoch": 95.2171444791371, "grad_norm": 0.0021035943645983934, "learning_rate": 4.819755889866591e-06, "loss": 2.226606011390686e-06, "step": 335450 }, { "epoch": 95.21998296906045, "grad_norm": 0.0032439425121992826, "learning_rate": 4.81691739994323e-06, "loss": 3.4438446164131166e-06, "step": 335460 }, { "epoch": 95.22282145898382, "grad_norm": 0.000528912409208715, "learning_rate": 4.81407891001987e-06, "loss": 2.720765769481659e-06, "step": 335470 }, { "epoch": 95.22565994890718, "grad_norm": 0.0006523555493913591, "learning_rate": 4.8112404200965095e-06, "loss": 3.573484718799591e-06, "step": 335480 }, { "epoch": 95.22849843883054, "grad_norm": 0.0008284564246423542, "learning_rate": 4.808401930173148e-06, "loss": 2.6617199182510376e-06, "step": 335490 }, { "epoch": 95.2313369287539, "grad_norm": 0.0005729413242079318, "learning_rate": 4.805563440249787e-06, "loss": 2.0345672965049743e-06, "step": 335500 }, { "epoch": 95.2313369287539, "eval_accuracy": 0.988872639409932, "eval_loss": 0.04738923907279968, "eval_runtime": 59.3813, "eval_samples_per_second": 264.848, "eval_steps_per_second": 4.143, "step": 335500 }, { "epoch": 95.23417541867727, "grad_norm": 0.00014011959137860686, "learning_rate": 4.802724950326427e-06, "loss": 3.723427653312683e-06, "step": 335510 }, { "epoch": 95.23701390860063, "grad_norm": 0.00014808277774136513, "learning_rate": 4.799886460403066e-06, "loss": 2.7878209948539735e-06, "step": 335520 }, { "epoch": 95.23985239852398, "grad_norm": 0.0001834261347539723, "learning_rate": 4.797047970479705e-06, "loss": 2.3853033781051634e-06, "step": 335530 }, { "epoch": 95.24269088844734, "grad_norm": 0.0003520353347994387, "learning_rate": 4.794209480556344e-06, "loss": 5.632452666759491e-06, "step": 335540 }, { "epoch": 95.2455293783707, "grad_norm": 0.0009399145492352545, "learning_rate": 4.791370990632983e-06, "loss": 7.001124322414398e-06, "step": 335550 }, { "epoch": 95.24836786829407, "grad_norm": 0.000990608474239707, "learning_rate": 4.788532500709623e-06, "loss": 2.545677125453949e-06, "step": 335560 }, { "epoch": 95.25120635821743, "grad_norm": 0.000729416380636394, "learning_rate": 4.7856940107862616e-06, "loss": 2.282485365867615e-06, "step": 335570 }, { "epoch": 95.2540448481408, "grad_norm": 0.000774995016399771, "learning_rate": 4.782855520862901e-06, "loss": 2.5443732738494873e-06, "step": 335580 }, { "epoch": 95.25688333806416, "grad_norm": 0.0017252117395401, "learning_rate": 4.78001703093954e-06, "loss": 5.082041025161743e-06, "step": 335590 }, { "epoch": 95.2597218279875, "grad_norm": 0.000518440268933773, "learning_rate": 4.777178541016179e-06, "loss": 4.394538700580597e-06, "step": 335600 }, { "epoch": 95.26256031791087, "grad_norm": 0.000597081147134304, "learning_rate": 4.774340051092819e-06, "loss": 2.130866050720215e-06, "step": 335610 }, { "epoch": 95.26539880783423, "grad_norm": 0.0016876182053238153, "learning_rate": 4.771501561169458e-06, "loss": 3.0076131224632264e-06, "step": 335620 }, { "epoch": 95.2682372977576, "grad_norm": 0.0006713554612360895, "learning_rate": 4.768663071246097e-06, "loss": 2.9120594263076784e-06, "step": 335630 }, { "epoch": 95.27107578768096, "grad_norm": 0.00019411963876336813, "learning_rate": 4.765824581322737e-06, "loss": 2.5283545255661012e-06, "step": 335640 }, { "epoch": 95.27391427760432, "grad_norm": 0.0005269075627438724, "learning_rate": 4.7629860913993765e-06, "loss": 2.109818160533905e-06, "step": 335650 }, { "epoch": 95.27675276752768, "grad_norm": 0.0026799740735441446, "learning_rate": 4.760147601476015e-06, "loss": 1.010950654745102e-05, "step": 335660 }, { "epoch": 95.27959125745103, "grad_norm": 0.00018328304577153176, "learning_rate": 4.757309111552654e-06, "loss": 3.219209611415863e-06, "step": 335670 }, { "epoch": 95.2824297473744, "grad_norm": 0.0004732973175123334, "learning_rate": 4.754470621629294e-06, "loss": 2.85860151052475e-06, "step": 335680 }, { "epoch": 95.28526823729776, "grad_norm": 0.0017021190142259002, "learning_rate": 4.751632131705933e-06, "loss": 3.95067036151886e-06, "step": 335690 }, { "epoch": 95.28810672722112, "grad_norm": 0.0013284781016409397, "learning_rate": 4.748793641782572e-06, "loss": 2.389587461948395e-06, "step": 335700 }, { "epoch": 95.29094521714448, "grad_norm": 0.00031626192503608763, "learning_rate": 4.745955151859211e-06, "loss": 2.7047470211982725e-06, "step": 335710 }, { "epoch": 95.29378370706785, "grad_norm": 0.0007584932027384639, "learning_rate": 4.74311666193585e-06, "loss": 3.4943222999572754e-06, "step": 335720 }, { "epoch": 95.2966221969912, "grad_norm": 0.0015860211569815874, "learning_rate": 4.74027817201249e-06, "loss": 3.938004374504089e-06, "step": 335730 }, { "epoch": 95.29946068691456, "grad_norm": 0.00026902477839030325, "learning_rate": 4.7374396820891285e-06, "loss": 2.796761691570282e-06, "step": 335740 }, { "epoch": 95.30229917683792, "grad_norm": 0.00015836235252209008, "learning_rate": 4.734601192165768e-06, "loss": 1.6206875443458558e-06, "step": 335750 }, { "epoch": 95.30513766676128, "grad_norm": 0.0010827755322679877, "learning_rate": 4.731762702242407e-06, "loss": 3.392435610294342e-06, "step": 335760 }, { "epoch": 95.30797615668465, "grad_norm": 0.0003206513065379113, "learning_rate": 4.728924212319046e-06, "loss": 2.7781352400779724e-06, "step": 335770 }, { "epoch": 95.31081464660801, "grad_norm": 0.000911517592612654, "learning_rate": 4.7260857223956855e-06, "loss": 3.4142285585403442e-06, "step": 335780 }, { "epoch": 95.31365313653137, "grad_norm": 0.025546440854668617, "learning_rate": 4.723247232472325e-06, "loss": 6.388872861862183e-06, "step": 335790 }, { "epoch": 95.31649162645472, "grad_norm": 0.00020735806901939213, "learning_rate": 4.720408742548964e-06, "loss": 3.5854056477546694e-06, "step": 335800 }, { "epoch": 95.31933011637808, "grad_norm": 0.001673119724728167, "learning_rate": 4.717570252625604e-06, "loss": 2.5521963834762574e-06, "step": 335810 }, { "epoch": 95.32216860630145, "grad_norm": 0.0002653211704455316, "learning_rate": 4.7147317627022426e-06, "loss": 2.2014603018760683e-06, "step": 335820 }, { "epoch": 95.32500709622481, "grad_norm": 0.0006646275869570673, "learning_rate": 4.711893272778882e-06, "loss": 3.478117287158966e-06, "step": 335830 }, { "epoch": 95.32784558614817, "grad_norm": 0.001006175996735692, "learning_rate": 4.709054782855521e-06, "loss": 5.7872384786605835e-06, "step": 335840 }, { "epoch": 95.33068407607153, "grad_norm": 0.0008806781261228025, "learning_rate": 4.706216292932161e-06, "loss": 1.917779445648193e-06, "step": 335850 }, { "epoch": 95.3335225659949, "grad_norm": 0.000668609282001853, "learning_rate": 4.7033778030088e-06, "loss": 2.403184771537781e-06, "step": 335860 }, { "epoch": 95.33636105591825, "grad_norm": 0.0012941844761371613, "learning_rate": 4.7005393130854384e-06, "loss": 2.041272819042206e-06, "step": 335870 }, { "epoch": 95.33919954584161, "grad_norm": 0.0005341817741282284, "learning_rate": 4.697700823162078e-06, "loss": 2.5797635316848755e-06, "step": 335880 }, { "epoch": 95.34203803576497, "grad_norm": 0.0018995738355442882, "learning_rate": 4.694862333238717e-06, "loss": 2.6091933250427245e-06, "step": 335890 }, { "epoch": 95.34487652568833, "grad_norm": 0.0011513314675539732, "learning_rate": 4.692023843315357e-06, "loss": 2.306513488292694e-06, "step": 335900 }, { "epoch": 95.3477150156117, "grad_norm": 0.0003597917384468019, "learning_rate": 4.6891853533919955e-06, "loss": 2.576783299446106e-06, "step": 335910 }, { "epoch": 95.35055350553506, "grad_norm": 0.0004144291451666504, "learning_rate": 4.686346863468634e-06, "loss": 2.1984800696372985e-06, "step": 335920 }, { "epoch": 95.35339199545841, "grad_norm": 0.00016886329103726894, "learning_rate": 4.683508373545274e-06, "loss": 1.5202909708023072e-06, "step": 335930 }, { "epoch": 95.35623048538177, "grad_norm": 0.00030065010651014745, "learning_rate": 4.680669883621913e-06, "loss": 2.7259811758995056e-06, "step": 335940 }, { "epoch": 95.35906897530514, "grad_norm": 0.0019319069106131792, "learning_rate": 4.6778313936985525e-06, "loss": 3.5956501960754396e-06, "step": 335950 }, { "epoch": 95.3619074652285, "grad_norm": 0.00017246131028514355, "learning_rate": 4.674992903775191e-06, "loss": 2.2765249013900758e-06, "step": 335960 }, { "epoch": 95.36474595515186, "grad_norm": 0.0009163017384707928, "learning_rate": 4.672154413851831e-06, "loss": 3.760866820812225e-06, "step": 335970 }, { "epoch": 95.36758444507522, "grad_norm": 0.001062314142473042, "learning_rate": 4.669315923928471e-06, "loss": 2.0576640963554383e-06, "step": 335980 }, { "epoch": 95.37042293499859, "grad_norm": 0.0009720627567730844, "learning_rate": 4.6664774340051095e-06, "loss": 2.5724992156028746e-06, "step": 335990 }, { "epoch": 95.37326142492194, "grad_norm": 0.00024188034876715392, "learning_rate": 4.663638944081749e-06, "loss": 3.1035393476486207e-06, "step": 336000 }, { "epoch": 95.37326142492194, "eval_accuracy": 0.9890633941629046, "eval_loss": 0.047634921967983246, "eval_runtime": 49.8645, "eval_samples_per_second": 315.395, "eval_steps_per_second": 4.933, "step": 336000 }, { "epoch": 95.3760999148453, "grad_norm": 0.00046637988998554647, "learning_rate": 4.660800454158388e-06, "loss": 2.3262575268745424e-06, "step": 336010 }, { "epoch": 95.37893840476866, "grad_norm": 0.0001741937012411654, "learning_rate": 4.657961964235028e-06, "loss": 1.7512589693069459e-06, "step": 336020 }, { "epoch": 95.38177689469202, "grad_norm": 0.00014477492368314415, "learning_rate": 4.6551234743116666e-06, "loss": 2.4236738681793214e-06, "step": 336030 }, { "epoch": 95.38461538461539, "grad_norm": 0.00039205761277116835, "learning_rate": 4.652284984388305e-06, "loss": 2.236105501651764e-06, "step": 336040 }, { "epoch": 95.38745387453875, "grad_norm": 0.0002547222247812897, "learning_rate": 4.649446494464945e-06, "loss": 2.7904286980628966e-06, "step": 336050 }, { "epoch": 95.39029236446211, "grad_norm": 0.0010144459083676338, "learning_rate": 4.646608004541584e-06, "loss": 2.0621344447135924e-06, "step": 336060 }, { "epoch": 95.39313085438546, "grad_norm": 0.0014251827960833907, "learning_rate": 4.643769514618224e-06, "loss": 3.347732126712799e-06, "step": 336070 }, { "epoch": 95.39596934430882, "grad_norm": 0.0005172804812900722, "learning_rate": 4.640931024694862e-06, "loss": 1.94404274225235e-06, "step": 336080 }, { "epoch": 95.39880783423219, "grad_norm": 0.0014273204142227769, "learning_rate": 4.638092534771501e-06, "loss": 3.155507147312164e-06, "step": 336090 }, { "epoch": 95.40164632415555, "grad_norm": 0.0004021617060061544, "learning_rate": 4.635254044848141e-06, "loss": 2.62577086687088e-06, "step": 336100 }, { "epoch": 95.40448481407891, "grad_norm": 0.0001937173365149647, "learning_rate": 4.63241555492478e-06, "loss": 1.4651566743850708e-06, "step": 336110 }, { "epoch": 95.40732330400228, "grad_norm": 0.0007471178541891277, "learning_rate": 4.6295770650014194e-06, "loss": 2.2681429982185365e-06, "step": 336120 }, { "epoch": 95.41016179392564, "grad_norm": 0.00015569559764117002, "learning_rate": 4.626738575078058e-06, "loss": 1.348741352558136e-06, "step": 336130 }, { "epoch": 95.41300028384899, "grad_norm": 0.0006182683282531798, "learning_rate": 4.623900085154698e-06, "loss": 2.142414450645447e-06, "step": 336140 }, { "epoch": 95.41583877377235, "grad_norm": 0.0007727181655354798, "learning_rate": 4.621061595231338e-06, "loss": 2.3730099201202393e-06, "step": 336150 }, { "epoch": 95.41867726369571, "grad_norm": 0.0005302937934175134, "learning_rate": 4.6182231053079765e-06, "loss": 2.5486573576927184e-06, "step": 336160 }, { "epoch": 95.42151575361908, "grad_norm": 0.0007413235143758357, "learning_rate": 4.615384615384616e-06, "loss": 2.2837892174720762e-06, "step": 336170 }, { "epoch": 95.42435424354244, "grad_norm": 0.00045298109762370586, "learning_rate": 4.612546125461255e-06, "loss": 3.1614676117897033e-06, "step": 336180 }, { "epoch": 95.4271927334658, "grad_norm": 0.0008641253225505352, "learning_rate": 4.609707635537894e-06, "loss": 2.621673047542572e-06, "step": 336190 }, { "epoch": 95.43003122338915, "grad_norm": 0.00019268415053375065, "learning_rate": 4.6068691456145335e-06, "loss": 2.457760274410248e-06, "step": 336200 }, { "epoch": 95.43286971331251, "grad_norm": 0.0016930693527683616, "learning_rate": 4.604030655691172e-06, "loss": 2.206675708293915e-06, "step": 336210 }, { "epoch": 95.43570820323588, "grad_norm": 0.0006243878160603344, "learning_rate": 4.601192165767812e-06, "loss": 3.431364893913269e-06, "step": 336220 }, { "epoch": 95.43854669315924, "grad_norm": 0.00014143306179903448, "learning_rate": 4.598353675844451e-06, "loss": 2.235919237136841e-06, "step": 336230 }, { "epoch": 95.4413851830826, "grad_norm": 0.00038670509820804, "learning_rate": 4.59551518592109e-06, "loss": 2.282671630382538e-06, "step": 336240 }, { "epoch": 95.44422367300596, "grad_norm": 0.006037199404090643, "learning_rate": 4.592676695997729e-06, "loss": 4.733726382255554e-06, "step": 336250 }, { "epoch": 95.44706216292933, "grad_norm": 0.000246420968323946, "learning_rate": 4.589838206074368e-06, "loss": 1.4124438166618346e-06, "step": 336260 }, { "epoch": 95.44990065285268, "grad_norm": 0.0004920574137941003, "learning_rate": 4.586999716151008e-06, "loss": 3.4831464290618896e-06, "step": 336270 }, { "epoch": 95.45273914277604, "grad_norm": 0.001229921355843544, "learning_rate": 4.584161226227647e-06, "loss": 2.0049512386322023e-06, "step": 336280 }, { "epoch": 95.4555776326994, "grad_norm": 0.0011497926898300648, "learning_rate": 4.581322736304286e-06, "loss": 2.722814679145813e-06, "step": 336290 }, { "epoch": 95.45841612262276, "grad_norm": 0.00025045688380487263, "learning_rate": 4.578484246380925e-06, "loss": 1.5644356608390808e-06, "step": 336300 }, { "epoch": 95.46125461254613, "grad_norm": 0.00010288219345966354, "learning_rate": 4.575645756457565e-06, "loss": 1.9261613488197325e-06, "step": 336310 }, { "epoch": 95.46409310246949, "grad_norm": 0.00042423148988746107, "learning_rate": 4.572807266534205e-06, "loss": 2.8625130653381347e-06, "step": 336320 }, { "epoch": 95.46693159239285, "grad_norm": 0.0025380721781402826, "learning_rate": 4.569968776610843e-06, "loss": 2.4782493710517883e-06, "step": 336330 }, { "epoch": 95.4697700823162, "grad_norm": 0.0002970360219478607, "learning_rate": 4.567130286687483e-06, "loss": 2.557411789894104e-06, "step": 336340 }, { "epoch": 95.47260857223957, "grad_norm": 0.0004519275389611721, "learning_rate": 4.564291796764122e-06, "loss": 4.6603381633758545e-06, "step": 336350 }, { "epoch": 95.47544706216293, "grad_norm": 0.00039074308006092906, "learning_rate": 4.561453306840761e-06, "loss": 2.777203917503357e-06, "step": 336360 }, { "epoch": 95.47828555208629, "grad_norm": 0.0004887094255536795, "learning_rate": 4.5586148169174004e-06, "loss": 3.5818666219711304e-06, "step": 336370 }, { "epoch": 95.48112404200965, "grad_norm": 0.002468906342983246, "learning_rate": 4.555776326994039e-06, "loss": 3.6504119634628295e-06, "step": 336380 }, { "epoch": 95.48396253193302, "grad_norm": 0.00030645247898064554, "learning_rate": 4.552937837070679e-06, "loss": 2.0038336515426635e-06, "step": 336390 }, { "epoch": 95.48680102185637, "grad_norm": 0.0005133271333761513, "learning_rate": 4.550099347147318e-06, "loss": 2.4182721972465514e-06, "step": 336400 }, { "epoch": 95.48963951177973, "grad_norm": 0.0006429690984077752, "learning_rate": 4.547260857223957e-06, "loss": 3.390014171600342e-06, "step": 336410 }, { "epoch": 95.49247800170309, "grad_norm": 0.0007131064194254577, "learning_rate": 4.544422367300596e-06, "loss": 2.594292163848877e-06, "step": 336420 }, { "epoch": 95.49531649162645, "grad_norm": 0.0005296553717926145, "learning_rate": 4.541583877377235e-06, "loss": 3.6479905247688293e-06, "step": 336430 }, { "epoch": 95.49815498154982, "grad_norm": 0.000525185780134052, "learning_rate": 4.538745387453875e-06, "loss": 3.4047290682792665e-06, "step": 336440 }, { "epoch": 95.50099347147318, "grad_norm": 0.0011401479132473469, "learning_rate": 4.535906897530514e-06, "loss": 3.3063814043998717e-06, "step": 336450 }, { "epoch": 95.50383196139654, "grad_norm": 0.0008913157507777214, "learning_rate": 4.533068407607153e-06, "loss": 1.6979873180389405e-06, "step": 336460 }, { "epoch": 95.50667045131989, "grad_norm": 0.0007853202405385673, "learning_rate": 4.530229917683792e-06, "loss": 2.2469088435173034e-06, "step": 336470 }, { "epoch": 95.50950894124325, "grad_norm": 0.0003317809896543622, "learning_rate": 4.527391427760432e-06, "loss": 1.911073923110962e-06, "step": 336480 }, { "epoch": 95.51234743116662, "grad_norm": 0.0005313497385941446, "learning_rate": 4.5245529378370715e-06, "loss": 2.3616477847099306e-06, "step": 336490 }, { "epoch": 95.51518592108998, "grad_norm": 0.001754488330334425, "learning_rate": 4.52171444791371e-06, "loss": 2.621114253997803e-06, "step": 336500 }, { "epoch": 95.51518592108998, "eval_accuracy": 0.9891905639982196, "eval_loss": 0.047619957476854324, "eval_runtime": 48.8794, "eval_samples_per_second": 321.751, "eval_steps_per_second": 5.033, "step": 336500 }, { "epoch": 95.51802441101334, "grad_norm": 0.0003923533367924392, "learning_rate": 4.518875957990349e-06, "loss": 2.0660459995269776e-06, "step": 336510 }, { "epoch": 95.5208629009367, "grad_norm": 0.0006328152376227081, "learning_rate": 4.516037468066989e-06, "loss": 1.990795135498047e-06, "step": 336520 }, { "epoch": 95.52370139086007, "grad_norm": 0.0005425058770924807, "learning_rate": 4.513198978143628e-06, "loss": 2.5017186999320982e-06, "step": 336530 }, { "epoch": 95.52653988078342, "grad_norm": 0.00043326785089448094, "learning_rate": 4.510360488220267e-06, "loss": 2.0191073417663574e-06, "step": 336540 }, { "epoch": 95.52937837070678, "grad_norm": 0.0003510714741423726, "learning_rate": 4.507521998296906e-06, "loss": 3.4863129258155824e-06, "step": 336550 }, { "epoch": 95.53221686063014, "grad_norm": 0.0005021244287490845, "learning_rate": 4.504683508373545e-06, "loss": 2.7408823370933533e-06, "step": 336560 }, { "epoch": 95.5350553505535, "grad_norm": 0.0006737738731317222, "learning_rate": 4.501845018450185e-06, "loss": 2.8153881430625916e-06, "step": 336570 }, { "epoch": 95.53789384047687, "grad_norm": 0.0011276457225903869, "learning_rate": 4.499006528526824e-06, "loss": 3.2275915145874023e-06, "step": 336580 }, { "epoch": 95.54073233040023, "grad_norm": 0.0015342538245022297, "learning_rate": 4.496168038603463e-06, "loss": 2.8710812330245973e-06, "step": 336590 }, { "epoch": 95.5435708203236, "grad_norm": 0.0004745634214486927, "learning_rate": 4.493329548680102e-06, "loss": 2.7956441044807436e-06, "step": 336600 }, { "epoch": 95.54640931024694, "grad_norm": 0.00028775076498277485, "learning_rate": 4.490491058756742e-06, "loss": 2.264790236949921e-06, "step": 336610 }, { "epoch": 95.5492478001703, "grad_norm": 0.00016711912758182734, "learning_rate": 4.487652568833381e-06, "loss": 3.0873343348503113e-06, "step": 336620 }, { "epoch": 95.55208629009367, "grad_norm": 0.0020085456781089306, "learning_rate": 4.48481407891002e-06, "loss": 4.128366708755493e-06, "step": 336630 }, { "epoch": 95.55492478001703, "grad_norm": 0.0002943132712971419, "learning_rate": 4.481975588986659e-06, "loss": 2.647005021572113e-06, "step": 336640 }, { "epoch": 95.5577632699404, "grad_norm": 0.00033806200372055173, "learning_rate": 4.479137099063299e-06, "loss": 3.0487775802612304e-06, "step": 336650 }, { "epoch": 95.56060175986376, "grad_norm": 0.0006506036734208465, "learning_rate": 4.4762986091399385e-06, "loss": 1.4199316501617431e-05, "step": 336660 }, { "epoch": 95.5634402497871, "grad_norm": 0.0011930017499253154, "learning_rate": 4.4737439682089125e-06, "loss": 0.0017686260864138604, "step": 336670 }, { "epoch": 95.56627873971047, "grad_norm": 0.0027255553286522627, "learning_rate": 4.470905478285552e-06, "loss": 3.2125040888786317e-06, "step": 336680 }, { "epoch": 95.56911722963383, "grad_norm": 0.00532586919143796, "learning_rate": 4.468066988362191e-06, "loss": 2.614222466945648e-06, "step": 336690 }, { "epoch": 95.5719557195572, "grad_norm": 0.0005173672107048333, "learning_rate": 4.465228498438831e-06, "loss": 3.850460052490234e-06, "step": 336700 }, { "epoch": 95.57479420948056, "grad_norm": 0.004300607834011316, "learning_rate": 4.4623900085154695e-06, "loss": 4.870258271694183e-06, "step": 336710 }, { "epoch": 95.57763269940392, "grad_norm": 0.0010336654959246516, "learning_rate": 4.459551518592109e-06, "loss": 3.25031578540802e-06, "step": 336720 }, { "epoch": 95.58047118932728, "grad_norm": 0.00042673113057389855, "learning_rate": 4.456713028668749e-06, "loss": 3.219209611415863e-06, "step": 336730 }, { "epoch": 95.58330967925063, "grad_norm": 0.0009420675341971219, "learning_rate": 4.453874538745388e-06, "loss": 5.707517266273498e-06, "step": 336740 }, { "epoch": 95.586148169174, "grad_norm": 0.0005388691206462681, "learning_rate": 4.451036048822027e-06, "loss": 2.187490463256836e-06, "step": 336750 }, { "epoch": 95.58898665909736, "grad_norm": 0.0004121550009585917, "learning_rate": 4.448197558898666e-06, "loss": 3.6088749766349792e-06, "step": 336760 }, { "epoch": 95.59182514902072, "grad_norm": 0.00021055068646091968, "learning_rate": 4.445359068975306e-06, "loss": 2.720765769481659e-06, "step": 336770 }, { "epoch": 95.59466363894408, "grad_norm": 0.0002670057292561978, "learning_rate": 4.442520579051945e-06, "loss": 1.0607950389385224e-05, "step": 336780 }, { "epoch": 95.59750212886745, "grad_norm": 0.0009281046222895384, "learning_rate": 4.439682089128584e-06, "loss": 3.3229589462280273e-06, "step": 336790 }, { "epoch": 95.60034061879081, "grad_norm": 0.0009576245793141425, "learning_rate": 4.436843599205223e-06, "loss": 2.2854655981063843e-06, "step": 336800 }, { "epoch": 95.60317910871416, "grad_norm": 0.0009847284527495503, "learning_rate": 4.434005109281862e-06, "loss": 3.3190473914146425e-06, "step": 336810 }, { "epoch": 95.60601759863752, "grad_norm": 0.0010169586166739464, "learning_rate": 4.431166619358502e-06, "loss": 4.1335821151733395e-06, "step": 336820 }, { "epoch": 95.60885608856088, "grad_norm": 0.0006922453176230192, "learning_rate": 4.428328129435141e-06, "loss": 2.109445631504059e-06, "step": 336830 }, { "epoch": 95.61169457848425, "grad_norm": 0.0030998792499303818, "learning_rate": 4.4254896395117794e-06, "loss": 3.60049307346344e-06, "step": 336840 }, { "epoch": 95.61453306840761, "grad_norm": 0.00023406892432831228, "learning_rate": 4.422651149588419e-06, "loss": 2.866797149181366e-06, "step": 336850 }, { "epoch": 95.61737155833097, "grad_norm": 0.00017538791871629655, "learning_rate": 4.419812659665058e-06, "loss": 3.440678119659424e-06, "step": 336860 }, { "epoch": 95.62021004825434, "grad_norm": 0.00020178481645416468, "learning_rate": 4.416974169741698e-06, "loss": 3.3525750041007997e-06, "step": 336870 }, { "epoch": 95.62304853817768, "grad_norm": 0.0005008825683034956, "learning_rate": 4.4141356798183365e-06, "loss": 3.1612813472747804e-06, "step": 336880 }, { "epoch": 95.62588702810105, "grad_norm": 0.0008160540601238608, "learning_rate": 4.411297189894976e-06, "loss": 3.0338764190673827e-06, "step": 336890 }, { "epoch": 95.62872551802441, "grad_norm": 0.0008728501852601767, "learning_rate": 4.408458699971616e-06, "loss": 2.514012157917023e-06, "step": 336900 }, { "epoch": 95.63156400794777, "grad_norm": 0.0004480719508137554, "learning_rate": 4.405620210048255e-06, "loss": 2.8748065233230593e-06, "step": 336910 }, { "epoch": 95.63440249787114, "grad_norm": 0.00035602113348431885, "learning_rate": 4.402781720124894e-06, "loss": 1.8866732716560363e-06, "step": 336920 }, { "epoch": 95.6372409877945, "grad_norm": 0.0009487161296419799, "learning_rate": 4.399943230201533e-06, "loss": 2.9103830456733704e-06, "step": 336930 }, { "epoch": 95.64007947771785, "grad_norm": 0.000989214750006795, "learning_rate": 4.397104740278172e-06, "loss": 2.8168782591819762e-06, "step": 336940 }, { "epoch": 95.64291796764121, "grad_norm": 0.0005549935158342123, "learning_rate": 4.394266250354812e-06, "loss": 2.8170645236968996e-06, "step": 336950 }, { "epoch": 95.64575645756457, "grad_norm": 0.0005160478758625686, "learning_rate": 4.3914277604314505e-06, "loss": 2.101995050907135e-06, "step": 336960 }, { "epoch": 95.64859494748794, "grad_norm": 0.0008826404809951782, "learning_rate": 4.38858927050809e-06, "loss": 1.8572434782981873e-06, "step": 336970 }, { "epoch": 95.6514334374113, "grad_norm": 0.00018283935787621886, "learning_rate": 4.385750780584729e-06, "loss": 2.357363700866699e-06, "step": 336980 }, { "epoch": 95.65427192733466, "grad_norm": 9.759368549566716e-05, "learning_rate": 4.382912290661368e-06, "loss": 1.4388933777809143e-06, "step": 336990 }, { "epoch": 95.65711041725802, "grad_norm": 0.0005242266925051808, "learning_rate": 4.3800738007380076e-06, "loss": 3.168918192386627e-06, "step": 337000 }, { "epoch": 95.65711041725802, "eval_accuracy": 0.9892541489158772, "eval_loss": 0.04843961074948311, "eval_runtime": 46.4312, "eval_samples_per_second": 338.717, "eval_steps_per_second": 5.298, "step": 337000 }, { "epoch": 95.65994890718137, "grad_norm": 0.00015982723562046885, "learning_rate": 4.377235310814646e-06, "loss": 1.479499042034149e-06, "step": 337010 }, { "epoch": 95.66278739710474, "grad_norm": 0.0029273719992488623, "learning_rate": 4.374396820891286e-06, "loss": 2.7474015951156616e-06, "step": 337020 }, { "epoch": 95.6656258870281, "grad_norm": 0.0004877274332102388, "learning_rate": 4.371558330967925e-06, "loss": 2.530217170715332e-06, "step": 337030 }, { "epoch": 95.66846437695146, "grad_norm": 0.0012886826880276203, "learning_rate": 4.368719841044565e-06, "loss": 2.860464155673981e-06, "step": 337040 }, { "epoch": 95.67130286687483, "grad_norm": 0.0017698629526421428, "learning_rate": 4.3658813511212034e-06, "loss": 2.926960587501526e-06, "step": 337050 }, { "epoch": 95.67414135679819, "grad_norm": 0.0009124100324697793, "learning_rate": 4.363042861197843e-06, "loss": 2.5015324354171753e-06, "step": 337060 }, { "epoch": 95.67697984672155, "grad_norm": 0.005183533299714327, "learning_rate": 4.360204371274482e-06, "loss": 3.2588839530944823e-06, "step": 337070 }, { "epoch": 95.6798183366449, "grad_norm": 0.00016494390729349107, "learning_rate": 4.357365881351122e-06, "loss": 2.611801028251648e-06, "step": 337080 }, { "epoch": 95.68265682656826, "grad_norm": 0.000613784184679389, "learning_rate": 4.354527391427761e-06, "loss": 2.7317553758621214e-06, "step": 337090 }, { "epoch": 95.68549531649163, "grad_norm": 0.0015624654479324818, "learning_rate": 4.3516889015044e-06, "loss": 2.2061169147491457e-06, "step": 337100 }, { "epoch": 95.68833380641499, "grad_norm": 0.00015904825704637915, "learning_rate": 4.348850411581039e-06, "loss": 1.514144241809845e-06, "step": 337110 }, { "epoch": 95.69117229633835, "grad_norm": 0.00120069389231503, "learning_rate": 4.346011921657679e-06, "loss": 2.828612923622131e-06, "step": 337120 }, { "epoch": 95.69401078626171, "grad_norm": 0.00015531973622273654, "learning_rate": 4.3431734317343175e-06, "loss": 2.2588297724723817e-06, "step": 337130 }, { "epoch": 95.69684927618506, "grad_norm": 0.0008894591592252254, "learning_rate": 4.340334941810957e-06, "loss": 2.934969961643219e-06, "step": 337140 }, { "epoch": 95.69968776610843, "grad_norm": 0.0005575351533479989, "learning_rate": 4.337496451887596e-06, "loss": 2.42721289396286e-06, "step": 337150 }, { "epoch": 95.70252625603179, "grad_norm": 0.001873596222139895, "learning_rate": 4.334657961964235e-06, "loss": 2.911686897277832e-06, "step": 337160 }, { "epoch": 95.70536474595515, "grad_norm": 0.00029919951339252293, "learning_rate": 4.3318194720408745e-06, "loss": 2.2565945982933044e-06, "step": 337170 }, { "epoch": 95.70820323587851, "grad_norm": 0.0005377530469559133, "learning_rate": 4.328980982117513e-06, "loss": 2.8943642973899843e-06, "step": 337180 }, { "epoch": 95.71104172580188, "grad_norm": 0.0007273258524946868, "learning_rate": 4.326142492194153e-06, "loss": 3.368407487869263e-06, "step": 337190 }, { "epoch": 95.71388021572524, "grad_norm": 0.0005897880764678121, "learning_rate": 4.323304002270792e-06, "loss": 3.4267082810401917e-06, "step": 337200 }, { "epoch": 95.71671870564859, "grad_norm": 0.0002296987659065053, "learning_rate": 4.320465512347431e-06, "loss": 2.0558014512062074e-06, "step": 337210 }, { "epoch": 95.71955719557195, "grad_norm": 9.369369217893109e-05, "learning_rate": 4.31762702242407e-06, "loss": 1.6802921891212464e-06, "step": 337220 }, { "epoch": 95.72239568549531, "grad_norm": 0.001155680394731462, "learning_rate": 4.31478853250071e-06, "loss": 2.6827678084373473e-06, "step": 337230 }, { "epoch": 95.72523417541868, "grad_norm": 0.00030468549812212586, "learning_rate": 4.311950042577349e-06, "loss": 2.931058406829834e-06, "step": 337240 }, { "epoch": 95.72807266534204, "grad_norm": 0.0010273976949974895, "learning_rate": 4.3091115526539886e-06, "loss": 2.484023571014404e-06, "step": 337250 }, { "epoch": 95.7309111552654, "grad_norm": 0.000704424805007875, "learning_rate": 4.306273062730627e-06, "loss": 4.143454134464264e-06, "step": 337260 }, { "epoch": 95.73374964518877, "grad_norm": 0.0004002580826636404, "learning_rate": 4.303434572807267e-06, "loss": 2.5331974029541016e-06, "step": 337270 }, { "epoch": 95.73658813511211, "grad_norm": 0.0013333344832062721, "learning_rate": 4.300596082883906e-06, "loss": 2.964586019515991e-06, "step": 337280 }, { "epoch": 95.73942662503548, "grad_norm": 0.0006092005642130971, "learning_rate": 4.297757592960546e-06, "loss": 4.798732697963715e-06, "step": 337290 }, { "epoch": 95.74226511495884, "grad_norm": 0.0006102673942223191, "learning_rate": 4.2949191030371844e-06, "loss": 2.6825815439224244e-06, "step": 337300 }, { "epoch": 95.7451036048822, "grad_norm": 0.004828766919672489, "learning_rate": 4.292080613113823e-06, "loss": 3.2864511013031008e-06, "step": 337310 }, { "epoch": 95.74794209480557, "grad_norm": 0.0008180899312719703, "learning_rate": 4.289242123190463e-06, "loss": 2.2083520889282225e-06, "step": 337320 }, { "epoch": 95.75078058472893, "grad_norm": 0.0009978811722248793, "learning_rate": 4.286403633267102e-06, "loss": 2.6984140276908875e-06, "step": 337330 }, { "epoch": 95.75361907465229, "grad_norm": 0.0004250838828738779, "learning_rate": 4.2835651433437415e-06, "loss": 2.348609268665314e-06, "step": 337340 }, { "epoch": 95.75645756457564, "grad_norm": 0.0006188508123159409, "learning_rate": 4.28072665342038e-06, "loss": 2.80085951089859e-06, "step": 337350 }, { "epoch": 95.759296054499, "grad_norm": 0.0007076758774928749, "learning_rate": 4.27788816349702e-06, "loss": 2.1548941731452944e-06, "step": 337360 }, { "epoch": 95.76213454442237, "grad_norm": 0.006191427819430828, "learning_rate": 4.275049673573659e-06, "loss": 6.424263119697571e-06, "step": 337370 }, { "epoch": 95.76497303434573, "grad_norm": 0.0003473000251688063, "learning_rate": 4.272211183650298e-06, "loss": 5.346536636352539e-06, "step": 337380 }, { "epoch": 95.76781152426909, "grad_norm": 0.007133839651942253, "learning_rate": 4.269372693726937e-06, "loss": 4.397518932819367e-06, "step": 337390 }, { "epoch": 95.77065001419246, "grad_norm": 0.004497336223721504, "learning_rate": 4.266534203803577e-06, "loss": 2.423115074634552e-06, "step": 337400 }, { "epoch": 95.7734885041158, "grad_norm": 0.004120878875255585, "learning_rate": 4.263695713880216e-06, "loss": 3.3002346754074096e-06, "step": 337410 }, { "epoch": 95.77632699403917, "grad_norm": 0.0005548434564843774, "learning_rate": 4.2608572239568555e-06, "loss": 5.274079740047455e-06, "step": 337420 }, { "epoch": 95.77916548396253, "grad_norm": 0.0006043273024260998, "learning_rate": 4.258018734033494e-06, "loss": 2.6941299438476564e-06, "step": 337430 }, { "epoch": 95.78200397388589, "grad_norm": 0.0008846171549521387, "learning_rate": 4.255180244110134e-06, "loss": 2.2199004888534545e-06, "step": 337440 }, { "epoch": 95.78484246380926, "grad_norm": 0.0003897649294231087, "learning_rate": 4.252341754186773e-06, "loss": 2.350471913814545e-06, "step": 337450 }, { "epoch": 95.78768095373262, "grad_norm": 0.0004358066653367132, "learning_rate": 4.2495032642634125e-06, "loss": 2.0330771803855896e-06, "step": 337460 }, { "epoch": 95.79051944365598, "grad_norm": 0.0002473957138136029, "learning_rate": 4.246664774340051e-06, "loss": 1.4744699001312256e-06, "step": 337470 }, { "epoch": 95.79335793357933, "grad_norm": 0.0003814265946857631, "learning_rate": 4.24382628441669e-06, "loss": 4.125945270061493e-06, "step": 337480 }, { "epoch": 95.79619642350269, "grad_norm": 0.001386295654810965, "learning_rate": 4.24098779449333e-06, "loss": 1.9224360585212706e-06, "step": 337490 }, { "epoch": 95.79903491342606, "grad_norm": 0.0015042820014059544, "learning_rate": 4.238149304569969e-06, "loss": 2.2197142243385316e-06, "step": 337500 }, { "epoch": 95.79903491342606, "eval_accuracy": 0.9891269790805621, "eval_loss": 0.04843762889504433, "eval_runtime": 92.0535, "eval_samples_per_second": 170.846, "eval_steps_per_second": 2.672, "step": 337500 }, { "epoch": 95.80187340334942, "grad_norm": 0.006011142395436764, "learning_rate": 4.235310814646608e-06, "loss": 8.385814726352692e-06, "step": 337510 }, { "epoch": 95.80471189327278, "grad_norm": 0.0008326161769218743, "learning_rate": 4.232472324723247e-06, "loss": 3.1989067792892458e-06, "step": 337520 }, { "epoch": 95.80755038319614, "grad_norm": 0.0012121410109102726, "learning_rate": 4.229633834799886e-06, "loss": 3.4617260098457336e-06, "step": 337530 }, { "epoch": 95.8103888731195, "grad_norm": 0.004209673032164574, "learning_rate": 4.226795344876526e-06, "loss": 3.701448440551758e-06, "step": 337540 }, { "epoch": 95.81322736304286, "grad_norm": 0.003607658203691244, "learning_rate": 4.223956854953165e-06, "loss": 3.93204391002655e-06, "step": 337550 }, { "epoch": 95.81606585296622, "grad_norm": 0.0009147592354565859, "learning_rate": 4.221118365029804e-06, "loss": 3.2143667340278626e-06, "step": 337560 }, { "epoch": 95.81890434288958, "grad_norm": 0.0005204041954129934, "learning_rate": 4.218279875106444e-06, "loss": 4.22745943069458e-06, "step": 337570 }, { "epoch": 95.82174283281294, "grad_norm": 0.00028369916253723204, "learning_rate": 4.215441385183083e-06, "loss": 3.0530616641044615e-06, "step": 337580 }, { "epoch": 95.82458132273631, "grad_norm": 0.0009792792843654752, "learning_rate": 4.2126028952597225e-06, "loss": 2.091564238071442e-06, "step": 337590 }, { "epoch": 95.82741981265967, "grad_norm": 0.0003943463379982859, "learning_rate": 4.209764405336361e-06, "loss": 2.5968998670578004e-06, "step": 337600 }, { "epoch": 95.83025830258302, "grad_norm": 0.0005882128607481718, "learning_rate": 4.206925915413001e-06, "loss": 4.951469600200653e-06, "step": 337610 }, { "epoch": 95.83309679250638, "grad_norm": 0.0003045198100153357, "learning_rate": 4.20408742548964e-06, "loss": 1.7495825886726379e-06, "step": 337620 }, { "epoch": 95.83593528242974, "grad_norm": 0.0004793416883330792, "learning_rate": 4.201248935566279e-06, "loss": 2.1610409021377564e-06, "step": 337630 }, { "epoch": 95.83877377235311, "grad_norm": 0.00044618186075240374, "learning_rate": 4.198410445642918e-06, "loss": 2.551265060901642e-06, "step": 337640 }, { "epoch": 95.84161226227647, "grad_norm": 0.00030493290978483856, "learning_rate": 4.195571955719557e-06, "loss": 3.0025839805603027e-06, "step": 337650 }, { "epoch": 95.84445075219983, "grad_norm": 0.0005767128895968199, "learning_rate": 4.192733465796197e-06, "loss": 2.1249055862426757e-06, "step": 337660 }, { "epoch": 95.8472892421232, "grad_norm": 0.0005430433666333556, "learning_rate": 4.189894975872836e-06, "loss": 2.543441951274872e-06, "step": 337670 }, { "epoch": 95.85012773204654, "grad_norm": 0.0004934126627631485, "learning_rate": 4.187056485949475e-06, "loss": 1.656264066696167e-06, "step": 337680 }, { "epoch": 95.85296622196991, "grad_norm": 0.0008238324662670493, "learning_rate": 4.184217996026114e-06, "loss": 2.7570873498916627e-06, "step": 337690 }, { "epoch": 95.85580471189327, "grad_norm": 0.0030687162652611732, "learning_rate": 4.181379506102753e-06, "loss": 2.2353604435920714e-06, "step": 337700 }, { "epoch": 95.85864320181663, "grad_norm": 0.001026013633236289, "learning_rate": 4.178541016179393e-06, "loss": 1.7277896404266358e-06, "step": 337710 }, { "epoch": 95.86148169174, "grad_norm": 0.00033549501677043736, "learning_rate": 4.1757025262560315e-06, "loss": 2.0049512386322023e-06, "step": 337720 }, { "epoch": 95.86432018166336, "grad_norm": 0.00013455715088639408, "learning_rate": 4.172864036332671e-06, "loss": 3.2942742109298704e-06, "step": 337730 }, { "epoch": 95.86715867158672, "grad_norm": 0.0019980634097009897, "learning_rate": 4.170025546409311e-06, "loss": 2.292357385158539e-06, "step": 337740 }, { "epoch": 95.86999716151007, "grad_norm": 0.0004365041386336088, "learning_rate": 4.16718705648595e-06, "loss": 2.9863789677619933e-06, "step": 337750 }, { "epoch": 95.87283565143343, "grad_norm": 9.58018863457255e-05, "learning_rate": 4.164348566562589e-06, "loss": 2.4750828742980956e-06, "step": 337760 }, { "epoch": 95.8756741413568, "grad_norm": 0.00012443352898117155, "learning_rate": 4.161510076639228e-06, "loss": 4.63295727968216e-06, "step": 337770 }, { "epoch": 95.87851263128016, "grad_norm": 0.0006763589917682111, "learning_rate": 4.158671586715868e-06, "loss": 2.7002766728401185e-06, "step": 337780 }, { "epoch": 95.88135112120352, "grad_norm": 0.0013456422602757812, "learning_rate": 4.155833096792507e-06, "loss": 2.741068601608276e-06, "step": 337790 }, { "epoch": 95.88418961112689, "grad_norm": 0.002043175045400858, "learning_rate": 4.152994606869146e-06, "loss": 2.6831403374671936e-06, "step": 337800 }, { "epoch": 95.88702810105025, "grad_norm": 0.0010224927682429552, "learning_rate": 4.150156116945785e-06, "loss": 3.3114105463027953e-06, "step": 337810 }, { "epoch": 95.8898665909736, "grad_norm": 0.0005563012673519552, "learning_rate": 4.147317627022424e-06, "loss": 2.504885196685791e-06, "step": 337820 }, { "epoch": 95.89270508089696, "grad_norm": 0.0002999592979904264, "learning_rate": 4.144479137099064e-06, "loss": 3.36281955242157e-06, "step": 337830 }, { "epoch": 95.89554357082032, "grad_norm": 0.0003209140559192747, "learning_rate": 4.141640647175703e-06, "loss": 2.1304935216903686e-06, "step": 337840 }, { "epoch": 95.89838206074369, "grad_norm": 0.0011493811616674066, "learning_rate": 4.1388021572523415e-06, "loss": 3.0178576707839967e-06, "step": 337850 }, { "epoch": 95.90122055066705, "grad_norm": 0.0003695295308716595, "learning_rate": 4.135963667328981e-06, "loss": 1.8054619431495666e-06, "step": 337860 }, { "epoch": 95.90405904059041, "grad_norm": 0.0009180061751976609, "learning_rate": 4.13312517740562e-06, "loss": 3.475509583950043e-06, "step": 337870 }, { "epoch": 95.90689753051376, "grad_norm": 0.0010636637452989817, "learning_rate": 4.13028668748226e-06, "loss": 2.5069341063499452e-06, "step": 337880 }, { "epoch": 95.90973602043712, "grad_norm": 0.0006398764089681208, "learning_rate": 4.1274481975588985e-06, "loss": 4.936009645462036e-06, "step": 337890 }, { "epoch": 95.91257451036049, "grad_norm": 0.0018976290011778474, "learning_rate": 4.124609707635538e-06, "loss": 2.335570752620697e-06, "step": 337900 }, { "epoch": 95.91541300028385, "grad_norm": 0.0002800098154693842, "learning_rate": 4.121771217712177e-06, "loss": 1.8736347556114196e-06, "step": 337910 }, { "epoch": 95.91825149020721, "grad_norm": 0.000777558539994061, "learning_rate": 4.118932727788817e-06, "loss": 2.1768733859062196e-06, "step": 337920 }, { "epoch": 95.92108998013057, "grad_norm": 0.004163276869803667, "learning_rate": 4.116094237865456e-06, "loss": 2.4961307644844053e-06, "step": 337930 }, { "epoch": 95.92392847005394, "grad_norm": 0.000705683371052146, "learning_rate": 4.113255747942095e-06, "loss": 2.6311725378036497e-06, "step": 337940 }, { "epoch": 95.92676695997729, "grad_norm": 0.0004196353256702423, "learning_rate": 4.110417258018734e-06, "loss": 2.4868175387382506e-06, "step": 337950 }, { "epoch": 95.92960544990065, "grad_norm": 0.00023518892703577876, "learning_rate": 4.107578768095374e-06, "loss": 2.6989728212356567e-06, "step": 337960 }, { "epoch": 95.93244393982401, "grad_norm": 0.0009927559876814485, "learning_rate": 4.1047402781720125e-06, "loss": 2.3232772946357726e-06, "step": 337970 }, { "epoch": 95.93528242974737, "grad_norm": 0.0005932939238846302, "learning_rate": 4.101901788248652e-06, "loss": 1.8833205103874207e-06, "step": 337980 }, { "epoch": 95.93812091967074, "grad_norm": 0.0002856060746125877, "learning_rate": 4.099063298325291e-06, "loss": 3.4691765904426575e-06, "step": 337990 }, { "epoch": 95.9409594095941, "grad_norm": 0.0003699149237945676, "learning_rate": 4.096224808401931e-06, "loss": 3.7439167499542236e-06, "step": 338000 }, { "epoch": 95.9409594095941, "eval_accuracy": 0.9889362243275895, "eval_loss": 0.0479343980550766, "eval_runtime": 74.4854, "eval_samples_per_second": 211.142, "eval_steps_per_second": 3.303, "step": 338000 }, { "epoch": 95.94379789951746, "grad_norm": 0.00013426259101834148, "learning_rate": 4.0933863184785696e-06, "loss": 2.983585000038147e-06, "step": 338010 }, { "epoch": 95.94663638944081, "grad_norm": 0.002048723166808486, "learning_rate": 4.090547828555208e-06, "loss": 2.0643696188926696e-06, "step": 338020 }, { "epoch": 95.94947487936417, "grad_norm": 0.0005419759545475245, "learning_rate": 4.087709338631848e-06, "loss": 2.2552907466888427e-06, "step": 338030 }, { "epoch": 95.95231336928754, "grad_norm": 0.0003465130866970867, "learning_rate": 4.084870848708487e-06, "loss": 1.3321638107299806e-06, "step": 338040 }, { "epoch": 95.9551518592109, "grad_norm": 0.0006859720451757312, "learning_rate": 4.082032358785127e-06, "loss": 2.084486186504364e-06, "step": 338050 }, { "epoch": 95.95799034913426, "grad_norm": 0.0011773047735914588, "learning_rate": 4.0791938688617654e-06, "loss": 2.650916576385498e-06, "step": 338060 }, { "epoch": 95.96082883905763, "grad_norm": 0.0004380405298434198, "learning_rate": 4.076355378938405e-06, "loss": 2.465583384037018e-06, "step": 338070 }, { "epoch": 95.96366732898099, "grad_norm": 0.0007988603319972754, "learning_rate": 4.073516889015044e-06, "loss": 1.971796154975891e-06, "step": 338080 }, { "epoch": 95.96650581890434, "grad_norm": 0.0004261988215148449, "learning_rate": 4.070678399091684e-06, "loss": 2.4119392037391664e-06, "step": 338090 }, { "epoch": 95.9693443088277, "grad_norm": 0.0006976430886425078, "learning_rate": 4.067839909168323e-06, "loss": 2.1310523152351378e-06, "step": 338100 }, { "epoch": 95.97218279875106, "grad_norm": 0.00017050834139809012, "learning_rate": 4.065001419244962e-06, "loss": 5.503557622432708e-06, "step": 338110 }, { "epoch": 95.97502128867443, "grad_norm": 0.00025319933774881065, "learning_rate": 4.062162929321601e-06, "loss": 2.0515173673629762e-06, "step": 338120 }, { "epoch": 95.97785977859779, "grad_norm": 0.0006779558607377112, "learning_rate": 4.059324439398241e-06, "loss": 2.2649765014648438e-06, "step": 338130 }, { "epoch": 95.98069826852115, "grad_norm": 0.000499283371027559, "learning_rate": 4.0564859494748795e-06, "loss": 1.6380101442337036e-06, "step": 338140 }, { "epoch": 95.9835367584445, "grad_norm": 0.0001650880294619128, "learning_rate": 4.053647459551519e-06, "loss": 3.102608025074005e-06, "step": 338150 }, { "epoch": 95.98637524836786, "grad_norm": 0.0014479618985205889, "learning_rate": 4.050808969628158e-06, "loss": 2.5231391191482542e-06, "step": 338160 }, { "epoch": 95.98921373829123, "grad_norm": 0.00011049298336729407, "learning_rate": 4.047970479704797e-06, "loss": 2.6246532797813414e-06, "step": 338170 }, { "epoch": 95.99205222821459, "grad_norm": 0.00010161624959437177, "learning_rate": 4.0451319897814365e-06, "loss": 4.32133674621582e-06, "step": 338180 }, { "epoch": 95.99489071813795, "grad_norm": 0.0001695464743534103, "learning_rate": 4.042293499858075e-06, "loss": 1.5351921319961549e-06, "step": 338190 }, { "epoch": 95.99772920806132, "grad_norm": 0.0008158560958690941, "learning_rate": 4.039455009934715e-06, "loss": 1.8263235688209534e-06, "step": 338200 }, { "epoch": 96.00056769798468, "grad_norm": 0.0005280728219076991, "learning_rate": 4.036616520011354e-06, "loss": 2.048858914349694e-06, "step": 338210 }, { "epoch": 96.00340618790803, "grad_norm": 0.00024774635676294565, "learning_rate": 4.033778030087993e-06, "loss": 2.736039459705353e-06, "step": 338220 }, { "epoch": 96.00624467783139, "grad_norm": 0.00020125873561482877, "learning_rate": 4.030939540164632e-06, "loss": 1.983344554901123e-06, "step": 338230 }, { "epoch": 96.00908316775475, "grad_norm": 0.0009757567895576358, "learning_rate": 4.028101050241272e-06, "loss": 3.886222839355469e-06, "step": 338240 }, { "epoch": 96.01192165767812, "grad_norm": 0.00022535014431923628, "learning_rate": 4.025262560317911e-06, "loss": 1.6849488019943238e-06, "step": 338250 }, { "epoch": 96.01476014760148, "grad_norm": 0.001682798145338893, "learning_rate": 4.022424070394551e-06, "loss": 2.573244273662567e-06, "step": 338260 }, { "epoch": 96.01759863752484, "grad_norm": 0.00018353297491557896, "learning_rate": 4.019585580471189e-06, "loss": 1.4094635844230653e-06, "step": 338270 }, { "epoch": 96.0204371274482, "grad_norm": 0.00036750134313479066, "learning_rate": 4.016747090547829e-06, "loss": 2.3808330297470094e-06, "step": 338280 }, { "epoch": 96.02327561737155, "grad_norm": 0.0006484862533397973, "learning_rate": 4.013908600624468e-06, "loss": 2.0870938897132874e-06, "step": 338290 }, { "epoch": 96.02611410729492, "grad_norm": 0.0023639649152755737, "learning_rate": 4.011070110701108e-06, "loss": 2.0930543541908265e-06, "step": 338300 }, { "epoch": 96.02895259721828, "grad_norm": 0.0005447327857837081, "learning_rate": 4.0082316207777464e-06, "loss": 2.211146056652069e-06, "step": 338310 }, { "epoch": 96.03179108714164, "grad_norm": 0.00047791804536245763, "learning_rate": 4.005393130854386e-06, "loss": 2.1068379282951353e-06, "step": 338320 }, { "epoch": 96.034629577065, "grad_norm": 0.00016613936168141663, "learning_rate": 4.002554640931025e-06, "loss": 1.909211277961731e-06, "step": 338330 }, { "epoch": 96.03746806698837, "grad_norm": 0.0010385935893282294, "learning_rate": 3.999716151007664e-06, "loss": 2.4292618036270142e-06, "step": 338340 }, { "epoch": 96.04030655691172, "grad_norm": 0.0006948083755560219, "learning_rate": 3.9968776610843035e-06, "loss": 5.292892456054687e-06, "step": 338350 }, { "epoch": 96.04314504683508, "grad_norm": 0.00018272751185577363, "learning_rate": 3.994039171160942e-06, "loss": 2.8362497687339784e-06, "step": 338360 }, { "epoch": 96.04598353675844, "grad_norm": 0.00011389165592845529, "learning_rate": 3.991200681237582e-06, "loss": 2.199970185756683e-06, "step": 338370 }, { "epoch": 96.0488220266818, "grad_norm": 0.0003406562900636345, "learning_rate": 3.988362191314221e-06, "loss": 2.1671876311302185e-06, "step": 338380 }, { "epoch": 96.05166051660517, "grad_norm": 0.0004192977212369442, "learning_rate": 3.98552370139086e-06, "loss": 2.154521644115448e-06, "step": 338390 }, { "epoch": 96.05449900652853, "grad_norm": 0.0005279958131723106, "learning_rate": 3.982685211467499e-06, "loss": 1.965463161468506e-06, "step": 338400 }, { "epoch": 96.0573374964519, "grad_norm": 0.0005289165419526398, "learning_rate": 3.979846721544139e-06, "loss": 1.897476613521576e-06, "step": 338410 }, { "epoch": 96.06017598637524, "grad_norm": 0.0004136967472732067, "learning_rate": 3.977008231620778e-06, "loss": 2.1051615476608277e-06, "step": 338420 }, { "epoch": 96.0630144762986, "grad_norm": 0.0010383391054347157, "learning_rate": 3.9741697416974175e-06, "loss": 2.0366162061691286e-06, "step": 338430 }, { "epoch": 96.06585296622197, "grad_norm": 0.0006573208374902606, "learning_rate": 3.971331251774056e-06, "loss": 2.279132604598999e-06, "step": 338440 }, { "epoch": 96.06869145614533, "grad_norm": 0.00015493448881898075, "learning_rate": 3.968492761850696e-06, "loss": 3.208033740520477e-06, "step": 338450 }, { "epoch": 96.0715299460687, "grad_norm": 0.0015568636590614915, "learning_rate": 3.965654271927335e-06, "loss": 2.0291656255722048e-06, "step": 338460 }, { "epoch": 96.07436843599206, "grad_norm": 0.0033855331130325794, "learning_rate": 3.9628157820039746e-06, "loss": 2.5596469640731812e-06, "step": 338470 }, { "epoch": 96.07720692591542, "grad_norm": 0.00013021190534345806, "learning_rate": 3.959977292080613e-06, "loss": 2.407655119895935e-06, "step": 338480 }, { "epoch": 96.08004541583877, "grad_norm": 0.0012429236667230725, "learning_rate": 3.957138802157252e-06, "loss": 1.751817762851715e-06, "step": 338490 }, { "epoch": 96.08288390576213, "grad_norm": 0.0005682529299519956, "learning_rate": 3.954300312233892e-06, "loss": 1.991353929042816e-06, "step": 338500 }, { "epoch": 96.08288390576213, "eval_accuracy": 0.988872639409932, "eval_loss": 0.04816432297229767, "eval_runtime": 62.6239, "eval_samples_per_second": 251.134, "eval_steps_per_second": 3.928, "step": 338500 }, { "epoch": 96.0857223956855, "grad_norm": 0.00019599667575675994, "learning_rate": 3.951461822310531e-06, "loss": 2.4694949388504027e-06, "step": 338510 }, { "epoch": 96.08856088560886, "grad_norm": 0.00042304041562601924, "learning_rate": 3.94862333238717e-06, "loss": 3.4097582101821898e-06, "step": 338520 }, { "epoch": 96.09139937553222, "grad_norm": 0.00047495588660240173, "learning_rate": 3.945784842463809e-06, "loss": 2.0638108253479004e-06, "step": 338530 }, { "epoch": 96.09423786545558, "grad_norm": 0.0001095071856980212, "learning_rate": 3.942946352540448e-06, "loss": 1.504644751548767e-06, "step": 338540 }, { "epoch": 96.09707635537895, "grad_norm": 0.0015008215559646487, "learning_rate": 3.940107862617088e-06, "loss": 1.9261613488197325e-06, "step": 338550 }, { "epoch": 96.0999148453023, "grad_norm": 0.0005344607634469867, "learning_rate": 3.937269372693727e-06, "loss": 2.637505531311035e-06, "step": 338560 }, { "epoch": 96.10275333522566, "grad_norm": 0.0033748906571418047, "learning_rate": 3.934430882770366e-06, "loss": 3.368966281414032e-06, "step": 338570 }, { "epoch": 96.10559182514902, "grad_norm": 0.0007261699065566063, "learning_rate": 3.931592392847005e-06, "loss": 2.0578503608703612e-06, "step": 338580 }, { "epoch": 96.10843031507238, "grad_norm": 0.0006394449737854302, "learning_rate": 3.928753902923645e-06, "loss": 1.8633902072906494e-06, "step": 338590 }, { "epoch": 96.11126880499575, "grad_norm": 0.0009698125650174916, "learning_rate": 3.9259154130002845e-06, "loss": 1.6940757632255554e-06, "step": 338600 }, { "epoch": 96.11410729491911, "grad_norm": 0.0005868059233762324, "learning_rate": 3.923076923076923e-06, "loss": 2.5834888219833374e-06, "step": 338610 }, { "epoch": 96.11694578484246, "grad_norm": 0.0009254662436433136, "learning_rate": 3.920238433153563e-06, "loss": 3.137625753879547e-06, "step": 338620 }, { "epoch": 96.11978427476582, "grad_norm": 0.0003951620892621577, "learning_rate": 3.917399943230202e-06, "loss": 2.6671215891838075e-06, "step": 338630 }, { "epoch": 96.12262276468918, "grad_norm": 0.000123017089208588, "learning_rate": 3.9145614533068415e-06, "loss": 3.098323941230774e-06, "step": 338640 }, { "epoch": 96.12546125461255, "grad_norm": 0.0005767045659013093, "learning_rate": 3.91172296338348e-06, "loss": 1.8544495105743409e-06, "step": 338650 }, { "epoch": 96.12829974453591, "grad_norm": 0.0005449705058708787, "learning_rate": 3.908884473460119e-06, "loss": 2.272799611091614e-06, "step": 338660 }, { "epoch": 96.13113823445927, "grad_norm": 0.0002575482940301299, "learning_rate": 3.906045983536759e-06, "loss": 2.3746863007545473e-06, "step": 338670 }, { "epoch": 96.13397672438263, "grad_norm": 0.0010065223323181272, "learning_rate": 3.903207493613398e-06, "loss": 2.726353704929352e-06, "step": 338680 }, { "epoch": 96.13681521430598, "grad_norm": 0.001299537718296051, "learning_rate": 3.900369003690037e-06, "loss": 2.0388513803482054e-06, "step": 338690 }, { "epoch": 96.13965370422935, "grad_norm": 0.0005321188364177942, "learning_rate": 3.897530513766676e-06, "loss": 1.944601535797119e-06, "step": 338700 }, { "epoch": 96.14249219415271, "grad_norm": 0.0005362596712075174, "learning_rate": 3.894692023843315e-06, "loss": 2.290681004524231e-06, "step": 338710 }, { "epoch": 96.14533068407607, "grad_norm": 0.00047706326586194336, "learning_rate": 3.891853533919955e-06, "loss": 1.843087375164032e-06, "step": 338720 }, { "epoch": 96.14816917399943, "grad_norm": 9.21074315556325e-05, "learning_rate": 3.8890150439965935e-06, "loss": 2.261810004711151e-06, "step": 338730 }, { "epoch": 96.1510076639228, "grad_norm": 0.00020458766084630042, "learning_rate": 3.886176554073233e-06, "loss": 2.8872862458229063e-06, "step": 338740 }, { "epoch": 96.15384615384616, "grad_norm": 0.0003855237446259707, "learning_rate": 3.883338064149872e-06, "loss": 2.76174396276474e-06, "step": 338750 }, { "epoch": 96.15668464376951, "grad_norm": 0.00028526861569844186, "learning_rate": 3.880499574226512e-06, "loss": 2.081133425235748e-06, "step": 338760 }, { "epoch": 96.15952313369287, "grad_norm": 0.00025025801733136177, "learning_rate": 3.877661084303151e-06, "loss": 1.8589198589324951e-06, "step": 338770 }, { "epoch": 96.16236162361623, "grad_norm": 0.0003776912344619632, "learning_rate": 3.87482259437979e-06, "loss": 2.1757557988166807e-06, "step": 338780 }, { "epoch": 96.1652001135396, "grad_norm": 0.0001562282704981044, "learning_rate": 3.87198410445643e-06, "loss": 2.1101906895637514e-06, "step": 338790 }, { "epoch": 96.16803860346296, "grad_norm": 0.0004422577621880919, "learning_rate": 3.869145614533069e-06, "loss": 2.8541311621665954e-06, "step": 338800 }, { "epoch": 96.17087709338632, "grad_norm": 0.0017424470279365778, "learning_rate": 3.866307124609708e-06, "loss": 1.3163313269615174e-06, "step": 338810 }, { "epoch": 96.17371558330967, "grad_norm": 0.0002212091931141913, "learning_rate": 3.863468634686347e-06, "loss": 3.0707567930221556e-06, "step": 338820 }, { "epoch": 96.17655407323304, "grad_norm": 0.0012087882496416569, "learning_rate": 3.860630144762986e-06, "loss": 2.1783635020256042e-06, "step": 338830 }, { "epoch": 96.1793925631564, "grad_norm": 0.0007299677818082273, "learning_rate": 3.857791654839626e-06, "loss": 1.4780089259147644e-06, "step": 338840 }, { "epoch": 96.18223105307976, "grad_norm": 0.000405046041123569, "learning_rate": 3.854953164916265e-06, "loss": 1.950189471244812e-06, "step": 338850 }, { "epoch": 96.18506954300312, "grad_norm": 0.00021274764731060714, "learning_rate": 3.8521146749929035e-06, "loss": 1.638755202293396e-06, "step": 338860 }, { "epoch": 96.18790803292649, "grad_norm": 0.00047798582818359137, "learning_rate": 3.849276185069543e-06, "loss": 4.093721508979798e-06, "step": 338870 }, { "epoch": 96.19074652284985, "grad_norm": 0.0008169877110049129, "learning_rate": 3.846437695146182e-06, "loss": 1.9960105419158934e-06, "step": 338880 }, { "epoch": 96.1935850127732, "grad_norm": 0.00011544820154085755, "learning_rate": 3.843599205222822e-06, "loss": 2.4784356355667113e-06, "step": 338890 }, { "epoch": 96.19642350269656, "grad_norm": 0.0006181843928061426, "learning_rate": 3.8407607152994605e-06, "loss": 2.298690378665924e-06, "step": 338900 }, { "epoch": 96.19926199261992, "grad_norm": 0.00031347080948762596, "learning_rate": 3.8379222253761e-06, "loss": 1.5769153833389283e-06, "step": 338910 }, { "epoch": 96.20210048254329, "grad_norm": 0.00021774243214167655, "learning_rate": 3.835083735452739e-06, "loss": 1.7933547496795653e-06, "step": 338920 }, { "epoch": 96.20493897246665, "grad_norm": 0.0018880729330703616, "learning_rate": 3.832245245529379e-06, "loss": 1.879781484603882e-06, "step": 338930 }, { "epoch": 96.20777746239001, "grad_norm": 0.00030380228417925537, "learning_rate": 3.829406755606018e-06, "loss": 2.352893352508545e-06, "step": 338940 }, { "epoch": 96.21061595231338, "grad_norm": 0.00029549194732680917, "learning_rate": 3.826568265682657e-06, "loss": 2.21841037273407e-06, "step": 338950 }, { "epoch": 96.21345444223672, "grad_norm": 0.00036700384225696325, "learning_rate": 3.823729775759297e-06, "loss": 2.091005444526672e-06, "step": 338960 }, { "epoch": 96.21629293216009, "grad_norm": 0.0020115459337830544, "learning_rate": 3.820891285835936e-06, "loss": 3.1759962439537047e-06, "step": 338970 }, { "epoch": 96.21913142208345, "grad_norm": 0.0008472168701700866, "learning_rate": 3.8180527959125746e-06, "loss": 2.0874664187431336e-06, "step": 338980 }, { "epoch": 96.22196991200681, "grad_norm": 0.0033066843170672655, "learning_rate": 3.815214305989214e-06, "loss": 2.0042061805725098e-06, "step": 338990 }, { "epoch": 96.22480840193018, "grad_norm": 0.0006545660435222089, "learning_rate": 3.812375816065853e-06, "loss": 2.632103860378265e-06, "step": 339000 }, { "epoch": 96.22480840193018, "eval_accuracy": 0.9891905639982196, "eval_loss": 0.04769554361701012, "eval_runtime": 66.9025, "eval_samples_per_second": 235.073, "eval_steps_per_second": 3.677, "step": 339000 }, { "epoch": 96.22764689185354, "grad_norm": 5.6548320571891963e-05, "learning_rate": 3.8095373261424927e-06, "loss": 1.5694648027420044e-06, "step": 339010 }, { "epoch": 96.2304853817769, "grad_norm": 0.0003870792279485613, "learning_rate": 3.8066988362191316e-06, "loss": 1.4880672097206115e-06, "step": 339020 }, { "epoch": 96.23332387170025, "grad_norm": 0.0003640238428488374, "learning_rate": 3.8038603462957704e-06, "loss": 2.2046267986297606e-06, "step": 339030 }, { "epoch": 96.23616236162361, "grad_norm": 0.000942117883823812, "learning_rate": 3.80102185637241e-06, "loss": 2.0068138837814333e-06, "step": 339040 }, { "epoch": 96.23900085154698, "grad_norm": 0.0014575215755030513, "learning_rate": 3.7981833664490494e-06, "loss": 2.9226765036582945e-06, "step": 339050 }, { "epoch": 96.24183934147034, "grad_norm": 0.004744474310427904, "learning_rate": 3.7953448765256886e-06, "loss": 2.679973840713501e-06, "step": 339060 }, { "epoch": 96.2446778313937, "grad_norm": 0.0015368900494650006, "learning_rate": 3.792506386602328e-06, "loss": 2.2247433662414552e-06, "step": 339070 }, { "epoch": 96.24751632131706, "grad_norm": 0.0004459767078515142, "learning_rate": 3.7896678966789667e-06, "loss": 2.085976302623749e-06, "step": 339080 }, { "epoch": 96.25035481124041, "grad_norm": 0.00012584203795995563, "learning_rate": 3.7868294067556064e-06, "loss": 1.2919306755065918e-06, "step": 339090 }, { "epoch": 96.25319330116378, "grad_norm": 0.0004484803357627243, "learning_rate": 3.7839909168322452e-06, "loss": 1.6711652278900147e-06, "step": 339100 }, { "epoch": 96.25603179108714, "grad_norm": 0.00029574678046628833, "learning_rate": 3.781152426908885e-06, "loss": 1.3325363397598266e-06, "step": 339110 }, { "epoch": 96.2588702810105, "grad_norm": 0.00018217478645965457, "learning_rate": 3.7783139369855237e-06, "loss": 1.776963472366333e-06, "step": 339120 }, { "epoch": 96.26170877093386, "grad_norm": 0.00011048536543967202, "learning_rate": 3.775475447062163e-06, "loss": 2.2124499082565307e-06, "step": 339130 }, { "epoch": 96.26454726085723, "grad_norm": 0.00032325522624887526, "learning_rate": 3.7726369571388027e-06, "loss": 2.8112903237342834e-06, "step": 339140 }, { "epoch": 96.26738575078059, "grad_norm": 0.000535256345756352, "learning_rate": 3.7697984672154415e-06, "loss": 2.292543649673462e-06, "step": 339150 }, { "epoch": 96.27022424070394, "grad_norm": 0.0007639144314453006, "learning_rate": 3.766959977292081e-06, "loss": 2.022460103034973e-06, "step": 339160 }, { "epoch": 96.2730627306273, "grad_norm": 0.0002916711673606187, "learning_rate": 3.76412148736872e-06, "loss": 1.9425526261329653e-06, "step": 339170 }, { "epoch": 96.27590122055067, "grad_norm": 0.0030803782865405083, "learning_rate": 3.761282997445359e-06, "loss": 2.399645745754242e-06, "step": 339180 }, { "epoch": 96.27873971047403, "grad_norm": 0.002009290736168623, "learning_rate": 3.7584445075219985e-06, "loss": 2.2953376173973083e-06, "step": 339190 }, { "epoch": 96.28157820039739, "grad_norm": 0.00028532446594908834, "learning_rate": 3.7556060175986374e-06, "loss": 2.599693834781647e-06, "step": 339200 }, { "epoch": 96.28441669032075, "grad_norm": 0.0004094603064004332, "learning_rate": 3.752767527675277e-06, "loss": 2.0675361156463623e-06, "step": 339210 }, { "epoch": 96.28725518024412, "grad_norm": 0.00024888510233722627, "learning_rate": 3.7499290377519163e-06, "loss": 2.345256507396698e-06, "step": 339220 }, { "epoch": 96.29009367016747, "grad_norm": 0.00010972266318276525, "learning_rate": 3.747090547828555e-06, "loss": 4.8371031880378725e-06, "step": 339230 }, { "epoch": 96.29293216009083, "grad_norm": 6.710040179314092e-05, "learning_rate": 3.744252057905195e-06, "loss": 1.8648803234100343e-06, "step": 339240 }, { "epoch": 96.29577065001419, "grad_norm": 0.00030548908398486674, "learning_rate": 3.7414135679818336e-06, "loss": 1.7952173948287963e-06, "step": 339250 }, { "epoch": 96.29860913993755, "grad_norm": 0.0002990502689499408, "learning_rate": 3.7385750780584733e-06, "loss": 1.846998929977417e-06, "step": 339260 }, { "epoch": 96.30144762986092, "grad_norm": 0.0004074394528288394, "learning_rate": 3.735736588135112e-06, "loss": 2.5799497961997984e-06, "step": 339270 }, { "epoch": 96.30428611978428, "grad_norm": 0.001481143874116242, "learning_rate": 3.732898098211751e-06, "loss": 2.501159906387329e-06, "step": 339280 }, { "epoch": 96.30712460970764, "grad_norm": 0.0010786614147946239, "learning_rate": 3.7300596082883907e-06, "loss": 3.0664727091789245e-06, "step": 339290 }, { "epoch": 96.30996309963099, "grad_norm": 0.00036435481160879135, "learning_rate": 3.72722111836503e-06, "loss": 1.516193151473999e-06, "step": 339300 }, { "epoch": 96.31280158955435, "grad_norm": 0.0005508439498953521, "learning_rate": 3.7243826284416696e-06, "loss": 3.2940879464149475e-06, "step": 339310 }, { "epoch": 96.31564007947772, "grad_norm": 0.00046883305185474455, "learning_rate": 3.7215441385183084e-06, "loss": 1.6570091247558594e-06, "step": 339320 }, { "epoch": 96.31847856940108, "grad_norm": 5.828810026287101e-05, "learning_rate": 3.718705648594948e-06, "loss": 1.6488134860992432e-06, "step": 339330 }, { "epoch": 96.32131705932444, "grad_norm": 0.0001824006758397445, "learning_rate": 3.715867158671587e-06, "loss": 1.582317054271698e-06, "step": 339340 }, { "epoch": 96.3241555492478, "grad_norm": 0.00021406116138678044, "learning_rate": 3.713028668748226e-06, "loss": 2.510286867618561e-06, "step": 339350 }, { "epoch": 96.32699403917115, "grad_norm": 0.00024998513981699944, "learning_rate": 3.7101901788248655e-06, "loss": 1.8570572137832642e-06, "step": 339360 }, { "epoch": 96.32983252909452, "grad_norm": 0.0006364061846397817, "learning_rate": 3.7073516889015043e-06, "loss": 1.2101605534553529e-06, "step": 339370 }, { "epoch": 96.33267101901788, "grad_norm": 0.00026699775480665267, "learning_rate": 3.704513198978144e-06, "loss": 1.4744699001312256e-06, "step": 339380 }, { "epoch": 96.33550950894124, "grad_norm": 0.0007989353034645319, "learning_rate": 3.7016747090547832e-06, "loss": 2.3135915398597716e-06, "step": 339390 }, { "epoch": 96.3383479988646, "grad_norm": 0.00024386221775785089, "learning_rate": 3.698836219131422e-06, "loss": 3.7008896470069887e-06, "step": 339400 }, { "epoch": 96.34118648878797, "grad_norm": 0.0008045517024584115, "learning_rate": 3.6959977292080618e-06, "loss": 3.7491321563720702e-06, "step": 339410 }, { "epoch": 96.34402497871133, "grad_norm": 0.0002641854516696185, "learning_rate": 3.6931592392847006e-06, "loss": 2.6497989892959593e-06, "step": 339420 }, { "epoch": 96.34686346863468, "grad_norm": 0.0005839474615640938, "learning_rate": 3.6903207493613403e-06, "loss": 1.7724931240081788e-06, "step": 339430 }, { "epoch": 96.34970195855804, "grad_norm": 0.0005890113534405828, "learning_rate": 3.687482259437979e-06, "loss": 2.399459481239319e-06, "step": 339440 }, { "epoch": 96.3525404484814, "grad_norm": 0.00024796847719699144, "learning_rate": 3.684643769514618e-06, "loss": 4.279240965843201e-06, "step": 339450 }, { "epoch": 96.35537893840477, "grad_norm": 0.0005481143016368151, "learning_rate": 3.6818052795912576e-06, "loss": 2.2195279598236082e-06, "step": 339460 }, { "epoch": 96.35821742832813, "grad_norm": 0.0004805642238352448, "learning_rate": 3.678966789667897e-06, "loss": 1.6614794731140137e-06, "step": 339470 }, { "epoch": 96.3610559182515, "grad_norm": 0.0003067337966058403, "learning_rate": 3.676128299744536e-06, "loss": 3.0530616641044615e-06, "step": 339480 }, { "epoch": 96.36389440817486, "grad_norm": 0.00031769138877280056, "learning_rate": 3.6732898098211754e-06, "loss": 1.9080936908721926e-06, "step": 339490 }, { "epoch": 96.3667328980982, "grad_norm": 0.0005783465458080173, "learning_rate": 3.6704513198978142e-06, "loss": 1.6668811440467834e-06, "step": 339500 }, { "epoch": 96.3667328980982, "eval_accuracy": 0.9890633941629046, "eval_loss": 0.04829546809196472, "eval_runtime": 52.3202, "eval_samples_per_second": 300.591, "eval_steps_per_second": 4.702, "step": 339500 }, { "epoch": 96.36957138802157, "grad_norm": 0.0016739001730456948, "learning_rate": 3.667612829974454e-06, "loss": 3.8133934140205382e-06, "step": 339510 }, { "epoch": 96.37240987794493, "grad_norm": 0.0003969751996919513, "learning_rate": 3.6647743400510927e-06, "loss": 1.6147270798683166e-06, "step": 339520 }, { "epoch": 96.3752483678683, "grad_norm": 0.0007233413634821773, "learning_rate": 3.6619358501277324e-06, "loss": 2.741068601608276e-06, "step": 339530 }, { "epoch": 96.37808685779166, "grad_norm": 0.0005850993329659104, "learning_rate": 3.6590973602043713e-06, "loss": 2.0211562514305113e-06, "step": 339540 }, { "epoch": 96.38092534771502, "grad_norm": 0.0007609057938680053, "learning_rate": 3.6562588702810105e-06, "loss": 2.7470290660858153e-06, "step": 339550 }, { "epoch": 96.38376383763837, "grad_norm": 0.00021517784625757486, "learning_rate": 3.65342038035765e-06, "loss": 1.955777406692505e-06, "step": 339560 }, { "epoch": 96.38660232756173, "grad_norm": 0.0003698149521369487, "learning_rate": 3.650581890434289e-06, "loss": 1.7549842596054078e-06, "step": 339570 }, { "epoch": 96.3894408174851, "grad_norm": 0.0003613766166381538, "learning_rate": 3.6477434005109287e-06, "loss": 1.312047243118286e-06, "step": 339580 }, { "epoch": 96.39227930740846, "grad_norm": 0.0012671193107962608, "learning_rate": 3.6449049105875675e-06, "loss": 4.609487950801849e-06, "step": 339590 }, { "epoch": 96.39511779733182, "grad_norm": 0.0009646484395489097, "learning_rate": 3.6420664206642064e-06, "loss": 2.2863969206809998e-06, "step": 339600 }, { "epoch": 96.39795628725518, "grad_norm": 0.00015259267820511013, "learning_rate": 3.639227930740846e-06, "loss": 1.7445534467697144e-06, "step": 339610 }, { "epoch": 96.40079477717855, "grad_norm": 0.00046787370229139924, "learning_rate": 3.636389440817485e-06, "loss": 2.562999725341797e-06, "step": 339620 }, { "epoch": 96.4036332671019, "grad_norm": 0.0005138317355886102, "learning_rate": 3.6335509508941246e-06, "loss": 1.5392899513244628e-06, "step": 339630 }, { "epoch": 96.40647175702526, "grad_norm": 0.0012291030725464225, "learning_rate": 3.630712460970764e-06, "loss": 2.261810004711151e-06, "step": 339640 }, { "epoch": 96.40931024694862, "grad_norm": 0.0011967220343649387, "learning_rate": 3.627873971047403e-06, "loss": 2.7639791369438173e-06, "step": 339650 }, { "epoch": 96.41214873687198, "grad_norm": 0.0006093801348470151, "learning_rate": 3.6250354811240423e-06, "loss": 2.1157786250114443e-06, "step": 339660 }, { "epoch": 96.41498722679535, "grad_norm": 0.008574808947741985, "learning_rate": 3.622196991200681e-06, "loss": 3.3468008041381835e-06, "step": 339670 }, { "epoch": 96.41782571671871, "grad_norm": 0.0004443033249117434, "learning_rate": 3.619358501277321e-06, "loss": 2.9765069484710693e-06, "step": 339680 }, { "epoch": 96.42066420664207, "grad_norm": 0.0006881297449581325, "learning_rate": 3.6165200113539597e-06, "loss": 1.4992430806159973e-06, "step": 339690 }, { "epoch": 96.42350269656542, "grad_norm": 0.009919680655002594, "learning_rate": 3.6136815214305994e-06, "loss": 4.668906331062317e-06, "step": 339700 }, { "epoch": 96.42634118648878, "grad_norm": 0.0004535635525826365, "learning_rate": 3.610843031507238e-06, "loss": 2.3849308490753176e-06, "step": 339710 }, { "epoch": 96.42917967641215, "grad_norm": 0.00030672052525915205, "learning_rate": 3.6080045415838775e-06, "loss": 1.9827857613563538e-06, "step": 339720 }, { "epoch": 96.43201816633551, "grad_norm": 0.00039622464100830257, "learning_rate": 3.605166051660517e-06, "loss": 2.614222466945648e-06, "step": 339730 }, { "epoch": 96.43485665625887, "grad_norm": 0.00040695618372410536, "learning_rate": 3.602327561737156e-06, "loss": 3.061071038246155e-06, "step": 339740 }, { "epoch": 96.43769514618224, "grad_norm": 0.002568315016105771, "learning_rate": 3.5994890718137957e-06, "loss": 1.983903348445892e-06, "step": 339750 }, { "epoch": 96.4405336361056, "grad_norm": 0.0016255209920927882, "learning_rate": 3.5966505818904345e-06, "loss": 3.1348317861557007e-06, "step": 339760 }, { "epoch": 96.44337212602895, "grad_norm": 0.0006852048682048917, "learning_rate": 3.5938120919670733e-06, "loss": 2.302415668964386e-06, "step": 339770 }, { "epoch": 96.44621061595231, "grad_norm": 0.005055400542914867, "learning_rate": 3.590973602043713e-06, "loss": 4.584714770317078e-06, "step": 339780 }, { "epoch": 96.44904910587567, "grad_norm": 0.00030317105120047927, "learning_rate": 3.588135112120352e-06, "loss": 1.722387969493866e-06, "step": 339790 }, { "epoch": 96.45188759579904, "grad_norm": 0.00038466963451355696, "learning_rate": 3.5852966221969915e-06, "loss": 1.9822269678115846e-06, "step": 339800 }, { "epoch": 96.4547260857224, "grad_norm": 0.0015899045392870903, "learning_rate": 3.5824581322736308e-06, "loss": 2.748146653175354e-06, "step": 339810 }, { "epoch": 96.45756457564576, "grad_norm": 0.0012030210345983505, "learning_rate": 3.5796196423502696e-06, "loss": 2.306327223777771e-06, "step": 339820 }, { "epoch": 96.46040306556911, "grad_norm": 0.002839375054463744, "learning_rate": 3.5767811524269093e-06, "loss": 2.288632094860077e-06, "step": 339830 }, { "epoch": 96.46324155549247, "grad_norm": 0.00031854608096182346, "learning_rate": 3.573942662503548e-06, "loss": 1.9140541553497313e-06, "step": 339840 }, { "epoch": 96.46608004541584, "grad_norm": 0.0006656370242126286, "learning_rate": 3.571104172580188e-06, "loss": 1.9259750843048096e-06, "step": 339850 }, { "epoch": 96.4689185353392, "grad_norm": 9.791567572392523e-05, "learning_rate": 3.5682656826568266e-06, "loss": 2.259202301502228e-06, "step": 339860 }, { "epoch": 96.47175702526256, "grad_norm": 0.00014989163901191205, "learning_rate": 3.5654271927334655e-06, "loss": 2.426840364933014e-06, "step": 339870 }, { "epoch": 96.47459551518592, "grad_norm": 0.0006320833344943821, "learning_rate": 3.562588702810105e-06, "loss": 1.899339258670807e-06, "step": 339880 }, { "epoch": 96.47743400510929, "grad_norm": 0.0009437575936317444, "learning_rate": 3.5597502128867444e-06, "loss": 1.9937753677368166e-06, "step": 339890 }, { "epoch": 96.48027249503264, "grad_norm": 0.00028847838984802365, "learning_rate": 3.5569117229633837e-06, "loss": 2.096593379974365e-06, "step": 339900 }, { "epoch": 96.483110984956, "grad_norm": 0.0010851130355149508, "learning_rate": 3.554073233040023e-06, "loss": 2.193078398704529e-06, "step": 339910 }, { "epoch": 96.48594947487936, "grad_norm": 0.0008154542301781476, "learning_rate": 3.5512347431166618e-06, "loss": 2.020038664340973e-06, "step": 339920 }, { "epoch": 96.48878796480273, "grad_norm": 0.00014313808060251176, "learning_rate": 3.5483962531933014e-06, "loss": 1.6443431377410888e-06, "step": 339930 }, { "epoch": 96.49162645472609, "grad_norm": 0.0007741495501250029, "learning_rate": 3.5455577632699403e-06, "loss": 1.9149854779243468e-06, "step": 339940 }, { "epoch": 96.49446494464945, "grad_norm": 0.0005139646818861365, "learning_rate": 3.54271927334658e-06, "loss": 1.8371269106864928e-06, "step": 339950 }, { "epoch": 96.49730343457281, "grad_norm": 0.00029549028840847313, "learning_rate": 3.5398807834232188e-06, "loss": 2.180412411689758e-06, "step": 339960 }, { "epoch": 96.50014192449616, "grad_norm": 0.0005479026585817337, "learning_rate": 3.5370422934998585e-06, "loss": 3.167055547237396e-06, "step": 339970 }, { "epoch": 96.50298041441953, "grad_norm": 0.0002182470343541354, "learning_rate": 3.5342038035764977e-06, "loss": 2.9260292649269106e-06, "step": 339980 }, { "epoch": 96.50581890434289, "grad_norm": 0.00017017564096022397, "learning_rate": 3.5313653136531366e-06, "loss": 1.5025958418846131e-06, "step": 339990 }, { "epoch": 96.50865739426625, "grad_norm": 0.000389512802939862, "learning_rate": 3.5285268237297762e-06, "loss": 1.7717480659484864e-06, "step": 340000 }, { "epoch": 96.50865739426625, "eval_accuracy": 0.988999809245247, "eval_loss": 0.048124972730875015, "eval_runtime": 60.7271, "eval_samples_per_second": 258.978, "eval_steps_per_second": 4.051, "step": 340000 }, { "epoch": 96.51149588418961, "grad_norm": 0.00021244297386147082, "learning_rate": 3.525688333806415e-06, "loss": 1.8604099750518798e-06, "step": 340010 }, { "epoch": 96.51433437411298, "grad_norm": 0.0009756603394635022, "learning_rate": 3.5228498438830548e-06, "loss": 2.096407115459442e-06, "step": 340020 }, { "epoch": 96.51717286403633, "grad_norm": 0.0003167391405440867, "learning_rate": 3.5200113539596936e-06, "loss": 5.742162466049195e-06, "step": 340030 }, { "epoch": 96.52001135395969, "grad_norm": 0.0002341320359846577, "learning_rate": 3.5171728640363324e-06, "loss": 3.1672418117523195e-06, "step": 340040 }, { "epoch": 96.52284984388305, "grad_norm": 0.00023956816585268825, "learning_rate": 3.514334374112972e-06, "loss": 4.5888125896453856e-06, "step": 340050 }, { "epoch": 96.52568833380641, "grad_norm": 0.00018776515207719058, "learning_rate": 3.5114958841896114e-06, "loss": 1.931190490722656e-06, "step": 340060 }, { "epoch": 96.52852682372978, "grad_norm": 0.0003029947110917419, "learning_rate": 3.5086573942662506e-06, "loss": 1.8484890460968017e-06, "step": 340070 }, { "epoch": 96.53136531365314, "grad_norm": 0.0006753240595571697, "learning_rate": 3.50581890434289e-06, "loss": 2.317875623703003e-06, "step": 340080 }, { "epoch": 96.5342038035765, "grad_norm": 0.0011692213593050838, "learning_rate": 3.5029804144195287e-06, "loss": 4.973262548446655e-06, "step": 340090 }, { "epoch": 96.53704229349985, "grad_norm": 0.0023792956490069628, "learning_rate": 3.5001419244961684e-06, "loss": 2.380460500717163e-06, "step": 340100 }, { "epoch": 96.53988078342321, "grad_norm": 0.0004879954503849149, "learning_rate": 3.4973034345728072e-06, "loss": 4.725344479084015e-06, "step": 340110 }, { "epoch": 96.54271927334658, "grad_norm": 0.000284145848127082, "learning_rate": 3.494464944649447e-06, "loss": 2.7922913432121275e-06, "step": 340120 }, { "epoch": 96.54555776326994, "grad_norm": 0.000135641879751347, "learning_rate": 3.4916264547260857e-06, "loss": 1.9952654838562013e-06, "step": 340130 }, { "epoch": 96.5483962531933, "grad_norm": 0.0002907783491536975, "learning_rate": 3.488787964802725e-06, "loss": 2.7280300855636595e-06, "step": 340140 }, { "epoch": 96.55123474311667, "grad_norm": 9.014066745294258e-05, "learning_rate": 3.4859494748793647e-06, "loss": 2.6104971766471863e-06, "step": 340150 }, { "epoch": 96.55407323304003, "grad_norm": 0.0006315290811471641, "learning_rate": 3.4831109849560035e-06, "loss": 1.917034387588501e-06, "step": 340160 }, { "epoch": 96.55691172296338, "grad_norm": 0.00027342361863702536, "learning_rate": 3.480272495032643e-06, "loss": 2.630800008773804e-06, "step": 340170 }, { "epoch": 96.55975021288674, "grad_norm": 0.0001535452902317047, "learning_rate": 3.477434005109282e-06, "loss": 3.889389336109161e-06, "step": 340180 }, { "epoch": 96.5625887028101, "grad_norm": 0.0003233767347410321, "learning_rate": 3.474595515185921e-06, "loss": 2.6028603315353395e-06, "step": 340190 }, { "epoch": 96.56542719273347, "grad_norm": 0.0002722029166761786, "learning_rate": 3.4717570252625605e-06, "loss": 1.9082799553871155e-06, "step": 340200 }, { "epoch": 96.56826568265683, "grad_norm": 0.00043706121505238116, "learning_rate": 3.4689185353391994e-06, "loss": 2.6404857635498045e-06, "step": 340210 }, { "epoch": 96.57110417258019, "grad_norm": 0.00021849451877642423, "learning_rate": 3.466080045415839e-06, "loss": 2.1580606698989866e-06, "step": 340220 }, { "epoch": 96.57394266250355, "grad_norm": 0.0009774118661880493, "learning_rate": 3.4632415554924783e-06, "loss": 2.3337081074714662e-06, "step": 340230 }, { "epoch": 96.5767811524269, "grad_norm": 0.002232113154605031, "learning_rate": 3.460403065569117e-06, "loss": 2.810731530189514e-06, "step": 340240 }, { "epoch": 96.57961964235027, "grad_norm": 0.0001930977014126256, "learning_rate": 3.457564575645757e-06, "loss": 1.418590545654297e-06, "step": 340250 }, { "epoch": 96.58245813227363, "grad_norm": 0.00017614553507883102, "learning_rate": 3.4547260857223957e-06, "loss": 9.151175618171692e-07, "step": 340260 }, { "epoch": 96.58529662219699, "grad_norm": 0.0004009850090369582, "learning_rate": 3.4518875957990353e-06, "loss": 2.0192936062812803e-06, "step": 340270 }, { "epoch": 96.58813511212036, "grad_norm": 0.00011801051732618362, "learning_rate": 3.449049105875674e-06, "loss": 3.3777207136154176e-06, "step": 340280 }, { "epoch": 96.59097360204372, "grad_norm": 0.0001654263905948028, "learning_rate": 3.446210615952314e-06, "loss": 2.525560557842255e-06, "step": 340290 }, { "epoch": 96.59381209196707, "grad_norm": 0.001296044560149312, "learning_rate": 3.4433721260289527e-06, "loss": 1.989305019378662e-06, "step": 340300 }, { "epoch": 96.59665058189043, "grad_norm": 0.001304955338127911, "learning_rate": 3.440533636105592e-06, "loss": 3.0837953090667723e-06, "step": 340310 }, { "epoch": 96.59948907181379, "grad_norm": 0.00026693890686146915, "learning_rate": 3.437695146182231e-06, "loss": 1.582317054271698e-06, "step": 340320 }, { "epoch": 96.60232756173716, "grad_norm": 0.000488949881400913, "learning_rate": 3.4348566562588705e-06, "loss": 1.4759600162506103e-06, "step": 340330 }, { "epoch": 96.60516605166052, "grad_norm": 0.00018021190771833062, "learning_rate": 3.43201816633551e-06, "loss": 1.6083940863609314e-06, "step": 340340 }, { "epoch": 96.60800454158388, "grad_norm": 0.0006858204724267125, "learning_rate": 3.429179676412149e-06, "loss": 2.317875623703003e-06, "step": 340350 }, { "epoch": 96.61084303150724, "grad_norm": 0.0007065875688567758, "learning_rate": 3.426341186488788e-06, "loss": 1.4670193195343018e-06, "step": 340360 }, { "epoch": 96.61368152143059, "grad_norm": 0.0006188328843563795, "learning_rate": 3.4235026965654275e-06, "loss": 2.2001564502716065e-06, "step": 340370 }, { "epoch": 96.61652001135396, "grad_norm": 0.0004964584950357676, "learning_rate": 3.4206642066420663e-06, "loss": 2.518855035305023e-06, "step": 340380 }, { "epoch": 96.61935850127732, "grad_norm": 0.0016878421884030104, "learning_rate": 3.417825716718706e-06, "loss": 2.2256746888160707e-06, "step": 340390 }, { "epoch": 96.62219699120068, "grad_norm": 0.0008513984503224492, "learning_rate": 3.4149872267953453e-06, "loss": 1.969747245311737e-06, "step": 340400 }, { "epoch": 96.62503548112404, "grad_norm": 0.0009425572352483869, "learning_rate": 3.412148736871984e-06, "loss": 1.9920989871025085e-06, "step": 340410 }, { "epoch": 96.6278739710474, "grad_norm": 0.0005267277010716498, "learning_rate": 3.4093102469486238e-06, "loss": 2.6967376470565795e-06, "step": 340420 }, { "epoch": 96.63071246097077, "grad_norm": 0.00012353707279544324, "learning_rate": 3.4064717570252626e-06, "loss": 1.860596239566803e-06, "step": 340430 }, { "epoch": 96.63355095089412, "grad_norm": 0.0002506317978259176, "learning_rate": 3.4036332671019023e-06, "loss": 3.2370910048484804e-06, "step": 340440 }, { "epoch": 96.63638944081748, "grad_norm": 0.00029300403548404574, "learning_rate": 3.400794777178541e-06, "loss": 2.0872801542282103e-06, "step": 340450 }, { "epoch": 96.63922793074084, "grad_norm": 0.00010350864613428712, "learning_rate": 3.39795628725518e-06, "loss": 1.7598271369934081e-06, "step": 340460 }, { "epoch": 96.64206642066421, "grad_norm": 0.00014791631838306785, "learning_rate": 3.3951177973318196e-06, "loss": 2.5989487767219543e-06, "step": 340470 }, { "epoch": 96.64490491058757, "grad_norm": 8.537244139006361e-05, "learning_rate": 3.392279307408459e-06, "loss": 2.1850690245628355e-06, "step": 340480 }, { "epoch": 96.64774340051093, "grad_norm": 0.00037140646600164473, "learning_rate": 3.389440817485098e-06, "loss": 3.7260353565216066e-06, "step": 340490 }, { "epoch": 96.6505818904343, "grad_norm": 0.000802824564743787, "learning_rate": 3.3866023275617374e-06, "loss": 2.687238156795502e-06, "step": 340500 }, { "epoch": 96.6505818904343, "eval_accuracy": 0.9888090544922744, "eval_loss": 0.047088515013456345, "eval_runtime": 71.1118, "eval_samples_per_second": 221.159, "eval_steps_per_second": 3.459, "step": 340500 }, { "epoch": 96.65342038035764, "grad_norm": 0.0008784205419942737, "learning_rate": 3.3837638376383762e-06, "loss": 1.913122832775116e-06, "step": 340510 }, { "epoch": 96.65625887028101, "grad_norm": 0.00022669656027574092, "learning_rate": 3.380925347715016e-06, "loss": 1.9123777747154237e-06, "step": 340520 }, { "epoch": 96.65909736020437, "grad_norm": 0.00025305384770035744, "learning_rate": 3.3780868577916548e-06, "loss": 2.836063504219055e-06, "step": 340530 }, { "epoch": 96.66193585012773, "grad_norm": 0.00041781397885642946, "learning_rate": 3.3752483678682944e-06, "loss": 2.2977590560913084e-06, "step": 340540 }, { "epoch": 96.6647743400511, "grad_norm": 0.0006509210797958076, "learning_rate": 3.3724098779449333e-06, "loss": 2.121739089488983e-06, "step": 340550 }, { "epoch": 96.66761282997446, "grad_norm": 0.0001539206859888509, "learning_rate": 3.3695713880215725e-06, "loss": 2.4300068616867064e-06, "step": 340560 }, { "epoch": 96.67045131989781, "grad_norm": 0.000722185242921114, "learning_rate": 3.3667328980982118e-06, "loss": 1.831352710723877e-06, "step": 340570 }, { "epoch": 96.67328980982117, "grad_norm": 0.0012829602928832173, "learning_rate": 3.363894408174851e-06, "loss": 2.162344753742218e-06, "step": 340580 }, { "epoch": 96.67612829974453, "grad_norm": 0.0001125862036133185, "learning_rate": 3.3610559182514907e-06, "loss": 1.856498420238495e-06, "step": 340590 }, { "epoch": 96.6789667896679, "grad_norm": 0.000655328796710819, "learning_rate": 3.3582174283281296e-06, "loss": 1.4785677194595336e-06, "step": 340600 }, { "epoch": 96.68180527959126, "grad_norm": 0.0005195353296585381, "learning_rate": 3.3553789384047692e-06, "loss": 1.907534897327423e-06, "step": 340610 }, { "epoch": 96.68464376951462, "grad_norm": 0.0005653938860632479, "learning_rate": 3.352540448481408e-06, "loss": 2.9830262064933777e-06, "step": 340620 }, { "epoch": 96.68748225943799, "grad_norm": 0.0003851987421512604, "learning_rate": 3.349701958558047e-06, "loss": 2.5441870093345644e-06, "step": 340630 }, { "epoch": 96.69032074936133, "grad_norm": 0.0019188588485121727, "learning_rate": 3.3468634686346866e-06, "loss": 3.073364496231079e-06, "step": 340640 }, { "epoch": 96.6931592392847, "grad_norm": 0.00019978283671662211, "learning_rate": 3.344024978711326e-06, "loss": 2.1845102310180663e-06, "step": 340650 }, { "epoch": 96.69599772920806, "grad_norm": 0.0003001574950758368, "learning_rate": 3.341186488787965e-06, "loss": 1.61733478307724e-06, "step": 340660 }, { "epoch": 96.69883621913142, "grad_norm": 0.0009709279984235764, "learning_rate": 3.3383479988646044e-06, "loss": 2.0772218704223634e-06, "step": 340670 }, { "epoch": 96.70167470905479, "grad_norm": 0.0009379965486004949, "learning_rate": 3.335509508941243e-06, "loss": 1.8831342458724976e-06, "step": 340680 }, { "epoch": 96.70451319897815, "grad_norm": 0.0023289357777684927, "learning_rate": 3.332671019017883e-06, "loss": 2.8869137167930605e-06, "step": 340690 }, { "epoch": 96.70735168890151, "grad_norm": 0.00046338181709870696, "learning_rate": 3.3298325290945217e-06, "loss": 1.6828998923301697e-06, "step": 340700 }, { "epoch": 96.71019017882486, "grad_norm": 0.0009371628984808922, "learning_rate": 3.3269940391711614e-06, "loss": 2.3612752556800843e-06, "step": 340710 }, { "epoch": 96.71302866874822, "grad_norm": 0.00046288795419968665, "learning_rate": 3.3241555492478002e-06, "loss": 2.3266300559043883e-06, "step": 340720 }, { "epoch": 96.71586715867159, "grad_norm": 0.0011593124363571405, "learning_rate": 3.3213170593244395e-06, "loss": 2.6648864150047303e-06, "step": 340730 }, { "epoch": 96.71870564859495, "grad_norm": 0.00026000061188824475, "learning_rate": 3.3184785694010787e-06, "loss": 2.15526670217514e-06, "step": 340740 }, { "epoch": 96.72154413851831, "grad_norm": 0.00026278599398210645, "learning_rate": 3.315640079477718e-06, "loss": 2.0643696188926696e-06, "step": 340750 }, { "epoch": 96.72438262844167, "grad_norm": 0.0012020102003589272, "learning_rate": 3.3128015895543577e-06, "loss": 2.278946340084076e-06, "step": 340760 }, { "epoch": 96.72722111836502, "grad_norm": 0.0008479458629153669, "learning_rate": 3.3099630996309965e-06, "loss": 2.792850136756897e-06, "step": 340770 }, { "epoch": 96.73005960828839, "grad_norm": 0.0011110007762908936, "learning_rate": 3.3071246097076353e-06, "loss": 1.940503716468811e-06, "step": 340780 }, { "epoch": 96.73289809821175, "grad_norm": 0.0007478142506442964, "learning_rate": 3.304286119784275e-06, "loss": 2.8319656848907472e-06, "step": 340790 }, { "epoch": 96.73573658813511, "grad_norm": 0.00015800546680111438, "learning_rate": 3.301447629860914e-06, "loss": 2.9245391488075255e-06, "step": 340800 }, { "epoch": 96.73857507805847, "grad_norm": 0.0006293235346674919, "learning_rate": 3.2986091399375535e-06, "loss": 1.8844380974769593e-06, "step": 340810 }, { "epoch": 96.74141356798184, "grad_norm": 0.0011267374502494931, "learning_rate": 3.2957706500141928e-06, "loss": 2.4005770683288573e-06, "step": 340820 }, { "epoch": 96.7442520579052, "grad_norm": 0.0011219466105103493, "learning_rate": 3.2929321600908316e-06, "loss": 2.432987093925476e-06, "step": 340830 }, { "epoch": 96.74709054782855, "grad_norm": 0.0029010784346610308, "learning_rate": 3.2900936701674713e-06, "loss": 2.6870518922805784e-06, "step": 340840 }, { "epoch": 96.74992903775191, "grad_norm": 0.00023456659982912242, "learning_rate": 3.28725518024411e-06, "loss": 2.310238778591156e-06, "step": 340850 }, { "epoch": 96.75276752767527, "grad_norm": 0.0008182537858374417, "learning_rate": 3.28441669032075e-06, "loss": 2.7194619178771973e-06, "step": 340860 }, { "epoch": 96.75560601759864, "grad_norm": 7.533141615567729e-05, "learning_rate": 3.2815782003973886e-06, "loss": 1.893565058708191e-06, "step": 340870 }, { "epoch": 96.758444507522, "grad_norm": 0.0012227867264300585, "learning_rate": 3.2787397104740275e-06, "loss": 2.55536288022995e-06, "step": 340880 }, { "epoch": 96.76128299744536, "grad_norm": 0.00019117738702334464, "learning_rate": 3.275901220550667e-06, "loss": 2.5562942028045656e-06, "step": 340890 }, { "epoch": 96.76412148736873, "grad_norm": 0.002093892777338624, "learning_rate": 3.2730627306273064e-06, "loss": 3.2145529985427856e-06, "step": 340900 }, { "epoch": 96.76695997729207, "grad_norm": 0.00017257811850868165, "learning_rate": 3.2702242407039457e-06, "loss": 2.3122876882553102e-06, "step": 340910 }, { "epoch": 96.76979846721544, "grad_norm": 0.00043843386811204255, "learning_rate": 3.267385750780585e-06, "loss": 2.606026828289032e-06, "step": 340920 }, { "epoch": 96.7726369571388, "grad_norm": 0.0012683275854215026, "learning_rate": 3.2645472608572246e-06, "loss": 1.94404274225235e-06, "step": 340930 }, { "epoch": 96.77547544706216, "grad_norm": 0.00047895859461277723, "learning_rate": 3.2617087709338634e-06, "loss": 1.9956380128860475e-06, "step": 340940 }, { "epoch": 96.77831393698553, "grad_norm": 0.000228176882956177, "learning_rate": 3.2588702810105023e-06, "loss": 3.643147647380829e-06, "step": 340950 }, { "epoch": 96.78115242690889, "grad_norm": 0.0006842283764854074, "learning_rate": 3.256031791087142e-06, "loss": 2.990849316120148e-06, "step": 340960 }, { "epoch": 96.78399091683225, "grad_norm": 0.00081009475979954, "learning_rate": 3.253193301163781e-06, "loss": 1.728348433971405e-06, "step": 340970 }, { "epoch": 96.7868294067556, "grad_norm": 0.0004905858659185469, "learning_rate": 3.2503548112404205e-06, "loss": 2.7533620595932008e-06, "step": 340980 }, { "epoch": 96.78966789667896, "grad_norm": 0.00024069238861557096, "learning_rate": 3.2475163213170593e-06, "loss": 2.2295862436294556e-06, "step": 340990 }, { "epoch": 96.79250638660233, "grad_norm": 0.0010005724616348743, "learning_rate": 3.2446778313936986e-06, "loss": 2.076290547847748e-06, "step": 341000 }, { "epoch": 96.79250638660233, "eval_accuracy": 0.9892541489158772, "eval_loss": 0.047356992959976196, "eval_runtime": 63.1767, "eval_samples_per_second": 248.937, "eval_steps_per_second": 3.894, "step": 341000 }, { "epoch": 96.79534487652569, "grad_norm": 0.0025253023486584425, "learning_rate": 3.2418393414703382e-06, "loss": 2.9353424906730654e-06, "step": 341010 }, { "epoch": 96.79818336644905, "grad_norm": 0.007583528757095337, "learning_rate": 3.239000851546977e-06, "loss": 3.168731927871704e-06, "step": 341020 }, { "epoch": 96.80102185637242, "grad_norm": 0.002864021575078368, "learning_rate": 3.2361623616236168e-06, "loss": 6.6818669438362125e-06, "step": 341030 }, { "epoch": 96.80386034629576, "grad_norm": 0.00021585370996035635, "learning_rate": 3.2333238717002556e-06, "loss": 2.3078173398971558e-06, "step": 341040 }, { "epoch": 96.80669883621913, "grad_norm": 0.0013689150800928473, "learning_rate": 3.2304853817768944e-06, "loss": 2.035871148109436e-06, "step": 341050 }, { "epoch": 96.80953732614249, "grad_norm": 0.0010955677134916186, "learning_rate": 3.227646891853534e-06, "loss": 8.90977680683136e-06, "step": 341060 }, { "epoch": 96.81237581606585, "grad_norm": 0.00033759017242118716, "learning_rate": 3.2248084019301734e-06, "loss": 4.443898797035218e-06, "step": 341070 }, { "epoch": 96.81521430598922, "grad_norm": 0.0013887049863114953, "learning_rate": 3.2219699120068126e-06, "loss": 1.580454409122467e-06, "step": 341080 }, { "epoch": 96.81805279591258, "grad_norm": 0.001375876134261489, "learning_rate": 3.219131422083452e-06, "loss": 2.897903323173523e-06, "step": 341090 }, { "epoch": 96.82089128583594, "grad_norm": 8.850233280099928e-05, "learning_rate": 3.2162929321600907e-06, "loss": 1.5517696738243103e-06, "step": 341100 }, { "epoch": 96.82372977575929, "grad_norm": 0.0006378666730597615, "learning_rate": 3.2134544422367304e-06, "loss": 2.824142575263977e-06, "step": 341110 }, { "epoch": 96.82656826568265, "grad_norm": 0.0027767892461270094, "learning_rate": 3.2106159523133692e-06, "loss": 6.585381925106049e-06, "step": 341120 }, { "epoch": 96.82940675560602, "grad_norm": 0.006384842097759247, "learning_rate": 3.207777462390009e-06, "loss": 4.8639252781867984e-06, "step": 341130 }, { "epoch": 96.83224524552938, "grad_norm": 0.003089188365265727, "learning_rate": 3.2049389724666477e-06, "loss": 2.4799257516860964e-06, "step": 341140 }, { "epoch": 96.83508373545274, "grad_norm": 0.00029415107565000653, "learning_rate": 3.202100482543287e-06, "loss": 3.766268491744995e-06, "step": 341150 }, { "epoch": 96.8379222253761, "grad_norm": 0.0018679264467209578, "learning_rate": 3.1992619926199263e-06, "loss": 3.4764409065246584e-06, "step": 341160 }, { "epoch": 96.84076071529947, "grad_norm": 0.0006480411975644529, "learning_rate": 3.1964235026965655e-06, "loss": 2.4683773517608643e-06, "step": 341170 }, { "epoch": 96.84359920522282, "grad_norm": 0.0003560574841685593, "learning_rate": 3.193585012773205e-06, "loss": 2.7762725949287415e-06, "step": 341180 }, { "epoch": 96.84643769514618, "grad_norm": 0.000648760877083987, "learning_rate": 3.190746522849844e-06, "loss": 1.5251338481903075e-06, "step": 341190 }, { "epoch": 96.84927618506954, "grad_norm": 0.0020480817183852196, "learning_rate": 3.187908032926483e-06, "loss": 2.3379921913146974e-06, "step": 341200 }, { "epoch": 96.8521146749929, "grad_norm": 0.0008558112313039601, "learning_rate": 3.1850695430031225e-06, "loss": 3.262050449848175e-06, "step": 341210 }, { "epoch": 96.85495316491627, "grad_norm": 0.0006550865364260972, "learning_rate": 3.1822310530797614e-06, "loss": 2.6324763894081115e-06, "step": 341220 }, { "epoch": 96.85779165483963, "grad_norm": 0.0009122836636379361, "learning_rate": 3.179392563156401e-06, "loss": 2.2204592823982237e-06, "step": 341230 }, { "epoch": 96.86063014476298, "grad_norm": 0.0002708474057726562, "learning_rate": 3.1765540732330403e-06, "loss": 2.5659799575805662e-06, "step": 341240 }, { "epoch": 96.86346863468634, "grad_norm": 0.00035980716347694397, "learning_rate": 3.1737155833096796e-06, "loss": 1.6553327441215516e-06, "step": 341250 }, { "epoch": 96.8663071246097, "grad_norm": 0.0011472435435280204, "learning_rate": 3.170877093386319e-06, "loss": 1.6877427697181702e-06, "step": 341260 }, { "epoch": 96.86914561453307, "grad_norm": 0.0010186414001509547, "learning_rate": 3.1680386034629577e-06, "loss": 2.614222466945648e-06, "step": 341270 }, { "epoch": 96.87198410445643, "grad_norm": 0.00029709975933656096, "learning_rate": 3.1652001135395973e-06, "loss": 2.5987625122070313e-06, "step": 341280 }, { "epoch": 96.8748225943798, "grad_norm": 0.0005219282465986907, "learning_rate": 3.162361623616236e-06, "loss": 5.6354328989982605e-06, "step": 341290 }, { "epoch": 96.87766108430316, "grad_norm": 0.0002667440567165613, "learning_rate": 3.159523133692876e-06, "loss": 2.9364600777626038e-06, "step": 341300 }, { "epoch": 96.8804995742265, "grad_norm": 0.00028179885703139007, "learning_rate": 3.1566846437695147e-06, "loss": 3.777258098125458e-06, "step": 341310 }, { "epoch": 96.88333806414987, "grad_norm": 0.0012208117404952645, "learning_rate": 3.153846153846154e-06, "loss": 1.4606863260269166e-06, "step": 341320 }, { "epoch": 96.88617655407323, "grad_norm": 0.0012534010456874967, "learning_rate": 3.151007663922793e-06, "loss": 2.8856098651885987e-06, "step": 341330 }, { "epoch": 96.8890150439966, "grad_norm": 0.000893129559699446, "learning_rate": 3.1481691739994325e-06, "loss": 2.152286469936371e-06, "step": 341340 }, { "epoch": 96.89185353391996, "grad_norm": 0.00033997275750152767, "learning_rate": 3.145330684076072e-06, "loss": 2.8463080525398253e-06, "step": 341350 }, { "epoch": 96.89469202384332, "grad_norm": 0.00019226556469220668, "learning_rate": 3.142492194152711e-06, "loss": 2.0813196897506716e-06, "step": 341360 }, { "epoch": 96.89753051376668, "grad_norm": 0.0005840210942551494, "learning_rate": 3.13965370422935e-06, "loss": 3.086403012275696e-06, "step": 341370 }, { "epoch": 96.90036900369003, "grad_norm": 0.001056335517205298, "learning_rate": 3.1368152143059895e-06, "loss": 1.5622004866600037e-06, "step": 341380 }, { "epoch": 96.9032074936134, "grad_norm": 0.00025093750446103513, "learning_rate": 3.1339767243826283e-06, "loss": 2.7846544981002808e-06, "step": 341390 }, { "epoch": 96.90604598353676, "grad_norm": 0.0003359568945597857, "learning_rate": 3.131138234459268e-06, "loss": 2.039596438407898e-06, "step": 341400 }, { "epoch": 96.90888447346012, "grad_norm": 0.0004753255343530327, "learning_rate": 3.128299744535907e-06, "loss": 1.7020851373672485e-06, "step": 341410 }, { "epoch": 96.91172296338348, "grad_norm": 0.0005578161799348891, "learning_rate": 3.125461254612546e-06, "loss": 2.3232772946357726e-06, "step": 341420 }, { "epoch": 96.91456145330685, "grad_norm": 0.00039521526196040213, "learning_rate": 3.1226227646891858e-06, "loss": 3.102794289588928e-06, "step": 341430 }, { "epoch": 96.91739994323021, "grad_norm": 0.001255666371434927, "learning_rate": 3.1197842747658246e-06, "loss": 2.4534761905670167e-06, "step": 341440 }, { "epoch": 96.92023843315356, "grad_norm": 0.0006969014066271484, "learning_rate": 3.116945784842464e-06, "loss": 4.091300070285797e-06, "step": 341450 }, { "epoch": 96.92307692307692, "grad_norm": 0.0003686983254738152, "learning_rate": 3.114107294919103e-06, "loss": 1.9993633031845094e-06, "step": 341460 }, { "epoch": 96.92591541300028, "grad_norm": 0.00013877179299015552, "learning_rate": 3.1112688049957424e-06, "loss": 2.2046267986297606e-06, "step": 341470 }, { "epoch": 96.92875390292365, "grad_norm": 6.903676694491878e-05, "learning_rate": 3.1084303150723816e-06, "loss": 1.9205734133720396e-06, "step": 341480 }, { "epoch": 96.93159239284701, "grad_norm": 0.00079129304504022, "learning_rate": 3.105591825149021e-06, "loss": 2.631358802318573e-06, "step": 341490 }, { "epoch": 96.93443088277037, "grad_norm": 0.0003292351611889899, "learning_rate": 3.10275333522566e-06, "loss": 2.3243948817253114e-06, "step": 341500 }, { "epoch": 96.93443088277037, "eval_accuracy": 0.9894449036688497, "eval_loss": 0.047126661986112595, "eval_runtime": 98.1051, "eval_samples_per_second": 160.308, "eval_steps_per_second": 2.508, "step": 341500 }, { "epoch": 96.93726937269372, "grad_norm": 0.0011649586958810687, "learning_rate": 3.0999148453022994e-06, "loss": 1.9781291484832764e-06, "step": 341510 }, { "epoch": 96.94010786261708, "grad_norm": 0.0002576629922259599, "learning_rate": 3.0970763553789387e-06, "loss": 2.1530315279960634e-06, "step": 341520 }, { "epoch": 96.94294635254045, "grad_norm": 0.0011510446202009916, "learning_rate": 3.094237865455578e-06, "loss": 1.6335397958755493e-06, "step": 341530 }, { "epoch": 96.94578484246381, "grad_norm": 0.00026315366267226636, "learning_rate": 3.091399375532217e-06, "loss": 2.973712980747223e-06, "step": 341540 }, { "epoch": 96.94862333238717, "grad_norm": 0.0005018851952627301, "learning_rate": 3.088560885608856e-06, "loss": 2.3292377591133118e-06, "step": 341550 }, { "epoch": 96.95146182231053, "grad_norm": 0.0007861221092753112, "learning_rate": 3.0857223956854953e-06, "loss": 2.758204936981201e-06, "step": 341560 }, { "epoch": 96.9543003122339, "grad_norm": 0.0022989611607044935, "learning_rate": 3.0828839057621345e-06, "loss": 2.3132190108299257e-06, "step": 341570 }, { "epoch": 96.95713880215725, "grad_norm": 0.00048399990191683173, "learning_rate": 3.0800454158387738e-06, "loss": 1.4359131455421447e-06, "step": 341580 }, { "epoch": 96.95997729208061, "grad_norm": 0.004735939204692841, "learning_rate": 3.0772069259154135e-06, "loss": 3.5041943192481994e-06, "step": 341590 }, { "epoch": 96.96281578200397, "grad_norm": 0.0015887164045125246, "learning_rate": 3.0743684359920523e-06, "loss": 2.378039062023163e-06, "step": 341600 }, { "epoch": 96.96565427192733, "grad_norm": 0.0008161693695001304, "learning_rate": 3.0715299460686916e-06, "loss": 1.5191733837127686e-06, "step": 341610 }, { "epoch": 96.9684927618507, "grad_norm": 0.003113573184236884, "learning_rate": 3.068691456145331e-06, "loss": 2.7542933821678162e-06, "step": 341620 }, { "epoch": 96.97133125177406, "grad_norm": 0.00013649003813043237, "learning_rate": 3.06585296622197e-06, "loss": 1.6376376152038575e-06, "step": 341630 }, { "epoch": 96.97416974169742, "grad_norm": 0.0007351161912083626, "learning_rate": 3.0630144762986093e-06, "loss": 2.0248815417289732e-06, "step": 341640 }, { "epoch": 96.97700823162077, "grad_norm": 0.0006758134113624692, "learning_rate": 3.0601759863752486e-06, "loss": 2.1105632185935972e-06, "step": 341650 }, { "epoch": 96.97984672154413, "grad_norm": 0.00033368272124789655, "learning_rate": 3.057337496451888e-06, "loss": 1.8784776329994202e-06, "step": 341660 }, { "epoch": 96.9826852114675, "grad_norm": 0.00021219167683739215, "learning_rate": 3.054499006528527e-06, "loss": 2.3618340492248535e-06, "step": 341670 }, { "epoch": 96.98552370139086, "grad_norm": 0.001106511801481247, "learning_rate": 3.0516605166051664e-06, "loss": 2.942979335784912e-06, "step": 341680 }, { "epoch": 96.98836219131422, "grad_norm": 0.00021544346236623824, "learning_rate": 3.0488220266818056e-06, "loss": 1.6981735825538636e-06, "step": 341690 }, { "epoch": 96.99120068123759, "grad_norm": 0.0007163876434788108, "learning_rate": 3.045983536758445e-06, "loss": 2.7222558856010437e-06, "step": 341700 }, { "epoch": 96.99403917116095, "grad_norm": 0.0011837276397272944, "learning_rate": 3.0431450468350837e-06, "loss": 1.9393861293792725e-06, "step": 341710 }, { "epoch": 96.9968776610843, "grad_norm": 0.0006568589597009122, "learning_rate": 3.040306556911723e-06, "loss": 2.308934926986694e-06, "step": 341720 }, { "epoch": 96.99971615100766, "grad_norm": 0.00027491492801345885, "learning_rate": 3.0374680669883622e-06, "loss": 2.9537826776504516e-06, "step": 341730 }, { "epoch": 97.00255464093102, "grad_norm": 0.0001440987252863124, "learning_rate": 3.0349134260573375e-06, "loss": 3.5062770621152594e-06, "step": 341740 }, { "epoch": 97.00539313085439, "grad_norm": 0.0003035899135284126, "learning_rate": 3.0320749361339768e-06, "loss": 1.2679025530815125e-06, "step": 341750 }, { "epoch": 97.00823162077775, "grad_norm": 0.00010538821516092867, "learning_rate": 3.0292364462106164e-06, "loss": 1.895800232887268e-06, "step": 341760 }, { "epoch": 97.01107011070111, "grad_norm": 0.00030342480749823153, "learning_rate": 3.0263979562872553e-06, "loss": 2.0930543541908265e-06, "step": 341770 }, { "epoch": 97.01390860062446, "grad_norm": 0.0005743554211221635, "learning_rate": 3.0235594663638945e-06, "loss": 2.164393663406372e-06, "step": 341780 }, { "epoch": 97.01674709054782, "grad_norm": 0.000419130374211818, "learning_rate": 3.020720976440534e-06, "loss": 1.9764527678489683e-06, "step": 341790 }, { "epoch": 97.01958558047119, "grad_norm": 0.00031600025249645114, "learning_rate": 3.017882486517173e-06, "loss": 1.8462538719177247e-06, "step": 341800 }, { "epoch": 97.02242407039455, "grad_norm": 0.0004498222260735929, "learning_rate": 3.0150439965938123e-06, "loss": 1.85258686542511e-06, "step": 341810 }, { "epoch": 97.02526256031791, "grad_norm": 0.0007479562191292644, "learning_rate": 3.012205506670451e-06, "loss": 2.7256086468696593e-06, "step": 341820 }, { "epoch": 97.02810105024128, "grad_norm": 0.00035883381497114897, "learning_rate": 3.0093670167470904e-06, "loss": 1.9002705812454224e-06, "step": 341830 }, { "epoch": 97.03093954016464, "grad_norm": 0.0002943890867754817, "learning_rate": 3.00652852682373e-06, "loss": 2.325326204299927e-06, "step": 341840 }, { "epoch": 97.03377803008799, "grad_norm": 0.0008825056720525026, "learning_rate": 3.0036900369003693e-06, "loss": 2.055428922176361e-06, "step": 341850 }, { "epoch": 97.03661652001135, "grad_norm": 0.0010868412209674716, "learning_rate": 3.0008515469770086e-06, "loss": 2.6246532797813414e-06, "step": 341860 }, { "epoch": 97.03945500993471, "grad_norm": 0.0011459209490567446, "learning_rate": 2.9980130570536474e-06, "loss": 3.0212104320526123e-06, "step": 341870 }, { "epoch": 97.04229349985808, "grad_norm": 0.0007742708548903465, "learning_rate": 2.9951745671302867e-06, "loss": 1.7765909433364867e-06, "step": 341880 }, { "epoch": 97.04513198978144, "grad_norm": 0.005727311130613089, "learning_rate": 2.992336077206926e-06, "loss": 2.541579306125641e-06, "step": 341890 }, { "epoch": 97.0479704797048, "grad_norm": 0.00038180395495146513, "learning_rate": 2.989497587283565e-06, "loss": 1.7238780856132507e-06, "step": 341900 }, { "epoch": 97.05080896962816, "grad_norm": 0.00041433481965214014, "learning_rate": 2.9866590973602045e-06, "loss": 1.985207200050354e-06, "step": 341910 }, { "epoch": 97.05364745955151, "grad_norm": 0.00048317850450985134, "learning_rate": 2.9838206074368437e-06, "loss": 2.236664295196533e-06, "step": 341920 }, { "epoch": 97.05648594947488, "grad_norm": 0.0016745751490816474, "learning_rate": 2.980982117513483e-06, "loss": 3.082491457462311e-06, "step": 341930 }, { "epoch": 97.05932443939824, "grad_norm": 0.0011112506035715342, "learning_rate": 2.9781436275901222e-06, "loss": 4.272162914276123e-06, "step": 341940 }, { "epoch": 97.0621629293216, "grad_norm": 0.0005307715036906302, "learning_rate": 2.9753051376667615e-06, "loss": 2.738460898399353e-06, "step": 341950 }, { "epoch": 97.06500141924496, "grad_norm": 0.000502106558997184, "learning_rate": 2.9724666477434007e-06, "loss": 2.089701592922211e-06, "step": 341960 }, { "epoch": 97.06783990916833, "grad_norm": 0.0001402329362463206, "learning_rate": 2.96962815782004e-06, "loss": 2.062693238258362e-06, "step": 341970 }, { "epoch": 97.07067839909168, "grad_norm": 0.00019095101743005216, "learning_rate": 2.966789667896679e-06, "loss": 1.8134713172912598e-06, "step": 341980 }, { "epoch": 97.07351688901504, "grad_norm": 0.0015994139248505235, "learning_rate": 2.963951177973318e-06, "loss": 2.5780871510505675e-06, "step": 341990 }, { "epoch": 97.0763553789384, "grad_norm": 0.00019682142010424286, "learning_rate": 2.9611126880499573e-06, "loss": 1.5048310160636901e-06, "step": 342000 }, { "epoch": 97.0763553789384, "eval_accuracy": 0.9893813187511922, "eval_loss": 0.0474490262567997, "eval_runtime": 112.7687, "eval_samples_per_second": 139.462, "eval_steps_per_second": 2.181, "step": 342000 }, { "epoch": 97.07919386886176, "grad_norm": 0.0008276209118776023, "learning_rate": 2.958274198126597e-06, "loss": 1.759268343448639e-06, "step": 342010 }, { "epoch": 97.08203235878513, "grad_norm": 0.00030067097395658493, "learning_rate": 2.9554357082032363e-06, "loss": 2.3543834686279297e-06, "step": 342020 }, { "epoch": 97.08487084870849, "grad_norm": 0.001084481249563396, "learning_rate": 2.952597218279875e-06, "loss": 1.931190490722656e-06, "step": 342030 }, { "epoch": 97.08770933863185, "grad_norm": 0.000564174959436059, "learning_rate": 2.9497587283565144e-06, "loss": 2.024136483669281e-06, "step": 342040 }, { "epoch": 97.0905478285552, "grad_norm": 0.0008339588530361652, "learning_rate": 2.9469202384331536e-06, "loss": 2.0137056708335875e-06, "step": 342050 }, { "epoch": 97.09338631847857, "grad_norm": 0.001038574380800128, "learning_rate": 2.944081748509793e-06, "loss": 2.368725836277008e-06, "step": 342060 }, { "epoch": 97.09622480840193, "grad_norm": 0.0001891477731987834, "learning_rate": 2.941243258586432e-06, "loss": 1.461803913116455e-06, "step": 342070 }, { "epoch": 97.09906329832529, "grad_norm": 0.0009423400624655187, "learning_rate": 2.9384047686630714e-06, "loss": 2.490729093551636e-06, "step": 342080 }, { "epoch": 97.10190178824865, "grad_norm": 0.0003274853515904397, "learning_rate": 2.9355662787397107e-06, "loss": 1.910887658596039e-06, "step": 342090 }, { "epoch": 97.10474027817202, "grad_norm": 0.00040880759479478, "learning_rate": 2.93272778881635e-06, "loss": 2.4665147066116334e-06, "step": 342100 }, { "epoch": 97.10757876809538, "grad_norm": 0.0010421486804261804, "learning_rate": 2.929889298892989e-06, "loss": 2.2314488887786865e-06, "step": 342110 }, { "epoch": 97.11041725801873, "grad_norm": 0.00024179811589419842, "learning_rate": 2.9270508089696284e-06, "loss": 1.3783574104309082e-06, "step": 342120 }, { "epoch": 97.11325574794209, "grad_norm": 0.0011689758393913507, "learning_rate": 2.9242123190462677e-06, "loss": 1.8404796719551087e-06, "step": 342130 }, { "epoch": 97.11609423786545, "grad_norm": 0.00037914852146059275, "learning_rate": 2.9213738291229065e-06, "loss": 1.9706785678863525e-06, "step": 342140 }, { "epoch": 97.11893272778882, "grad_norm": 0.0005393591127358377, "learning_rate": 2.9185353391995458e-06, "loss": 2.2694468498229982e-06, "step": 342150 }, { "epoch": 97.12177121771218, "grad_norm": 0.0006860885769128799, "learning_rate": 2.915696849276185e-06, "loss": 1.6666948795318603e-06, "step": 342160 }, { "epoch": 97.12460970763554, "grad_norm": 6.177816248964518e-05, "learning_rate": 2.9128583593528243e-06, "loss": 2.428703010082245e-06, "step": 342170 }, { "epoch": 97.1274481975589, "grad_norm": 0.000658095465041697, "learning_rate": 2.910019869429464e-06, "loss": 2.094544470310211e-06, "step": 342180 }, { "epoch": 97.13028668748225, "grad_norm": 0.00023575214436277747, "learning_rate": 2.907181379506103e-06, "loss": 1.1429190635681153e-06, "step": 342190 }, { "epoch": 97.13312517740562, "grad_norm": 0.00017767297686077654, "learning_rate": 2.904342889582742e-06, "loss": 1.4290213584899903e-06, "step": 342200 }, { "epoch": 97.13596366732898, "grad_norm": 0.0001074026440619491, "learning_rate": 2.9015043996593813e-06, "loss": 1.6663223505020142e-06, "step": 342210 }, { "epoch": 97.13880215725234, "grad_norm": 0.00135474291164428, "learning_rate": 2.8986659097360206e-06, "loss": 1.66185200214386e-06, "step": 342220 }, { "epoch": 97.1416406471757, "grad_norm": 0.00022452969278674573, "learning_rate": 2.89582741981266e-06, "loss": 2.123229205608368e-06, "step": 342230 }, { "epoch": 97.14447913709907, "grad_norm": 0.0023822111543267965, "learning_rate": 2.892988929889299e-06, "loss": 2.3655593395233154e-06, "step": 342240 }, { "epoch": 97.14731762702242, "grad_norm": 0.001932291779667139, "learning_rate": 2.890150439965938e-06, "loss": 2.266094088554382e-06, "step": 342250 }, { "epoch": 97.15015611694578, "grad_norm": 0.0003950899699702859, "learning_rate": 2.8873119500425776e-06, "loss": 1.6704201698303222e-06, "step": 342260 }, { "epoch": 97.15299460686914, "grad_norm": 0.0003611989668570459, "learning_rate": 2.884473460119217e-06, "loss": 2.000294625759125e-06, "step": 342270 }, { "epoch": 97.1558330967925, "grad_norm": 0.00020791626593563706, "learning_rate": 2.881634970195856e-06, "loss": 2.1364539861679077e-06, "step": 342280 }, { "epoch": 97.15867158671587, "grad_norm": 0.0013498453190550208, "learning_rate": 2.8787964802724954e-06, "loss": 1.74790620803833e-06, "step": 342290 }, { "epoch": 97.16151007663923, "grad_norm": 0.0004576077335514128, "learning_rate": 2.8759579903491342e-06, "loss": 2.440623939037323e-06, "step": 342300 }, { "epoch": 97.1643485665626, "grad_norm": 0.00025739026023074985, "learning_rate": 2.8731195004257735e-06, "loss": 1.7406418919563293e-06, "step": 342310 }, { "epoch": 97.16718705648594, "grad_norm": 0.00015767202421557158, "learning_rate": 2.8702810105024127e-06, "loss": 1.671724021434784e-06, "step": 342320 }, { "epoch": 97.1700255464093, "grad_norm": 0.0004846845113206655, "learning_rate": 2.867442520579052e-06, "loss": 1.7909333109855652e-06, "step": 342330 }, { "epoch": 97.17286403633267, "grad_norm": 0.000674868409987539, "learning_rate": 2.8646040306556912e-06, "loss": 2.5393441319465636e-06, "step": 342340 }, { "epoch": 97.17570252625603, "grad_norm": 0.00037198077188804746, "learning_rate": 2.8617655407323305e-06, "loss": 3.2702460885047913e-06, "step": 342350 }, { "epoch": 97.1785410161794, "grad_norm": 0.0003741905093193054, "learning_rate": 2.8589270508089698e-06, "loss": 1.664087176322937e-06, "step": 342360 }, { "epoch": 97.18137950610276, "grad_norm": 0.002446595346555114, "learning_rate": 2.856088560885609e-06, "loss": 1.7572194337844848e-06, "step": 342370 }, { "epoch": 97.18421799602612, "grad_norm": 0.00023188546765595675, "learning_rate": 2.8532500709622483e-06, "loss": 2.10590660572052e-06, "step": 342380 }, { "epoch": 97.18705648594947, "grad_norm": 0.003563580336049199, "learning_rate": 2.8504115810388875e-06, "loss": 2.2774562239646912e-06, "step": 342390 }, { "epoch": 97.18989497587283, "grad_norm": 0.0003884288598783314, "learning_rate": 2.8475730911155268e-06, "loss": 1.6978010535240173e-06, "step": 342400 }, { "epoch": 97.1927334657962, "grad_norm": 0.00184056896250695, "learning_rate": 2.8447346011921656e-06, "loss": 3.923475742340088e-06, "step": 342410 }, { "epoch": 97.19557195571956, "grad_norm": 0.0004986829590052366, "learning_rate": 2.841896111268805e-06, "loss": 1.8339604139328003e-06, "step": 342420 }, { "epoch": 97.19841044564292, "grad_norm": 0.0004397773591335863, "learning_rate": 2.8390576213454446e-06, "loss": 2.1027401089668276e-06, "step": 342430 }, { "epoch": 97.20124893556628, "grad_norm": 0.0004597703227773309, "learning_rate": 2.836219131422084e-06, "loss": 1.465342938899994e-06, "step": 342440 }, { "epoch": 97.20408742548965, "grad_norm": 0.00017954848590306938, "learning_rate": 2.833380641498723e-06, "loss": 1.6782432794570923e-06, "step": 342450 }, { "epoch": 97.206925915413, "grad_norm": 0.0004980051890015602, "learning_rate": 2.830542151575362e-06, "loss": 2.3050233721733093e-06, "step": 342460 }, { "epoch": 97.20976440533636, "grad_norm": 0.00021913470118306577, "learning_rate": 2.827703661652001e-06, "loss": 2.089887857437134e-06, "step": 342470 }, { "epoch": 97.21260289525972, "grad_norm": 0.0006063045002520084, "learning_rate": 2.8248651717286404e-06, "loss": 1.5377998352050782e-06, "step": 342480 }, { "epoch": 97.21544138518308, "grad_norm": 0.0002922533021774143, "learning_rate": 2.8220266818052797e-06, "loss": 1.4755874872207642e-06, "step": 342490 }, { "epoch": 97.21827987510645, "grad_norm": 0.0007793045369908214, "learning_rate": 2.819188191881919e-06, "loss": 2.8308480978012084e-06, "step": 342500 }, { "epoch": 97.21827987510645, "eval_accuracy": 0.9891269790805621, "eval_loss": 0.04726338014006615, "eval_runtime": 60.8789, "eval_samples_per_second": 258.333, "eval_steps_per_second": 4.041, "step": 342500 }, { "epoch": 97.22111836502981, "grad_norm": 0.0002694716677069664, "learning_rate": 2.816349701958558e-06, "loss": 1.3630837202072143e-06, "step": 342510 }, { "epoch": 97.22395685495316, "grad_norm": 0.00026535941287875175, "learning_rate": 2.8135112120351974e-06, "loss": 1.3072043657302856e-06, "step": 342520 }, { "epoch": 97.22679534487652, "grad_norm": 0.00032877150806598365, "learning_rate": 2.8106727221118367e-06, "loss": 1.966208219528198e-06, "step": 342530 }, { "epoch": 97.22963383479988, "grad_norm": 0.00048748505651019514, "learning_rate": 2.807834232188476e-06, "loss": 3.1091272830963136e-06, "step": 342540 }, { "epoch": 97.23247232472325, "grad_norm": 0.000872998614795506, "learning_rate": 2.8049957422651152e-06, "loss": 1.6357749700546265e-06, "step": 342550 }, { "epoch": 97.23531081464661, "grad_norm": 0.0005086230230517685, "learning_rate": 2.8021572523417545e-06, "loss": 1.6486272215843201e-06, "step": 342560 }, { "epoch": 97.23814930456997, "grad_norm": 0.00016262281860690564, "learning_rate": 2.7993187624183933e-06, "loss": 1.4729797840118407e-06, "step": 342570 }, { "epoch": 97.24098779449334, "grad_norm": 0.0006300406530499458, "learning_rate": 2.7964802724950326e-06, "loss": 1.7879530787467956e-06, "step": 342580 }, { "epoch": 97.24382628441668, "grad_norm": 0.0014768368564546108, "learning_rate": 2.793641782571672e-06, "loss": 2.981536090373993e-06, "step": 342590 }, { "epoch": 97.24666477434005, "grad_norm": 0.0007207709131762385, "learning_rate": 2.7908032926483115e-06, "loss": 2.22623348236084e-06, "step": 342600 }, { "epoch": 97.24950326426341, "grad_norm": 0.001150024007074535, "learning_rate": 2.7879648027249508e-06, "loss": 1.7125159502029419e-06, "step": 342610 }, { "epoch": 97.25234175418677, "grad_norm": 0.0008333928417414427, "learning_rate": 2.7851263128015896e-06, "loss": 1.8229708075523377e-06, "step": 342620 }, { "epoch": 97.25518024411014, "grad_norm": 0.0005204319604672492, "learning_rate": 2.782287822878229e-06, "loss": 2.0392239093780517e-06, "step": 342630 }, { "epoch": 97.2580187340335, "grad_norm": 0.0007221628911793232, "learning_rate": 2.779449332954868e-06, "loss": 2.064742147922516e-06, "step": 342640 }, { "epoch": 97.26085722395686, "grad_norm": 0.00018637620087247342, "learning_rate": 2.7766108430315074e-06, "loss": 1.9393861293792725e-06, "step": 342650 }, { "epoch": 97.26369571388021, "grad_norm": 0.0007109738071449101, "learning_rate": 2.7737723531081466e-06, "loss": 1.9317492842674254e-06, "step": 342660 }, { "epoch": 97.26653420380357, "grad_norm": 0.0004007945826742798, "learning_rate": 2.7709338631847855e-06, "loss": 3.5740435123443603e-06, "step": 342670 }, { "epoch": 97.26937269372694, "grad_norm": 0.0002045521541731432, "learning_rate": 2.768095373261425e-06, "loss": 1.8818303942680358e-06, "step": 342680 }, { "epoch": 97.2722111836503, "grad_norm": 0.0007360826712101698, "learning_rate": 2.7652568833380644e-06, "loss": 1.889839768409729e-06, "step": 342690 }, { "epoch": 97.27504967357366, "grad_norm": 0.0014740912010893226, "learning_rate": 2.7624183934147037e-06, "loss": 1.8827617168426513e-06, "step": 342700 }, { "epoch": 97.27788816349702, "grad_norm": 0.0008180097793228924, "learning_rate": 2.759579903491343e-06, "loss": 2.9319897294044493e-06, "step": 342710 }, { "epoch": 97.28072665342037, "grad_norm": 0.00024789173039607704, "learning_rate": 2.756741413567982e-06, "loss": 1.4731660485267639e-06, "step": 342720 }, { "epoch": 97.28356514334374, "grad_norm": 0.00035118040977977216, "learning_rate": 2.753902923644621e-06, "loss": 1.5558674931526183e-06, "step": 342730 }, { "epoch": 97.2864036332671, "grad_norm": 0.0001722359738778323, "learning_rate": 2.7510644337212603e-06, "loss": 1.2341886758804322e-06, "step": 342740 }, { "epoch": 97.28924212319046, "grad_norm": 0.0007139039225876331, "learning_rate": 2.7482259437978995e-06, "loss": 1.463852822780609e-06, "step": 342750 }, { "epoch": 97.29208061311382, "grad_norm": 0.0022533906158059835, "learning_rate": 2.7453874538745388e-06, "loss": 2.78521329164505e-06, "step": 342760 }, { "epoch": 97.29491910303719, "grad_norm": 0.0003258085635025054, "learning_rate": 2.7425489639511785e-06, "loss": 1.3209879398345948e-06, "step": 342770 }, { "epoch": 97.29775759296055, "grad_norm": 0.00023455313930753618, "learning_rate": 2.7397104740278173e-06, "loss": 1.6652047634124756e-06, "step": 342780 }, { "epoch": 97.3005960828839, "grad_norm": 0.00024628639221191406, "learning_rate": 2.7368719841044565e-06, "loss": 1.2626871466636657e-06, "step": 342790 }, { "epoch": 97.30343457280726, "grad_norm": 0.000605162582360208, "learning_rate": 2.734033494181096e-06, "loss": 1.8730759620666504e-06, "step": 342800 }, { "epoch": 97.30627306273063, "grad_norm": 0.0023965213913470507, "learning_rate": 2.731195004257735e-06, "loss": 2.397596836090088e-06, "step": 342810 }, { "epoch": 97.30911155265399, "grad_norm": 0.0006944876513443887, "learning_rate": 2.7283565143343743e-06, "loss": 2.255477011203766e-06, "step": 342820 }, { "epoch": 97.31195004257735, "grad_norm": 0.0002453619963489473, "learning_rate": 2.725518024411013e-06, "loss": 1.8343329429626464e-06, "step": 342830 }, { "epoch": 97.31478853250071, "grad_norm": 0.0005992723745293915, "learning_rate": 2.7226795344876524e-06, "loss": 1.8151476979255676e-06, "step": 342840 }, { "epoch": 97.31762702242408, "grad_norm": 0.00036365020787343383, "learning_rate": 2.719841044564292e-06, "loss": 1.1781230568885803e-06, "step": 342850 }, { "epoch": 97.32046551234743, "grad_norm": 0.0007946587284095585, "learning_rate": 2.7170025546409313e-06, "loss": 1.7935410141944885e-06, "step": 342860 }, { "epoch": 97.32330400227079, "grad_norm": 0.00016318105917889625, "learning_rate": 2.7141640647175706e-06, "loss": 1.8455088138580321e-06, "step": 342870 }, { "epoch": 97.32614249219415, "grad_norm": 0.0008623762987554073, "learning_rate": 2.71132557479421e-06, "loss": 1.1626631021499635e-06, "step": 342880 }, { "epoch": 97.32898098211751, "grad_norm": 0.0010945482645183802, "learning_rate": 2.7084870848708487e-06, "loss": 2.243369817733765e-06, "step": 342890 }, { "epoch": 97.33181947204088, "grad_norm": 0.00047205694136209786, "learning_rate": 2.705648594947488e-06, "loss": 1.6048550605773926e-06, "step": 342900 }, { "epoch": 97.33465796196424, "grad_norm": 9.318361117038876e-05, "learning_rate": 2.702810105024127e-06, "loss": 1.4888122677803039e-06, "step": 342910 }, { "epoch": 97.3374964518876, "grad_norm": 0.00010809675586642697, "learning_rate": 2.6999716151007665e-06, "loss": 1.8231570720672607e-06, "step": 342920 }, { "epoch": 97.34033494181095, "grad_norm": 0.0005823906394653022, "learning_rate": 2.6971331251774057e-06, "loss": 1.4465302228927613e-06, "step": 342930 }, { "epoch": 97.34317343173431, "grad_norm": 0.0018934328109025955, "learning_rate": 2.694294635254045e-06, "loss": 1.8648803234100343e-06, "step": 342940 }, { "epoch": 97.34601192165768, "grad_norm": 0.0005666061770170927, "learning_rate": 2.6914561453306842e-06, "loss": 2.177245914936066e-06, "step": 342950 }, { "epoch": 97.34885041158104, "grad_norm": 0.00036435003858059645, "learning_rate": 2.6886176554073235e-06, "loss": 1.7104670405387878e-06, "step": 342960 }, { "epoch": 97.3516889015044, "grad_norm": 0.00011240467574680224, "learning_rate": 2.6857791654839628e-06, "loss": 1.3843178749084474e-06, "step": 342970 }, { "epoch": 97.35452739142777, "grad_norm": 0.0004397084703668952, "learning_rate": 2.682940675560602e-06, "loss": 1.5312805771827698e-06, "step": 342980 }, { "epoch": 97.35736588135111, "grad_norm": 0.0004393715353216976, "learning_rate": 2.680102185637241e-06, "loss": 1.4036893844604493e-06, "step": 342990 }, { "epoch": 97.36020437127448, "grad_norm": 0.00040121603524312377, "learning_rate": 2.67726369571388e-06, "loss": 2.327747642993927e-06, "step": 343000 }, { "epoch": 97.36020437127448, "eval_accuracy": 0.9892541489158772, "eval_loss": 0.04739994928240776, "eval_runtime": 61.2111, "eval_samples_per_second": 256.93, "eval_steps_per_second": 4.019, "step": 343000 }, { "epoch": 97.36304286119784, "grad_norm": 0.000649089808575809, "learning_rate": 2.6744252057905194e-06, "loss": 2.0986422896385194e-06, "step": 343010 }, { "epoch": 97.3658813511212, "grad_norm": 0.0005939473630860448, "learning_rate": 2.671586715867159e-06, "loss": 2.397596836090088e-06, "step": 343020 }, { "epoch": 97.36871984104457, "grad_norm": 0.000910041038878262, "learning_rate": 2.6687482259437983e-06, "loss": 2.3480504751205443e-06, "step": 343030 }, { "epoch": 97.37155833096793, "grad_norm": 0.0001625055738259107, "learning_rate": 2.6659097360204376e-06, "loss": 3.8387253880500795e-06, "step": 343040 }, { "epoch": 97.37439682089129, "grad_norm": 0.00040131210698746145, "learning_rate": 2.6630712460970764e-06, "loss": 2.0425766706466673e-06, "step": 343050 }, { "epoch": 97.37723531081464, "grad_norm": 0.000710083229932934, "learning_rate": 2.6602327561737156e-06, "loss": 2.093985676765442e-06, "step": 343060 }, { "epoch": 97.380073800738, "grad_norm": 0.00017343825311399996, "learning_rate": 2.657394266250355e-06, "loss": 1.753680408000946e-06, "step": 343070 }, { "epoch": 97.38291229066137, "grad_norm": 0.00011343250662321225, "learning_rate": 2.654555776326994e-06, "loss": 2.1027401089668276e-06, "step": 343080 }, { "epoch": 97.38575078058473, "grad_norm": 0.0001105196715798229, "learning_rate": 2.6517172864036334e-06, "loss": 2.072751522064209e-06, "step": 343090 }, { "epoch": 97.38858927050809, "grad_norm": 0.0003294650523457676, "learning_rate": 2.6488787964802727e-06, "loss": 1.3198703527450562e-06, "step": 343100 }, { "epoch": 97.39142776043145, "grad_norm": 0.00016496318858116865, "learning_rate": 2.646040306556912e-06, "loss": 1.358240842819214e-06, "step": 343110 }, { "epoch": 97.39426625035482, "grad_norm": 0.008035078644752502, "learning_rate": 2.643201816633551e-06, "loss": 2.345629036426544e-06, "step": 343120 }, { "epoch": 97.39710474027817, "grad_norm": 0.0005388552672229707, "learning_rate": 2.6403633267101904e-06, "loss": 1.5560537576675415e-06, "step": 343130 }, { "epoch": 97.39994323020153, "grad_norm": 7.360181189142168e-05, "learning_rate": 2.6375248367868297e-06, "loss": 2.080574631690979e-06, "step": 343140 }, { "epoch": 97.40278172012489, "grad_norm": 0.0006482512108050287, "learning_rate": 2.6346863468634685e-06, "loss": 1.0600313544273377e-06, "step": 343150 }, { "epoch": 97.40562021004826, "grad_norm": 0.00025266059674322605, "learning_rate": 2.631847856940108e-06, "loss": 2.1649524569511413e-06, "step": 343160 }, { "epoch": 97.40845869997162, "grad_norm": 0.000238728680415079, "learning_rate": 2.629009367016747e-06, "loss": 1.7745420336723328e-06, "step": 343170 }, { "epoch": 97.41129718989498, "grad_norm": 0.0008045545546337962, "learning_rate": 2.6261708770933863e-06, "loss": 3.5118311643600466e-06, "step": 343180 }, { "epoch": 97.41413567981834, "grad_norm": 0.0005127030308358371, "learning_rate": 2.623332387170026e-06, "loss": 1.6365200281143189e-06, "step": 343190 }, { "epoch": 97.41697416974169, "grad_norm": 0.0012983266497030854, "learning_rate": 2.6204938972466652e-06, "loss": 2.7278438210487366e-06, "step": 343200 }, { "epoch": 97.41981265966506, "grad_norm": 0.0004921384388580918, "learning_rate": 2.617655407323304e-06, "loss": 1.6311183571815492e-06, "step": 343210 }, { "epoch": 97.42265114958842, "grad_norm": 0.0004087265988346189, "learning_rate": 2.6148169173999433e-06, "loss": 2.2234395146369935e-06, "step": 343220 }, { "epoch": 97.42548963951178, "grad_norm": 0.0011689089005813003, "learning_rate": 2.6119784274765826e-06, "loss": 3.803707659244537e-06, "step": 343230 }, { "epoch": 97.42832812943514, "grad_norm": 0.00039120769361034036, "learning_rate": 2.609139937553222e-06, "loss": 1.917034387588501e-06, "step": 343240 }, { "epoch": 97.4311666193585, "grad_norm": 0.00021467849728651345, "learning_rate": 2.606301447629861e-06, "loss": 1.8164515495300294e-06, "step": 343250 }, { "epoch": 97.43400510928186, "grad_norm": 0.0006072692922316492, "learning_rate": 2.6034629577065e-06, "loss": 1.8563121557235718e-06, "step": 343260 }, { "epoch": 97.43684359920522, "grad_norm": 0.00017869721341412514, "learning_rate": 2.6006244677831396e-06, "loss": 2.15582549571991e-06, "step": 343270 }, { "epoch": 97.43968208912858, "grad_norm": 0.00019927018729504198, "learning_rate": 2.597785977859779e-06, "loss": 2.61254608631134e-06, "step": 343280 }, { "epoch": 97.44252057905194, "grad_norm": 0.0001473173324484378, "learning_rate": 2.594947487936418e-06, "loss": 2.5024637579917908e-06, "step": 343290 }, { "epoch": 97.4453590689753, "grad_norm": 0.002108482178300619, "learning_rate": 2.5921089980130574e-06, "loss": 2.2336840629577638e-06, "step": 343300 }, { "epoch": 97.44819755889867, "grad_norm": 0.0007104347459971905, "learning_rate": 2.5892705080896962e-06, "loss": 1.8851831555366517e-06, "step": 343310 }, { "epoch": 97.45103604882203, "grad_norm": 0.000899009348358959, "learning_rate": 2.5864320181663355e-06, "loss": 2.457760274410248e-06, "step": 343320 }, { "epoch": 97.45387453874538, "grad_norm": 0.0002458335948176682, "learning_rate": 2.5835935282429747e-06, "loss": 1.3856217265129089e-06, "step": 343330 }, { "epoch": 97.45671302866874, "grad_norm": 0.00076894840458408, "learning_rate": 2.580755038319614e-06, "loss": 2.1247193217277528e-06, "step": 343340 }, { "epoch": 97.45955151859211, "grad_norm": 0.0008747507818043232, "learning_rate": 2.5779165483962533e-06, "loss": 2.2603198885917664e-06, "step": 343350 }, { "epoch": 97.46239000851547, "grad_norm": 0.00013842117914464325, "learning_rate": 2.5750780584728925e-06, "loss": 2.169795334339142e-06, "step": 343360 }, { "epoch": 97.46522849843883, "grad_norm": 0.00018229623674415052, "learning_rate": 2.5722395685495318e-06, "loss": 1.7400830984115601e-06, "step": 343370 }, { "epoch": 97.4680669883622, "grad_norm": 0.00026484730187803507, "learning_rate": 2.569401078626171e-06, "loss": 1.4161691069602965e-06, "step": 343380 }, { "epoch": 97.47090547828556, "grad_norm": 0.0004024746594950557, "learning_rate": 2.5665625887028103e-06, "loss": 2.214685082435608e-06, "step": 343390 }, { "epoch": 97.47374396820891, "grad_norm": 0.00019498489564284682, "learning_rate": 2.5637240987794495e-06, "loss": 2.466142177581787e-06, "step": 343400 }, { "epoch": 97.47658245813227, "grad_norm": 0.0003688344731926918, "learning_rate": 2.560885608856089e-06, "loss": 1.4998018741607667e-06, "step": 343410 }, { "epoch": 97.47942094805563, "grad_norm": 0.0001269198110094294, "learning_rate": 2.5580471189327276e-06, "loss": 1.7920508980751038e-06, "step": 343420 }, { "epoch": 97.482259437979, "grad_norm": 0.0001691057696007192, "learning_rate": 2.555208629009367e-06, "loss": 1.8104910850524902e-06, "step": 343430 }, { "epoch": 97.48509792790236, "grad_norm": 0.0021605852525681257, "learning_rate": 2.5523701390860066e-06, "loss": 3.0172988772392275e-06, "step": 343440 }, { "epoch": 97.48793641782572, "grad_norm": 0.00023539437097497284, "learning_rate": 2.549531649162646e-06, "loss": 1.9390136003494263e-06, "step": 343450 }, { "epoch": 97.49077490774907, "grad_norm": 0.00016069618868641555, "learning_rate": 2.546693159239285e-06, "loss": 1.6808509826660156e-06, "step": 343460 }, { "epoch": 97.49361339767243, "grad_norm": 0.00030334212351590395, "learning_rate": 2.543854669315924e-06, "loss": 1.8000602722167968e-06, "step": 343470 }, { "epoch": 97.4964518875958, "grad_norm": 0.00031912591657601297, "learning_rate": 2.541016179392563e-06, "loss": 1.9723549485206606e-06, "step": 343480 }, { "epoch": 97.49929037751916, "grad_norm": 0.0015808213502168655, "learning_rate": 2.5381776894692024e-06, "loss": 2.8295442461967467e-06, "step": 343490 }, { "epoch": 97.50212886744252, "grad_norm": 0.0001324857003055513, "learning_rate": 2.5353391995458417e-06, "loss": 2.127140760421753e-06, "step": 343500 }, { "epoch": 97.50212886744252, "eval_accuracy": 0.9894449036688497, "eval_loss": 0.046933550387620926, "eval_runtime": 48.8913, "eval_samples_per_second": 321.673, "eval_steps_per_second": 5.032, "step": 343500 }, { "epoch": 97.50496735736589, "grad_norm": 0.0013415913563221693, "learning_rate": 2.532500709622481e-06, "loss": 1.617148518562317e-06, "step": 343510 }, { "epoch": 97.50780584728925, "grad_norm": 0.0006176866590976715, "learning_rate": 2.52966221969912e-06, "loss": 2.226606011390686e-06, "step": 343520 }, { "epoch": 97.5106443372126, "grad_norm": 0.00022053575958125293, "learning_rate": 2.5268237297757595e-06, "loss": 1.438148319721222e-06, "step": 343530 }, { "epoch": 97.51348282713596, "grad_norm": 0.0005560684949159622, "learning_rate": 2.5239852398523987e-06, "loss": 1.8745660781860351e-06, "step": 343540 }, { "epoch": 97.51632131705932, "grad_norm": 0.00062253896612674, "learning_rate": 2.521146749929038e-06, "loss": 1.3850629329681397e-06, "step": 343550 }, { "epoch": 97.51915980698269, "grad_norm": 0.00026681291637942195, "learning_rate": 2.5183082600056772e-06, "loss": 1.918338239192963e-06, "step": 343560 }, { "epoch": 97.52199829690605, "grad_norm": 0.0031745326705276966, "learning_rate": 2.5154697700823165e-06, "loss": 2.333521842956543e-06, "step": 343570 }, { "epoch": 97.52483678682941, "grad_norm": 0.00025191064924001694, "learning_rate": 2.5126312801589553e-06, "loss": 2.0490959286689757e-06, "step": 343580 }, { "epoch": 97.52767527675277, "grad_norm": 0.00035641147405840456, "learning_rate": 2.5097927902355946e-06, "loss": 1.7318874597549438e-06, "step": 343590 }, { "epoch": 97.53051376667612, "grad_norm": 0.001211358467116952, "learning_rate": 2.506954300312234e-06, "loss": 2.271495759487152e-06, "step": 343600 }, { "epoch": 97.53335225659949, "grad_norm": 0.00018980314780492336, "learning_rate": 2.504115810388873e-06, "loss": 2.22083181142807e-06, "step": 343610 }, { "epoch": 97.53619074652285, "grad_norm": 0.00018158272723667324, "learning_rate": 2.5012773204655128e-06, "loss": 2.7041882276535033e-06, "step": 343620 }, { "epoch": 97.53902923644621, "grad_norm": 0.0005511394119821489, "learning_rate": 2.4984388305421516e-06, "loss": 1.8410384654998779e-06, "step": 343630 }, { "epoch": 97.54186772636957, "grad_norm": 0.00021045807807240635, "learning_rate": 2.495600340618791e-06, "loss": 1.8719583749771118e-06, "step": 343640 }, { "epoch": 97.54470621629294, "grad_norm": 0.0012186473468318582, "learning_rate": 2.49276185069543e-06, "loss": 2.679973840713501e-06, "step": 343650 }, { "epoch": 97.5475447062163, "grad_norm": 0.0008200234151445329, "learning_rate": 2.4899233607720694e-06, "loss": 2.234801650047302e-06, "step": 343660 }, { "epoch": 97.55038319613965, "grad_norm": 0.00011472494952613488, "learning_rate": 2.4870848708487086e-06, "loss": 1.739896833896637e-06, "step": 343670 }, { "epoch": 97.55322168606301, "grad_norm": 0.00020229937217663974, "learning_rate": 2.484246380925348e-06, "loss": 1.2716278433799744e-06, "step": 343680 }, { "epoch": 97.55606017598637, "grad_norm": 0.00023513306223321706, "learning_rate": 2.481407891001987e-06, "loss": 1.8853694200515748e-06, "step": 343690 }, { "epoch": 97.55889866590974, "grad_norm": 0.00031329182093031704, "learning_rate": 2.4785694010786264e-06, "loss": 1.3032928109169007e-06, "step": 343700 }, { "epoch": 97.5617371558331, "grad_norm": 0.00011156356777064502, "learning_rate": 2.4757309111552657e-06, "loss": 1.7842277884483337e-06, "step": 343710 }, { "epoch": 97.56457564575646, "grad_norm": 0.0002411973982816562, "learning_rate": 2.472892421231905e-06, "loss": 3.6500394344329832e-06, "step": 343720 }, { "epoch": 97.56741413567981, "grad_norm": 0.00028749523335136473, "learning_rate": 2.470053931308544e-06, "loss": 1.2857839465141296e-06, "step": 343730 }, { "epoch": 97.57025262560317, "grad_norm": 0.0006246230914257467, "learning_rate": 2.467215441385183e-06, "loss": 1.5994533896446229e-06, "step": 343740 }, { "epoch": 97.57309111552654, "grad_norm": 0.0005814659525640309, "learning_rate": 2.4643769514618223e-06, "loss": 1.634657382965088e-06, "step": 343750 }, { "epoch": 97.5759296054499, "grad_norm": 0.00043384035234339535, "learning_rate": 2.4615384615384615e-06, "loss": 1.5331432223320008e-06, "step": 343760 }, { "epoch": 97.57876809537326, "grad_norm": 0.0006583948852494359, "learning_rate": 2.4586999716151008e-06, "loss": 1.911446452140808e-06, "step": 343770 }, { "epoch": 97.58160658529663, "grad_norm": 0.0007097209454514086, "learning_rate": 2.45586148169174e-06, "loss": 2.2644177079200745e-06, "step": 343780 }, { "epoch": 97.58444507521999, "grad_norm": 0.00023174358648248017, "learning_rate": 2.4530229917683793e-06, "loss": 3.1830742955207826e-06, "step": 343790 }, { "epoch": 97.58728356514334, "grad_norm": 0.00042524910531938076, "learning_rate": 2.4501845018450186e-06, "loss": 9.494461119174958e-06, "step": 343800 }, { "epoch": 97.5901220550667, "grad_norm": 0.0008685312932357192, "learning_rate": 2.447346011921658e-06, "loss": 3.018230199813843e-06, "step": 343810 }, { "epoch": 97.59296054499006, "grad_norm": 0.00037593013257719576, "learning_rate": 2.444507521998297e-06, "loss": 1.8358230590820313e-06, "step": 343820 }, { "epoch": 97.59579903491343, "grad_norm": 0.0006175330490805209, "learning_rate": 2.4416690320749363e-06, "loss": 4.252605140209198e-06, "step": 343830 }, { "epoch": 97.59863752483679, "grad_norm": 0.0006767949089407921, "learning_rate": 2.4388305421515756e-06, "loss": 3.4065917134284975e-06, "step": 343840 }, { "epoch": 97.60147601476015, "grad_norm": 0.0017975347582250834, "learning_rate": 2.4359920522282144e-06, "loss": 2.39592045545578e-06, "step": 343850 }, { "epoch": 97.60431450468351, "grad_norm": 0.0003255651972722262, "learning_rate": 2.433153562304854e-06, "loss": 2.0986422896385194e-06, "step": 343860 }, { "epoch": 97.60715299460686, "grad_norm": 0.001074932049959898, "learning_rate": 2.4303150723814934e-06, "loss": 1.86823308467865e-06, "step": 343870 }, { "epoch": 97.60999148453023, "grad_norm": 0.0004100134246982634, "learning_rate": 2.4274765824581326e-06, "loss": 2.107582986354828e-06, "step": 343880 }, { "epoch": 97.61282997445359, "grad_norm": 0.00027002583374269307, "learning_rate": 2.424638092534772e-06, "loss": 1.3260170817375183e-06, "step": 343890 }, { "epoch": 97.61566846437695, "grad_norm": 0.0005163073074072599, "learning_rate": 2.4217996026114107e-06, "loss": 2.5730580091476442e-06, "step": 343900 }, { "epoch": 97.61850695430032, "grad_norm": 0.00036282598739489913, "learning_rate": 2.41896111268805e-06, "loss": 1.9516795873641967e-06, "step": 343910 }, { "epoch": 97.62134544422368, "grad_norm": 0.00046831087092868984, "learning_rate": 2.4161226227646892e-06, "loss": 1.4089047908782959e-06, "step": 343920 }, { "epoch": 97.62418393414703, "grad_norm": 0.00044590607285499573, "learning_rate": 2.4132841328413285e-06, "loss": 1.8421560525894165e-06, "step": 343930 }, { "epoch": 97.62702242407039, "grad_norm": 0.0009436695254407823, "learning_rate": 2.4104456429179677e-06, "loss": 1.872330904006958e-06, "step": 343940 }, { "epoch": 97.62986091399375, "grad_norm": 0.0002966826141346246, "learning_rate": 2.407607152994607e-06, "loss": 2.229958772659302e-06, "step": 343950 }, { "epoch": 97.63269940391712, "grad_norm": 0.0005845209234394133, "learning_rate": 2.4047686630712462e-06, "loss": 1.895613968372345e-06, "step": 343960 }, { "epoch": 97.63553789384048, "grad_norm": 0.0005020223325118423, "learning_rate": 2.4019301731478855e-06, "loss": 1.9801780581474303e-06, "step": 343970 }, { "epoch": 97.63837638376384, "grad_norm": 0.00043829932110384107, "learning_rate": 2.3990916832245248e-06, "loss": 2.2999942302703857e-06, "step": 343980 }, { "epoch": 97.6412148736872, "grad_norm": 0.0004670356574933976, "learning_rate": 2.396253193301164e-06, "loss": 2.2722408175468447e-06, "step": 343990 }, { "epoch": 97.64405336361055, "grad_norm": 0.0005994825041852891, "learning_rate": 2.3934147033778033e-06, "loss": 2.293102443218231e-06, "step": 344000 }, { "epoch": 97.64405336361055, "eval_accuracy": 0.9890633941629046, "eval_loss": 0.04704798385500908, "eval_runtime": 58.6792, "eval_samples_per_second": 268.017, "eval_steps_per_second": 4.192, "step": 344000 }, { "epoch": 97.64689185353392, "grad_norm": 0.00034350765054114163, "learning_rate": 2.390576213454442e-06, "loss": 2.6557594537734985e-06, "step": 344010 }, { "epoch": 97.64973034345728, "grad_norm": 0.0005910228937864304, "learning_rate": 2.3877377235310814e-06, "loss": 1.5461817383766175e-06, "step": 344020 }, { "epoch": 97.65256883338064, "grad_norm": 0.0009875959949567914, "learning_rate": 2.3848992336077206e-06, "loss": 2.137571573257446e-06, "step": 344030 }, { "epoch": 97.655407323304, "grad_norm": 0.0006399855483323336, "learning_rate": 2.3820607436843603e-06, "loss": 2.424605190753937e-06, "step": 344040 }, { "epoch": 97.65824581322737, "grad_norm": 0.0002258748863823712, "learning_rate": 2.3792222537609996e-06, "loss": 2.632662653923035e-06, "step": 344050 }, { "epoch": 97.66108430315073, "grad_norm": 0.0004827012890018523, "learning_rate": 2.3763837638376384e-06, "loss": 2.281181514263153e-06, "step": 344060 }, { "epoch": 97.66392279307408, "grad_norm": 0.0004691421054303646, "learning_rate": 2.3735452739142776e-06, "loss": 2.0012259483337404e-06, "step": 344070 }, { "epoch": 97.66676128299744, "grad_norm": 0.001960443565621972, "learning_rate": 2.370706783990917e-06, "loss": 2.250261604785919e-06, "step": 344080 }, { "epoch": 97.6695997729208, "grad_norm": 0.0005946619203314185, "learning_rate": 2.367868294067556e-06, "loss": 1.453794538974762e-06, "step": 344090 }, { "epoch": 97.67243826284417, "grad_norm": 0.0005280912737362087, "learning_rate": 2.3650298041441954e-06, "loss": 2.6458874344825745e-06, "step": 344100 }, { "epoch": 97.67527675276753, "grad_norm": 0.0001375215797452256, "learning_rate": 2.3621913142208347e-06, "loss": 1.4118850231170654e-06, "step": 344110 }, { "epoch": 97.6781152426909, "grad_norm": 0.0005215554847382009, "learning_rate": 2.359352824297474e-06, "loss": 2.277083694934845e-06, "step": 344120 }, { "epoch": 97.68095373261426, "grad_norm": 0.0019815326668322086, "learning_rate": 2.356514334374113e-06, "loss": 2.4052336812019347e-06, "step": 344130 }, { "epoch": 97.6837922225376, "grad_norm": 0.00043076553265564144, "learning_rate": 2.3536758444507524e-06, "loss": 2.7157366275787354e-06, "step": 344140 }, { "epoch": 97.68663071246097, "grad_norm": 3.84511768061202e-05, "learning_rate": 2.3508373545273917e-06, "loss": 1.514330506324768e-06, "step": 344150 }, { "epoch": 97.68946920238433, "grad_norm": 0.0008436786010861397, "learning_rate": 2.3479988646040305e-06, "loss": 1.3634562492370605e-06, "step": 344160 }, { "epoch": 97.6923076923077, "grad_norm": 0.000690360669977963, "learning_rate": 2.34516037468067e-06, "loss": 1.9241124391555786e-06, "step": 344170 }, { "epoch": 97.69514618223106, "grad_norm": 0.000593969423789531, "learning_rate": 2.342321884757309e-06, "loss": 1.8849968910217285e-06, "step": 344180 }, { "epoch": 97.69798467215442, "grad_norm": 0.004138396121561527, "learning_rate": 2.3394833948339483e-06, "loss": 3.289245069026947e-06, "step": 344190 }, { "epoch": 97.70082316207777, "grad_norm": 0.00019697468087542802, "learning_rate": 2.3366449049105876e-06, "loss": 2.161972224712372e-06, "step": 344200 }, { "epoch": 97.70366165200113, "grad_norm": 0.00013476968160830438, "learning_rate": 2.3338064149872272e-06, "loss": 1.8827617168426513e-06, "step": 344210 }, { "epoch": 97.7065001419245, "grad_norm": 0.0017473411280661821, "learning_rate": 2.330967925063866e-06, "loss": 2.1433457732200623e-06, "step": 344220 }, { "epoch": 97.70933863184786, "grad_norm": 0.0006643691449426115, "learning_rate": 2.3281294351405053e-06, "loss": 1.7328187823295592e-06, "step": 344230 }, { "epoch": 97.71217712177122, "grad_norm": 0.0003368484030943364, "learning_rate": 2.3252909452171446e-06, "loss": 1.9922852516174315e-06, "step": 344240 }, { "epoch": 97.71501561169458, "grad_norm": 0.00048048872849904, "learning_rate": 2.322452455293784e-06, "loss": 2.448633313179016e-06, "step": 344250 }, { "epoch": 97.71785410161795, "grad_norm": 0.00013909069821238518, "learning_rate": 2.319613965370423e-06, "loss": 1.4353543519973755e-06, "step": 344260 }, { "epoch": 97.7206925915413, "grad_norm": 8.335659367730841e-05, "learning_rate": 2.316775475447062e-06, "loss": 1.7588958144187926e-06, "step": 344270 }, { "epoch": 97.72353108146466, "grad_norm": 0.00022797691053710878, "learning_rate": 2.3139369855237016e-06, "loss": 1.5011057257652282e-06, "step": 344280 }, { "epoch": 97.72636957138802, "grad_norm": 0.0017564290901646018, "learning_rate": 2.311098495600341e-06, "loss": 1.6128644347190857e-06, "step": 344290 }, { "epoch": 97.72920806131138, "grad_norm": 0.0019045931985601783, "learning_rate": 2.30826000567698e-06, "loss": 1.920759677886963e-06, "step": 344300 }, { "epoch": 97.73204655123475, "grad_norm": 0.00032993461354635656, "learning_rate": 2.3054215157536194e-06, "loss": 1.743808388710022e-06, "step": 344310 }, { "epoch": 97.73488504115811, "grad_norm": 0.0006762120174244046, "learning_rate": 2.3025830258302582e-06, "loss": 1.3627111911773682e-06, "step": 344320 }, { "epoch": 97.73772353108147, "grad_norm": 0.00011709918908309191, "learning_rate": 2.2997445359068975e-06, "loss": 1.8704682588577271e-06, "step": 344330 }, { "epoch": 97.74056202100482, "grad_norm": 0.0012427528854459524, "learning_rate": 2.2969060459835367e-06, "loss": 2.432987093925476e-06, "step": 344340 }, { "epoch": 97.74340051092818, "grad_norm": 0.002067486522719264, "learning_rate": 2.294067556060176e-06, "loss": 2.4829059839248657e-06, "step": 344350 }, { "epoch": 97.74623900085155, "grad_norm": 0.0013007835950702429, "learning_rate": 2.2912290661368153e-06, "loss": 1.9650906324386597e-06, "step": 344360 }, { "epoch": 97.74907749077491, "grad_norm": 0.00033870668266899884, "learning_rate": 2.2883905762134545e-06, "loss": 1.5869736671447754e-06, "step": 344370 }, { "epoch": 97.75191598069827, "grad_norm": 0.0007747673662379384, "learning_rate": 2.2855520862900938e-06, "loss": 1.7980113625526427e-06, "step": 344380 }, { "epoch": 97.75475447062163, "grad_norm": 0.0008509822655469179, "learning_rate": 2.282713596366733e-06, "loss": 2.9928982257843017e-06, "step": 344390 }, { "epoch": 97.757592960545, "grad_norm": 0.0006253528990782797, "learning_rate": 2.2798751064433723e-06, "loss": 1.3569369912147522e-06, "step": 344400 }, { "epoch": 97.76043145046835, "grad_norm": 9.960692113963887e-05, "learning_rate": 2.2770366165200115e-06, "loss": 2.389587461948395e-06, "step": 344410 }, { "epoch": 97.76326994039171, "grad_norm": 0.0016101858345791698, "learning_rate": 2.274198126596651e-06, "loss": 1.9719824194908143e-06, "step": 344420 }, { "epoch": 97.76610843031507, "grad_norm": 0.0008246049401350319, "learning_rate": 2.2713596366732896e-06, "loss": 1.909397542476654e-06, "step": 344430 }, { "epoch": 97.76894692023843, "grad_norm": 0.0007823153282515705, "learning_rate": 2.268521146749929e-06, "loss": 2.043507993221283e-06, "step": 344440 }, { "epoch": 97.7717854101618, "grad_norm": 0.0003130004624836147, "learning_rate": 2.265682656826568e-06, "loss": 1.835450530052185e-06, "step": 344450 }, { "epoch": 97.77462390008516, "grad_norm": 0.000408362306188792, "learning_rate": 2.262844166903208e-06, "loss": 1.8635764718055725e-06, "step": 344460 }, { "epoch": 97.77746239000851, "grad_norm": 0.00013823219342157245, "learning_rate": 2.260005676979847e-06, "loss": 1.1745840311050415e-06, "step": 344470 }, { "epoch": 97.78030087993187, "grad_norm": 0.002310412470251322, "learning_rate": 2.257167187056486e-06, "loss": 3.0856579542160032e-06, "step": 344480 }, { "epoch": 97.78313936985523, "grad_norm": 0.0005182061577215791, "learning_rate": 2.254328697133125e-06, "loss": 2.040155231952667e-06, "step": 344490 }, { "epoch": 97.7859778597786, "grad_norm": 0.00021101879246998578, "learning_rate": 2.2514902072097644e-06, "loss": 2.004764974117279e-06, "step": 344500 }, { "epoch": 97.7859778597786, "eval_accuracy": 0.9894449036688497, "eval_loss": 0.04711580276489258, "eval_runtime": 62.8424, "eval_samples_per_second": 250.261, "eval_steps_per_second": 3.915, "step": 344500 }, { "epoch": 97.78881634970196, "grad_norm": 0.0002770522842183709, "learning_rate": 2.2486517172864037e-06, "loss": 1.438148319721222e-06, "step": 344510 }, { "epoch": 97.79165483962532, "grad_norm": 0.0002795697364490479, "learning_rate": 2.245813227363043e-06, "loss": 1.9896775484085084e-06, "step": 344520 }, { "epoch": 97.79449332954869, "grad_norm": 0.0004142745747230947, "learning_rate": 2.242974737439682e-06, "loss": 1.8961727619171143e-06, "step": 344530 }, { "epoch": 97.79733181947203, "grad_norm": 0.0002949197369161993, "learning_rate": 2.2401362475163215e-06, "loss": 1.5174970030784608e-06, "step": 344540 }, { "epoch": 97.8001703093954, "grad_norm": 0.0009065337362699211, "learning_rate": 2.2372977575929607e-06, "loss": 2.5525689125061037e-06, "step": 344550 }, { "epoch": 97.80300879931876, "grad_norm": 0.0023902023676782846, "learning_rate": 2.2344592676696e-06, "loss": 2.726353704929352e-06, "step": 344560 }, { "epoch": 97.80584728924212, "grad_norm": 0.0002487730816937983, "learning_rate": 2.2316207777462392e-06, "loss": 1.3539567589759826e-06, "step": 344570 }, { "epoch": 97.80868577916549, "grad_norm": 0.0006366989691741765, "learning_rate": 2.2287822878228785e-06, "loss": 1.3710930943489075e-06, "step": 344580 }, { "epoch": 97.81152426908885, "grad_norm": 0.0019574144389480352, "learning_rate": 2.2259437978995173e-06, "loss": 1.8466264009475708e-06, "step": 344590 }, { "epoch": 97.81436275901221, "grad_norm": 0.00017346118693239987, "learning_rate": 2.2231053079761566e-06, "loss": 2.148933708667755e-06, "step": 344600 }, { "epoch": 97.81720124893556, "grad_norm": 0.002749943407252431, "learning_rate": 2.220266818052796e-06, "loss": 3.171898424625397e-06, "step": 344610 }, { "epoch": 97.82003973885892, "grad_norm": 0.00046537519665434957, "learning_rate": 2.217428328129435e-06, "loss": 1.5439465641975402e-06, "step": 344620 }, { "epoch": 97.82287822878229, "grad_norm": 0.000612776551861316, "learning_rate": 2.2145898382060748e-06, "loss": 1.570768654346466e-06, "step": 344630 }, { "epoch": 97.82571671870565, "grad_norm": 0.00041255532414652407, "learning_rate": 2.2117513482827136e-06, "loss": 2.305768430233002e-06, "step": 344640 }, { "epoch": 97.82855520862901, "grad_norm": 0.0004043650405947119, "learning_rate": 2.208912858359353e-06, "loss": 1.5718862414360046e-06, "step": 344650 }, { "epoch": 97.83139369855238, "grad_norm": 0.00024389478494413197, "learning_rate": 2.206074368435992e-06, "loss": 1.8991529941558838e-06, "step": 344660 }, { "epoch": 97.83423218847572, "grad_norm": 0.0002630633825901896, "learning_rate": 2.2032358785126314e-06, "loss": 1.9254162907600404e-06, "step": 344670 }, { "epoch": 97.83707067839909, "grad_norm": 0.0002163762110285461, "learning_rate": 2.2003973885892706e-06, "loss": 1.9531697034835814e-06, "step": 344680 }, { "epoch": 97.83990916832245, "grad_norm": 0.0005363699165172875, "learning_rate": 2.19755889866591e-06, "loss": 2.164021134376526e-06, "step": 344690 }, { "epoch": 97.84274765824581, "grad_norm": 0.0007250350317917764, "learning_rate": 2.194720408742549e-06, "loss": 1.482851803302765e-06, "step": 344700 }, { "epoch": 97.84558614816918, "grad_norm": 0.0001728822971927002, "learning_rate": 2.1918819188191884e-06, "loss": 1.6011297702789307e-06, "step": 344710 }, { "epoch": 97.84842463809254, "grad_norm": 0.0009655266185291111, "learning_rate": 2.1890434288958277e-06, "loss": 1.2604519724845887e-06, "step": 344720 }, { "epoch": 97.8512631280159, "grad_norm": 0.001667048200033605, "learning_rate": 2.186204938972467e-06, "loss": 1.9811093807220457e-06, "step": 344730 }, { "epoch": 97.85410161793925, "grad_norm": 0.0004745266924146563, "learning_rate": 2.183366449049106e-06, "loss": 3.441423177719116e-06, "step": 344740 }, { "epoch": 97.85694010786261, "grad_norm": 0.0004173857450950891, "learning_rate": 2.180527959125745e-06, "loss": 1.5517696738243103e-06, "step": 344750 }, { "epoch": 97.85977859778598, "grad_norm": 0.0010748931672424078, "learning_rate": 2.1776894692023843e-06, "loss": 1.485086977481842e-06, "step": 344760 }, { "epoch": 97.86261708770934, "grad_norm": 0.00025780239957384765, "learning_rate": 2.1748509792790235e-06, "loss": 1.356378197669983e-06, "step": 344770 }, { "epoch": 97.8654555776327, "grad_norm": 0.0009664091048762202, "learning_rate": 2.172012489355663e-06, "loss": 1.704692840576172e-06, "step": 344780 }, { "epoch": 97.86829406755606, "grad_norm": 0.00027553908876143396, "learning_rate": 2.169173999432302e-06, "loss": 1.872517168521881e-06, "step": 344790 }, { "epoch": 97.87113255747943, "grad_norm": 0.0005043628043495119, "learning_rate": 2.1663355095089413e-06, "loss": 1.5787780284881592e-06, "step": 344800 }, { "epoch": 97.87397104740278, "grad_norm": 0.00013974167814012617, "learning_rate": 2.1634970195855806e-06, "loss": 2.0030885934829714e-06, "step": 344810 }, { "epoch": 97.87680953732614, "grad_norm": 9.29337038542144e-05, "learning_rate": 2.16065852966222e-06, "loss": 1.8795952200889588e-06, "step": 344820 }, { "epoch": 97.8796480272495, "grad_norm": 0.00012628942204173654, "learning_rate": 2.157820039738859e-06, "loss": 2.514384686946869e-06, "step": 344830 }, { "epoch": 97.88248651717286, "grad_norm": 0.004875031765550375, "learning_rate": 2.1549815498154983e-06, "loss": 2.0463019609451292e-06, "step": 344840 }, { "epoch": 97.88532500709623, "grad_norm": 0.0005873528425581753, "learning_rate": 2.1521430598921376e-06, "loss": 2.236664295196533e-06, "step": 344850 }, { "epoch": 97.88816349701959, "grad_norm": 0.0006953069241717458, "learning_rate": 2.1493045699687764e-06, "loss": 1.767277717590332e-06, "step": 344860 }, { "epoch": 97.89100198694295, "grad_norm": 0.0010608512675389647, "learning_rate": 2.1464660800454157e-06, "loss": 2.0802021026611327e-06, "step": 344870 }, { "epoch": 97.8938404768663, "grad_norm": 0.000312367599690333, "learning_rate": 2.1436275901220554e-06, "loss": 3.890693187713623e-06, "step": 344880 }, { "epoch": 97.89667896678966, "grad_norm": 0.01341259852051735, "learning_rate": 2.1407891001986946e-06, "loss": 4.361569881439209e-06, "step": 344890 }, { "epoch": 97.89951745671303, "grad_norm": 0.00030153748230077326, "learning_rate": 2.137950610275334e-06, "loss": 7.910281419754029e-06, "step": 344900 }, { "epoch": 97.90235594663639, "grad_norm": 0.0006150449626147747, "learning_rate": 2.1351121203519727e-06, "loss": 2.5212764739990233e-06, "step": 344910 }, { "epoch": 97.90519443655975, "grad_norm": 0.0008695655269548297, "learning_rate": 2.132273630428612e-06, "loss": 2.846866846084595e-06, "step": 344920 }, { "epoch": 97.90803292648312, "grad_norm": 0.0026992899365723133, "learning_rate": 2.1294351405052512e-06, "loss": 3.635510802268982e-06, "step": 344930 }, { "epoch": 97.91087141640647, "grad_norm": 0.0007832828559912741, "learning_rate": 2.1265966505818905e-06, "loss": 1.4534220099449157e-06, "step": 344940 }, { "epoch": 97.91370990632983, "grad_norm": 0.0013022584607824683, "learning_rate": 2.1237581606585297e-06, "loss": 2.1396204829216004e-06, "step": 344950 }, { "epoch": 97.91654839625319, "grad_norm": 0.00022990905563347042, "learning_rate": 2.120919670735169e-06, "loss": 1.3340264558792115e-06, "step": 344960 }, { "epoch": 97.91938688617655, "grad_norm": 0.0009100668830797076, "learning_rate": 2.1180811808118083e-06, "loss": 1.9276514649391176e-06, "step": 344970 }, { "epoch": 97.92222537609992, "grad_norm": 0.00011413822358008474, "learning_rate": 2.1152426908884475e-06, "loss": 2.5589019060134887e-06, "step": 344980 }, { "epoch": 97.92506386602328, "grad_norm": 0.0008615250117145479, "learning_rate": 2.1124042009650868e-06, "loss": 1.897662878036499e-06, "step": 344990 }, { "epoch": 97.92790235594664, "grad_norm": 0.00017609336646273732, "learning_rate": 2.109565711041726e-06, "loss": 2.099573612213135e-06, "step": 345000 }, { "epoch": 97.92790235594664, "eval_accuracy": 0.9889362243275895, "eval_loss": 0.04788576066493988, "eval_runtime": 88.7917, "eval_samples_per_second": 177.122, "eval_steps_per_second": 2.771, "step": 345000 }, { "epoch": 97.93074084586999, "grad_norm": 0.000138171948492527, "learning_rate": 2.1067272211183653e-06, "loss": 3.139488399028778e-06, "step": 345010 }, { "epoch": 97.93357933579335, "grad_norm": 0.00019464264914859086, "learning_rate": 2.103888731195004e-06, "loss": 1.0907649993896485e-06, "step": 345020 }, { "epoch": 97.93641782571672, "grad_norm": 0.00011738362081814557, "learning_rate": 2.1010502412716434e-06, "loss": 2.4667009711265563e-06, "step": 345030 }, { "epoch": 97.93925631564008, "grad_norm": 0.0008570161298848689, "learning_rate": 2.0982117513482826e-06, "loss": 1.7577782273292542e-06, "step": 345040 }, { "epoch": 97.94209480556344, "grad_norm": 0.00019776901172008365, "learning_rate": 2.0953732614249223e-06, "loss": 2.461299300193787e-06, "step": 345050 }, { "epoch": 97.9449332954868, "grad_norm": 0.0002802093222271651, "learning_rate": 2.0925347715015616e-06, "loss": 1.3664364814758301e-06, "step": 345060 }, { "epoch": 97.94777178541017, "grad_norm": 0.00011899589298991486, "learning_rate": 2.0896962815782004e-06, "loss": 2.214685082435608e-06, "step": 345070 }, { "epoch": 97.95061027533352, "grad_norm": 0.0011783834779635072, "learning_rate": 2.0868577916548397e-06, "loss": 2.1658837795257568e-06, "step": 345080 }, { "epoch": 97.95344876525688, "grad_norm": 0.0002755829773377627, "learning_rate": 2.084019301731479e-06, "loss": 2.050027251243591e-06, "step": 345090 }, { "epoch": 97.95628725518024, "grad_norm": 0.0004931585863232613, "learning_rate": 2.081180811808118e-06, "loss": 2.7375295758247376e-06, "step": 345100 }, { "epoch": 97.9591257451036, "grad_norm": 0.0004751071101054549, "learning_rate": 2.0783423218847574e-06, "loss": 2.0738691091537477e-06, "step": 345110 }, { "epoch": 97.96196423502697, "grad_norm": 0.00045264369691722095, "learning_rate": 2.0755038319613963e-06, "loss": 2.0189210772514345e-06, "step": 345120 }, { "epoch": 97.96480272495033, "grad_norm": 0.0008702457998879254, "learning_rate": 2.072665342038036e-06, "loss": 2.329610288143158e-06, "step": 345130 }, { "epoch": 97.96764121487368, "grad_norm": 8.605870243627578e-05, "learning_rate": 2.069826852114675e-06, "loss": 1.8175691366195678e-06, "step": 345140 }, { "epoch": 97.97047970479704, "grad_norm": 0.000726066529750824, "learning_rate": 2.0669883621913145e-06, "loss": 2.310611307621002e-06, "step": 345150 }, { "epoch": 97.9733181947204, "grad_norm": 0.0003028440405614674, "learning_rate": 2.0641498722679537e-06, "loss": 2.109259366989136e-06, "step": 345160 }, { "epoch": 97.97615668464377, "grad_norm": 0.0008050819742493331, "learning_rate": 2.061311382344593e-06, "loss": 2.5050714612007143e-06, "step": 345170 }, { "epoch": 97.97899517456713, "grad_norm": 0.00015940697630867362, "learning_rate": 2.058472892421232e-06, "loss": 1.2775883078575134e-06, "step": 345180 }, { "epoch": 97.9818336644905, "grad_norm": 0.0003406039031688124, "learning_rate": 2.055634402497871e-06, "loss": 2.0345672965049743e-06, "step": 345190 }, { "epoch": 97.98467215441386, "grad_norm": 0.0011520603438839316, "learning_rate": 2.0527959125745103e-06, "loss": 4.073977470397949e-06, "step": 345200 }, { "epoch": 97.9875106443372, "grad_norm": 0.00016803426842670888, "learning_rate": 2.0499574226511496e-06, "loss": 1.1110678315162659e-06, "step": 345210 }, { "epoch": 97.99034913426057, "grad_norm": 0.0010277990950271487, "learning_rate": 2.0471189327277893e-06, "loss": 2.4218112230300904e-06, "step": 345220 }, { "epoch": 97.99318762418393, "grad_norm": 0.00019661009719129652, "learning_rate": 2.044280442804428e-06, "loss": 2.017989754676819e-06, "step": 345230 }, { "epoch": 97.9960261141073, "grad_norm": 0.0010288767516613007, "learning_rate": 2.0414419528810673e-06, "loss": 2.0503997802734374e-06, "step": 345240 }, { "epoch": 97.99886460403066, "grad_norm": 0.0004613446071743965, "learning_rate": 2.0386034629577066e-06, "loss": 2.5661662220954896e-06, "step": 345250 }, { "epoch": 98.00170309395402, "grad_norm": 0.00039319353527389467, "learning_rate": 2.036048822026682e-06, "loss": 1.689436066953931e-06, "step": 345260 }, { "epoch": 98.00454158387738, "grad_norm": 0.0006962120532989502, "learning_rate": 2.033210332103321e-06, "loss": 2.6926398277282713e-06, "step": 345270 }, { "epoch": 98.00738007380073, "grad_norm": 0.00019768466881942004, "learning_rate": 2.0303718421799604e-06, "loss": 1.5696510672569276e-06, "step": 345280 }, { "epoch": 98.0102185637241, "grad_norm": 0.00032248266506940126, "learning_rate": 2.0275333522565992e-06, "loss": 1.642666757106781e-06, "step": 345290 }, { "epoch": 98.01305705364746, "grad_norm": 0.0009994677966460586, "learning_rate": 2.024694862333239e-06, "loss": 1.7696991562843323e-06, "step": 345300 }, { "epoch": 98.01589554357082, "grad_norm": 0.00043997002649120986, "learning_rate": 2.021856372409878e-06, "loss": 1.2269243597984315e-06, "step": 345310 }, { "epoch": 98.01873403349418, "grad_norm": 0.0003870174987241626, "learning_rate": 2.0190178824865174e-06, "loss": 2.1953135728836058e-06, "step": 345320 }, { "epoch": 98.02157252341755, "grad_norm": 0.00036651419941335917, "learning_rate": 2.0161793925631567e-06, "loss": 1.3822689652442933e-06, "step": 345330 }, { "epoch": 98.02441101334091, "grad_norm": 0.0023569888435304165, "learning_rate": 2.0133409026397955e-06, "loss": 1.8876045942306518e-06, "step": 345340 }, { "epoch": 98.02724950326426, "grad_norm": 0.00012459683057386428, "learning_rate": 2.0105024127164348e-06, "loss": 2.515129745006561e-06, "step": 345350 }, { "epoch": 98.03008799318762, "grad_norm": 0.00018933865067083389, "learning_rate": 2.007663922793074e-06, "loss": 1.6259029507637024e-06, "step": 345360 }, { "epoch": 98.03292648311098, "grad_norm": 0.0007852405542507768, "learning_rate": 2.0048254328697133e-06, "loss": 2.104416489601135e-06, "step": 345370 }, { "epoch": 98.03576497303435, "grad_norm": 9.557865996612236e-05, "learning_rate": 2.0019869429463526e-06, "loss": 1.5225261449813842e-06, "step": 345380 }, { "epoch": 98.03860346295771, "grad_norm": 0.0005735986051149666, "learning_rate": 1.9991484530229922e-06, "loss": 2.18413770198822e-06, "step": 345390 }, { "epoch": 98.04144195288107, "grad_norm": 0.00024907689657993615, "learning_rate": 1.996309963099631e-06, "loss": 1.2878328561782836e-06, "step": 345400 }, { "epoch": 98.04428044280442, "grad_norm": 0.0006857449770905077, "learning_rate": 1.9934714731762703e-06, "loss": 1.3573095202445985e-06, "step": 345410 }, { "epoch": 98.04711893272778, "grad_norm": 0.0007825525244697928, "learning_rate": 1.9906329832529096e-06, "loss": 1.9572675228118896e-06, "step": 345420 }, { "epoch": 98.04995742265115, "grad_norm": 0.0006141485646367073, "learning_rate": 1.987794493329549e-06, "loss": 2.452731132507324e-06, "step": 345430 }, { "epoch": 98.05279591257451, "grad_norm": 0.000885035318788141, "learning_rate": 1.984956003406188e-06, "loss": 1.7371028661727906e-06, "step": 345440 }, { "epoch": 98.05563440249787, "grad_norm": 0.00030373214394785464, "learning_rate": 1.982117513482827e-06, "loss": 2.800486981868744e-06, "step": 345450 }, { "epoch": 98.05847289242124, "grad_norm": 0.000608722388278693, "learning_rate": 1.979279023559466e-06, "loss": 2.660602331161499e-06, "step": 345460 }, { "epoch": 98.0613113823446, "grad_norm": 0.002601111540570855, "learning_rate": 1.976440533636106e-06, "loss": 3.016740083694458e-06, "step": 345470 }, { "epoch": 98.06414987226795, "grad_norm": 0.0012640653876587749, "learning_rate": 1.973602043712745e-06, "loss": 1.913495361804962e-06, "step": 345480 }, { "epoch": 98.06698836219131, "grad_norm": 0.0003054602420888841, "learning_rate": 1.9707635537893844e-06, "loss": 1.4057382941246034e-06, "step": 345490 }, { "epoch": 98.06982685211467, "grad_norm": 0.00010207371087744832, "learning_rate": 1.9679250638660232e-06, "loss": 2.3104250431060793e-06, "step": 345500 }, { "epoch": 98.06982685211467, "eval_accuracy": 0.9890633941629046, "eval_loss": 0.046800363808870316, "eval_runtime": 74.1153, "eval_samples_per_second": 212.196, "eval_steps_per_second": 3.319, "step": 345500 }, { "epoch": 98.07266534203804, "grad_norm": 0.0003940682508982718, "learning_rate": 1.9650865739426625e-06, "loss": 1.9898638129234313e-06, "step": 345510 }, { "epoch": 98.0755038319614, "grad_norm": 0.0005861441604793072, "learning_rate": 1.9622480840193017e-06, "loss": 1.5849247574806213e-06, "step": 345520 }, { "epoch": 98.07834232188476, "grad_norm": 0.0011081146076321602, "learning_rate": 1.959409594095941e-06, "loss": 2.014264464378357e-06, "step": 345530 }, { "epoch": 98.08118081180812, "grad_norm": 0.000675270683132112, "learning_rate": 1.9565711041725802e-06, "loss": 1.411139965057373e-06, "step": 345540 }, { "epoch": 98.08401930173147, "grad_norm": 0.00019783174502663314, "learning_rate": 1.9537326142492195e-06, "loss": 4.545599222183228e-06, "step": 345550 }, { "epoch": 98.08685779165484, "grad_norm": 0.0008667370420880616, "learning_rate": 1.9508941243258588e-06, "loss": 1.7918646335601807e-06, "step": 345560 }, { "epoch": 98.0896962815782, "grad_norm": 0.0007192216580733657, "learning_rate": 1.948055634402498e-06, "loss": 1.6909092664718627e-06, "step": 345570 }, { "epoch": 98.09253477150156, "grad_norm": 0.00048514813533984125, "learning_rate": 1.9452171444791373e-06, "loss": 2.0263716578483583e-06, "step": 345580 }, { "epoch": 98.09537326142492, "grad_norm": 0.00043862577877007425, "learning_rate": 1.9423786545557765e-06, "loss": 1.576170325279236e-06, "step": 345590 }, { "epoch": 98.09821175134829, "grad_norm": 0.00034745034645311534, "learning_rate": 1.939540164632416e-06, "loss": 2.1088868379592896e-06, "step": 345600 }, { "epoch": 98.10105024127165, "grad_norm": 0.0008509599138051271, "learning_rate": 1.9367016747090546e-06, "loss": 1.4692544937133788e-06, "step": 345610 }, { "epoch": 98.103888731195, "grad_norm": 0.0008448082371614873, "learning_rate": 1.933863184785694e-06, "loss": 2.074800431728363e-06, "step": 345620 }, { "epoch": 98.10672722111836, "grad_norm": 0.0009378657559864223, "learning_rate": 1.931024694862333e-06, "loss": 1.52587890625e-06, "step": 345630 }, { "epoch": 98.10956571104172, "grad_norm": 0.00030357774812728167, "learning_rate": 1.928186204938973e-06, "loss": 1.1954456567764281e-06, "step": 345640 }, { "epoch": 98.11240420096509, "grad_norm": 0.0006833866937085986, "learning_rate": 1.925347715015612e-06, "loss": 2.0790845155715943e-06, "step": 345650 }, { "epoch": 98.11524269088845, "grad_norm": 0.0010144998086616397, "learning_rate": 1.922509225092251e-06, "loss": 3.902800381183625e-06, "step": 345660 }, { "epoch": 98.11808118081181, "grad_norm": 0.00140874064527452, "learning_rate": 1.91967073516889e-06, "loss": 1.9051134586334228e-06, "step": 345670 }, { "epoch": 98.12091967073516, "grad_norm": 0.001064162002876401, "learning_rate": 1.9168322452455294e-06, "loss": 1.7959624528884889e-06, "step": 345680 }, { "epoch": 98.12375816065853, "grad_norm": 0.0003725397400557995, "learning_rate": 1.9139937553221687e-06, "loss": 1.3269484043121337e-06, "step": 345690 }, { "epoch": 98.12659665058189, "grad_norm": 0.0001483886007918045, "learning_rate": 1.911155265398808e-06, "loss": 2.0245090126991274e-06, "step": 345700 }, { "epoch": 98.12943514050525, "grad_norm": 0.000644539191853255, "learning_rate": 1.908316775475447e-06, "loss": 1.580268144607544e-06, "step": 345710 }, { "epoch": 98.13227363042861, "grad_norm": 0.00035469833528622985, "learning_rate": 1.9054782855520862e-06, "loss": 1.9658356904983522e-06, "step": 345720 }, { "epoch": 98.13511212035198, "grad_norm": 0.0002703826758079231, "learning_rate": 1.9026397956287255e-06, "loss": 2.065114676952362e-06, "step": 345730 }, { "epoch": 98.13795061027534, "grad_norm": 0.0003551101253833622, "learning_rate": 1.8998013057053648e-06, "loss": 2.296827733516693e-06, "step": 345740 }, { "epoch": 98.14078910019869, "grad_norm": 0.0005117572727613151, "learning_rate": 1.8969628157820042e-06, "loss": 1.6082078218460083e-06, "step": 345750 }, { "epoch": 98.14362759012205, "grad_norm": 0.0005323051009327173, "learning_rate": 1.8941243258586435e-06, "loss": 1.592002809047699e-06, "step": 345760 }, { "epoch": 98.14646608004541, "grad_norm": 0.0007163113914430141, "learning_rate": 1.8912858359352823e-06, "loss": 2.50302255153656e-06, "step": 345770 }, { "epoch": 98.14930456996878, "grad_norm": 0.0007639443501830101, "learning_rate": 1.8884473460119218e-06, "loss": 1.2548640370368958e-06, "step": 345780 }, { "epoch": 98.15214305989214, "grad_norm": 0.00012820684059988707, "learning_rate": 1.885608856088561e-06, "loss": 1.9337981939315797e-06, "step": 345790 }, { "epoch": 98.1549815498155, "grad_norm": 0.0008031714241951704, "learning_rate": 1.8827703661652003e-06, "loss": 1.3235956430435181e-06, "step": 345800 }, { "epoch": 98.15782003973887, "grad_norm": 0.00014913876657374203, "learning_rate": 1.8799318762418396e-06, "loss": 1.5398487448692323e-06, "step": 345810 }, { "epoch": 98.16065852966221, "grad_norm": 0.0005374293541535735, "learning_rate": 1.8770933863184786e-06, "loss": 1.815520226955414e-06, "step": 345820 }, { "epoch": 98.16349701958558, "grad_norm": 0.0002632646937854588, "learning_rate": 1.8742548963951179e-06, "loss": 2.4823471903800965e-06, "step": 345830 }, { "epoch": 98.16633550950894, "grad_norm": 0.002044110093265772, "learning_rate": 1.8714164064717571e-06, "loss": 1.937896013259888e-06, "step": 345840 }, { "epoch": 98.1691739994323, "grad_norm": 0.00010432737326482311, "learning_rate": 1.8685779165483964e-06, "loss": 1.4219433069229126e-06, "step": 345850 }, { "epoch": 98.17201248935567, "grad_norm": 0.0003453675308264792, "learning_rate": 1.8657394266250356e-06, "loss": 2.0738691091537477e-06, "step": 345860 }, { "epoch": 98.17485097927903, "grad_norm": 0.0003557269519660622, "learning_rate": 1.8629009367016749e-06, "loss": 1.698918640613556e-06, "step": 345870 }, { "epoch": 98.17768946920238, "grad_norm": 0.0003684193652588874, "learning_rate": 1.860062446778314e-06, "loss": 1.3232231140136718e-06, "step": 345880 }, { "epoch": 98.18052795912574, "grad_norm": 0.000884881941601634, "learning_rate": 1.8572239568549532e-06, "loss": 1.7104670405387878e-06, "step": 345890 }, { "epoch": 98.1833664490491, "grad_norm": 0.0005908351740799844, "learning_rate": 1.8543854669315924e-06, "loss": 2.1310523152351378e-06, "step": 345900 }, { "epoch": 98.18620493897247, "grad_norm": 0.0003035744302906096, "learning_rate": 1.8515469770082317e-06, "loss": 1.9792467355728148e-06, "step": 345910 }, { "epoch": 98.18904342889583, "grad_norm": 0.00036274417652748525, "learning_rate": 1.8487084870848712e-06, "loss": 2.292916178703308e-06, "step": 345920 }, { "epoch": 98.19188191881919, "grad_norm": 0.00037296611117199063, "learning_rate": 1.84586999716151e-06, "loss": 1.5294179320335389e-06, "step": 345930 }, { "epoch": 98.19472040874255, "grad_norm": 0.0001665267627686262, "learning_rate": 1.8430315072381493e-06, "loss": 2.010352909564972e-06, "step": 345940 }, { "epoch": 98.1975588986659, "grad_norm": 0.0007144836126826704, "learning_rate": 1.8401930173147885e-06, "loss": 1.5137717127799989e-06, "step": 345950 }, { "epoch": 98.20039738858927, "grad_norm": 0.000879493192769587, "learning_rate": 1.837354527391428e-06, "loss": 2.5063753128051756e-06, "step": 345960 }, { "epoch": 98.20323587851263, "grad_norm": 0.0002400573284830898, "learning_rate": 1.8345160374680672e-06, "loss": 1.2332573533058167e-06, "step": 345970 }, { "epoch": 98.20607436843599, "grad_norm": 0.001851582434028387, "learning_rate": 1.831677547544706e-06, "loss": 1.46571546792984e-06, "step": 345980 }, { "epoch": 98.20891285835935, "grad_norm": 0.0003564444195944816, "learning_rate": 1.8288390576213453e-06, "loss": 1.9783154129981993e-06, "step": 345990 }, { "epoch": 98.21175134828272, "grad_norm": 7.171909237513319e-05, "learning_rate": 1.8260005676979848e-06, "loss": 1.32732093334198e-06, "step": 346000 }, { "epoch": 98.21175134828272, "eval_accuracy": 0.9891269790805621, "eval_loss": 0.047385457903146744, "eval_runtime": 45.4437, "eval_samples_per_second": 346.076, "eval_steps_per_second": 5.413, "step": 346000 }, { "epoch": 98.21458983820608, "grad_norm": 0.0005065810400992632, "learning_rate": 1.823162077774624e-06, "loss": 1.2703239917755127e-06, "step": 346010 }, { "epoch": 98.21742832812943, "grad_norm": 0.00033522272133268416, "learning_rate": 1.8203235878512633e-06, "loss": 1.368299126625061e-06, "step": 346020 }, { "epoch": 98.22026681805279, "grad_norm": 0.003335559507831931, "learning_rate": 1.8174850979279026e-06, "loss": 2.39834189414978e-06, "step": 346030 }, { "epoch": 98.22310530797616, "grad_norm": 0.0004425986553542316, "learning_rate": 1.8146466080045416e-06, "loss": 1.7736107110977174e-06, "step": 346040 }, { "epoch": 98.22594379789952, "grad_norm": 0.00011843894026242197, "learning_rate": 1.8118081180811809e-06, "loss": 1.8624588847160339e-06, "step": 346050 }, { "epoch": 98.22878228782288, "grad_norm": 0.0004992829635739326, "learning_rate": 1.8089696281578201e-06, "loss": 1.2598931789398193e-06, "step": 346060 }, { "epoch": 98.23162077774624, "grad_norm": 0.0004138145304750651, "learning_rate": 1.8061311382344594e-06, "loss": 1.4601275324821472e-06, "step": 346070 }, { "epoch": 98.2344592676696, "grad_norm": 0.00022885992075316608, "learning_rate": 1.8032926483110987e-06, "loss": 1.4083459973335267e-06, "step": 346080 }, { "epoch": 98.23729775759296, "grad_norm": 0.0006295144557952881, "learning_rate": 1.8004541583877377e-06, "loss": 1.5895813703536987e-06, "step": 346090 }, { "epoch": 98.24013624751632, "grad_norm": 0.000876859703566879, "learning_rate": 1.797615668464377e-06, "loss": 1.9663944840431214e-06, "step": 346100 }, { "epoch": 98.24297473743968, "grad_norm": 0.0003517804725561291, "learning_rate": 1.7947771785410162e-06, "loss": 1.97775661945343e-06, "step": 346110 }, { "epoch": 98.24581322736304, "grad_norm": 5.378512651077472e-05, "learning_rate": 1.7919386886176555e-06, "loss": 1.5335157513618468e-06, "step": 346120 }, { "epoch": 98.2486517172864, "grad_norm": 0.0003927347715944052, "learning_rate": 1.789100198694295e-06, "loss": 3.260001540184021e-06, "step": 346130 }, { "epoch": 98.25149020720977, "grad_norm": 0.0006162014906294644, "learning_rate": 1.7862617087709338e-06, "loss": 1.854635775089264e-06, "step": 346140 }, { "epoch": 98.25432869713312, "grad_norm": 0.0002690551627893001, "learning_rate": 1.783423218847573e-06, "loss": 7.292628288269043e-06, "step": 346150 }, { "epoch": 98.25716718705648, "grad_norm": 0.00020713903359137475, "learning_rate": 1.7805847289242123e-06, "loss": 3.040209412574768e-06, "step": 346160 }, { "epoch": 98.26000567697984, "grad_norm": 0.007111099548637867, "learning_rate": 1.7777462390008518e-06, "loss": 3.36281955242157e-06, "step": 346170 }, { "epoch": 98.26284416690321, "grad_norm": 9.98258256004192e-05, "learning_rate": 1.774907749077491e-06, "loss": 2.2333115339279175e-06, "step": 346180 }, { "epoch": 98.26568265682657, "grad_norm": 0.00032707315403968096, "learning_rate": 1.7720692591541303e-06, "loss": 1.8805265426635743e-06, "step": 346190 }, { "epoch": 98.26852114674993, "grad_norm": 0.0009795763762667775, "learning_rate": 1.769230769230769e-06, "loss": 2.7425587177276613e-06, "step": 346200 }, { "epoch": 98.2713596366733, "grad_norm": 0.0005231343675404787, "learning_rate": 1.7663922793074086e-06, "loss": 1.9628554582595824e-06, "step": 346210 }, { "epoch": 98.27419812659664, "grad_norm": 0.0003068464866373688, "learning_rate": 1.7635537893840478e-06, "loss": 1.6344711184501648e-06, "step": 346220 }, { "epoch": 98.27703661652001, "grad_norm": 0.00012749772577080876, "learning_rate": 1.760715299460687e-06, "loss": 1.469627022743225e-06, "step": 346230 }, { "epoch": 98.27987510644337, "grad_norm": 0.0008912001503631473, "learning_rate": 1.7578768095373263e-06, "loss": 1.163594424724579e-06, "step": 346240 }, { "epoch": 98.28271359636673, "grad_norm": 0.0014883524272590876, "learning_rate": 1.7550383196139654e-06, "loss": 2.6399269700050353e-06, "step": 346250 }, { "epoch": 98.2855520862901, "grad_norm": 0.0007754358812235296, "learning_rate": 1.7521998296906046e-06, "loss": 1.7967075109481812e-06, "step": 346260 }, { "epoch": 98.28839057621346, "grad_norm": 0.0002691053377930075, "learning_rate": 1.749361339767244e-06, "loss": 1.5893951058387756e-06, "step": 346270 }, { "epoch": 98.29122906613682, "grad_norm": 0.00047330293455161154, "learning_rate": 1.7465228498438832e-06, "loss": 2.115592360496521e-06, "step": 346280 }, { "epoch": 98.29406755606017, "grad_norm": 0.0002860502863768488, "learning_rate": 1.7436843599205224e-06, "loss": 1.4765188097953797e-06, "step": 346290 }, { "epoch": 98.29690604598353, "grad_norm": 0.0007825068896636367, "learning_rate": 1.7408458699971615e-06, "loss": 1.93621963262558e-06, "step": 346300 }, { "epoch": 98.2997445359069, "grad_norm": 0.00038985293940640986, "learning_rate": 1.7380073800738007e-06, "loss": 1.8203631043434142e-06, "step": 346310 }, { "epoch": 98.30258302583026, "grad_norm": 0.0006309894961304963, "learning_rate": 1.73516889015044e-06, "loss": 1.7544254660606384e-06, "step": 346320 }, { "epoch": 98.30542151575362, "grad_norm": 0.0002859177184291184, "learning_rate": 1.7323304002270792e-06, "loss": 1.8512830138206482e-06, "step": 346330 }, { "epoch": 98.30826000567698, "grad_norm": 0.00024671561550348997, "learning_rate": 1.7294919103037187e-06, "loss": 1.9684433937072753e-06, "step": 346340 }, { "epoch": 98.31109849560033, "grad_norm": 0.00015791875193826854, "learning_rate": 1.726653420380358e-06, "loss": 1.8326565623283386e-06, "step": 346350 }, { "epoch": 98.3139369855237, "grad_norm": 6.517859583254904e-05, "learning_rate": 1.7238149304569968e-06, "loss": 1.722387969493866e-06, "step": 346360 }, { "epoch": 98.31677547544706, "grad_norm": 9.732350736157969e-05, "learning_rate": 1.720976440533636e-06, "loss": 1.4727935194969178e-06, "step": 346370 }, { "epoch": 98.31961396537042, "grad_norm": 0.000452986394520849, "learning_rate": 1.7181379506102755e-06, "loss": 2.6086345314979553e-06, "step": 346380 }, { "epoch": 98.32245245529379, "grad_norm": 0.00037947596865706146, "learning_rate": 1.7152994606869148e-06, "loss": 5.029141902923584e-06, "step": 346390 }, { "epoch": 98.32529094521715, "grad_norm": 0.0011061604600399733, "learning_rate": 1.712460970763554e-06, "loss": 2.7313828468322756e-06, "step": 346400 }, { "epoch": 98.32812943514051, "grad_norm": 0.00035675379331223667, "learning_rate": 1.7096224808401929e-06, "loss": 1.8181279301643372e-06, "step": 346410 }, { "epoch": 98.33096792506386, "grad_norm": 0.0004493498126976192, "learning_rate": 1.7067839909168323e-06, "loss": 1.7933547496795653e-06, "step": 346420 }, { "epoch": 98.33380641498722, "grad_norm": 0.000241183239268139, "learning_rate": 1.7039455009934716e-06, "loss": 1.5294179320335389e-06, "step": 346430 }, { "epoch": 98.33664490491059, "grad_norm": 0.00025251496117562056, "learning_rate": 1.7011070110701108e-06, "loss": 2.8792768716812133e-06, "step": 346440 }, { "epoch": 98.33948339483395, "grad_norm": 0.0017961518606171012, "learning_rate": 1.6982685211467501e-06, "loss": 1.6616657376289368e-06, "step": 346450 }, { "epoch": 98.34232188475731, "grad_norm": 0.00160698383115232, "learning_rate": 1.6954300312233892e-06, "loss": 2.6319175958633423e-06, "step": 346460 }, { "epoch": 98.34516037468067, "grad_norm": 0.0010640928521752357, "learning_rate": 1.6925915413000284e-06, "loss": 1.704692840576172e-06, "step": 346470 }, { "epoch": 98.34799886460404, "grad_norm": 0.00040837228880263865, "learning_rate": 1.6897530513766677e-06, "loss": 1.6868114471435547e-06, "step": 346480 }, { "epoch": 98.35083735452739, "grad_norm": 0.000339298538165167, "learning_rate": 1.686914561453307e-06, "loss": 2.1899119019508363e-06, "step": 346490 }, { "epoch": 98.35367584445075, "grad_norm": 0.0007641548872925341, "learning_rate": 1.6840760715299462e-06, "loss": 2.942979335784912e-06, "step": 346500 }, { "epoch": 98.35367584445075, "eval_accuracy": 0.9891905639982196, "eval_loss": 0.04704376310110092, "eval_runtime": 41.8081, "eval_samples_per_second": 376.171, "eval_steps_per_second": 5.884, "step": 346500 }, { "epoch": 98.35651433437411, "grad_norm": 0.0008108978508971632, "learning_rate": 1.6812375816065852e-06, "loss": 1.7248094081878662e-06, "step": 346510 }, { "epoch": 98.35935282429747, "grad_norm": 0.000651123991701752, "learning_rate": 1.6783990916832245e-06, "loss": 2.064742147922516e-06, "step": 346520 }, { "epoch": 98.36219131422084, "grad_norm": 0.0005061875563114882, "learning_rate": 1.6755606017598637e-06, "loss": 2.008303999900818e-06, "step": 346530 }, { "epoch": 98.3650298041442, "grad_norm": 0.0007615742506459355, "learning_rate": 1.672722111836503e-06, "loss": 1.7954036593437194e-06, "step": 346540 }, { "epoch": 98.36786829406756, "grad_norm": 0.0004301096487324685, "learning_rate": 1.6698836219131425e-06, "loss": 1.767277717590332e-06, "step": 346550 }, { "epoch": 98.37070678399091, "grad_norm": 0.00022225196880754083, "learning_rate": 1.6670451319897817e-06, "loss": 1.9274652004241943e-06, "step": 346560 }, { "epoch": 98.37354527391427, "grad_norm": 0.00016606994904577732, "learning_rate": 1.6642066420664206e-06, "loss": 1.337192952632904e-06, "step": 346570 }, { "epoch": 98.37638376383764, "grad_norm": 0.00038672483060508966, "learning_rate": 1.6613681521430598e-06, "loss": 2.181529998779297e-06, "step": 346580 }, { "epoch": 98.379222253761, "grad_norm": 6.0134214436402544e-05, "learning_rate": 1.6585296622196993e-06, "loss": 2.1355226635932922e-06, "step": 346590 }, { "epoch": 98.38206074368436, "grad_norm": 0.0010437057353556156, "learning_rate": 1.6556911722963385e-06, "loss": 1.4415010809898376e-06, "step": 346600 }, { "epoch": 98.38489923360773, "grad_norm": 0.00023772267741151154, "learning_rate": 1.6528526823729778e-06, "loss": 2.0710751414299013e-06, "step": 346610 }, { "epoch": 98.38773772353107, "grad_norm": 0.0012852194486185908, "learning_rate": 1.6500141924496166e-06, "loss": 1.944601535797119e-06, "step": 346620 }, { "epoch": 98.39057621345444, "grad_norm": 0.0004306363989599049, "learning_rate": 1.647175702526256e-06, "loss": 1.3452023267745973e-06, "step": 346630 }, { "epoch": 98.3934147033778, "grad_norm": 0.00047104491386562586, "learning_rate": 1.6443372126028954e-06, "loss": 2.0166859030723573e-06, "step": 346640 }, { "epoch": 98.39625319330116, "grad_norm": 0.0013375465059652925, "learning_rate": 1.6414987226795346e-06, "loss": 2.360902726650238e-06, "step": 346650 }, { "epoch": 98.39909168322453, "grad_norm": 0.00018008561164606363, "learning_rate": 1.6386602327561739e-06, "loss": 1.1371448636054993e-06, "step": 346660 }, { "epoch": 98.40193017314789, "grad_norm": 0.00035262980964034796, "learning_rate": 1.635821742832813e-06, "loss": 2.006441354751587e-06, "step": 346670 }, { "epoch": 98.40476866307125, "grad_norm": 0.0005694772116839886, "learning_rate": 1.6329832529094522e-06, "loss": 1.4754012227058411e-06, "step": 346680 }, { "epoch": 98.4076071529946, "grad_norm": 0.00021353051124606282, "learning_rate": 1.6301447629860914e-06, "loss": 2.6172026991844176e-06, "step": 346690 }, { "epoch": 98.41044564291796, "grad_norm": 0.00034742290154099464, "learning_rate": 1.6273062730627307e-06, "loss": 2.139247953891754e-06, "step": 346700 }, { "epoch": 98.41328413284133, "grad_norm": 0.00010668396134860814, "learning_rate": 1.62446778313937e-06, "loss": 1.6238540410995483e-06, "step": 346710 }, { "epoch": 98.41612262276469, "grad_norm": 0.00028121680952608585, "learning_rate": 1.6216292932160094e-06, "loss": 1.937896013259888e-06, "step": 346720 }, { "epoch": 98.41896111268805, "grad_norm": 0.001257482566870749, "learning_rate": 1.6187908032926482e-06, "loss": 1.4545395970344543e-06, "step": 346730 }, { "epoch": 98.42179960261142, "grad_norm": 0.00019863038323819637, "learning_rate": 1.6159523133692875e-06, "loss": 1.4964491128921508e-06, "step": 346740 }, { "epoch": 98.42463809253478, "grad_norm": 0.0007768716313876212, "learning_rate": 1.6131138234459268e-06, "loss": 1.4517456293106079e-06, "step": 346750 }, { "epoch": 98.42747658245813, "grad_norm": 0.00028933523572050035, "learning_rate": 1.6102753335225662e-06, "loss": 9.71369445323944e-07, "step": 346760 }, { "epoch": 98.43031507238149, "grad_norm": 0.001248708227649331, "learning_rate": 1.6074368435992055e-06, "loss": 2.9016286134719848e-06, "step": 346770 }, { "epoch": 98.43315356230485, "grad_norm": 0.0003700660017784685, "learning_rate": 1.6045983536758443e-06, "loss": 1.6782432794570923e-06, "step": 346780 }, { "epoch": 98.43599205222822, "grad_norm": 0.00020057310757692903, "learning_rate": 1.6017598637524836e-06, "loss": 1.7911195755004883e-06, "step": 346790 }, { "epoch": 98.43883054215158, "grad_norm": 0.0010083595989271998, "learning_rate": 1.598921373829123e-06, "loss": 3.2156705856323244e-06, "step": 346800 }, { "epoch": 98.44166903207494, "grad_norm": 0.00010441680205985904, "learning_rate": 1.5960828839057623e-06, "loss": 1.7825514078140259e-06, "step": 346810 }, { "epoch": 98.4445075219983, "grad_norm": 0.000574376608710736, "learning_rate": 1.5932443939824016e-06, "loss": 1.8185004591941833e-06, "step": 346820 }, { "epoch": 98.44734601192165, "grad_norm": 0.0004224987351335585, "learning_rate": 1.5904059040590404e-06, "loss": 3.3829361200332642e-06, "step": 346830 }, { "epoch": 98.45018450184502, "grad_norm": 0.0003659736830741167, "learning_rate": 1.5875674141356799e-06, "loss": 1.6462057828903197e-06, "step": 346840 }, { "epoch": 98.45302299176838, "grad_norm": 0.00038805778604000807, "learning_rate": 1.5847289242123191e-06, "loss": 2.0055100321769715e-06, "step": 346850 }, { "epoch": 98.45586148169174, "grad_norm": 0.00017185953038278967, "learning_rate": 1.5818904342889584e-06, "loss": 1.5441328287124634e-06, "step": 346860 }, { "epoch": 98.4586999716151, "grad_norm": 0.00016781782323960215, "learning_rate": 1.5790519443655976e-06, "loss": 1.8291175365447998e-06, "step": 346870 }, { "epoch": 98.46153846153847, "grad_norm": 0.0005253733834251761, "learning_rate": 1.5762134544422369e-06, "loss": 1.644529402256012e-06, "step": 346880 }, { "epoch": 98.46437695146182, "grad_norm": 0.0002061304694507271, "learning_rate": 1.573374964518876e-06, "loss": 1.4945864677429199e-06, "step": 346890 }, { "epoch": 98.46721544138518, "grad_norm": 0.0005170723889023066, "learning_rate": 1.5705364745955152e-06, "loss": 1.6326084733009338e-06, "step": 346900 }, { "epoch": 98.47005393130854, "grad_norm": 0.0007525775581598282, "learning_rate": 1.5676979846721545e-06, "loss": 3.2408162951469423e-06, "step": 346910 }, { "epoch": 98.4728924212319, "grad_norm": 0.003787672147154808, "learning_rate": 1.5648594947487937e-06, "loss": 2.2042542695999147e-06, "step": 346920 }, { "epoch": 98.47573091115527, "grad_norm": 0.00027775001944974065, "learning_rate": 1.562021004825433e-06, "loss": 9.067356586456299e-07, "step": 346930 }, { "epoch": 98.47856940107863, "grad_norm": 0.0001767907670000568, "learning_rate": 1.5591825149020722e-06, "loss": 1.4349818229675293e-06, "step": 346940 }, { "epoch": 98.481407891002, "grad_norm": 0.00012414061347953975, "learning_rate": 1.5563440249787113e-06, "loss": 1.8453225493431092e-06, "step": 346950 }, { "epoch": 98.48424638092534, "grad_norm": 0.0001758667203830555, "learning_rate": 1.5535055350553505e-06, "loss": 1.6758218407630921e-06, "step": 346960 }, { "epoch": 98.4870848708487, "grad_norm": 0.0006802586140111089, "learning_rate": 1.55066704513199e-06, "loss": 3.413669764995575e-06, "step": 346970 }, { "epoch": 98.48992336077207, "grad_norm": 6.803323776694015e-05, "learning_rate": 1.547828555208629e-06, "loss": 1.7777085304260253e-06, "step": 346980 }, { "epoch": 98.49276185069543, "grad_norm": 0.000642057100776583, "learning_rate": 1.5449900652852683e-06, "loss": 3.8513913750648495e-06, "step": 346990 }, { "epoch": 98.4956003406188, "grad_norm": 0.0002180196315748617, "learning_rate": 1.5421515753619076e-06, "loss": 1.3278797268867492e-06, "step": 347000 }, { "epoch": 98.4956003406188, "eval_accuracy": 0.9891269790805621, "eval_loss": 0.048075295984745026, "eval_runtime": 46.4767, "eval_samples_per_second": 338.385, "eval_steps_per_second": 5.293, "step": 347000 }, { "epoch": 98.49843883054216, "grad_norm": 0.0007713508093729615, "learning_rate": 1.5393130854385468e-06, "loss": 1.949630677700043e-06, "step": 347010 }, { "epoch": 98.50127732046552, "grad_norm": 0.010299026034772396, "learning_rate": 1.536474595515186e-06, "loss": 3.234855830669403e-06, "step": 347020 }, { "epoch": 98.50411581038887, "grad_norm": 0.00036700654891319573, "learning_rate": 1.5336361055918251e-06, "loss": 1.909397542476654e-06, "step": 347030 }, { "epoch": 98.50695430031223, "grad_norm": 0.0003156933526042849, "learning_rate": 1.5307976156684644e-06, "loss": 1.9222497940063477e-06, "step": 347040 }, { "epoch": 98.5097927902356, "grad_norm": 0.00021137695875950158, "learning_rate": 1.5279591257451036e-06, "loss": 1.375935971736908e-06, "step": 347050 }, { "epoch": 98.51263128015896, "grad_norm": 4.9243481043959036e-05, "learning_rate": 1.5251206358217429e-06, "loss": 2.1463260054588317e-06, "step": 347060 }, { "epoch": 98.51546977008232, "grad_norm": 0.00022995482140686363, "learning_rate": 1.5222821458983821e-06, "loss": 2.391450107097626e-06, "step": 347070 }, { "epoch": 98.51830826000568, "grad_norm": 0.00031266541918739676, "learning_rate": 1.5194436559750214e-06, "loss": 2.0913779735565185e-06, "step": 347080 }, { "epoch": 98.52114674992903, "grad_norm": 0.0008211274398490787, "learning_rate": 1.5166051660516604e-06, "loss": 1.5363097190856933e-06, "step": 347090 }, { "epoch": 98.5239852398524, "grad_norm": 0.00017274191486649215, "learning_rate": 1.5137666761283e-06, "loss": 1.0574236512184144e-06, "step": 347100 }, { "epoch": 98.52682372977576, "grad_norm": 0.0001389442477375269, "learning_rate": 1.510928186204939e-06, "loss": 1.4038756489753724e-06, "step": 347110 }, { "epoch": 98.52966221969912, "grad_norm": 0.00041692901868373156, "learning_rate": 1.5080896962815782e-06, "loss": 1.6003847122192384e-06, "step": 347120 }, { "epoch": 98.53250070962248, "grad_norm": 0.0008993787341751158, "learning_rate": 1.5052512063582175e-06, "loss": 2.3232772946357726e-06, "step": 347130 }, { "epoch": 98.53533919954585, "grad_norm": 0.0010457240277901292, "learning_rate": 1.5024127164348567e-06, "loss": 2.0621344447135924e-06, "step": 347140 }, { "epoch": 98.53817768946921, "grad_norm": 0.00045508172479458153, "learning_rate": 1.499574226511496e-06, "loss": 2.654828131198883e-06, "step": 347150 }, { "epoch": 98.54101617939256, "grad_norm": 0.00023494205379392952, "learning_rate": 1.4967357365881352e-06, "loss": 2.2180378437042235e-06, "step": 347160 }, { "epoch": 98.54385466931592, "grad_norm": 0.0006366737652570009, "learning_rate": 1.4938972466647743e-06, "loss": 1.8149614334106445e-06, "step": 347170 }, { "epoch": 98.54669315923928, "grad_norm": 0.00027367123402655125, "learning_rate": 1.4910587567414138e-06, "loss": 1.866370439529419e-06, "step": 347180 }, { "epoch": 98.54953164916265, "grad_norm": 0.00039208013913594186, "learning_rate": 1.4882202668180528e-06, "loss": 1.5644356608390808e-06, "step": 347190 }, { "epoch": 98.55237013908601, "grad_norm": 0.00039940208080224693, "learning_rate": 1.485381776894692e-06, "loss": 2.1204352378845217e-06, "step": 347200 }, { "epoch": 98.55520862900937, "grad_norm": 0.0010973454918712378, "learning_rate": 1.4825432869713313e-06, "loss": 2.7621164917945864e-06, "step": 347210 }, { "epoch": 98.55804711893273, "grad_norm": 0.0004107860731892288, "learning_rate": 1.4797047970479706e-06, "loss": 1.9859522581100465e-06, "step": 347220 }, { "epoch": 98.56088560885608, "grad_norm": 0.00043365141027607024, "learning_rate": 1.4768663071246098e-06, "loss": 1.9486993551254273e-06, "step": 347230 }, { "epoch": 98.56372409877945, "grad_norm": 0.0005683760391548276, "learning_rate": 1.474027817201249e-06, "loss": 2.6293098926544188e-06, "step": 347240 }, { "epoch": 98.56656258870281, "grad_norm": 0.0008943714783526957, "learning_rate": 1.4711893272778881e-06, "loss": 1.5845522284507752e-06, "step": 347250 }, { "epoch": 98.56940107862617, "grad_norm": 0.0009907082421705127, "learning_rate": 1.4683508373545274e-06, "loss": 1.9136816263198854e-06, "step": 347260 }, { "epoch": 98.57223956854953, "grad_norm": 0.0002946381864603609, "learning_rate": 1.4655123474311667e-06, "loss": 1.4116987586021423e-06, "step": 347270 }, { "epoch": 98.5750780584729, "grad_norm": 0.00010216700320597738, "learning_rate": 1.462673857507806e-06, "loss": 2.467632293701172e-06, "step": 347280 }, { "epoch": 98.57791654839626, "grad_norm": 0.00021061206643935293, "learning_rate": 1.4598353675844452e-06, "loss": 1.1939555406570435e-06, "step": 347290 }, { "epoch": 98.58075503831961, "grad_norm": 0.002437722170725465, "learning_rate": 1.4569968776610842e-06, "loss": 2.894178032875061e-06, "step": 347300 }, { "epoch": 98.58359352824297, "grad_norm": 0.00029573729261755943, "learning_rate": 1.4541583877377237e-06, "loss": 1.7076730728149413e-06, "step": 347310 }, { "epoch": 98.58643201816633, "grad_norm": 0.00041280948789790273, "learning_rate": 1.451319897814363e-06, "loss": 1.551210880279541e-06, "step": 347320 }, { "epoch": 98.5892705080897, "grad_norm": 0.00021338232909329236, "learning_rate": 1.448481407891002e-06, "loss": 1.3140961527824402e-06, "step": 347330 }, { "epoch": 98.59210899801306, "grad_norm": 0.0005603953613899648, "learning_rate": 1.4456429179676412e-06, "loss": 1.6286969184875488e-06, "step": 347340 }, { "epoch": 98.59494748793642, "grad_norm": 0.0015363154234364629, "learning_rate": 1.4428044280442805e-06, "loss": 1.6488134860992432e-06, "step": 347350 }, { "epoch": 98.59778597785977, "grad_norm": 0.0005515636876225471, "learning_rate": 1.4399659381209198e-06, "loss": 1.5638768672943116e-06, "step": 347360 }, { "epoch": 98.60062446778313, "grad_norm": 0.0002480536641087383, "learning_rate": 1.437127448197559e-06, "loss": 1.2120231986045838e-06, "step": 347370 }, { "epoch": 98.6034629577065, "grad_norm": 0.0008952056523412466, "learning_rate": 1.434288958274198e-06, "loss": 1.5795230865478516e-06, "step": 347380 }, { "epoch": 98.60630144762986, "grad_norm": 0.0002850462624337524, "learning_rate": 1.4314504683508375e-06, "loss": 1.617148518562317e-06, "step": 347390 }, { "epoch": 98.60913993755322, "grad_norm": 0.0005478545208461583, "learning_rate": 1.4286119784274768e-06, "loss": 1.0525807738304139e-06, "step": 347400 }, { "epoch": 98.61197842747659, "grad_norm": 0.000145839192555286, "learning_rate": 1.4257734885041158e-06, "loss": 2.3515895009040833e-06, "step": 347410 }, { "epoch": 98.61481691739995, "grad_norm": 0.00022239431564230472, "learning_rate": 1.422934998580755e-06, "loss": 1.1101365089416504e-06, "step": 347420 }, { "epoch": 98.6176554073233, "grad_norm": 0.0003568778629414737, "learning_rate": 1.4200965086573943e-06, "loss": 1.7637386918067932e-06, "step": 347430 }, { "epoch": 98.62049389724666, "grad_norm": 0.0003833418304566294, "learning_rate": 1.4172580187340336e-06, "loss": 1.743994653224945e-06, "step": 347440 }, { "epoch": 98.62333238717002, "grad_norm": 0.00020711023535113782, "learning_rate": 1.4144195288106729e-06, "loss": 1.5294179320335389e-06, "step": 347450 }, { "epoch": 98.62617087709339, "grad_norm": 0.0002944854204542935, "learning_rate": 1.411581038887312e-06, "loss": 2.61794775724411e-06, "step": 347460 }, { "epoch": 98.62900936701675, "grad_norm": 0.0016732970252633095, "learning_rate": 1.4087425489639512e-06, "loss": 1.7130747437477113e-06, "step": 347470 }, { "epoch": 98.63184785694011, "grad_norm": 0.0001308688661083579, "learning_rate": 1.4059040590405906e-06, "loss": 1.4062970876693726e-06, "step": 347480 }, { "epoch": 98.63468634686348, "grad_norm": 0.0007554730982519686, "learning_rate": 1.4030655691172297e-06, "loss": 2.467632293701172e-06, "step": 347490 }, { "epoch": 98.63752483678682, "grad_norm": 0.0005047633312642574, "learning_rate": 1.400227079193869e-06, "loss": 1.7860904335975647e-06, "step": 347500 }, { "epoch": 98.63752483678682, "eval_accuracy": 0.9894449036688497, "eval_loss": 0.046956513077020645, "eval_runtime": 60.3226, "eval_samples_per_second": 260.715, "eval_steps_per_second": 4.078, "step": 347500 }, { "epoch": 98.64036332671019, "grad_norm": 0.0001266433682758361, "learning_rate": 1.397388589270508e-06, "loss": 1.5584751963615418e-06, "step": 347510 }, { "epoch": 98.64320181663355, "grad_norm": 0.0005480398540385067, "learning_rate": 1.3945500993471474e-06, "loss": 1.8324702978134154e-06, "step": 347520 }, { "epoch": 98.64604030655691, "grad_norm": 0.00021490122890099883, "learning_rate": 1.3917116094237867e-06, "loss": 1.8225982785224915e-06, "step": 347530 }, { "epoch": 98.64887879648028, "grad_norm": 0.001208526431582868, "learning_rate": 1.3888731195004257e-06, "loss": 1.852773129940033e-06, "step": 347540 }, { "epoch": 98.65171728640364, "grad_norm": 0.0008000292582437396, "learning_rate": 1.386034629577065e-06, "loss": 1.958198845386505e-06, "step": 347550 }, { "epoch": 98.65455577632699, "grad_norm": 0.00022689724573865533, "learning_rate": 1.3831961396537045e-06, "loss": 1.4510005712509155e-06, "step": 347560 }, { "epoch": 98.65739426625035, "grad_norm": 0.0006833263905718923, "learning_rate": 1.3803576497303435e-06, "loss": 1.041218638420105e-06, "step": 347570 }, { "epoch": 98.66023275617371, "grad_norm": 0.0007743309251964092, "learning_rate": 1.3775191598069828e-06, "loss": 1.9904226064682005e-06, "step": 347580 }, { "epoch": 98.66307124609708, "grad_norm": 0.0001650525809964165, "learning_rate": 1.3746806698836218e-06, "loss": 2.107396721839905e-06, "step": 347590 }, { "epoch": 98.66590973602044, "grad_norm": 0.000262069224845618, "learning_rate": 1.3718421799602613e-06, "loss": 1.1879950761795043e-06, "step": 347600 }, { "epoch": 98.6687482259438, "grad_norm": 0.0005497651291079819, "learning_rate": 1.3690036900369005e-06, "loss": 2.5833025574684145e-06, "step": 347610 }, { "epoch": 98.67158671586716, "grad_norm": 0.00012052543752361089, "learning_rate": 1.3661652001135396e-06, "loss": 1.5530735254287719e-06, "step": 347620 }, { "epoch": 98.67442520579051, "grad_norm": 0.0002650695387274027, "learning_rate": 1.3633267101901789e-06, "loss": 1.8972903490066529e-06, "step": 347630 }, { "epoch": 98.67726369571388, "grad_norm": 0.0001450440176995471, "learning_rate": 1.3604882202668181e-06, "loss": 1.5655532479286194e-06, "step": 347640 }, { "epoch": 98.68010218563724, "grad_norm": 0.00046615657629445195, "learning_rate": 1.3576497303434574e-06, "loss": 1.511909067630768e-06, "step": 347650 }, { "epoch": 98.6829406755606, "grad_norm": 0.00031438053702004254, "learning_rate": 1.3548112404200966e-06, "loss": 2.942979335784912e-06, "step": 347660 }, { "epoch": 98.68577916548396, "grad_norm": 0.0004937838530167937, "learning_rate": 1.3519727504967357e-06, "loss": 1.7898157238960266e-06, "step": 347670 }, { "epoch": 98.68861765540733, "grad_norm": 0.0002499028923921287, "learning_rate": 1.349134260573375e-06, "loss": 1.529976725578308e-06, "step": 347680 }, { "epoch": 98.69145614533069, "grad_norm": 8.354705641977489e-05, "learning_rate": 1.3462957706500144e-06, "loss": 1.6838312149047852e-06, "step": 347690 }, { "epoch": 98.69429463525404, "grad_norm": 0.0003882484161294997, "learning_rate": 1.3434572807266534e-06, "loss": 1.7248094081878662e-06, "step": 347700 }, { "epoch": 98.6971331251774, "grad_norm": 0.00013155363558325917, "learning_rate": 1.3406187908032927e-06, "loss": 1.9665807485580443e-06, "step": 347710 }, { "epoch": 98.69997161510076, "grad_norm": 0.0004210584156680852, "learning_rate": 1.337780300879932e-06, "loss": 1.467578113079071e-06, "step": 347720 }, { "epoch": 98.70281010502413, "grad_norm": 0.0010747899068519473, "learning_rate": 1.3349418109565712e-06, "loss": 2.0870938897132874e-06, "step": 347730 }, { "epoch": 98.70564859494749, "grad_norm": 0.00038975957431830466, "learning_rate": 1.3321033210332105e-06, "loss": 1.3614073395729065e-06, "step": 347740 }, { "epoch": 98.70848708487085, "grad_norm": 0.00030655786395072937, "learning_rate": 1.3292648311098495e-06, "loss": 1.7240643501281739e-06, "step": 347750 }, { "epoch": 98.71132557479422, "grad_norm": 0.000733366992790252, "learning_rate": 1.3264263411864888e-06, "loss": 1.6462057828903197e-06, "step": 347760 }, { "epoch": 98.71416406471756, "grad_norm": 0.00011280652688583359, "learning_rate": 1.3235878512631282e-06, "loss": 1.3802200555801392e-06, "step": 347770 }, { "epoch": 98.71700255464093, "grad_norm": 0.0002164343313779682, "learning_rate": 1.3207493613397673e-06, "loss": 1.6983598470687865e-06, "step": 347780 }, { "epoch": 98.71984104456429, "grad_norm": 5.386988050304353e-05, "learning_rate": 1.3179108714164065e-06, "loss": 1.6890466213226318e-06, "step": 347790 }, { "epoch": 98.72267953448765, "grad_norm": 0.00016925625095609576, "learning_rate": 1.3150723814930458e-06, "loss": 1.2829899787902831e-06, "step": 347800 }, { "epoch": 98.72551802441102, "grad_norm": 0.000226595439016819, "learning_rate": 1.312233891569685e-06, "loss": 1.9432976841926574e-06, "step": 347810 }, { "epoch": 98.72835651433438, "grad_norm": 0.0008522415300831199, "learning_rate": 1.3093954016463243e-06, "loss": 1.6747042536735535e-06, "step": 347820 }, { "epoch": 98.73119500425773, "grad_norm": 0.00030034786323085427, "learning_rate": 1.3065569117229634e-06, "loss": 1.7035752534866333e-06, "step": 347830 }, { "epoch": 98.73403349418109, "grad_norm": 0.0006078009610064328, "learning_rate": 1.3037184217996026e-06, "loss": 1.7832964658737182e-06, "step": 347840 }, { "epoch": 98.73687198410445, "grad_norm": 0.0007617381634190679, "learning_rate": 1.3008799318762419e-06, "loss": 1.7816200852394104e-06, "step": 347850 }, { "epoch": 98.73971047402782, "grad_norm": 0.0006627843831665814, "learning_rate": 1.2980414419528811e-06, "loss": 2.2027641534805296e-06, "step": 347860 }, { "epoch": 98.74254896395118, "grad_norm": 0.00032571470364928246, "learning_rate": 1.2952029520295204e-06, "loss": 1.3010576367378235e-06, "step": 347870 }, { "epoch": 98.74538745387454, "grad_norm": 0.0005604938487522304, "learning_rate": 1.2923644621061596e-06, "loss": 2.007186412811279e-06, "step": 347880 }, { "epoch": 98.7482259437979, "grad_norm": 0.0003258677897974849, "learning_rate": 1.2895259721827987e-06, "loss": 1.6676262021064757e-06, "step": 347890 }, { "epoch": 98.75106443372125, "grad_norm": 0.0008331894059665501, "learning_rate": 1.2866874822594382e-06, "loss": 2.1144747734069825e-06, "step": 347900 }, { "epoch": 98.75390292364462, "grad_norm": 0.000779016874730587, "learning_rate": 1.2838489923360772e-06, "loss": 2.1342188119888305e-06, "step": 347910 }, { "epoch": 98.75674141356798, "grad_norm": 0.00017549435142427683, "learning_rate": 1.2810105024127165e-06, "loss": 1.490861177444458e-06, "step": 347920 }, { "epoch": 98.75957990349134, "grad_norm": 0.00020334326836746186, "learning_rate": 1.2781720124893557e-06, "loss": 1.5482306480407716e-06, "step": 347930 }, { "epoch": 98.7624183934147, "grad_norm": 0.000311135605443269, "learning_rate": 1.275333522565995e-06, "loss": 9.30391252040863e-07, "step": 347940 }, { "epoch": 98.76525688333807, "grad_norm": 0.0002596154226921499, "learning_rate": 1.2724950326426342e-06, "loss": 1.3086944818496705e-06, "step": 347950 }, { "epoch": 98.76809537326143, "grad_norm": 0.0002317965409019962, "learning_rate": 1.2696565427192735e-06, "loss": 1.6786158084869386e-06, "step": 347960 }, { "epoch": 98.77093386318478, "grad_norm": 0.0003361384733580053, "learning_rate": 1.2668180527959125e-06, "loss": 2.3562461137771606e-06, "step": 347970 }, { "epoch": 98.77377235310814, "grad_norm": 0.00029493789770640433, "learning_rate": 1.2639795628725518e-06, "loss": 1.3597309589385986e-06, "step": 347980 }, { "epoch": 98.7766108430315, "grad_norm": 8.529925253242254e-05, "learning_rate": 1.261141072949191e-06, "loss": 1.2002885341644287e-06, "step": 347990 }, { "epoch": 98.77944933295487, "grad_norm": 4.619416722562164e-05, "learning_rate": 1.2583025830258303e-06, "loss": 2.121739089488983e-06, "step": 348000 }, { "epoch": 98.77944933295487, "eval_accuracy": 0.9896356584218223, "eval_loss": 0.047118719667196274, "eval_runtime": 47.6143, "eval_samples_per_second": 330.3, "eval_steps_per_second": 5.167, "step": 348000 }, { "epoch": 98.78228782287823, "grad_norm": 0.00034911843249574304, "learning_rate": 1.2554640931024696e-06, "loss": 1.331046223640442e-06, "step": 348010 }, { "epoch": 98.7851263128016, "grad_norm": 0.00029923440888524055, "learning_rate": 1.2526256031791088e-06, "loss": 1.3645738363265992e-06, "step": 348020 }, { "epoch": 98.78796480272496, "grad_norm": 0.001022741082124412, "learning_rate": 1.249787113255748e-06, "loss": 1.3394281268119812e-06, "step": 348030 }, { "epoch": 98.7908032926483, "grad_norm": 0.00039613721310161054, "learning_rate": 1.2469486233323873e-06, "loss": 1.3854354619979858e-06, "step": 348040 }, { "epoch": 98.79364178257167, "grad_norm": 0.0003977595188189298, "learning_rate": 1.2441101334090264e-06, "loss": 1.6877427697181702e-06, "step": 348050 }, { "epoch": 98.79648027249503, "grad_norm": 0.0001415089500369504, "learning_rate": 1.2412716434856656e-06, "loss": 2.9064714908599855e-06, "step": 348060 }, { "epoch": 98.7993187624184, "grad_norm": 0.0016236966475844383, "learning_rate": 1.238433153562305e-06, "loss": 1.6802921891212464e-06, "step": 348070 }, { "epoch": 98.80215725234176, "grad_norm": 0.00013884114741813391, "learning_rate": 1.2355946636389442e-06, "loss": 2.357177436351776e-06, "step": 348080 }, { "epoch": 98.80499574226512, "grad_norm": 0.00031195252086035907, "learning_rate": 1.2327561737155834e-06, "loss": 1.6663223505020142e-06, "step": 348090 }, { "epoch": 98.80783423218847, "grad_norm": 0.0005106867756694555, "learning_rate": 1.2299176837922225e-06, "loss": 2.2975727915763855e-06, "step": 348100 }, { "epoch": 98.81067272211183, "grad_norm": 0.0001943350216606632, "learning_rate": 1.227079193868862e-06, "loss": 1.5053898096084596e-06, "step": 348110 }, { "epoch": 98.8135112120352, "grad_norm": 0.00036214423016645014, "learning_rate": 1.2242407039455012e-06, "loss": 1.673772931098938e-06, "step": 348120 }, { "epoch": 98.81634970195856, "grad_norm": 0.00012022367445752025, "learning_rate": 1.2214022140221402e-06, "loss": 1.9166618585586548e-06, "step": 348130 }, { "epoch": 98.81918819188192, "grad_norm": 0.0003619114577304572, "learning_rate": 1.2185637240987795e-06, "loss": 2.2588297724723817e-06, "step": 348140 }, { "epoch": 98.82202668180528, "grad_norm": 0.00156393984798342, "learning_rate": 1.2157252341754187e-06, "loss": 2.232193946838379e-06, "step": 348150 }, { "epoch": 98.82486517172865, "grad_norm": 0.0004832730919588357, "learning_rate": 1.212886744252058e-06, "loss": 1.7216429114341735e-06, "step": 348160 }, { "epoch": 98.827703661652, "grad_norm": 0.0010889016557484865, "learning_rate": 1.2100482543286973e-06, "loss": 2.098269760608673e-06, "step": 348170 }, { "epoch": 98.83054215157536, "grad_norm": 0.00032889432623051107, "learning_rate": 1.2072097644053363e-06, "loss": 1.9935891032218932e-06, "step": 348180 }, { "epoch": 98.83338064149872, "grad_norm": 0.00041601472184993327, "learning_rate": 1.2043712744819756e-06, "loss": 2.7533620595932008e-06, "step": 348190 }, { "epoch": 98.83621913142208, "grad_norm": 0.0007794255507178605, "learning_rate": 1.201532784558615e-06, "loss": 1.504458487033844e-06, "step": 348200 }, { "epoch": 98.83905762134545, "grad_norm": 0.0006034303805790842, "learning_rate": 1.198694294635254e-06, "loss": 2.364441752433777e-06, "step": 348210 }, { "epoch": 98.84189611126881, "grad_norm": 0.00035265469341538846, "learning_rate": 1.1958558047118933e-06, "loss": 1.6069039702415467e-06, "step": 348220 }, { "epoch": 98.84473460119217, "grad_norm": 0.00019406380306463689, "learning_rate": 1.1930173147885326e-06, "loss": 1.6421079635620118e-06, "step": 348230 }, { "epoch": 98.84757309111552, "grad_norm": 0.00035041425144299865, "learning_rate": 1.1901788248651718e-06, "loss": 2.205371856689453e-06, "step": 348240 }, { "epoch": 98.85041158103888, "grad_norm": 0.00044059762149117887, "learning_rate": 1.187340334941811e-06, "loss": 1.8805265426635743e-06, "step": 348250 }, { "epoch": 98.85325007096225, "grad_norm": 0.000214608502574265, "learning_rate": 1.1845018450184501e-06, "loss": 1.9572675228118896e-06, "step": 348260 }, { "epoch": 98.85608856088561, "grad_norm": 0.000540400214958936, "learning_rate": 1.1816633550950894e-06, "loss": 1.4180317521095275e-06, "step": 348270 }, { "epoch": 98.85892705080897, "grad_norm": 0.0003238402714487165, "learning_rate": 1.1788248651717289e-06, "loss": 1.241639256477356e-06, "step": 348280 }, { "epoch": 98.86176554073234, "grad_norm": 0.00048580774455331266, "learning_rate": 1.175986375248368e-06, "loss": 1.7877668142318725e-06, "step": 348290 }, { "epoch": 98.86460403065568, "grad_norm": 0.0009727736469358206, "learning_rate": 1.1731478853250072e-06, "loss": 3.3035874366760252e-06, "step": 348300 }, { "epoch": 98.86744252057905, "grad_norm": 0.000605001172516495, "learning_rate": 1.1703093954016462e-06, "loss": 1.356564462184906e-06, "step": 348310 }, { "epoch": 98.87028101050241, "grad_norm": 0.0007220696425065398, "learning_rate": 1.1674709054782857e-06, "loss": 2.3134052753448486e-06, "step": 348320 }, { "epoch": 98.87311950042577, "grad_norm": 0.00032582541462033987, "learning_rate": 1.164632415554925e-06, "loss": 1.4370307326316833e-06, "step": 348330 }, { "epoch": 98.87595799034914, "grad_norm": 0.0005600993754342198, "learning_rate": 1.161793925631564e-06, "loss": 1.3189390301704407e-06, "step": 348340 }, { "epoch": 98.8787964802725, "grad_norm": 0.001003890996798873, "learning_rate": 1.1589554357082032e-06, "loss": 1.9161030650138856e-06, "step": 348350 }, { "epoch": 98.88163497019586, "grad_norm": 0.0003938230511266738, "learning_rate": 1.1561169457848425e-06, "loss": 1.597963273525238e-06, "step": 348360 }, { "epoch": 98.88447346011921, "grad_norm": 0.00027754914481192827, "learning_rate": 1.1532784558614818e-06, "loss": 1.7328187823295592e-06, "step": 348370 }, { "epoch": 98.88731195004257, "grad_norm": 9.922900062520057e-05, "learning_rate": 1.150439965938121e-06, "loss": 1.9975006580352785e-06, "step": 348380 }, { "epoch": 98.89015043996594, "grad_norm": 0.00026144456933252513, "learning_rate": 1.14760147601476e-06, "loss": 3.775767982006073e-06, "step": 348390 }, { "epoch": 98.8929889298893, "grad_norm": 0.0009466019691899419, "learning_rate": 1.1447629860913993e-06, "loss": 1.7721205949783325e-06, "step": 348400 }, { "epoch": 98.89582741981266, "grad_norm": 0.0002138684649253264, "learning_rate": 1.1419244961680388e-06, "loss": 2.557411789894104e-06, "step": 348410 }, { "epoch": 98.89866590973602, "grad_norm": 0.0003607260587159544, "learning_rate": 1.1390860062446778e-06, "loss": 1.3830140233039856e-06, "step": 348420 }, { "epoch": 98.90150439965939, "grad_norm": 9.895514085656032e-05, "learning_rate": 1.136247516321317e-06, "loss": 1.4476478099822999e-06, "step": 348430 }, { "epoch": 98.90434288958274, "grad_norm": 0.0007234315853565931, "learning_rate": 1.1334090263979564e-06, "loss": 1.232139766216278e-06, "step": 348440 }, { "epoch": 98.9071813795061, "grad_norm": 0.00023914591292850673, "learning_rate": 1.1305705364745956e-06, "loss": 1.4994293451309204e-06, "step": 348450 }, { "epoch": 98.91001986942946, "grad_norm": 0.000341115053743124, "learning_rate": 1.1277320465512349e-06, "loss": 3.160536289215088e-06, "step": 348460 }, { "epoch": 98.91285835935282, "grad_norm": 0.0002426345890853554, "learning_rate": 1.124893556627874e-06, "loss": 1.7898157238960266e-06, "step": 348470 }, { "epoch": 98.91569684927619, "grad_norm": 0.0006061008316464722, "learning_rate": 1.1220550667045132e-06, "loss": 1.833215355873108e-06, "step": 348480 }, { "epoch": 98.91853533919955, "grad_norm": 0.00022747066395822912, "learning_rate": 1.1192165767811526e-06, "loss": 1.3189390301704407e-06, "step": 348490 }, { "epoch": 98.92137382912291, "grad_norm": 0.00016233036876656115, "learning_rate": 1.1163780868577917e-06, "loss": 1.5718862414360046e-06, "step": 348500 }, { "epoch": 98.92137382912291, "eval_accuracy": 0.9893177338335347, "eval_loss": 0.04727412387728691, "eval_runtime": 41.785, "eval_samples_per_second": 376.379, "eval_steps_per_second": 5.887, "step": 348500 }, { "epoch": 98.92421231904626, "grad_norm": 0.00040798206464387476, "learning_rate": 1.113539596934431e-06, "loss": 2.0850449800491335e-06, "step": 348510 }, { "epoch": 98.92705080896962, "grad_norm": 9.576324373483658e-05, "learning_rate": 1.1107011070110702e-06, "loss": 1.376122236251831e-06, "step": 348520 }, { "epoch": 98.92988929889299, "grad_norm": 0.005433047655969858, "learning_rate": 1.1078626170877095e-06, "loss": 3.193877637386322e-06, "step": 348530 }, { "epoch": 98.93272778881635, "grad_norm": 0.0005383390816859901, "learning_rate": 1.1050241271643487e-06, "loss": 1.366250216960907e-06, "step": 348540 }, { "epoch": 98.93556627873971, "grad_norm": 0.00033316086046397686, "learning_rate": 1.1021856372409878e-06, "loss": 1.482851803302765e-06, "step": 348550 }, { "epoch": 98.93840476866308, "grad_norm": 0.0005250046378932893, "learning_rate": 1.099347147317627e-06, "loss": 1.7026439309120179e-06, "step": 348560 }, { "epoch": 98.94124325858643, "grad_norm": 0.0001567946601426229, "learning_rate": 1.0965086573942663e-06, "loss": 2.4477019906044005e-06, "step": 348570 }, { "epoch": 98.94408174850979, "grad_norm": 0.0003004310710821301, "learning_rate": 1.0936701674709055e-06, "loss": 1.4098361134529113e-06, "step": 348580 }, { "epoch": 98.94692023843315, "grad_norm": 0.0015358409145846963, "learning_rate": 1.0908316775475448e-06, "loss": 1.5759840607643128e-06, "step": 348590 }, { "epoch": 98.94975872835651, "grad_norm": 0.00022266845917329192, "learning_rate": 1.087993187624184e-06, "loss": 1.3843178749084474e-06, "step": 348600 }, { "epoch": 98.95259721827988, "grad_norm": 0.0006196031463332474, "learning_rate": 1.085154697700823e-06, "loss": 1.457706093788147e-06, "step": 348610 }, { "epoch": 98.95543570820324, "grad_norm": 0.000537363754119724, "learning_rate": 1.0823162077774626e-06, "loss": 1.5176832675933837e-06, "step": 348620 }, { "epoch": 98.9582741981266, "grad_norm": 0.0001994408667087555, "learning_rate": 1.0794777178541016e-06, "loss": 2.983585000038147e-06, "step": 348630 }, { "epoch": 98.96111268804995, "grad_norm": 0.0018911653896793723, "learning_rate": 1.0766392279307409e-06, "loss": 1.7847865819931031e-06, "step": 348640 }, { "epoch": 98.96395117797331, "grad_norm": 0.0002622452739160508, "learning_rate": 1.0738007380073801e-06, "loss": 1.6402453184127808e-06, "step": 348650 }, { "epoch": 98.96678966789668, "grad_norm": 0.001364318304695189, "learning_rate": 1.0709622480840194e-06, "loss": 1.6927719116210937e-06, "step": 348660 }, { "epoch": 98.96962815782004, "grad_norm": 0.00042521595605649054, "learning_rate": 1.0681237581606586e-06, "loss": 1.6631558537483215e-06, "step": 348670 }, { "epoch": 98.9724666477434, "grad_norm": 0.00044888403499498963, "learning_rate": 1.0652852682372979e-06, "loss": 2.419576048851013e-06, "step": 348680 }, { "epoch": 98.97530513766677, "grad_norm": 0.0010389823000878096, "learning_rate": 1.062446778313937e-06, "loss": 1.5772879123687743e-06, "step": 348690 }, { "epoch": 98.97814362759013, "grad_norm": 0.00012628795229829848, "learning_rate": 1.0596082883905764e-06, "loss": 1.6609206795692445e-06, "step": 348700 }, { "epoch": 98.98098211751348, "grad_norm": 0.0003439889696892351, "learning_rate": 1.0567697984672154e-06, "loss": 1.2654811143875122e-06, "step": 348710 }, { "epoch": 98.98382060743684, "grad_norm": 0.0002785241522360593, "learning_rate": 1.0539313085438547e-06, "loss": 1.3289973139762878e-06, "step": 348720 }, { "epoch": 98.9866590973602, "grad_norm": 0.00037026870995759964, "learning_rate": 1.051092818620494e-06, "loss": 2.2623687982559202e-06, "step": 348730 }, { "epoch": 98.98949758728357, "grad_norm": 0.00025561865186318755, "learning_rate": 1.0482543286971332e-06, "loss": 1.5385448932647705e-06, "step": 348740 }, { "epoch": 98.99233607720693, "grad_norm": 0.0003418461710680276, "learning_rate": 1.0454158387737725e-06, "loss": 1.61733478307724e-06, "step": 348750 }, { "epoch": 98.99517456713029, "grad_norm": 0.00015574449207633734, "learning_rate": 1.0425773488504117e-06, "loss": 1.2639909982681275e-06, "step": 348760 }, { "epoch": 98.99801305705365, "grad_norm": 7.684370211791247e-05, "learning_rate": 1.0397388589270508e-06, "loss": 1.4340505003929138e-06, "step": 348770 }, { "epoch": 99.000851546977, "grad_norm": 0.0014780862256884575, "learning_rate": 1.037184217996026e-06, "loss": 2.0814044546568765e-06, "step": 348780 }, { "epoch": 99.00369003690037, "grad_norm": 0.004319149535149336, "learning_rate": 1.0343457280726655e-06, "loss": 2.2981315851211547e-06, "step": 348790 }, { "epoch": 99.00652852682373, "grad_norm": 0.0005086021847091615, "learning_rate": 1.0315072381493046e-06, "loss": 1.7985701560974122e-06, "step": 348800 }, { "epoch": 99.00936701674709, "grad_norm": 0.0003112682024948299, "learning_rate": 1.0286687482259438e-06, "loss": 1.2198463082313537e-06, "step": 348810 }, { "epoch": 99.01220550667045, "grad_norm": 0.00017280117026530206, "learning_rate": 1.0258302583025829e-06, "loss": 1.107528805732727e-06, "step": 348820 }, { "epoch": 99.01504399659382, "grad_norm": 0.0011283045168966055, "learning_rate": 1.0229917683792224e-06, "loss": 1.8756836652755737e-06, "step": 348830 }, { "epoch": 99.01788248651717, "grad_norm": 0.00012842906289733946, "learning_rate": 1.0201532784558616e-06, "loss": 1.9237399101257323e-06, "step": 348840 }, { "epoch": 99.02072097644053, "grad_norm": 0.0003497885772958398, "learning_rate": 1.0173147885325007e-06, "loss": 1.4474615454673768e-06, "step": 348850 }, { "epoch": 99.02355946636389, "grad_norm": 0.00011409987200750038, "learning_rate": 1.01447629860914e-06, "loss": 2.793222665786743e-06, "step": 348860 }, { "epoch": 99.02639795628725, "grad_norm": 0.000358770223101601, "learning_rate": 1.0116378086857794e-06, "loss": 1.5936791896820069e-06, "step": 348870 }, { "epoch": 99.02923644621062, "grad_norm": 0.0007031502900645137, "learning_rate": 1.0087993187624184e-06, "loss": 3.723427653312683e-06, "step": 348880 }, { "epoch": 99.03207493613398, "grad_norm": 0.0006881663575768471, "learning_rate": 1.0059608288390577e-06, "loss": 1.4077872037887572e-06, "step": 348890 }, { "epoch": 99.03491342605734, "grad_norm": 0.00033638524473644793, "learning_rate": 1.0031223389156967e-06, "loss": 1.6804784536361695e-06, "step": 348900 }, { "epoch": 99.03775191598069, "grad_norm": 0.0007959342910908163, "learning_rate": 1.0002838489923362e-06, "loss": 2.037174999713898e-06, "step": 348910 }, { "epoch": 99.04059040590406, "grad_norm": 0.000304780260194093, "learning_rate": 9.974453590689755e-07, "loss": 1.3327226042747498e-06, "step": 348920 }, { "epoch": 99.04342889582742, "grad_norm": 0.00020968817989341915, "learning_rate": 9.946068691456145e-07, "loss": 1.4936551451683044e-06, "step": 348930 }, { "epoch": 99.04626738575078, "grad_norm": 0.0004131268651690334, "learning_rate": 9.917683792222538e-07, "loss": 1.3446435332298278e-06, "step": 348940 }, { "epoch": 99.04910587567414, "grad_norm": 0.00041704310569912195, "learning_rate": 9.88929889298893e-07, "loss": 1.3383105397224426e-06, "step": 348950 }, { "epoch": 99.0519443655975, "grad_norm": 0.001293187728151679, "learning_rate": 9.860913993755323e-07, "loss": 1.7855316400527955e-06, "step": 348960 }, { "epoch": 99.05478285552087, "grad_norm": 0.0006938673323020339, "learning_rate": 9.832529094521715e-07, "loss": 1.8345192074775695e-06, "step": 348970 }, { "epoch": 99.05762134544422, "grad_norm": 0.0002210534003097564, "learning_rate": 9.804144195288106e-07, "loss": 2.504140138626099e-06, "step": 348980 }, { "epoch": 99.06045983536758, "grad_norm": 0.0006732524489052594, "learning_rate": 9.775759296054498e-07, "loss": 1.2895092368125915e-06, "step": 348990 }, { "epoch": 99.06329832529094, "grad_norm": 0.0006828542100265622, "learning_rate": 9.747374396820893e-07, "loss": 2.3709610104560854e-06, "step": 349000 }, { "epoch": 99.06329832529094, "eval_accuracy": 0.9892541489158772, "eval_loss": 0.046981874853372574, "eval_runtime": 49.9449, "eval_samples_per_second": 314.887, "eval_steps_per_second": 4.925, "step": 349000 }, { "epoch": 99.0661368152143, "grad_norm": 0.0009712048922665417, "learning_rate": 9.718989497587283e-07, "loss": 1.5236437320709229e-06, "step": 349010 }, { "epoch": 99.06897530513767, "grad_norm": 0.0006035659462213516, "learning_rate": 9.690604598353676e-07, "loss": 1.4819204807281495e-06, "step": 349020 }, { "epoch": 99.07181379506103, "grad_norm": 0.0002816425112541765, "learning_rate": 9.662219699120069e-07, "loss": 1.9097700715065e-06, "step": 349030 }, { "epoch": 99.07465228498438, "grad_norm": 0.0040766154415905476, "learning_rate": 9.633834799886461e-07, "loss": 1.9552186131477357e-06, "step": 349040 }, { "epoch": 99.07749077490774, "grad_norm": 0.00023978619719855487, "learning_rate": 9.605449900652854e-07, "loss": 1.7233192920684815e-06, "step": 349050 }, { "epoch": 99.08032926483111, "grad_norm": 0.004862856585532427, "learning_rate": 9.577065001419244e-07, "loss": 2.1807849407196044e-06, "step": 349060 }, { "epoch": 99.08316775475447, "grad_norm": 0.00028548965929076076, "learning_rate": 9.548680102185637e-07, "loss": 1.4839693903923036e-06, "step": 349070 }, { "epoch": 99.08600624467783, "grad_norm": 0.00047168831224553287, "learning_rate": 9.52029520295203e-07, "loss": 1.2908130884170532e-06, "step": 349080 }, { "epoch": 99.0888447346012, "grad_norm": 0.00017757115710992366, "learning_rate": 9.491910303718422e-07, "loss": 2.0813196897506716e-06, "step": 349090 }, { "epoch": 99.09168322452456, "grad_norm": 0.0011135173263028264, "learning_rate": 9.463525404484814e-07, "loss": 2.0258128643035887e-06, "step": 349100 }, { "epoch": 99.09452171444791, "grad_norm": 0.0004956948687322438, "learning_rate": 9.435140505251207e-07, "loss": 1.2431293725967407e-06, "step": 349110 }, { "epoch": 99.09736020437127, "grad_norm": 0.0003600322233978659, "learning_rate": 9.406755606017599e-07, "loss": 1.6108155250549316e-06, "step": 349120 }, { "epoch": 99.10019869429463, "grad_norm": 0.0005400209920480847, "learning_rate": 9.378370706783991e-07, "loss": 2.199411392211914e-06, "step": 349130 }, { "epoch": 99.103037184218, "grad_norm": 0.00029704999178647995, "learning_rate": 9.349985807550383e-07, "loss": 1.2401491403579711e-06, "step": 349140 }, { "epoch": 99.10587567414136, "grad_norm": 0.00048026506556198, "learning_rate": 9.321600908316775e-07, "loss": 2.004578709602356e-06, "step": 349150 }, { "epoch": 99.10871416406472, "grad_norm": 0.0008104791631922126, "learning_rate": 9.293216009083169e-07, "loss": 1.1021271347999574e-06, "step": 349160 }, { "epoch": 99.11155265398808, "grad_norm": 0.0009562408085912466, "learning_rate": 9.26483110984956e-07, "loss": 1.5560537576675415e-06, "step": 349170 }, { "epoch": 99.11439114391143, "grad_norm": 0.0010604837443679571, "learning_rate": 9.236446210615953e-07, "loss": 1.556985080242157e-06, "step": 349180 }, { "epoch": 99.1172296338348, "grad_norm": 0.0001603506680112332, "learning_rate": 9.208061311382346e-07, "loss": 2.1323561668395995e-06, "step": 349190 }, { "epoch": 99.12006812375816, "grad_norm": 0.00038042673259042203, "learning_rate": 9.179676412148737e-07, "loss": 1.5575438737869264e-06, "step": 349200 }, { "epoch": 99.12290661368152, "grad_norm": 0.0009902362944558263, "learning_rate": 9.15129151291513e-07, "loss": 1.621246337890625e-06, "step": 349210 }, { "epoch": 99.12574510360488, "grad_norm": 0.002867442322894931, "learning_rate": 9.122906613681521e-07, "loss": 2.3147091269493104e-06, "step": 349220 }, { "epoch": 99.12858359352825, "grad_norm": 0.0013306204928085208, "learning_rate": 9.094521714447914e-07, "loss": 1.8458813428878784e-06, "step": 349230 }, { "epoch": 99.13142208345161, "grad_norm": 0.0002051671763183549, "learning_rate": 9.066136815214307e-07, "loss": 1.6003847122192384e-06, "step": 349240 }, { "epoch": 99.13426057337496, "grad_norm": 5.657254587276839e-05, "learning_rate": 9.037751915980698e-07, "loss": 1.7309561371803283e-06, "step": 349250 }, { "epoch": 99.13709906329832, "grad_norm": 0.0003398172848392278, "learning_rate": 9.009367016747091e-07, "loss": 1.4221295714378357e-06, "step": 349260 }, { "epoch": 99.13993755322169, "grad_norm": 0.0001436503225704655, "learning_rate": 8.980982117513484e-07, "loss": 1.7171725630760192e-06, "step": 349270 }, { "epoch": 99.14277604314505, "grad_norm": 0.0003375340602360666, "learning_rate": 8.952597218279875e-07, "loss": 1.3537704944610595e-06, "step": 349280 }, { "epoch": 99.14561453306841, "grad_norm": 0.00035833707079291344, "learning_rate": 8.924212319046268e-07, "loss": 1.4476478099822999e-06, "step": 349290 }, { "epoch": 99.14845302299177, "grad_norm": 0.00010353037941968068, "learning_rate": 8.89582741981266e-07, "loss": 1.2703239917755127e-06, "step": 349300 }, { "epoch": 99.15129151291512, "grad_norm": 0.0006804461008869112, "learning_rate": 8.867442520579052e-07, "loss": 1.8250197172164916e-06, "step": 349310 }, { "epoch": 99.15413000283849, "grad_norm": 0.00045636913273483515, "learning_rate": 8.839057621345445e-07, "loss": 1.2712553143501282e-06, "step": 349320 }, { "epoch": 99.15696849276185, "grad_norm": 0.00013889692490920424, "learning_rate": 8.810672722111836e-07, "loss": 1.0661780834197998e-06, "step": 349330 }, { "epoch": 99.15980698268521, "grad_norm": 0.0001434010046068579, "learning_rate": 8.782287822878229e-07, "loss": 1.9470229744911193e-06, "step": 349340 }, { "epoch": 99.16264547260857, "grad_norm": 0.0007971425075083971, "learning_rate": 8.753902923644622e-07, "loss": 1.5538185834884644e-06, "step": 349350 }, { "epoch": 99.16548396253194, "grad_norm": 0.00016148597933351994, "learning_rate": 8.725518024411013e-07, "loss": 1.8300488591194153e-06, "step": 349360 }, { "epoch": 99.1683224524553, "grad_norm": 0.0007067923434078693, "learning_rate": 8.697133125177407e-07, "loss": 2.2534281015396117e-06, "step": 349370 }, { "epoch": 99.17116094237865, "grad_norm": 0.00015164310752879828, "learning_rate": 8.668748225943798e-07, "loss": 1.3316050171852111e-06, "step": 349380 }, { "epoch": 99.17399943230201, "grad_norm": 0.00023704096383880824, "learning_rate": 8.640363326710191e-07, "loss": 1.4705583453178406e-06, "step": 349390 }, { "epoch": 99.17683792222537, "grad_norm": 0.00020265317289158702, "learning_rate": 8.611978427476583e-07, "loss": 2.0381063222885133e-06, "step": 349400 }, { "epoch": 99.17967641214874, "grad_norm": 0.00026728917146101594, "learning_rate": 8.583593528242975e-07, "loss": 1.226365566253662e-06, "step": 349410 }, { "epoch": 99.1825149020721, "grad_norm": 0.00014779649791307747, "learning_rate": 8.555208629009367e-07, "loss": 1.356378197669983e-06, "step": 349420 }, { "epoch": 99.18535339199546, "grad_norm": 0.0005766909453086555, "learning_rate": 8.526823729775761e-07, "loss": 1.7356127500534057e-06, "step": 349430 }, { "epoch": 99.18819188191883, "grad_norm": 0.0003461216692812741, "learning_rate": 8.498438830542151e-07, "loss": 1.4901161193847656e-06, "step": 349440 }, { "epoch": 99.19103037184217, "grad_norm": 0.00033154874108731747, "learning_rate": 8.470053931308545e-07, "loss": 1.4955177903175354e-06, "step": 349450 }, { "epoch": 99.19386886176554, "grad_norm": 0.00011016992357326671, "learning_rate": 8.441669032074935e-07, "loss": 1.765228807926178e-06, "step": 349460 }, { "epoch": 99.1967073516889, "grad_norm": 0.0007647150196135044, "learning_rate": 8.413284132841329e-07, "loss": 1.813657581806183e-06, "step": 349470 }, { "epoch": 99.19954584161226, "grad_norm": 0.00021423808357212692, "learning_rate": 8.384899233607722e-07, "loss": 2.71722674369812e-06, "step": 349480 }, { "epoch": 99.20238433153563, "grad_norm": 0.00045220108586363494, "learning_rate": 8.356514334374113e-07, "loss": 1.6685575246810912e-06, "step": 349490 }, { "epoch": 99.20522282145899, "grad_norm": 0.003249697620049119, "learning_rate": 8.328129435140506e-07, "loss": 1.9000843167304993e-06, "step": 349500 }, { "epoch": 99.20522282145899, "eval_accuracy": 0.9895720735041648, "eval_loss": 0.04705456271767616, "eval_runtime": 64.4132, "eval_samples_per_second": 244.158, "eval_steps_per_second": 3.819, "step": 349500 }, { "epoch": 99.20806131138234, "grad_norm": 0.00021089836081955582, "learning_rate": 8.299744535906898e-07, "loss": 1.3940036296844482e-06, "step": 349510 }, { "epoch": 99.2108998013057, "grad_norm": 0.00021090275549795479, "learning_rate": 8.27135963667329e-07, "loss": 1.31949782371521e-06, "step": 349520 }, { "epoch": 99.21373829122906, "grad_norm": 0.00012719331425614655, "learning_rate": 8.242974737439682e-07, "loss": 2.137571573257446e-06, "step": 349530 }, { "epoch": 99.21657678115243, "grad_norm": 0.0002005935530178249, "learning_rate": 8.214589838206074e-07, "loss": 1.8320977687835694e-06, "step": 349540 }, { "epoch": 99.21941527107579, "grad_norm": 0.0013802603352814913, "learning_rate": 8.186204938972466e-07, "loss": 1.8376857042312622e-06, "step": 349550 }, { "epoch": 99.22225376099915, "grad_norm": 0.0007126769050955772, "learning_rate": 8.15782003973886e-07, "loss": 3.1070783734321593e-06, "step": 349560 }, { "epoch": 99.22509225092251, "grad_norm": 0.00011423160322010517, "learning_rate": 8.12943514050525e-07, "loss": 1.3587996363639832e-06, "step": 349570 }, { "epoch": 99.22793074084586, "grad_norm": 0.0016104317037388682, "learning_rate": 8.101050241271644e-07, "loss": 1.9904226064682005e-06, "step": 349580 }, { "epoch": 99.23076923076923, "grad_norm": 0.00021399282559286803, "learning_rate": 8.072665342038037e-07, "loss": 1.4334917068481446e-06, "step": 349590 }, { "epoch": 99.23360772069259, "grad_norm": 0.000914550619199872, "learning_rate": 8.044280442804428e-07, "loss": 1.7277896404266358e-06, "step": 349600 }, { "epoch": 99.23644621061595, "grad_norm": 0.0012129007373005152, "learning_rate": 8.015895543570821e-07, "loss": 2.0025297999382017e-06, "step": 349610 }, { "epoch": 99.23928470053932, "grad_norm": 0.00044136575888842344, "learning_rate": 7.987510644337212e-07, "loss": 1.7443671822547913e-06, "step": 349620 }, { "epoch": 99.24212319046268, "grad_norm": 0.0005107195465825498, "learning_rate": 7.959125745103605e-07, "loss": 1.7810612916946412e-06, "step": 349630 }, { "epoch": 99.24496168038604, "grad_norm": 0.00046971565461717546, "learning_rate": 7.930740845869999e-07, "loss": 1.2623146176338197e-06, "step": 349640 }, { "epoch": 99.24780017030939, "grad_norm": 0.0016321793664246798, "learning_rate": 7.902355946636389e-07, "loss": 2.1675601601600648e-06, "step": 349650 }, { "epoch": 99.25063866023275, "grad_norm": 0.0015641784993931651, "learning_rate": 7.873971047402783e-07, "loss": 1.9056722521781922e-06, "step": 349660 }, { "epoch": 99.25347715015612, "grad_norm": 0.0008022701949812472, "learning_rate": 7.845586148169175e-07, "loss": 1.9550323486328123e-06, "step": 349670 }, { "epoch": 99.25631564007948, "grad_norm": 0.0002672205155249685, "learning_rate": 7.817201248935567e-07, "loss": 1.695007085800171e-06, "step": 349680 }, { "epoch": 99.25915413000284, "grad_norm": 0.0016917749308049679, "learning_rate": 7.788816349701959e-07, "loss": 1.893751323223114e-06, "step": 349690 }, { "epoch": 99.2619926199262, "grad_norm": 0.000711156870238483, "learning_rate": 7.760431450468351e-07, "loss": 1.6322359442710876e-06, "step": 349700 }, { "epoch": 99.26483110984957, "grad_norm": 5.848689761478454e-05, "learning_rate": 7.732046551234743e-07, "loss": 1.5407800674438477e-06, "step": 349710 }, { "epoch": 99.26766959977292, "grad_norm": 0.0005663202027790248, "learning_rate": 7.703661652001136e-07, "loss": 1.8911436200141906e-06, "step": 349720 }, { "epoch": 99.27050808969628, "grad_norm": 4.9390877393307164e-05, "learning_rate": 7.675276752767528e-07, "loss": 1.245364546775818e-06, "step": 349730 }, { "epoch": 99.27334657961964, "grad_norm": 0.00023438839707523584, "learning_rate": 7.64689185353392e-07, "loss": 1.6475096344947815e-06, "step": 349740 }, { "epoch": 99.276185069543, "grad_norm": 0.0011282926425337791, "learning_rate": 7.618506954300313e-07, "loss": 1.9993633031845094e-06, "step": 349750 }, { "epoch": 99.27902355946637, "grad_norm": 0.00041014334419742227, "learning_rate": 7.590122055066704e-07, "loss": 2.2958964109420775e-06, "step": 349760 }, { "epoch": 99.28186204938973, "grad_norm": 0.0019818171858787537, "learning_rate": 7.561737155833098e-07, "loss": 1.7512589693069459e-06, "step": 349770 }, { "epoch": 99.28470053931308, "grad_norm": 0.0002901838452089578, "learning_rate": 7.533352256599489e-07, "loss": 1.634657382965088e-06, "step": 349780 }, { "epoch": 99.28753902923644, "grad_norm": 0.0003299955860711634, "learning_rate": 7.504967357365882e-07, "loss": 2.2936612367630007e-06, "step": 349790 }, { "epoch": 99.2903775191598, "grad_norm": 0.0005186297348700464, "learning_rate": 7.476582458132273e-07, "loss": 1.32732093334198e-06, "step": 349800 }, { "epoch": 99.29321600908317, "grad_norm": 0.0003598602197598666, "learning_rate": 7.448197558898667e-07, "loss": 2.689473330974579e-06, "step": 349810 }, { "epoch": 99.29605449900653, "grad_norm": 0.00044098799116909504, "learning_rate": 7.419812659665058e-07, "loss": 1.7104670405387878e-06, "step": 349820 }, { "epoch": 99.2988929889299, "grad_norm": 0.0009058733121491969, "learning_rate": 7.391427760431451e-07, "loss": 2.820231020450592e-06, "step": 349830 }, { "epoch": 99.30173147885326, "grad_norm": 0.0006676492048427463, "learning_rate": 7.363042861197843e-07, "loss": 2.2675842046737673e-06, "step": 349840 }, { "epoch": 99.3045699687766, "grad_norm": 0.0002516669046599418, "learning_rate": 7.334657961964236e-07, "loss": 1.543201506137848e-06, "step": 349850 }, { "epoch": 99.30740845869997, "grad_norm": 0.0022409886587411165, "learning_rate": 7.306273062730628e-07, "loss": 2.1023675799369813e-06, "step": 349860 }, { "epoch": 99.31024694862333, "grad_norm": 0.0007509898277930915, "learning_rate": 7.27788816349702e-07, "loss": 1.8225982785224915e-06, "step": 349870 }, { "epoch": 99.3130854385467, "grad_norm": 0.0003440749424044043, "learning_rate": 7.249503264263412e-07, "loss": 1.5281140804290771e-06, "step": 349880 }, { "epoch": 99.31592392847006, "grad_norm": 0.00014024136180523783, "learning_rate": 7.221118365029804e-07, "loss": 1.3627111911773682e-06, "step": 349890 }, { "epoch": 99.31876241839342, "grad_norm": 0.00037908233935013413, "learning_rate": 7.192733465796197e-07, "loss": 1.4554709196090698e-06, "step": 349900 }, { "epoch": 99.32160090831678, "grad_norm": 0.00046966804075054824, "learning_rate": 7.164348566562588e-07, "loss": 1.6747042536735535e-06, "step": 349910 }, { "epoch": 99.32443939824013, "grad_norm": 0.0003997432067990303, "learning_rate": 7.135963667328981e-07, "loss": 2.1696090698242187e-06, "step": 349920 }, { "epoch": 99.3272778881635, "grad_norm": 0.0002833537582773715, "learning_rate": 7.107578768095374e-07, "loss": 1.5342608094215394e-06, "step": 349930 }, { "epoch": 99.33011637808686, "grad_norm": 0.0006929088849574327, "learning_rate": 7.079193868861766e-07, "loss": 1.7309561371803283e-06, "step": 349940 }, { "epoch": 99.33295486801022, "grad_norm": 0.00011547574831638485, "learning_rate": 7.050808969628158e-07, "loss": 1.4049932360649108e-06, "step": 349950 }, { "epoch": 99.33579335793358, "grad_norm": 0.0001244536688318476, "learning_rate": 7.02242407039455e-07, "loss": 1.6277655959129333e-06, "step": 349960 }, { "epoch": 99.33863184785695, "grad_norm": 0.00031295380904339254, "learning_rate": 6.994039171160943e-07, "loss": 1.5208497643470764e-06, "step": 349970 }, { "epoch": 99.34147033778031, "grad_norm": 0.0012577236630022526, "learning_rate": 6.965654271927335e-07, "loss": 1.623295247554779e-06, "step": 349980 }, { "epoch": 99.34430882770366, "grad_norm": 0.00032415043096989393, "learning_rate": 6.937269372693727e-07, "loss": 1.7231330275535584e-06, "step": 349990 }, { "epoch": 99.34714731762702, "grad_norm": 0.00044645534944720566, "learning_rate": 6.908884473460119e-07, "loss": 1.4241784811019898e-06, "step": 350000 }, { "epoch": 99.34714731762702, "eval_accuracy": 0.9895084885865073, "eval_loss": 0.046921294182538986, "eval_runtime": 68.975, "eval_samples_per_second": 228.01, "eval_steps_per_second": 3.567, "step": 350000 }, { "epoch": 99.34998580755038, "grad_norm": 0.00024770147865638137, "learning_rate": 6.880499574226512e-07, "loss": 1.6940757632255554e-06, "step": 350010 }, { "epoch": 99.35282429747375, "grad_norm": 0.00035927450517192483, "learning_rate": 6.852114674992905e-07, "loss": 1.7911195755004883e-06, "step": 350020 }, { "epoch": 99.35566278739711, "grad_norm": 0.0001076655462384224, "learning_rate": 6.823729775759296e-07, "loss": 2.2668391466140747e-06, "step": 350030 }, { "epoch": 99.35850127732047, "grad_norm": 0.0003050509840250015, "learning_rate": 6.795344876525689e-07, "loss": 1.4536082744598388e-06, "step": 350040 }, { "epoch": 99.36133976724382, "grad_norm": 0.00010164286504732445, "learning_rate": 6.766959977292081e-07, "loss": 1.58604234457016e-06, "step": 350050 }, { "epoch": 99.36417825716718, "grad_norm": 0.00034057110315188766, "learning_rate": 6.738575078058474e-07, "loss": 1.6177073121070862e-06, "step": 350060 }, { "epoch": 99.36701674709055, "grad_norm": 0.00035933576873503625, "learning_rate": 6.710190178824865e-07, "loss": 1.1838972568511963e-06, "step": 350070 }, { "epoch": 99.36985523701391, "grad_norm": 0.00039825300336815417, "learning_rate": 6.681805279591258e-07, "loss": 1.7240643501281739e-06, "step": 350080 }, { "epoch": 99.37269372693727, "grad_norm": 0.0017218120628967881, "learning_rate": 6.65342038035765e-07, "loss": 1.8171966075897217e-06, "step": 350090 }, { "epoch": 99.37553221686063, "grad_norm": 0.0003614549059420824, "learning_rate": 6.625035481124042e-07, "loss": 1.4971941709518432e-06, "step": 350100 }, { "epoch": 99.378370706784, "grad_norm": 0.00016661942936480045, "learning_rate": 6.596650581890435e-07, "loss": 1.5560537576675415e-06, "step": 350110 }, { "epoch": 99.38120919670735, "grad_norm": 9.382669668411836e-05, "learning_rate": 6.568265682656826e-07, "loss": 1.0658055543899537e-06, "step": 350120 }, { "epoch": 99.38404768663071, "grad_norm": 0.0020047600846737623, "learning_rate": 6.53988078342322e-07, "loss": 1.8591061234474183e-06, "step": 350130 }, { "epoch": 99.38688617655407, "grad_norm": 0.0005226510111242533, "learning_rate": 6.511495884189611e-07, "loss": 1.4299526810646058e-06, "step": 350140 }, { "epoch": 99.38972466647743, "grad_norm": 0.0005690635880455375, "learning_rate": 6.483110984956004e-07, "loss": 1.8790364265441894e-06, "step": 350150 }, { "epoch": 99.3925631564008, "grad_norm": 0.00028430859674699605, "learning_rate": 6.454726085722395e-07, "loss": 1.3032928109169007e-06, "step": 350160 }, { "epoch": 99.39540164632416, "grad_norm": 0.0004014828591607511, "learning_rate": 6.426341186488789e-07, "loss": 2.210400998592377e-06, "step": 350170 }, { "epoch": 99.39824013624752, "grad_norm": 0.0015451101353392005, "learning_rate": 6.39795628725518e-07, "loss": 1.7737969756126403e-06, "step": 350180 }, { "epoch": 99.40107862617087, "grad_norm": 0.0014818849740549922, "learning_rate": 6.369571388021573e-07, "loss": 2.274662256240845e-06, "step": 350190 }, { "epoch": 99.40391711609423, "grad_norm": 0.00042324475361965597, "learning_rate": 6.341186488787965e-07, "loss": 1.7091631889343262e-06, "step": 350200 }, { "epoch": 99.4067556060176, "grad_norm": 0.001086413743905723, "learning_rate": 6.312801589554358e-07, "loss": 1.7391517758369447e-06, "step": 350210 }, { "epoch": 99.40959409594096, "grad_norm": 0.0002895318320952356, "learning_rate": 6.28441669032075e-07, "loss": 1.8553808331489563e-06, "step": 350220 }, { "epoch": 99.41243258586432, "grad_norm": 0.0001279666175832972, "learning_rate": 6.256031791087142e-07, "loss": 1.936592161655426e-06, "step": 350230 }, { "epoch": 99.41527107578769, "grad_norm": 0.0002284910442540422, "learning_rate": 6.227646891853534e-07, "loss": 1.8866732716560363e-06, "step": 350240 }, { "epoch": 99.41810956571103, "grad_norm": 0.00023585349845234305, "learning_rate": 6.199261992619926e-07, "loss": 1.2909993529319764e-06, "step": 350250 }, { "epoch": 99.4209480556344, "grad_norm": 0.0002458991657476872, "learning_rate": 6.170877093386319e-07, "loss": 1.334957778453827e-06, "step": 350260 }, { "epoch": 99.42378654555776, "grad_norm": 0.0002462246047798544, "learning_rate": 6.142492194152711e-07, "loss": 1.5912577509880065e-06, "step": 350270 }, { "epoch": 99.42662503548112, "grad_norm": 0.0009564609499648213, "learning_rate": 6.114107294919103e-07, "loss": 1.7920508980751038e-06, "step": 350280 }, { "epoch": 99.42946352540449, "grad_norm": 0.00016928814875427634, "learning_rate": 6.085722395685496e-07, "loss": 1.7669051885604859e-06, "step": 350290 }, { "epoch": 99.43230201532785, "grad_norm": 0.0013389191590249538, "learning_rate": 6.057337496451888e-07, "loss": 1.928769052028656e-06, "step": 350300 }, { "epoch": 99.43514050525121, "grad_norm": 0.0012385669397190213, "learning_rate": 6.02895259721828e-07, "loss": 1.4584511518478394e-06, "step": 350310 }, { "epoch": 99.43797899517456, "grad_norm": 0.0002995823451783508, "learning_rate": 6.000567697984672e-07, "loss": 1.2101605534553529e-06, "step": 350320 }, { "epoch": 99.44081748509792, "grad_norm": 0.0002764595556072891, "learning_rate": 5.972182798751065e-07, "loss": 1.8328428268432617e-06, "step": 350330 }, { "epoch": 99.44365597502129, "grad_norm": 0.00037400139262899756, "learning_rate": 5.943797899517457e-07, "loss": 1.9781291484832764e-06, "step": 350340 }, { "epoch": 99.44649446494465, "grad_norm": 0.012494825758039951, "learning_rate": 5.915413000283849e-07, "loss": 3.2104551792144774e-06, "step": 350350 }, { "epoch": 99.44933295486801, "grad_norm": 0.0009028355707414448, "learning_rate": 5.887028101050241e-07, "loss": 2.290681004524231e-06, "step": 350360 }, { "epoch": 99.45217144479138, "grad_norm": 0.0003331872576382011, "learning_rate": 5.858643201816634e-07, "loss": 1.8639490008354188e-06, "step": 350370 }, { "epoch": 99.45500993471474, "grad_norm": 0.001027044840157032, "learning_rate": 5.830258302583027e-07, "loss": 1.7639249563217163e-06, "step": 350380 }, { "epoch": 99.45784842463809, "grad_norm": 0.00038464198587462306, "learning_rate": 5.801873403349418e-07, "loss": 2.016313374042511e-06, "step": 350390 }, { "epoch": 99.46068691456145, "grad_norm": 0.0009570083348080516, "learning_rate": 5.773488504115811e-07, "loss": 1.6847625374794006e-06, "step": 350400 }, { "epoch": 99.46352540448481, "grad_norm": 0.0006120885955169797, "learning_rate": 5.745103604882203e-07, "loss": 1.4280900359153748e-06, "step": 350410 }, { "epoch": 99.46636389440818, "grad_norm": 0.0002429663436487317, "learning_rate": 5.716718705648596e-07, "loss": 2.9223039746284487e-06, "step": 350420 }, { "epoch": 99.46920238433154, "grad_norm": 0.0003175221791025251, "learning_rate": 5.688333806414987e-07, "loss": 1.471489667892456e-06, "step": 350430 }, { "epoch": 99.4720408742549, "grad_norm": 0.0007024907972663641, "learning_rate": 5.65994890718138e-07, "loss": 2.098269760608673e-06, "step": 350440 }, { "epoch": 99.47487936417826, "grad_norm": 0.00040924976929090917, "learning_rate": 5.631564007947772e-07, "loss": 1.1729076504707337e-06, "step": 350450 }, { "epoch": 99.47771785410161, "grad_norm": 0.00018207063840236515, "learning_rate": 5.603179108714164e-07, "loss": 1.1961907148361207e-06, "step": 350460 }, { "epoch": 99.48055634402498, "grad_norm": 0.0003955562424380332, "learning_rate": 5.574794209480557e-07, "loss": 1.0436400771141051e-06, "step": 350470 }, { "epoch": 99.48339483394834, "grad_norm": 0.00038581807166337967, "learning_rate": 5.546409310246949e-07, "loss": 1.4079734683036804e-06, "step": 350480 }, { "epoch": 99.4862333238717, "grad_norm": 0.00028650203603319824, "learning_rate": 5.518024411013342e-07, "loss": 1.4593824744224548e-06, "step": 350490 }, { "epoch": 99.48907181379506, "grad_norm": 0.00028422041214071214, "learning_rate": 5.489639511779733e-07, "loss": 1.331232488155365e-06, "step": 350500 }, { "epoch": 99.48907181379506, "eval_accuracy": 0.9892541489158772, "eval_loss": 0.04649721458554268, "eval_runtime": 78.5195, "eval_samples_per_second": 200.294, "eval_steps_per_second": 3.133, "step": 350500 }, { "epoch": 99.49191030371843, "grad_norm": 0.0018372409977018833, "learning_rate": 5.461254612546126e-07, "loss": 1.4591962099075317e-06, "step": 350510 }, { "epoch": 99.49474879364178, "grad_norm": 0.0005086309975013137, "learning_rate": 5.432869713312517e-07, "loss": 1.498498022556305e-06, "step": 350520 }, { "epoch": 99.49758728356514, "grad_norm": 0.00021801040566060692, "learning_rate": 5.404484814078911e-07, "loss": 1.9060447812080382e-06, "step": 350530 }, { "epoch": 99.5004257734885, "grad_norm": 0.0005987531039863825, "learning_rate": 5.376099914845302e-07, "loss": 2.853758633136749e-06, "step": 350540 }, { "epoch": 99.50326426341186, "grad_norm": 0.0012395456433296204, "learning_rate": 5.347715015611695e-07, "loss": 1.663714647293091e-06, "step": 350550 }, { "epoch": 99.50610275333523, "grad_norm": 0.0013606514548882842, "learning_rate": 5.319330116378087e-07, "loss": 1.543201506137848e-06, "step": 350560 }, { "epoch": 99.50894124325859, "grad_norm": 3.488768561510369e-05, "learning_rate": 5.29094521714448e-07, "loss": 1.5946105122566224e-06, "step": 350570 }, { "epoch": 99.51177973318195, "grad_norm": 0.0001925306423800066, "learning_rate": 5.262560317910872e-07, "loss": 1.7972663044929504e-06, "step": 350580 }, { "epoch": 99.5146182231053, "grad_norm": 6.806576129747555e-05, "learning_rate": 5.234175418677264e-07, "loss": 1.173466444015503e-06, "step": 350590 }, { "epoch": 99.51745671302866, "grad_norm": 0.00044746193452738225, "learning_rate": 5.205790519443656e-07, "loss": 1.230090856552124e-06, "step": 350600 }, { "epoch": 99.52029520295203, "grad_norm": 0.00028001004830002785, "learning_rate": 5.177405620210049e-07, "loss": 1.3634562492370605e-06, "step": 350610 }, { "epoch": 99.52313369287539, "grad_norm": 0.000399083539377898, "learning_rate": 5.149020720976441e-07, "loss": 1.7136335372924805e-06, "step": 350620 }, { "epoch": 99.52597218279875, "grad_norm": 0.000678252661600709, "learning_rate": 5.120635821742833e-07, "loss": 1.4659017324447632e-06, "step": 350630 }, { "epoch": 99.52881067272212, "grad_norm": 9.57929078140296e-05, "learning_rate": 5.092250922509225e-07, "loss": 1.4044344425201416e-06, "step": 350640 }, { "epoch": 99.53164916264548, "grad_norm": 0.0002984898164868355, "learning_rate": 5.063866023275618e-07, "loss": 1.8186867237091064e-06, "step": 350650 }, { "epoch": 99.53448765256883, "grad_norm": 0.0030644049402326345, "learning_rate": 5.03548112404201e-07, "loss": 1.9701197743415833e-06, "step": 350660 }, { "epoch": 99.53732614249219, "grad_norm": 0.0006310230237431824, "learning_rate": 5.007096224808402e-07, "loss": 2.0889565348625183e-06, "step": 350670 }, { "epoch": 99.54016463241555, "grad_norm": 0.00016767732449807227, "learning_rate": 4.978711325574794e-07, "loss": 1.0697171092033385e-06, "step": 350680 }, { "epoch": 99.54300312233892, "grad_norm": 0.00030828884337097406, "learning_rate": 4.950326426341187e-07, "loss": 1.7262995243072509e-06, "step": 350690 }, { "epoch": 99.54584161226228, "grad_norm": 0.0004200589610263705, "learning_rate": 4.921941527107579e-07, "loss": 1.6549602150917053e-06, "step": 350700 }, { "epoch": 99.54868010218564, "grad_norm": 0.0007194136851467192, "learning_rate": 4.893556627873971e-07, "loss": 2.764351665973663e-06, "step": 350710 }, { "epoch": 99.551518592109, "grad_norm": 0.0010918054031208158, "learning_rate": 4.865171728640363e-07, "loss": 2.309493720531464e-06, "step": 350720 }, { "epoch": 99.55435708203235, "grad_norm": 0.0015534176491200924, "learning_rate": 4.836786829406756e-07, "loss": 1.7838552594184876e-06, "step": 350730 }, { "epoch": 99.55719557195572, "grad_norm": 0.00031141284853219986, "learning_rate": 4.808401930173149e-07, "loss": 1.2448057532310485e-06, "step": 350740 }, { "epoch": 99.56003406187908, "grad_norm": 0.00011598422861425206, "learning_rate": 4.78001703093954e-07, "loss": 1.0266900062561036e-06, "step": 350750 }, { "epoch": 99.56287255180244, "grad_norm": 0.00020207031047903, "learning_rate": 4.7516321317059327e-07, "loss": 1.2081116437911988e-06, "step": 350760 }, { "epoch": 99.5657110417258, "grad_norm": 0.003695564577355981, "learning_rate": 4.723247232472325e-07, "loss": 2.1988525986671448e-06, "step": 350770 }, { "epoch": 99.56854953164917, "grad_norm": 0.0008091258350759745, "learning_rate": 4.6948623332387173e-07, "loss": 1.8669292330741881e-06, "step": 350780 }, { "epoch": 99.57138802157252, "grad_norm": 0.0001302639429923147, "learning_rate": 4.6664774340051093e-07, "loss": 1.2122094631195068e-06, "step": 350790 }, { "epoch": 99.57422651149588, "grad_norm": 0.0007066135294735432, "learning_rate": 4.6380925347715014e-07, "loss": 1.2531876564025878e-06, "step": 350800 }, { "epoch": 99.57706500141924, "grad_norm": 0.00028411223320290446, "learning_rate": 4.6097076355378945e-07, "loss": 2.170540392398834e-06, "step": 350810 }, { "epoch": 99.5799034913426, "grad_norm": 0.00016029585094656795, "learning_rate": 4.5813227363042865e-07, "loss": 8.998438715934754e-07, "step": 350820 }, { "epoch": 99.58274198126597, "grad_norm": 0.00026848542620427907, "learning_rate": 4.5529378370706785e-07, "loss": 1.8360093235969544e-06, "step": 350830 }, { "epoch": 99.58558047118933, "grad_norm": 0.00020425427646841854, "learning_rate": 4.5245529378370706e-07, "loss": 1.3358891010284425e-06, "step": 350840 }, { "epoch": 99.5884189611127, "grad_norm": 0.0005843313992954791, "learning_rate": 4.4961680386034637e-07, "loss": 1.6005709767341613e-06, "step": 350850 }, { "epoch": 99.59125745103604, "grad_norm": 0.0001917238550959155, "learning_rate": 4.4677831393698557e-07, "loss": 2.065300941467285e-06, "step": 350860 }, { "epoch": 99.5940959409594, "grad_norm": 0.00015250343130901456, "learning_rate": 4.439398240136248e-07, "loss": 1.8484890460968017e-06, "step": 350870 }, { "epoch": 99.59693443088277, "grad_norm": 0.0002446840226184577, "learning_rate": 4.41101334090264e-07, "loss": 1.7268583178520203e-06, "step": 350880 }, { "epoch": 99.59977292080613, "grad_norm": 0.0001931223669089377, "learning_rate": 4.382628441669033e-07, "loss": 1.8460676074028016e-06, "step": 350890 }, { "epoch": 99.6026114107295, "grad_norm": 0.00012345201685093343, "learning_rate": 4.354243542435425e-07, "loss": 1.7909333109855652e-06, "step": 350900 }, { "epoch": 99.60544990065286, "grad_norm": 0.000408349180361256, "learning_rate": 4.325858643201817e-07, "loss": 1.197308301925659e-06, "step": 350910 }, { "epoch": 99.60828839057622, "grad_norm": 0.0003535927680786699, "learning_rate": 4.297473743968209e-07, "loss": 1.4418736100196839e-06, "step": 350920 }, { "epoch": 99.61112688049957, "grad_norm": 0.00010238432150799781, "learning_rate": 4.2690888447346016e-07, "loss": 1.623295247554779e-06, "step": 350930 }, { "epoch": 99.61396537042293, "grad_norm": 6.96939678164199e-05, "learning_rate": 4.2407039455009937e-07, "loss": 9.933486580848693e-07, "step": 350940 }, { "epoch": 99.6168038603463, "grad_norm": 0.0002073478972306475, "learning_rate": 4.2123190462673857e-07, "loss": 1.4629215002059936e-06, "step": 350950 }, { "epoch": 99.61964235026966, "grad_norm": 0.00034597908961586654, "learning_rate": 4.1839341470337777e-07, "loss": 2.169981598854065e-06, "step": 350960 }, { "epoch": 99.62248084019302, "grad_norm": 0.0012465291656553745, "learning_rate": 4.155549247800171e-07, "loss": 2.3532658815383913e-06, "step": 350970 }, { "epoch": 99.62531933011638, "grad_norm": 0.0005321289063431323, "learning_rate": 4.127164348566563e-07, "loss": 1.761317253112793e-06, "step": 350980 }, { "epoch": 99.62815782003973, "grad_norm": 0.00021517342247534543, "learning_rate": 4.098779449332955e-07, "loss": 2.19494104385376e-06, "step": 350990 }, { "epoch": 99.6309963099631, "grad_norm": 0.00038492854218930006, "learning_rate": 4.070394550099347e-07, "loss": 1.41095370054245e-06, "step": 351000 }, { "epoch": 99.6309963099631, "eval_accuracy": 0.9893813187511922, "eval_loss": 0.04710870236158371, "eval_runtime": 64.583, "eval_samples_per_second": 243.516, "eval_steps_per_second": 3.809, "step": 351000 }, { "epoch": 99.63383479988646, "grad_norm": 0.00017644894251134247, "learning_rate": 4.04200965086574e-07, "loss": 1.3373792171478271e-06, "step": 351010 }, { "epoch": 99.63667328980982, "grad_norm": 0.0004090402217116207, "learning_rate": 4.013624751632132e-07, "loss": 1.4835968613624573e-06, "step": 351020 }, { "epoch": 99.63951177973318, "grad_norm": 8.555579552194104e-05, "learning_rate": 3.985239852398524e-07, "loss": 1.987069845199585e-06, "step": 351030 }, { "epoch": 99.64235026965655, "grad_norm": 0.0002320492931175977, "learning_rate": 3.956854953164916e-07, "loss": 2.530217170715332e-06, "step": 351040 }, { "epoch": 99.64518875957991, "grad_norm": 7.404158532153815e-05, "learning_rate": 3.9284700539313093e-07, "loss": 1.2632459402084351e-06, "step": 351050 }, { "epoch": 99.64802724950326, "grad_norm": 0.0005256673903204501, "learning_rate": 3.9000851546977013e-07, "loss": 1.562759280204773e-06, "step": 351060 }, { "epoch": 99.65086573942662, "grad_norm": 0.00010371848475188017, "learning_rate": 3.8717002554640934e-07, "loss": 1.9859522581100465e-06, "step": 351070 }, { "epoch": 99.65370422934998, "grad_norm": 0.00016521783254574984, "learning_rate": 3.843315356230486e-07, "loss": 2.1746382117271423e-06, "step": 351080 }, { "epoch": 99.65654271927335, "grad_norm": 0.000659873359836638, "learning_rate": 3.814930456996878e-07, "loss": 1.9632279872894287e-06, "step": 351090 }, { "epoch": 99.65938120919671, "grad_norm": 0.0008000677917152643, "learning_rate": 3.7865455577632706e-07, "loss": 1.5323981642723084e-06, "step": 351100 }, { "epoch": 99.66221969912007, "grad_norm": 5.1759681809926406e-05, "learning_rate": 3.7581606585296626e-07, "loss": 1.370161771774292e-06, "step": 351110 }, { "epoch": 99.66505818904344, "grad_norm": 0.00024756393395364285, "learning_rate": 3.7297757592960546e-07, "loss": 2.4298205971717834e-06, "step": 351120 }, { "epoch": 99.66789667896678, "grad_norm": 0.0003755518700927496, "learning_rate": 3.7013908600624467e-07, "loss": 1.4923512935638429e-06, "step": 351130 }, { "epoch": 99.67073516889015, "grad_norm": 0.001376625499688089, "learning_rate": 3.673005960828839e-07, "loss": 2.234242856502533e-06, "step": 351140 }, { "epoch": 99.67357365881351, "grad_norm": 2.6915718990494497e-05, "learning_rate": 3.6446210615952313e-07, "loss": 2.1260231733322145e-06, "step": 351150 }, { "epoch": 99.67641214873687, "grad_norm": 0.0005663794581778347, "learning_rate": 3.616236162361624e-07, "loss": 1.421011984348297e-06, "step": 351160 }, { "epoch": 99.67925063866024, "grad_norm": 0.0009822973515838385, "learning_rate": 3.587851263128016e-07, "loss": 1.312047243118286e-06, "step": 351170 }, { "epoch": 99.6820891285836, "grad_norm": 0.0004068612470291555, "learning_rate": 3.5594663638944085e-07, "loss": 1.2412667274475097e-06, "step": 351180 }, { "epoch": 99.68492761850696, "grad_norm": 0.00019105029059574008, "learning_rate": 3.5310814646608005e-07, "loss": 1.4280900359153748e-06, "step": 351190 }, { "epoch": 99.68776610843031, "grad_norm": 0.0004666531749535352, "learning_rate": 3.502696565427193e-07, "loss": 2.1262094378471375e-06, "step": 351200 }, { "epoch": 99.69060459835367, "grad_norm": 0.00023525647702626884, "learning_rate": 3.474311666193585e-07, "loss": 2.1746382117271423e-06, "step": 351210 }, { "epoch": 99.69344308827704, "grad_norm": 0.0004924590466544032, "learning_rate": 3.4459267669599777e-07, "loss": 2.1923333406448364e-06, "step": 351220 }, { "epoch": 99.6962815782004, "grad_norm": 0.00017888820730149746, "learning_rate": 3.41754186772637e-07, "loss": 1.5562400221824646e-06, "step": 351230 }, { "epoch": 99.69912006812376, "grad_norm": 0.00033886355231516063, "learning_rate": 3.3891569684927623e-07, "loss": 1.2196600437164306e-06, "step": 351240 }, { "epoch": 99.70195855804712, "grad_norm": 0.0008855383493937552, "learning_rate": 3.3607720692591544e-07, "loss": 1.7929822206497193e-06, "step": 351250 }, { "epoch": 99.70479704797047, "grad_norm": 0.00021900494175497442, "learning_rate": 3.332387170025547e-07, "loss": 9.391456842422485e-07, "step": 351260 }, { "epoch": 99.70763553789384, "grad_norm": 0.0002716032322496176, "learning_rate": 3.304002270791939e-07, "loss": 1.7743557691574097e-06, "step": 351270 }, { "epoch": 99.7104740278172, "grad_norm": 0.0005239273305051029, "learning_rate": 3.2756173715583316e-07, "loss": 1.5974044799804688e-06, "step": 351280 }, { "epoch": 99.71331251774056, "grad_norm": 0.000933088012970984, "learning_rate": 3.2472324723247236e-07, "loss": 1.7175450921058655e-06, "step": 351290 }, { "epoch": 99.71615100766392, "grad_norm": 0.0005145029863342643, "learning_rate": 3.2188475730911156e-07, "loss": 1.751631498336792e-06, "step": 351300 }, { "epoch": 99.71898949758729, "grad_norm": 0.0001288892381126061, "learning_rate": 3.190462673857508e-07, "loss": 1.5744939446449279e-06, "step": 351310 }, { "epoch": 99.72182798751065, "grad_norm": 0.0002268505486426875, "learning_rate": 3.1620777746239e-07, "loss": 1.3209879398345948e-06, "step": 351320 }, { "epoch": 99.724666477434, "grad_norm": 0.0006425357423722744, "learning_rate": 3.1336928753902923e-07, "loss": 1.3966113328933715e-06, "step": 351330 }, { "epoch": 99.72750496735736, "grad_norm": 0.00014145593740977347, "learning_rate": 3.105307976156685e-07, "loss": 1.5763565897941589e-06, "step": 351340 }, { "epoch": 99.73034345728072, "grad_norm": 0.0022049504332244396, "learning_rate": 3.076923076923077e-07, "loss": 2.1127983927726745e-06, "step": 351350 }, { "epoch": 99.73318194720409, "grad_norm": 0.0005425037816166878, "learning_rate": 3.0485381776894695e-07, "loss": 1.9377097487449645e-06, "step": 351360 }, { "epoch": 99.73602043712745, "grad_norm": 0.0006237825145944953, "learning_rate": 3.0201532784558615e-07, "loss": 1.681596040725708e-06, "step": 351370 }, { "epoch": 99.73885892705081, "grad_norm": 0.0001560751989018172, "learning_rate": 2.991768379222254e-07, "loss": 9.924173355102538e-07, "step": 351380 }, { "epoch": 99.74169741697418, "grad_norm": 0.000257141946349293, "learning_rate": 2.963383479988646e-07, "loss": 1.401454210281372e-06, "step": 351390 }, { "epoch": 99.74453590689753, "grad_norm": 0.0011090439511463046, "learning_rate": 2.934998580755038e-07, "loss": 2.04872339963913e-06, "step": 351400 }, { "epoch": 99.74737439682089, "grad_norm": 0.00042331276927143335, "learning_rate": 2.906613681521431e-07, "loss": 1.4670193195343018e-06, "step": 351410 }, { "epoch": 99.75021288674425, "grad_norm": 0.00023698917357251048, "learning_rate": 2.878228782287823e-07, "loss": 1.8689781427383422e-06, "step": 351420 }, { "epoch": 99.75305137666761, "grad_norm": 0.00034864505869336426, "learning_rate": 2.8498438830542154e-07, "loss": 2.061575651168823e-06, "step": 351430 }, { "epoch": 99.75588986659098, "grad_norm": 0.00025331409415230155, "learning_rate": 2.8214589838206074e-07, "loss": 1.977384090423584e-06, "step": 351440 }, { "epoch": 99.75872835651434, "grad_norm": 8.088190224952996e-05, "learning_rate": 2.793074084587e-07, "loss": 2.073310315608978e-06, "step": 351450 }, { "epoch": 99.76156684643769, "grad_norm": 0.0002582153829280287, "learning_rate": 2.764689185353392e-07, "loss": 1.946091651916504e-06, "step": 351460 }, { "epoch": 99.76440533636105, "grad_norm": 0.0001874156150734052, "learning_rate": 2.7363042861197846e-07, "loss": 1.16005539894104e-06, "step": 351470 }, { "epoch": 99.76724382628441, "grad_norm": 0.0003655053733382374, "learning_rate": 2.7079193868861766e-07, "loss": 1.34296715259552e-06, "step": 351480 }, { "epoch": 99.77008231620778, "grad_norm": 0.0004864856309723109, "learning_rate": 2.679534487652569e-07, "loss": 1.292303204536438e-06, "step": 351490 }, { "epoch": 99.77292080613114, "grad_norm": 0.0013965711696073413, "learning_rate": 2.651149588418961e-07, "loss": 1.5826895833015443e-06, "step": 351500 }, { "epoch": 99.77292080613114, "eval_accuracy": 0.9895720735041648, "eval_loss": 0.04714611917734146, "eval_runtime": 80.406, "eval_samples_per_second": 195.595, "eval_steps_per_second": 3.059, "step": 351500 }, { "epoch": 99.7757592960545, "grad_norm": 0.00024365301942452788, "learning_rate": 2.6227646891853533e-07, "loss": 1.1898577213287353e-06, "step": 351510 }, { "epoch": 99.77859778597787, "grad_norm": 0.0001737471466185525, "learning_rate": 2.5943797899517453e-07, "loss": 1.887790858745575e-06, "step": 351520 }, { "epoch": 99.78143627590121, "grad_norm": 0.0002201138559030369, "learning_rate": 2.565994890718138e-07, "loss": 1.9056722521781922e-06, "step": 351530 }, { "epoch": 99.78427476582458, "grad_norm": 0.0003995270817540586, "learning_rate": 2.53760999148453e-07, "loss": 1.8719583749771118e-06, "step": 351540 }, { "epoch": 99.78711325574794, "grad_norm": 0.0008136632386595011, "learning_rate": 2.5092250922509225e-07, "loss": 1.658499240875244e-06, "step": 351550 }, { "epoch": 99.7899517456713, "grad_norm": 0.0004060455539729446, "learning_rate": 2.4808401930173146e-07, "loss": 1.200661063194275e-06, "step": 351560 }, { "epoch": 99.79279023559467, "grad_norm": 0.00031175740878097713, "learning_rate": 2.452455293783707e-07, "loss": 1.5366822481155396e-06, "step": 351570 }, { "epoch": 99.79562872551803, "grad_norm": 0.00011137047840747982, "learning_rate": 2.424070394550099e-07, "loss": 1.2507662177085876e-06, "step": 351580 }, { "epoch": 99.79846721544139, "grad_norm": 0.00036889969487674534, "learning_rate": 2.395685495316492e-07, "loss": 1.3111159205436706e-06, "step": 351590 }, { "epoch": 99.80130570536474, "grad_norm": 0.0006080343155190349, "learning_rate": 2.3673005960828838e-07, "loss": 1.6693025827407838e-06, "step": 351600 }, { "epoch": 99.8041441952881, "grad_norm": 0.0005863899132236838, "learning_rate": 2.3389156968492764e-07, "loss": 2.5082379579544066e-06, "step": 351610 }, { "epoch": 99.80698268521147, "grad_norm": 0.0006657577469013631, "learning_rate": 2.3105307976156684e-07, "loss": 2.29906290769577e-06, "step": 351620 }, { "epoch": 99.80982117513483, "grad_norm": 0.00023339501058217138, "learning_rate": 2.282145898382061e-07, "loss": 1.658126711845398e-06, "step": 351630 }, { "epoch": 99.81265966505819, "grad_norm": 0.00021853741782251745, "learning_rate": 2.253760999148453e-07, "loss": 1.463852822780609e-06, "step": 351640 }, { "epoch": 99.81549815498155, "grad_norm": 0.00036571803502738476, "learning_rate": 2.2253760999148453e-07, "loss": 2.162344753742218e-06, "step": 351650 }, { "epoch": 99.81833664490492, "grad_norm": 0.0009427728946320713, "learning_rate": 2.1969912006812376e-07, "loss": 1.3982877135276795e-06, "step": 351660 }, { "epoch": 99.82117513482827, "grad_norm": 0.000624504522420466, "learning_rate": 2.16860630144763e-07, "loss": 2.0673498511314394e-06, "step": 351670 }, { "epoch": 99.82401362475163, "grad_norm": 0.00020052316540386528, "learning_rate": 2.140221402214022e-07, "loss": 1.8494203686714172e-06, "step": 351680 }, { "epoch": 99.82685211467499, "grad_norm": 0.0005313308211043477, "learning_rate": 2.1118365029804145e-07, "loss": 1.6856938600540161e-06, "step": 351690 }, { "epoch": 99.82969060459835, "grad_norm": 0.0014309068210422993, "learning_rate": 2.0834516037468066e-07, "loss": 1.48136168718338e-06, "step": 351700 }, { "epoch": 99.83252909452172, "grad_norm": 0.0007540996302850544, "learning_rate": 2.0550667045131992e-07, "loss": 1.5981495380401611e-06, "step": 351710 }, { "epoch": 99.83536758444508, "grad_norm": 0.0015388652682304382, "learning_rate": 2.0266818052795912e-07, "loss": 2.5270506739616395e-06, "step": 351720 }, { "epoch": 99.83820607436843, "grad_norm": 0.0005176429986022413, "learning_rate": 1.9982969060459838e-07, "loss": 1.7298385500907899e-06, "step": 351730 }, { "epoch": 99.84104456429179, "grad_norm": 0.0006174053414724767, "learning_rate": 1.9699120068123758e-07, "loss": 1.963600516319275e-06, "step": 351740 }, { "epoch": 99.84388305421515, "grad_norm": 0.0022205167915672064, "learning_rate": 1.941527107578768e-07, "loss": 3.051385283470154e-06, "step": 351750 }, { "epoch": 99.84672154413852, "grad_norm": 0.0002653722476679832, "learning_rate": 1.9131422083451604e-07, "loss": 1.0857358574867248e-06, "step": 351760 }, { "epoch": 99.84956003406188, "grad_norm": 0.00202915258705616, "learning_rate": 1.8847573091115527e-07, "loss": 1.6683712601661683e-06, "step": 351770 }, { "epoch": 99.85239852398524, "grad_norm": 0.00042459191172383726, "learning_rate": 1.856372409877945e-07, "loss": 1.5834346413612366e-06, "step": 351780 }, { "epoch": 99.8552370139086, "grad_norm": 0.0002400799421593547, "learning_rate": 1.8279875106443373e-07, "loss": 1.8322840332984925e-06, "step": 351790 }, { "epoch": 99.85807550383196, "grad_norm": 0.00027483003214001656, "learning_rate": 1.7996026114107297e-07, "loss": 2.2880733013153078e-06, "step": 351800 }, { "epoch": 99.86091399375532, "grad_norm": 0.00021931457740720361, "learning_rate": 1.771217712177122e-07, "loss": 2.039782702922821e-06, "step": 351810 }, { "epoch": 99.86375248367868, "grad_norm": 0.002255306812003255, "learning_rate": 1.7428328129435143e-07, "loss": 2.0815059542655945e-06, "step": 351820 }, { "epoch": 99.86659097360204, "grad_norm": 0.0005799899809062481, "learning_rate": 1.7144479137099066e-07, "loss": 1.6240403056144714e-06, "step": 351830 }, { "epoch": 99.8694294635254, "grad_norm": 0.00017314650176558644, "learning_rate": 1.6860630144762986e-07, "loss": 1.508183777332306e-06, "step": 351840 }, { "epoch": 99.87226795344877, "grad_norm": 0.0015294469194486737, "learning_rate": 1.657678115242691e-07, "loss": 1.9045546650886536e-06, "step": 351850 }, { "epoch": 99.87510644337213, "grad_norm": 0.0006603759247809649, "learning_rate": 1.6292932160090832e-07, "loss": 1.3064593076705932e-06, "step": 351860 }, { "epoch": 99.87794493329548, "grad_norm": 0.00013510730059351772, "learning_rate": 1.6009083167754755e-07, "loss": 2.358853816986084e-06, "step": 351870 }, { "epoch": 99.88078342321884, "grad_norm": 0.00015268384595401585, "learning_rate": 1.5725234175418678e-07, "loss": 1.4487653970718383e-06, "step": 351880 }, { "epoch": 99.8836219131422, "grad_norm": 0.0047590890899300575, "learning_rate": 1.5441385183082602e-07, "loss": 2.0794570446014406e-06, "step": 351890 }, { "epoch": 99.88646040306557, "grad_norm": 0.0002623025211505592, "learning_rate": 1.5157536190746525e-07, "loss": 2.031587064266205e-06, "step": 351900 }, { "epoch": 99.88929889298893, "grad_norm": 0.0002870059397537261, "learning_rate": 1.4873687198410448e-07, "loss": 1.6761943697929382e-06, "step": 351910 }, { "epoch": 99.8921373829123, "grad_norm": 0.00035656665568239987, "learning_rate": 1.458983820607437e-07, "loss": 2.395175397396088e-06, "step": 351920 }, { "epoch": 99.89497587283566, "grad_norm": 0.0008936547092162073, "learning_rate": 1.430598921373829e-07, "loss": 1.417100429534912e-06, "step": 351930 }, { "epoch": 99.89781436275901, "grad_norm": 0.0009044281905516982, "learning_rate": 1.4022140221402214e-07, "loss": 1.7588958144187926e-06, "step": 351940 }, { "epoch": 99.90065285268237, "grad_norm": 0.0011674013221636415, "learning_rate": 1.3738291229066137e-07, "loss": 1.8719583749771118e-06, "step": 351950 }, { "epoch": 99.90349134260573, "grad_norm": 0.00046923322952352464, "learning_rate": 1.345444223673006e-07, "loss": 2.4981796741485597e-06, "step": 351960 }, { "epoch": 99.9063298325291, "grad_norm": 0.0005134146776981652, "learning_rate": 1.3170593244393983e-07, "loss": 1.1391937732696534e-06, "step": 351970 }, { "epoch": 99.90916832245246, "grad_norm": 0.0001696024846751243, "learning_rate": 1.2886744252057906e-07, "loss": 1.6236677765846253e-06, "step": 351980 }, { "epoch": 99.91200681237582, "grad_norm": 0.00017475061758887023, "learning_rate": 1.260289525972183e-07, "loss": 1.8253922462463379e-06, "step": 351990 }, { "epoch": 99.91484530229917, "grad_norm": 0.0012148086680099368, "learning_rate": 1.2319046267385753e-07, "loss": 2.4940818548202515e-06, "step": 352000 }, { "epoch": 99.91484530229917, "eval_accuracy": 0.9895720735041648, "eval_loss": 0.04702833667397499, "eval_runtime": 60.2723, "eval_samples_per_second": 260.932, "eval_steps_per_second": 4.081, "step": 352000 } ], "logging_steps": 10, "max_steps": 352300, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.372658316277044e+21, "train_batch_size": 64, "trial_name": null, "trial_params": null }